Megatest

Diff
Login

Differences From Artifact [a06e687141]:

To Artifact [147cbc54ec]:


48
49
50
51
52
53
54

55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77

78
79
80
81
82



83





























84

85
86
87
88
89
90
91
92
93

94
95
96
97
98
99
100
  hed tal reg reruns  test-record
  test-name item-path jobgroup
  waitons testmode  newtal itemmaps prereqs-not-met)

;; set up needed environment variables given a run-id and optionally a target, itempath etc.
;;
(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f))

  (let* ((target    (or intarget 
			(common:args-get-target)
			(get-environment-variable "MT_TARGET")))
	 (keys      (if inkeys    inkeys    (rmt:get-keys)))
	 (keyvals   (if inkeyvals inkeyvals (keys:target->keyval keys target)))
	 (vals      (hash-table-ref/default *env-vars-by-run-id* run-id #f))
	 (link-tree (configf:lookup *configdat* "setup" "linktree")))
    (if testname (setenv "MT_TEST_NAME" testname))
    (if itempath (setenv "MT_ITEMPATH"  itempath))

    ;; get the info from the db and put it in the cache
    (if link-tree
	(setenv "MT_LINKTREE" link-tree)
	(debug:print-error 0 *default-log-port* "linktree not set, should be set in megatest.config in [setup] section."))
    (if (not vals)
	(let ((ht (make-hash-table)))
	  (hash-table-set! *env-vars-by-run-id* run-id ht)
	  (set! vals ht)
	  (for-each
	   (lambda (key)
	     (hash-table-set! vals (car key) (cadr key)))
	   keyvals)))
    ;; from the cached data set the vars

    (hash-table-for-each
     vals
     (lambda (key val)
       (debug:print 2 *default-log-port* "setenv " key " " val)
       (safe-setenv key val)))



    (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target))





























    (alist->env-vars (hash-table-ref/default *configdat* "env-override" '()))

    ;; Lets use this as an opportunity to put MT_RUNNAME in the environment
    (let ((runname  (if inrunname inrunname (rmt:get-run-name-from-id run-id))))
      (if runname
	  (setenv "MT_RUNNAME" runname)
	  (debug:print-error 0 *default-log-port* "no value for runname for id " run-id)))
    (setenv "MT_RUN_AREA_HOME" *toppath*)
    ;; if a testname and itempath are available set the remaining appropriate variables
    (if testname (setenv "MT_TEST_NAME" testname))
    (if itempath (setenv "MT_ITEMPATH"  itempath))

    (if (and testname link-tree)
	(setenv "MT_TEST_RUN_DIR" (conc (getenv "MT_LINKTREE")  "/"
					(getenv "MT_TARGET")    "/"
					(getenv "MT_RUNNAME")   "/"
					(getenv "MT_TEST_NAME")
					(if (and itempath
						 (not (equal? itempath "")))







>






|
















>





>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>









>







48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
  hed tal reg reruns  test-record
  test-name item-path jobgroup
  waitons testmode  newtal itemmaps prereqs-not-met)

;; set up needed environment variables given a run-id and optionally a target, itempath etc.
;;
(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f))
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  (let* ((target    (or intarget 
			(common:args-get-target)
			(get-environment-variable "MT_TARGET")))
	 (keys      (if inkeys    inkeys    (rmt:get-keys)))
	 (keyvals   (if inkeyvals inkeyvals (keys:target->keyval keys target)))
	 (vals      (hash-table-ref/default *env-vars-by-run-id* run-id #f))
	 (link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))
    (if testname (setenv "MT_TEST_NAME" testname))
    (if itempath (setenv "MT_ITEMPATH"  itempath))

    ;; get the info from the db and put it in the cache
    (if link-tree
	(setenv "MT_LINKTREE" link-tree)
	(debug:print-error 0 *default-log-port* "linktree not set, should be set in megatest.config in [setup] section."))
    (if (not vals)
	(let ((ht (make-hash-table)))
	  (hash-table-set! *env-vars-by-run-id* run-id ht)
	  (set! vals ht)
	  (for-each
	   (lambda (key)
	     (hash-table-set! vals (car key) (cadr key)))
	   keyvals)))
    ;; from the cached data set the vars
    
    (hash-table-for-each
     vals
     (lambda (key val)
       (debug:print 2 *default-log-port* "setenv " key " " val)
       (safe-setenv key val)))
    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1")
    ;;(BB> "*env-vars-by-run-id*/runid("run-id" vals="(hash-table->alist vals))

    (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target))
    ;; we had a case where there was an exception generated by the hash-table-ref
    ;; due to *configdat* being #f Adding a handle and exit
    (let fatal-loop ((count 0)) 
      (handle-exceptions
	  exn
	  (let ((call-chain (get-call-chain))
		(msg        ((condition-property-accessor 'exn 'message) exn)))
	    (if (< count 5)
		(begin ;; this call is colliding, do some crude stuff to fix it.
		  (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count)
		  (launch:setup force-reread: #t)
		  (fatal-loop (+ count 1))) 
		(begin
		  (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count " times. Message: " msg)
		  (debug:print 0 *default-log-port* "Call chain:")
		  (with-output-to-port *default-log-port*

                    (lambda ()
                      (print "*configdat* is >>"*configdat*"<<")
                      (pp *configdat*)
                      (pp call-chain)))
                  
		  (exit 1))))
          ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5")
          (when (or (not *configdat*) (not (hash-table? *configdat*)))
              (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen.  Brute force reread.")
              ;;(BB> "ERROR: *configdat* was inaccessible! This should never happen.  Brute force reread.")
              (thread-sleep! 2) ;; assuming nfs lag.
              (launch:setup force-reread: #t))
          (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())))) ;;;; environment is tainted HERE in this let block.
    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 2")
    ;; Lets use this as an opportunity to put MT_RUNNAME in the environment
    (let ((runname  (if inrunname inrunname (rmt:get-run-name-from-id run-id))))
      (if runname
	  (setenv "MT_RUNNAME" runname)
	  (debug:print-error 0 *default-log-port* "no value for runname for id " run-id)))
    (setenv "MT_RUN_AREA_HOME" *toppath*)
    ;; if a testname and itempath are available set the remaining appropriate variables
    (if testname (setenv "MT_TEST_NAME" testname))
    (if itempath (setenv "MT_ITEMPATH"  itempath))
    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 3")
    (if (and testname link-tree)
	(setenv "MT_TEST_RUN_DIR" (conc (getenv "MT_LINKTREE")  "/"
					(getenv "MT_TARGET")    "/"
					(getenv "MT_RUNNAME")   "/"
					(getenv "MT_TEST_NAME")
					(if (and itempath
						 (not (equal? itempath "")))
189
190
191
192
193
194
195






































196







































197
198
199
200
201
202
203
204
205
206


207
208
209
210
211
212
213
214
215
216






217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248




249
250
251
252
253
254
255
256
257











258
259
260
261
262
263
264
265

266




267
268
269
270
271
272
273
				  (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60)
				      (debug:print 1 *default-log-port* "WARNING: number of jobs " num-running-in-jobgroup 
						   " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit))
				  #t)
				 (else #f))))
	  (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)))))















































































;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
;;              FIXME: error out if /patt specified
;;            
(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names
  (let* ((keys               (keys:config-get-fields *configdat*))
	 (keyvals            (keys:target->keyval keys target))
	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user))  ;;  test-name)))
	 ;; (deferred          '()) ;; delay running these since they have a waiton clause
	 (runconfigf         (conc  *toppath* "/runconfigs.config"))


	 (test-records       (make-hash-table))
	 ;; need to process runconfigs before generating these lists
	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
	 (test-names         #f)  ;; Generated by a call to (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;; Put fully qualified test/testpath names in this list to be done
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 (tdbdat             (tasks:open-db))
	 (config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns")))
			       (if x (string->number x) #f))))







    ;; per user request. If less than 100Meg space on dbdir partition, bail out with error
    ;; this will reduce issues in database corruption
    (common:check-db-dir-and-exit-if-insufficient)
    
    ;; override the number of reruns from the configs
    (if (and config-reruns
	     (> run-count config-reruns))
	(set! run-count config-reruns))
    
    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))

    (let ((sighand (lambda (signum)
		     ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
		     (set! *time-to-exit* #t)
		     (print "Received signal " signum ", cleaning up before exit. Please wait...")
		     (let ((th1 (make-thread (lambda ()
					       (let ((tdbdat (tasks:open-db)))
						 (rmt:tasks-set-state-given-param-key task-key "killed"))
					       (print "Killed by signal " signum ". Exiting")
					       (thread-sleep! 3)
					       (exit))))
			   (th2 (make-thread (lambda ()
					       (thread-sleep! 5)
					       (debug:print 0 *default-log-port* "Done")
					       (exit 4)))))
		       (thread-start! th2)
		       (thread-start! th1)
		       (thread-join! th2)))))
      (set-signal-handler! signal/int sighand)
      (set-signal-handler! signal/term sighand))





    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (set! runconf (if (file-exists? runconfigf)
		      (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
		      (begin
			(debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)
			#f)))

    (if (not test-patts) ;; first time in - adjust testpatt
	(set! test-patts (common:args-get-testpatt runconf)))












    ;; register this run in monitor.db
    (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params)
    (rmt:tasks-set-state-given-param-key task-key "running")

    ;; Now generate all the tests lists
    (set! all-tests-registry (tests:get-all))   ;; hash of testname => path-to-test
    (set! all-test-names     (hash-table-keys all-tests-registry))

    (set! test-names         (tests:filter-test-names all-test-names test-patts))





    ;; I think seeding required-tests with all test-names makes sense but lack analysis to back that up.

    ;; NEW STRATEGY HERE:
    ;; 1. fill required tests with test-patts
    ;; 2. scan testconfigs and if waitons, itemwait, itempatt calc prior test test-patt
    ;; 3. repeat until all deps propagated







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







|


>
>







|

|
>
>
>
>
>
>




|












|
|













>
>
>
>

|







>
>
>
>
>
>
>
>
>
>
>








>
|
>
>
>
>







225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
				  (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60)
				      (debug:print 1 *default-log-port* "WARNING: number of jobs " num-running-in-jobgroup 
						   " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit))
				  #t)
				 (else #f))))
	  (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)))))

(define (runs:run-pre-hook run-id)
    (let* ((run-pre-hook   (configf:lookup *configdat* "runs" "pre-hook"))
           (existing-tests (if run-pre-hook
                               (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses
                                                      #f #f ;; offset limit
                                                      #f ;; not-in
                                                      #f ;; sort-by
                                                      #f ;; sort-order
                                                      #f ;; get full data (not 'shortlist)
                                                      0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time
                                                      'dashboard)
                               '()))
           (log-dir         (conc *toppath* "/logs"))
           (log-file        (conc "pre-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log"))
           (full-log-fname  (conc log-dir "/" log-file)))
      (if run-pre-hook
          (if (null? existing-tests)
              (let* ((use-log-dir (if (not (directory-exists? log-dir))
                                      (handle-exceptions
                                       exn
                                       (begin
                                         (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir)
                                         #f)
                                       (create-directory log-dir #t)
                                       #t)
                                      #t))
                     (start-time   (current-seconds))
                     (actual-logf  (if use-log-dir full-log-fname log-file)))
                (handle-exceptions
                 exn
                 (begin
                   (print-call-chain *default-log-port*)
                   (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn))
                   (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file))
                 (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf)
                 (system (conc run-pre-hook " >> " actual-logf " 2>&1"))
                 (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))
              (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run.")))))
    
(define (runs:run-post-hook run-id)
    (let* ((run-post-hook   (configf:lookup *configdat* "runs" "post-hook"))
           (existing-tests (if run-post-hook
                               (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses
                                                      #f #f ;; offset limit
                                                      #f ;; not-in
                                                      #f ;; sort-by
                                                      #f ;; sort-order
                                                      #f ;; get full data (not 'shortlist)
                                                      0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time
                                                      'dashboard)
                               '()))
           (log-dir         (conc *toppath* "/logs"))
           (log-file        (conc "post-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log"))
           (full-log-fname  (conc log-dir "/" log-file)))
      (if run-post-hook
          ;; (if (null? existing-tests)
          ;;    (debug:print 0 *default-log-port* "Skipping post-hook call \"" run-post-hook "\" as there are existing tests for this run.")))))
	  (let* ((use-log-dir (if (not (directory-exists? log-dir))
				  (handle-exceptions
				      exn
				      (begin
					(debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir)
					#f)
				    (create-directory log-dir #t)
				    #t)
				  #t))
		 (start-time   (current-seconds))
		 (actual-logf  (if use-log-dir full-log-fname log-file)))
	    (handle-exceptions
		exn
		(begin
		  (print-call-chain *default-log-port*)
		  (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn))
		  (debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file))
	      (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf)
	      (system (conc run-post-hook " >> " actual-logf " 2>&1"))
	      (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run."))))))

;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
;;              FIXME: error out if /patt specified
;;            
(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names
  (let* ((keys               (keys:config-get-fields *configdat*))
	 (keyvals            (keys:target->keyval keys target))
	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))  ;;  test-name)))
	 ;; (deferred          '()) ;; delay running these since they have a waiton clause
	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
         (dbfile             (conc  *toppath* "/megatest.db"))
         (readonly-mode      (not (file-write-access? dbfile)))
	 (test-records       (make-hash-table))
	 ;; need to process runconfigs before generating these lists
	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
	 (test-names         #f)  ;; Generated by a call to (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;; Put fully qualified test/testpath names in this list to be done
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 ;; (tdbdat             (tasks:open-db))
	 (config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns")))
			       (if x (string->number x) #f)))
	 (allowed-tests      #f))

    ;; check if readonly
    (when readonly-mode
      (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed.")
      (exit 1))

    ;; per user request. If less than 100Meg space on dbdir partition, bail out with error
    ;; this will reduce issues in database corruption
    (common:check-db-dir-and-exit-if-insufficient)

    ;; override the number of reruns from the configs
    (if (and config-reruns
	     (> run-count config-reruns))
	(set! run-count config-reruns))
    
    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))

    (let ((sighand (lambda (signum)
		     ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
		     (set! *time-to-exit* #t)
		     (print "Received signal " signum ", cleaning up before exit. Please wait...")
		     (let ((th1 (make-thread (lambda ()
					       ;; (let ((tdbdat (tasks:open-db)))
						 (rmt:tasks-set-state-given-param-key task-key "killed") ;; )
					       (print "Killed by signal " signum ". Exiting")
					       (thread-sleep! 3)
					       (exit))))
			   (th2 (make-thread (lambda ()
					       (thread-sleep! 5)
					       (debug:print 0 *default-log-port* "Done")
					       (exit 4)))))
		       (thread-start! th2)
		       (thread-start! th1)
		       (thread-join! th2)))))
      (set-signal-handler! signal/int sighand)
      (set-signal-handler! signal/term sighand))

    ;; force the starting of a server -- removed BB 17ww28 - no longer needed.
    ;;(debug:print 0 *default-log-port* "waiting on server...")
    ;;(server:start-and-wait *toppath*)
    
    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (set! runconf (if (common:file-exists? runconfigf)
		      (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
		      (begin
			(debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)
			#f)))

    (if (not test-patts) ;; first time in - adjust testpatt
	(set! test-patts (common:args-get-testpatt runconf)))
    ;; if test-patts is #f at this point there is something wrong and we need to bail out
    (if (not test-patts)
	(begin
	  (debug:print 0 *default-log-port* "WARNING: there is no test pattern for this run. Exiting now.")
	  (exit 0)))
    
    (if (args:get-arg "-tagexpr")
	(begin
	  (set! allowed-tests (string-join (runs:get-tests-matching-tags (args:get-arg "-tagexpr")) ","))
	  	  (debug:print-info 0 *default-log-port* "filtering initial test list with tagexpr: " (args:get-arg "-tagexpr") " => " allowed-tests)
		  ));; tests will be ANDed with this list

    ;; register this run in monitor.db
    (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params)
    (rmt:tasks-set-state-given-param-key task-key "running")

    ;; Now generate all the tests lists
    (set! all-tests-registry (tests:get-all))   ;; hash of testname => path-to-test
    (set! all-test-names     (hash-table-keys all-tests-registry))
    ;; filter first for allowed-tests (from -tagexpr) then for test-patts.
    (set! test-names         (tests:filter-test-names
			      (if allowed-tests
				  (tests:filter-test-names all-test-names allowed-tests)
				  all-test-names)
			      test-patts))

    ;; I think seeding required-tests with all test-names makes sense but lack analysis to back that up.

    ;; NEW STRATEGY HERE:
    ;; 1. fill required tests with test-patts
    ;; 2. scan testconfigs and if waitons, itemwait, itempatt calc prior test test-patt
    ;; 3. repeat until all deps propagated
298
299
300
301
302
303
304
305




306

307
308
309
310
311




312
313
314
315
316
317
318
	  ;; on test A but test B reached the point on being registered as NOT_STARTED and test
	  ;; A failed for some reason then on re-run using -keepgoing the run can never complete.
	  ;;
	  ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED")
	  
	  ;; Now convert anything in allow-auto-rerun to NOT_STARTED
	  ;;
	  (for-each (lambda (state)




		      (rmt:set-tests-state-status run-id test-names state #f "NOT_STARTED" state))

		    (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") "")))))

    ;; Ensure all tests are registered in the test_meta table
    (runs:update-all-test_meta #f)





    ;; now add non-directly referenced dependencies (i.e. waiton)
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data
    ;;
    ;; What happended, this code is now duplicated in tests!?
    ;;
    ;;======================================================================







|
>
>
>
>
|
>
|




>
>
>
>







439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
	  ;; on test A but test B reached the point on being registered as NOT_STARTED and test
	  ;; A failed for some reason then on re-run using -keepgoing the run can never complete.
	  ;;
	  ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED")
	  
	  ;; Now convert anything in allow-auto-rerun to NOT_STARTED
	  ;;
	  (for-each
	   (lambda (state-status)
	     (let* ((ss-lst (string-split-fields "/" state-status #:infix))
		    (state  (if (> (length ss-lst) 0)(car  ss-lst) #f))
		    (status (if (> (length ss-lst) 1)(cadr ss-lst) #f)))
	       (rmt:set-tests-state-status run-id test-names state status "NOT_STARTED" status)))
	   ;; list of state/status pairs separated by spaces
	   (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") "")))))

    ;; Ensure all tests are registered in the test_meta table
    (runs:update-all-test_meta #f)

    ;; run the run prehook if there are no tests yet run for this run:
    ;;
    (runs:run-pre-hook run-id)
    
    ;; now add non-directly referenced dependencies (i.e. waiton)
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data
    ;;
    ;; What happended, this code is now duplicated in tests!?
    ;;
    ;;======================================================================
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
    ;; NOTE: these are all parent tests, items are not expanded yet.
    (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
    (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
      (if (> (length (hash-table-keys test-records)) 0)
	  (let* ((keep-going        #t)
		 (run-queue-retries 5)
		 (th1        (make-thread (lambda ()
					    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))
					    ;; (handle-exceptions
					    ;;  exn
					    ;;  (begin
					    ;;    (print-call-chain (current-error-port))
					    ;;    (debug:print-error 0 *default-log-port* "failure in runs:run-tests-queue thread, error: " ((condition-property-accessor 'exn 'message) exn))
					    ;;    (if (> run-queue-retries 0)
					    ;; 	   (begin
					    ;; 	     (set! run-queue-retries (- run-queue-retries 1))
					    ;; 	     (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))))
					    ;;  (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)))
					  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()				    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (handle-exceptions







<
|
|
|
|
|
<
<
<
|
|







551
552
553
554
555
556
557

558
559
560
561
562



563
564
565
566
567
568
569
570
571
    ;; NOTE: these are all parent tests, items are not expanded yet.
    (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
    (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
      (if (> (length (hash-table-keys test-records)) 0)
	  (let* ((keep-going        #t)
		 (run-queue-retries 5)
		 (th1        (make-thread (lambda ()

					    (handle-exceptions
						exn
						(begin
						  (print-call-chain)
						  (print " message: " ((condition-property-accessor 'exn 'message) exn)))



					      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
								    (any->number reglen) all-tests-registry)))
					  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()				    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (handle-exceptions
475
476
477
478
479
480
481








482
483

484
485
486
487
488
489
490
(define (runs:queue-next-reg tal reg n regfull)
  (if regfull
      (cdr reg)
      (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal
	  '()
	  reg)))









(define runs:nothing-left-in-queue-count 0)


(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*







>
>
>
>
>
>
>
>


>







621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
(define (runs:queue-next-reg tal reg n regfull)
  (if regfull
      (cdr reg)
      (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal
	  '()
	  reg)))

;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216
;;
(define (runs:loop-values tal reg reglen regfull reruns)
  (list (runs:queue-next-hed tal reg reglen regfull)      ;; hed
        (runs:queue-next-tal tal reg reglen regfull)      ;; tal
        (runs:queue-next-reg tal reg reglen regfull)      ;; reg
        reruns))                                          ;; reruns

(define runs:nothing-left-in-queue-count 0)

;; BB: for future reference - suspect target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this.  on first pass, var not set, on second pass, ok.  
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533

     ((and (not (member 'toplevel testmode))
	   (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a)
		   '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here
      (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue")
      (if (or (not (null? tal))
	      (not (null? reg)))
	  (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
		(runs:queue-next-reg tal reg reglen regfull)
		reruns)
	  (begin
	    (debug:print-info 0 *default-log-port* "Nothing left in the queue!")
	    ;; If get here twice then we know we've tried to expand all items
	    ;; since there must be a logic issue with the handling of loops in the 
	    ;; items expand phase we will brute force an exit here.
	    (if (> runs:nothing-left-in-queue-count 2)
		(begin







<
|
<
<







671
672
673
674
675
676
677

678


679
680
681
682
683
684
685

     ((and (not (member 'toplevel testmode))
	   (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a)
		   '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here
      (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue")
      (if (or (not (null? tal))
	      (not (null? reg)))

          (runs:loop-values tal reg reglen regfull reruns)


	  (begin
	    (debug:print-info 0 *default-log-port* "Nothing left in the queue!")
	    ;; If get here twice then we know we've tried to expand all items
	    ;; since there must be a logic issue with the handling of loops in the 
	    ;; items expand phase we will brute force an exit here.
	    (if (> runs:nothing-left-in-queue-count 2)
		(begin
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644

645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664

	      (let ((test-id (rmt:get-test-id run-id hed "")))
		(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
		  (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-reg trimmed-tal trimmed-reg reglen regfull)
			reruns)))
	      (list (car newtal)(append (cdr newtal) reg) '() reruns))))

     ((and (null? fails)
	   (null? prereq-fails)
	   (null? non-completed))
      (if  (runs:can-keep-running? hed 20)
	  (begin
	    (runs:inc-cant-run-tests hed)
	    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0))
	    ;; getting here likely means the system is way overloaded, kill a full minute before continuing
	    (thread-sleep! 60)
	    ;; num-retries code was here
	    ;; we use this opportunity to move contents of reg to tal
	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
	  (begin
	    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
	    (let ((test-id (rmt:get-test-id run-id hed "")))
	      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
	    (list (runs:queue-next-hed tal reg reglen regfull)
		  (runs:queue-next-tal tal reg reglen regfull)
		  (runs:queue-next-reg tal reg reglen regfull)
		  reruns))))

     ((and 
       (or (not (null? fails))
	   (not (null? prereq-fails)))
       (member 'normal testmode))
      (debug:print-info 1 *default-log-port* "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
			(string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
			", removing it from to-do list")
      (let ((test-id (rmt:get-test-id run-id hed "")))
	(if test-id
	    (if (not (null? prereq-fails))
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites"))))
      (if (or (not (null? reg))(not (null? tal)))
	  (begin
	    (hash-table-set! test-registry hed 'CANNOTRUN)
	    (list (runs:queue-next-hed tal reg reglen regfull)
		  (runs:queue-next-tal tal reg reglen regfull)
		  (runs:queue-next-reg tal reg reglen regfull)
		  (cons hed reruns)))

	  #f)) ;; #f flags do not loop

     ((and (not (null? fails))(member 'toplevel testmode))
      (if (or (not (null? reg))(not (null? tal)))
	   (list (car newtal)(append (cdr newtal) reg) '() reruns)
	  #f)) 
     ((null? runnables) #f) ;; if we get here and non-completed is null the it's all over.
     (else
      (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now")
      ;; (list (runs:queue-next-hed tal reg reglen regfull)
      ;;   	(runs:queue-next-tal tal reg reglen regfull)
      ;;   	(runs:queue-next-reg tal reg reglen regfull)
      ;;   	reruns)
      (list (car newtal)(cdr newtal) reg reruns)))))

(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst)
  (if (null? inlst)
      '()
      (map (lambda (t)
	     (cond







<
|
<
|


















<
|
<
|
















<
<
<
|
>






|


<
<
<
<







744
745
746
747
748
749
750

751

752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770

771

772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788



789
790
791
792
793
794
795
796
797
798
799




800
801
802
803
804
805
806

	      (let ((test-id (rmt:get-test-id run-id hed "")))
		(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f

                  (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns)

                  ))
	      (list (car newtal)(append (cdr newtal) reg) '() reruns))))

     ((and (null? fails)
	   (null? prereq-fails)
	   (null? non-completed))
      (if  (runs:can-keep-running? hed 20)
	  (begin
	    (runs:inc-cant-run-tests hed)
	    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0))
	    ;; getting here likely means the system is way overloaded, kill a full minute before continuing
	    (thread-sleep! 60)
	    ;; num-retries code was here
	    ;; we use this opportunity to move contents of reg to tal
	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
	  (begin
	    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
	    (let ((test-id (rmt:get-test-id run-id hed "")))
	      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))

            (runs:loop-values tal reg reglen regfull reruns)

            )))

     ((and 
       (or (not (null? fails))
	   (not (null? prereq-fails)))
       (member 'normal testmode))
      (debug:print-info 1 *default-log-port* "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
			(string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
			", removing it from to-do list")
      (let ((test-id (rmt:get-test-id run-id hed "")))
	(if test-id
	    (if (not (null? prereq-fails))
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites"))))
      (if (or (not (null? reg))(not (null? tal)))
	  (begin
	    (hash-table-set! test-registry hed 'CANNOTRUN)



            (runs:loop-values tal reg reglen regfull (cons hed reruns))
            )
	  #f)) ;; #f flags do not loop

     ((and (not (null? fails))(member 'toplevel testmode))
      (if (or (not (null? reg))(not (null? tal)))
	   (list (car newtal)(append (cdr newtal) reg) '() reruns)
	  #f)) 
     ((null? runnables) #f) ;; if we get here and non-completed is null then it is all over.
     (else
      (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now")




      (list (car newtal)(cdr newtal) reg reruns)))))

(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst)
  (if (null? inlst)
      '()
      (map (lambda (t)
	     (cond
722
723
724
725
726
727
728
729

730
731
732
733
734
735
736
737
					'())))
	 (non-completed           (filter (lambda (x)             ;; remove hed from not completed list, duh, of course it is not completed!
					    (not (equal? x hed)))
					  (runs:calc-not-completed prereqs-not-met)))
	 (loop-list               (list hed tal reg reruns))
	 ;; configure the load runner
	 (numcpus                 (common:get-num-cpus #f))
	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3")))

	 (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
    (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" 
		      (string-intersperse 
		       (map (lambda (t)
			      (if (vector? t)
				  (conc (db:test-get-state t) "/" (db:test-get-status t))
				  (conc " WARNING: t is not a vector=" t )))
			    prereqs-not-met)







|
>
|







864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
					'())))
	 (non-completed           (filter (lambda (x)             ;; remove hed from not completed list, duh, of course it is not completed!
					    (not (equal? x hed)))
					  (runs:calc-not-completed prereqs-not-met)))
	 (loop-list               (list hed tal reg reruns))
	 ;; configure the load runner
	 (numcpus                 (common:get-num-cpus #f))
	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0")))         ;; use a non-number string to disable
         (maxhomehostload         (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "1.2"))) ;; use a non-number string to disable
         (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
    (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" 
		      (string-intersperse 
		       (map (lambda (t)
			      (if (vector? t)
				  (conc (db:test-get-state t) "/" (db:test-get-status t))
				  (conc " WARNING: t is not a vector=" t )))
			    prereqs-not-met)
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
     ;; Check item path against item-patts, 
     ;;
     ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
      ;; else the run is stuck, temporarily or permanently
      ;; but should check if it is due to lack of resources vs. prerequisites
      (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
      (if (or (not (null? tal))(not (null? reg)))
	  (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
		(runs:queue-next-reg tal reg reglen regfull)
		reruns)
	  #f))
     
     ;; Register tests 
     ;;
     ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
      (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" )
      ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs







<
|
<
<







896
897
898
899
900
901
902

903


904
905
906
907
908
909
910
     ;; Check item path against item-patts, 
     ;;
     ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
      ;; else the run is stuck, temporarily or permanently
      ;; but should check if it is due to lack of resources vs. prerequisites
      (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
      (if (or (not (null? tal))(not (null? reg)))

	  (runs:loop-values tal reg reglen regfull reruns)


	  #f))
     
     ;; Register tests 
     ;;
     ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
      (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" )
      ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
	  (begin
	    (rmt:register-test run-id test-name "")
	    (if (rmt:get-test-id run-id test-name "")
		(hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))))
      (runs:shrink-can-run-more-tests-count runsdat)   ;; DELAY TWEAKER (still needed?)
      (if (and (null? tal)(null? reg))
	  (list hed tal (append reg (list hed)) reruns)
	  (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
		;; NB// Here we are building reg as we register tests
		;; if regfull we must pop the front item off reg
		(if regfull
		    (append (cdr reg) (list hed))
		    (append reg (list hed)))
		reruns)))







|







921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
	  (begin
	    (rmt:register-test run-id test-name "")
	    (if (rmt:get-test-id run-id test-name "")
		(hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))))
      (runs:shrink-can-run-more-tests-count runsdat)   ;; DELAY TWEAKER (still needed?)
      (if (and (null? tal)(null? reg))
	  (list hed tal (append reg (list hed)) reruns)
	  (list (runs:queue-next-hed tal reg reglen regfull) ;; cannot replace with a call to runs:loop-values as the logic is different for reg
		(runs:queue-next-tal tal reg reglen regfull)
		;; NB// Here we are building reg as we register tests
		;; if regfull we must pop the front item off reg
		(if regfull
		    (append (cdr reg) (list hed))
		    (append reg (list hed)))
		reruns)))
819
820
821
822
823
824
825
826

827
828
829
830
831
832
833
834



835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
      (list (car newtal)(cdr newtal) reg reruns))
     
     ;; This is the final stage, everything is in place so launch the test
     ;;
     ((and have-resources
	   (or (null? prereqs-not-met)
	       (and (member 'toplevel testmode) ;;  'toplevel)
		    (null? non-completed))))

      ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
      ;; we are going to reset all the counters for test retries by setting a new hash table
      ;; this means they will increment only when nothing can be run
      (set! *max-tries-hash* (make-hash-table))
      ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
      ;; average cpu load is under the threshold before continuing
      (if (configf:lookup *configdat* "jobtools" "maxload") ;; only gate if maxload is specified
	  (common:wait-for-cpuload maxload numcpus waitdelay))



      (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
      (runs:incremental-print-results run-id)
      (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
      (runs:shrink-can-run-more-tests-count runsdat)  ;; DELAY TWEAKER (still needed?)
      ;; (thread-sleep! *global-delta*)
      (if (or (not (null? tal))(not (null? reg)))
	  (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
		(runs:queue-next-reg tal reg reglen regfull)
		reruns)
	  #f))
     
     ;; must be we have unmet prerequisites
     ;;
     (else
      (debug:print 4 *default-log-port* "FAILS: " fails)
      ;; If one or more of the prereqs-not-met are FAIL then we can issue







|
>






|
|
>
>
>






<
|
<
<







959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984

985


986
987
988
989
990
991
992
      (list (car newtal)(cdr newtal) reg reruns))
     
     ;; This is the final stage, everything is in place so launch the test
     ;;
     ((and have-resources
	   (or (null? prereqs-not-met)
	       (and (member 'toplevel testmode) ;;  'toplevel)
		    (null? non-completed)
		    (not (member 'exclusive testmode)))))
      ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
      ;; we are going to reset all the counters for test retries by setting a new hash table
      ;; this means they will increment only when nothing can be run
      (set! *max-tries-hash* (make-hash-table))
      ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
      ;; average cpu load is under the threshold before continuing
      (if maxload ;; only gate if maxload is specified
          (common:wait-for-cpuload maxload numcpus waitdelay))
      (if maxhomehostload
          (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))
      
      (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
      (runs:incremental-print-results run-id)
      (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
      (runs:shrink-can-run-more-tests-count runsdat)  ;; DELAY TWEAKER (still needed?)
      ;; (thread-sleep! *global-delta*)
      (if (or (not (null? tal))(not (null? reg)))

	  (runs:loop-values tal reg reglen regfull reruns)


	  #f))
     
     ;; must be we have unmet prerequisites
     ;;
     (else
      (debug:print 4 *default-log-port* "FAILS: " fails)
      ;; If one or more of the prereqs-not-met are FAIL then we can issue
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
		    (let ((test-id (rmt:get-test-id run-id hed "")))
		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
		    (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)
		    ;; This next is for the items
		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
		    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed)
		    (list (runs:queue-next-hed tal reg reglen regfull)
			  (runs:queue-next-tal tal reg reglen regfull)
			  (runs:queue-next-reg tal reg reglen regfull)
			  reruns ;; WAS: (cons hed reruns) ;; but that makes no sense?
			  ))
		  (let ((nth-try (hash-table-ref/default test-registry hed 0)))
		    (cond
		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
		      (thread-sleep! 4)
		      (list (runs:queue-next-hed newtal reg reglen regfull)
			    (runs:queue-next-tal newtal reg reglen regfull)
			    (runs:queue-next-reg newtal reg reglen regfull)
			    reruns))
		     ((or (not nth-try)
			  (and (number? nth-try)
			       (< nth-try 10)))
		      (hash-table-set! test-registry hed (if (number? nth-try)
							     (+ nth-try 1)
							     0))
		      (if (runs:lownoise (conc "not removing test " hed) 60)
			  (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites"))
		      ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
		      (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		      ;; (list hed tal reg reruns)
		      ;; (list (car newtal)(cdr newtal) reg reruns)
		      ;; (hash-table-set! test-registry hed 'removed)
		      (list (runs:queue-next-hed newtal reg reglen regfull)
			    (runs:queue-next-tal newtal reg reglen regfull)
			    (runs:queue-next-reg newtal reg reglen regfull)
			    reruns))
		     ((symbol? nth-try)
		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
			  (if (null? tal)
			      #f ;; yes, really
			      (list (car tal)(cdr tal) reg reruns))
			  (begin
			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry."))
			    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
			    (hash-table-set! test-registry hed 0)
			    (list (runs:queue-next-hed newtal reg reglen regfull)
				  (runs:queue-next-tal newtal reg reglen regfull)
				  (runs:queue-next-reg newtal reg reglen regfull)
				  reruns))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)
		      (hash-table-set! test-registry hed 'removed)
		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
		      ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug.
		      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL
		      (list (if (null? tal)(car newtal)(car tal))
			    tal
			    reg
			    reruns)))))
	      ;; can't drop this - maybe running? Just keep trying
	      (let ((runable-tests (runs:runable-tests prereqs-not-met)))
		(if (null? runable-tests)
		    #f   ;; I think we are truly done here
		    (list (runs:queue-next-hed newtal reg reglen regfull)
			    (runs:queue-next-tal newtal reg reglen regfull)
			    (runs:queue-next-reg newtal reg reglen regfull)
			    reruns)))))))))

;; scan a list of tests looking to see if any are potentially runnable
;;
(define (runs:runable-tests tests)
  (filter (lambda (t)
	    (if (not (vector? t))
		t







<
|
<
<
<






<
|
<
<










<
<
<
<
|
<
<










<
|
<
<













|


<
|
<
<







1014
1015
1016
1017
1018
1019
1020

1021



1022
1023
1024
1025
1026
1027

1028


1029
1030
1031
1032
1033
1034
1035
1036
1037
1038




1039


1040
1041
1042
1043
1044
1045
1046
1047
1048
1049

1050


1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066

1067


1068
1069
1070
1071
1072
1073
1074
		    (let ((test-id (rmt:get-test-id run-id hed "")))
		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
		    (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)
		    ;; This next is for the items
		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
		    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed)

		    (runs:loop-values tal reg reglen regfull reruns))



		  (let ((nth-try (hash-table-ref/default test-registry hed 0)))
		    (cond
		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
		      (thread-sleep! 4)

		      (runs:loop-values tal reg reglen regfull reruns))


		     ((or (not nth-try)
			  (and (number? nth-try)
			       (< nth-try 10)))
		      (hash-table-set! test-registry hed (if (number? nth-try)
							     (+ nth-try 1)
							     0))
		      (if (runs:lownoise (conc "not removing test " hed) 60)
			  (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites"))
		      ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
		      (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)




		      (runs:loop-values newtal reg reglen regfull reruns))


		     ((symbol? nth-try)
		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
			  (if (null? tal)
			      #f ;; yes, really
			      (list (car tal)(cdr tal) reg reruns))
			  (begin
			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry."))
			    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
			    (hash-table-set! test-registry hed 0)

			    (runs:loop-values newtal reg reglen regfull))))


		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)
		      (hash-table-set! test-registry hed 'removed)
		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
		      ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug.
		      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL
		      (list (if (null? tal)(car newtal)(car tal))
			    tal
			    reg
			    reruns)))))
	      ;; can't drop this - maybe running? Just keep trying
	      (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met?
		(if (null? runable-tests)
		    #f   ;; I think we are truly done here

		    (runs:loop-values newtal reg reglen regfull reruns)))))))))



;; scan a list of tests looking to see if any are potentially runnable
;;
(define (runs:runable-tests tests)
  (filter (lambda (t)
	    (if (not (vector? t))
		t
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
  ;; NB// Should expand items here and then insert into the run queue.
  (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags))

  ;; Do mark-and-find clean up of db before starting runing of quue
  ;;
  ;; (rmt:find-and-mark-incomplete)

  (let* ((run-info              (rmt:get-run-info run-id))
	(tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
	(test-registry         (make-hash-table))
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				 (if (and mcj (string->number mcj))
				     (string->number mcj)
				     1))) ;; length of the register queue ahead
	(reglen                (if (number? reglen-in) reglen-in 1))
	(last-time-incomplete  (- (current-seconds) 900)) ;; force at least one clean up cycle
	(last-time-some-running (current-seconds))
	(tdbdat                (tasks:open-db))
	(runsdat (make-runs:dat
		  ;; hed: hed
		  ;; tal: tal
		  ;; reg: reg
		  ;; reruns: reruns
		  reglen: reglen
		  regfull: #f ;; regfull







|






|
<
<
<
|


|







1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177



1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
  ;; NB// Should expand items here and then insert into the run queue.
  (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags))

  ;; Do mark-and-find clean up of db before starting runing of quue
  ;;
  ;; (rmt:find-and-mark-incomplete)

  (let* ((run-info             (rmt:get-run-info run-id))
	(tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
	(test-registry         (make-hash-table))
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50))



        (reglen                (if (number? reglen-in) reglen-in 1))
	(last-time-incomplete  (- (current-seconds) 900)) ;; force at least one clean up cycle
	(last-time-some-running (current-seconds))
	;; (tdbdat                (tasks:open-db))
	(runsdat (make-runs:dat
		  ;; hed: hed
		  ;; tal: tal
		  ;; reg: reg
		  ;; reruns: reruns
		  reglen: reglen
		  regfull: #f ;; regfull
1149
1150
1151
1152
1153
1154
1155


1156
1157

1158
1159



1160
1161
1162
1163
1164
1165
1166
			   waitons:     waitons
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)


	;; every couple minutes verify the server is there for this run
	(if (and (common:low-noise-print 60 "try start server"  run-id)

		 (tasks:need-server run-id))
	    (tasks:start-and-wait-for-server tdbdat run-id 10)) ;; NOTE: delay and wait is done under the hood



	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))







>
>
|
|
>
|
<
>
>
>







1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280

1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
			   waitons:     waitons
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)

        ;; -- removed BB 17ww28 - no longer needed.
	;; every 15 minutes verify the server is there for this run
	;; (if (and (common:low-noise-print 240 "try start server"  run-id)
	;; 	 (not (or (and *runremote*
	;; 		       (remote-server-url *runremote*)

	;; 		       (server:ping (remote-server-url *runremote*)))
	;; 		  (server:check-if-running *toppath*))))
	;;     (server:kind-run *toppath*))
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
	  (if (null? tal)
	      #f
	      (loop (car tal)(cdr tal) reg reruns)))
	    
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
	  (let ((can-run-more    (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)))
		  (if loop-list
		      (apply loop loop-list)))
		;; if can't run more just loop with next possible test







|







1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
	  (if (null? tal)
	      #f
	      (loop (car tal)(cdr tal) reg reruns)))
	    
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky
	  (let ((can-run-more    (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)))
		  (if loop-list
		      (apply loop loop-list)))
		;; if can't run more just loop with next possible test
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347

1348
1349
1350
1351
1352
1353
1354
	  (debug:print-info 4 *default-log-port* "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))
	 )))
    ;; now *if* -run-wait we wait for all tests to be done
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle
    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
		    (prev-num-running 0))
      ;; (BB> "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))
	  (begin
	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
	    ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
	    (if (> (current-seconds)(+ last-time-incomplete 900))
		(begin
		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name))
		  (set! last-time-incomplete (current-seconds))
		  (rmt:find-and-mark-incomplete run-id #f)))
	    (if (not (eq? num-running prev-num-running))
		(debug:print-info 0 *default-log-port* "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds)))))
	    (thread-sleep! 5)
	    ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
    ;; LET* ((test-record
    ;; we get here on "drop through". All done!

    (debug:print-info 1 *default-log-port* "All tests launched")))

(define (runs:calc-fails prereqs-not-met)
  (filter (lambda (test)
	    (and (vector? test) ;; not (string? test))
		 (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED"))
		 (not (member (db:test-get-status test)







|


















>







1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
	  (debug:print-info 4 *default-log-port* "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))
	 )))
    ;; now *if* -run-wait we wait for all tests to be done
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle
    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
		    (prev-num-running 0))
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))
	  (begin
	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
	    ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
	    (if (> (current-seconds)(+ last-time-incomplete 900))
		(begin
		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name))
		  (set! last-time-incomplete (current-seconds))
		  (rmt:find-and-mark-incomplete run-id #f)))
	    (if (not (eq? num-running prev-num-running))
		(debug:print-info 0 *default-log-port* "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds)))))
	    (thread-sleep! 5)
	    ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
    ;; LET* ((test-record
    ;; we get here on "drop through". All done!
    (runs:run-post-hook run-id)
    (debug:print-info 1 *default-log-port* "All tests launched")))

(define (runs:calc-fails prereqs-not-met)
  (filter (lambda (test)
	    (and (vector? test) ;; not (string? test))
		 (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED"))
		 (not (member (db:test-get-status test)
1379
1380
1381
1382
1383
1384
1385
1386

1387
1388
1389
1390
1391
1392
1393

(define (runs:calc-runnable prereqs-not-met)
  (filter 
   (lambda (t)
     (or (not (vector? t))
	 (and (equal? "NOT_STARTED" (db:test-get-state t))
	      (member (db:test-get-status t)
			      '("n/a" "KEEP_TRYING")))))

   prereqs-not-met))

(define (runs:pretty-string lst)
  (map (lambda (t)
	 (if (not (vector? t))
	     (conc t)
	     (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t))))







|
>







1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519

(define (runs:calc-runnable prereqs-not-met)
  (filter 
   (lambda (t)
     (or (not (vector? t))
	 (and (equal? "NOT_STARTED" (db:test-get-state t))
	      (member (db:test-get-status t)
		      '("n/a" "KEEP_TRYING")))
	 (and (equal? "RUNNING" (db:test-get-state t))))) ;; account for a test that is running
   prereqs-not-met))

(define (runs:pretty-string lst)
  (map (lambda (t)
	 (if (not (vector? t))
	     (conc t)
	     (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t))))
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
		(begin
		  (debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in one second")
		  (thread-sleep! 1)
		  (loop)))))
      (if (not testdat) ;; should NOT happen
	  (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id))
      (set! test-id (db:test-get-id testdat))
      (if (file-exists? test-path)
	  (change-directory test-path)
	  (begin
	    (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?")
	    (change-directory *toppath*)))
      (case (if force ;; (args:get-arg "-force")
		'NOT_STARTED
		(if testdat







|







1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
		(begin
		  (debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in one second")
		  (thread-sleep! 1)
		  (loop)))))
      (if (not testdat) ;; should NOT happen
	  (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id))
      (set! test-id (db:test-get-id testdat))
      (if (common:file-exists? test-path)
	  (change-directory test-path)
	  (begin
	    (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?")
	    (change-directory *toppath*)))
      (case (if force ;; (args:get-arg "-force")
		'NOT_STARTED
		(if testdat
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
			(configf:lookup test-conf "skip" "prevrunning"))
		   ;; run-ids = #f means *all* runs
		   (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)))
		     (if (not (null? running-tests)) ;; have to skip 
			 (set! skip-test "Skipping due to previous tests running"))))
		  ((and skip-check
			(configf:lookup test-conf "skip" "fileexists"))
		   (if (file-exists? (configf:lookup test-conf "skip" "fileexists"))
		       (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists")))))

		  ((and skip-check
			(configf:lookup test-conf "skip" "rundelay"))
		   ;; run-ids = #f means *all* runs
		   (let* ((numseconds      (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay")))
			  (running-tests   (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))
			  (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED" "INCOMPLETE") '("PASS" "FAIL" "ABORT") #f)) ;; ironically INCOMPLETE is same as COMPLETED in this contex
			  (last-run-times  (map db:mintest-get-event_time completed-tests))







|

<







1658
1659
1660
1661
1662
1663
1664
1665
1666

1667
1668
1669
1670
1671
1672
1673
			(configf:lookup test-conf "skip" "prevrunning"))
		   ;; run-ids = #f means *all* runs
		   (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)))
		     (if (not (null? running-tests)) ;; have to skip 
			 (set! skip-test "Skipping due to previous tests running"))))
		  ((and skip-check
			(configf:lookup test-conf "skip" "fileexists"))
		   (if (common:file-exists? (configf:lookup test-conf "skip" "fileexists"))
		       (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists")))))

		  ((and skip-check
			(configf:lookup test-conf "skip" "rundelay"))
		   ;; run-ids = #f means *all* runs
		   (let* ((numseconds      (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay")))
			  (running-tests   (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))
			  (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED" "INCOMPLETE") '("PASS" "FAIL" "ABORT") #f)) ;; ironically INCOMPLETE is same as COMPLETED in this contex
			  (last-run-times  (map db:mintest-get-event_time completed-tests))
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643










1644
1645
1646
1647
1648
1649
1650
;; fields are passing in through 
;; action:
;;    'remove-runs
;;    'set-state-status
;;
;; NB// should pass in keys?
;;
(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode 'remove-all)(options '()))
  (common:clear-caches) ;; clear all caches
  (let* ((db           #f)
	 (tdbdat       (tasks:open-db))
	 (keys         (rmt:get-keys))
	 (rundat       (mt:get-runs-by-patt keys runnamepatt target))
	 (header       (vector-ref rundat 0))
	 (runs         (vector-ref rundat 1))
	 (states       (if state  (string-split state  ",") '()))
	 (statuses     (if status (string-split status ",") '()))
	 (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
	 (rp-mutex     (make-mutex))
	 (bup-mutex    (make-mutex)))










    (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
    (if (> 2 (length state-status))
	(begin
	  (debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL")
	  (exit)))
    (for-each
     (lambda (run)







|


|








|
>
>
>
>
>
>
>
>
>
>







1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
;; fields are passing in through 
;; action:
;;    'remove-runs
;;    'set-state-status
;;
;; NB// should pass in keys?
;;
(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '()))
  (common:clear-caches) ;; clear all caches
  (let* ((db           #f)
	 ;; (tdbdat       (tasks:open-db))
	 (keys         (rmt:get-keys))
	 (rundat       (mt:get-runs-by-patt keys runnamepatt target))
	 (header       (vector-ref rundat 0))
	 (runs         (vector-ref rundat 1))
	 (states       (if state  (string-split state  ",") '()))
	 (statuses     (if status (string-split status ",") '()))
	 (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
	 (rp-mutex     (make-mutex))
	 (bup-mutex    (make-mutex))
         (keep-records (args:get-arg "-keep-records"))) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".

    (let* ((write-access-actions '(remove-runs set-state-status archive run-wait))
           (dbfile             (conc  *toppath* "/megatest.db"))
           (readonly-mode      (not (file-write-access? dbfile))))
      (when (and readonly-mode
                 (member action write-access-actions))
        (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed with action ["action"] in which write-access isrequired .")
        (exit 1)))
    
    (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
    (if (> 2 (length state-status))
	(begin
	  (debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL")
	  (exit)))
    (for-each
     (lambda (run)
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
		    ;; seek and kill in flight -runtests with % as testpatt here
		    ;; (if (equal? testpatt "%")
		    (tasks:kill-runner target run-name testpatt)
		    
		    ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt))
		    (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((set-state-status)
		    (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
		    (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((print-run)
		    (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
		    action)
		   ((run-wait)
		    (debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete"))
		   ((archive)







|







1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
		    ;; seek and kill in flight -runtests with % as testpatt here
		    ;; (if (equal? testpatt "%")
		    (tasks:kill-runner target run-name testpatt)
		    
		    ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt))
		    (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((set-state-status)
		    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
		    (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((print-run)
		    (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
		    action)
		   ((run-wait)
		    (debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete"))
		   ((archive)
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801

1802
1803
1804
1805
1806
1807
1808


1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
















1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
				      (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.")
				      (loop (car new-tests)(cdr new-tests)))))
			       ((archive)
				(if (and run-dir (not toplevel-with-children))
				    (let ((ddir (conc run-dir "/")))
				      (case (string->symbol (args:get-arg "-archive"))
					((save save-remove keep-html)
					 (if (file-exists? ddir)
					     (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir)))))))
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       )))
		       )
		     (if worker-thread (thread-join! worker-thread))))))
	   ;; remove the run if zero tests remain
	   (if (eq? action 'remove-runs)

	       (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t)))
		 (if (null? remtests) ;; no more tests remaining
		     (let* ((dparts  (string-split lasttpath "/"))
			    (runpath (conc "/" (string-intersperse 
						(take dparts (- (length dparts) 1))
						"/"))))
		       (debug:print 1 *default-log-port* "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record")


		       (rmt:delete-run run-id)
		       (rmt:delete-old-deleted-test-records)
		       ;; (rmt:set-var "DELETED_TESTS" (current-seconds))
		       ;; need to figure out the path to the run dir and remove it if empty
		       ;;    (if (null? (glob (conc runpath "/*")))
		       ;;        (begin
		       ;; 	 (debug:print 1 *default-log-port* "Removing run dir " runpath)
		       ;; 	 (system (conc "rmdir -p " runpath))))
		       )))))
	 ))
     runs)
    ;; (sqlite3:finalize! (db:delay-if-busy tdbdat))
    )
  #t)

(define (runs:remove-test-directory test mode) ;; remove-data-only)
  (let* ((run-dir       (db:test-get-rundir test))    ;; run dir is from the link tree
	 (real-dir      (if (file-exists? run-dir)
			    ;; (resolve-pathname run-dir)
			    (common:nice-path run-dir)
			    #f)))
















    (case mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f))
      ((remove-all)      (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f)))
    (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
    (if (and real-dir 
	     (> (string-length real-dir) 5)
	     (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
	(begin ;; let* ((realpath (resolve-pathname run-dir)))
	  (debug:print-info 1 *default-log-port* "Recursively removing " real-dir)
	  (if (file-exists? real-dir)
	      (runs:safe-delete-test-dir real-dir)
	      (debug:print 0 *default-log-port* "WARNING: test dir " real-dir " appears to not exist or is not readable")))
	(if real-dir 
	    (debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist")
	    (debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
    (if (symbolic-link? run-dir)
	(begin







|








>
|






>
>
|
|
|














|


|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|






|


|







1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
				      (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.")
				      (loop (car new-tests)(cdr new-tests)))))
			       ((archive)
				(if (and run-dir (not toplevel-with-children))
				    (let ((ddir (conc run-dir "/")))
				      (case (string->symbol (args:get-arg "-archive"))
					((save save-remove keep-html)
					 (if (common:file-exists? ddir)
					     (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir)))))))
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       )))
		       )
		     (if worker-thread (thread-join! worker-thread))))))
	   ;; remove the run if zero tests remain
	   (if (eq? action 'remove-runs)
	       (let* ((run-id   (db:get-value-by-header run header "id")) ;; NB// masks run-id from above?
                      (remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t)))
		 (if (null? remtests) ;; no more tests remaining
		     (let* ((dparts  (string-split lasttpath "/"))
			    (runpath (conc "/" (string-intersperse 
						(take dparts (- (length dparts) 1))
						"/"))))
		       (debug:print 1 *default-log-port* "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record")
                       (if (not keep-records)
                           (begin
                             (rmt:delete-run run-id)
                             (rmt:delete-old-deleted-test-records)))
                           ;; (rmt:set-var "DELETED_TESTS" (current-seconds))
		       ;; need to figure out the path to the run dir and remove it if empty
		       ;;    (if (null? (glob (conc runpath "/*")))
		       ;;        (begin
		       ;; 	 (debug:print 1 *default-log-port* "Removing run dir " runpath)
		       ;; 	 (system (conc "rmdir -p " runpath))))
		       )))))
	 ))
     runs)
    ;; (sqlite3:finalize! (db:delay-if-busy tdbdat))
    )
  #t)

(define (runs:remove-test-directory test mode) ;; remove-data-only)
  (let* ((run-dir       (db:test-get-rundir test))    ;; run dir is from the link tree
	 (real-dir      (if (common:file-exists? run-dir)
			    ;; (resolve-pathname run-dir)
			    (common:nice-path run-dir)
			    #f))
         (clean-mode    (or mode 'remove-all))
         (test-id       (db:test-get-id test))
        ;; (lock-key      (conc "test-" test-id))
        ;; (got-lock      (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin
	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id)
	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
	;; 			    (begin
	;; 			      (thread-sleep! 1)
	 ;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)))))))
	 )
    (case clean-mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f))
      ((remove-all)      (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f)))
    (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
    (if (and real-dir 
	     (> (string-length real-dir) 5)
	     (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
	(begin ;; let* ((realpath (resolve-pathname run-dir)))
	  (debug:print-info 1 *default-log-port* "Recursively removing " real-dir)
	  (if (common:file-exists? real-dir)
	      (runs:safe-delete-test-dir real-dir)
	      (debug:print 0 *default-log-port* "WARNING: test dir " real-dir " appears to not exist or is not readable")))
	(if real-dir 
	    (debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist")
	    (debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
    (if (symbolic-link? run-dir)
	(begin
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869


1870
1871
1872
1873
1874
1875
1876
		 (delete-directory run-dir)))
	    (if (and run-dir
		     (not (member run-dir (list "n/a" "/tmp/badname"))))
		(debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
		(debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
	    ))
    ;; Only delete the records *after* removing the directory. If things fail we have a record 
    (case mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "NOT_STARTED" "n/a" #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f))
      (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))))



;;======================================================================
;; Routines for manipulating runs
;;======================================================================

;; Since many calls to a run require pretty much the same setup 
;; this wrapper is used to reduce the replication of code







|
|

|
>
>







2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
		 (delete-directory run-dir)))
	    (if (and run-dir
		     (not (member run-dir (list "n/a" "/tmp/badname"))))
		(debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
		(debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
	    ))
    ;; Only delete the records *after* removing the directory. If things fail we have a record 
    (case clean-mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f))
      (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))
    ;; (rmt:no-sync-del! lock-key)
    ))

;;======================================================================
;; Routines for manipulating runs
;;======================================================================

;; Since many calls to a run require pretty much the same setup 
;; this wrapper is used to reduce the replication of code
1886
1887
1888
1889
1890
1891
1892

1893
1894
1895
1896


1897
1898
1899
1900
1901
1902
1903
      (exit 3))
     (else
      (let (;; (db   #f)
	    (keys #f))
	(if (launch:setup)
	    (begin
	      (full-runconfigs-read) ;; cache the run config

	      (launch:cache-config)) ;; do not cache here - need to be sure runconfigs is processed
	    (begin 
	      (debug:print 0 *default-log-port* "Failed to setup, exiting")
	      (exit 1)))


	(set! keys (keys:config-get-fields *configdat*))
	;; have enough to process -target or -reqtarg here
	(if (args:get-arg "-reqtarg")
	    (let* ((runconfigf (conc  *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL 
		   (runconfig  (read-config runconfigf #f #t environ-patt: #f)))
	      (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f)
		  (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash)







>
|



>
>







2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
      (exit 3))
     (else
      (let (;; (db   #f)
	    (keys #f))
	(if (launch:setup)
	    (begin
	      (full-runconfigs-read) ;; cache the run config
	      ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW.
	      ) ;; do not cache here - need to be sure runconfigs is processed
	    (begin 
	      (debug:print 0 *default-log-port* "Failed to setup, exiting")
	      (exit 1)))

        
	(set! keys (keys:config-get-fields *configdat*))
	;; have enough to process -target or -reqtarg here
	(if (args:get-arg "-reqtarg")
	    (let* ((runconfigf (conc  *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL 
		   (runconfig  (read-config runconfigf #f #t environ-patt: #f)))
	      (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f)
		  (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash)
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
;;
(define (runs:get-tests-matching-tags tagpatt)
  (let* ((tagdata (rmt:get-tests-tags))
         (res     '())) ;; list of tests that match one or more tags
    (for-each
     (lambda (tag)
       (if (patt-list-match tag tagpatt)
           (set! res (append (hash-table-ref tagdata tag)))))
     (hash-table-keys tagdata))
    res))
    

;; Update test_meta for all tests
(define (runs:update-all-test_meta db)
  (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests)))
    (for-each 
     (lambda (test-name)
       (let* ((test-conf    (mt:lazy-read-test-config test-name)))
	 (if test-conf (runs:update-test_meta test-name test-conf))))
     (hash-table-keys test-names))))

;; This could probably be refactored into one complex query ...
;; NOT PORTED - DO NOT USE YET
;;
(define (runs:rollup-run keys runname user keyvals)
  (debug:print 4 *default-log-port* "runs:rollup-run, keys: " keys " -runname " runname " user: " user)
  (let* ((db              #f)
	 ;; register run operates on the main db
	 (new-run-id      (rmt:register-run keyvals runname "new" "n/a" user))
	 (prev-tests      (rmt:get-matching-previous-test-run-records new-run-id "%" "%"))
	 (curr-tests      (mt:get-tests-for-run new-run-id "%/%" '() '()))
	 (curr-tests-hash (make-hash-table)))
    (rmt:update-run-event_time new-run-id)
    ;; index the already saved tests by testname and itemdat in curr-tests-hash
    (for-each
     (lambda (testdat)







|




















|







2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
;;
(define (runs:get-tests-matching-tags tagpatt)
  (let* ((tagdata (rmt:get-tests-tags))
         (res     '())) ;; list of tests that match one or more tags
    (for-each
     (lambda (tag)
       (if (patt-list-match tag tagpatt)
           (set! res (append (hash-table-ref tagdata tag) res))))
     (hash-table-keys tagdata))
    res))
    

;; Update test_meta for all tests
(define (runs:update-all-test_meta db)
  (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests)))
    (for-each 
     (lambda (test-name)
       (let* ((test-conf    (mt:lazy-read-test-config test-name)))
	 (if test-conf (runs:update-test_meta test-name test-conf))))
     (hash-table-keys test-names))))

;; This could probably be refactored into one complex query ...
;; NOT PORTED - DO NOT USE YET
;;
(define (runs:rollup-run keys runname user keyvals)
  (debug:print 4 *default-log-port* "runs:rollup-run, keys: " keys " -runname " runname " user: " user)
  (let* ((db              #f)
	 ;; register run operates on the main db
	 (new-run-id      (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))
	 (prev-tests      (rmt:get-matching-previous-test-run-records new-run-id "%" "%"))
	 (curr-tests      (mt:get-tests-for-run new-run-id "%/%" '() '()))
	 (curr-tests-hash (make-hash-table)))
    (rmt:update-run-event_time new-run-id)
    ;; index the already saved tests by testname and itemdat in curr-tests-hash
    (for-each
     (lambda (testdat)
2039
2040
2041
2042
2043
2044
2045























	     (conc "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment) "
		   "SELECT " (db:test-get-id new-testdat) ",category,variable,value,expected,tol,units,comment FROM test_data WHERE test_id=?;")
	     (db:test-get-id testdat))))
	 ))
     prev-tests)))
	 
     






























>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
	     (conc "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment) "
		   "SELECT " (db:test-get-id new-testdat) ",category,variable,value,expected,tol,units,comment FROM test_data WHERE test_id=?;")
	     (db:test-get-id testdat))))
	 ))
     prev-tests)))
	 
     
;; clean cache files
(define (runs:clean-cache target runname toppath)
  (if target
      (if runname
	  (let* ((linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree")))
		 (runtop   (conc linktree "/" target "/" runname))
		 (files    (if (common:file-exists? runtop)
			       (append (glob (conc runtop "/.megatest*"))
				       (glob (conc runtop "/.runconfig*")))
			       '())))
	    (if (null? files)
		(debug:print-info 0 *default-log-port* "No cached megatest or runconfigs files found. None removed.")
		(begin
		  (debug:print-info 0 *default-log-port* "Removing cached files:\n    " (string-intersperse files "\n    "))
		  (for-each 
		   (lambda (f)
		     (handle-exceptions
			 exn
			 (debug:print 0 *default-log-port* "WARNING: Failed to remove file " f)
		       (delete-file f)))
		   files))))
	  (debug:print-error 0 *default-log-port* "-clean-cache requires -runname."))
      (debug:print-error 0 *default-log-port* "-clean-cache requires -target or -reqtarg")))