Megatest

Diff
Login

Differences From Artifact [b746e7df77]:

To Artifact [9cd1d4c354]:


17
18
19
20
21
22
23

24
25
26
27
28
29
30
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
(declare (uses server))
(declare (uses mt))


(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")








>







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
(declare (uses server))
(declare (uses mt))
;; (declare (uses filedb))

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
	  (toppath     *toppath*)
	  (envdat      keyvals) ;; initial values start with keyvals
	  (runconfig   #f)
	  (serverdat   (if (args:get-arg "-server")
			   *runremote*
			   #f)) ;; to be used later
	  (transport   (or (args:get-arg "-transport") 'http))
	  (db          (if (and mconfig
				(or (args:get-arg "-server")
				    (eq? transport 'fs)))
			   (open-db)
			   #f))
	  (run-id      #f))
    ;; Set all the environment vars we know so far, start with keys
    (for-each (lambda (keyval)
		(setenv (car keyval)(cadr keyval)))
	      keyvals)
    ;; Set up various and sundry known vars here
    (setenv "MT_RUN_AREA_HOME" toppath)







<
<
<
<
<







55
56
57
58
59
60
61





62
63
64
65
66
67
68
	  (toppath     *toppath*)
	  (envdat      keyvals) ;; initial values start with keyvals
	  (runconfig   #f)
	  (serverdat   (if (args:get-arg "-server")
			   *runremote*
			   #f)) ;; to be used later
	  (transport   (or (args:get-arg "-transport") 'http))





	  (run-id      #f))
    ;; Set all the environment vars we know so far, start with keys
    (for-each (lambda (keyval)
		(setenv (car keyval)(cadr keyval)))
	      keyvals)
    ;; Set up various and sundry known vars here
    (setenv "MT_RUN_AREA_HOME" toppath)
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127


128

129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151

152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
	  (debug:print 0 "ERROR: [" (args:get-arg "-reqtarg") "] not found in " runconfigf)
	  (if db (sqlite3:finalize! db))
	  (exit 1)))
    ;; Now have runconfigs data loaded, set environment vars
    (for-each (lambda (section)
		(for-each (lambda (varval)
			    (set! envdat (append envdat (list varval)))
			    (setenv (car varval)(cadr varval)))
			  (configf:get-section runconfig section)))
	      (list "default" target))
    (vector target runname testpatt keys keyvals envdat mconfig runconfig serverdat transport db toppath run-id)))

(define (set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f))
  (let* ((target    (or (common:args-get-target)
			(get-environment-variable "MT_TARGET")))
	 (keys      (if inkeys    inkeys    (cdb:remote-run db:get-keys #f)))
	 (keyvals   (if inkeyvals inkeyvals (keys:target->keyval keys target)))
	 (vals      (hash-table-ref/default *env-vars-by-run-id* run-id #f))
	 (link-tree (configf:lookup *configdat* "setup" "linktree")))
    ;; get the info from the db and put it in the cache
    (if link-tree
	(setenv "MT_LINKTREE" link-tree)
	(debug:print 0 "ERROR: linktree not set, should be set in megatest.config in [setup] section."))
    (if (not vals)
	(let ((ht (make-hash-table)))
	  (hash-table-set! *env-vars-by-run-id* run-id ht)
	  (set! vals ht)
	  (for-each
	   (lambda (key)
	     (hash-table-set! vals (car key) (cadr key))) ;; (cdb:remote-run db:get-run-key-val #f run-id (car key))))
	   keyvals)))
    ;; from the cached data set the vars
    (hash-table-for-each
     vals
     (lambda (key val)
       (debug:print 2 "setenv " key " " val)
       (if (and (string? key)
		(string? val))
	   (setenv key val)
	   (debug:print 0 "ERROR: Malformed environment variable definition: var=" var ", val=" val))))
    (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target))
    (alist->env-vars (hash-table-ref/default *configdat* "env-override" '()))
    ;; Lets use this as an opportunity to put MT_RUNNAME in the environment


    (setenv "MT_RUNNAME" (if inrunname inrunname (cdb:remote-run db:get-run-name-from-id #f run-id)))

    (setenv "MT_RUN_AREA_HOME" *toppath*)))

(define (set-item-env-vars itemdat)
  (for-each (lambda (item)
	      (debug:print 2 "setenv " (car item) " " (cadr item))
	      (setenv (car item) (cadr item)))
	    itemdat))

;; Every time can-run-more-tests is called increment the delay
;;
;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine
;;
(define *last-num-running-tests* 0)
(define *runs:can-run-more-tests-count* 0)
(define (runs:shrink-can-run-more-tests-count) ;; the db is a dummy var so we can use cdb:remote-run
  (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2)))

;; Temporary globals. Move these into the logic or into common
;;
(define *seen-cant-run-tests* (make-hash-table)) ;; use to track tests that we suspect cannot be run
(define (runs:inc-cant-run-tests testname)
  (hash-table-set! *seen-cant-run-tests* testname
		   (+ (hash-table-ref/default *seen-cant-run-tests* testname 0) 1)))

(define (runs:can-keep-running? testname n)
  (< (hash-table-ref/default *seen-cant-run-tests* testname 0) n))

(define *runs:denoise* (make-hash-table)) ;; key => last-time-ran

(define (runs:lownoise key waitval)
  (let ((lasttime (hash-table-ref/default *runs:denoise* key 0))
	(currtime (current-seconds)))
    (if (> (- currtime lasttime) waitval)
	(begin
	  (hash-table-set! *runs:denoise* key currtime)
	  #t)
	#f)))

(define (runs:can-run-more-tests jobgroup max-concurrent-jobs)
  (thread-sleep! (cond
		  ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while
		  (else 0)))
  (let* ((num-running             (cdb:remote-run db:get-count-tests-running #f))
	 (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup))
	 (job-group-limit         (let ((jobg-count (config-lookup *configdat* "jobgroups" jobgroup)))
				    (if (string? jobg-count)
					(string->number jobg-count)
					jobg-count))))
    (if (> (+ num-running num-running-in-jobgroup) 0)
	(set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
    (if (not (eq? *last-num-running-tests* num-running))







|







|













|






<
<
|
<



>
>
|
>














|








>














|



|
|







81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116


117

118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
	  (debug:print 0 "ERROR: [" (args:get-arg "-reqtarg") "] not found in " runconfigf)
	  (if db (sqlite3:finalize! db))
	  (exit 1)))
    ;; Now have runconfigs data loaded, set environment vars
    (for-each (lambda (section)
		(for-each (lambda (varval)
			    (set! envdat (append envdat (list varval)))
			    (safe-setenv (car varval)(cadr varval)))
			  (configf:get-section runconfig section)))
	      (list "default" target))
    (vector target runname testpatt keys keyvals envdat mconfig runconfig serverdat transport db toppath run-id)))

(define (set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f))
  (let* ((target    (or (common:args-get-target)
			(get-environment-variable "MT_TARGET")))
	 (keys    (if inkeys    inkeys    (rmt:get-keys)))
	 (keyvals   (if inkeyvals inkeyvals (keys:target->keyval keys target)))
	 (vals      (hash-table-ref/default *env-vars-by-run-id* run-id #f))
	 (link-tree (configf:lookup *configdat* "setup" "linktree")))
    ;; get the info from the db and put it in the cache
    (if link-tree
	(setenv "MT_LINKTREE" link-tree)
	(debug:print 0 "ERROR: linktree not set, should be set in megatest.config in [setup] section."))
    (if (not vals)
	(let ((ht (make-hash-table)))
	  (hash-table-set! *env-vars-by-run-id* run-id ht)
	  (set! vals ht)
	  (for-each
	   (lambda (key)
	     (hash-table-set! vals (car key) (cadr key)))
	   keyvals)))
    ;; from the cached data set the vars
    (hash-table-for-each
     vals
     (lambda (key val)
       (debug:print 2 "setenv " key " " val)


       (safe-setenv key val)))

    (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target))
    (alist->env-vars (hash-table-ref/default *configdat* "env-override" '()))
    ;; Lets use this as an opportunity to put MT_RUNNAME in the environment
    (let ((runname  (if inrunname inrunname (rmt:get-run-name-from-id run-id))))
      (if runname
	  (setenv "MT_RUNNAME" runname)
	  (debug:print 0 "ERROR: no value for runname for id " run-id)))
    (setenv "MT_RUN_AREA_HOME" *toppath*)))

(define (set-item-env-vars itemdat)
  (for-each (lambda (item)
	      (debug:print 2 "setenv " (car item) " " (cadr item))
	      (setenv (car item) (cadr item)))
	    itemdat))

;; Every time can-run-more-tests is called increment the delay
;;
;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine
;;
(define *last-num-running-tests* 0)
(define *runs:can-run-more-tests-count* 0)
(define (runs:shrink-can-run-more-tests-count)
  (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2)))

;; Temporary globals. Move these into the logic or into common
;;
(define *seen-cant-run-tests* (make-hash-table)) ;; use to track tests that we suspect cannot be run
(define (runs:inc-cant-run-tests testname)
  (hash-table-set! *seen-cant-run-tests* testname
		   (+ (hash-table-ref/default *seen-cant-run-tests* testname 0) 1)))

(define (runs:can-keep-running? testname n)
  (< (hash-table-ref/default *seen-cant-run-tests* testname 0) n))

(define *runs:denoise* (make-hash-table)) ;; key => last-time-ran

(define (runs:lownoise key waitval)
  (let ((lasttime (hash-table-ref/default *runs:denoise* key 0))
	(currtime (current-seconds)))
    (if (> (- currtime lasttime) waitval)
	(begin
	  (hash-table-set! *runs:denoise* key currtime)
	  #t)
	#f)))

(define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)
  (thread-sleep! (cond
		  ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while
		  (else 0)))
  (let* ((num-running             (rmt:get-count-tests-running run-id))
	 (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
	 (job-group-limit         (let ((jobg-count (config-lookup *configdat* "jobgroups" jobgroup)))
				    (if (string? jobg-count)
					(string->number jobg-count)
					jobg-count))))
    (if (> (+ num-running num-running-in-jobgroup) 0)
	(set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
    (if (not (eq? *last-num-running-tests* num-running))
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
	  (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)))))

;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
;;              FIXME: error out if /patt specified
;;            
(define (runs:run-tests target runname test-patts user flags) ;; test-names
  (common:clear-caches) ;; clear all caches
  (let* ((keys               (keys:config-get-fields *configdat*))
	 (keyvals            (keys:target->keyval keys target))
	 (run-id             (cdb:remote-run db:register-run #f keyvals runname "new" "n/a" user))  ;;  test-name)))
	 (deferred          '()) ;; delay running these since they have a waiton clause
	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
	 (test-records       (make-hash-table))
	 (all-tests-registry (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     (hash-table-keys all-tests-registry))
	 (test-names         (tests:filter-test-names all-test-names test-patts))
	 (required-tests     (lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work

    ;; Update the synchronous setting in the db based on the default or what is set by the user
    ;; This is done once here on a call to run tests rather than on every call to open-db
    (cdb:remote-run db:set-sync #f)

    (set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (if (file-exists? runconfigf)
	(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals "pre-launch-env-vars")
	(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))
    
    ;; look up all tests matching the comma separated list of globs in
    ;; test-patts (using % as wildcard)

    ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts)))
    (debug:print-info 0 "test names " test-names)

    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
    (if (eq? *passnum* 0)
	(begin
	  ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to 
	  ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends 
	  ;; on test A but test B reached the point on being registered as NOT_STARTED and test
	  ;; A failed for some reason then on re-run using -keepgoing the run can never complete.
	  (cdb:delete-tests-in-state *runremote* run-id "NOT_STARTED")
	  (cdb:remote-run db:set-tests-state-status #f run-id test-names #f "FAIL" "NOT_STARTED" "FAIL")))

    ;; Ensure all tests are registered in the test_meta table
    (runs:update-all-test_meta #f)

    ;; now add non-directly referenced dependencies (i.e. waiton)
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data







<


|








<
<
<
<



















|
|







198
199
200
201
202
203
204

205
206
207
208
209
210
211
212
213
214
215




216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
	  (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)))))

;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
;;              FIXME: error out if /patt specified
;;            
(define (runs:run-tests target runname test-patts user flags) ;; test-names

  (let* ((keys               (keys:config-get-fields *configdat*))
	 (keyvals            (keys:target->keyval keys target))
	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user))  ;;  test-name)))
	 (deferred          '()) ;; delay running these since they have a waiton clause
	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
	 (test-records       (make-hash-table))
	 (all-tests-registry (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     (hash-table-keys all-tests-registry))
	 (test-names         (tests:filter-test-names all-test-names test-patts))
	 (required-tests     (lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work





    (set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (if (file-exists? runconfigf)
	(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals "pre-launch-env-vars")
	(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))
    
    ;; look up all tests matching the comma separated list of globs in
    ;; test-patts (using % as wildcard)

    ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts)))
    (debug:print-info 0 "test names " test-names)

    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
    (if (eq? *passnum* 0)
	(begin
	  ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to 
	  ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends 
	  ;; on test A but test B reached the point on being registered as NOT_STARTED and test
	  ;; A failed for some reason then on re-run using -keepgoing the run can never complete.
	  (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED")
	  (rmt:set-tests-state-status run-id test-names #f "FAIL" "NOT_STARTED" "FAIL")))

    ;; Ensure all tests are registered in the test_meta table
    (runs:update-all-test_meta #f)

    ;; now add non-directly referenced dependencies (i.e. waiton)
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data
385
386
387
388
389
390
391

392
393
394
395
396
397
398
399
	  '()
	  reg)))

(define runs:nothing-left-in-queue-count 0)

(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap)
  (let* ((loop-list       (list hed tal reg reruns))

	 (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met)))
    (debug:print-info 4 "START OF INNER COND #2 "
		      "\n can-run-more:    " can-run-more
		      "\n testname:        " hed
		      "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)
		      "\n non-completed:   " (runs:pretty-string non-completed) 







>
|







377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
	  '()
	  reg)))

(define runs:nothing-left-in-queue-count 0)

(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons item-path testmode itemmap: itemmap))
	 ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met)))
    (debug:print-info 4 "START OF INNER COND #2 "
		      "\n can-run-more:    " can-run-more
		      "\n testname:        " hed
		      "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)
		      "\n non-completed:   " (runs:pretty-string non-completed) 
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488

	(if (and give-up
		 (not (and (null? tal)(null? reg))))
	    (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
		  (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
	      (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue")

	      (let ((test-id (cdb:remote-run db:get-test-id-cached #f run-id hed "")))
		(mt:test-set-state-status-by-id test-id "DEQUEUED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
		  (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)







|







467
468
469
470
471
472
473
474
475
476
477
478
479
480
481

	(if (and give-up
		 (not (and (null? tal)(null? reg))))
	    (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
		  (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
	      (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue")

	      (let ((test-id (rmt:get-test-id run-id hed "")))
		(mt:test-set-state-status-by-id test-id "DEQUEUED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
		  (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)
594
595
596
597
598
599
600
601
602
603
604
605
606

607
608
609
610
611
612
613
614
	      ((string? t)
	       t)
	      (else 
	       (conc t))))
	   inlst)))

(define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmap)
  (let* ((run-limits-info         (runs:can-run-more-tests jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources          (car run-limits-info))
	 (num-running             (list-ref run-limits-info 1))
	 (num-running-in-jobgroup (list-ref run-limits-info 2)) 
	 (max-concurrent-jobs     (list-ref run-limits-info 3))
	 (job-group-limit         (list-ref run-limits-info 4))

	 (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails                   (runs:calc-fails prereqs-not-met))
	 (non-completed           (runs:calc-not-completed prereqs-not-met))
	 (loop-list               (list hed tal reg reruns)))
    (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" 
		      (string-intersperse 
		       (map (lambda (t)
			      (if (vector? t)







|





>
|







587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
	      ((string? t)
	       t)
	      (else 
	       (conc t))))
	   inlst)))

(define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmap)
  (let* ((run-limits-info         (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources          (car run-limits-info))
	 (num-running             (list-ref run-limits-info 1))
	 (num-running-in-jobgroup (list-ref run-limits-info 2)) 
	 (max-concurrent-jobs     (list-ref run-limits-info 3))
	 (job-group-limit         (list-ref run-limits-info 4))
	 (prereqs-not-met         (rmt:get-prereqs-not-met run-id waitons item-path testmode itemmap: itemmap))
	 ;; (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails                   (runs:calc-fails prereqs-not-met))
	 (non-completed           (runs:calc-not-completed prereqs-not-met))
	 (loop-list               (list hed tal reg reruns)))
    (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" 
		      (string-intersperse 
		       (map (lambda (t)
			      (if (vector? t)
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
     
     ;; Register tests 
     ;;
     ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f))
      (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" )
      (if (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
	  (begin
	    (cdb:tests-register-test *runremote* run-id test-name item-path)
	    (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done))
	  (let ((th (make-thread (lambda ()
				   (mutex-lock! registry-mutex)
				   (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start)
				   (mutex-unlock! registry-mutex)
				   ;; If haven't done it before register a top level test if this is an itemized test
				   (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done))
				       (cdb:tests-register-test *runremote* run-id test-name ""))
				   (cdb:tests-register-test *runremote* run-id test-name item-path)
				   (mutex-lock! registry-mutex)
				   (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done)
				   (mutex-unlock! registry-mutex))
				 (conc test-name "/" item-path))))
	    (thread-start! th)))
      (runs:shrink-can-run-more-tests-count)   ;; DELAY TWEAKER (still needed?)
      (if (and (null? tal)(null? reg))







|







|
|







634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
     
     ;; Register tests 
     ;;
     ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f))
      (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" )
      (if (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
	  (begin
	    (rmt:general-call 'register-test run-id run-id test-name item-path)
	    (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done))
	  (let ((th (make-thread (lambda ()
				   (mutex-lock! registry-mutex)
				   (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start)
				   (mutex-unlock! registry-mutex)
				   ;; If haven't done it before register a top level test if this is an itemized test
				   (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done))
				       (rmt:general-call 'register-test run-id run-id test-name ""))
				   (rmt:general-call 'register-test run-id run-id test-name item-path)
				   (mutex-lock! registry-mutex)
				   (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done)
				   (mutex-unlock! registry-mutex))
				 (conc test-name "/" item-path))))
	    (thread-start! th)))
      (runs:shrink-can-run-more-tests-count)   ;; DELAY TWEAKER (still needed?)
      (if (and (null? tal)(null? reg))
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
  ;; NB// Should expand items here and then insert into the run queue.
  (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags))

  ;; Do mark-and-find clean up of db before starting runing of quue
  ;;
  ;; (cdb:remote-run db:find-and-mark-incomplete #f)

  (let ((run-info              (cdb:remote-run db:get-run-info #f run-id))
	(tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
	(test-registry         (make-hash-table))
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))







|







772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
  ;; NB// Should expand items here and then insert into the run queue.
  (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags))

  ;; Do mark-and-find clean up of db before starting runing of quue
  ;;
  ;; (cdb:remote-run db:find-and-mark-incomplete #f)

  (let ((run-info              (rmt:get-run-info run-id))
	(tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
	(test-registry         (make-hash-table))
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
	     (priority    (tests:testqueue-get-priority   test-record))
	     (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
	     (items       (tests:testqueue-get-items      test-record))
	     (item-path   (item-list->path itemdat))
	     (tfullname   (runs:make-full-test-name test-name item-path))
	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen))
	     (num-running (cdb:remote-run db:get-count-tests-running-for-run-id #f run-id)))

      (if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running 60))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 "max-tries-hash: " (hash-table->alist *max-tries-hash*))

	;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard
	;; and it is clear they *should* have run but did not.
	(if (not (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f))
	    (begin
	      (cdb:tests-register-test *runremote* run-id test-name "")
	      (hash-table-set! test-registry (runs:make-full-test-name test-name "") 'done)))
	
	;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :(
	;;
	(if (member (hash-table-ref/default test-registry tfullname #f) 
		    '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE))
	    (begin







|












|







827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
	     (priority    (tests:testqueue-get-priority   test-record))
	     (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
	     (items       (tests:testqueue-get-items      test-record))
	     (item-path   (item-list->path itemdat))
	     (tfullname   (runs:make-full-test-name test-name item-path))
	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen))
	     (num-running (rmt:get-count-tests-running-for-run-id run-id)))

      (if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running 60))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 "max-tries-hash: " (hash-table->alist *max-tries-hash*))

	;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard
	;; and it is clear they *should* have run but did not.
	(if (not (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f))
	    (begin
	      (rmt:general-call 'register-test run-id run-id test-name "")
	      (hash-table-set! test-registry (runs:make-full-test-name test-name "") 'done)))
	
	;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :(
	;;
	(if (member (hash-table-ref/default test-registry tfullname #f) 
		    '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE))
	    (begin
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
	      #f
	      (loop (car tal)(cdr tal) reg reruns)))
	    
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
	  (let ((can-run-more    (runs:can-run-more-tests jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap)))
		  (if loop-list
		      (apply loop loop-list)))
		;; if can't run more just loop with next possible test
		(loop (car newtal)(cdr newtal) reg reruns))))







|







956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
	      #f
	      (loop (car tal)(cdr tal) reg reruns)))
	    
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
	  (let ((can-run-more    (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmap)))
		  (if loop-list
		      (apply loop loop-list)))
		;; if can't run more just loop with next possible test
		(loop (car newtal)(cdr newtal) reg reruns))))
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
    (if (not (hash-table-ref/default *test-meta-updated* test-name #f))
        (begin
	   (hash-table-set! *test-meta-updated* test-name #t)
           (runs:update-test_meta test-name test-conf)))
    
    ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer"))
    (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
	   (test-id       (cdb:remote-run db:get-test-id-cached #f  run-id test-name item-path))
	   (testdat       (if test-id (cdb:get-test-info-by-id *runremote* test-id) #f)))
      (if (not testdat)
	  (let loop ()
	    ;; ensure that the path exists before registering the test
	    ;; NOPE: Cannot! Don't know yet which disk area will be assigned....
	    ;; (system (conc "mkdir -p " new-test-path))
	    ;;
	    ;; (open-run-close tests:register-test db run-id test-name item-path)
	    ;;
	    ;; NB// for the above line. I want the test to be registered long before this routine gets called!
	    ;;
	    (if (not test-id)(set! test-id (cdb:remote-run db:get-test-id-cached #f run-id test-name item-path)))
	    (if (not test-id)
		(begin
		  (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id)
		  (cdb:tests-register-test *runremote* run-id test-name item-path)
		  (set! test-id (cdb:remote-run db:get-test-id-cached #f run-id test-name item-path))))
	    (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")
	    (set! testdat (cdb:get-test-info-by-id *runremote* test-id))
	    (if (not testdat)
		(begin
		  (debug:print-info 0 "WARNING: server is overloaded, trying again in one second")
		  (thread-sleep! 1)
		  (loop)))))
      (if (not testdat) ;; should NOT happen
	  (debug:print 0 "ERROR: failed to get test record for test-id " test-id))







|
|










|



|
|

|







1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
    (if (not (hash-table-ref/default *test-meta-updated* test-name #f))
        (begin
	   (hash-table-set! *test-meta-updated* test-name #t)
           (runs:update-test_meta test-name test-conf)))
    
    ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer"))
    (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
	   (test-id       (rmt:get-test-id run-id test-name item-path))
	   (testdat       (if test-id (rmt:get-test-info-by-id run-id test-id) #f)))
      (if (not testdat)
	  (let loop ()
	    ;; ensure that the path exists before registering the test
	    ;; NOPE: Cannot! Don't know yet which disk area will be assigned....
	    ;; (system (conc "mkdir -p " new-test-path))
	    ;;
	    ;; (open-run-close tests:register-test db run-id test-name item-path)
	    ;;
	    ;; NB// for the above line. I want the test to be registered long before this routine gets called!
	    ;;
	    (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path)))
	    (if (not test-id)
		(begin
		  (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id)
		  (rmt:general-call 'register-test run-id run-id test-name item-path)
		  (set! test-id (rmt:get-test-id run-id test-name item-path))))
	    (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")
	    (set! testdat (rmt:get-test-info-by-id run-id test-id))
	    (if (not testdat)
		(begin
		  (debug:print-info 0 "WARNING: server is overloaded, trying again in one second")
		  (thread-sleep! 1)
		  (loop)))))
      (if (not testdat) ;; should NOT happen
	  (debug:print 0 "ERROR: failed to get test record for test-id " test-id))
1157
1158
1159
1160
1161
1162
1163

1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
	       (let ((skip-test   #f)
		     (skip-check  (configf:get-section test-conf "skip")))
		 (cond 
		  ;; Have to check for skip conditions. This one skips if there are same-named tests
		  ;; currently running
		  ((and skip-check
			(configf:lookup test-conf "skip" "prevrunning"))

		   (let ((running-tests (cdb:remote-run db:get-tests-for-runs-mindata #f #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)))
		     (if (not (null? running-tests)) ;; have to skip 
			 (set! skip-test "Skipping due to previous tests running"))))
		  ((and skip-check
			(configf:lookup test-conf "skip" "fileexists"))
		   (if (file-exists? (configf:lookup test-conf "skip" "fileexists"))
		       (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists"))))))
		 (if skip-test
		     (begin
		       (mt:test-set-state-status-by-id test-id "COMPLETED" "SKIP" skip-test)
		       (debug:print-info 1 "SKIPPING Test " full-test-name " due to " skip-test))
		     (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags))
			 (begin
			   (print "ERROR: Failed to launch the test. Exiting as soon as possible")
			   (set! *globalexitstatus* 1) ;; 
			   (process-signal (current-process-id) signal/kill))))))))
	((KILLED) 
	 (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")
	 (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN)) ;; KILLED))
	((LAUNCHED REMOTEHOSTSTART RUNNING)  
	 (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
				       (db:test-get-run_duration testdat)))
		600) ;; i.e. no update for more than 600 seconds
	     (begin
	       (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	       (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f))
	     (debug:print 2 "NOTE: " test-name " is already running")))
	(else      
	 (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat))
	 (case (string->symbol (test:get-state testdat)) 
	   ((COMPLETED INCOMPLETE)
	    (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN))
	   (else







>
|








|















|







1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
	       (let ((skip-test   #f)
		     (skip-check  (configf:get-section test-conf "skip")))
		 (cond 
		  ;; Have to check for skip conditions. This one skips if there are same-named tests
		  ;; currently running
		  ((and skip-check
			(configf:lookup test-conf "skip" "prevrunning"))
		   ;; run-ids = #f means *all* runs
		   (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)))
		     (if (not (null? running-tests)) ;; have to skip 
			 (set! skip-test "Skipping due to previous tests running"))))
		  ((and skip-check
			(configf:lookup test-conf "skip" "fileexists"))
		   (if (file-exists? (configf:lookup test-conf "skip" "fileexists"))
		       (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists"))))))
		 (if skip-test
		     (begin
		       (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test)
		       (debug:print-info 1 "SKIPPING Test " full-test-name " due to " skip-test))
		     (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags))
			 (begin
			   (print "ERROR: Failed to launch the test. Exiting as soon as possible")
			   (set! *globalexitstatus* 1) ;; 
			   (process-signal (current-process-id) signal/kill))))))))
	((KILLED) 
	 (debug:print 1 "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")
	 (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN)) ;; KILLED))
	((LAUNCHED REMOTEHOSTSTART RUNNING)  
	 (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
				       (db:test-get-run_duration testdat)))
		600) ;; i.e. no update for more than 600 seconds
	     (begin
	       (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	       (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f))
	     (debug:print 2 "NOTE: " test-name " is already running")))
	(else      
	 (debug:print 0 "ERROR: Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat))
	 (case (string->symbol (test:get-state testdat)) 
	   ((COMPLETED INCOMPLETE)
	    (hash-table-set! test-registry (runs:make-full-test-name test-name test-path) 'DONOTRUN))
	   (else
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
;;    'set-state-status
;;
;; NB// should pass in keys?
;;
(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f))
  (common:clear-caches) ;; clear all caches
  (let* ((db           #f)
	 (keys         (cdb:remote-run db:get-keys db))
	 (rundat       (mt:get-runs-by-patt keys runnamepatt target))
	 (header       (vector-ref rundat 0))
	 (runs         (vector-ref rundat 1))
	 (states       (if state  (string-split state  ",") '()))
	 (statuses     (if status (string-split status ",") '()))
	 (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))))
    (debug:print-info 4 "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)







|







1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
;;    'set-state-status
;;
;; NB// should pass in keys?
;;
(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f))
  (common:clear-caches) ;; clear all caches
  (let* ((db           #f)
	 (keys         (rmt:get-keys))
	 (rundat       (mt:get-runs-by-patt keys runnamepatt target))
	 (header       (vector-ref rundat 0))
	 (runs         (vector-ref rundat 1))
	 (states       (if state  (string-split state  ",") '()))
	 (statuses     (if status (string-split status ",") '()))
	 (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))))
    (debug:print-info 4 "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
1282
1283
1284
1285
1286
1287
1288
1289


1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306


1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
		   ((print-run)
		    (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
		    action)
		   ((run-wait)
		    (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete"))
		   (else
		    (debug:print-info 0 "action not recognised " action)))
		 (let ((sorted-tests     (sort tests (lambda (a b)(let ((dira (db:test-get-rundir a))


									(dirb (db:test-get-rundir b)))
								    (if (and (string? dira)(string? dirb))
									(> (string-length dira)(string-length dirb))
									#f)))))
		       (test-retry-time  (make-hash-table))
		       (allow-run-time   10)) ;; seconds to allow for killing tests before just brutally killing 'em
		   (let loop ((test (car sorted-tests))
			      (tal  (cdr sorted-tests)))
		     (let* ((test-id       (db:test-get-id test))
			    (new-test-dat  (cdb:get-test-info-by-id *runremote* test-id)))
		       (if (not new-test-dat)
			   (begin
			     (debug:print 0 "ERROR: We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!")
			     (if (not (null? tal))
				 (loop (car tal)(cdr tal))))
			   (let* ((item-path     (db:test-get-item-path new-test-dat))
				  (test-name     (db:test-get-testname new-test-dat))


				  (run-dir       (db:test-get-rundir new-test-dat))    ;; run dir is from the link tree
				  (real-dir      (if (file-exists? run-dir)
						     (resolve-pathname run-dir)
						     #f))
				  (test-state    (db:test-get-state new-test-dat))
				  (test-fulln    (db:test-get-fullname new-test-dat)))
			     (case action
			       ((remove-runs)
				(debug:print-info 0 "test: " test-name " itest-state: " test-state)
				(if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
				    (begin
				      (if (not (hash-table-ref/default test-retry-time test-fulln #f))
					  (begin
					    ;; want to set to REMOVING BUT CANNOT do it here?
					    (hash-table-set! test-retry-time test-fulln (current-seconds))))
				      (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time)
					  ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first
					  ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give
					  ;; up and blow it away.
					  (begin
					    (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing")
					    (mt:test-set-state-status-by-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)
					    (thread-sleep! 1))
					  (begin
					    (mt:test-set-state-status-by-id (db:test-get-id test) "KILLREQ" "n/a" #f)
					    (thread-sleep! 1)))
				      ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ...
				      (if (null? tal)
					  (loop new-test-dat tal)
					  (loop (car tal)(append tal (list new-test-dat)))))
				    (begin
				      (mt:test-set-state-status-by-id (db:test-get-id test) "REMOVING" "LOCKED" #f)
				      (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
				      (if (and real-dir 
					       (> (string-length real-dir) 5)
					       (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
					  (begin ;; let* ((realpath (resolve-pathname run-dir)))
					    (debug:print-info 1 "Recursively removing " real-dir)
					    (if (file-exists? real-dir)







|
>
>
|








|







>
>
|




















|


|






|







1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
		   ((print-run)
		    (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
		    action)
		   ((run-wait)
		    (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete"))
		   (else
		    (debug:print-info 0 "action not recognised " action)))
		 (let ((sorted-tests     (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr 
									 (db:test-get-rundir a)) ;; )  ;; (filedb:get-path *fdb* (db:test-get-rundir a)))
									(dirb ;; (rmt:sdb-qry 'getstr 
									 (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b))))
								    (if (and (string? dira)(string? dirb))
									(> (string-length dira)(string-length dirb))
									#f)))))
		       (test-retry-time  (make-hash-table))
		       (allow-run-time   10)) ;; seconds to allow for killing tests before just brutally killing 'em
		   (let loop ((test (car sorted-tests))
			      (tal  (cdr sorted-tests)))
		     (let* ((test-id       (db:test-get-id test))
			    (new-test-dat  (rmt:get-test-info-by-id run-id test-id)))
		       (if (not new-test-dat)
			   (begin
			     (debug:print 0 "ERROR: We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!")
			     (if (not (null? tal))
				 (loop (car tal)(cdr tal))))
			   (let* ((item-path     (db:test-get-item-path new-test-dat))
				  (test-name     (db:test-get-testname new-test-dat))
				  (run-dir       ;;(filedb:get-path *fdb*
				   ;; (rmt:sdb-qry 'getid 
				   (db:test-get-rundir new-test-dat)) ;; )    ;; run dir is from the link tree
				  (real-dir      (if (file-exists? run-dir)
						     (resolve-pathname run-dir)
						     #f))
				  (test-state    (db:test-get-state new-test-dat))
				  (test-fulln    (db:test-get-fullname new-test-dat)))
			     (case action
			       ((remove-runs)
				(debug:print-info 0 "test: " test-name " itest-state: " test-state)
				(if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
				    (begin
				      (if (not (hash-table-ref/default test-retry-time test-fulln #f))
					  (begin
					    ;; want to set to REMOVING BUT CANNOT do it here?
					    (hash-table-set! test-retry-time test-fulln (current-seconds))))
				      (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time)
					  ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first
					  ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give
					  ;; up and blow it away.
					  (begin
					    (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing")
					    (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)
					    (thread-sleep! 1))
					  (begin
					    (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f)
					    (thread-sleep! 1)))
				      ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ...
				      (if (null? tal)
					  (loop new-test-dat tal)
					  (loop (car tal)(append tal (list new-test-dat)))))
				    (begin
				      (mt:test-set-state-status-by-id run-id (db:test-get-id test) "REMOVING" "LOCKED" #f)
				      (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
				      (if (and real-dir 
					       (> (string-length real-dir) 5)
					       (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
					  (begin ;; let* ((realpath (resolve-pathname run-dir)))
					    (debug:print-info 1 "Recursively removing " real-dir)
					    (if (file-exists? real-dir)
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
						   (debug:print 0 "ERROR:  Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
						   (delete-directory run-dir)))
					      (if run-dir
						  (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
						  (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
					      ))
				      ;; Only delete the records *after* removing the directory. If things fail we have a record 
				      (cdb:remote-run db:delete-test-records db #f (db:test-get-id test))
				      (if (not (null? tal))
					  (loop (car tal)(cdr tal))))))
			       ((set-state-status)
				(debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status))
				(mt:test-set-state-status-by-id (db:test-get-id test) (car state-status)(cadr state-status) #f)
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       ((run-wait)
				(debug:print-info 2 "still waiting, " (length tests) " tests still running")
				(thread-sleep! 10)
				(let ((new-tests (proc-get-tests run-id)))
				  (if (null? new-tests)
				      (debug:print-info 1 "Run completed according to zero tests matching provided criteria.")
				      (loop (car new-tests)(cdr new-tests))))))))
		       )))))
	   ;; remove the run if zero tests remain
	   (if (eq? action 'remove-runs)
	       (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t)))
		 (if (null? remtests) ;; no more tests remaining
		     (let* ((dparts  (string-split lasttpath "/"))
			    (runpath (conc "/" (string-intersperse 
						(take dparts (- (length dparts) 1))
						"/"))))
		       (debug:print 1 "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record")
		       (cdb:remote-run db:delete-run db run-id)
		       ;; This is a pretty good place to purge old DELETED tests
		       (cdb:remote-run db:delete-tests-for-run db run-id)
		       (cdb:remote-run db:delete-old-deleted-test-records db)
		       (cdb:remote-run db:set-var db "DELETED_TESTS" (current-seconds))
		       ;; need to figure out the path to the run dir and remove it if empty
		       ;;    (if (null? (glob (conc runpath "/*")))
		       ;;        (begin
		       ;; 	 (debug:print 1 "Removing run dir " runpath)
		       ;; 	 (system (conc "rmdir -p " runpath))))
		       )))))
	 ))







|




|



















|
<
<
|
|







1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394


1395
1396
1397
1398
1399
1400
1401
1402
1403
						   (debug:print 0 "ERROR:  Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
						   (delete-directory run-dir)))
					      (if run-dir
						  (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
						  (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
					      ))
				      ;; Only delete the records *after* removing the directory. If things fail we have a record 
				      (rmt:delete-test-records (db:test-get-run_id test)(db:test-get-id test))
				      (if (not (null? tal))
					  (loop (car tal)(cdr tal))))))
			       ((set-state-status)
				(debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status))
				(mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f)
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       ((run-wait)
				(debug:print-info 2 "still waiting, " (length tests) " tests still running")
				(thread-sleep! 10)
				(let ((new-tests (proc-get-tests run-id)))
				  (if (null? new-tests)
				      (debug:print-info 1 "Run completed according to zero tests matching provided criteria.")
				      (loop (car new-tests)(cdr new-tests))))))))
		       )))))
	   ;; remove the run if zero tests remain
	   (if (eq? action 'remove-runs)
	       (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t)))
		 (if (null? remtests) ;; no more tests remaining
		     (let* ((dparts  (string-split lasttpath "/"))
			    (runpath (conc "/" (string-intersperse 
						(take dparts (- (length dparts) 1))
						"/"))))
		       (debug:print 1 "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record")
		       (rmt:delete-run run-id)


		       (rmt:delete-old-deleted-test-records)
		       ;; (cdb:remote-run db:set-var db "DELETED_TESTS" (current-seconds))
		       ;; need to figure out the path to the run dir and remove it if empty
		       ;;    (if (null? (glob (conc runpath "/*")))
		       ;;        (begin
		       ;; 	 (debug:print 1 "Removing run dir " runpath)
		       ;; 	 (system (conc "rmdir -p " runpath))))
		       )))))
	 ))
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512


1513
1514
1515

1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
    (for-each (lambda (run)
		(let ((run-id (db:get-value-by-header run header "id")))
		  (if (or lock
			  (and unlock
			       (begin
				 (print "Do you really wish to unlock run " run-id "?\n   y/n: ")
				 (equal? "y" (read-line)))))
		      (cdb:remote-run db:lock/unlock-run db run-id lock unlock user)
		      (debug:print-info 0 "Skipping lock/unlock on " run-id))))
	      runs)))
;;======================================================================
;; Rollup runs
;;======================================================================

;; Update the test_meta table for this test
(define (runs:update-test_meta test-name test-conf)
  (let ((currrecord (cdb:remote-run db:testmeta-get-record #f test-name)))
    (if (not currrecord)
	(begin
	  (set! currrecord (make-vector 11 #f))
	  (cdb:remote-run db:testmeta-add-record #f test-name)))
    (for-each 
     (lambda (key)
       (let* ((idx (cadr key))
	      (fld (car  key))
	      (val (config-lookup test-conf "test_meta" fld)))
	 ;; (debug:print 5 "idx: " idx " fld: " fld " val: " val)
	 (if (and val (not (equal? (vector-ref currrecord idx) val)))
	     (begin
	       (print "Updating " test-name " " fld " to " val)
	       (cdb:remote-run db:testmeta-update-field #f test-name fld val)))))
     '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10)))))

;; Update test_meta for all tests
(define (runs:update-all-test_meta db)
  (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests)))
    (for-each 
     (lambda (test-name)
       (let* ((test-conf    (mt:lazy-read-test-config test-name)))
	 ;; use the cdb:remote-run instead of passing in db
	 (if test-conf (runs:update-test_meta test-name test-conf))))
     (hash-table-keys test-names))))

;; This could probably be refactored into one complex query ...


(define (runs:rollup-run keys runname user keyvals)
  (debug:print 4 "runs:rollup-run, keys: " keys " -runname " runname " user: " user)
  (let* ((db              #f)

	 (new-run-id      (cdb:remote-run db:register-run #f keyvals runname "new" "n/a" user))
	 (prev-tests      (cdb:remote-run test:get-matching-previous-test-run-records db new-run-id "%" "%"))
	 (curr-tests      (mt:get-tests-for-run new-run-id "%/%" '() '()))
	 (curr-tests-hash (make-hash-table)))
    (cdb:remote-run db:update-run-event_time db new-run-id)
    ;; index the already saved tests by testname and itemdat in curr-tests-hash
    (for-each
     (lambda (testdat)
       (let* ((testname  (db:test-get-testname testdat))
	      (item-path (db:test-get-item-path testdat))
	      (full-name (conc testname "/" item-path)))
	 (hash-table-set! curr-tests-hash full-name testdat)))
     curr-tests)
    ;; NOPE: Non-optimal approach. Try this instead.
    ;;   1. tests are received in a list, most recent first
    ;;   2. replace the rollup test with the new *always*
    (for-each 
     (lambda (testdat)
       (let* ((testname  (db:test-get-testname testdat))
	      (item-path (db:test-get-item-path testdat))
	      (full-name (conc testname "/" item-path))
	      (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f))
	      (test-steps    (cdb:remote-run db:get-steps-for-test db (db:test-get-id testdat)))
	      (new-test-record #f))
	 ;; replace these with insert ... select
	 (apply sqlite3:execute 
		db 
		(conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) "
		      "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);")
		new-run-id (cddr (vector->list testdat)))







|








|



|









|








<




>
>



>
|
|


|

















|







1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504

1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
    (for-each (lambda (run)
		(let ((run-id (db:get-value-by-header run header "id")))
		  (if (or lock
			  (and unlock
			       (begin
				 (print "Do you really wish to unlock run " run-id "?\n   y/n: ")
				 (equal? "y" (read-line)))))
		      (rmt:lock/unlock-run run-id lock unlock user)
		      (debug:print-info 0 "Skipping lock/unlock on " run-id))))
	      runs)))
;;======================================================================
;; Rollup runs
;;======================================================================

;; Update the test_meta table for this test
(define (runs:update-test_meta test-name test-conf)
  (let ((currrecord (rmt:testmeta-get-record test-name)))
    (if (not currrecord)
	(begin
	  (set! currrecord (make-vector 11 #f))
	  (rmt:testmeta-add-record test-name)))
    (for-each 
     (lambda (key)
       (let* ((idx (cadr key))
	      (fld (car  key))
	      (val (config-lookup test-conf "test_meta" fld)))
	 ;; (debug:print 5 "idx: " idx " fld: " fld " val: " val)
	 (if (and val (not (equal? (vector-ref currrecord idx) val)))
	     (begin
	       (print "Updating " test-name " " fld " to " val)
	       (rmt:testmeta-update-field test-name fld val)))))
     '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10)))))

;; Update test_meta for all tests
(define (runs:update-all-test_meta db)
  (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests)))
    (for-each 
     (lambda (test-name)
       (let* ((test-conf    (mt:lazy-read-test-config test-name)))

	 (if test-conf (runs:update-test_meta test-name test-conf))))
     (hash-table-keys test-names))))

;; This could probably be refactored into one complex query ...
;; NOT PORTED - DO NOT USE YET
;;
(define (runs:rollup-run keys runname user keyvals)
  (debug:print 4 "runs:rollup-run, keys: " keys " -runname " runname " user: " user)
  (let* ((db              #f)
	 ;; register run operates on the main db
	 (new-run-id      (rmt:register-run keyvals runname "new" "n/a" user))
	 (prev-tests      (rmt:get-matching-previous-test-run-records new-run-id "%" "%"))
	 (curr-tests      (mt:get-tests-for-run new-run-id "%/%" '() '()))
	 (curr-tests-hash (make-hash-table)))
    (rmt:update-run-event_time new-run-id)
    ;; index the already saved tests by testname and itemdat in curr-tests-hash
    (for-each
     (lambda (testdat)
       (let* ((testname  (db:test-get-testname testdat))
	      (item-path (db:test-get-item-path testdat))
	      (full-name (conc testname "/" item-path)))
	 (hash-table-set! curr-tests-hash full-name testdat)))
     curr-tests)
    ;; NOPE: Non-optimal approach. Try this instead.
    ;;   1. tests are received in a list, most recent first
    ;;   2. replace the rollup test with the new *always*
    (for-each 
     (lambda (testdat)
       (let* ((testname  (db:test-get-testname testdat))
	      (item-path (db:test-get-item-path testdat))
	      (full-name (conc testname "/" item-path))
	      (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f))
	      (test-steps    (rmt:get-steps-for-test (db:test-get-id testdat)))
	      (new-test-record #f))
	 ;; replace these with insert ... select
	 (apply sqlite3:execute 
		db 
		(conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) "
		      "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);")
		new-run-id (cddr (vector->list testdat)))