Megatest

Changes On Branch f7fdbdc3056f7aa6
Login

Changes In Branch v1.64-keep-running-fix Through [f7fdbdc305] Excluding Merge-Ins

This is equivalent to a diff from 32584d6c1d to f7fdbdc305

2017-09-29
17:56
Got further. Noticed a race condition when not stepping one at a time by setting launcher. Noticed that xor does not handle preq-fail. check-in: 89fedf98b8 user: bjbarcla tags: v1.64-keep-running-fix
2017-09-28
17:58
updated - itemwait continues forward now, but runs do not stop. it is progress.

Problems: (1) toplevel goes to completed when not all items have started but the items started so far are completed; (2) not-started/preq-fail propagates not-started/na (which in turn propagates not-started/preq-fail). check-in: f7fdbdc305 user: bjbarcla tags: v1.64-keep-running-fix

2017-09-27
16:00
cherrypicked from b95f7 check-in: 19d039fc17 user: pjhatwal tags: v1.64
2017-09-26
12:46
fixed display of PREQ_FAIL in runs summary tab; updated color of PREQ_FAIL to salmon (light red) instead of white check-in: 42d3bf31ad user: bjbarcla tags: v1.64-keep-running-fix
2017-09-14
17:10
fixed issue where item gets stuck in keep_waiting status when prerequisite item failed check-in: 23745b4302 user: bjbarcla tags: v1.6431, v1.64-keep-running-fix
2017-08-31
11:08
Merged in v1.64, for reference only. Do not merge to prod. Closed-Leaf check-in: 1b86fa4903 user: mrwellan tags: v1.63-configdbsync
2017-08-29
11:51
Merged v1.64 changes into v1.65 check-in: 2120db9cff user: mrwellan tags: v1.65
10:50
Bringing in latest changes from v1.64 Closed-Leaf check-in: b1eee0709a user: mrwellan tags: v1.64-server-connection-tagging
2017-08-28
11:47
Merged v1.64 into areas-dashboard branch. check-in: 1f5e744ec1 user: matt tags: v1.64-areas-dashboard
11:42
Cleaned up couple more named loop calls in runs.scm. Added post-run-hook. check-in: 32584d6c1d user: matt tags: v1.64
2017-08-25
17:59
Minor refactor of some runs.scm code? check-in: cc163f91ad user: mrwellan tags: v1.64

Modified codescanlib.scm from [85429a3289] to [bf0bf2c4eb].

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
                     (if (atom? defname) ;; filter out things we dont understand (procname is a list, what??)
                         (list defname filename args body)
                         #f)]
                    [else #f] ) scm-tree))))
    procs))


;; given a sexp, return a flat lost of atoms in that sexp
(define (get-atoms-in-body body)
  (cond
   ((null? body) '())
   ((atom? body) (list body))
   (else
    (apply append (map get-atoms-in-body body)))))








|







44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
                     (if (atom? defname) ;; filter out things we dont understand (procname is a list, what??)
                         (list defname filename args body)
                         #f)]
                    [else #f] ) scm-tree))))
    procs))


;; Given a sexp, return a flat list of the atoms in that sexp, in
;; left-to-right order.
;;
;;   body    - any datum: an atom, a proper list, or an improper (dotted)
;;             list such as the formals of (lambda (x . rest) ...)
;;   returns - a freshly built list of every non-pair, non-'() datum
;;             reachable through car/cdr; '() contributes nothing, so
;;             empty sub-lists vanish from the result
;;
;; The walk recurses on car and cdr rather than using map, because map
;; requires a proper list and would fail on a dotted pair found in
;; scanned source code.  The dotted tail is reported as an ordinary atom.
(define (get-atoms-in-body body)
  (let walk ((node body)
             (acc  '()))
    (cond
     ((null? node) acc)                       ;; end of list / empty list: nothing to add
     ((pair? node)                            ;; collect the cdr first so consing the car keeps source order
      (walk (car node) (walk (cdr node) acc)))
     (else (cons node acc)))))                ;; any non-pair datum is an atom

Modified common.scm from [071499421b] to [93c8a7d160].

529
530
531
532
533
534
535

536
537
538
539
540
541
542
543
    (3 "SKIP")
    (4 "WARN")
    (5 "WAIVED")
    (6 "CHECK")
    (7 "STUCK/DEAD")
    (8 "DEAD")
    (9 "FAIL")

    (10 "ABORT")))

(define *common:ended-states*       ;; states which indicate the test is stopped and will not proceed
  '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE"))

(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked
  '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD"))








>
|







529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
    (3 "SKIP")
    (4 "WARN")
    (5 "WAIVED")
    (6 "CHECK")
    (7 "STUCK/DEAD")
    (8 "DEAD")
    (9 "FAIL")
    (10 "PREQ_FAIL")
    (11 "ABORT")))

;; List of test state strings treated as terminal: a test in one of these
;; states is stopped and will not proceed any further.
(define *common:ended-states*       ;; states which indicate the test is stopped and will not proceed
  '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE"))

;; List of state strings that count as having ended badly; per the note
;; below these roll up as CHECK so that a person looks at the results.
;; NOTE(review): "DEAD" is listed here but not in *common:ended-states* -
;; confirm whether that asymmetry is intentional.
(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked
  '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD"))

Modified common_records.scm from [0a5321af09] to [9609771573].

145
146
147
148
149
150
151


152
153
154
155
156
157
158
159
160
161
162
163
     (lambda (frame)
       (let* ((this-loc (vector-ref frame 0))
              (temp     (string-split (->string this-loc) " "))
              (this-func (if (and (list? temp) (> (length temp) 1)) (cadr temp) "???")))
         (if (equal? this-func "BB>")
             (set! location this-loc))))
     stack)


    (let ((dp-args
           (append
            (list 0 *default-log-port*
                  (conc location "@"(/ (- (current-milliseconds) *BB-process-starttime*) 1000)"   ")  )
            in-args)))
      (apply debug:print dp-args))))

(define *BBpp_custom_expanders_list* (make-hash-table))



;; register hash tables with BBpp.







>
>
|
|
|
|
|







145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
     (lambda (frame)
       (let* ((this-loc (vector-ref frame 0))
              (temp     (string-split (->string this-loc) " "))
              (this-func (if (and (list? temp) (> (length temp) 1)) (cadr temp) "???")))
         (if (equal? this-func "BB>")
             (set! location this-loc))))
     stack)
    (let* ((color-on "\x1b[1m")
           (color-off "\x1b[0m")
           (dp-args
            (append
             (list 0 *default-log-port*
                   (conc color-on location "@"(/ (- (current-milliseconds) *BB-process-starttime*) 1000) color-off "   ")  )
             in-args)))
      (apply debug:print dp-args))))

;; Global hash table, created empty when this file is loaded.
;; Presumably maps something to a custom expander used by BBpp - TODO confirm
;; the key/value types against the code that registers entries.
(define *BBpp_custom_expanders_list* (make-hash-table))



;; register hash tables with BBpp.

Modified db.scm from [144a083df6] to [f4290acd99].

1
2
3
4
5
6
7
8
;;======================================================================
;; Copyright 2006-2016, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|







1
2
3
4
5
6
7
8
;;======================================================================
;; Copyright 2006-2016, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
  (let ((syncprag (configf:lookup *configdat* "setup" "sychronous")))
    (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) 

;; open an sql database inside a file lock
;; returns: db existed-prior-to-opening
;; RA => Returns a db handler; sets the lock if opened in writable mode
;;
;;(define *db-open-mutex* (make-mutex))

(define (db:lock-create-open fname initproc)
  (let* ((parent-dir   (or (pathname-directory fname)(current-directory))) ;; no parent? go local
         (raw-fname    (pathname-file fname))
	 (dir-writable (file-write-access? parent-dir))
	 (file-exists  (common:file-exists? fname))
	 (file-write   (if file-exists
			   (file-write-access? fname)
			   dir-writable )))
    ;;(mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped.
    (if file-write ;; dir-writable
	(condition-case
         (let* ((lockfname   (conc fname ".lock"))
                (readyfname  (conc parent-dir "/.ready-" raw-fname))
                (readyexists (common:file-exists? readyfname)))
           (if (not readyexists)
               (common:simple-file-lock-and-wait lockfname))







|









|







196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
  (let ((syncprag (configf:lookup *configdat* "setup" "sychronous")))
    (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) 

;; open an sql database inside a file lock
;; returns: db existed-prior-to-opening
;; RA => Returns a db handler; sets the lock if opened in writable mode
;;
;; (define *db-open-mutex* (make-mutex))

(define (db:lock-create-open fname initproc)
  (let* ((parent-dir   (or (pathname-directory fname)(current-directory))) ;; no parent? go local
         (raw-fname    (pathname-file fname))
	 (dir-writable (file-write-access? parent-dir))
	 (file-exists  (common:file-exists? fname))
	 (file-write   (if file-exists
			   (file-write-access? fname)
			   dir-writable )))
    ;; (mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped.
    (if file-write ;; dir-writable
	(condition-case
         (let* ((lockfname   (conc fname ".lock"))
                (readyfname  (conc parent-dir "/.ready-" raw-fname))
                (readyexists (common:file-exists? readyfname)))
           (if (not readyexists)
               (common:simple-file-lock-and-wait lockfname))
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
         (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
        
	(condition-case
         (begin
           (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname)
           (let ((db (sqlite3:open-database fname)))
             ;;(mutex-unlock! *db-open-mutex*)
             db))
         (exn (io-error)  (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
         (exn (corrupt)   (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed."))
         (exn (busy)      (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back."))
         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
         (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
	)))







|







244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
         (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
        
	(condition-case
         (begin
           (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname)
           (let ((db (sqlite3:open-database fname)))
             ;; (mutex-unlock! *db-open-mutex*)
             db))
         (exn (io-error)  (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
         (exn (corrupt)   (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed."))
         (exn (busy)      (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back."))
         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
         (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
	)))
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
;;                          end_time,strftime('%s','now') as now from tests where state in
;;      ('RUNNING','REMOTEHOSTSTART','LAUNCED'));

(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
	 (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
	 (deadtime     (if (and deadtime-str
				(string->number deadtime-str))
			   (string->number deadtime-str)
			   7200))) ;; two hours
    (db:with-db 
     dbstruct #f #f
     (lambda (db)







|







1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
;;                          end_time,strftime('%s','now') as now from tests where state in
;;      ('RUNNING','REMOTEHOSTSTART','LAUNCED'));

(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
	 (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) ;; FIXME suspect test run time & deadtime are not well matched; resulting in COMPLETED/DEAD status of an a-ok running test
	 (deadtime     (if (and deadtime-str
				(string->number deadtime-str))
			   (string->number deadtime-str)
			   7200))) ;; two hours
    (db:with-db 
     dbstruct #f #f
     (lambda (db)
1648
1649
1650
1651
1652
1653
1654
1655

1656
1657
1658
1659
1660
1661
1662
              (min-incompleted-ids (map car incompleted)) ;; do 'em all
              (all-ids             (append min-incompleted-ids (map car oldlaunched))))
         (if (> (length all-ids) 0)
             (begin
               (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE")
               (for-each
                (lambda (test-id)
                  (db:test-set-state-status dbstruct run-id test-id "COMPLETED" "DEAD" "Test failed to complete")) ;; fix for one aspect of Randy's ticket 1405717332

                all-ids))))))))

;; ALL REPLACED BY THE BLOCK ABOVE
;;
;; 	    (sqlite3:execute 
;; 	     db
;; 	     (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? AND id IN (" 







|
>







1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
              (min-incompleted-ids (map car incompleted)) ;; do 'em all
              (all-ids             (append min-incompleted-ids (map car oldlaunched))))
         (if (> (length all-ids) 0)
             (begin
               (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE")
               (for-each
                (lambda (test-id)
                  (db:test-set-state-status dbstruct run-id test-id "COMPLETED" "DEAD" "Test failed to complete")) ;; fix for one aspect of Randy's ticket 1405717332 ;; TODO - fix problem where test goes to COMPLETED/DEAD while in progress, only later to go to COMPLETED/PASS.  ref ticket 220546828

                all-ids))))))))

;; ALL REPLACED BY THE BLOCK ABOVE
;;
;; 	    (sqlite3:execute 
;; 	     db
;; 	     (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? AND id IN (" 
3490
3491
3492
3493
3494
3495
3496



3497
3498
3499
3500
3501


3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514



3515
3516
3517

3518

3519
3520
3521








3522
3523

3524
3525
3526
3527
3528
3529
3530
                                                    (if (not (equal? state "DELETED"))
                                                        (cons status (map dbr:counts-status state-status-counts))
                                                        (map dbr:counts-status state-status-counts)))
                                                   *common:std-statuses* >))
			    (non-completes     (filter (lambda (x)
							 (not (equal? x "COMPLETED")))
						       all-curr-states))



			    (num-non-completes (length non-completes))
                            
                            (newstate          (cond
						((> running 0)
						 "RUNNING") ;; anything running, call the situation running


						((> bad-not-started 0)  ;; we have an ugly situation, it is completed in the sense we cannot do more.
						 "COMPLETED") 
						((> num-non-completes 0) ;;
						 (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states)))
                                                ;; only rollup DELETED if all DELETED
						(else
						 (car all-curr-states))))
			                       ;; (if (> running 0)
                                               ;;     "RUNNING"
                                               ;;     (if (> bad-not-started 0)
                                               ;;         "COMPLETED"
                                               ;;         (car all-curr-states))))
                            (newstatus            (if (or (> bad-not-started 0)



							  (and (equal? newstate "NOT_STARTED")
							       (> num-non-completes 0)))
						      "STARTED"

                                                      (car all-curr-statuses))))

                       ;; (print "bad-not-supported: " bad-not-support " all-curr-states: " all-curr-states " all-curr-statuses: " all-curr-states)
                       ;;      " newstate: " newstate " newstatus: " newstatus)
                       ;; NB// Pass the db so it is part of the transaction








                       (if tl-test-id
			   (db:test-set-state-status db run-id tl-test-id newstate newstatus #f))))))))

         (mutex-unlock! *db-transaction-mutex*)
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
         tr-res)))))
;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status*
(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path)
  (db:with-db







>
>
>
|
<

<
|
>
>
|
<
<
|
<
<
|





|
>
>
>
|
|
|
>
|
>



>
>
>
>
>
>
>
>

|
>







3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501

3502

3503
3504
3505
3506


3507


3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
                                                    (if (not (equal? state "DELETED"))
                                                        (cons status (map dbr:counts-status state-status-counts))
                                                        (map dbr:counts-status state-status-counts)))
                                                   *common:std-statuses* >))
			    (non-completes     (filter (lambda (x)
							 (not (equal? x "COMPLETED")))
						       all-curr-states))
			    (preq-fails        (filter (lambda (x)
							 (equal? x "PREQ_FAIL"))
						       all-curr-statuses))
                            (num-non-completes (length non-completes))

                            (newstate          (cond

						((> running 0)           "RUNNING")            ;; anything running, call the situation running
                                                ((> (length preq-fails) 0)
                                                 "NOT_STARTED")
						((> bad-not-started 0)   "COMPLETED")          ;; we have an ugly situation, it is completed in the sense we cannot do more.


						((> num-non-completes 0) (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED


						(else                    (car all-curr-states))))
			                       ;; (if (> running 0)
                                               ;;     "RUNNING"
                                               ;;     (if (> bad-not-started 0)
                                               ;;         "COMPLETED"
                                               ;;         (car all-curr-states))))
                            (newstatus         (cond
                                                ((> (length preq-fails) 0)
                                                 "PREQ_FAIL")
                                                ((or (> bad-not-started 0)
                                                     (and (equal? newstate "NOT_STARTED")
                                                          (> num-non-completes 0)))
                                                 "STARTED")
                                                (else
                                                 (car all-curr-statuses)))))

                       ;; (print "bad-not-supported: " bad-not-support " all-curr-states: " all-curr-states " all-curr-statuses: " all-curr-states)
                       ;;      " newstate: " newstate " newstatus: " newstatus)
                       ;; NB// Pass the db so it is part of the transaction
                       (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path"> bad-not-started="bad-not-started" newstate="newstate" newstatus="newstatus" num-non-completes="num-non-completes" non-completes="non-completes "len(sscs)="(length state-status-counts)  " state-status-counts: "
                                    (apply conc
                                           (map (lambda (x)
                                                  (conc
                                                   (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
                                                state-status-counts))
                                    
                                    ); end debug:print
                       (if tl-test-id
			   (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
		       ))))))
         (mutex-unlock! *db-transaction-mutex*)
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
         tr-res)))))
;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status*
(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path)
  (db:with-db
4045
4046
4047
4048
4049
4050
4051


4052
4053


4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142


















4143

4144
4145
4146
4147
4148
4149
4150
4151
;;
;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED)
;;       mode 'toplevel means that tests must be COMPLETED only
;;       mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]]
;;       mode 'exclusive means this test/item cannot run if the same test/item is LAUNCHED,REMOTEHOSTSTART or RUNNING
;; 
;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode)


(define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f))
  ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items
  ;;
  ;; Returns the list of prerequisites that are not yet met for the test
  ;; ref-test-name / ref-item-path.  The result is the append of two parts:
  ;;
  ;;   1. When 'exclusive is in mode: whatever db:get-tests-for-run returns
  ;;      for this same test/item, across all runs, in state LAUNCHED,
  ;;      REMOTEHOSTSTART or RUNNING.  Otherwise '().
  ;;   2. When waitons is #f or '(): '().  Otherwise, for every name in
  ;;      waitons, the records returned by db:get-tests-for-run-state-status
  ;;      (or the bare name string when no records came back) if neither
  ;;      parent-waiton-met nor item-waiton-met got set.  Duplicates are
  ;;      removed with delete-duplicates.
  ;;
  ;; mode is used as a list of symbols (it is tested with member and
  ;; lset-intersection); see the note above this definition for the
  ;; meaning of each symbol.  itemmaps is only passed through to
  ;; db:compare-itempaths.


  (append
   (if (member 'exclusive mode)
       (let ((running-tests (db:get-tests-for-run dbstruct
						  #f  ;; run-id of #f means for all runs. 
						  (if (string=? ref-item-path "")   ;; testpatt
						      ref-test-name
						      (conc ref-test-name "/" ref-item-path))
						  '("LAUNCHED" "REMOTEHOSTSTART" "RUNNING") ;; states
						  '()          ;; statuses
						  #f           ;; offset
						  #f           ;; limit
						  #f           ;; not-in
						  #f           ;; sort by
						  #f           ;; sort order
						  'shortlist   ;; query type
						  0            ;; last update, beginning of time ....
						  #f           ;; mode
						  )))
	;;(map (lambda (testdat)
	;;	(if (equal? (db:test-get-item-path testdat) "")
	;;	    (db:test-get-testname testdat)
	;;	    (conc (db:test-get-testname testdat)
	;;		  "/"
	;;		  (db:test-get-item-path testdat))))
	 running-tests) ;; calling functions want the entire data
       '())
   (if (or (not waitons)
	   (null? waitons))
       '()
       ;; NOTE(review): unmet-pre-reqs is bound here but never referenced in this procedure.
       (let* ((unmet-pre-reqs '())
	      (result         '()))
	 (for-each 
	  (lambda (waitontest-name)
	    ;; by getting the tests with matching name we are looking only at the matching test 
	    ;; and related sub items
	    ;; next should be using mt:get-tests-for-run?
	    (let ((tests             (db:get-tests-for-run-state-status dbstruct run-id waitontest-name))
		  (ever-seen         #f)
		  (parent-waiton-met #f)
		  (item-waiton-met   #f))
	      (for-each 
	       (lambda (test) ;; BB- this is the upstream test
		 ;; (if (equal? waitontest-name (db:test-get-testname test)) ;; by definition this had better be true ...
		 (let* ((state             (db:test-get-state test))
			(status            (db:test-get-status test))
			(item-path         (db:test-get-item-path test)) ;; BB- this is the upstream itempath
			(is-completed      (equal? state "COMPLETED"))
			(is-running        (equal? state "RUNNING"))
			(is-killed         (equal? state "KILLED"))
			(is-ok             (member status '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")))
			;;                                       testname-b    path-a    path-b
			(same-itempath     (db:compare-itempaths ref-test-name item-path ref-item-path itemmaps))) ;; (equal? ref-item-path item-path)))
		   (set! ever-seen #t)
		   ;; The clauses below are tried in order and only the first match runs.
		   (cond
		    ;; case 1, non-item (parent test) is COMPLETED and either ok or mode contains 'toplevel
		    ((and (equal? item-path "") ;; this is the parent test of the waiton being examined
			  is-completed
			  (or is-ok (not (null? (lset-intersection eq? mode '(toplevel)))))) ;;  itemmatch itemwait))))))
		     (set! parent-waiton-met #t))
		    ;; Special case for toplevel and KILLED
		    ((and (equal? item-path "") ;; this is the parent test
			  is-killed
			  (member 'toplevel mode))
		     (set! parent-waiton-met #t))
		    ;; For itemwait mode IFF the previous matching item is good then set parent-waiton-met
		    ((and (not (null? (lset-intersection eq? mode '(itemmatch itemwait)))) ;; how is that different from (member mode '(itemmatch itemwait)) ?????
			  ;; (not (equal? item-path "")) ;; this applies to both top level (to allow launching of next batch) and items
			  same-itempath)
		     (if (and is-completed is-ok)
			 (set! item-waiton-met #t))
		     (if (and (equal? item-path "") ;; if upstream rollup test is completed, parent-waiton-met is set
			      (or is-completed is-running));; this is the parent, set it to run if completed or running ;; BB1
			 (set! parent-waiton-met #t)))
		    ;; normal checking of parent items, any parent or parent item not ok blocks running
		    ;; NOTE(review): all three conjuncts must hold, so as written this clause can only
		    ;; fire when is-ok is true and 'itemmatch is in mode - confirm that is intended.
		    ((and is-completed
			  (or is-ok 
			      (member 'toplevel mode))              ;; toplevel does not block on FAIL
			  (and is-ok (member 'itemmatch mode))) ;; itemmatch blocks on not ok
		     (set! item-waiton-met #t)))))
	       tests)
	      ;; both requirements, parent and item-waiton must be met to NOT add item to
	      ;; prereq's not met list
	      ;; NOTE(review): the test below uses `or`, so the waiton is added only when NEITHER
	      ;; flag was set; one flag being set is enough to treat the waiton as met.
	      (if (not (or parent-waiton-met item-waiton-met))
		  (set! result (append (if (null? tests) (list waitontest-name) tests) result))) ;; appends the string if the full record is not available
	      ;; if the test is not found then clearly the waiton is not met...
	      ;; (if (not ever-seen)(set! result (cons waitontest-name result)))))
	      ;; ever-seen stays #f only when tests was empty, in which case the branch above has
	      ;; already added the name; the repeat is removed by delete-duplicates at the end.
	      (if (not ever-seen)
		  (set! result (append (if (null? tests)(list waitontest-name) tests) result)))))
	  waitons)


















	 (delete-duplicates result)))))


;;======================================================================
;; Just for sync, procedures to make sync easy
;;======================================================================

;; get an alist of record ids changed since time since-time
;;   '((runs . (1 2 3 ...))(steps . (5 6 7 ...) ...))
;;







>
>


>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
|







4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
;;
;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED)
;;       mode 'toplevel means that tests must be COMPLETED only
;;       mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]]
;;       mode 'exclusive means this test/item cannot run if the same test/item is LAUNCHED,REMOTEHOSTSTART or RUNNING
;; 
;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode)


(define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f))
  ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items
  (let* ((ok-statuses '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))
         (have-itemized (not (null? (lset-intersection eq? mode '(itemmatch itemwait))))))
    (append
     (if (member 'exclusive mode)
         (let ((running-tests (db:get-tests-for-run dbstruct
                                                    #f  ;; run-id of #f means for all runs. 
                                                    (if (string=? ref-item-path "")   ;; testpatt
                                                        ref-test-name
                                                        (conc ref-test-name "/" ref-item-path))
                                                    '("LAUNCHED" "REMOTEHOSTSTART" "RUNNING") ;; states
                                                    '()          ;; statuses
                                                    #f           ;; offset
                                                    #f           ;; limit
                                                    #f           ;; not-in
                                                    #f           ;; sort by
                                                    #f           ;; sort order
                                                    'shortlist   ;; query type
                                                    0            ;; last update, beginning of time ....
                                                    #f           ;; mode
                                                    )))
           ;;(map (lambda (testdat)
           ;;	(if (equal? (db:test-get-item-path testdat) "")
           ;;	    (db:test-get-testname testdat)
           ;;	    (conc (db:test-get-testname testdat)
           ;;		  "/"
           ;;		  (db:test-get-item-path testdat))))
           running-tests) ;; calling functions want the entire data
         '())
     (if (or (not waitons)
             (null? waitons))
         '()
         (let* ((unmet-pre-reqs '())
                (result         '()))
           (for-each 
            (lambda (waitontest-name)
              ;; by getting the tests with matching name we are looking only at the matching test 
              ;; and related sub items
              ;; next should be using mt:get-tests-for-run?
              (let ((tests             (db:get-tests-for-run-state-status dbstruct run-id waitontest-name))
                    (ever-seen         #f)
                    (parent-waiton-met #f)
                    (item-waiton-met   #f))
                (for-each 
                 (lambda (test) ;; BB- this is the upstream test
                   ;; (if (equal? waitontest-name (db:test-get-testname test)) ;; by definition this had better be true ...
                   (let* ((state             (db:test-get-state test))
                          (status            (db:test-get-status test))
                          (item-path         (db:test-get-item-path test)) ;; BB- this is the upstream itempath
                          (is-completed      (equal? state "COMPLETED"))
                          (is-running        (equal? state "RUNNING"))
                          (is-killed         (equal? state "KILLED"))
                          (is-ok             (member status ok-statuses))
                          ;;                                       testname-b    path-a    path-b
                          (same-itempath     (db:compare-itempaths ref-test-name item-path ref-item-path itemmaps))) ;; (equal? ref-item-path item-path)))
                     (set! ever-seen #t)
                     (cond
                      ;; case 1, non-item (parent test) is 
                      ((and (equal? item-path "") ;; this is the parent test of the waiton being examined
                            is-completed
                            (or is-ok (not (null? (lset-intersection eq? mode '(toplevel)))))) ;;  itemmatch itemwait))))))
                       (set! parent-waiton-met #t))
                      ;; Special case for toplevel and KILLED
                      ((and (equal? item-path "") ;; this is the parent test
                            is-killed
                            (member 'toplevel mode))
                       (set! parent-waiton-met #t))
                      ;; For itemwait mode IFF the previous matching item is good then set parent-waiton-met
                      ((and have-itemized ;; how is that different from (member mode '(itemmatch itemwait)) ?????
                            ;; (not (equal? item-path "")) ;; this applies to both top level (to allow launching of next batch) and items
                            same-itempath)
                       (if (and is-completed is-ok)
                           (set! item-waiton-met #t))
                       (if (and (equal? item-path "") ;; if upstream rollup test is completed, parent-waiton-met is set
                                (or is-completed is-running));; this is the parent, set it to run if completed or running ;; BB1
                           (set! parent-waiton-met #t)))
                      ;; normal checking of parent items, any parent or parent item not ok blocks running
                      ((and is-completed
                            (or is-ok 
                                (member 'toplevel mode))              ;; toplevel does not block on FAIL
                            (and is-ok (member 'itemmatch mode))) ;; itemmatch blocks on not ok
                       (set! item-waiton-met #t)))))
                 tests)
                ;; both requirements, parent and item-waiton must be met to NOT add item to
                ;; prereq's not met list
                (if (not (or parent-waiton-met item-waiton-met))
                    (set! result (append (if (null? tests) (list waitontest-name) tests) result))) ;; appends the string if the full record is not available
                ;; if the test is not found then clearly the waiton is not met...
                ;; (if (not ever-seen)(set! result (cons waitontest-name result)))))
                (if (not ever-seen)
                    (set! result (append (if (null? tests)(list waitontest-name) tests) result)))))
            waitons)

           ;; TODO: for itemwait and itemmatch mode, filter out failed toplevel prereq test if any items passed.
           
           ;; a rewrite might help understanding, but quick fix is just remove tests from result which are completed/pass. -BB
           ;;(pp result)
           ;; (let ((prereq-tests-some-items-passed-list '(ref-test-name))) ;; seed with ref-test-name; do not wait on self.

           ;;   (for-each (lambda (test)
           ;;               (if (vector? test)
           ;;                   (if (and
           ;;                        (equal? (db:test-get-state test) "COMPLETED")
           ;;                        (member (db:test-get-status test) ok-statuses)
           ;;                        (not (equal? (db:test-get-item-path test) "")))
           ;;                       (set! prereq-tests-some-items-passed-list (cons (db:test-get-testname test) prereq-tests-some-items-passed-list)))))
           ;;             result)
           ;;   (set! prereq-tests-some-items-passed-list (delete-duplicates prereq-tests-some-items-passed-list))
                                               

           (delete-duplicates result)
           )))))
  
;;======================================================================
;; Just for sync, procedures to make sync easy
;;======================================================================

;; get an alist of record ids changed since time since-time
;;   '((runs . (1 2 3 ...))(steps . (5 6 7 ...) ...))
;;

Modified gutils.scm from [60c484ab36] to [c0f6b41f65].

21
22
23
24
25
26
27

28
29
30
31
32
33
34
	 (c2 (map string->number (string-split color2)))
	 (delta (map (lambda (a b)(abs (- a b))) c1 c2)))
    (null? (filter (lambda (x)(> x 3)) delta))))

;; Association list mapping a status symbol to its color, stored as a string of
;; three space-separated numbers (presumably R G B -- confirm against the GUI toolkit).
;; gutils:get-color-spec reads this table and falls back to the FAIL entry.
(define gutils:colors
  '((PASS . "70 249 73")
    (FAIL . "253 33 49")

    (SKIP . "230 230 0")))

;; Return the color string registered for effective-state in gutils:colors.
;; A state with no entry in the table gets the color registered for FAIL.
(define (gutils:get-color-spec effective-state)
  (let ((found (alist-ref effective-state gutils:colors)))
    (if found
        found
        (alist-ref 'FAIL gutils:colors))))

;; BBnote - state status dashboard button color / text defined here







>







21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
	 (c2 (map string->number (string-split color2)))
	 (delta (map (lambda (a b)(abs (- a b))) c1 c2)))
    (null? (filter (lambda (x)(> x 3)) delta))))

;; Association list mapping a status symbol to its color, stored as a string of
;; three space-separated numbers (presumably R G B -- confirm against the GUI toolkit).
;; gutils:get-color-spec reads this table and falls back to the FAIL entry.
(define gutils:colors
  '((PASS . "70 249 73")
    (FAIL . "253 33 49")
    (PREQ_FAIL . "255 127 127")
    (SKIP . "230 230 0")))

;; Return the color string registered for effective-state in gutils:colors.
;; A state with no entry in the table gets the color registered for FAIL.
(define (gutils:get-color-spec effective-state)
  (let ((found (alist-ref effective-state gutils:colors)))
    (if found
        found
        (alist-ref 'FAIL gutils:colors))))

;; BBnote - state status dashboard button color / text defined here
57
58
59
60
61
62
63

64
65
66
67
68
69
70
    ((LAUNCHED)         (list "101 123 142"  state))
    ((CHECK)            (list "255 100 50"   state))
    ((REMOTEHOSTSTART)  (list "50 130 195"   state))
    ((RUNNING STARTED)          (list "9 131 232"    state))
    ((KILLREQ)          (list "39 82 206"    state))
    ((KILLED)           (list "234 101 17"   state))
    ((NOT_STARTED)      (case (string->symbol status)

			  ((CHECK STARTED)(list (gutils:get-color-spec 'SKIP) state))
			  (else   (list "240 240 240"                 state))))
    ;; for xor mode below
    ;;
    ((CLEAN)
     (case (string->symbol status)
       ((CLEAN-FAIL CLEAN-CHECK CLEAN-ABORT)  (list "200 130 13" status)) ;; orange requested for these







>







58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
    ((LAUNCHED)         (list "101 123 142"  state))
    ((CHECK)            (list "255 100 50"   state))
    ((REMOTEHOSTSTART)  (list "50 130 195"   state))
    ((RUNNING STARTED)          (list "9 131 232"    state))
    ((KILLREQ)          (list "39 82 206"    state))
    ((KILLED)           (list "234 101 17"   state))
    ((NOT_STARTED)      (case (string->symbol status)
                          ((PREQ_FAIL)(list (gutils:get-color-spec 'PREQ_FAIL) status))
			  ((CHECK STARTED)(list (gutils:get-color-spec 'SKIP) state))
			  (else   (list "240 240 240"                 state))))
    ;; for xor mode below
    ;;
    ((CLEAN)
     (case (string->symbol status)
       ((CLEAN-FAIL CLEAN-CHECK CLEAN-ABORT)  (list "200 130 13" status)) ;; orange requested for these

Modified megatest-version.scm from [ea84f1a115] to [f7a1ccbc61].

1
2
3
4
5
6
7
;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

(declare (unit megatest-version))

;; Megatest version, kept as a numeric literal.
(define megatest-version 1.6429)






|

1
2
3
4
5
6
7
;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

(declare (unit megatest-version))

;; Megatest version, kept as a numeric literal.
(define megatest-version 1.6431)

Modified runs.scm from [147cbc54ec] to [6846f73920].

463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data
    ;;
    ;; What happened? This code is now duplicated in tests!?
    ;;
    ;;======================================================================
    
    (if (not (null? test-names))
	(let loop ((hed (car test-names))   ;; NOTE: This is the main loop that iterates over the test-names
		   (tal (cdr test-names)))         ;; 'return-procs tells the config reader to prep running system but return a proc
	  (change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening.
	  (setenv "MT_TEST_NAME" hed) ;; 
	  (let*-values (((waitons waitors config)(tests:get-waitons hed all-tests-registry)))
	    (debug:print-info 8 *default-log-port* "waitons: " waitons)
	    ;; check for hed in waitons => this would be circular, remove it and issue an
	    ;; error
	    (if (or (member hed waitons)
		    (member hed waitors))
		(begin
		  (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton or waitor, please correct this!")
		  (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons))
		  (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors))))
	    
	    ;; (items   (items:get-items-from-config config)))
	    (if (not (hash-table-ref/default test-records hed #f))
		(hash-table-set! test-records
				 hed (vector hed     ;; 0
					     config  ;; 1
					     waitons ;; 2
					     (config-lookup config "requirements" "priority")     ;; priority 3
					     (tests:get-items config) ;; expand the [items] and or [itemstable] into explict items
					     #f      ;; itemsdat 5
					     #f      ;; spare - used for item-path
					     waitors ;; 
					     )))
	    (for-each 
	     (lambda (waiton)
	       (if (and waiton (not (member waiton test-names)))
		   (let* ((waiton-record   (hash-table-ref/default test-records waiton #f))
			  (waiton-tconfig  (if waiton-record (vector-ref waiton-record 1) #f))
			  (waiton-itemized (and waiton-tconfig
						(or (hash-table-ref/default waiton-tconfig "items" #f)
						    (hash-table-ref/default waiton-tconfig "itemstable" #f))))
			  (itemmaps        (tests:get-itemmaps config))  ;; (configf:lookup config "requirements" "itemmap"))
			  (new-test-patts  (tests:extend-test-patts test-patts hed waiton itemmaps)))
		     (debug:print-info 0 *default-log-port* "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items")
		     ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%"
		     ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt
		     ;; is this satisfied by merely appending "/" to the waiton name added to the list?
		     ;;
		     ;; This approach causes all of the items in an upstream test to be run 








|

















|
|
|
|

|













|







463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data
    ;;
    ;; What happened? This code is now duplicated in tests!?
    ;;
    ;;======================================================================
    
    (if (not (null? test-names)) ;; BEGIN test-names loop
	(let loop ((hed (car test-names))   ;; NOTE: This is the main loop that iterates over the test-names
		   (tal (cdr test-names)))         ;; 'return-procs tells the config reader to prep running system but return a proc
	  (change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening.
	  (setenv "MT_TEST_NAME" hed) ;; 
	  (let*-values (((waitons waitors config)(tests:get-waitons hed all-tests-registry)))
	    (debug:print-info 8 *default-log-port* "waitons: " waitons)
	    ;; check for hed in waitons => this would be circular, remove it and issue an
	    ;; error
	    (if (or (member hed waitons)
		    (member hed waitors))
		(begin
		  (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton or waitor, please correct this!")
		  (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons))
		  (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors))))
	    
	    ;; (items   (items:get-items-from-config config)))
	    (if (not (hash-table-ref/default test-records hed #f))
		(hash-table-set! test-records ;; BB: we are doing a manual make-tests:testqueue
				 hed (vector hed     ;; 0 ;; testname
					     config  ;; 1 
					     waitons ;; 2 
					     (config-lookup config "requirements" "priority")     ;; priority 3
					     (tests:get-items config) ;; 4 ;; expand the [items] and or [itemstable] into explict items
					     #f      ;; itemsdat 5
					     #f      ;; spare - used for item-path
					     waitors ;; 
					     )))
	    (for-each 
	     (lambda (waiton)
	       (if (and waiton (not (member waiton test-names)))
		   (let* ((waiton-record   (hash-table-ref/default test-records waiton #f))
			  (waiton-tconfig  (if waiton-record (vector-ref waiton-record 1) #f))
			  (waiton-itemized (and waiton-tconfig
						(or (hash-table-ref/default waiton-tconfig "items" #f)
						    (hash-table-ref/default waiton-tconfig "itemstable" #f))))
			  (itemmaps        (tests:get-itemmaps config))  ;; (configf:lookup config "requirements" "itemmap"))
			  (new-test-patts  (tests:extend-test-patts test-patts hed waiton itemmaps)))   ;; BB:  items expanded here.
		     (debug:print-info 0 *default-log-port* "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items")
		     ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%"
		     ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt
		     ;; is this satisfied by merely appending "/" to the waiton name added to the list?
		     ;;
		     ;; This approach causes all of the items in an upstream test to be run 

540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
		     ;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons
		     )))
	     (delete-duplicates (append waitons waitors)))
	    (let ((remtests (delete-duplicates (append waitons tal))))
	      (if (not (null? remtests))
		  (begin
		    ;; (debug:print-info 0 *default-log-port* "Preprocessing continues for " (string-intersperse remtests ", "))
		    (loop (car remtests)(cdr remtests))))))))

    (if (not (null? required-tests))
	(debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue"))
    ;; NOTE: these are all parent tests, items are not expanded yet.
    (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
    (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
      (if (> (length (hash-table-keys test-records)) 0)
	  (let* ((keep-going        #t)
		 (run-queue-retries 5)
		 (th1        (make-thread (lambda ()
					    (handle-exceptions
						exn
						(begin
						  (print-call-chain)
						  (print " message: " ((condition-property-accessor 'exn 'message) exn)))
					      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
								    (any->number reglen) all-tests-registry)))
					  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()				    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (handle-exceptions
							       exn
							       (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id)
							       (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime)))
							run-ids)))
					  "runs: mark-incompletes")))
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th1)
	    (set! keep-going #f)
	    (thread-join! th2)







|


















|







|







540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
		     ;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons
		     )))
	     (delete-duplicates (append waitons waitors)))
	    (let ((remtests (delete-duplicates (append waitons tal))))
	      (if (not (null? remtests))
		  (begin
		    ;; (debug:print-info 0 *default-log-port* "Preprocessing continues for " (string-intersperse remtests ", "))
		    (loop (car remtests)(cdr remtests)))))))) ;; END test-names loop

    (if (not (null? required-tests))
	(debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue"))
    ;; NOTE: these are all parent tests, items are not expanded yet.
    (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
    (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
      (if (> (length (hash-table-keys test-records)) 0)
	  (let* ((keep-going        #t)
		 (run-queue-retries 5)
		 (th1        (make-thread (lambda ()
					    (handle-exceptions
						exn
						(begin
						  (print-call-chain)
						  (print " message: " ((condition-property-accessor 'exn 'message) exn)))
					      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
								    (any->number reglen) all-tests-registry)))
					  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()			 ;; BBQ: why are we visiting ALL runs here?	    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (handle-exceptions
							       exn
							       (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id)
							       (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
							run-ids)))
					  "runs: mark-incompletes")))
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th1)
	    (set! keep-going #f)
	    (thread-join! th2)
628
629
630
631
632
633
634










635
636
637
638









639
640
641
642
643
644
645
646
647
648
































649
650
651
652
653



















654
655
656
657

658
659

660
661
662
663
664
665



666
667


668
669
670
671
672
673
674


675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690









691

692


693
694


695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714

715
716
717

718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757

758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777

778
779
780
781
782
783
784


785
786
787
788
789
790
791
792
793

794
795
796



797

798

799
800


801
802
803
804
805
806
807
;; Build the argument list (hed tal reg reruns) for the named loop "loop" near the
;; top of runs:run-tests-queue (look around line 1216).
;; hed, tal and reg are each derived from the current tal/reg/reglen/regfull by the
;; matching runs:queue-next-* helper; reruns is passed through untouched.
;;
(define (runs:loop-values tal reg reglen regfull reruns)
  (let* ((next-hed (runs:queue-next-hed tal reg reglen regfull))
         (next-tal (runs:queue-next-tal tal reg reglen regfull))
         (next-reg (runs:queue-next-reg tal reg reglen regfull)))
    (list next-hed next-tal next-reg reruns)))











;; Counts how many times runs:expand-items has found both tal and reg empty while
;; dropping a test; runs:expand-items exits the process once this exceeds 2.
(define runs:nothing-left-in-queue-count 0)

;; BB: for future reference - suspect target vars are not expanded to env vars at this point. Item expansion using [items]\nwhatever [system echo $TARGETVAR] doesn't work right, whereas [system echo #{targetvar}] does. Tal and Randy have tix on this. On the first pass the var is not set; on the second pass it is ok.









;; runs:expand-items: decide what to do with queued test `hed` given its unmet
;; prerequisites, expanding its items when it is ready.
;;
;; Fetches the unmet prerequisites of hed/item-path via rmt:get-prereqs-not-met,
;; classifies them (fails, prereq-fails, non-completed, runnables) and then picks
;; one cond branch below. The cond order matters: earlier branches shadow later ones.
;;
;; Returns either a list (hed tal reg reruns) to continue with, or #f, which the
;; comments in the body describe as "do not loop".
;; Side effects visible here: sets MT_TEST_NAME / MT_RUNNAME, may update test
;; state/status in the db, may mutate test-registry and test-record, may sleep
;; for 60 seconds, and may call (exit).
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)
  ;; NOTE(review): loop-list is bound here but not referenced anywhere in this body.
  (let* ((loop-list       (list hed tal reg reruns))
	 ;; a non-list answer from rmt:get-prereqs-not-met is logged and treated as "no unmet prereqs"
	 (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*
					       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
					       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps)
				  '()))))
































	 ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (prereq-fails    (runs:calc-prereq-fail prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met))
	 (runnables       (runs:calc-runnable prereqs-not-met)))



















    (debug:print-info 4 *default-log-port* "START OF INNER COND #2 "
		      "\n can-run-more:    " can-run-more
		      "\n testname:        " hed
		      "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)

		      "\n non-completed:   " (runs:pretty-string non-completed) 
		      "\n prereq-fails:    " (runs:pretty-string prereq-fails)

		      "\n fails:           " (runs:pretty-string fails)
		      "\n testmode:        " testmode
		      "\n (member 'toplevel testmode): " (member 'toplevel testmode)
		      "\n (null? non-completed):    " (null? non-completed)
		      "\n reruns:          " reruns
		      "\n items:           " items



		      "\n can-run-more:    " can-run-more)



   (cond
     ;; all prereqs met, fire off the test
     ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch

     ;; Branch 1: not in toplevel mode and test-registry already marks this test as
     ;; DONOTRUN, removed or CANNOTRUN: drop it and advance to the next queue entry,
     ;; or return #f when both tal and reg are empty.
     ((and (not (member 'toplevel testmode))
	   (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a)
		   '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here


      (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue")
      (if (or (not (null? tal))
	      (not (null? reg)))
          (runs:loop-values tal reg reglen regfull reruns)
	  (begin
	    (debug:print-info 0 *default-log-port* "Nothing left in the queue!")
	    ;; If get here twice then we know we've tried to expand all items
	    ;; since there must be a logic issue with the handling of loops in the 
	    ;; items expand phase we will brute force an exit here.
	    (if (> runs:nothing-left-in-queue-count 2)
		(begin
		  (debug:print 0 *default-log-port* "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness")
		  (exit 0))
		(set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1)))
	    #f)))










     ;; 

     ;; Branch 2: nothing unmet, or toplevel mode with no non-completed prereqs:
     ;; read the items from tconfig, store them on test-record and hand the same
     ;; hed/tal/reg/reruns back to the caller.
     ((or (null? prereqs-not-met)


	  (and (member 'toplevel testmode)
	       (null? non-completed)))


      (debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))")
      (let ((test-name (tests:testqueue-get-testname test-record)))
	(setenv "MT_TEST_NAME" test-name) ;; 
	(setenv "MT_RUNNAME"   runname)
	(runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
	(let ((items-list (items:get-items-from-config tconfig)))
	  (if (list? items-list)
	      (begin
		;; an empty items list with no recorded toplevel items marks the test NOT_STARTED/ZERO_ITEMS
		(if (null? items-list)
		    (let ((test-id   (rmt:get-test-id run-id test-name ""))
			  (num-items (rmt:test-toplevel-num-items run-id test-name)))
		      (if (and test-id
			       (not (> num-items 0)))
			  (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites"))))
		(tests:testqueue-set-items! test-record items-list)
		(list hed tal reg reruns))
	      (begin
		(debug:print-error 0 *default-log-port* "The proc from reading the items table did not yield a list - please report this")
		(exit 1))))))


     ;; Branch 3: no failures, but some prereqs are still not completed.
     ((and (null? fails)
	   (null? prereq-fails)
	   (not (null? non-completed)))

      ;; NOTE(review): allinqueue is only referenced by the commented-out notinqueue code below.
      (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
        		      (append newtal reruns)))
	     ;; prereqstrs is a list of test names as strings that are prereqs for hed
             (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
						 prereqs-not-met)))
	     ;; a prereq that is not found in allinqueue will be put in the notinqueue list
	     ;; 
             ;; (notinqueue (filter (lambda (x)
             ;;    		   (not (member x allinqueue)))
             ;;    		 prereqstrs))
	     (give-up    #f))

	;; We can get here when a prereq has not been run due to *it* having a prereq that failed.
	;; We need to use this to dequeue this item as CANNOTRUN
	;; 
	(if (member 'toplevel testmode) ;; '(toplevel)) ;; NOTE: this probably should be (member 'toplevel testmode)
	    (for-each (lambda (prereq)
			(if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN)
			    (set! give-up #t)))
		      prereqstrs))

	;; give up only when something else is still queued; otherwise rotate to the next entry of newtal
	(if (and give-up
		 (not (and (null? tal)(null? reg))))
	    (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
		  (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
	      (debug:print 1 *default-log-port* "WARNING: test " hed " has discarded prerequisites, removing it from the queue")

	      (let ((test-id (rmt:get-test-id run-id hed "")))
		(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
                  (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns)
                  ))
	      (list (car newtal)(append (cdr newtal) reg) '() reruns))))

     ;; Branch 4: prereqs are unmet, yet none are classified as failed or non-completed.
     ;; While runs:can-keep-running? allows it, wait 60 seconds and rotate the queue;
     ;; otherwise mark the test NOT_STARTED/TIMED_OUT and move on.
     ((and (null? fails)
	   (null? prereq-fails)
	   (null? non-completed))

      (if  (runs:can-keep-running? hed 20)
	  (begin
	    (runs:inc-cant-run-tests hed)
	    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0))
	    ;; getting here likely means the system is way overloaded, kill a full minute before continuing
	    (thread-sleep! 60)
	    ;; num-retries code was here
	    ;; we use this opportunity to move contents of reg to tal
	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
	  (begin
	    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
	    (let ((test-id (rmt:get-test-id run-id hed "")))
	      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
            (runs:loop-values tal reg reglen regfull reruns)
            )))

     ;; Branch 5: normal mode with failed (or prereq-failed) prerequisites: record
     ;; PREQ_DISCARDED or PREQ_FAIL on the test and, if anything else is queued,
     ;; register hed as CANNOTRUN and add it to reruns.
     ((and 
       (or (not (null? fails))
	   (not (null? prereq-fails)))
       (member 'normal testmode))

      ;; NOTE(review): only `fails` is listed in this message, not prereq-fails.
      (debug:print-info 1 *default-log-port* "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
			(string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
			", removing it from to-do list")
      (let ((test-id (rmt:get-test-id run-id hed "")))
	(if test-id
	    (if (not (null? prereq-fails))
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")


		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites"))))
      (if (or (not (null? reg))(not (null? tal)))
	  (begin
	    (hash-table-set! test-registry hed 'CANNOTRUN)
            (runs:loop-values tal reg reglen regfull (cons hed reruns))
            )
	  #f)) ;; #f flags do not loop

     ;; Branch 6: toplevel mode with failed prerequisites: rotate the queue if anything is left.
     ((and (not (null? fails))(member 'toplevel testmode))

      (if (or (not (null? reg))(not (null? tal)))
	   (list (car newtal)(append (cdr newtal) reg) '() reruns)
	  #f)) 



     ((null? runnables) #f) ;; if we get here and non-completed is null then it is all over.

     ;; Fallback: warn and continue with the next entry of newtal, leaving reg and reruns as they are.
     (else

      (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now")
      (list (car newtal)(cdr newtal) reg reruns)))))



(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst)
  (if (null? inlst)
      '()
      (map (lambda (t)
	     (cond
	      ((vector? t)







>
>
>
>
>
>
>
>
>
>



|
>
>
>
>
>
>
>
>
>


|







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|
|
|
>
|
|
>
|
|

|
|
|
>
>
>
|
|
>
>
|
|
|
|
|
|
|
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

>
>
>
>
>
>
>
>
>
|
>
|
>
>
|
|
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

>
|
|
|
>
|
|
|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|

|
|
|
|
|

|
|
|
|
|
|
|
|
|

|
|
|
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|
|
>
|
|
|
|
|
|
|
>
>
|
|
|
|
|
|
|

|
>
|
|
|
>
>
>
|
>
|
>
|
|
>
>







628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
;; Build the argument list for the next iteration of the named loop "loop" near the
;; top of runs:run-tests-queue (look around line 1216).
;;
;; The result is a four element list: (hed tal reg reruns)
;;   hed, tal, reg - chosen by runs:queue-next-hed / -tal / -reg from the current
;;                   tal, reg, reglen and regfull
;;   reruns        - passed through untouched
;;
;; NOTE(review): all five parameters are required; some call sites in
;; runs:process-expanded-tests appear to pass only four (no reruns) - confirm.
;;
(define (runs:loop-values tal reg reglen regfull reruns)
  (let ((next-hed (runs:queue-next-hed tal reg reglen regfull))
        (next-tal (runs:queue-next-tal tal reg reglen regfull))
        (next-reg (runs:queue-next-reg tal reg reglen regfull)))
    (list next-hed next-tal next-reg reruns)))

;; objective - iterate thru tests
;;    => want to prioritize tests we haven't seen before
;;    => sometimes need to squeeze things in (added to reg)
;;    => review of a previously seen test is higher priority than a never visited test
;; reg - list of previously visited tests
;; tal - list of never visited tests
;;   prefer next hed to be from reg than tal.



;; Module-level counter used by runs:expand-items. It is incremented each time a
;; blackballed (DONOTRUN/removed/CANNOTRUN) test is found at the head of the queue
;; while both tal and reg are empty; once it exceeds 2 runs:expand-items forces
;; the process to exit.
(define runs:nothing-left-in-queue-count 0)

;; BB: for future reference - suspect target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this.  on first pass, var not set, on second pass, ok.

;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature:
;;    (let loop ((hed         (car sorted-test-names))
;;	         (tal         (cdr sorted-test-names))
;;	         (reg         '()) ;; registered, put these at the head of tal 
;;	         (reruns      '()))

;; runs:expand-items: for a given test, expand its items into real tests ready to be processed at this time
;;  this procedure's operation only makes sense in context of runs-tests-queue.
;;
;; Inputs of note (all positional):
;;   hed tal reg reruns - current state of the named loop in runs-tests-queue
;;   newtal             - alternate tail used when hed must be re-queued; (car newtal)
;;                        becomes the next hed in several branches below
;;   waitons            - names of the tests hed waits on
;;   testmode           - list of mode symbols; 'toplevel, 'normal, 'itemmatch and
;;                        'itemwait are the ones inspected here
;;   test-record        - queue record for hed; its items slot is filled in on expansion
;;   test-registry      - hash table of per-test scheduling state; hed is marked
;;                        'CANNOTRUN here when it has failed prerequisites
;;   test-records       - hash table of testname -> queue record, used to look up waitons
;;
;; Returns either a list (hed tal reg reruns) to be fed to the next loop iteration,
;; or #f to tell the caller not to loop again.
;;
;; Side effects: sets MT_TEST_NAME / MT_RUNNAME in the environment, may update test
;; state/status in the db, may sleep for 60 seconds, and calls (exit ...) in two
;; places (see below).
;;
;; NOTE(review): loop-list, ok-statuses and prereq-items-completed are bound below but
;; are not referenced by any live code in this procedure (ok-statuses only appears in
;; the commented-out prereqs-not-met filter).
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)
  (let* ((loop-list       (list hed tal reg reruns))
	 ;; prereqs: the prerequisites not yet met for hed; a non-list answer from the
	 ;; server is logged and treated as "no prerequisites outstanding"
	 (prereqs (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*
					       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
					       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps)
				  '()))))
         ;; have-itemized: #t when testmode contains itemmatch or itemwait
         (have-itemized (not (null? (lset-intersection eq? testmode '(itemmatch itemwait)))))
         (ok-statuses '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))

         ;; (prereqs-not-met
         ;;  (filter (lambda (test)
         ;;               (if (vector? test)  ;; BB: result may be a collection of strings or test vectors-- why not just test vectors?
         ;;                   (let ((testname (db:test-get-testname test))
         ;;                         (itempath (db:test-get-item-path test))
         ;;                         (state (db:test-get-state test) )
         ;;                         (status (db:test-get-status test)))
         ;;                     (cond
         ;;                      ((and (equal? state "COMPLETED") (member status ok-statuses)) #f)
         ;;                      ((and have-itemized (equal? "" itempath) (member testname prereq-tests-some-items-passed-list)) #f)
         ;;                      (else #t)))
         ;;                   test))
         ;;             (delete-duplicates prereqs)))

         ;; prereq-items-completed: the vector entries of prereqs whose state is COMPLETED.
         ;; (the second cond clause is redundant with the else clause; both yield #f)
         (prereq-items-completed
          (filter (lambda (test)
                       (if (vector? test)  ;; BB: result may be a collection of strings or test vectors-- why not just test vectors?
                           (let ((testname (db:test-get-testname test))
                                 (itempath (db:test-get-item-path test))
                                 (state (db:test-get-state test) )
                                 (status (db:test-get-status test)))
                             (cond
                              ((equal? state "COMPLETED") #t)
                              ((not (equal? "" itempath))  #f)
                              (else #f)))
                           #f))
                       prereqs))

          
	 ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails           (runs:calc-fails prereqs)) ;; prereqs 
	 (prereq-fails    (runs:calc-prereq-fail prereqs)) ;; filter - get NOT_STARTED's which are not status n/a or KEEP_TRYING
	 (non-completed   (runs:calc-not-completed prereqs))
	 (runnable-prereqs       (runs:calc-runnable prereqs))
         
         ;; unexpanded-prereqs: the waitons whose items slot still holds a procedure (or
         ;; the marker 'have-procedure), i.e. whose items have not been expanded to a list.
         ;; hash-table-ref is used without a default, so every waiton must be in test-records.
         (unexpanded-prereqs
          (filter (lambda (testname)
                    (let* ((test-rec (hash-table-ref test-records testname))
                           (items       (tests:testqueue-get-items  test-rec)))
                      (BB> "HEY " testname "=>"items)
                      (or (procedure? items)(eq? items 'have-procedure))))
                  waitons))
         ;; completed-prereq-items: prereq test vectors that are COMPLETED and have a
         ;; non-empty item path (i.e. items, not toplevel records). The foo binding exists
         ;; only to emit a BB> debug message.
         (completed-prereq-items 
          (let ((foo (begin (BB> "hello prereqs: "prereqs) #t))
                (res (filter (lambda (test)
                               (BB> "foo - "test)
                               (and (vector? test)
                                    (equal? "COMPLETED" (db:test-get-state test))
                                    (not (equal? "" (db:test-get-item-path test)))))
                             prereqs)))
            res)) 

         )
    (debug:print-info 4 *default-log-port* "START OF INNER COND #2 "
		      "\n can-run-more:            " can-run-more
		      "\n testname:                " hed
		      "\n prereqs:                 " (runs:pretty-string prereqs)
                      "\n completed-prereq-items:  " (runs:pretty-string completed-prereq-items)
		      "\n non-completed:           " (runs:pretty-string non-completed) 
		      "\n prereq-fails:            " (runs:pretty-string prereq-fails)
                      "\n runnable-prereqs:        " (runs:pretty-string runnable-prereqs)
		      "\n fails:                   " (runs:pretty-string fails)
		      "\n testmode:                " testmode
		      "\n (member 'toplevel testmode): " (member 'toplevel testmode)
		      "\n (null? non-completed):      " (null? non-completed)
		      "\n reruns:                  " reruns
		      "\n items:                   " items
		      "\n unexpanded-prereqs:      " unexpanded-prereqs ;;all-prereqs-expanded
                      "\n completed-prereq-items:  " completed-prereq-items
                      "\n have-itemized:           " have-itemized
		      "\n can-run-more:            " can-run-more)
    
    (BB> "before runs:expand-items cond")
    ;; The cond below is evaluated top to bottom; the BB> "cbN" messages tag which
    ;; branch fired. The branch value is captured in res so a trailing debug message
    ;; can be emitted before returning it.
    (let ((res
           (cond
            ;; all prereqs met, fire off the test
            ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch
            ;; runs:expand-items case: test of interest not toplevel and IS blackballed -> ???
            ((and (not (member 'toplevel testmode)) ;; test has been blackballed elsewhere
                  (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a)
                          '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here
             (BB> "cb1")
             
             (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue")
             (if (or (not (null? tal))
                     (not (null? reg))) 
                 (runs:loop-values tal reg reglen regfull reruns) ;; blackballed test - throw it away
                 (begin
                   (debug:print-info 0 *default-log-port* "Nothing left in the queue!")
                   ;; Each visit here bumps runs:nothing-left-in-queue-count. Once the
                   ;; counter exceeds 2 we know we've tried to expand all items;
                   ;; since there must be a logic issue with the handling of loops in the 
                   ;; items expand phase we will brute force an exit here.
                   (if (> runs:nothing-left-in-queue-count 2)
                       (begin
                         (debug:print 0 *default-log-port* "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness")
                         (exit 0))
                       (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1)))
                   #f)))

            ;;; desired result of below cond branch:
            ;;   we want to expand items in our test of interest (hed) in the following cases:
            ;;    case 1 - mode is itemmatch or itemwait: (TODO)
            ;;       - all prereq tests have been expanded
            ;;       - at least one prereq's items have completed
            ;;    case 2 - mode is toplevel   (DONE)
            ;;       - prereqs are completed.
            ;;    case 3 - mode not specified (DONE)
            ;;       - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current)
            
            ;; runs:expand-items case: toplevel or else no dangling prerequisites -- expand items now.
            ((or
              (and have-itemized (null? unexpanded-prereqs) (not (null? completed-prereq-items)))
              (null? prereqs)            ;; nothing is in our way to proceed (need to expand this to an item level check.)
              (and (member 'toplevel testmode)   ;; for toplevel test - proceed (nothing in our way)
                   (null? non-completed)))
             (BB> "cb2")

             (debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs) (and (member 'toplevel testmode)(null? non-completed)))")
             (let ((test-name (tests:testqueue-get-testname test-record)))
               (setenv "MT_TEST_NAME" test-name) ;; hack to give context to get-items-from-config TODO: call-with-environment-variables
               (setenv "MT_RUNNAME"   runname)   
               (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
               (let ((items-list (items:get-items-from-config tconfig))) ;; BB: RIGHT HERE is where item expansion occurs..  target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this.
                 (if (list? items-list)
                     (begin                      ;; we have discovered we have items we need to process, so stuff them into test list and recur
                       ;; an empty expansion on a toplevel record with no items in the db
                       ;; is flagged as NOT_STARTED/ZERO_ITEMS
                       (if (null? items-list)
                           (let ((test-id   (rmt:get-test-id run-id test-name ""))
                                 (num-items (rmt:test-toplevel-num-items run-id test-name)))
                             (if (and test-id
                                      (not (> num-items 0)))
                                 (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites"))))
                       (tests:testqueue-set-items! test-record items-list) ; stuffing happens here
                       (list hed tal reg reruns)) ;; return value..
                     (begin
                       (debug:print-error 0 *default-log-port* "The proc from reading the items table did not yield a list - please report this")
                       (exit 1))))))

            ;; runs:expand-items case: no fails, no prereq-fails, some non-completed
            ((and (null? fails)
                  (null? prereq-fails)
                  (not (null? non-completed)))
             (BB> "cb3")
             ;; NOTE(review): allinqueue is only used by the commented-out notinqueue
             ;; binding below.
             (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
                                     (append newtal reruns)))
                    ;; prereqstrs is a list of test names as strings that are prereqs for hed
                    (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
                                                        prereqs)))
                    ;; a prereq that is not found in allinqueue will be put in the notinqueue list
                    ;; 
                    ;; (notinqueue (filter (lambda (x)
                    ;;    		   (not (member x allinqueue)))
                    ;;    		 prereqstrs))
                    (give-up    #f))

               ;; We can get here when a prereq has not been run due to *it* having a prereq that failed.
               ;; We need to use this to dequeue this item as CANNOTRUN
               ;; 
               ;; Only toplevel tests give up here: if any prereq is registered as
               ;; CANNOTRUN in test-registry, hed is discarded.
               (if (member 'toplevel testmode)
                   (for-each (lambda (prereq)
                               (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN)
                                   (set! give-up #t)))
                             prereqstrs))

               (if (and give-up
                        (not (and (null? tal)(null? reg))))
                   (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
                         (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
                     (debug:print 1 *default-log-port* "WARNING: test " hed " has discarded prerequisites, removing it from the queue")

                     (let ((test-id (rmt:get-test-id run-id hed "")))
                       (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")))
                     
                     (if (and (null? trimmed-tal)
                              (null? trimmed-reg))
                         #f
                         (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns)
                         ))
                   ;; not giving up: take the next hed from newtal and fold reg into tal
                   (list (car newtal)(append (cdr newtal) reg) '() reruns))))

            ((and (null? fails) ;; have not-started tests, but unable to run them.  everything looks completed with no prospect of unsticking something that is stuck.  we should mark hed as moribund and exit or continue if there are more tests to consider
                  (null? prereq-fails)
                  (null? non-completed))
             (BB> "cb4")
             (if  (runs:can-keep-running? hed 20)
                  (begin
                    (runs:inc-cant-run-tests hed)
                    (debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) ;; 
                    ;; getting here likely means the system is way overloaded, kill a full minute before continuing
                    (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing)
                    ;; num-retries code was here
                    ;; we use this opportunity to move contents of reg to tal
                    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
                  (begin
                    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
                    (let ((test-id (rmt:get-test-id run-id hed "")))
                      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
                    (runs:loop-values tal reg reglen regfull reruns)
                    )))

            ;; runs:expand-items case: 'normal mode test with failed (or never-started)
            ;; prerequisites -- mark it PREQ_DISCARDED or PREQ_FAIL and drop it.
            ;; NOTE(review): the log message below lists only `fails`; when this branch
            ;; fires because of prereq-fails alone the list in the message is empty.
            ((and 
              (or (not (null? fails))
                  (not (null? prereq-fails)))
              (member 'normal testmode))
             (BB> "cb5")
             (debug:print-info 1 *default-log-port* "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
                               (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
                               ", removing it from to-do list")
             (let ((test-id (rmt:get-test-id run-id hed "")))
               (if test-id
                   (if (not (null? prereq-fails))
                       (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
                       (begin
                         (debug:print 4 *default-log-port*"BB> set PREQ_FAIL on "hed)
                         (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites"))))) ;; BB: this works, but equivalent for itemwait mode does not work.
             ;; hed is registered as CANNOTRUN and added to reruns only when something
             ;; else remains in the queue
             (if (or (not (null? reg))(not (null? tal)))
                 (begin
                   (hash-table-set! test-registry hed 'CANNOTRUN)
                   (runs:loop-values tal reg reglen regfull (cons hed reruns))
                   )
                 #f)) ;; #f flags do not loop

            ;; runs:expand-items case: toplevel test with failed prerequisites -- move
            ;; on to the next queued test (folding reg into tal), or stop if none is left.
            ((and (not (null? fails))(member 'toplevel testmode))
             (BB> "cb6")
             (if (or (not (null? reg))(not (null? tal)))
                 (list (car newtal)(append (cdr newtal) reg) '() reruns)
                 #f))
            
            ;; runs:expand-items case: none of the outstanding prereqs is runnable.
            ((null? runnable-prereqs)
             (BB> "cb7")
             #f) ;; if we get here and runnable-prereqs is null then it is all over.
            
            (else
             (BB> "cb8")
             (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now")
             (list (car newtal)(cdr newtal) reg reruns)))))
      (BB> "after runs:expand-items big cond")
      res)))

(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst)
  (if (null? inlst)
      '()
      (map (lambda (t)
	     (cond
	      ((vector? t)
816
817
818
819
820
821
822



823
824
825
826
827
828
829
	       (conc t))))
	   inlst)))


;;  hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps)
(define (runs:process-expanded-tests runsdat testdat)
  ;; unroll the contents of runsdat and testdat (due to ongoing refactoring).



  (let* ((hed                    (runs:testdat-hed testdat))
	 (tal                    (runs:testdat-tal testdat))
	 (reg                    (runs:testdat-reg testdat))
	 (reruns                 (runs:testdat-reruns testdat))
	 (test-name              (runs:testdat-test-name testdat))
	 (item-path              (runs:testdat-item-path testdat))
	 (jobgroup               (runs:testdat-jobgroup testdat))







>
>
>







923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
	       (conc t))))
	   inlst)))


;;  hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps)
(define (runs:process-expanded-tests runsdat testdat)
  ;; unroll the contents of runsdat and testdat (due to ongoing refactoring).
  (debug:print 2 *default-log-port* "runs:process-expanded-tests; testdat:" )
  (debug:print 2 *default-log-port* (with-output-to-string
                                            (lambda () (pp (runs:testdat->alist testdat) ))))
  (let* ((hed                    (runs:testdat-hed testdat))
	 (tal                    (runs:testdat-tal testdat))
	 (reg                    (runs:testdat-reg testdat))
	 (reruns                 (runs:testdat-reruns testdat))
	 (test-name              (runs:testdat-test-name testdat))
	 (item-path              (runs:testdat-item-path testdat))
	 (jobgroup               (runs:testdat-jobgroup testdat))
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	(debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))

    ;; Don't know at this time if the test have been launched at some time in the past
    ;; i.e. is this a re-launch?
    (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info)
    
    (cond
     
     ;; Check item path against item-patts, 
     ;;
     ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
      ;; else the run is stuck, temporarily or permanently
      ;; but should check if it is due to lack of resources vs. prerequisites
      (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)







|







997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	(debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))

    ;; Don't know at this time if the test have been launched at some time in the past
    ;; i.e. is this a re-launch?
    (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info)
    
    (cond ; cond 894- 1067
     
     ;; Check item path against item-patts, 
     ;;
     ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
      ;; else the run is stuck, temporarily or permanently
      ;; but should check if it is due to lack of resources vs. prerequisites
      (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018




1019

1020
1021
1022

1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
	    ;; couldn't run, take a breather
	    (if  (runs:lownoise "Waiting for more work to do..." 60)
		 (debug:print-info 0 *default-log-port* "Waiting for more work to do..."))
	    (thread-sleep! 1)
	    (list (car newtal)(cdr newtal) reg reruns))
	  ;; the waiton is FAIL so no point in trying to run hed ever again
	  (if (or (not (null? reg))(not (null? tal)))
	      (if (vector? hed)
		  (begin
		    (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path
				 " from the launch list as it has prerequistes that are FAIL")
		    (let ((test-id (rmt:get-test-id run-id hed "")))
		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
		    (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)
		    ;; This next is for the items




		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)

		    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed)
		    (runs:loop-values tal reg reglen regfull reruns))
		  (let ((nth-try (hash-table-ref/default test-registry hed 0)))

		    (cond
		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
		      (thread-sleep! 4)
		      (runs:loop-values tal reg reglen regfull reruns))
		     ((or (not nth-try)
			  (and (number? nth-try)
			       (< nth-try 10)))
		      (hash-table-set! test-registry hed (if (number? nth-try)
							     (+ nth-try 1)
							     0))
		      (if (runs:lownoise (conc "not removing test " hed) 60)
			  (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites"))
		      ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
		      (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		      (runs:loop-values newtal reg reglen regfull reruns))
		     ((symbol? nth-try)
		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
			  (if (null? tal)
			      #f ;; yes, really
			      (list (car tal)(cdr tal) reg reruns))
			  (begin
			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry."))
			    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
			    (hash-table-set! test-registry hed 0)
			    (runs:loop-values newtal reg reglen regfull))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)







|








>
>
>
>
|
>


|
>






|










|






|







1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
	    ;; couldn't run, take a breather
	    (if  (runs:lownoise "Waiting for more work to do..." 60)
		 (debug:print-info 0 *default-log-port* "Waiting for more work to do..."))
	    (thread-sleep! 1)
	    (list (car newtal)(cdr newtal) reg reruns))
	  ;; the waiton is FAIL so no point in trying to run hed ever again
	  (if (or (not (null? reg))(not (null? tal)))
	      (if (or (vector? hed)  (not (null? fails))) ;; BB: why do we need a vector?  in my case, fails is populated (prereq failed), reg is not nul, and we really want to drop this one
		  (begin
		    (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path
				 " from the launch list as it has prerequistes that are FAIL")
		    (let ((test-id (rmt:get-test-id run-id hed "")))
		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
		    (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)
		    ;; This next is for the items

                    (if (not (null? fails))
                        ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f)
                        (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) 
                        ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
                        (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) )
		    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed)
		    (runs:loop-values tal reg reglen regfull reruns))
		  (let ((nth-try (hash-table-ref/default test-registry hed 0))) ;; hed not a vector...
                    (debug:print 2 *default-log-port* "nth-try("hed")="nth-try)
		    (cond
		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
		      (thread-sleep! 4)
		      (runs:loop-values tal reg reglen regfull reruns))
		     ((or (not nth-try) ;; BB: condition on subsequent tries, condition below fires on first try 
			  (and (number? nth-try)
			       (< nth-try 10)))
		      (hash-table-set! test-registry hed (if (number? nth-try)
							     (+ nth-try 1)
							     0))
		      (if (runs:lownoise (conc "not removing test " hed) 60)
			  (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites"))
		      ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
		      (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		      (runs:loop-values newtal reg reglen regfull reruns))
		     ((symbol? nth-try) ;; BB: 'done matches here in one case where prereq itemwait failed.  This is first "try"
		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
			  (if (null? tal)
			      #f ;; yes, really
			      (list (car tal)(cdr tal) reg reruns))
			  (begin
			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry."))
			    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
			    (hash-table-set! test-registry hed 0)
			    (runs:loop-values newtal reg reglen regfull))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
  (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags))

  ;; Do mark-and-find clean up of db before starting runing of quue
  ;;
  ;; (rmt:find-and-mark-incomplete)

  (let* ((run-info             (rmt:get-run-info run-id))
	(tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
	(test-registry         (make-hash-table))
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50))
        (reglen                (if (number? reglen-in) reglen-in 1))
	(last-time-incomplete  (- (current-seconds) 900)) ;; force at least one clean up cycle
	(last-time-some-running (current-seconds))
	;; (tdbdat                (tasks:open-db))
	(runsdat (make-runs:dat
		  ;; hed: hed
		  ;; tal: tal
		  ;; reg: reg
		  ;; reruns: reruns
		  reglen: reglen
		  regfull: #f ;; regfull
		  ;; test-record: test-record
		  runname: runname
		  ;; test-name: test-name
		  ;; item-path: item-path
		  ;; jobgroup: jobgroup
		  max-concurrent-jobs: max-concurrent-jobs
		  run-id: run-id
		  ;; waitons: waitons
		  ;; testmode: testmode
		  test-patts: test-patts
		  required-tests: required-tests
		  test-registry: test-registry
		  registry-mutex: registry-mutex
		  flags: flags
		  keyvals: keyvals
		  run-info: run-info
		  ;; newtal: newtal
		  all-tests-registry: all-tests-registry
		  ;; itemmaps: itemmaps
		  ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)
		  ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running
		  )))

    ;; Initialize the test-registry hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))







|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
  (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags))

  ;; Do mark-and-find clean up of the db before starting to run the queue
  ;;
  ;; (rmt:find-and-mark-incomplete)

  (let* ((run-info             (rmt:get-run-info run-id))
         (tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
         (sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
         (test-registry         (make-hash-table))
         (registry-mutex        (make-mutex))
         (num-retries           0)
         (max-retries           (config-lookup *configdat* "setup" "maxretries"))
         (max-concurrent-jobs   (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50))
         (reglen                (if (number? reglen-in) reglen-in 1))
         (last-time-incomplete  (- (current-seconds) 900)) ;; force at least one clean up cycle
         (last-time-some-running (current-seconds))
         ;; (tdbdat                (tasks:open-db))
         (runsdat (make-runs:dat
                   ;; hed: hed
                   ;; tal: tal
                   ;; reg: reg
                   ;; reruns: reruns
                   reglen: reglen
                   regfull: #f ;; regfull
                   ;; test-record: test-record
                   runname: runname
                   ;; test-name: test-name
                   ;; item-path: item-path
                   ;; jobgroup: jobgroup
                   max-concurrent-jobs: max-concurrent-jobs
                   run-id: run-id
                   ;; waitons: waitons
                   ;; testmode: testmode
                   test-patts: test-patts
                   required-tests: required-tests
                   test-registry: test-registry
                   registry-mutex: registry-mutex
                   flags: flags
                   keyvals: keyvals
                   run-info: run-info
                   ;; newtal: newtal
                   all-tests-registry: all-tests-registry
                   ;; itemmaps: itemmaps
                   ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)
                   ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running
                   )))

    ;; Initialize the test-registry hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
	;; 		       (server:ping (remote-server-url *runremote*)))
	;; 		  (server:check-if-running *toppath*))))
	;;     (server:kind-run *toppath*))
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))

	;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard
	;; and it is clear they *should* have run but did not.
	(if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f))
	    (begin
	      (rmt:register-test run-id test-name "")







|
|







1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
	;; 		       (server:ping (remote-server-url *runremote*)))
	;; 		  (server:check-if-running *toppath*))))
	;;     (server:kind-run *toppath*))
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))

	;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard
	;; and it is clear they *should* have run but did not.
	(if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f))
	    (begin
	      (rmt:register-test run-id test-name "")
1408
1409
1410
1411
1412
1413
1414


1415
1416
1417
1418
1419
1420
1421
	  (if (null? tal)
	      #f
	      (loop (car tal)(cdr tal) reg reruns)))
	    
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS


	 ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky
	  (let ((can-run-more    (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)))
		  (if loop-list
		      (apply loop loop-list)))







>
>







1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
	  (if (null? tal)
	      #f
	      (loop (car tal)(cdr tal) reg reruns)))
	    
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
         ;; 
         ;; * the (eq? items 'have-procedure) condition below ensures that runs:expand-items is not called on the same test twice -- expand-items will flatten the procedure to an actual list of items.
	 ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky
	  (let ((can-run-more    (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)))
		  (if loop-list
		      (apply loop loop-list)))
1440
1441
1442
1443
1444
1445
1446

1447
1448
1449
1450
1451
1452
1453
1454
	 ((not (null? tal))
	  (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here."))
	 ((not (null? reg)) ;; could we get here with leftovers?
	  (debug:print-info 0 *default-log-port* "Have leftovers!")
	  (loop (car reg)(cdr reg) '() reruns))
	 (else
	  (debug:print-info 4 *default-log-port* "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))

	 )))
    ;; now *if* -run-wait we wait for all tests to be done
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle
    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
		    (prev-num-running 0))
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")







>
|







1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
	 ((not (null? tal))
	  (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here."))
	 ((not (null? reg)) ;; could we get here with leftovers?
	  (debug:print-info 0 *default-log-port* "Have leftovers!")
	  (loop (car reg)(cdr reg) '() reruns))
	 (else
	  (debug:print-info 4 *default-log-port* "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))
	 ))) ;; end loop on sorted test names
    
    ;; now *if* -run-wait we wait for all tests to be done
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle
    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
		    (prev-num-running 0))
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494

1495
1496
1497
1498
1499
1500
1501
  (filter (lambda (test)
	    (and (vector? test) ;; not (string? test))
		 (equal? (db:test-get-state test) "NOT_STARTED")
		 (not (member (db:test-get-status test)
			      '("n/a" "KEEP_TRYING")))))
	  prereqs-not-met))

;; Return the entries of prereqs-not-met that are not yet finished.
;;
;; prereqs-not-met : list of entries; an entry that is a vector is read as a
;;                   test record via db:test-get-state, any other entry is
;;                   kept unconditionally.
;;
;; A test record is dropped when its state is "INCOMPLETE" or "COMPLETED".
(define (runs:calc-not-completed prereqs-not-met)
  (define (still-pending? entry)
    (if (vector? entry)
        (not (member (db:test-get-state entry) '("INCOMPLETE" "COMPLETED")))
        #t))
  (filter still-pending? prereqs-not-met))

;; (define (runs:calc-not-completed prereqs-not-met)
;;   (filter
;;    (lambda (t)
;;      (or (not (vector? t))







|
|


>







1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
  (filter (lambda (test)
	    (and (vector? test) ;; not (string? test))
		 (equal? (db:test-get-state test) "NOT_STARTED")
		 (not (member (db:test-get-status test)
			      '("n/a" "KEEP_TRYING")))))
	  prereqs-not-met))

;; Return the entries of prereqs-not-met that have NOT yet reached a ground
;; state, i.e. filter out tests which are done one way or another.
;;
;; prereqs-not-met : list of entries; an entry that is a vector is read as a
;;                   test record via db:test-get-state / db:test-get-status,
;;                   any other entry is kept unconditionally.
;;
;; A test record is considered to be in a ground state (and is dropped) when
;;   - its state is "INCOMPLETE" or "COMPLETED", or
;;   - its state is "NOT_STARTED" and its status is "PREQ_FAIL".
;;
;; NOTE: both "not in a ground state" checks must hold together. When they
;; are or-ed, one of them is true for every record (a NOT_STARTED/PREQ_FAIL
;; record is never INCOMPLETE/COMPLETED and vice versa), so nothing would
;; ever be filtered out.
(define (runs:calc-not-completed prereqs-not-met)
  (filter
   (lambda (t)
     (or (not (vector? t))
         (let ((state  (db:test-get-state t))
               (status (db:test-get-status t)))
           (and (not (and (equal? state "NOT_STARTED")
                          (equal? status "PREQ_FAIL")))
                (not (member state '("INCOMPLETE" "COMPLETED")))))))
   prereqs-not-met))

;; (define (runs:calc-not-completed prereqs-not-met)
;;   (filter
;;    (lambda (t)
;;      (or (not (vector? t))
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
	 (and (equal? "RUNNING" (db:test-get-state t))))) ;; account for a test that is running
   prereqs-not-met))

;; Render each entry of lst as a short human-readable string.
;;
;; lst : list of entries; a vector entry is read as a test record and shown
;;       as "<testname>:<state>/<status>", any other entry is passed
;;       through conc unchanged.
;;
;; Returns a list of strings, one per entry, in the same order.
(define (runs:pretty-string lst)
  (define (describe entry)
    (cond
     ((vector? entry)
      (conc (db:test-get-testname entry) ":" (db:test-get-state entry) "/" (db:test-get-status entry)))
     (else
      (conc entry))))
  (map describe lst))

;; parent-test is there as a placeholder for when parent-tests can be run as a setup step
;;
(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry)
  ;; All these vars might be referenced by the testconfig file reader
  (let* ((test-name    (tests:testqueue-get-testname   test-record))







|







1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
	 (and (equal? "RUNNING" (db:test-get-state t))))) ;; account for a test that is running
   prereqs-not-met))

;; Render each entry of lst as a short human-readable string.
;;
;; lst : list of entries; a vector entry is read as a test record and shown
;;       as "<testname>/<item-path>:<state>/<status>", any other entry is
;;       passed through conc unchanged.
;;
;; Returns a list of strings, one per entry, in the same order.
(define (runs:pretty-string lst)
  (define (describe entry)
    (cond
     ((vector? entry)
      (conc (db:test-get-testname entry) "/" (db:test-get-item-path entry)
            ":"
            (db:test-get-state entry) "/" (db:test-get-status entry)))
     (else
      (conc entry))))
  (map describe lst))

;; parent-test is there as a placeholder for when parent-tests can be run as a setup step
;;
(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry)
  ;; All these vars might be referenced by the testconfig file reader
  (let* ((test-name    (tests:testqueue-get-testname   test-record))

Modified server.scm from [45bfab59ba] to [40df68dca1].

456
457
458
459
460
461
462
463

464
465
466
467
468
469
470
471
472
473
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync  (common:run-sync?))
        (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
	(debug-mode   (debug:debug-mode 1))
	(last-time    (current-seconds))
	(no-sync-db   (db:open-no-sync-db))
        (sync-duration 0) ;; run time of the sync in milliseconds
        (this-wd-num  (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))))

    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let* (;;(dbstruct (db:setup))
	       (mtdb       (dbr:dbstruct-mtdb dbstruct))
	       (mtpath     (db:dbdat-get-path mtdb))
	       (tmp-area   (common:get-db-tmp-area))
	       (start-file (conc tmp-area "/.start-sync"))
	       (end-file   (conc tmp-area "/.end-sync")))







|
>


|







456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync  (common:run-sync?))
        (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
	(debug-mode   (debug:debug-mode 1))
	(last-time    (current-seconds))
	(no-sync-db   (db:open-no-sync-db))
        (sync-duration 0) ;; run time of the sync in milliseconds
        ;;(this-wd-num  (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))
        )
    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)  );;  " this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let* (;;(dbstruct (db:setup))
	       (mtdb       (dbr:dbstruct-mtdb dbstruct))
	       (mtpath     (db:dbdat-get-path mtdb))
	       (tmp-area   (common:get-db-tmp-area))
	       (start-file (conc tmp-area "/.start-sync"))
	       (end-file   (conc tmp-area "/.end-sync")))
577
578
579
580
581
582
583
584
585
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (if (not *time-to-exit*) (loop))))
	    ;; time to exit, close the no-sync db here
	    (db:no-sync-close-db no-sync-db)
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" this-wd-num="this-wd-num)))))))








|

578
579
580
581
582
583
584
585
586
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (if (not *time-to-exit*) (loop))))
	    ;; time to exit, close the no-sync db here
	    (db:no-sync-close-db no-sync-db)
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num)))))))