Megatest

Changes On Branch v1.64-keep-running-fix
Login

Changes In Branch v1.64-keep-running-fix Excluding Merge-Ins

This is equivalent to a diff from 32584d6c1d to 6baf47004a

2017-10-02
17:57
upgraded mt: to rmt: in runs.scm for status setting, fleshed out statuses that qualify for killed & in progress in db:prereqs-not-met

.. dead end. learnings applied to v1.64-itemflow2 branch. Closed-Leaf check-in: 6baf47004a user: bjbarcla tags: v1.64-keep-running-fix

2017-09-29
17:56
got further. noticed race condition when not stepping one at a time by setting launcher. noticed xor does not handle preq-fail. check-in: 89fedf98b8 user: bjbarcla tags: v1.64-keep-running-fix
2017-09-27
16:00
cherrypicked from b95f7 check-in: 19d039fc17 user: pjhatwal tags: v1.64
2017-09-14
17:10
fixed issue where item gets stuck in keep_waiting status when prerequisite item failed check-in: 23745b4302 user: bjbarcla tags: v1.6431, v1.64-keep-running-fix
2017-08-31
11:08
Merged in v1.64, for reference only. Do not merge to prod. Closed-Leaf check-in: 1b86fa4903 user: mrwellan tags: v1.63-configdbsync
2017-08-29
11:51
Merged v1.64 changes into v1.65 check-in: 2120db9cff user: mrwellan tags: v1.65
10:50
Bringing in latest changes from v1.64 Closed-Leaf check-in: b1eee0709a user: mrwellan tags: v1.64-server-connection-tagging
2017-08-28
11:47
Merged v1.64 into areas-dashboard branch. check-in: 1f5e744ec1 user: matt tags: v1.64-areas-dashboard
11:42
Cleaned up couple more named loop calls in runs.scm. Added post-run-hook. check-in: 32584d6c1d user: matt tags: v1.64
2017-08-25
17:59
Minor refactor of some runs.scm code? check-in: cc163f91ad user: mrwellan tags: v1.64

Modified codescanlib.scm from [85429a3289] to [bf0bf2c4eb].

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
                     (if (atom? defname) ;; filter out things we dont understand (procname is a list, what??)
                         (list defname filename args body)
                         #f)]
                    [else #f] ) scm-tree))))
    procs))


;; given a sexp, return a flat lost of atoms in that sexp
(define (get-atoms-in-body body)
  (cond
   ((null? body) '())
   ((atom? body) (list body))
   (else
    (apply append (map get-atoms-in-body body)))))








|







44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
                     (if (atom? defname) ;; filter out things we dont understand (procname is a list, what??)
                         (list defname filename args body)
                         #f)]
                    [else #f] ) scm-tree))))
    procs))


;; given a sexp, return a flat list of atoms in that sexp
(define (get-atoms-in-body body)
  (cond
   ((null? body) '())
   ((atom? body) (list body))
   (else
    (apply append (map get-atoms-in-body body)))))

Modified common.scm from [071499421b] to [93c8a7d160].

529
530
531
532
533
534
535

536
537
538
539
540
541
542
543
    (3 "SKIP")
    (4 "WARN")
    (5 "WAIVED")
    (6 "CHECK")
    (7 "STUCK/DEAD")
    (8 "DEAD")
    (9 "FAIL")

    (10 "ABORT")))

(define *common:ended-states*       ;; states which indicate the test is stopped and will not proceed
  '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE"))

(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked
  '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD"))








>
|







529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
    (3 "SKIP")
    (4 "WARN")
    (5 "WAIVED")
    (6 "CHECK")
    (7 "STUCK/DEAD")
    (8 "DEAD")
    (9 "FAIL")
    (10 "PREQ_FAIL")
    (11 "ABORT")))

(define *common:ended-states*       ;; states which indicate the test is stopped and will not proceed
  '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE"))

(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked
  '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD"))

Modified common_records.scm from [0a5321af09] to [9609771573].

145
146
147
148
149
150
151


152
153
154
155
156
157
158
159
160
161
162
163
     (lambda (frame)
       (let* ((this-loc (vector-ref frame 0))
              (temp     (string-split (->string this-loc) " "))
              (this-func (if (and (list? temp) (> (length temp) 1)) (cadr temp) "???")))
         (if (equal? this-func "BB>")
             (set! location this-loc))))
     stack)


    (let ((dp-args
           (append
            (list 0 *default-log-port*
                  (conc location "@"(/ (- (current-milliseconds) *BB-process-starttime*) 1000)"   ")  )
            in-args)))
      (apply debug:print dp-args))))

(define *BBpp_custom_expanders_list* (make-hash-table))



;; register hash tables with BBpp.







>
>
|
|
|
|
|







145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
     (lambda (frame)
       (let* ((this-loc (vector-ref frame 0))
              (temp     (string-split (->string this-loc) " "))
              (this-func (if (and (list? temp) (> (length temp) 1)) (cadr temp) "???")))
         (if (equal? this-func "BB>")
             (set! location this-loc))))
     stack)
    (let* ((color-on "\x1b[1m")
           (color-off "\x1b[0m")
           (dp-args
            (append
             (list 0 *default-log-port*
                   (conc color-on location "@"(/ (- (current-milliseconds) *BB-process-starttime*) 1000) color-off "   ")  )
             in-args)))
      (apply debug:print dp-args))))

(define *BBpp_custom_expanders_list* (make-hash-table))



;; register hash tables with BBpp.

Modified db.scm from [144a083df6] to [1045a4a832].

1
2
3
4
5
6
7
8
;;======================================================================
;; Copyright 2006-2016, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|







1
2
3
4
5
6
7
8
;======================================================================
;; Copyright 2006-2016, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
  (let ((syncprag (configf:lookup *configdat* "setup" "sychronous")))
    (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) 

;; open an sql database inside a file lock
;; returns: db existed-prior-to-opening
;; RA => Returns a db handler; sets the lock if opened in writable mode
;;
;;(define *db-open-mutex* (make-mutex))

(define (db:lock-create-open fname initproc)
  (let* ((parent-dir   (or (pathname-directory fname)(current-directory))) ;; no parent? go local
         (raw-fname    (pathname-file fname))
	 (dir-writable (file-write-access? parent-dir))
	 (file-exists  (common:file-exists? fname))
	 (file-write   (if file-exists
			   (file-write-access? fname)
			   dir-writable )))
    ;;(mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped.
    (if file-write ;; dir-writable
	(condition-case
         (let* ((lockfname   (conc fname ".lock"))
                (readyfname  (conc parent-dir "/.ready-" raw-fname))
                (readyexists (common:file-exists? readyfname)))
           (if (not readyexists)
               (common:simple-file-lock-and-wait lockfname))







|









|







196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
  (let ((syncprag (configf:lookup *configdat* "setup" "sychronous")))
    (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) 

;; open an sql database inside a file lock
;; returns: db existed-prior-to-opening
;; RA => Returns a db handler; sets the lock if opened in writable mode
;;
;; (define *db-open-mutex* (make-mutex))

(define (db:lock-create-open fname initproc)
  (let* ((parent-dir   (or (pathname-directory fname)(current-directory))) ;; no parent? go local
         (raw-fname    (pathname-file fname))
	 (dir-writable (file-write-access? parent-dir))
	 (file-exists  (common:file-exists? fname))
	 (file-write   (if file-exists
			   (file-write-access? fname)
			   dir-writable )))
    ;; (mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped.
    (if file-write ;; dir-writable
	(condition-case
         (let* ((lockfname   (conc fname ".lock"))
                (readyfname  (conc parent-dir "/.ready-" raw-fname))
                (readyexists (common:file-exists? readyfname)))
           (if (not readyexists)
               (common:simple-file-lock-and-wait lockfname))
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
         (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
        
	(condition-case
         (begin
           (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname)
           (let ((db (sqlite3:open-database fname)))
             ;;(mutex-unlock! *db-open-mutex*)
             db))
         (exn (io-error)  (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
         (exn (corrupt)   (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed."))
         (exn (busy)      (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back."))
         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
         (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
	)))







|







244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
         (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
        
	(condition-case
         (begin
           (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname)
           (let ((db (sqlite3:open-database fname)))
             ;; (mutex-unlock! *db-open-mutex*)
             db))
         (exn (io-error)  (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
         (exn (corrupt)   (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed."))
         (exn (busy)      (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back."))
         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
         (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
	)))
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
;;
;; (define (db:launch-delay-left dbstruct run-id launch-delay)
  

;;  select end_time-now from
;;      (select testname,item_path,event_time+run_duration as
;;                          end_time,strftime('%s','now') as now from tests where state in
;;      ('RUNNING','REMOTEHOSTSTART','LAUNCED'));

(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
	 (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
	 (deadtime     (if (and deadtime-str
				(string->number deadtime-str))
			   (string->number deadtime-str)
			   7200))) ;; two hours
    (db:with-db 
     dbstruct #f #f
     (lambda (db)







|





|







1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
;;
;; (define (db:launch-delay-left dbstruct run-id launch-delay)
  

;;  select end_time-now from
;;      (select testname,item_path,event_time+run_duration as
;;                          end_time,strftime('%s','now') as now from tests where state in
;;      ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));

(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
	 (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) ;; FIXME suspect test run time & deadtime are not well matched; resulting in COMPLETED/DEAD status of an a-ok running test
	 (deadtime     (if (and deadtime-str
				(string->number deadtime-str))
			   (string->number deadtime-str)
			   7200))) ;; two hours
    (db:with-db 
     dbstruct #f #f
     (lambda (db)
1648
1649
1650
1651
1652
1653
1654
1655

1656
1657
1658
1659
1660
1661
1662
              (min-incompleted-ids (map car incompleted)) ;; do 'em all
              (all-ids             (append min-incompleted-ids (map car oldlaunched))))
         (if (> (length all-ids) 0)
             (begin
               (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE")
               (for-each
                (lambda (test-id)
                  (db:test-set-state-status dbstruct run-id test-id "COMPLETED" "DEAD" "Test failed to complete")) ;; fix for one aspect of Randy's ticket 1405717332

                all-ids))))))))

;; ALL REPLACED BY THE BLOCK ABOVE
;;
;; 	    (sqlite3:execute 
;; 	     db
;; 	     (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? AND id IN (" 







|
>







1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
              (min-incompleted-ids (map car incompleted)) ;; do 'em all
              (all-ids             (append min-incompleted-ids (map car oldlaunched))))
         (if (> (length all-ids) 0)
             (begin
               (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE")
               (for-each
                (lambda (test-id)
                  (db:test-set-state-status dbstruct run-id test-id "COMPLETED" "DEAD" "Test failed to complete")) ;; fix for one aspect of Randy's ticket 1405717332 ;; TODO - fix problem where test goes to COMPLETED/DEAD while in progress, only later to go to COMPLETED/PASS.  ref ticket 220546828

                all-ids))))))))

;; ALL REPLACED BY THE BLOCK ABOVE
;;
;; 	    (sqlite3:execute 
;; 	     db
;; 	     (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? AND id IN (" 
3490
3491
3492
3493
3494
3495
3496



3497
3498
3499
3500
3501


3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514



3515
3516
3517

3518

3519
3520
3521









3522
3523

3524
3525
3526
3527
3528
3529
3530
                                                    (if (not (equal? state "DELETED"))
                                                        (cons status (map dbr:counts-status state-status-counts))
                                                        (map dbr:counts-status state-status-counts)))
                                                   *common:std-statuses* >))
			    (non-completes     (filter (lambda (x)
							 (not (equal? x "COMPLETED")))
						       all-curr-states))



			    (num-non-completes (length non-completes))
                            
                            (newstate          (cond
						((> running 0)
						 "RUNNING") ;; anything running, call the situation running


						((> bad-not-started 0)  ;; we have an ugly situation, it is completed in the sense we cannot do more.
						 "COMPLETED") 
						((> num-non-completes 0) ;;
						 (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states)))
                                                ;; only rollup DELETED if all DELETED
						(else
						 (car all-curr-states))))
			                       ;; (if (> running 0)
                                               ;;     "RUNNING"
                                               ;;     (if (> bad-not-started 0)
                                               ;;         "COMPLETED"
                                               ;;         (car all-curr-states))))
                            (newstatus            (if (or (> bad-not-started 0)



							  (and (equal? newstate "NOT_STARTED")
							       (> num-non-completes 0)))
						      "STARTED"

                                                      (car all-curr-statuses))))

                       ;; (print "bad-not-supported: " bad-not-support " all-curr-states: " all-curr-states " all-curr-statuses: " all-curr-states)
                       ;;      " newstate: " newstate " newstatus: " newstatus)
                       ;; NB// Pass the db so it is part of the transaction









                       (if tl-test-id
			   (db:test-set-state-status db run-id tl-test-id newstate newstatus #f))))))))

         (mutex-unlock! *db-transaction-mutex*)
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
         tr-res)))))
;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status*
(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path)
  (db:with-db







>
>
>
|
<

<
|
>
>
|
<
<
|
<
<
|





|
>
>
>
|
|
|
>
|
>



>
>
>
>
>
>
>
>
>

|
>







3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501

3502

3503
3504
3505
3506


3507


3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
                                                    (if (not (equal? state "DELETED"))
                                                        (cons status (map dbr:counts-status state-status-counts))
                                                        (map dbr:counts-status state-status-counts)))
                                                   *common:std-statuses* >))
			    (non-completes     (filter (lambda (x)
							 (not (equal? x "COMPLETED")))
						       all-curr-states))
			    (preq-fails        (filter (lambda (x)
							 (equal? x "PREQ_FAIL"))
						       all-curr-statuses))
                            (num-non-completes (length non-completes))

                            (newstate          (cond

						((> running 0)           "RUNNING")            ;; anything running, call the situation running
                                                ((> (length preq-fails) 0)
                                                 "NOT_STARTED")
						((> bad-not-started 0)   "COMPLETED")          ;; we have an ugly situation, it is completed in the sense we cannot do more.


						((> num-non-completes 0) (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED


						(else                    (car all-curr-states))))
			                       ;; (if (> running 0)
                                               ;;     "RUNNING"
                                               ;;     (if (> bad-not-started 0)
                                               ;;         "COMPLETED"
                                               ;;         (car all-curr-states))))
                            (newstatus         (cond
                                                ((> (length preq-fails) 0)
                                                 "PREQ_FAIL")
                                                ((or (> bad-not-started 0)
                                                     (and (equal? newstate "NOT_STARTED")
                                                          (> num-non-completes 0)))
                                                 "STARTED")
                                                (else
                                                 (car all-curr-statuses)))))

                       ;; (print "bad-not-supported: " bad-not-support " all-curr-states: " all-curr-states " all-curr-statuses: " all-curr-states)
                       ;;      " newstate: " newstate " newstatus: " newstatus)
                       ;; NB// Pass the db so it is part of the transaction

                       ;; (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path"> bad-not-started="bad-not-started" newstate="newstate" newstatus="newstatus" num-non-completes="num-non-completes" non-completes="non-completes "len(sscs)="(length state-status-counts)  " state-status-counts: "
                       ;;              (apply conc
                       ;;                     (map (lambda (x)
                       ;;                            (conc
                       ;;                             (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
                       ;;                          state-status-counts))
                                    
                       ;;              ); end debug:print
                       (if tl-test-id
			   (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
		       ))))))
         (mutex-unlock! *db-transaction-mutex*)
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
         tr-res)))))
;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status*
(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path)
  (db:with-db
4045
4046
4047
4048
4049
4050
4051


4052
4053


4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133


4134
4135


4136


4137









4138
4139
4140
4141
4142


















4143

4144
4145
4146
4147
4148
4149
4150
4151
;;
;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED)
;;       mode 'toplevel means that tests must be COMPLETED only
;;       mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]]
;;       mode 'exclusive means this test/item cannot run if the same test/item is LAUNCHED,REMOTEHOSTSTART or RUNNING
;; 
;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode)


(define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f))
  ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items


  (append
   (if (member 'exclusive mode)
       (let ((running-tests (db:get-tests-for-run dbstruct
						  #f  ;; run-id of #f means for all runs. 
						  (if (string=? ref-item-path "")   ;; testpatt
						      ref-test-name
						      (conc ref-test-name "/" ref-item-path))
						  '("LAUNCHED" "REMOTEHOSTSTART" "RUNNING") ;; states
						  '()          ;; statuses
						  #f           ;; offset
						  #f           ;; limit
						  #f           ;; not-in
						  #f           ;; sort by
						  #f           ;; sort order
						  'shortlist   ;; query type
						  0            ;; last update, beginning of time ....
						  #f           ;; mode
						  )))
	;;(map (lambda (testdat)
	;;	(if (equal? (db:test-get-item-path testdat) "")
	;;	    (db:test-get-testname testdat)
	;;	    (conc (db:test-get-testname testdat)
	;;		  "/"
	;;		  (db:test-get-item-path testdat))))
	 running-tests) ;; calling functions want the entire data
       '())
   (if (or (not waitons)
	   (null? waitons))
       '()
       (let* ((unmet-pre-reqs '())
	      (result         '()))
	 (for-each 
	  (lambda (waitontest-name)
	    ;; by getting the tests with matching name we are looking only at the matching test 
	    ;; and related sub items
	    ;; next should be using mt:get-tests-for-run?
	    (let ((tests             (db:get-tests-for-run-state-status dbstruct run-id waitontest-name))
		  (ever-seen         #f)
		  (parent-waiton-met #f)
		  (item-waiton-met   #f))
	      (for-each 
	       (lambda (test) ;; BB- this is the upstream test
		 ;; (if (equal? waitontest-name (db:test-get-testname test)) ;; by defintion this had better be true ...
		 (let* ((state             (db:test-get-state test))
			(status            (db:test-get-status test))
			(item-path         (db:test-get-item-path test)) ;; BB- this is the upstream itempath
			(is-completed      (equal? state "COMPLETED"))
			(is-running        (equal? state "RUNNING"))
			(is-killed         (equal? state "KILLED"))
			(is-ok             (member status '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")))
			;;                                       testname-b    path-a    path-b
			(same-itempath     (db:compare-itempaths ref-test-name item-path ref-item-path itemmaps))) ;; (equal? ref-item-path item-path)))
		   (set! ever-seen #t)
		   (cond
		    ;; case 1, non-item (parent test) is 
		    ((and (equal? item-path "") ;; this is the parent test of the waiton being examined
			  is-completed
			  (or is-ok (not (null? (lset-intersection eq? mode '(toplevel)))))) ;;  itemmatch itemwait))))))
		     (set! parent-waiton-met #t))
		    ;; Special case for toplevel and KILLED
		    ((and (equal? item-path "") ;; this is the parent test
			  is-killed
			  (member 'toplevel mode))
		     (set! parent-waiton-met #t))
		    ;; For itemwait mode IFF the previous matching item is good the set parent-waiton-met
		    ((and (not (null? (lset-intersection eq? mode '(itemmatch itemwait)))) ;; how is that different from (member mode '(itemmatch itemwait)) ?????
			  ;; (not (equal? item-path "")) ;; this applies to both top level (to allow launching of next batch) and items
			  same-itempath)
		     (if (and is-completed is-ok)
			 (set! item-waiton-met #t))
		     (if (and (equal? item-path "") ;; if upstream rollup test is completed, parent-waiton-met is set
			      (or is-completed is-running));; this is the parent, set it to run if completed or running ;; BB1
			 (set! parent-waiton-met #t)))
		    ;; normal checking of parent items, any parent or parent item not ok blocks running
		    ((and is-completed
			  (or is-ok 
			      (member 'toplevel mode))              ;; toplevel does not block on FAIL
			  (and is-ok (member 'itemmatch mode))) ;; itemmatch blocks on not ok
		     (set! item-waiton-met #t)))))
	       tests)


	      ;; both requirements, parent and item-waiton must be met to NOT add item to
	      ;; prereq's not met list


	      (if (not (or parent-waiton-met item-waiton-met))


		  (set! result (append (if (null? tests) (list waitontest-name) tests) result))) ;; appends the string if the full record is not available









	      ;; if the test is not found then clearly the waiton is not met...
	      ;; (if (not ever-seen)(set! result (cons waitontest-name result)))))
	      (if (not ever-seen)
		  (set! result (append (if (null? tests)(list waitontest-name) tests) result)))))
	  waitons)


















	 (delete-duplicates result)))))


;;======================================================================
;; Just for sync, procedures to make sync easy
;;======================================================================

;; get an alist of record ids changed since time since-time
;;   '((runs . (1 2 3 ...))(steps . (5 6 7 ...) ...))
;;







>
>


>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
|
|
>
>
|
>
>
|
>
>
>
>
>
>
>
>
>
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
|







4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
;;
;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED)
;;       mode 'toplevel means that tests must be COMPLETED only
;;       mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]]
;;       mode 'exclusive means this test/item cannot run if the same test/item is LAUNCHED,REMOTEHOSTSTART or RUNNING
;; 
;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode)


(define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f))
  ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items
  (let* ((ok-statuses '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))
         (have-itemized (not (null? (lset-intersection eq? mode '(itemmatch itemwait))))))
    (append
     (if (member 'exclusive mode)
         (let ((running-tests (db:get-tests-for-run dbstruct
                                                    #f  ;; run-id of #f means for all runs. 
                                                    (if (string=? ref-item-path "")   ;; testpatt
                                                        ref-test-name
                                                        (conc ref-test-name "/" ref-item-path))
                                                    '("LAUNCHED" "REMOTEHOSTSTART" "RUNNING") ;; states
                                                    '()          ;; statuses
                                                    #f           ;; offset
                                                    #f           ;; limit
                                                    #f           ;; not-in
                                                    #f           ;; sort by
                                                    #f           ;; sort order
                                                    'shortlist   ;; query type
                                                    0            ;; last update, beginning of time ....
                                                    #f           ;; mode
                                                    )))
           ;;(map (lambda (testdat)
           ;;	(if (equal? (db:test-get-item-path testdat) "")
           ;;	    (db:test-get-testname testdat)
           ;;	    (conc (db:test-get-testname testdat)
           ;;		  "/"
           ;;		  (db:test-get-item-path testdat))))
           running-tests) ;; calling functions want the entire data
         '())
     (if (or (not waitons)
             (null? waitons))
         '()
         (let* ((unmet-pre-reqs '())
                (result         '()))
           (for-each ;; waitons
            (lambda (waitontest-name)
              ;; by getting the tests with matching name we are looking only at the matching test 
              ;; and related sub items
              ;; next should be using mt:get-tests-for-run?
              (let ((tests             (db:get-tests-for-run-state-status dbstruct run-id waitontest-name))
                    (ever-seen         #f)
                    (parent-waiton-met #f)
                    (item-waiton-met   #f))
                (for-each ;; item (test record) in waiton
                 (lambda (test) ;; BB- this is the upstream test
                   ;; (if (equal? waitontest-name (db:test-get-testname test)) ;; by defintion this had better be true ...
                   (let* ((state             (db:test-get-state test))
                          (status            (db:test-get-status test))
                          (item-path         (db:test-get-item-path test)) ;; BB- this is the upstream itempath
                          (is-completed      (equal? state "COMPLETED"))
                          (is-running        (member state '("LAUNCHED" "REMOTEHOSTSTART" "RUNNING")))
                          (is-killed         (member state '("KILLREQ" "KILLING" "KILLED")))
                          (is-ok             (member status ok-statuses))
                          ;;                                       testname-b    path-a    path-b
                          (same-itempath     (db:compare-itempaths ref-test-name item-path ref-item-path itemmaps))) ;; (equal? ref-item-path item-path)))
                     (set! ever-seen #t)
                     (cond
                      ;; case 1, non-item (parent test) is 
                      ((and (equal? item-path "") ;; this is the parent test of the waiton being examined
                            is-completed
                            (or is-ok (not (null? (lset-intersection eq? mode '(toplevel)))))) ;;  itemmatch itemwait))))))
                       (set! parent-waiton-met #t))
                      ;; Special case for toplevel and KILLED
                      ((and (equal? item-path "") ;; this is the parent test
                            is-killed
                            (member 'toplevel mode))
                       (set! parent-waiton-met #t))
                      ;; For itemwait mode IFF the previous matching item is good the set parent-waiton-met
                      ((and have-itemized ;; how is that different from (member mode '(itemmatch itemwait)) ?????
                            ;; (not (equal? item-path "")) ;; this applies to both top level (to allow launching of next batch) and items
                            same-itempath)
                       (if (and is-completed is-ok)
                           (set! item-waiton-met #t))
                       (if (and (equal? item-path "") ;; if upstream rollup test is completed, parent-waiton-met is set
                                (or is-completed is-running));; this is the parent, set it to run if completed or running ;; BB1
                           (set! parent-waiton-met #t)))
                      ;; normal checking of parent items, any parent or parent item not ok blocks running
                      ((and is-completed
                            (or is-ok 
                                (member 'toplevel mode))              ;; toplevel does not block on FAIL
                            (and is-ok (member 'itemmatch mode))) ;; itemmatch blocks on not ok
                       (set! item-waiton-met #t)))))
                 tests) ;; end of item for-each


                ;; both requirements, parent and item-waiton must be met to NOT add item to
                ;; prereq's not met list

                ;; is:
                (if (not (or
                          (and (equal? ref-item-path "") parent-waiton-met)
                          item-waiton-met))
                    (set! result (append (if (null? tests) (list waitontest-name) tests) result))) ;; appends the string if the full record is not available
                ;; was briefly:
                ;; (if (not
                ;;      (and
                ;;       item-waiton-met
                ;;       (or parent-waiton-met (not (equal? ref-item-path "")))))
                ;;     ;;add to list
                ;;     (set! result (append (if (null? tests) (list waitontest-name) tests) result))) ;; appends the string if the full record is not available


                ;; if the test is not found then clearly the waiton is not met...
                ;; (if (not ever-seen)(set! result (cons waitontest-name result)))))
                (if (not ever-seen)
                    (set! result (append (if (null? tests)(list waitontest-name) tests) result)))))
            waitons)

           ;; TODO: for itemwait and itemmatch mode, filter out failed toplevel prereq test if any items passed.
           
           ;; a rewrite might help understanding, but quick fix is just remove tests from result which are completed/pass. -BB
           ;;(pp result)
           ;; (let ((prereq-tests-some-items-passed-list '(ref-test-name))) ;; seed with ref-test-name; do not wait on self.

           ;;   (for-each (lambda (test)
           ;;               (if (vector? test)
           ;;                   (if (and
           ;;                        (equal? (db:test-get-state test) "COMPLETED")
           ;;                        (member (db:test-get-status test) ok-statuses)
           ;;                        (not (equal? (db:test-get-item-path test) "")))
           ;;                       (set! prereq-tests-some-items-passed-list (cons (db:test-get-testname test) prereq-tests-some-items-passed-list)))))
           ;;             result)
           ;;   (set! prereq-tests-some-items-passed-list (delete-duplicates prereq-tests-some-items-passed-list))
                                               

           (delete-duplicates result)
           )))))
  
;;======================================================================
;; Just for sync, procedures to make sync easy
;;======================================================================

;; get an alist of record ids changed since time since-time
;;   '((runs . (1 2 3 ...))(steps . (5 6 7 ...) ...))
;;

Modified gutils.scm from [60c484ab36] to [c0f6b41f65].

21
22
23
24
25
26
27

28
29
30
31
32
33
34
	 (c2 (map string->number (string-split color2)))
	 (delta (map (lambda (a b)(abs (- a b))) c1 c2)))
    (null? (filter (lambda (x)(> x 3)) delta))))

(define gutils:colors
  '((PASS . "70 249 73")
    (FAIL . "253 33 49")

    (SKIP . "230 230 0")))

(define (gutils:get-color-spec effective-state)
  (or (alist-ref effective-state gutils:colors)
      (alist-ref 'FAIL gutils:colors)))

;; BBnote - state status dashboard button color / text defined here







>







21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
	 (c2 (map string->number (string-split color2)))
	 (delta (map (lambda (a b)(abs (- a b))) c1 c2)))
    (null? (filter (lambda (x)(> x 3)) delta))))

(define gutils:colors
  '((PASS . "70 249 73")
    (FAIL . "253 33 49")
    (PREQ_FAIL . "255 127 127")
    (SKIP . "230 230 0")))

(define (gutils:get-color-spec effective-state)
  (or (alist-ref effective-state gutils:colors)
      (alist-ref 'FAIL gutils:colors)))

;; BBnote - state status dashboard button color / text defined here
57
58
59
60
61
62
63

64
65
66
67
68
69
70
    ((LAUNCHED)         (list "101 123 142"  state))
    ((CHECK)            (list "255 100 50"   state))
    ((REMOTEHOSTSTART)  (list "50 130 195"   state))
    ((RUNNING STARTED)          (list "9 131 232"    state))
    ((KILLREQ)          (list "39 82 206"    state))
    ((KILLED)           (list "234 101 17"   state))
    ((NOT_STARTED)      (case (string->symbol status)

			  ((CHECK STARTED)(list (gutils:get-color-spec 'SKIP) state))
			  (else   (list "240 240 240"                 state))))
    ;; for xor mode below
    ;;
    ((CLEAN)
     (case (string->symbol status)
       ((CLEAN-FAIL CLEAN-CHECK CLEAN-ABORT)  (list "200 130 13" status)) ;; orange requested for these







>







58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
    ((LAUNCHED)         (list "101 123 142"  state))
    ((CHECK)            (list "255 100 50"   state))
    ((REMOTEHOSTSTART)  (list "50 130 195"   state))
    ((RUNNING STARTED)          (list "9 131 232"    state))
    ((KILLREQ)          (list "39 82 206"    state))
    ((KILLED)           (list "234 101 17"   state))
    ((NOT_STARTED)      (case (string->symbol status)
                          ((PREQ_FAIL)(list (gutils:get-color-spec 'PREQ_FAIL) status))
			  ((CHECK STARTED)(list (gutils:get-color-spec 'SKIP) state))
			  (else   (list "240 240 240"                 state))))
    ;; for xor mode below
    ;;
    ((CLEAN)
     (case (string->symbol status)
       ((CLEAN-FAIL CLEAN-CHECK CLEAN-ABORT)  (list "200 130 13" status)) ;; orange requested for these

Modified launch.scm from [0c1e4ef13e] to [d90ac4eb29].

1295
1296
1297
1298
1299
1300
1301

1302
1303
1304
1305
1306
1307
1308
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex

  (let* ( ;; (lock-key        (conc "test-" test-id))
	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin







>







1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (BB> "entered launch-test")
  (let* ( ;; (lock-key        (conc "test-" test-id))
	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin
1329
1330
1331
1332
1333
1334
1335

1336
1337
1338
1339
1340
1341
1342
       (list "MT_RUN_AREA_HOME" *toppath*)
       (list "MT_TEST_NAME" test-name)
       (list "MT_RUNNAME"   runname)
       (list "MT_ITEMPATH"  item-path)
       (list "MT_CONTOUR"   contour)
       )
      itemdat))

    (let* ((tregistry       (tests:get-all)) ;; third param (below) is system-allowed
           ;; for tconfig, why do we allow fallback to test-conf?
	   (tconfig         (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t)
				(begin
                                  (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.")
                                  test-conf))) ;; force re-read now that all vars are set
	   (useshell        (let ((ush (config-lookup *configdat* "jobtools"     "useshell")))







>







1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
       (list "MT_RUN_AREA_HOME" *toppath*)
       (list "MT_TEST_NAME" test-name)
       (list "MT_RUNNAME"   runname)
       (list "MT_ITEMPATH"  item-path)
       (list "MT_CONTOUR"   contour)
       )
      itemdat))
    (BB> "set env vars")
    (let* ((tregistry       (tests:get-all)) ;; third param (below) is system-allowed
           ;; for tconfig, why do we allow fallback to test-conf?
	   (tconfig         (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t)
				(begin
                                  (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.")
                                  test-conf))) ;; force re-read now that all vars are set
	   (useshell        (let ((ush (config-lookup *configdat* "jobtools"     "useshell")))
1374
1375
1376
1377
1378
1379
1380

1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392

1393
1394
1395
1396
1397
1398
1399

1400
1401
1402
1403
1404
1405
1406
1407
1408

1409
1410
1411
1412
1413
1414
1415
	   (cmdparms   #f)
	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	   (mt-bindir-path #f)
	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
				(if (args:get-arg "-logging")(list "-logging") '()))))

      ;; (if hosts (set! hosts (string-split hosts)))
      ;; set the megatest to be called on the remote host
      (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
      (set! mt-bindir-path (pathname-directory remote-megatest))
      (if launcher (set! launcher (string-split launcher)))
      ;; set up the run work area for this test
      (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
	       (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
	  (begin
	    (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
	    (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record
      

      ;; prevent overlapping actions - set to LAUNCHED as early as possible
      ;;
      ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
      (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f)
      ;; (pp (hash-table->alist tconfig))
      (set! diskpath (get-best-disk *configdat* tconfig))

      (if diskpath
	  (let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
	    (set! work-area (car dat))
	    (set! toptest-work-area (cadr dat))
	    (debug:print-info 2 *default-log-port* "Using work area " work-area))
	  (begin
	    (set! work-area (conc test-path "/tmp_run"))
	    (create-directory work-area #t)
	    (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run")))

      (set! cmdparms (base64:base64-encode 
		      (z3:encode-buffer 
		       (with-output-to-string
			 (lambda () ;; (list 'hosts     hosts)
			   (write (list (list 'testpath  test-path)
					;; (list 'transport (conc *transport-type*))
					;; (list 'serverinf *server-info*)







>











|
>







>









>







1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
	   (cmdparms   #f)
	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	   (mt-bindir-path #f)
	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
				(if (args:get-arg "-logging")(list "-logging") '()))))
      (BB> "entered let 1")
      ;; (if hosts (set! hosts (string-split hosts)))
      ;; set the megatest to be called on the remote host
      (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
      (set! mt-bindir-path (pathname-directory remote-megatest))
      (if launcher (set! launcher (string-split launcher)))
      ;; set up the run work area for this test
      (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
	       (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
	  (begin
	    (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
	    (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record

      (BB> "after launcher set")
      ;; prevent overlapping actions - set to LAUNCHED as early as possible
      ;;
      ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
      (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f)
      ;; (pp (hash-table->alist tconfig))
      (set! diskpath (get-best-disk *configdat* tconfig))
      (BB> "after launch state set")
      (if diskpath
	  (let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
	    (set! work-area (car dat))
	    (set! toptest-work-area (cadr dat))
	    (debug:print-info 2 *default-log-port* "Using work area " work-area))
	  (begin
	    (set! work-area (conc test-path "/tmp_run"))
	    (create-directory work-area #t)
	    (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run")))
      (BB> "after disk path set")
      (set! cmdparms (base64:base64-encode 
		      (z3:encode-buffer 
		       (with-output-to-string
			 (lambda () ;; (list 'hosts     hosts)
			   (write (list (list 'testpath  test-path)
					;; (list 'transport (conc *transport-type*))
					;; (list 'serverinf *server-info*)
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
					(list 'target    mt_target)
					(list 'contour   contour)
					(list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
					(list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
					(list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
					(list 'runname   runname)
					(list 'mt-bindir-path mt-bindir-path))))))))
      
      ;; clean out step records from previous run if they exist
      ;; (rmt:delete-test-step-records run-id test-id)
      ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
      (if (common:file-exists? work-area)
	  (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
      (cond
       ;; ((and launcher hosts) ;; must be using ssh hostname







|







1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
					(list 'target    mt_target)
					(list 'contour   contour)
					(list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
					(list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
					(list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
					(list 'runname   runname)
					(list 'mt-bindir-path mt-bindir-path))))))))
      (BB> "after cmdparams set")
      ;; clean out step records from previous run if they exist
      ;; (rmt:delete-test-step-records run-id test-id)
      ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
      (if (common:file-exists? work-area)
	  (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
      (cond
       ;; ((and launcher hosts) ;; must be using ssh hostname
1456
1457
1458
1459
1460
1461
1462

1463
1464
1465
1466
1467
1468
1469
1470
	(set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
      ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
      (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
      (debug:print 1 *default-log-port* "Launching " work-area)
      ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done
      (debug:print 4 *default-log-port* "fullcmd: " fullcmd)
      (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible.

      (let* ((commonprevvals (alist->env-vars
			      (hash-table-ref/default *configdat* "env-override" '())))
	     (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
			      (append (list (list "MT_TEST_RUN_DIR" work-area)
					    (list "MT_TEST_NAME" test-name)
					    (list "MT_ITEM_INFO" (conc itemdat)) 
					    (list "MT_RUNNAME"   runname)
					    (list "MT_TARGET"    mt_target)







>
|







1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
	(set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
      ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
      (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
      (debug:print 1 *default-log-port* "Launching " work-area)
      ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done
      (debug:print 4 *default-log-port* "fullcmd: " fullcmd)
      (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible.
      (BB> "after set *last-launch*")
      (let* ((commonprevvals (alist->env-vars ;; observed this let can be very slow. (>5 sec)
			      (hash-table-ref/default *configdat* "env-override" '())))
	     (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
			      (append (list (list "MT_TEST_RUN_DIR" work-area)
					    (list "MT_TEST_NAME" test-name)
					    (list "MT_ITEM_INFO" (conc itemdat)) 
					    (list "MT_RUNNAME"   runname)
					    (list "MT_TARGET"    mt_target)
1483
1484
1485
1486
1487
1488
1489

1490
1491
1492
1493
1494
1495
1496
					  (if launchwait
					      cmdstr
					      (conc cmdstr " >> mt_launch.log 2>&1 &")))
					(car fullcmd))
				    (if useshell
					'()
					(cdr fullcmd)))))

        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
	;; (rmt:no-sync-del! lock-key)         ;; release the lock for starting this test
	(if (not launchwait) ;; give the OS a little time to allow the process to start
	    (thread-sleep! 0.01))
	(with-output-to-file "mt_launch.log"
	  (lambda ()
	    (print "LAUNCHCMD: " (string-intersperse fullcmd " "))







>







1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
					  (if launchwait
					      cmdstr
					      (conc cmdstr " >> mt_launch.log 2>&1 &")))
					(car fullcmd))
				    (if useshell
					'()
					(cdr fullcmd)))))
        (BB> "let depth 2 entered")
        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
	;; (rmt:no-sync-del! lock-key)         ;; release the lock for starting this test
	(if (not launchwait) ;; give the OS a little time to allow the process to start
	    (thread-sleep! 0.01))
	(with-output-to-file "mt_launch.log"
	  (lambda ()
	    (print "LAUNCHCMD: " (string-intersperse fullcmd " "))

Modified megatest-version.scm from [ea84f1a115] to [f7a1ccbc61].

1
2
3
4
5
6
7
;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

(declare (unit megatest-version))

(define megatest-version 1.6429)






|

1
2
3
4
5
6
7
;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

(declare (unit megatest-version))

(define megatest-version 1.6431)

Modified runs.scm from [147cbc54ec] to [59d5521d01].

202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
				    (if (string? jobg-count)
					(string->number jobg-count)
					jobg-count))))
    (if (> (+ num-running num-running-in-jobgroup) 0)
	(runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
    (if (not (eq? *last-num-running-tests* num-running))
	(begin
	  (debug:print 2 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	  (set! *last-num-running-tests* num-running)))
    (if (not (eq? 0 *globalexitstatus*))
	(list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)
	(let ((can-not-run-more (cond
				 ;; if max-concurrent-jobs is set and the number running is greater 
				 ;; than it then cannot run more jobs
				 ((and max-concurrent-jobs (>= num-running max-concurrent-jobs))







|







202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
				    (if (string? jobg-count)
					(string->number jobg-count)
					jobg-count))))
    (if (> (+ num-running num-running-in-jobgroup) 0)
	(runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
    (if (not (eq? *last-num-running-tests* num-running))
	(begin
	  (debug:print 1 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	  (set! *last-num-running-tests* num-running)))
    (if (not (eq? 0 *globalexitstatus*))
	(list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)
	(let ((can-not-run-more (cond
				 ;; if max-concurrent-jobs is set and the number running is greater 
				 ;; than it then cannot run more jobs
				 ((and max-concurrent-jobs (>= num-running max-concurrent-jobs))
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data
    ;;
    ;; What happended, this code is now duplicated in tests!?
    ;;
    ;;======================================================================
    
    (if (not (null? test-names))
	(let loop ((hed (car test-names))   ;; NOTE: This is the main loop that iterates over the test-names
		   (tal (cdr test-names)))         ;; 'return-procs tells the config reader to prep running system but return a proc
	  (change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening.
	  (setenv "MT_TEST_NAME" hed) ;; 
	  (let*-values (((waitons waitors config)(tests:get-waitons hed all-tests-registry)))
	    (debug:print-info 8 *default-log-port* "waitons: " waitons)
	    ;; check for hed in waitons => this would be circular, remove it and issue an
	    ;; error
	    (if (or (member hed waitons)
		    (member hed waitors))
		(begin
		  (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton or waitor, please correct this!")
		  (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons))
		  (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors))))
	    
	    ;; (items   (items:get-items-from-config config)))
	    (if (not (hash-table-ref/default test-records hed #f))
		(hash-table-set! test-records
				 hed (vector hed     ;; 0
					     config  ;; 1
					     waitons ;; 2
					     (config-lookup config "requirements" "priority")     ;; priority 3
					     (tests:get-items config) ;; expand the [items] and or [itemstable] into explict items
					     #f      ;; itemsdat 5
					     #f      ;; spare - used for item-path
					     waitors ;; 
					     )))
	    (for-each 
	     (lambda (waiton)
	       (if (and waiton (not (member waiton test-names)))
		   (let* ((waiton-record   (hash-table-ref/default test-records waiton #f))
			  (waiton-tconfig  (if waiton-record (vector-ref waiton-record 1) #f))
			  (waiton-itemized (and waiton-tconfig
						(or (hash-table-ref/default waiton-tconfig "items" #f)
						    (hash-table-ref/default waiton-tconfig "itemstable" #f))))
			  (itemmaps        (tests:get-itemmaps config))  ;; (configf:lookup config "requirements" "itemmap"))
			  (new-test-patts  (tests:extend-test-patts test-patts hed waiton itemmaps)))
		     (debug:print-info 0 *default-log-port* "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items")
		     ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%"
		     ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt
		     ;; is this satisfied by merely appending "/" to the waiton name added to the list?
		     ;;
		     ;; This approach causes all of the items in an upstream test to be run 








|

















|
|
|
|

|













|







463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data
    ;;
    ;; What happended, this code is now duplicated in tests!?
    ;;
    ;;======================================================================
    
    (if (not (null? test-names)) ;; BEGIN test-names loop
	(let loop ((hed (car test-names))   ;; NOTE: This is the main loop that iterates over the test-names
		   (tal (cdr test-names)))         ;; 'return-procs tells the config reader to prep running system but return a proc
	  (change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening.
	  (setenv "MT_TEST_NAME" hed) ;; 
	  (let*-values (((waitons waitors config)(tests:get-waitons hed all-tests-registry)))
	    (debug:print-info 8 *default-log-port* "waitons: " waitons)
	    ;; check for hed in waitons => this would be circular, remove it and issue an
	    ;; error
	    (if (or (member hed waitons)
		    (member hed waitors))
		(begin
		  (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton or waitor, please correct this!")
		  (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons))
		  (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors))))
	    
	    ;; (items   (items:get-items-from-config config)))
	    (if (not (hash-table-ref/default test-records hed #f))
		(hash-table-set! test-records ;; BB: we are doing a manual make-tests:testqueue
				 hed (vector hed     ;; 0 ;; testname
					     config  ;; 1 
					     waitons ;; 2 
					     (config-lookup config "requirements" "priority")     ;; priority 3
					     (tests:get-items config) ;; 4 ;; expand the [items] and or [itemstable] into explict items
					     #f      ;; itemsdat 5
					     #f      ;; spare - used for item-path
					     waitors ;; 
					     )))
	    (for-each 
	     (lambda (waiton)
	       (if (and waiton (not (member waiton test-names)))
		   (let* ((waiton-record   (hash-table-ref/default test-records waiton #f))
			  (waiton-tconfig  (if waiton-record (vector-ref waiton-record 1) #f))
			  (waiton-itemized (and waiton-tconfig
						(or (hash-table-ref/default waiton-tconfig "items" #f)
						    (hash-table-ref/default waiton-tconfig "itemstable" #f))))
			  (itemmaps        (tests:get-itemmaps config))  ;; (configf:lookup config "requirements" "itemmap"))
			  (new-test-patts  (tests:extend-test-patts test-patts hed waiton itemmaps)))   ;; BB:  items expanded here.
		     (debug:print-info 0 *default-log-port* "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items")
		     ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%"
		     ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt
		     ;; is this satisfied by merely appending "/" to the waiton name added to the list?
		     ;;
		     ;; This approach causes all of the items in an upstream test to be run 

540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
		     ;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons
		     )))
	     (delete-duplicates (append waitons waitors)))
	    (let ((remtests (delete-duplicates (append waitons tal))))
	      (if (not (null? remtests))
		  (begin
		    ;; (debug:print-info 0 *default-log-port* "Preprocessing continues for " (string-intersperse remtests ", "))
		    (loop (car remtests)(cdr remtests))))))))

    (if (not (null? required-tests))
	(debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue"))
    ;; NOTE: these are all parent tests, items are not expanded yet.
    (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
    (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
      (if (> (length (hash-table-keys test-records)) 0)
	  (let* ((keep-going        #t)
		 (run-queue-retries 5)
		 (th1        (make-thread (lambda ()
					    (handle-exceptions
						exn
						(begin
						  (print-call-chain)
						  (print " message: " ((condition-property-accessor 'exn 'message) exn)))
					      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
								    (any->number reglen) all-tests-registry)))
					  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()				    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (handle-exceptions
							       exn
							       (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id)
							       (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime)))
							run-ids)))
					  "runs: mark-incompletes")))
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th1)
	    (set! keep-going #f)
	    (thread-join! th2)







|


















|







|







540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
		     ;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons
		     )))
	     (delete-duplicates (append waitons waitors)))
	    (let ((remtests (delete-duplicates (append waitons tal))))
	      (if (not (null? remtests))
		  (begin
		    ;; (debug:print-info 0 *default-log-port* "Preprocessing continues for " (string-intersperse remtests ", "))
		    (loop (car remtests)(cdr remtests)))))))) ;; END test-names loop

    (if (not (null? required-tests))
	(debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue"))
    ;; NOTE: these are all parent tests, items are not expanded yet.
    (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
    (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
      (if (> (length (hash-table-keys test-records)) 0)
	  (let* ((keep-going        #t)
		 (run-queue-retries 5)
		 (th1        (make-thread (lambda ()
					    (handle-exceptions
						exn
						(begin
						  (print-call-chain)
						  (print " message: " ((condition-property-accessor 'exn 'message) exn)))
					      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
								    (any->number reglen) all-tests-registry)))
					  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()			 ;; BBQ: why are we visiting ALL runs here?	    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (handle-exceptions
							       exn
							       (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id)
							       (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
							run-ids)))
					  "runs: mark-incompletes")))
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th1)
	    (set! keep-going #f)
	    (thread-join! th2)
628
629
630
631
632
633
634










635
636
637
638









639
640
641
642
643
644
645
646
647
648
































649
650
651
652
653





















654
655
656
657

658
659

660
661
662
663
664
665



666
667


668
669
670
671
672
673
674


675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690










691

692


693
694


695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714

715
716
717

718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757

758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777

778
779
780
781
782
783
784





785


786
787
788
789
790
791
792
793

794
795
796



797

798

799
800


801
802
803
804
805
806
807
;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216
;;
(define (runs:loop-values tal reg reglen regfull reruns)
  (list (runs:queue-next-hed tal reg reglen regfull)      ;; hed
        (runs:queue-next-tal tal reg reglen regfull)      ;; tal
        (runs:queue-next-reg tal reg reglen regfull)      ;; reg
        reruns))                                          ;; reruns











(define runs:nothing-left-in-queue-count 0)

;; BB: for future reference - suspect target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this.  on first pass, var not set, on second pass, ok.  









(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*
					       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
					       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps)
				  '()))))
































	 ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (prereq-fails    (runs:calc-prereq-fail prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met))
	 (runnables       (runs:calc-runnable prereqs-not-met)))





















    (debug:print-info 4 *default-log-port* "START OF INNER COND #2 "
		      "\n can-run-more:    " can-run-more
		      "\n testname:        " hed
		      "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)

		      "\n non-completed:   " (runs:pretty-string non-completed) 
		      "\n prereq-fails:    " (runs:pretty-string prereq-fails)

		      "\n fails:           " (runs:pretty-string fails)
		      "\n testmode:        " testmode
		      "\n (member 'toplevel testmode): " (member 'toplevel testmode)
		      "\n (null? non-completed):    " (null? non-completed)
		      "\n reruns:          " reruns
		      "\n items:           " items



		      "\n can-run-more:    " can-run-more)



   (cond
     ;; all prereqs met, fire off the test
     ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch

     ((and (not (member 'toplevel testmode))
	   (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a)
		   '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here


      (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue")
      (if (or (not (null? tal))
	      (not (null? reg)))
          (runs:loop-values tal reg reglen regfull reruns)
	  (begin
	    (debug:print-info 0 *default-log-port* "Nothing left in the queue!")
	    ;; If get here twice then we know we've tried to expand all items
	    ;; since there must be a logic issue with the handling of loops in the 
	    ;; items expand phase we will brute force an exit here.
	    (if (> runs:nothing-left-in-queue-count 2)
		(begin
		  (debug:print 0 *default-log-port* "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness")
		  (exit 0))
		(set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1)))
	    #f)))











     ;; 

     ((or (null? prereqs-not-met)


	  (and (member 'toplevel testmode)
	       (null? non-completed)))


      (debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))")
      (let ((test-name (tests:testqueue-get-testname test-record)))
	(setenv "MT_TEST_NAME" test-name) ;; 
	(setenv "MT_RUNNAME"   runname)
	(runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
	(let ((items-list (items:get-items-from-config tconfig)))
	  (if (list? items-list)
	      (begin
		(if (null? items-list)
		    (let ((test-id   (rmt:get-test-id run-id test-name ""))
			  (num-items (rmt:test-toplevel-num-items run-id test-name)))
		      (if (and test-id
			       (not (> num-items 0)))
			  (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites"))))
		(tests:testqueue-set-items! test-record items-list)
		(list hed tal reg reruns))
	      (begin
		(debug:print-error 0 *default-log-port* "The proc from reading the items table did not yield a list - please report this")
		(exit 1))))))


     ((and (null? fails)
	   (null? prereq-fails)
	   (not (null? non-completed)))

      (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
        		      (append newtal reruns)))
	     ;; prereqstrs is a list of test names as strings that are prereqs for hed
             (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
						 prereqs-not-met)))
	     ;; a prereq that is not found in allinqueue will be put in the notinqueue list
	     ;; 
             ;; (notinqueue (filter (lambda (x)
             ;;    		   (not (member x allinqueue)))
             ;;    		 prereqstrs))
	     (give-up    #f))

	;; We can get here when a prereq has not been run due to *it* having a prereq that failed.
	;; We need to use this to dequeue this item as CANNOTRUN
	;; 
	(if (member 'toplevel testmode) ;; '(toplevel)) ;; NOTE: this probably should be (member 'toplevel testmode)
	    (for-each (lambda (prereq)
			(if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN)
			    (set! give-up #t)))
		      prereqstrs))

	(if (and give-up
		 (not (and (null? tal)(null? reg))))
	    (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
		  (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
	      (debug:print 1 *default-log-port* "WARNING: test " hed " has discarded prerequisites, removing it from the queue")

	      (let ((test-id (rmt:get-test-id run-id hed "")))
		(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
                  (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns)
                  ))
	      (list (car newtal)(append (cdr newtal) reg) '() reruns))))

     ((and (null? fails)
	   (null? prereq-fails)
	   (null? non-completed))

      (if  (runs:can-keep-running? hed 20)
	  (begin
	    (runs:inc-cant-run-tests hed)
	    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0))
	    ;; getting here likely means the system is way overloaded, kill a full minute before continuing
	    (thread-sleep! 60)
	    ;; num-retries code was here
	    ;; we use this opportunity to move contents of reg to tal
	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
	  (begin
	    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
	    (let ((test-id (rmt:get-test-id run-id hed "")))
	      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
            (runs:loop-values tal reg reglen regfull reruns)
            )))

     ((and 
       (or (not (null? fails))
	   (not (null? prereq-fails)))
       (member 'normal testmode))

      (debug:print-info 1 *default-log-port* "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
			(string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
			", removing it from to-do list")
      (let ((test-id (rmt:get-test-id run-id hed "")))
	(if test-id
	    (if (not (null? prereq-fails))
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")





		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites"))))


      (if (or (not (null? reg))(not (null? tal)))
	  (begin
	    (hash-table-set! test-registry hed 'CANNOTRUN)
            (runs:loop-values tal reg reglen regfull (cons hed reruns))
            )
	  #f)) ;; #f flags do not loop

     ((and (not (null? fails))(member 'toplevel testmode))

      (if (or (not (null? reg))(not (null? tal)))
	   (list (car newtal)(append (cdr newtal) reg) '() reruns)
	  #f)) 



     ((null? runnables) #f) ;; if we get here and non-completed is null then it is all over.

     (else

      (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now")
      (list (car newtal)(cdr newtal) reg reruns)))))



(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst)
  (if (null? inlst)
      '()
      (map (lambda (t)
	     (cond
	      ((vector? t)







>
>
>
>
>
>
>
>
>
>



|
>
>
>
>
>
>
>
>
>


|







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
>
|
|
>
|
|

|
|
|
>
>
>
|
|
>
>
|
|
|
|
|
|
|
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

>
>
>
>
>
>
>
>
>
>
|
>
|
>
>
|
|
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

>
|
|
|
>
|
|
|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|

|
|
|
|
|

|
|
|
|
|
|
|
|
|

|
|
|
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|
|
>
|
|
|
|
|
|
|
>
>
>
>
>
|
>
>
|
|
|
|
|
|

|
>
|
|
|
>
>
>
|
>
|
>
|
|
>
>







628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216
;;
(define (runs:loop-values tal reg reglen regfull reruns)
  (list (runs:queue-next-hed tal reg reglen regfull)      ;; hed
        (runs:queue-next-tal tal reg reglen regfull)      ;; tal
        (runs:queue-next-reg tal reg reglen regfull)      ;; reg
        reruns))                                          ;; reruns

;; objective - iterate thru tests
;;    => want to prioritize tests we haven't seen before
;;    => sometimes need to squeeze things in (added to reg)
;;    => review of a previously seen test is higher priority of never visited test
;; reg - list of previously visited tests
;; tal - list of never visited tests
;;   prefer next hed to be from reg than tal.



(define runs:nothing-left-in-queue-count 0)

;; BB: for future reference - suspect target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this.  on first pass, var not set, on second pass, ok.

;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature:
;;    (let loop ((hed         (car sorted-test-names))
;;	         (tal         (cdr sorted-test-names))
;;	         (reg         '()) ;; registered, put these at the head of tal 
;;	         (reruns      '()))

;; runs:expand-items: for a given test, expand its items into real tests ready to be processed at this time
;;  this procedure's operation only makes sense in context of runs-tests-queue.
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*
					       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
					       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps)
				  '()))))
         (have-itemized (not (null? (lset-intersection eq? testmode '(itemmatch itemwait)))))
         (ok-statuses '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))

         ;; (prereqs-not-met
         ;;  (filter (lambda (test)
         ;;               (if (vector? test)  ;; BB: result may be a collection of strings or test vectors-- why not just test vectors?
         ;;                   (let ((testname (db:test-get-testname test))
         ;;                         (itempath (db:test-get-item-path test))
         ;;                         (state (db:test-get-state test) )
         ;;                         (status (db:test-get-status test)))
         ;;                     (cond
         ;;                      ((and (equal? state "COMPLETED") (member status ok-statuses)) #f)
         ;;                      ((and have-itemized (equal? "" itempath) (member testname prereq-tests-some-items-passed-list)) #f)
         ;;                      (else #t)))
         ;;                   test))
         ;;             (delete-duplicates prereqs)))

         (prereq-items-completed
          (filter (lambda (test)
                       (if (vector? test)  ;; BB: result may be a collection of strings or test vectors-- why not just test vectors?
                           (let ((testname (db:test-get-testname test))
                                 (itempath (db:test-get-item-path test))
                                 (state (db:test-get-state test) )
                                 (status (db:test-get-status test)))
                             (cond
                              ((equal? state "COMPLETED") #t)
                              ((not (equal? "" itempath))  #f)
                              (else #f)))
                           #f))
                       prereqs))

          
	 ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails           (runs:calc-fails prereqs)) ;; prereqs 
	 (prereq-fails    (runs:calc-prereq-fail prereqs)) ;; filter - get NOT_STARTED's which are not status n/a or KEEP_TRYING
	 (non-completed   (runs:calc-not-completed prereqs))
	 (runnable-prereqs       (runs:calc-runnable prereqs))
         
         (unexpanded-prereqs
          (filter (lambda (testname)
                    (let* ((test-rec (hash-table-ref test-records testname))
                           (items       (tests:testqueue-get-items  test-rec)))
                      ;;(BB> "HEY " testname "=>"items)
                      (or (procedure? items)(eq? items 'have-procedure))))
                  waitons))
         (completed-prereq-items 
          (let ((foo (begin ;;(BB> "hello prereqs: "prereqs)
                            #t))
                (res (filter (lambda (test)
                               ;;(BB> "foo - "test)
                               (and (vector? test)
                                    (equal? "COMPLETED" (db:test-get-state test))
                                    (equal? "COMPLETED" (db:test-get-state test))
                                    (not (equal? "" (db:test-get-item-path test)))))
                             prereqs)))
            res)) 

         )
    (debug:print-info 1 *default-log-port* "START OF INNER COND #2 "
		      "\n can-run-more:            " can-run-more
		      "\n testname:                " hed
		      "\n prereqs:                 " (runs:pretty-string prereqs)
                      "\n completed-prereq-items:  " (runs:pretty-string completed-prereq-items)
		      "\n non-completed:           " (runs:pretty-string non-completed) 
		      "\n prereq-fails:            " (runs:pretty-string prereq-fails)
                      "\n runnable-prereqs:        " (runs:pretty-string runnable-prereqs)
		      "\n fails:                   " (runs:pretty-string fails)
		      "\n testmode:                " testmode
		      "\n (member 'toplevel testmode): " (member 'toplevel testmode)
		      "\n (null? non-completed):      " (null? non-completed)
		      "\n reruns:                  " reruns
		      "\n items:                   " items
		      "\n unexpanded-prereqs:      " unexpanded-prereqs ;;all-prereqs-expanded
                      "\n completed-prereq-items:  " completed-prereq-items
                      "\n have-itemized:           " have-itemized
		      "\n can-run-more:            " can-run-more)
    
    ;;(BB> "before runs:expand-items cond")
    (let ((res
           (cond
            ;; all prereqs met, fire off the test
            ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch
            ;; runs:expand-items case: test of interest not toplevel and IS blackballed -> ???
            ((and (not (member 'toplevel testmode)) ;; test has been blackballed elsewhere
                  (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a)
                          '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here
             ;;(BB> "cb1")
             
             (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue")
             (if (or (not (null? tal))
                     (not (null? reg))) 
                 (runs:loop-values tal reg reglen regfull reruns) ;; blackballed test - throw it away
                 (begin
                   (debug:print-info 0 *default-log-port* "Nothing left in the queue!")
                   ;; If get here twice then we know we've tried to expand all items
                   ;; since there must be a logic issue with the handling of loops in the 
                   ;; items expand phase we will brute force an exit here.
                   (if (> runs:nothing-left-in-queue-count 2)
                       (begin
                         (debug:print 0 *default-log-port* "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness")
                         (exit 0))
                       (set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1)))
                   #f)))

            ;;; desired result of below cond branch:
            ;;   we want to expand items in our test of interest (hed) in the following cases:
            ;;    case 1 - mode is itemmatch or itemwait: (DONE)
            ;;       - all prereq tests have been expanded
            ;;       - at least one prereq's items have completed
            ;;    case 2 - mode is toplevel   (PARTIAL)
            ;;       - prereqs are completed.
            ;;       - or no prereqs can complete (TODO)
            ;;    case 3 - mode not specified (DONE)
            ;;       - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current)          
                   
            ;; runs:expand-items case: toplevel or else no dangling prerequeistes -- expand items now.
            ((or
              (and have-itemized (null? unexpanded-prereqs) (not (null? completed-prereq-items)))
              (null? prereqs)            ;; nothing is in our way to proceed (need to expand this to an item level check.)
              (and (member 'toplevel testmode)   ;; for toplevel test - proceed (nothing in our way)
                   (null? non-completed)))
             ;;(BB> "cb2")

             (debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs) (and (member 'toplevel testmode)(null? non-completed)))")
             (let ((test-name (tests:testqueue-get-testname test-record)))
               (setenv "MT_TEST_NAME" test-name) ;; hack to give context to get-items-from-config TODO: call-with-environment-variables
               (setenv "MT_RUNNAME"   runname)   
               (runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
               (let ((items-list (items:get-items-from-config tconfig))) ;; BB: RIGHT HERE is where item expansion occurs..  target vars are not expanded to env vars at this point (item expansion using [items]\nwhatever [system echo $TARGETVAR] doesnt work right whereas [system echo #{targetvar}] does.. Tal and Randy have tix on this.
                 (if (list? items-list)
                     (begin                      ;; we have discovered we have items we need to process, so stuff them into test list and recur
                       (if (null? items-list)
                           (let ((test-id   (rmt:get-test-id run-id test-name ""))
                                 (num-items (rmt:test-toplevel-num-items run-id test-name)))
                             (if (and test-id
                                      (not (> num-items 0)))
                                 (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites"))))
                       (tests:testqueue-set-items! test-record items-list) ; stuffing happens here
                       (list hed tal reg reruns)) ;; return value..
                     (begin
                       (debug:print-error 0 *default-log-port* "The proc from reading the items table did not yield a list - please report this")
                       (exit 1))))))

            ;; runs:expand-items case: no fails, no prereq-fails, some non-completed
            ((and (null? fails)
                  (null? prereq-fails)
                  (not (null? non-completed)))
             ;;(BB> "cb3")
             (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
                                     (append newtal reruns)))
                    ;; prereqstrs is a list of test names as strings that are prereqs for hed
                    (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
                                                        prereqs)))
                    ;; a prereq that is not found in allinqueue will be put in the notinqueue list
                    ;; 
                    ;; (notinqueue (filter (lambda (x)
                    ;;    		   (not (member x allinqueue)))
                    ;;    		 prereqstrs))
                    (give-up    #f))

               ;; We can get here when a prereq has not been run due to *it* having a prereq that failed.
               ;; We need to use this to dequeue this item as CANNOTRUN
               ;; 
               (if (member 'toplevel testmode) ;; '(toplevel)) ;; NOTE: this probably should be (member 'toplevel testmode)
                   (for-each (lambda (prereq)
                               (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN)
                                   (set! give-up #t)))
                             prereqstrs))

               (if (and give-up
                        (not (and (null? tal)(null? reg))))
                   (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
                         (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
                     (debug:print 1 *default-log-port* "WARNING: test " hed " has discarded prerequisites, removing it from the queue")

                     (let ((test-id (rmt:get-test-id run-id hed "")))
                       (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")))
                     
                     (if (and (null? trimmed-tal)
                              (null? trimmed-reg))
                         #f
                         (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns)
                         ))
                   (list (car newtal)(append (cdr newtal) reg) '() reruns))))

            ((and (null? fails) ;; have not-started tests, but unable to run them.  everything looks completed with no prospect of unsticking something that is stuck.  we should mark hed as moribund and exit or continue if there are more tests to consider
                  (null? prereq-fails)
                  (null? non-completed))
             ;;(BB> "cb4")
             (if  (runs:can-keep-running? hed 20)
                  (begin
                    (runs:inc-cant-run-tests hed)
                    (debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) ;; 
                    ;; getting here likely means the system is way overloaded, kill a full minute before continuing
                    (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing)
                    ;; num-retries code was here
                    ;; we use this opportunity to move contents of reg to tal
                    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
                  (begin
                    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
                    (let ((test-id (rmt:get-test-id run-id hed "")))
                      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
                    (runs:loop-values tal reg reglen regfull reruns)
                    )))

            ((and 
              (or (not (null? fails))
                  (not (null? prereq-fails)))
              (member 'normal testmode))
             ;;(BB> "cb5")
             (debug:print-info 1 *default-log-port* "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
                               (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
                               ", removing it from to-do list")
             (let ((test-id (rmt:get-test-id run-id hed "")))
               (if test-id
                   (if (not (null? prereq-fails))
                       ;;was: (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
                       (rmt:set-state-status-and-roll-up-items run-id test-id #f "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
                       (begin
                         ;;(debug:print 4 *default-log-port*"BB> set PREQ_FAIL on "hed)

                         
                         ;;was: (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")
                         (rmt:set-state-status-and-roll-up-items run-id test-id #f "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")
                         )))) 
             (if (or (not (null? reg))(not (null? tal)))
                 (begin
                   (hash-table-set! test-registry hed 'CANNOTRUN)
                   (runs:loop-values tal reg reglen regfull (cons hed reruns))
                   )
                 #f)) ;; #f flags do not loop

            ((and (not (null? fails))(member 'toplevel testmode))
             ;;(BB> "cb6")
             (if (or (not (null? reg))(not (null? tal)))
                 (list (car newtal)(append (cdr newtal) reg) '() reruns)
                 #f))
            
            ((null? runnable-prereqs)
             ;;(BB> "cb7")
             #f) ;; if we get here and non-completed is null then it is all over.
            
            (else
             ;;(BB> "cb8")
             (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now")
             (list (car newtal)(cdr newtal) reg reruns)))))
      ;;(BB> "after runs:expand-items big cond")
      res)))

(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst)
  (if (null? inlst)
      '()
      (map (lambda (t)
	     (cond
	      ((vector? t)
816
817
818
819
820
821
822



823
824
825
826
827
828
829
	       (conc t))))
	   inlst)))


;;  hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps)
(define (runs:process-expanded-tests runsdat testdat)
  ;; unroll the contents of runsdat and testdat (due to ongoing refactoring).



  (let* ((hed                    (runs:testdat-hed testdat))
	 (tal                    (runs:testdat-tal testdat))
	 (reg                    (runs:testdat-reg testdat))
	 (reruns                 (runs:testdat-reruns testdat))
	 (test-name              (runs:testdat-test-name testdat))
	 (item-path              (runs:testdat-item-path testdat))
	 (jobgroup               (runs:testdat-jobgroup testdat))







>
>
>







931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
	       (conc t))))
	   inlst)))


;;  hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps)
(define (runs:process-expanded-tests runsdat testdat)
  ;; unroll the contents of runsdat and testdat (due to ongoing refactoring).
  (debug:print 2 *default-log-port* "runs:process-expanded-tests; testdat:" )
  (debug:print 2 *default-log-port* (with-output-to-string
                                            (lambda () (pp (runs:testdat->alist testdat) ))))
  (let* ((hed                    (runs:testdat-hed testdat))
	 (tal                    (runs:testdat-tal testdat))
	 (reg                    (runs:testdat-reg testdat))
	 (reruns                 (runs:testdat-reruns testdat))
	 (test-name              (runs:testdat-test-name testdat))
	 (item-path              (runs:testdat-item-path testdat))
	 (jobgroup               (runs:testdat-jobgroup testdat))
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	(debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))

    ;; Don't know at this time if the test have been launched at some time in the past
    ;; i.e. is this a re-launch?
    (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info)
    
    (cond
     
     ;; Check item path against item-patts, 
     ;;
     ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
      ;; else the run is stuck, temporarily or permanently
      ;; but should check if it is due to lack of resources vs. prerequisites
      (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)







|







1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	(debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))

    ;; Don't know at this time if the test have been launched at some time in the past
    ;; i.e. is this a re-launch?
    (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info)
    
    (cond ; cond 894- 1067
     
     ;; Check item path against item-patts, 
     ;;
     ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
      ;; else the run is stuck, temporarily or permanently
      ;; but should check if it is due to lack of resources vs. prerequisites
      (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014

1015


1016
1017
1018




1019

1020
1021
1022

1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
	    ;; couldn't run, take a breather
	    (if  (runs:lownoise "Waiting for more work to do..." 60)
		 (debug:print-info 0 *default-log-port* "Waiting for more work to do..."))
	    (thread-sleep! 1)
	    (list (car newtal)(cdr newtal) reg reruns))
	  ;; the waiton is FAIL so no point in trying to run hed ever again
	  (if (or (not (null? reg))(not (null? tal)))
	      (if (vector? hed)
		  (begin
		    (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path
				 " from the launch list as it has prerequistes that are FAIL")
		    (let ((test-id (rmt:get-test-id run-id hed "")))

		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))


		    (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)
		    ;; This next is for the items




		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)

		    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed)
		    (runs:loop-values tal reg reglen regfull reruns))
		  (let ((nth-try (hash-table-ref/default test-registry hed 0)))

		    (cond
		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
		      (thread-sleep! 4)
		      (runs:loop-values tal reg reglen regfull reruns))
		     ((or (not nth-try)
			  (and (number? nth-try)
			       (< nth-try 10)))
		      (hash-table-set! test-registry hed (if (number? nth-try)
							     (+ nth-try 1)
							     0))
		      (if (runs:lownoise (conc "not removing test " hed) 60)
			  (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites"))
		      ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
		      (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		      (runs:loop-values newtal reg reglen regfull reruns))
		     ((symbol? nth-try)
		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
			  (if (null? tal)
			      #f ;; yes, really
			      (list (car tal)(cdr tal) reg reruns))
			  (begin
			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state " nth-try " will be overridden and we'll retry."))
			    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
			    (hash-table-set! test-registry hed 0)
			    (runs:loop-values newtal reg reglen regfull))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)







|




>
|
>
>
|


>
>
>
>
|
>


|
>






|










|






|







1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
	    ;; couldn't run, take a breather
	    (if  (runs:lownoise "Waiting for more work to do..." 60)
		 (debug:print-info 0 *default-log-port* "Waiting for more work to do..."))
	    (thread-sleep! 1)
	    (list (car newtal)(cdr newtal) reg reruns))
	  ;; the waiton is FAIL so no point in trying to run hed ever again
	  (if (or (not (null? reg))(not (null? tal)))
	      (if (or (vector? hed)  (not (null? fails))) ;; BB: why do we need a vector?  in my case, fails is populated (prereq failed), reg is not nul, and we really want to drop this one
		  (begin
		    (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path
				 " from the launch list as it has prerequistes that are FAIL")
		    (let ((test-id (rmt:get-test-id run-id hed "")))
		      (if test-id
                          ;; was: ;(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
                          (rmt:set-state-status-and-roll-up-items run-id test-id #f "NOT_STARTED"  "PREQ_FAIL" "Failed to run due to failed prerequisites")))
                                        
                    (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)
		    ;; This next is for the items

                    (if (not (null? fails))
                        ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f)
                        (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) 
                        ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
                        (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) )
		    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed)
		    (runs:loop-values tal reg reglen regfull reruns))
		  (let ((nth-try (hash-table-ref/default test-registry hed 0))) ;; hed not a vector...
                    (debug:print 2 *default-log-port* "nth-try("hed")="nth-try)
		    (cond
		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
		      (thread-sleep! 4)
		      (runs:loop-values tal reg reglen regfull reruns))
		     ((or (not nth-try) ;; BB: condition on subsequent tries, condition below fires on first try 
			  (and (number? nth-try)
			       (< nth-try 10)))
		      (hash-table-set! test-registry hed (if (number? nth-try)
							     (+ nth-try 1)
							     0))
		      (if (runs:lownoise (conc "not removing test " hed) 60)
			  (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites"))
		      ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
		      (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		      (runs:loop-values newtal reg reglen regfull reruns))
		     ((symbol? nth-try) ;; BB: 'done matches here in one case where prereq itemwait failed.  This is first "try"
		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
			  (if (null? tal)
			      #f ;; yes, really
			      (list (car tal)(cdr tal) reg reruns))
			  (begin
			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry."))
			    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
			    (hash-table-set! test-registry hed 0)
			    (runs:loop-values newtal reg reglen regfull))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
  (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags))

  ;; Do mark-and-find clean up of db before starting runing of quue
  ;;
  ;; (rmt:find-and-mark-incomplete)

  (let* ((run-info             (rmt:get-run-info run-id))
	(tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
	(test-registry         (make-hash-table))
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50))
        (reglen                (if (number? reglen-in) reglen-in 1))
	(last-time-incomplete  (- (current-seconds) 900)) ;; force at least one clean up cycle
	(last-time-some-running (current-seconds))
	;; (tdbdat                (tasks:open-db))
	(runsdat (make-runs:dat
		  ;; hed: hed
		  ;; tal: tal
		  ;; reg: reg
		  ;; reruns: reruns
		  reglen: reglen
		  regfull: #f ;; regfull
		  ;; test-record: test-record
		  runname: runname
		  ;; test-name: test-name
		  ;; item-path: item-path
		  ;; jobgroup: jobgroup
		  max-concurrent-jobs: max-concurrent-jobs
		  run-id: run-id
		  ;; waitons: waitons
		  ;; testmode: testmode
		  test-patts: test-patts
		  required-tests: required-tests
		  test-registry: test-registry
		  registry-mutex: registry-mutex
		  flags: flags
		  keyvals: keyvals
		  run-info: run-info
		  ;; newtal: newtal
		  all-tests-registry: all-tests-registry
		  ;; itemmaps: itemmaps
		  ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)
		  ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running
		  )))

    ;; Initialize the test-registery hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))







|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
  (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags))

  ;; Do mark-and-find clean up of db before starting runing of quue
  ;;
  ;; (rmt:find-and-mark-incomplete)

  (let* ((run-info             (rmt:get-run-info run-id))
         (tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
         (sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
         (test-registry         (make-hash-table))
         (registry-mutex        (make-mutex))
         (num-retries           0)
         (max-retries           (config-lookup *configdat* "setup" "maxretries"))
         (max-concurrent-jobs   (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50))
         (reglen                (if (number? reglen-in) reglen-in 1))
         (last-time-incomplete  (- (current-seconds) 900)) ;; force at least one clean up cycle
         (last-time-some-running (current-seconds))
         ;; (tdbdat                (tasks:open-db))
         (runsdat (make-runs:dat
                   ;; hed: hed
                   ;; tal: tal
                   ;; reg: reg
                   ;; reruns: reruns
                   reglen: reglen
                   regfull: #f ;; regfull
                   ;; test-record: test-record
                   runname: runname
                   ;; test-name: test-name
                   ;; item-path: item-path
                   ;; jobgroup: jobgroup
                   max-concurrent-jobs: max-concurrent-jobs
                   run-id: run-id
                   ;; waitons: waitons
                   ;; testmode: testmode
                   test-patts: test-patts
                   required-tests: required-tests
                   test-registry: test-registry
                   registry-mutex: registry-mutex
                   flags: flags
                   keyvals: keyvals
                   run-info: run-info
                   ;; newtal: newtal
                   all-tests-registry: all-tests-registry
                   ;; itemmaps: itemmaps
                   ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)
                   ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running
                   )))

    ;; Initialize the test-registery hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
	;; 		       (server:ping (remote-server-url *runremote*)))
	;; 		  (server:check-if-running *toppath*))))
	;;     (server:kind-run *toppath*))
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))

	;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard
	;; and it is clear they *should* have run but did not.
	(if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f))
	    (begin
	      (rmt:register-test run-id test-name "")







|
|







1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
	;; 		       (server:ping (remote-server-url *runremote*)))
	;; 		  (server:check-if-running *toppath*))))
	;;     (server:kind-run *toppath*))
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))

	;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard
	;; and it is clear they *should* have run but did not.
	(if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f))
	    (begin
	      (rmt:register-test run-id test-name "")
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
		  (loop (runs:queue-next-hed tal reg reglen regfull)
			(runs:queue-next-tal tal reg reglen regfull)
			(runs:queue-next-reg tal reg reglen regfull)
			reruns))))
		  ;; (loop (car tal)(cdr tal) reg reruns))))

	(runs:incremental-print-results run-id)
	(debug:print 4 *default-log-port* "TOP OF LOOP => "
		     "test-name: " test-name
		     "\n  test-record  " test-record
		     "\n  hed:         " hed
		     "\n  itemdat:     " itemdat
		     "\n  items:       " items
		     "\n  item-path:   " item-path
		     "\n  waitons:     " waitons







|







1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
		  (loop (runs:queue-next-hed tal reg reglen regfull)
			(runs:queue-next-tal tal reg reglen regfull)
			(runs:queue-next-reg tal reg reglen regfull)
			reruns))))
		  ;; (loop (car tal)(cdr tal) reg reruns))))

	(runs:incremental-print-results run-id)
	(debug:print 1 *default-log-port* "TOP OF LOOP => "
		     "test-name: " test-name
		     "\n  test-record  " test-record
		     "\n  hed:         " hed
		     "\n  itemdat:     " itemdat
		     "\n  items:       " items
		     "\n  item-path:   " item-path
		     "\n  waitons:     " waitons
1348
1349
1350
1351
1352
1353
1354







1355
1356
1357
1358
1359
1360
1361
					    (not (member waiton reruns)))
				       1
				       #f))
				 waitons))))) ;; could do this more elegantly with a marker....
	  (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.")
	  (hash-table-set! test-registry tfullname 'removed))








	 ;; items is #f then the test is ok to be handed off to launch (but not before)
	 ;; 
	 ((not items)
	  (debug:print-info 4 *default-log-port* "OUTER COND: (not items)")
	  (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests))
		   (not (null? tal)))
	      (loop (car tal)(cdr tal) reg reruns))







>
>
>
>
>
>
>







1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
					    (not (member waiton reruns)))
				       1
				       #f))
				 waitons))))) ;; could do this more elegantly with a marker....
	  (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.")
	  (hash-table-set! test-registry tfullname 'removed))


         ; BB - a possibility for preqfail propagation
         ;; ((and (not items) (string? item-path) (not (equal? item-path ""))
         ;;       (lset-intersection testmode '(itemmatch itemwait)))

         ;;  )

	 ;; items is #f then the test is ok to be handed off to launch (but not before)
	 ;; 
	 ((not items)
	  (debug:print-info 4 *default-log-port* "OUTER COND: (not items)")
	  (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests))
		   (not (null? tal)))
	      (loop (car tal)(cdr tal) reg reruns))
1408
1409
1410
1411
1412
1413
1414


1415
1416
1417
1418
1419
1420
1421
	  (if (null? tal)
	      #f
	      (loop (car tal)(cdr tal) reg reruns)))
	    
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS


	 ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky
	  (let ((can-run-more    (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)))
		  (if loop-list
		      (apply loop loop-list)))







>
>







1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
	  (if (null? tal)
	      #f
	      (loop (car tal)(cdr tal) reg reruns)))
	    
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
         ;; 
         ;; * the condition for (eq? items 'have-procedure) below ensure that runs:expand-items is not called on the same test twice -- expand-items will flatten the procedure to an actual list of items.
	 ((or (procedure? items)(eq? items 'have-procedure)) ;; BB - target vars are env vars here? to allow expansion of [items]\nsomething [system echo $SOMETARGVAR], which is wonky
	  (let ((can-run-more    (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)))
		  (if loop-list
		      (apply loop loop-list)))
1440
1441
1442
1443
1444
1445
1446

1447
1448
1449
1450
1451
1452
1453
1454
	 ((not (null? tal))
	  (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here."))
	 ((not (null? reg)) ;; could we get here with leftovers?
	  (debug:print-info 0 *default-log-port* "Have leftovers!")
	  (loop (car reg)(cdr reg) '() reruns))
	 (else
	  (debug:print-info 4 *default-log-port* "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))

	 )))
    ;; now *if* -run-wait we wait for all tests to be done
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle
    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
		    (prev-num-running 0))
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")







>
|







1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
	 ((not (null? tal))
	  (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here."))
	 ((not (null? reg)) ;; could we get here with leftovers?
	  (debug:print-info 0 *default-log-port* "Have leftovers!")
	  (loop (car reg)(cdr reg) '() reruns))
	 (else
	  (debug:print-info 4 *default-log-port* "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))
	 ))) ;; end loop on sorted test names
    
    ;; now *if* -run-wait we wait for all tests to be done
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle
    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
		    (prev-num-running 0))
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
1468
1469
1470
1471
1472
1473
1474
1475
1476

1477


1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494

1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
	    ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
    ;; LET* ((test-record
    ;; we get here on "drop through". All done!
    (runs:run-post-hook run-id)
    (debug:print-info 1 *default-log-port* "All tests launched")))

(define (runs:calc-fails prereqs-not-met)
  (filter (lambda (test)

	    (and (vector? test) ;; not (string? test))


		 (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED"))
		 (not (member (db:test-get-status test)
			      '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")))))
	  prereqs-not-met))

(define (runs:calc-prereq-fail prereqs-not-met)
  (filter (lambda (test)
	    (and (vector? test) ;; not (string? test))
		 (equal? (db:test-get-state test) "NOT_STARTED")
		 (not (member (db:test-get-status test)
			      '("n/a" "KEEP_TRYING")))))
	  prereqs-not-met))

(define (runs:calc-not-completed prereqs-not-met)
  (filter
   (lambda (t)
     (or (not (vector? t))

	 (not (member (db:test-get-state t) '("INCOMPLETE" "COMPLETED")))))
   prereqs-not-met))

;; (define (runs:calc-not-completed prereqs-not-met)
;;   (filter
;;    (lambda (t)
;;      (or (not (vector? t))
;; 	 (not (equal? "COMPLETED" (db:test-get-state t)))))
;;    prereqs-not-met))

(define (runs:calc-runnable prereqs-not-met)
  (filter 
   (lambda (t)
     (or (not (vector? t))
	 (and (equal? "NOT_STARTED" (db:test-get-state t))
	      (member (db:test-get-status t)
		      '("n/a" "KEEP_TRYING")))
	 (and (equal? "RUNNING" (db:test-get-state t))))) ;; account for a test that is running
   prereqs-not-met))

(define (runs:pretty-string lst)
  (map (lambda (t)
	 (if (not (vector? t))
	     (conc t)
	     (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t))))
       lst))

;; parent-test is there as a placeholder for when parent-tests can be run as a setup step
;;
(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry)
  ;; All these vars might be referenced by the testconfig file reader
  (let* ((test-name    (tests:testqueue-get-testname   test-record))







|

>
|
>
>
|
|
|






|



|
|


>

















|






|







1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
	    ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
    ;; LET* ((test-record
    ;; we get here on "drop through". All done!
    (runs:run-post-hook run-id)
    (debug:print-info 1 *default-log-port* "All tests launched")))

(define (runs:calc-fails prereqs-not-met) ;; BB is this redundant with runs:runable-tests ?
  (filter (lambda (test)
            (or
             (and (vector? test) ;; not (string? test))
                  (member (db:test-get-status test) '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" "KILLED")))
             (and (vector? test) ;; not (string? test))
                  (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED"))
                  (not (member (db:test-get-status test)
                               '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))))))
	  prereqs-not-met))

(define (runs:calc-prereq-fail prereqs-not-met)
  (filter (lambda (test)
	    (and (vector? test) ;; not (string? test))
		 (equal? (db:test-get-state test) "NOT_STARTED")
                 (not (member (db:test-get-status test)
			      '("n/a" "KEEP_TRYING")))))
	  prereqs-not-met))

(define (runs:calc-not-completed prereqs-not-met) ;; filter out tests which have reached a ground state -- they are done one way or another.
  (filter 
   (lambda (t)
     (or (not (vector? t))
         (not (member (db:test-get-status t) '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" "KILLED")))
	 (not (member (db:test-get-state t) '("INCOMPLETE" "COMPLETED")))))
   prereqs-not-met))

;; (define (runs:calc-not-completed prereqs-not-met)
;;   (filter
;;    (lambda (t)
;;      (or (not (vector? t))
;; 	 (not (equal? "COMPLETED" (db:test-get-state t)))))
;;    prereqs-not-met))

(define (runs:calc-runnable prereqs-not-met)
  (filter 
   (lambda (t)
     (or (not (vector? t))
	 (and (equal? "NOT_STARTED" (db:test-get-state t))
	      (member (db:test-get-status t)
		      '("n/a" "KEEP_TRYING")))
	 (and (member (db:test-get-state t) '("RUNNING" "LAUNCHED" "REMOTEHOSTSTART" ))))) ;; account for a test that is running
   prereqs-not-met))

(define (runs:pretty-string lst)
  (map (lambda (t)
	 (if (not (vector? t))
	     (conc t)
	     (conc (db:test-get-testname t)"/"(db:test-get-item-path t) ":" (db:test-get-state t) "/" (db:test-get-status t))))
       lst))

;; parent-test is there as a placeholder for when parent-tests can be run as a setup step
;;
(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry)
  ;; All these vars might be referenced by the testconfig file reader
  (let* ((test-name    (tests:testqueue-get-testname   test-record))

Modified server.scm from [45bfab59ba] to [40df68dca1].

456
457
458
459
460
461
462
463

464
465
466
467
468
469
470
471
472
473
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync  (common:run-sync?))
        (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
	(debug-mode   (debug:debug-mode 1))
	(last-time    (current-seconds))
	(no-sync-db   (db:open-no-sync-db))
        (sync-duration 0) ;; run time of the sync in milliseconds
        (this-wd-num  (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))))

    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let* (;;(dbstruct (db:setup))
	       (mtdb       (dbr:dbstruct-mtdb dbstruct))
	       (mtpath     (db:dbdat-get-path mtdb))
	       (tmp-area   (common:get-db-tmp-area))
	       (start-file (conc tmp-area "/.start-sync"))
	       (end-file   (conc tmp-area "/.end-sync")))







|
>


|







456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync  (common:run-sync?))
        (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
	(debug-mode   (debug:debug-mode 1))
	(last-time    (current-seconds))
	(no-sync-db   (db:open-no-sync-db))
        (sync-duration 0) ;; run time of the sync in milliseconds
        ;;(this-wd-num  (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))
        )
    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
    (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)  );;  " this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let* (;;(dbstruct (db:setup))
	       (mtdb       (dbr:dbstruct-mtdb dbstruct))
	       (mtpath     (db:dbdat-get-path mtdb))
	       (tmp-area   (common:get-db-tmp-area))
	       (start-file (conc tmp-area "/.start-sync"))
	       (end-file   (conc tmp-area "/.end-sync")))
577
578
579
580
581
582
583
584
585
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (if (not *time-to-exit*) (loop))))
	    ;; time to exit, close the no-sync db here
	    (db:no-sync-close-db no-sync-db)
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" this-wd-num="this-wd-num)))))))








|

578
579
580
581
582
583
584
585
586
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (if (not *time-to-exit*) (loop))))
	    ;; time to exit, close the no-sync db here
	    (db:no-sync-close-db no-sync-db)
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num)))))))