Megatest

Check-in [e719f22355]
Login
Overview
Comment:end of run detection
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65-end-of-run
Files: files | file ages | folders
SHA1: e719f22355973db6ddadc5c9c809b19621e3516c
User & Date: pjhatwal on 2019-03-11 17:56:24
Other Links: branch diff | manifest | tags
Context
2019-03-18
10:57
catch up with v1.65 Closed-Leaf check-in: fe83540f6a user: pjhatwal tags: v1.65-end-of-run
2019-03-11
17:56
end of run detection check-in: e719f22355 user: pjhatwal tags: v1.65-end-of-run
17:55
added options for lock/unlock via mtutil check-in: f36ee6d2df user: pjhatwal tags: v1.65-end-of-run
Changes

Modified api.scm from [1541791de9] to [4abe8743ea].

78
79
80
81
82
83
84
85

86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101

102
103
104
105
106
107
108
109
110
111
112

113
114
115
116
117
118
119
78
79
80
81
82
83
84

85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112

113
114
115
116
117
118
119
120







-
+
















+










-
+







    login
    tasks-get-last
    testmeta-get-record
    have-incompletes?
    synchash-get
    get-changed-record-ids
		get-run-record-ids 
    ))
    get-not-completed-cnt))

(define api:write-queries
  '(
    get-keys-write ;; dummy "write" query to force server start

    ;; SERVERS
    start-server
    kill-server

    ;; TESTS
    test-set-state-status-by-id
    delete-test-records
    delete-old-deleted-test-records
    test-set-state-status
    test-set-top-process-pid
    set-state-status-and-roll-up-items
     
    update-pass-fail-counts
    top-test-set-per-pf-counts ;; (db:top-test-set-per-pf-counts (db:get-db *db* 5) 5 "runfirst")

    ;; RUNS
    register-run
    set-tests-state-status
    delete-run
    lock/unlock-run
    update-run-event_time
    mark-incomplete

    set-state-status-and-roll-up-run
    ;; STEPS
    teststep-set-status!

    ;; TEST DATA
    test-data-rollup
    csv->test-data

191
192
193
194
195
196
197

198
199
200
201
202
203
204
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206







+







                     ))
                   
                   ((delete-test-records)             (apply db:delete-test-records dbstruct params))
                   ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
                   ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
                   ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
                   ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
                   ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) 
                   ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
                   ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))

                   ;; RUNS
                   ((register-run)                 (apply db:register-run dbstruct params))
                   ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
                   ((delete-run)                   (apply db:delete-run dbstruct params))
269
270
271
272
273
274
275

276
277
278
279
280
281
282
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285







+







                   ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
                   ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
                   ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
                   ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
                   ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
                   ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
                   ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
                   ((get-not-completed-cnt)           (apply db:get-not-completed-cnt  dbstruct params)) 
                   ((synchash-get)                    (apply synchash:server-get dbstruct params))
                   ((get-raw-run-stats)               (apply db:get-raw-run-stats dbstruct params))
		   ((get-test-times)                  (apply db:get-test-times dbstruct params))

                   ;; RUNS
                   ((get-run-info)                 (apply db:get-run-info dbstruct params))
                   ((get-run-status)               (apply db:get-run-status dbstruct params))

Modified dashboard.scm from [067e696739] to [2679042d5f].

155
156
157
158
159
160
161

162
163
164
165
166
167
168
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169







+







      (debug:print 0 *default-log-port* "WARNING: Current policy requires running dashboard on homehost: " (common:get-homehost))))
    
;; RA => Might require revert for filters 
;; create a watch dog to move changes from lt/.db/*.db to megatest.db
;;
;;;(if (file-write-access? (conc *toppath* "/megatest.db"))
;;(debug:print-info 13 *default-log-port* "Before common:watchdog spawn")

(thread-start! (make-thread common:watchdog "Watchdog thread"))
;;(debug:print-info 13 *default-log-port* "After common:watchdog spawn")
;; (if (not (args:get-arg "-use-db-cache"))
;;     (begin
;;       (debug:print-info 0 *default-log-port* "Forcing db-cache mode due to read-only access to megatest.db")
;;       (hash-table-set! args:arg-hash "-use-db-cache" #t)));;;)
;;)
2634
2635
2636
2637
2638
2639
2640

2641
2642
2643
2644

2645
2646
2647
2648
2649
2650
2651
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654







+




+







		result))
	     (tabs (apply iup:tabs
			  #:tabchangepos-cb (lambda (obj curr prev)
					      (debug:catch-and-dump
					       (lambda ()
						 (let* ((tab-num (dboard:commondat-curr-tab-num commondat))
							(tabdat  (dboard:common-get-tabdat commondat tab-num: tab-num)))
                   
						   (dboard:tabdat-layout-update-ok-set! tabdat #f))
						 (dboard:commondat-curr-tab-num-set! commondat curr)
						 (let* ((tab-num (dboard:commondat-curr-tab-num commondat))
							(tabdat  (dboard:common-get-tabdat commondat tab-num: tab-num)))
               
						   (dboard:commondat-please-update-set! commondat #t)
						   (dboard:tabdat-layout-update-ok-set! tabdat #t)))
					       "tabchangepos"))
			  (dashboard:summary commondat stats-dat tab-num: 0)
			  runs-view
			  (dashboard:runs-summary commondat onerun-dat tab-num: 2)
			  ;; (dashboard:new-view db data new-view-dat tab-num: 3)

Modified db.scm from [bf6ebf1f66] to [6a0d8206a6].

2599
2600
2601
2602
2603
2604
2605
2606

2607
2608








2609
2610
2611
2612
2613
2614
2615
2599
2600
2601
2602
2603
2604
2605

2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623







-
+


+
+
+
+
+
+
+
+







			user (conc newlockval " " run-id))
       (debug:print-info 1 *default-log-port* "" newlockval " run number " run-id)))))

;; Update the status of a run and, when msg is supplied, its comment too.
;;
;;   dbstruct - handle passed through to db:with-db
;;   run-id   - id of the row in the runs table to update
;;   status   - new value for runs.status
;;   msg      - new value for runs.comment, or #f to leave the comment untouched
;;
(define (db:set-run-status dbstruct run-id status msg)
  (db:with-db
   dbstruct #f #f
   (lambda (db)
     ;; exactly one test on msg selects the UPDATE; the status has to be
     ;; written even when no comment is given
     (if msg
         (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id)
         (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id)))))

;; Write both the state and the status of a run with a single UPDATE.
;;
;;   dbstruct - handle passed through to db:with-db
;;   run-id   - id of the row in the runs table to update
;;   state    - new value for runs.state
;;   status   - new value for runs.status
;;
(define (db:set-run-state-status dbstruct run-id state status)
  (let ((update-run
         (lambda (db)
           ;; bind order follows the SQL text: status first, then state, then the id
           (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id))))
    (db:with-db dbstruct #f #f update-run)))



(define (db:get-run-status dbstruct run-id)
  (let ((res "n/a"))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row 
2991
2992
2993
2994
2995
2996
2997












2998
2999
3000
3001
3002
3003
3004
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024







+
+
+
+
+
+
+
+
+
+
+
+







   run-id
   #f
   (lambda (db)
     (sqlite3:first-result
      db
      "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;" run-id testname))))


;; Count the tests of a run whose state is neither COMPLETED nor DELETED.
;;
;;   dbstruct - handle passed through to db:with-db
;;   run-id   - run whose tests are counted
;;
;; The value of the count(id) query is the result of the inner lambda;
;; presumably db:with-db hands it back to the caller unchanged - TODO confirm.
;;
(define (db:get-not-completed-cnt dbstruct run-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     ;; trace the query through the logger rather than writing to stdout on every call
     (debug:print-info 4 *default-log-port* "SELECT count(id) FROM tests WHERE state not in ('COMPLETED', 'DELETED') AND run_id=" run-id)
     (sqlite3:first-result
      db
      "SELECT count(id) FROM tests WHERE state not in ('COMPLETED', 'DELETED') AND run_id=?;" run-id))))

(define (db:get-count-tests-running-in-jobgroup dbstruct run-id jobgroup)
  (if (not jobgroup)
      0 ;; 
      (let ((testnames '()))
	;; get the testnames
	(db:with-db
	 dbstruct #f #f
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3685
3686
3687
3688
3689
3690
3691

3692
3693
3694
3695
3696
3697
3698







-







;; status is the priority rollup of all completed states
;;
;; if test-name is an integer work off that instead of test-name test-path
;;
(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
  ;; establish info on incoming test followed by info on top level test
  ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
  
  (let* ((testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
			   (db:get-test-info       dbstruct run-id test-name item-path)))
	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)
			   (db:test-get-testname testdat)
			   test-name))
3692
3693
3694
3695
3696
3697
3698



















3699
3700
3701
3702
3703
3704






3705
3706

3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720











3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731










3732
3733
3734
3735



3736
3737
3738
3739
3740
3741
3742


3743
3744
3745
3746
3747




3748
3749

3750
3751

3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773














3774
3775
3776
3777
3778




3779
3780
3781
3782
3783
3784














3785
3786


3787
3788
3789
3790
3791
3792
3793
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736






3737
3738
3739
3740
3741
3742


3743














3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755










3756
3757
3758
3759
3760
3761
3762
3763
3764
3765




3766
3767
3768







3769
3770





3771
3772
3773
3774


3775


3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788










3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802





3803
3804
3805
3806
3807





3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
-
-
-
-
-
-
-
+
+
-
-
-
-
-
+
+
+
+
-
-
+
-
-
+












-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+

-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+


+
+







              (sqlite3:with-transaction
               db
               (lambda ()
                 ;; NB// Pass the db so it is part fo the transaction
                 (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status
                 (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item
                     (let* ((state-status-counts  (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
			      						  (state-stauses (db:roll-up-rules state-status-counts state status))
                          (newstate (car state-stauses))
                          (newstatus (cadr state-stauses)))
                       (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts)  " state-status-counts: "
							(apply conc
                  (map (lambda (x)
                     (conc
                     		(with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
                              state-status-counts))); end debug:print
   
                       (if tl-test-id
			   (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
		       ))))))
         (mutex-unlock! *db-transaction-mutex*)
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
         tr-res)))))

(define (db:roll-up-rules state-status-counts state status)
                            (running              (length (filter (lambda (x)
                                                                    (member (dbr:counts-state x) *common:running-states*))
                                                                  state-status-counts)))
                            (bad-not-started      (length (filter (lambda (x)
                                                                    (and (equal? (dbr:counts-state x) "NOT_STARTED")
                                                                         (not (member (dbr:counts-status x)
		(let* ((running     (length (filter (lambda (x)
                          (member (dbr:counts-state x) *common:running-states*))
                                 state-status-counts)))
           (bad-not-started      (length (filter (lambda (x)
                                      (and (equal? (dbr:counts-state x) "NOT_STARTED") 
                                        (not (member (dbr:counts-status x)  *common:not-started-ok-statuses*))))
                                                                                      *common:not-started-ok-statuses*))))
								  state-status-counts)))
																	state-status-counts)))
                            ;; (non-completes        (filter (lambda (x)
                            ;;                                 (not (equal? (dbr:counts-state x) "COMPLETED")))
                            ;;                               state-status-counts))
                            (all-curr-states      (common:special-sort  ;; worst -> best (sort of)
                                                       (delete-duplicates
                                                        (if (not (member state *common:dont-roll-up-states*))
                                                            (cons state (map dbr:counts-state state-status-counts))
                                                            (map dbr:counts-state state-status-counts)))
                                                       *common:std-states* >))
                            (all-curr-statuses    (common:special-sort  ;; worst -> best
                                                   (delete-duplicates
                                                    (if (not (member state *common:dont-roll-up-states*))
                                                        (cons status (map dbr:counts-status state-status-counts))
                                                        (map dbr:counts-status state-status-counts)))
           (all-curr-states      (common:special-sort  ;; worst -> best (sort of)
                                    (delete-duplicates
                                      (if (and state (not (member state *common:dont-roll-up-states*)))
                                          (cons state (map dbr:counts-state state-status-counts))
                                          (map dbr:counts-state state-status-counts)))
                                                  *common:std-states* >))
           (all-curr-statuses    (common:special-sort  ;; worst -> best
                                    (delete-duplicates
                                      (if (and state status (not (member state *common:dont-roll-up-states*)))
                                          (cons status (map dbr:counts-status state-status-counts))
                                          (map dbr:counts-status state-status-counts)))
                                                   *common:std-statuses* >))
			    (non-completes     (filter (lambda (x)
							 (not (member x (cons "COMPLETED" *common:dont-roll-up-states*))))
						       all-curr-states))
			    (preq-fails        (filter (lambda (x)
							 (equal? x "PREQ_FAIL"))
						       all-curr-statuses))
                            (num-non-completes (length non-completes))
                            (newstate          (cond
						((> running 0)           "RUNNING")            ;; anything running, call the situation running
                                                ((> (length preq-fails) 0)
           (non-completes        (filter (lambda (x)
							 										 (not (member x (cons "COMPLETED" *common:dont-roll-up-states*))))
						       									all-curr-states))
			     (preq-fails        (filter (lambda (x)
							 								(equal? x "PREQ_FAIL"))
						       							all-curr-statuses))
           (num-non-completes (length non-completes))
 					 (newstate          (cond
															((> running 0)           "RUNNING")            ;; anything running, call the situation running
                              ((> (length preq-fails) 0) "NOT_STARTED")
                                                 "NOT_STARTED")
						((> bad-not-started 0)   "COMPLETED")          ;; we have an ugly situation, it is completed in the sense we cannot do more.
						((> num-non-completes 0) (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED
						(else                    (car all-curr-states))))
															((> bad-not-started 0)   "COMPLETED")          ;; we have an ugly situation, it is completed in the sense we cannot do more.
															((> num-non-completes 0) (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED
															(else                    (car all-curr-states))))
			                       ;; (if (> running 0)
                                               ;;     "RUNNING"
                                               ;;     (if (> bad-not-started 0)
                                               ;;         "COMPLETED"
                                               ;;         (car all-curr-states))))
                            (newstatus         (cond
                                                ((> (length preq-fails) 0)
           (newstatus         (cond
                              ((> (length preq-fails) 0)  "PREQ_FAIL")
                                                 "PREQ_FAIL")
                                                ((or (> bad-not-started 0)
                                                     (and (equal? newstate "NOT_STARTED")
                                                          (> num-non-completes 0)))
                                                 "STARTED")
                              ((or (> bad-not-started 0)
                                   (and (equal? newstate "NOT_STARTED")
                                      (> num-non-completes 0)))
                                            "STARTED")
                                                (else
                                                 (car all-curr-statuses)))))
                              (else (car all-curr-statuses)))))

                       (debug:print-info 2 *default-log-port*
 					(debug:print-info 2 *default-log-port*
                                         "\n--> probe db:set-state-status-and-roll-up-items: "
                                         "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts)
                                         "\n--> running:             "running
                                         "\n--> bad-not-started:     "bad-not-started
                                         "\n--> non-non-completes:   "num-non-completes
                                         "\n--> non-completes:       "non-completes
                                         "\n--> all-curr-states:     "all-curr-states
                                         "\n--> all-curr-statuses:     "all-curr-statuses
                                         "\n--> newstate              "newstate
                                         "\n--> newstatus            "newstatus
                                         "\n\n")

                       ;; (print "bad-not-supported: " bad-not-support " all-curr-states: " all-curr-states " all-curr-statuses: " all-curr-states)
                       ;;      " newstate: " newstate " newstatus: " newstatus)
                       ;; NB// Pass the db so it is part of the transaction
                       (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path"> bad-not-started="bad-not-started" newstate="newstate" newstatus="newstatus" num-non-completes="num-non-completes" non-completes="non-completes "len(sscs)="(length state-status-counts)  " state-status-counts: "
                                    (apply conc
                                           (map (lambda (x)
                                                  (conc
                                                   (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
                                                state-status-counts))
                                    
                        ;; NB// Pass the db so it is part of the transaction
         (list newstate newstatus)))

(define (db:set-state-status-and-roll-up-run dbstruct run-id)
    (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                   (let* ((state-status-counts  (db:get-all-state-status-counts-for-run dbstruct run-id))
													(state-stauses (db:roll-up-rules state-status-counts #f #f ))
                                    ); end debug:print
                       (if tl-test-id
			   (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
		       ))))))
                           
                          (newstate (car state-stauses))
                          (newstatus (cadr state-stauses))) 
                    
                   (db:set-run-state-status dbstruct run-id newstate newstatus ))))))
         (mutex-unlock! *db-transaction-mutex*)
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
         tr-res)))))
;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status*
(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path item-state-in item-status-in)
         tr-res))))


;; Gather the per-(state,status) test counts of a whole run.
;;
;;   dbstruct - handle passed through to db:with-db
;;   run-id   - run whose tests are grouped and counted
;;
;; Every row of the GROUP BY query is turned into one dbr:counts record
;; by sqlite3:map-row.
;;
(define (db:get-all-state-status-counts-for-run dbstruct run-id)
  (let ((row->counts
         (lambda (state status count)
           (make-dbr:counts state: state status: status count: count))))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:map-row
        row->counts
        db
        "SELECT state,status,count(id) FROM tests WHERE run_id=?  GROUP BY state,status;"
        run-id)))))


;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status*
(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path item-state-in item-status-in)
  (let* ((test-info   (db:get-test-info dbstruct run-id test-name item-path))
         (item-state  (or item-state-in (db:test-get-state test-info))) 
         (item-status (or item-status-in (db:test-get-status test-info)))
         (other-items-count-recs (db:with-db
                                  dbstruct #f #f
                                  (lambda (db)
                                    (sqlite3:map-row
3862
3863
3864
3865
3866
3867
3868
3869

3870
3871
3872
3873
3874
3875
3876
3901
3902
3903
3904
3905
3906
3907

3908
3909
3910
3911
3912
3913
3914
3915







-
+







  (list '(update-run-duration     "UPDATE tests SET run_duration=? WHERE id=?;")

	;; TESTS
	'(register-test          "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');")
	;; Test state and status
	'(set-test-state         "UPDATE tests SET state=?   WHERE id=?;")
	'(set-test-status        "UPDATE tests SET state=?   WHERE id=?;")
	'(state-status           "UPDATE tests SET state=?,status=? WHERE id=?;") ;; DONE
	'(state-status           "UPDATE tests SET state=?,status=? WHERE id=?;") ;; D/ONE
	'(state-status-msg       "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") ;; DONE
	;; Test comment
	'(set-test-comment       "UPDATE tests SET comment=? WHERE id=?;")
	'(set-test-start-time    "UPDATE tests SET event_time=strftime('%s','now') WHERE id=?;") ;; DONE
	'(pass-fail-counts       "UPDATE tests SET pass_count=?,fail_count=? WHERE id=?;")
	;; test_data-pf-rollup is used to set a tests PASS/FAIL based on the pass/fail info from the steps
	'(test_data-pf-rollup    "UPDATE tests

Modified launch.scm from [9077b3d3fc] to [b89d82dc9d].

779
780
781
782
783
784
785
786
787

788
789
790
791
792
793
794
779
780
781
782
783
784
785


786
787
788
789
790
791
792
793







-
-
+







	    (mutex-lock! m)
	    (let* ((item-path (item-list->path itemdat))
		   ;; only state and status needed - use lazy routine
		   (testinfo  (rmt:get-testinfo-state-status run-id test-id)))
	      ;; Am I completed?
	      (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (let ((new-state  (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status
				                                        ;; "COMPLETED"
							                ;; (db:test-get-state testinfo)))   ;; else preseve the state as set within the test
				                                        ;; "COMPLETED"							                ;; (db:test-get-state testinfo)))   ;; else preseve the state as set within the test
				    )
			(new-status (cond
				     ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1)
				     ((eq? (launch:einf-rollup-status exit-info) 0)     ;; (vector-ref exit-info 3)
				      ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO)
				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
				     ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL")  ;; (vector-ref exit-info 3)
811
812
813
814
815
816
817
818
819

820
821
822
823
824
825
826
827













828
829
830
831




























































832
833
834
835
836
837
838
810
811
812
813
814
815
816


817


818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836




837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903







-
-
+
-
-






+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







		    ))
	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		  (tests:summarize-items run-id test-id test-name #f))
	      (tests:summarize-test run-id test-id)  ;; don't force - just update if no
	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
	    (mutex-unlock! m)

            (launch:end-of-run-check run-id)
            (launch:end-of-run-check run-id )

            
	    (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " 
			 work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")
	    (if (not (launch:einf-exit-status exit-info))
		(exit 4))))
        )))

;; Spec for End of test
;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup
;; At transition to run COMPLETED/X do hooks
;; Definition: test_dead if event_time + duration + 1 minute? < current_time AND
;; we can prove the process is not alive (ssh host pstree -A pid)
;; if dead safe to mark the test as killed in the db
;; State/status table
;; new
;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) ==> COMPLETED / X where X is same as itemized rollup
;; > 3 RUNNING with no test_dead: do nothing (run should already be RUNNING/n/a)
;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED
;; 0 RUNNING ==> this is actually the first condition, should not get here

;; Placeholder end-of-run check: writes one level-0 log line naming the run
;; and always returns #f.
;; NOTE(review): another definition of launch:end-of-run-check appears further
;; down in this text - confirm which of the two is meant to remain.
(define launch:end-of-run-check
  (lambda (run-id)
    (debug:print 0 *default-log-port* "end-of-run-check would go here for run-id=" run-id)
    #f))

;; Decide, after a test has finished, whether the whole run is finished and act on it.
;;
;;   run-id - id of the run to examine
;;
;; Three values are fetched through rmt: calls: the count of tests that are not
;; COMPLETED/DELETED, the count of running tests, and the variable named
;; "lunch-complete-<run-id>". The cases, in order:
;;   1. variable is "yes" and nothing is left incomplete -> roll up the run
;;      state/status and call runs:run-post-hook
;;   2. more than 3 tests running -> only log
;;   3. 1 to 3 tests running -> call launch:kill-tests-if-dead; when every running
;;      test was flagged and the variable is "yes", check the run again
;;   4. otherwise -> log the counts and list any test still in a running state
;;
;; The return value is whatever the selected branch yields; the visible caller
;; ignores it.
;;
(define (launch:end-of-run-check run-id )
  (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id))
         (running-cnt       (rmt:get-count-tests-running-for-run-id run-id))
         ;; the key really is spelled "lunch-complete-"; it is kept verbatim because
         ;; the code that stores the variable is not visible from here
         (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id)))
         ;; equal? already yields #f when the variable is missing (#f)
         (launch-done?      (equal? all-test-launched "yes")))
    ;; if no entry is found in the metadata table nothing is rolled up: only
    ;; "yes" together with not-completed-cnt = 0 means everything is completed
    (cond
     ((and launch-done? (eqv? not-completed-cnt 0))
      (debug:print 0 *default-log-port* "rollup run state/status")
      (rmt:set-state-status-and-roll-up-run  run-id)
      (debug:print 0 *default-log-port* "look for  post hook.")
      (runs:run-post-hook run-id))
     ((> running-cnt 3)
      (debug:print 0 *default-log-port* "There are " running-cnt " tests running." ))
     ((> running-cnt 0)
      (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" )
      (let ((kill-cnt (launch:kill-tests-if-dead run-id)))
        (if (and launch-done? (eqv? kill-cnt running-cnt))
            (launch:end-of-run-check run-id)))) ;;todo
     (else
      (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt)
      ;; for-each copes with an empty list; this branch is reached precisely when
      ;; the running count is not positive, so an empty result is the common case
      (for-each
       (lambda (running-test)
         (let ((test-name (vector-ref running-test 2))
               (item-path (vector-ref running-test 11)))
           (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed")))
       (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))))))
 
;; Probe whether process pid still exists on host by running
;; "ssh <host> pstree -A <pid>" and looking at its standard output.
;;
;;   host - machine to ssh to; #f or "n/a" means no usable host is known
;;   pid  - process id to look for; #f means unknown
;;
;; Returns #t when the command printed at least one line, and also when host
;; or pid is missing (nothing can be proven, so the test is treated as alive).
;; Returns #f when the command produced no output.
;; NOTE(review): a failing ssh presumably also produces no lines on stdout and
;; would then be reported as "not alive" - confirm that callers can accept that.
;; NOTE(review): host and pid are spliced into a shell command unquoted - confirm
;; both always come from trusted records.
;;
(define (launch:is-test-alive host pid)
  (if (and host pid (not (equal? host "n/a")))
      (let* ((cmd    (conc "ssh " host " pstree -A " pid))
             (output (with-input-from-pipe cmd read-lines)))
        ;; trace through the logger instead of printing on stdout for every probe
        (debug:print 2 *default-log-port* "cmd: " cmd "\n op: " output)
        ;; read-lines gives a list; any line at all counts as "alive"
        (pair? output))
      #t))
 
;; Request a kill for tests of a run that appear to have died silently.
;;
;; Fetches every test of run-id whose state is RUNNING, LAUNCHED or
;; REMOTEHOSTSTART. A test is treated as dead when both hold:
;;   - event-time + duration + 600 is earlier than the current time, i.e.
;;     the test has not updated its duration in the last 10 minutes, and
;;   - launch:is-test-alive reports that its top process is gone.
;; Each such test is set to state "KILLREQ", status "n/a" through
;; rmt:set-state-status-and-roll-up-items.
;;
;; Returns the number of tests for which a kill was requested. Returns 0
;; when the query yields no tests (the loop no longer takes the car of an
;; empty list).
(define (launch:kill-tests-if-dead run-id)
  (let ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
    (let loop ((remaining running-tests)
               (kill-cnt  0))
      (if (null? remaining)
          kill-cnt
          (let* ((running-test (car remaining))
                 (test-id      (vector-ref running-test 0))
                 (test-name    (vector-ref running-test 2))
                 (event-time   (vector-ref running-test 5))
                 (host         (vector-ref running-test 6))
                 (item-path    (vector-ref running-test 11))
                 (duration     (vector-ref running-test 12))
                 (stale?       (< (+ event-time duration 600) (current-seconds)))
                 ;; only ask the server for the pid (and ssh to the host)
                 ;; once the cheap staleness check has passed
                 (dead?        (and stale?
                                    (not (launch:is-test-alive
                                          host
                                          (rmt:test-get-top-process-pid run-id test-id))))))
            (if dead?
                (begin
                  (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed")
                  (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f)))
            (loop (cdr remaining)
                  (if dead? (+ kill-cnt 1) kill-cnt)))))))

;; DO NOT USE - caching of configs is handled in launch:setup now.
;;
(define (launch:cache-config)
  ;; if we have a linktree and -runtests and -target and the directory exists dump the config
  ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
  (if (and *configdat* 

Modified rmt.scm from [0a05f35135] to [f3cb7b6857].

669
670
671
672
673
674
675




676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691




692
693
694
695
696
697
698
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706







+
+
+
+
















+
+
+
+







;;   (rmt:send-receive #f 'get-run-ids-matching (list keynames target res)))

;; Send a 'get-prereqs-not-met request for run-id through rmt:send-receive
;; and return its result. mode defaults to '(normal), itemmaps to #f.
(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
  (let ((params (list run-id waitons ref-test-name ref-item-path mode itemmaps)))
    (rmt:send-receive 'get-prereqs-not-met run-id params)))

;; Send a 'get-count-tests-running-for-run-id request for run-id through
;; rmt:send-receive and return its result.
(define (rmt:get-count-tests-running-for-run-id run-id)
  (let ((params (list run-id)))
    (rmt:send-receive 'get-count-tests-running-for-run-id run-id params)))

;; Send a 'get-not-completed-cnt request for run-id through
;; rmt:send-receive and return its result.
(define (rmt:get-not-completed-cnt run-id)
  (let ((params (list run-id)))
    (rmt:send-receive 'get-not-completed-cnt run-id params)))


;; Statistical queries

;; Send a 'get-count-tests-running request for run-id through
;; rmt:send-receive and return its result.
(define (rmt:get-count-tests-running run-id)
  (let ((params (list run-id)))
    (rmt:send-receive 'get-count-tests-running run-id params)))

;; Send a 'get-count-tests-running-for-testname request carrying run-id
;; and testname through rmt:send-receive and return its result.
(define (rmt:get-count-tests-running-for-testname run-id testname)
  (let ((params (list run-id testname)))
    (rmt:send-receive 'get-count-tests-running-for-testname run-id params)))

;; Send a 'get-count-tests-running-in-jobgroup request carrying run-id
;; and jobgroup through rmt:send-receive and return its result.
(define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)
  (let ((params (list run-id jobgroup)))
    (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id params)))

;; state and status are extra hints not usually used in the calculation
;;
;; Send a 'set-state-status-and-roll-up-items request through
;; rmt:send-receive with run-id, test-name, item-path, state, status and
;; comment as its parameters, and return its result.
(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
  (let ((params (list run-id test-name item-path state status comment)))
    (rmt:send-receive 'set-state-status-and-roll-up-items run-id params)))

;; Send a 'set-state-status-and-roll-up-run request for run-id through
;; rmt:send-receive and return its result.
(define (rmt:set-state-status-and-roll-up-run run-id)
  (let ((params (list run-id)))
    (rmt:send-receive 'set-state-status-and-roll-up-run run-id params)))


;; Invoke rmt:general-call with 'update-pass-fail-counts for run-id,
;; passing test-name three times as the call's parameters.
(define (rmt:update-pass-fail-counts run-id test-name)
  (rmt:general-call 'update-pass-fail-counts
                    run-id
                    test-name
                    test-name
                    test-name))

;; Send a 'top-test-set-per-pf-counts request carrying run-id and
;; test-name through rmt:send-receive and return its result.
(define (rmt:top-test-set-per-pf-counts run-id test-name)
  (let ((params (list run-id test-name)))
    (rmt:send-receive 'top-test-set-per-pf-counts run-id params)))

Modified runs.scm from [0d98a3ef41] to [f0db5bffd4].

511
512
513
514
515
516
517
518


519
520
521
522
523
524
525
511
512
513
514
515
516
517

518
519
520
521
522
523
524
525
526







-
+
+








    ;; Ensure all tests are registered in the test_meta table
    (runs:update-all-test_meta #f)

    ;; run the run prehook if there are no tests yet run for this run:
    ;;
    (runs:run-pre-hook run-id)
    
    ;; mark the all-tests-launched flag as "no" in the metadata table
    (rmt:set-var (conc "lunch-complete-" run-id) "no")
    ;; now add non-directly referenced dependencies (i.e. waiton)
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data
    ;;
    ;; What happened? This code is now duplicated in tests!?
    ;;
    ;;======================================================================
665
666
667
668
669
670
671
672
673
674
675
676
677






678
679
680
681
682
683
684
666
667
668
669
670
671
672






673
674
675
676
677
678
679
680
681
682
683
684
685







-
-
-
-
-
-
+
+
+
+
+
+







		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS"))
		  ;; recursive call to self
      (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))))
	  (debug:print-info 0 *default-log-port* "No tests to run")))
    (debug:print-info 4 *default-log-port* "All done by here")
    ;; TODO: try putting post hook call here
    (if (eq? run-count 0)
     (begin  
      (debug:print-info 0 *default-log-port* "Calling Post Hook")  
      (debug:print-info 2 *default-log-port* " run-count " run-count)
      (runs:run-post-hook run-id))
      (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count ))   
    ;(if (eq? run-count 0)
    ; (begin  
    ;  (debug:print-info 0 *default-log-port* "Calling Post Hook")  
    ;  (debug:print-info 2 *default-log-port* " run-count " run-count)
    ;  (runs:run-post-hook run-id))
    ;  (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count ))   
    (rmt:tasks-set-state-given-param-key task-key "done")
    ;; (sqlite3:finalize! tasks-db)
    ))


;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable.
;;
1638
1639
1640
1641
1642
1643
1644


1645

1646
1647
1648
1649
1650
1651
1652
1639
1640
1641
1642
1643
1644
1645
1646
1647

1648
1649
1650
1651
1652
1653
1654
1655







+
+
-
+







          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-8")
	  (debug:print-info 0 *default-log-port* "Have leftovers!")
	  (loop (car reg)(cdr reg) '() reruns))
	 (else
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-9")
	  (debug:print-info 4 *default-log-port* "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))
	 ))) ;; end loop on sorted test names
    ;; At this point everything has been launched, so mark the run as fully launched in the metadata table.
    (rmt:set-var (conc "lunch-complete-" run-id) "yes")
    
  
    ;; now *if* -run-wait we wait for all tests to be done
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle
    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
		    (prev-num-running 0))
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
2471
2472
2473
2474
2475
2476
2477
2478




2479
2480

2481

2482
2483


2484

2485
2486
2487
2488
2489
2490
2491
2474
2475
2476
2477
2478
2479
2480

2481
2482
2483
2484
2485
2486
2487

2488
2489

2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500







-
+
+
+
+


+
-
+

-
+
+

+








(define (runs:handle-locking target keys runname lock unlock user)
  (let* ((db       #f)
	 (rundat   (mt:get-runs-by-patt keys runname target))
	 (header   (vector-ref rundat 0))
	 (runs     (vector-ref rundat 1)))
    (for-each (lambda (run)
		(let ((run-id (db:get-value-by-header run header "id")))
		(let ((run-id (db:get-value-by-header run header "id"))
           (str (if lock 
									"lock"
                  "unlock")))
		  (if (or lock
			  (and unlock
			       (or (args:get-arg "-force")
			       (begin
                (begin
				 (print "Do you really wish to unlock run " run-id "?\n   y/n: ")
				 (equal? "y" (read-line)))))
				 (equal? "y" (read-line))))))
          (begin 
		      (rmt:lock/unlock-run run-id lock unlock user)
					(debug:print-info 0 *default-log-port* "Done " str " on run id " run-id))
		      (debug:print-info 0 *default-log-port* "Skipping lock/unlock on " run-id))))
	      runs)))
;;======================================================================
;; Rollup runs
;;======================================================================

;; Update the test_meta table for this test