Megatest

Check-in [ad100ae4c2]
Login
Overview
Comment:Copied dashboard speedup changes from checkin 51ee5bb785 and added handling for failed glob
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.70
Files: files | file ages | folders
SHA1: ad100ae4c28032308d0de95a9623a1c38d9ee2c6
User & Date: mmgraham on 2023-01-31 16:10:38
Other Links: branch diff | manifest | tags
Context
2023-02-14
14:25
changed version to 1.7012 check-in: b44fc61aa6 user: mmgraham tags: v1.70
2023-01-31
16:10
Copied dashboard speedup changes from checkin 51ee5bb785 and added handling for failed glob check-in: ad100ae4c2 user: mmgraham tags: v1.70
2023-01-20
16:49
Fixed calls to dbfile:cautious-open-database to correct arguments types of sync-mode and journal-mode. (optional -> non-optional) check-in: 759f18e4d9 user: mmgraham tags: v1.70
Changes

Modified dashboard.scm from [0995d5cbb4] to [c86c652832].

54
55
56
57
58
59
60




61
62
63
64
65
66
67
(include "task_records.scm")
(include "megatest-version.scm")
(include "megatest-fossil-hash.scm")
(include "vg_records.scm")

(dbfile:db-init-proc db:initialize-main-db)





(define help (conc 
	      "Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version 
              " license GPL, Copyright (C) Matt Welland 2012-2017

Usage: dashboard [options]
  -h                    : this help
  -test run-id test-id  : open a test control panel on this test







>
>
>
>







54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
(include "task_records.scm")
(include "megatest-version.scm")
(include "megatest-fossil-hash.scm")
(include "vg_records.scm")

(dbfile:db-init-proc db:initialize-main-db)

;; globals to dashboard module
(define *updaters-running* #f)
(define *updaters-thread*  #f)

(define help (conc 
	      "Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version 
              " license GPL, Copyright (C) Matt Welland 2012-2017

Usage: dashboard [options]
  -h                    : this help
  -test run-id test-id  : open a test control panel on this test
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
                                         (let ((val (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) key #f)))
                                           (if val (set! res (cons (list key val) res))))))
                                   dbkeys)
                         res))))
       fres))))

(define (dashboard:runs-tab-updater commondat tab-num)
  (debug:catch-and-dump 
   (lambda ()
     (let* ((tabdat (dboard:common-get-tabdat commondat tab-num: tab-num))
	    (dbkeys (dboard:tabdat-dbkeys tabdat)))
       (dashboard:do-update-rundat tabdat)
       (let ((uidat (dboard:commondat-uidat commondat)))
	 (update-buttons tabdat uidat (dboard:tabdat-numruns tabdat) (dboard:tabdat-num-tests tabdat)))
       ))
   "dashboard:runs-tab-updater"))

;;======================================================================
;; The heavy lifting starts here
;;======================================================================

(stop-the-train)








|
|






|







3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
                                         (let ((val (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) key #f)))
                                           (if val (set! res (cons (list key val) res))))))
                                   dbkeys)
                         res))))
       fres))))

(define (dashboard:runs-tab-updater commondat tab-num)
;;  (debug:catch-and-dump 
;;   (lambda ()
     (let* ((tabdat (dboard:common-get-tabdat commondat tab-num: tab-num))
	    (dbkeys (dboard:tabdat-dbkeys tabdat)))
       (dashboard:do-update-rundat tabdat)
       (let ((uidat (dboard:commondat-uidat commondat)))
	 (update-buttons tabdat uidat (dboard:tabdat-numruns tabdat) (dboard:tabdat-num-tests tabdat)))
       ))
;;   "dashboard:runs-tab-updater"))

;;======================================================================
;; The heavy lifting starts here
;;======================================================================

(stop-the-train)

3850
3851
3852
3853
3854
3855
3856

3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867




3868
3869
3870
3871
3872
3873

3874
3875
3876
3877
3878
3879

3880
3881

3882

3883
3884

3885
3886
3887
3888
3889
3890
3891
3892
3893
	 tab-num: 0)
        ;; may not want this alive (manually merged it from v1.66)
	;; (dboard:commondat-add-updater 
	;;  commondat 
	;;  (lambda ()
	   ;;  (dashboard:runs-tab-updater commondat 1))
	;; tab-num: 2)

	(iup:callback-set! *tim*
			   "ACTION_CB"
			   (lambda (time-obj)
			     (let ((update-is-running #f))
			     (mutex-lock! (dboard:commondat-update-mutex commondat))
			     (set! update-is-running (dboard:commondat-updating commondat))
			     (if (not update-is-running)
			     (dboard:commondat-updating-set! commondat #t))
			     (mutex-unlock! (dboard:commondat-update-mutex commondat))
			     (if (not update-is-running) ;; we know that the update was not running and we now have a lock on doing an update
			     (begin




			     (dboard:common-run-curr-updaters commondat) ;; (dashboard:run-update commondat)
			     (mutex-lock! (dboard:commondat-update-mutex commondat))
			     (dboard:commondat-updating-set! commondat #f)
			     (mutex-unlock! (dboard:commondat-update-mutex commondat)))
				   ))
			     1))))

      ;; (debug:print 0 *default-log-port* "Starting updaters")
      (let ((th1 (make-thread (lambda ()
				(thread-sleep! 1)
				(dboard:common-run-curr-updaters commondat 0) ;; force update of summary tab 
				) "update buttons once"))
	    (th2 (make-thread iup:main-loop "Main loop")))

        ;; (print "Starting main loop")
	(thread-start! th2)

	(thread-join! th2)

      )
    )

  )
)

(define last-copy-time 0)


;; Sync to tmp only if in read-only mode.

(define (sync-db-to-tmp tabdat)







>



<
<
<
|
<
<
<

>
>
>
>
|
|
<
<
<
<
>
|
|
|
<
|
<
>
|
|
>
|
>
|
|
>
|
<







3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864



3865



3866
3867
3868
3869
3870
3871
3872




3873
3874
3875
3876

3877

3878
3879
3880
3881
3882
3883
3884
3885
3886
3887

3888
3889
3890
3891
3892
3893
3894
	 tab-num: 0)
        ;; may not want this alive (manually merged it from v1.66)
	;; (dboard:commondat-add-updater 
	;;  commondat 
	;;  (lambda ()
	   ;;  (dashboard:runs-tab-updater commondat 1))
	;; tab-num: 2)
        
	(iup:callback-set! *tim*
			   "ACTION_CB"
			   (lambda (time-obj)



			     (if (not *updaters-thread*)



			     (begin
				   ;; (debug:print-info 0 *default-log-port* "Updater started...")
				   (set! *updaters-thread*
					 (make-thread
					  (lambda ()
					    (dboard:common-run-curr-updaters commondat))))
				   (thread-start! *updaters-thread*))




				 (begin
				   (debug:print-info 0 *default-log-port* "Updater restarted...")
				   (thread-resume! *updaters-thread*)))
			     (thread-sleep! 0.25)

			     (if (eq? (thread-state *updaters-thread*) 'running)

				 (begin
				   (debug:print-info 0 *default-log-port* "Updater suspended...")
				   (thread-suspend! *updaters-thread*))
				 (begin
				   (set! *updaters-thread* #f)
				   ;; (debug:print-info 0 *default-log-port* "Updater done...")
				   ))
			     1))))
        (iup:main-loop)
      )))


(define last-copy-time 0)


;; Sync to tmp only if in read-only mode.

(define (sync-db-to-tmp tabdat)

Modified runs.scm from [52f98f2a96] to [6164fa6d2a].

334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
	   (args:get-arg "-one-pass"))
      (exit 0))

  (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))

  (let* ((num-running             (rmt:get-count-tests-running run-id))
	 (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
	 (job-group-limit         (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup)))
				    (if (string? jobg-count)
					(string->number jobg-count)
					jobg-count))))
    (if (> (+ num-running num-running-in-jobgroup) 0)
	(runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
    (if (not (eq? *last-num-running-tests* num-running))
	(begin







|







334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
	   (args:get-arg "-one-pass"))
      (exit 0))

  (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))

  (let* ((num-running             (rmt:get-count-tests-running run-id))
	 (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
	 (job-group-limit         (let ((jobg-count (configf:lookup-number *configdat* "jobgroups" jobgroup)))
				    (if (string? jobg-count)
					(string->number jobg-count)
					jobg-count))))
    (if (> (+ num-running num-running-in-jobgroup) 0)
	(runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
    (if (not (eq? *last-num-running-tests* num-running))
	(begin

Modified server.scm from [b0155e0a8d] to [89a8625f42].

235
236
237
238
239
240
241









242




243
244
245
246
247
248
249
		  (directory-exists? (conc areapath "/logs")))
		'()))

        ;; Get the list of server logs.
	(let* (
               ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers.
               ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'")))









               (server-logs   (glob (conc areapath "/logs/server-*-*.log")))




	       (num-serv-logs (length server-logs)))
	  (if (or (null? server-logs) (= num-serv-logs 0))
              (let ()
                 (debug:print 2  *default-log-port* "There are no servers running at " (common:human-time))
	         '()
              )
	      (let loop ((hed  (string-chomp (car server-logs)))







>
>
>
>
>
>
>
>
>
|
>
>
>
>







235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
		  (directory-exists? (conc areapath "/logs")))
		'()))

        ;; Get the list of server logs.
	(let* (
               ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers.
               ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'")))
               (server-logs   
                (handle-exceptions
		   exn
		   (begin
		     (debug:print 0 *default-log-port* "server:get-list: glob failed , exn=" exn)
                     (thread-sleep! 60)
                     (system "lsof -c mtest > /tmp/$USER/glob-failed.$$.lsof")
                     (debug:print 0 *default-log-port* "lsof output saved in /tmp/$USER/glob-failed.$$.lsof")
                     (thread-sleep! 60)
                     (glob (conc areapath "/logs/server-*-*.log"))
		   )
                   (glob (conc areapath "/logs/server-*-*.log"))
                )
               )
	       (num-serv-logs (length server-logs)))
	  (if (or (null? server-logs) (= num-serv-logs 0))
              (let ()
                 (debug:print 2  *default-log-port* "There are no servers running at " (common:human-time))
	         '()
              )
	      (let loop ((hed  (string-chomp (car server-logs)))