Megatest

Changes On Branch 46b8846fd761dfcf
Login

Changes In Branch v1.71 Through [46b8846fd7] Excluding Merge-Ins

This is equivalent to a diff from eb2060809c to 46b8846fd7

2024-04-01
11:10
Changed .mtdb_v1.71 to .mtdb. Removed exit-on-version-changed. check-in: 32b4deecd4 user: mmgraham tags: v1.71
2023-06-27
18:45
cherry picked 8ff6166610. Fixes the quote problem in PATH check-in: 2438268e0b user: mmgraham tags: v1.70, v1.7014
2023-05-17
16:43
corrected a typo check-in: 46b8846fd7 user: mmgraham tags: v1.71
15:51
changed .megatest dir to .mtdb_1.71. Put this name in a global dbdir check-in: 655edf212f user: mmgraham tags: v1.71
2023-05-15
18:59
Limit to 10 dbs in num-run-dbs. Fully turned off running find-and-mark in runs.scm check-in: 447b257e2f user: matt tags: v1.71
2023-05-08
16:46
Added with-transaction around a for-each-row check-in: eb2060809c user: mmgraham tags: v1.70
2023-05-07
22:56
Corrected file paths of -shm and -wal files in db:all-db-sync check-in: 8bf6fd860b user: mmgraham tags: v1.70

Modified common.scm from [760479d289] to [fd4892e22f].

131
132
133
134
135
136
137

138
139
140
141
142
143
144
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145







+








(define *pkts-info*    (make-hash-table)) ;; store stuff like the last parent here
(define *configinfo*   #f)   ;; raw results from setup, includes toppath and table from megatest.config
(define *runconfigdat* #f)   ;; run configs data
(define *configdat*    #f)   ;; megatest.config data
(define *configstatus* #f)   ;; status of data; 'fulldata : all processing done, #f : no data yet, 'partialdata : partial read done
(define *toppath*      #f)
(define *dbdir* ".mtdb_v1.71")
(define *already-seen-runconfig-info* #f)

(define *test-meta-updated* (make-hash-table))
(define *globalexitstatus*  0) ;; attempt to work around possible thread issues
(define *passnum*           0) ;; when running track calls to run-tests or similar
;; (define *alt-log-file* #f)  ;; used by -log
;; (define *common:denoise*    (make-hash-table)) ;; for low noise printing
952
953
954
955
956
957
958
959
960


961
962
963
964


965
966
967
968
969
970
971
953
954
955
956
957
958
959


960
961
962
963


964
965
966
967
968
969
970
971
972







-
-
+
+


-
-
+
+







					  (string-translate *toppath* "/" "."))
				    (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name
					  "/megatest_localdb/"
					  tsname
					  (string-translate *toppath* "/" "."))
				    ))))
		(set! *db-cache-path* dbpath)
		;; ensure megatest area has .megatest
		(let ((dbarea (conc *toppath* "/.megatest")))
		;; ensure megatest area has dbdir
		(let ((dbarea (conc *toppath* "/" *dbdir*)))
		  (if (not (file-exists? dbarea))
		      (create-directory dbarea)))
		;; ensure tmp area has .megatest
		(let ((dbarea (conc dbpath "/.megatest")))
		;; ensure tmp area has dbdir
		(let ((dbarea (conc dbpath "/" *dbdir*)))
		  (if (not (file-exists? dbarea))
		      (create-directory dbarea)))
		dbpath))
	  #f)))

(define (common:get-area-path-signature)
  (message-digest-string (md5-primitive) *toppath*))

Modified dashboard.scm from [0995d5cbb4] to [381037dda7].

662
663
664
665
666
667
668
669

670
671
672
673
674
675
676
662
663
664
665
666
667
668

669
670
671
672
673
674
675
676







-
+







                        0
                        (dboard:rundat-last-update run-dat)))
	 (last-db-time (if do-not-use-db-file-timestamps
			   0
			   (dboard:rundat-last-db-time run-dat)))
	 (db-path      (or (dboard:rundat-db-path run-dat)
			   (let* ((db-dir (common:get-db-tmp-area))
				  (db-pth (conc db-dir "/.megatest/main.db")))
				  (db-pth (conc db-dir "/" *dbdir* "/main.db")))
			     (dboard:rundat-db-path-set! run-dat db-pth)
			     db-pth)))
	 (db-mod-time  (common:lazy-sqlite-db-modification-time db-path))
	 (db-modified  (>= db-mod-time last-db-time))
	 (multi-get    (> (dboard:rundat-run-data-offset run-dat) 0))  ;; multi-get in progress
	 (tmptests     (if (or do-not-use-db-file-timestamps
			       (dboard:tabdat-filters-changed tabdat)
3790
3791
3792
3793
3794
3795
3796
3797

3798
3799
3800
3801
3802
3803
3804
3790
3791
3792
3793
3794
3795
3796

3797
3798
3799
3800
3801
3802
3803
3804







-
+







;;======================================================================

(stop-the-train)

(define (main)
  ;; (print "Starting dashboard main")
    
  (let* ((mtdb-path (conc *toppath* "/.megatest/main.db"))
  (let* ((mtdb-path (conc *toppath* "/" *dbdir* "/main.db"))
         (target (args:get-arg "-target"))
         (commondat       (dboard:commondat-make)))
    (if target
        (begin
          (args:remove-arg-from-ht "-target")
          (dboard:commondat-target-set! commondat target)
        )
3887
3888
3889
3890
3891
3892
3893
3894

3895
3896
3897
3898
3899
3900
3901
3887
3888
3889
3890
3891
3892
3893

3894
3895
3896
3897
3898
3899
3900
3901







-
+








(define last-copy-time 0)


;; Sync to tmp only if in read-only mode.

(define (sync-db-to-tmp tabdat)
  (let* ((db-file "./.megatest/main.db"))
  (let* ((db-file (conc "./" *dbdir* "/main.db")))
    (if (and (not (file-write-access? db-file)) ( > (current-seconds) (+ last-copy-time 5)))
      (begin
        (db:multi-db-sync (db:setup #f) 'old2new)
        (set! last-copy-time (current-seconds))
      )
    )
  )

Modified db.scm from [d4dcfd72a3] to [82748fbfed].

409
410
411
412
413
414
415
416

417
418
419
420
421
422
423
424
425

426
427
428
429
430
431
432
409
410
411
412
413
414
415

416
417
418
419
420
421
422
423
424

425
426
427
428
429
430
431
432







-
+








-
+










(define (db:all-db-sync dbstruct)
  (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db))
	 (data-synced       0) ;; count of changed records
    (tmp-area       (common:get-db-tmp-area))
    (dbfiles        (glob (conc tmp-area"/.megatest/*.db")))
    (dbfiles        (glob (conc tmp-area"/" *dbdir* "/*.db")))
    (sync-durations (make-hash-table))
    (no-sync-db        (db:open-no-sync-db)))
    (for-each
     (lambda (file) ;; tmp db file
       (debug:print-info 3 *default-log-port* "file: " file)
       (let* ((fname       (conc (pathname-file file) ".db")) ;; fname is tmp db file
              (wal-file (conc file "-wal"))
              (shm-file (conc file "-shm"))
	      (fulln       (conc *toppath*"/.megatest/"fname)) ;; fulln is nfs db name
	      (fulln       (conc *toppath*"/" *dbdir* "/"fname)) ;; fulln is nfs db name
              (wal-time     (if (file-exists? wal-file)             
			       (file-modification-time wal-file)
                               0))
              (shm-time     (if (file-exists? shm-file)             
			       (file-modification-time shm-file)
                               0))

487
488
489
490
491
492
493
494

495
496
497
498
499
500
501
487
488
489
490
491
492
493

494
495
496
497
498
499
500
501







-
+







    (tmp-area       (common:get-db-tmp-area))
    (old2new (member 'old2new options))
    (dejunk (member 'dejunk options))
    (killservers (member 'killservers options))
    (servers (server:get-list *toppath*))
    (src-area (if old2new *toppath* tmp-area))
    (dest-area (if old2new tmp-area *toppath*))
    (dbfiles        (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db"))))
    (dbfiles        (if old2new (glob (conc *toppath* "/" *dbdir* "/*.db")) (glob (conc tmp-area "/" *dbdir* "/*.db"))))
    (keys (db:get-keys dbstruct))
    (sync-durations (make-hash-table)))


    (if killservers
      (begin
       	  (for-each
514
515
516
517
518
519
520
521
522


523
524
525
526
527
528
529
514
515
516
517
518
519
520


521
522
523
524
525
526
527
528
529







-
-
+
+







    )
    (for-each
     (lambda (srcfile)
       (debug:print-info 3 *default-log-port* "file: " srcfile)
       (let* ((fname (conc (pathname-file srcfile) ".db"))
              (basename (pathname-file srcfile))
              (run-id (if (string= basename "main") #f (string->number basename)))
	      (destfile (conc dest-area "/.megatest/" fname))
              (dest-directory  (conc dest-area "/.megatest/"))
	      (destfile (conc dest-area "/" *dbdir* "/" fname))
              (dest-directory  (conc dest-area "/" *dbdir* "/"))
              (dummy (debug:print-info 2 *default-log-port* "destfile = " destfile))
              (dummy2 (debug:print-info 2 *default-log-port* "dejunk = " dejunk))
              ;; TODO: time1 and time2 need to take into account -wal and -shm files
	      (time1 (file-modification-time srcfile))
              (time2 (if (file-exists? destfile)
                         (begin
                            (debug:print-info 2 *default-log-port* "destfile " destfile " exists")
1677
1678
1679
1680
1681
1682
1683
1684

1685
1686
1687
1688
1689
1690
1691
1677
1678
1679
1680
1681
1682
1683

1684
1685
1686
1687
1688
1689
1690
1691







-
+












 (define (db:get-changed-run-ids since-time)
  (let* ((dbdir      (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir"))
        (alldbs     (glob (conc dbdir "/.megatest/[0-9]*.db*")))
        (alldbs     (glob (conc dbdir "/" *dbdir* "/[0-9]*.db*")))
        (changed    (filter (lambda (dbfile)
                              (> (file-modification-time dbfile) since-time))
                            alldbs)))
    (delete-duplicates
     (map (lambda (dbfile)
           (let* ((res (string-match ".*\\/(\\d\\d)\\.db*" dbfile)))
             (if res
4147
4148
4149
4150
4151
4152
4153
4154

4155
4156
4157
4158
4159
4160
4161
4147
4148
4149
4150
4151
4152
4153

4154
4155
4156
4157
4158
4159
4160
4161







-
+







         (changed_run_dbs (db:get-changed-run-ids since-time)) ;; gets the rundb numbers
         (all_run_ids 
          (db:with-db dbstruct #f #f 
            (lambda (dbdat db)
              (sqlite3:fold-row backcons '() db "SELECT id FROM runs"))
          )
         )
         (changed_run_ids (filter (lambda (run) (member (modulo run 100) changed_run_dbs)) all_run_ids))
         (changed_run_ids (filter (lambda (run) (member (modulo run (num-run-dbs)) changed_run_dbs)) all_run_ids))
         ;; TODO: couldn't we just use changed_run_ids for run_ids?
         (run_ids 
          (db:with-db dbstruct #f #f 
            (lambda (dbdat db)
              (sqlite3:fold-row backcons '() db "SELECT id FROM runs  WHERE last_update>=?" since-time))
          )
         )
4370
4371
4372
4373
4374
4375
4376
4377

4378
4379
4380
4381
4382
4383

4384
4385
4386
4387
4388
4389
4390
4370
4371
4372
4373
4374
4375
4376

4377
4378
4379
4380
4381
4382

4383
4384
4385
4386
4387
4388
4389
4390







-
+





-
+







	  #f
        ))))

;; sync for filesystem local db writes
;;
(define (db:run-lock-and-sync no-sync-db)
  (let* ((tmp-area       (common:get-db-tmp-area))
	 (dbfiles        (glob (conc tmp-area"/.megatest/*.db")))
	 (dbfiles        (glob (conc tmp-area"/" *dbdir* "/*.db")))
	 (sync-durations (make-hash-table)))
    ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles)
    (for-each
     (lambda (file)
       (let* ((fname (conc (pathname-file file) ".db"))
	      (fulln (conc *toppath*"/.megatest/"fname))
	      (fulln (conc *toppath*"/" *dbdir* "/"fname))
	      (time1 (if (file-exists? file)
			 (file-modification-time file)
			 (begin
			   (debug:print-info 0 *default-log-port* "Sync - I do not see file "file)
			   1)))
	      (time2 (if (file-exists? fulln)
			 (file-modification-time fulln)

Modified dbfile.scm from [a0594e55e5] to [5e983c1ba3].

40
41
42
43
44
45
46




47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66


67
68
69
70


71
72
73
74
75
76
77
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68


69
70
71
72


73
74
75
76
77
78
79
80
81







+
+
+
+


















-
-
+
+


-
-
+
+







	ports

	commonmod
	)

;; (import debugprint)

;; Parameters

(define num-run-dbs           (make-parameter 10))

;;======================================================================
;;  R E C O R D S
;;======================================================================

;; a single Megatest area with it's multiple dbs is
;; managed in a dbstruct
;;
(defstruct dbr:dbstruct
  (areapath  #f)
  (homehost  #f)
  (tmppath   #f)
  (read-only #f)
  (subdbs (make-hash-table))
  )

;; NOTE: Need one dbr:subdb per main.db, 1.db ...
;;
(defstruct dbr:subdb
  (dbname      #f) ;; .megatest/1.db
  (mtdbfile    #f) ;; mtrah/.megatest/1.db
  (dbname      #f) ;; " *dbdir* "/1.db
  (mtdbfile    #f) ;; mtrah/" *dbdir* "/1.db
  (mtdbdat     #f) ;; only need one of these for syncing
  ;; (dbdats      (make-hash-table))  ;; id => dbdat 
  (tmpdbfile   #f) ;; /tmp/.../.megatest/1.db
  ;; (refndbfile  #f) ;; /tmp/.../.megatest/1.db_ref
  (tmpdbfile   #f) ;; /tmp/.../" *dbdir* "/1.db
  ;; (refndbfile  #f) ;; /tmp/.../" *dbdir* "/1.db_ref
  (dbstack     (make-stack)) ;; stack for tmp dbr:dbdat,
  (homehost    #f) ;; not used yet
  (on-homehost #f) ;; not used yet
  (read-only   #f)
  (last-sync   0)
  (last-write  (current-seconds))
  )                ;; goal is to converge on one struct for an area but for now it is too confusing
91
92
93
94
95
96
97

98
99
100
101
102
103
104
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109







+







(define *db-sync-in-progress* #f)
(define *db-with-db-mutex*    (make-mutex))
(define *max-api-process-requests* 0)
(define *api-process-request-count* 0)
(define *db-write-access*     #t)
(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
(define *dbdir* ".mtdb_v1.71")

(define (db:generic-error-printout exn . message)
  (print-call-chain (current-error-port))
  (apply dbfile:print-err message)
  (dbfile:print-err
    ", error: "     ((condition-property-accessor 'exn 'message)   exn)
    ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
194
195
196
197
198
199
200
201
202


203
204
205
206
207
208
209
199
200
201
202
203
204
205


206
207
208
209
210
211
212
213
214







-
-
+
+








(define (db:dbname->path apath dbname)
  (conc apath"/"dbname))

;; POTENTIAL BUG: this implementation could produce a db file if run-id is neither #f or a number
(define (dbfile:run-id->dbname run-id)
  (cond
   ((number? run-id) (conc ".megatest/" (modulo run-id 100) ".db"))
   ((not run-id)     (conc ".megatest/main.db"))
   ((number? run-id) (conc  *dbdir* "/" (modulo run-id (num-run-dbs)) ".db"))
   ((not run-id)     (conc  *dbdir* "/main.db"))
   (else             run-id)))

;; Make the dbstruct, setup up auxillary db's and call for main db at least once
;;
;; called in http-transport and replicated in rmt.scm for *local* access. 
;;
(define (dbfile:setup do-sync areapath tmppath)
803
804
805
806
807
808
809
810



811
812
813
814
815
816
817
808
809
810
811
812
813
814

815
816
817
818
819
820
821
822
823
824







-
+
+
+







	        (sqlite3:for-each-row
	          (lambda (a . b)
	            (set! fromdat (cons (apply vector a b) fromdat))
	            (if (> (length fromdat) batch-len)
		      (begin
		        (set! fromdats (cons fromdat fromdats))
		        (set! fromdat  '())
		        (set! totrecords (+ totrecords 1)))
		        (set! totrecords (+ totrecords 1))
                        (thread-sleep! 2)
                      )
                    )
                 )
	         (dbr:dbdat-dbh fromdb)
	         full-sel)
              )
            )

Modified megatest-version.scm from [9b7afeda4b] to [4f15dbe2cd].

16
17
18
19
20
21
22
23

16
17
18
19
20
21
22

23







-
+
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

;; (declare (unit megatest-version))

(define megatest-version 1.7014)
(define megatest-version 1.7101)

Modified runs.scm from [6164fa6d2a] to [9381a36070].

808
809
810
811
812
813
814
815

816
817
818
819
820
821
822
823
824
825
826
827

828
829
830
831
832
833

834
835
836
837
838
839
840
808
809
810
811
812
813
814

815
816
817
818
819
820
821
822
823
824
825
826

827
828
829
830
831
832

833
834
835
836
837
838
839
840







-
+











-
+





-
+







		;; 				exn
		;; 				(begin
		;; 				  (print-call-chain)
		;; 				  (print " message: " ((condition-property-accessor 'exn 'message) exn)))
		;; 			      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
		;; 						    (any->number reglen) all-tests-registry)))
		;; 			  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()			 ;; BBQ: why are we visiting ALL runs here?	    
		 #;(th2        (make-thread (lambda ()			 ;; BBQ: why are we visiting ALL runs here?	    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (handle-exceptions
							       exn
							       (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
							       (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
							run-ids)))
					  "runs: mark-incompletes")))
	    ;; (thread-start! th1)
	    (thread-start! th2)
	    ;; (thread-start! th2)
	    ;; (thread-join! th1)
	    ;; just do the main stuff in the main thread
	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
								    (any->number reglen) all-tests-registry)
	    (set! keep-going #f)
	    (thread-join! th2)
	    #;(thread-join! th2)
	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
	    (if (> run-count 0) ;; handle reruns
		(begin
		  (if (not (hash-table-ref/default flags "-preclean" #f))
		      (hash-table-set! flags "-preclean" #t))
		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS"))
2378
2379
2380
2381
2382
2383
2384
2385

2386
2387
2388
2389
2390
2391
2392
2378
2379
2380
2381
2382
2383
2384

2385
2386
2387
2388
2389
2390
2391
2392







-
+







	 (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
	 (rp-mutex     (make-mutex))
	 (bup-mutex    (make-mutex))
         (keep-records (args:get-arg "-keep-records")) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".
	 (test-records '())) ;; for tasks that we wish to operate on all tests in one fell swoop

    (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs))
           (dbfile             (conc  *toppath* "/.megatest/main.db"))
           (dbfile             (conc  *toppath* "/" *dbdir* "/main.db"))
           (readonly-mode      (not (file-write-access? dbfile))))
      (when (and readonly-mode
                 (member action write-access-actions))
        (debug:print-error 0 *default-log-port* dbfile " is readonly.  Cannot proceed with action ["action"] in which write-access isrequired .")
        (exit 1)))
    
    (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)