Megatest

Check-in [a6be57bfc9]
Login
Overview
Comment:Cleaned up some gratuitous database opens, quietened some debug messages
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.70
Files: files | file ages | folders
SHA1: a6be57bfc9bc93eb74fcc1eb41b2b0a7456f2ac7
User & Date: matt on 2022-05-22 18:02:07
Other Links: branch diff | manifest | tags
Context
2022-05-22
20:20
Some awful hacks to keep the system running. There is something causing servers to crash, I suspect sync is the problem. This work-around just constantly replaces the servers with new ones. check-in: 3cdcb8c138 user: matt tags: v1.70
18:02
Cleaned up some gratuitous database opens, quietened some debug messages check-in: a6be57bfc9 user: matt tags: v1.70
15:42
Ensure that db opens are only done once per process per db file. Put out messages if this is not the case. check-in: d0bca99717 user: matt tags: v1.70
Changes

Modified db.scm from [73d0c4cca7] to [18a5213140].

211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
			#f))
	 (db        (if have-struct                ;; this stuff just allows us to call with a db handle directly
			(dbr:dbdat-dbh dbdat)
			dbstruct))
	 (fname     (if dbdat
			(dbr:dbdat-dbfile dbdat)
			"nofilenameavailable"))
	 (subdb     (if have-struct
			(dbfile:get-subdb dbstruct run-id)
			#f))
	 (use-mutex (> *api-process-request-count* 25))) ;; was 25
    (if (and use-mutex
	     (common:low-noise-print 120 "over-50-parallel-api-requests"))
	(debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access"))
    (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*))
	(debug:print-info 2 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*))
    (condition-case
	(begin
	  (if use-mutex (mutex-lock! *db-with-db-mutex*))
	  (let ((res (apply proc dbdat db params)))
	    (if use-mutex (mutex-unlock! *db-with-db-mutex*))
	    ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
	    (if dbdat







|







|







211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
			#f))
	 (db        (if have-struct                ;; this stuff just allows us to call with a db handle directly
			(dbr:dbdat-dbh dbdat)
			dbstruct))
	 (fname     (if dbdat
			(dbr:dbdat-dbfile dbdat)
			"nofilenameavailable"))
	 #;(subdb     (if have-struct
			(dbfile:get-subdb dbstruct run-id)
			#f))
	 (use-mutex (> *api-process-request-count* 25))) ;; was 25
    (if (and use-mutex
	     (common:low-noise-print 120 "over-50-parallel-api-requests"))
	(debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access"))
    (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*))
	(debug:print-info 1 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*))
    (condition-case
	(begin
	  (if use-mutex (mutex-lock! *db-with-db-mutex*))
	  (let ((res (apply proc dbdat db params)))
	    (if use-mutex (mutex-unlock! *db-with-db-mutex*))
	    ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
	    (if dbdat
432
433
434
435
436
437
438


439
440
441
442
443
444
445
446
447
448
449
450
    ;; (cons db dbpath)))
    (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access))))

;; sync run from tmp disk to nfs disk if touched
;;
(define (db:sync-touched dbstruct run-id #!key (force-sync #f))
  (debug:print-info 0 *default-log-port* "db:sync-touched Syncing: " (conc (if run-id run-id "main") ".db"))


  (let* ((subdb   (or (dbfile:get-subdb dbstruct run-id) (dbfile:init-subdb dbstruct run-id db:initialize-main-db)))
         (tmpdbfile (dbr:subdb-tmpdbfile subdb))
	 (mtdb    (dbr:subdb-mtdbdat subdb))
         (tmpdb (dbfile:open-sqlite3-db tmpdbfile #f))
	 (start-t (current-seconds)))
    (mutex-lock! *db-multi-sync-mutex*)
    (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) )))
      (mutex-unlock! *db-multi-sync-mutex*)
      (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb mtdb))
    (mutex-lock! *db-multi-sync-mutex*)
    (set! *db-last-sync* start-t)
    (set! *db-last-access* start-t)







>
>
|

|
|
|







432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
    ;; (cons db dbpath)))
    (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access))))

;; sync run from tmp disk to nfs disk if touched
;;
(define (db:sync-touched dbstruct run-id #!key (force-sync #f))
  (debug:print-info 0 *default-log-port* "db:sync-touched Syncing: " (conc (if run-id run-id "main") ".db"))
  (let* (;; the subdb is needed to access the mtdbdat
	 (subdb     (or (dbfile:get-subdb dbstruct run-id)
			(dbfile:init-subdb dbstruct run-id db:initialize-main-db)))
         (tmpdbfile (dbr:subdb-tmpdbfile subdb))
	 (mtdb      (dbr:subdb-mtdbdat subdb))
         (tmpdb     (dbfile:open-db dbstruct run-id db:initialize-main-db)) ;; sqlite3-db tmpdbfile #f))
	 (start-t   (current-seconds)))
    (mutex-lock! *db-multi-sync-mutex*)
    (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) )))
      (mutex-unlock! *db-multi-sync-mutex*)
      (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb mtdb))
    (mutex-lock! *db-multi-sync-mutex*)
    (set! *db-last-sync* start-t)
    (set! *db-last-access* start-t)

Modified dbfile.scm from [0891876475] to [9934719f0f].

236
237
238
239
240
241
242



243
244
245
246
247
248
249
				    mtdbfile:  mtdbpath
				    tmpdbfile: tmpdbpath
				    mtdbdat:   mtdbdat)))
    (dbfile:set-subdb dbstruct run-id newsubdb)
    newsubdb)) ;; return the new subdb - but shouldn't really use it

;; returns dbdat with dbh and dbfilepath



;;  1. if needed setup the subdb for the given run-id
;;  2. if there is no existing db handle in the stack
;;     create a new handle and return it (do NOT add
;;     it to the stack).
;;
(define (dbfile:open-db dbstruct run-id init-proc)
  (let* ((subdb (dbfile:get-subdb dbstruct run-id)))







>
>
>







236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
				    mtdbfile:  mtdbpath
				    tmpdbfile: tmpdbpath
				    mtdbdat:   mtdbdat)))
    (dbfile:set-subdb dbstruct run-id newsubdb)
    newsubdb)) ;; return the new subdb - but shouldn't really use it

;; returns dbdat with dbh and dbfilepath
;;
;; NOTE: the handle is on /tmp db file!
;;
;;  1. if needed setup the subdb for the given run-id
;;  2. if there is no existing db handle in the stack
;;     create a new handle and return it (do NOT add
;;     it to the stack).
;;
(define (dbfile:open-db dbstruct run-id init-proc)
  (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277

;; this stuff is for initial debugging, please remove it when
;; this code stabilizes
(define *dbopens* (make-hash-table))
(define (dbfile:inc-db-open dbfile)
  (let* ((curr-opens-count (+ (hash-table-ref/default *dbopens* dbfile 0) 1)))
    (if (> curr-opens-count 1) ;; this should NOT be happening
	(dbfile:print-err "ERROR: db "dbfile" has been opened "curr-opens-count" times!"))
    (hash-table-set! *dbopens* dbfile curr-opens-count)
    curr-opens-count))

;; Open the classic megatest.db file (defaults to open in toppath)
;;
;;   NOTE: returns a dbdat not a dbstruct!
;;







|







266
267
268
269
270
271
272
273
274
275
276
277
278
279
280

;; this stuff is for initial debugging, please remove it when
;; this code stabilizes
(define *dbopens* (make-hash-table))
(define (dbfile:inc-db-open dbfile)
  (let* ((curr-opens-count (+ (hash-table-ref/default *dbopens* dbfile 0) 1)))
    (if (> curr-opens-count 1) ;; this should NOT be happening
	(dbfile:print-err "INFO: db "dbfile" has been opened "curr-opens-count" times!"))
    (hash-table-set! *dbopens* dbfile curr-opens-count)
    curr-opens-count))

;; Open the classic megatest.db file (defaults to open in toppath)
;;
;;   NOTE: returns a dbdat not a dbstruct!
;;

Modified dbmod.scm from [9cc13aa737] to [043beb90c3].

29
30
31
32
33
34
35
36



















37
	srfi-69)

(define (db:run-id->dbname run-id)
  (cond
   ((number? run-id)(conc run-id ".db"))
   ((not run-id)    "main.db")
   (else            run-id)))
 



















)







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
	srfi-69)

(define (db:run-id->dbname run-id)
  (cond
   ((number? run-id)(conc run-id ".db"))
   ((not run-id)    "main.db")
   (else            run-id)))


;;======================================================================
;; hash of hashs
;;======================================================================


(define (db:hoh-set! dat key1 key2 val)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
    (if subhash
	(hash-table-set! subhash key2 val)
	(begin
	  (hash-table-set! dat key1 (make-hash-table))
	  (db:hoh-set! dat key1 key2 val)))))

(define (db:hoh-get dat key1 key2)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
    (and subhash
	 (hash-table-ref/default subhash key2 #f))))

)

Modified runs.scm from [a43ae4acb3] to [32a6ef2bce].

1500
1501
1502
1503
1504
1505
1506


1507
1508
1509
1510
1511
1512
1513
;; every time though the loop increment the test/itempatt val.
;; when the min is > max-allowed and none running then force exit
;;
(define *max-tries-hash* (make-hash-table))

(define (runs:pretty-long-list lst)
   (if (> (length lst) 8)(append (take lst 3)(list "...")) lst))



;;======================================================================
;; runs:run-tests-queue is called by runs:run-tests
;;======================================================================
;;
;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry)







>
>







1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
;; every time though the loop increment the test/itempatt val.
;; when the min is > max-allowed and none running then force exit
;;
(define *max-tries-hash* (make-hash-table))

(define (runs:pretty-long-list lst)
   (if (> (length lst) 8)(append (take lst 3)(list "...")) lst))

(define *last-loop-time-ms* 0)

;;======================================================================
;; runs:run-tests-queue is called by runs:run-tests
;;======================================================================
;;
;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry)
1638
1639
1640
1641
1642
1643
1644












1645
1646
1647
1648
1649
1650
1651
			   jobgroup:    jobgroup
			   waitons:     waitons
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))












	(runs:dat-regfull-set! runsdat regfull)
    
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))







>
>
>
>
>
>
>
>
>
>
>
>







1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
			   jobgroup:    jobgroup
			   waitons:     waitons
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))

	;; too-tight loop detection and delay, this might hide issues
	;; that occur in long run times. Consider commenting when debugging
	;; 
	(if (and (>= num-running max-concurrent-jobs)
		 (< (- (current-milliseconds) *last-loop-time-ms*) 500))
	    (begin
	      (if (runs:lownoise "too-tight-loop" 5)
		  (debug:print-info 2 *default-log-port* "Excessively fast loop, delaying 1/2 second"))
	      (thread-sleep! 0.5)))
	(set! *last-loop-time-ms* (current-milliseconds))
     
	(runs:dat-regfull-set! runsdat regfull)
    
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))