Megatest

Check-in [b1db729de1]
Login
Overview
Comment:Commented out some not-used fuctions, removed the server start every 120 seconds and added dbfile handle count
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.70
Files: files | file ages | folders
SHA1: b1db729de1b6d53dc1c12034629e9ebdb83034cc
User & Date: matt on 2022-05-27 19:21:22
Other Links: branch diff | manifest | tags
Context
2022-05-30
23:05
Changed dbfile:cautious-open-database to use most of the code from db:lock-create-open check-in: 58512a448e user: mmgraham tags: v1.70
2022-05-27
20:23
Move bunch of functions from db.scm to dbfile.scm check-in: ed0f0698ab user: matt tags: v1.70-refactor-procedures
19:21
Commented out some not-used fuctions, removed the server start every 120 seconds and added dbfile handle count check-in: b1db729de1 user: matt tags: v1.70
2022-05-22
20:20
Some awful hacks to keep the system running. There is something causing servers to crash, I suspect sync is the problem. This work-around just constantly replaces the servers with new ones. check-in: 3cdcb8c138 user: matt tags: v1.70
Changes

Modified db.scm from [66cbe6fa4e] to [48aca6e494].

1269
1270
1271
1272
1273
1274
1275
1276

1277
1278
1279
1280
1281
1282
1283
    (for-each
     (lambda (subdb)
       (let* ((dbname (db:run-id->dbname run-id))
	      (mtdb   (dbr:subdb-mtdb subdb))
	      (tmpdb  (db:get-subdb dbstruct run-id))
	      (refndb (dbr:subdb-refndb subdb))
	      (newres (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb)))
	 (stack-push! (dbr:subdb-dbstack subdb) tmpdb)

	 (set! res (cons newres res))))
     subdbs)
    res))

;;;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
;;    if #t use timestamps      : or 'timestamps







|
>







1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
    (for-each
     (lambda (subdb)
       (let* ((dbname (db:run-id->dbname run-id))
	      (mtdb   (dbr:subdb-mtdb subdb))
	      (tmpdb  (db:get-subdb dbstruct run-id))
	      (refndb (dbr:subdb-refndb subdb))
	      (newres (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb)))
	 ;; (stack-push! (dbr:subdb-dbstack subdb) tmpdb)
	 (dbfile:add-dbdat dbstruct run-id tmpdb)
	 (set! res (cons newres res))))
     subdbs)
    res))

;;;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
;;    if #t use timestamps      : or 'timestamps
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
			(debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))))
              (loop (current-seconds)))
            #t)))
    (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath)))

;;======================================================================
;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(define (common:watchdog)
  (debug:print-info 13 *default-log-port* "common:watchdog entered.")
  (if (launch:setup)
      (if (common:on-homehost?)
	  (let ((dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;;  #t)))
	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
	    (cond
	     ((dbr:dbstruct-read-only dbstruct)







|







5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
			(debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))))
              (loop (current-seconds)))
            #t)))
    (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath)))

;;======================================================================
;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
#;(define (common:watchdog)
  (debug:print-info 13 *default-log-port* "common:watchdog entered.")
  (if (launch:setup)
      (if (common:on-homehost?)
	  (let ((dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;;  #t)))
	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
	    (cond
	     ((dbr:dbstruct-read-only dbstruct)
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
                  (exit 1)))
                ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
                )))
	    (debug:print-info 13 *default-log-port* "watchdog done."))
	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))


(define (db:do-sync no-sync-db)
  (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "delta-sync"))
    (dbstruct (db:setup #t)))

    (debug:print 0 *default-log-port* "db:do-sync: sync-method: " syncer)
    (cond
      ((equal? syncer "brute-force-sync")
       (db:run-lock-and-sync no-sync-db))







|







5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
                  (exit 1)))
                ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
                )))
	    (debug:print-info 13 *default-log-port* "watchdog done."))
	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))


#;(define (db:do-sync no-sync-db)
  (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "delta-sync"))
    (dbstruct (db:setup #t)))

    (debug:print 0 *default-log-port* "db:do-sync: sync-method: " syncer)
    (cond
      ((equal? syncer "brute-force-sync")
       (db:run-lock-and-sync no-sync-db))
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
    )
  )
)




(define (server:writable-watchdog-bruteforce dbstruct)
  (thread-sleep! 1) ;; delay for startup
  #;(let* ((do-a-sync  (server:get-bruteforce-syncer dbstruct))
         (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t)))
    (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync
	       (args:get-arg "-server"))
      
      (let loop ()







|







5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
    )
  )
)




#;(define (server:writable-watchdog-bruteforce dbstruct)
  (thread-sleep! 1) ;; delay for startup
  #;(let* ((do-a-sync  (server:get-bruteforce-syncer dbstruct))
         (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t)))
    (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync
	       (args:get-arg "-server"))
      
      (let loop ()

Modified dbfile.scm from [9934719f0f] to [330fd055bc].

200
201
202
203
204
205
206


207
208
209
210
211
212
213
214
215
216
217
218


219
220
221
222
223

224
225
226
227
228
229
230
(define (dbfile:get-subdb dbstruct run-id)
  (let* ((dbfname (dbfile:run-id->dbname run-id)))
    (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f)))

(define (dbfile:set-subdb dbstruct run-id subdb)
  (hash-table-set! (dbr:dbstruct-subdbs dbstruct) (dbfile:run-id->dbname run-id) subdb))



;; Get/open a database
;;    if run-id => get run specific db
;;    if #f     => get main db
;;    if run-id is a string treat it as a filename
;;    if db already open - return inmem
;;    if db not open, open inmem, rundb and sync then return inmem
;;    inuse gets set automatically for rundb's
;;
(define (dbfile:get-dbdat dbstruct run-id)
  (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
    (if (stack-empty? (dbr:subdb-dbstack subdb))
	 #f


	(stack-pop! (dbr:subdb-dbstack subdb)))))

;; return a previously opened db handle to the stack of available handles
(define (dbfile:add-dbdat dbstruct run-id dbdat)
  (let* ((subdb (dbfile:get-subdb dbstruct run-id)))

    (stack-push! (dbr:subdb-dbstack subdb) dbdat)))

;; set up a subdb
;;
(define (dbfile:init-subdb dbstruct run-id init-proc)
  (let* ((dbname    (dbfile:run-id->dbname run-id))
	 (areapath  (dbr:dbstruct-areapath dbstruct))







>
>











|
>
>
|




>







200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
(define (dbfile:get-subdb dbstruct run-id)
  (let* ((dbfname (dbfile:run-id->dbname run-id)))
    (hash-table-ref/default (dbr:dbstruct-subdbs dbstruct) dbfname #f)))

(define (dbfile:set-subdb dbstruct run-id subdb)
  (hash-table-set! (dbr:dbstruct-subdbs dbstruct) (dbfile:run-id->dbname run-id) subdb))

(define *dbfile:num-handles-in-use* 0)

;; Get/open a database
;;    if run-id => get run specific db
;;    if #f     => get main db
;;    if run-id is a string treat it as a filename
;;    if db already open - return inmem
;;    if db not open, open inmem, rundb and sync then return inmem
;;    inuse gets set automatically for rundb's
;;
(define (dbfile:get-dbdat dbstruct run-id)
  (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
    (if (stack-empty? (dbr:subdb-dbstack subdb))
	#f
	(begin
	  (set! *dbfile:num-handles-in-use* (+ *dbfile:num-handles-in-use* 1))
	  (stack-pop! (dbr:subdb-dbstack subdb))))))

;; return a previously opened db handle to the stack of available handles
(define (dbfile:add-dbdat dbstruct run-id dbdat)
  (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
    (set! *dbfile:num-handles-in-use* (- *dbfile:num-handles-in-use* 1))
    (stack-push! (dbr:subdb-dbstack subdb) dbdat)))

;; set up a subdb
;;
(define (dbfile:init-subdb dbstruct run-id init-proc)
  (let* ((dbname    (dbfile:run-id->dbname run-id))
	 (areapath  (dbr:dbstruct-areapath dbstruct))
245
246
247
248
249
250
251




252
253
254
255
256
257
258
;;
;;  1. if needed setup the subdb for the given run-id
;;  2. if there is no existing db handle in the stack
;;     create a new handle and return it (do NOT add
;;     it to the stack).
;;
(define (dbfile:open-db dbstruct run-id init-proc)




  (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
    (if (not subdb) ;; not yet defined
	(begin
	  (dbfile:init-subdb dbstruct run-id init-proc)
	  (dbfile:open-db dbstruct run-id init-proc))
	(let* ((dbdat (dbfile:get-dbdat dbstruct run-id)))
	  (if dbdat







>
>
>
>







250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
;;
;;  1. if needed setup the subdb for the given run-id
;;  2. if there is no existing db handle in the stack
;;     create a new handle and return it (do NOT add
;;     it to the stack).
;;
(define (dbfile:open-db dbstruct run-id init-proc)
  (if (> *dbfile:num-handles-in-use* 10)
      (let* ((wait-delay (- *dbfile:num-handles-in-use* 9)))
	(dbfile:print-err "INFO: over ten dbfile handle threads in use ("*dbfile:num-handles-in-use*") delaying "wait-delay" second")
	(thread-sleep! wait-delay)))
  (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
    (if (not subdb) ;; not yet defined
	(begin
	  (dbfile:init-subdb dbstruct run-id init-proc)
	  (dbfile:open-db dbstruct run-id init-proc))
	(let* ((dbdat (dbfile:get-dbdat dbstruct run-id)))
	  (if dbdat

Modified http-transport.scm from [3269081060] to [4ca66092ff].

529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
	     (flush-output *default-log-port*)))
      (if (common:low-noise-print 60 "dbstats")
	  (begin
	    (debug:print 0 *default-log-port* "Server stats:")
	    (db:print-current-query-stats)))
      (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
	(cond
	 ((and *server-run*
	       (> (- (current-seconds) server-start-time) 120)) ;; let's try server replacement
	  ;; ((adj-proc-load . 0.056875) (adj-core-load . 0.11375) (1m-load . 0.91) (5m-load . 0.77) (15m-load . 1.0) (proc . 16) (core . 8) (phys . 1))
	  (let* ((loaddat       (common:get-normalized-cpu-load #f))
		 (adj-proc-load (alist-ref 'adj-proc-load loaddat))
		 (adj-core-load (alist-ref 'adj-core-load loaddat))
		 (adj-load      (max adj-proc-load adj-core-load)))
	    (if (< adj-load 2) ;; reduce chance of runaway
		(server:run *toppath*))







|
|







529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
	     (flush-output *default-log-port*)))
      (if (common:low-noise-print 60 "dbstats")
	  (begin
	    (debug:print 0 *default-log-port* "Server stats:")
	    (db:print-current-query-stats)))
      (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
	(cond
	 #;((and *server-run*
	       (> (- (current-seconds) server-start-time) 420)) ;; let's try server replacement
	  ;; ((adj-proc-load . 0.056875) (adj-core-load . 0.11375) (1m-load . 0.91) (5m-load . 0.77) (15m-load . 1.0) (proc . 16) (core . 8) (phys . 1))
	  (let* ((loaddat       (common:get-normalized-cpu-load #f))
		 (adj-proc-load (alist-ref 'adj-proc-load loaddat))
		 (adj-core-load (alist-ref 'adj-core-load loaddat))
		 (adj-load      (max adj-proc-load adj-core-load)))
	    (if (< adj-load 2) ;; reduce chance of runaway
		(server:run *toppath*))