Overview
Context
Changes
Modified common.scm
from [9ceabb42c1]
to [3380145d50].
︙ | | |
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
|
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
|
-
-
+
-
-
+
+
+
-
+
+
+
-
-
+
+
+
|
(define *wdnum* 0)
(define *wdnum*mutex (make-mutex))
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:watchdog)
(thread-sleep! 0.05) ;; delay for startup
(let ((legacy-sync (common:run-sync?))
(debug-mode (debug:debug-mode 1))
(last-time (current-seconds))
(this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))
(this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x))))
)
(debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num)
(if (and legacy-sync (not *time-to-exit*))
(let ((dbstruct (db:setup)))
(let* ((dbstruct (db:setup))
(mtdb (dbr:dbstruct-mtdb dbstruct))
(mtpath (db:dbdat-get-path mtdb)))
(debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
(let loop ()
;;(BB> "watchdog loop. pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
;; sync for filesystem local db writes
;;
(mutex-lock! *db-multi-sync-mutex*)
(let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
(sync-in-progress *db-sync-in-progress*)
(should-sync (and (not *time-to-exit*)
(> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum
(start-time (current-seconds))
(mt-mod-time (file-modification-time mtpath))
(recently-synced (> (- start-time mt-mod-time) 4))
(will-sync (and (or need-sync should-sync)
(not sync-in-progress)))
(start-time (current-seconds)))
(not sync-in-progress)
(not recently-synced))))
;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
(if will-sync (set! *db-sync-in-progress* #t))
(mutex-unlock! *db-multi-sync-mutex*)
(if will-sync
(let ((res (common:sync-to-megatest.db dbstruct))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
(if (> res 0) ;; some records were transferred, keep the db alive
(begin
|
︙ | | |
Modified db.scm
from [0ef09f1d37]
to [196dbb1410].
︙ | | |
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
-
+
-
+
|
;;======================================================================
;; Database access
;;======================================================================
;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc
(use (srfi 18) extras tcp stack) ;; RADT => use of require-extension?
(use (srfi 18) extras tcp stack)
(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format dot-locking z3 typed-records matchable)
(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:)) ;; RADT => prefix??
(import (prefix base64 base64:))
(declare (unit db))
(declare (uses common))
(declare (uses keys))
(declare (uses ods))
(declare (uses client))
(declare (uses mt))
|
︙ | | |
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
|
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
|
+
+
-
+
+
|
(lambda (run-id)
(set! run-ids (cons run-id run-ids)))
db
"SELECT id FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;")
(reverse run-ids)))))
;; get some basic run stats
;;
;; data structure:
;;
;; ( (runname (( state count ) ... ))
;; ( ...
;; ( ...
;;
(define (db:get-run-stats dbstruct)
(let* ((totals (make-hash-table))
(curr (make-hash-table))
(res '())
(runs-info '()))
;; First get all the runname/run-ids
(db:with-db
|
︙ | | |
Modified http-transport.scm
from [05f38775ca]
to [e6b844f91c].
︙ | | |
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
|
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
|
-
+
|
(start-time (current-milliseconds)))
;; Use this opportunity to sync the tmp db to megatest.db
(if (not server-going) ;; *dbstruct-db*
(begin
(debug:print 0 *default-log-port* "SERVER: dbprep")
(set! *dbstruct-db* (db:setup)) ;; run-id))
(set! server-going #t)
(debug:print 0 *default-log-port* "SERVER: running") ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
(debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
(thread-start! *watchdog*)))
;; when things go wrong we don't want to be doing the various queries too often
;; so we strive to run this stuff only every four seconds or so.
(let* ((sync-time (- (current-milliseconds) start-time))
(rem-time (quotient (- 4000 sync-time) 1000)))
(if (and (<= rem-time 4)
|
︙ | | |
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
|
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
|
-
-
+
+
+
-
+
|
(adjusted-timeout (if (> hrs-since-start 1)
(- server-timeout (inexact->exact (round (* hrs-since-start 60)))) ;; subtract 60 seconds per hour
server-timeout)))
(if (common:low-noise-print 120 "server timeout")
(debug:print-info 0 *default-log-port* "Adjusted server timeout: " adjusted-timeout))
(cond
((and *server-run*
(> (+ last-access server-timeout)
(current-seconds)))
(> (+ last-access server-timeout)
(current-seconds))
(< (- (current-seconds) server-start-time) 3600)) ;; do not update log or touch log if we've been running for more than one hour.
(if (common:low-noise-print 120 "server continuing")
(debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
(let ((curr-time (current-seconds)))
(change-file-times server-log-file curr-time curr-time)))
(loop 0 server-state bad-sync-count (current-milliseconds)))
(else
(debug:print-info 0 *default-log-port* "Server timeed out. seconds since last db access: " (- (current-seconds) last-access))
(debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
(http-transport:server-shutdown port)))))))
(define (http-transport:server-shutdown port)
(let ((tdbdat (tasks:open-db)))
;;(BB> "http-transport:server-shutdown called")
(debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
;;
|
︙ | | |