;;======================================================================
;; Copyright 2019, Matthew Welland.
;;
;; This file is part of Megatest.
;;
;; Megatest is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; Megatest is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with Megatest. If not, see <http://www.gnu.org/licenses/>.
;;======================================================================
(declare (unit servermod))
(declare (uses commonmod))
(declare (uses dbmod))
(declare (uses mtconfigf))
(declare (uses mtargs))
(declare (uses tasksmod))
(module servermod
*
(import scheme chicken data-structures extras files)
(import (prefix sqlite3 sqlite3:) posix typed-records srfi-18
srfi-69 format ports srfi-1 matchable
directory-utils md5 message-digest regex
stack)
(import commonmod)
(import dbmod)
(import tasksmod)
(import (prefix mtargs args:))
(import (prefix mtconfigf configf:))
;; (use (prefix ulex ulex:))
(include "common_records.scm")
(define (server:make-server-url hostport)
(if (not hostport)
#f
(conc "http://" (car hostport) ":" (cadr hostport))))
(define *server-loop-heart-beat* (current-seconds))
;; options:
;;
;; 'killservers - kills all servers
;; 'dejunk - removes junk records
;; 'adj-testids - move test-ids into correct ranges
;; 'old2new - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db
;; 'new2old - sync /tmp/.../megatest.db to megatest.db and /tmp/.../megatest_ref.db (and update data_synced)
;; 'closeall - close all opened dbs
;; 'schema - attempt to apply schema changes
;; run-ids: '(1 2 3 ...) or #f (for all)
;;
(define (db:multi-db-sync dbstruct . options)
;; (if (not (launch:setup))
;; (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.")
(let* ((mtdb (dbr:dbstruct-mtdb dbstruct))
(tmpdb (db:get-db dbstruct))
(refndb (dbr:dbstruct-refndb dbstruct))
(allow-cleanup #t) ;; (if run-ids #f #t))
(servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat)))
(data-synced 0)) ;; count of changed records (I hope)
(for-each
(lambda (option)
(case option
;; kill servers
((killservers)
(for-each
(lambda (server)
(match-let (((mod-time host port start-time pid) server))
(if (and host pid)
(tasks:kill-server host pid))))
servers)
;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock
(delete-file* (common:get-sync-lock-filepath))
)
;; clear out junk records
;;
((dejunk)
(db:delay-if-busy mtdb) ;; ok to delay on mtdb
(when (file-write-access? (db:dbdat-get-path mtdb)) (db:clean-up mtdb))
(db:clean-up tmpdb)
(db:clean-up refndb))
;; sync runs, test_meta etc.
;;
((old2new)
(set! data-synced
(+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb)
data-synced)))
;; now ensure all newdb data are synced to megatest.db
;; do not use the run-ids list passed in to the function
;;
((new2old)
(set! data-synced
(+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb)
data-synced)))
((adj-target)
(db:adj-target (db:dbdat-get-db mtdb))
(db:adj-target (db:dbdat-get-db tmpdb))
(db:adj-target (db:dbdat-get-db refndb)))
((schema)
(db:patch-schema-maindb (db:dbdat-get-db mtdb))
(db:patch-schema-maindb (db:dbdat-get-db tmpdb))
(db:patch-schema-maindb (db:dbdat-get-db refndb))
(db:patch-schema-rundb (db:dbdat-get-db mtdb))
(db:patch-schema-rundb (db:dbdat-get-db tmpdb))
(db:patch-schema-rundb (db:dbdat-get-db refndb))))
(stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))
options)
data-synced))
(define (db:tmp->megatest.db-sync dbstruct last-update)
(let* ((mtdb (dbr:dbstruct-mtdb dbstruct))
(tmpdb (db:get-db dbstruct))
(refndb (dbr:dbstruct-refndb dbstruct))
(res (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb)))
(stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)
res))
;;;; run-ids
;; if #f use *db-local-sync* : or 'local-sync-flags
;; if #t use timestamps : or 'timestamps
;;
;; NB// no-sync-db is the db handle, not a flag!
;;
(define (db:sync-to-megatest.db dbstruct #!key (no-sync-db #f))
(let* ((start-time (current-seconds))
(last-full-update (if no-sync-db
(db:no-sync-get/default no-sync-db "LAST_FULL_UPDATE" 0)
0))
(full-sync-needed (> (- start-time last-full-update) 3600)) ;; every hour do a full sync
(last-update (if full-sync-needed
0
(if no-sync-db
(db:no-sync-get/default no-sync-db "LAST_UPDATE" 0)
0))) ;; (or (db:get-var dbstruct "LAST_UPDATE") 0))
(sync-needed (> (- start-time last-update) 6))
(res (if (or sync-needed ;; don't sync if a sync already occurred in the past 6 seconds
full-sync-needed)
(begin
(if no-sync-db
(begin
(if full-sync-needed (db:no-sync-set no-sync-db "LAST_FULL_UPDATE" start-time))
(db:no-sync-set no-sync-db "LAST_UPDATE" start-time)))
(db:tmp->megatest.db-sync dbstruct last-update))
0))
(sync-time (- (current-seconds) start-time)))
(debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
(if (common:low-noise-print 30 "sync new to old")
(if sync-needed
(debug:print-info 0 *default-log-port* "Sync of " res " records from newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
(debug:print-info 0 *default-log-port* "No sync needed, last updated " (- start-time last-update) " seconds ago")))
res))
;;======================================================================
;; server stuff that operates on the server log files
;;======================================================================
(define (server:get-num-servers #!key (numservers 2))
(let ((ns (string->number
(or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
(or ns numservers)))
;; get a list of servers with all relevant data
;; ( mod-time host port start-time pid )
;;
(define (server:get-list areapath #!key (limit #f))
(let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
(day-seconds (* 24 60 60)))
;; if the directory exists continue to get the list
;; otherwise attempt to create the logs dir and then
;; continue
(if (if (directory-exists? (conc areapath "/logs"))
'()
(if (file-write-access? areapath)
(begin
(condition-case
(create-directory (conc areapath "/logs") #t)
(exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
(exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list.")))
(directory-exists? (conc areapath "/logs")))
'()))
(let* ((server-logs (glob (conc areapath "/logs/server-*.log")))
(num-serv-logs (length server-logs)))
(if (null? server-logs)
'()
(let loop ((hed (car server-logs))
(tal (cdr server-logs))
(res '()))
(let* ((mod-time (handle-exceptions
exn
(current-seconds) ;; 0
(file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
(down-time (- (current-seconds) mod-time))
(serv-dat (if (or (< num-serv-logs 10)
(< down-time 900)) ;; day-seconds))
(server:logf-get-start-info hed)
'())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
(serv-rec (cons mod-time serv-dat))
(fmatch (string-match fname-rx hed))
(pid (if fmatch (string->number (list-ref fmatch 2)) #f))
(new-res (if (null? serv-dat)
res
(cons (append serv-rec (list pid)) res))))
(if (null? tal)
(if (and limit
(> (length new-res) limit))
new-res ;; (take new-res limit) <= need intelligent sorting before this will work
new-res)
(loop (car tal)(cdr tal) new-res)))))))))
;; given a path to a server log return: host port startseconds
;;
(define (server:logf-get-start-info logf)
(let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs
(handle-exceptions
exn
(list #f #f #f) ;; no idea what went wrong, call it a bad server
(with-input-from-file
logf
(lambda ()
(let loop ((inl (read-line))
(lnum 0))
(if (not (eof-object? inl))
(let ((mlst (string-match rx inl)))
(if (not mlst)
(if (< lnum 500) ;; give up if more than 500 lines of server log read
(loop (read-line)(+ lnum 1))
(list #f #f #f))
(let ((dat (cdr mlst)))
(list (car dat) ;; host
(string->number (cadr dat)) ;; port
(string->number (caddr dat))))))
(list #f #f #f))))))))
(define (server:get-num-alive srvlst)
(let ((num-alive 0))
(for-each
(lambda (server)
(match-let (((mod-time host port start-time pid)
server))
(let* ((uptime (- (current-seconds) mod-time))
(runtime (if start-time
(- mod-time start-time)
0)))
(if (< uptime 5)(set! num-alive (+ num-alive 1))))))
srvlst)
num-alive))
;; given a list of servers get a list of valid servers, i.e. at least
;; 10 seconds old, has started and is less than 1 hour old and is
;; active (i.e. mod-time < 10 seconds
;;
;; mod-time host port start-time pid
;;
;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
;; and servers should stick around for about two hours or so.
;;
(define (server:get-best srvlst)
(let* ((nums (server:get-num-servers))
(now (current-seconds))
(slst (sort
(filter (lambda (rec)
(if (and (list? rec)
(> (length rec) 2))
(let ((start-time (list-ref rec 3))
(mod-time (list-ref rec 0)))
;; (print "start-time: " start-time " mod-time: " mod-time)
(and start-time mod-time
(> (- now start-time) 0) ;; been running at least 0 seconds
(< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
(< (- now start-time)
(+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600"))
180)
(random 360))) ;; under one hour running time +/- 180
))
#f))
srvlst)
(lambda (a b)
(< (list-ref a 3)
(list-ref b 3))))))
(if (> (length slst) nums)
(take slst nums)
slst)))
(define (server:get-first-best areapath)
(let ((srvrs (server:get-best (server:get-list areapath))))
(if (and srvrs
(not (null? srvrs)))
(car srvrs)
#f)))
(define (server:get-rand-best areapath)
(let ((srvrs (server:get-best (server:get-list areapath))))
(if (and (list? srvrs)
(not (null? srvrs)))
(let* ((len (length srvrs))
(idx (random len)))
(list-ref srvrs idx))
#f)))
(define (server:record->url servr)
(match-let (((mod-time host port start-time pid)
servr))
(if (and host port)
(conc host ":" port)
#f)))
;; timeout is hms string: 1h 5m 3s, default is 1 minute
;;
(define (server:expiration-timeout)
(let ((tmo (configf:lookup *configdat* "server" "timeout")))
(if (and (string? tmo)
(common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
(* 3600 (string->number tmo))
60)))
;;======================================================================
;; P K T S S T U F F
;;======================================================================
;; ???
;;======================================================================
;; P K T S S T U F F
;;======================================================================
;; ???
;;======================================================================
;; S E R V E R
;;======================================================================
;; Call this to start the actual server
;;
;;======================================================================
;; S E R V E R U T I L I T I E S
;;======================================================================
;; Get the transport
(define (server:get-transport)
(if *transport-type*
*transport-type*
(let ((ttype (string->symbol
(or (args:get-arg "-transport")
(configf:lookup *configdat* "server" "transport")
"rpc"))))
(set! *transport-type* ttype)
ttype)))
;; Generate a unique signature for this server
(define (server:mk-signature)
(message-digest-string (md5-primitive)
(with-output-to-string
(lambda ()
(write (list (current-directory)
(argv)))))))
;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;;
(define (server:reply return-addr query-sig success/fail result)
(debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
;; (send-message pubsock target send-more: #t)
;; (send-message pubsock
(case (server:get-transport)
((rpc) (db:obj->string (vector success/fail query-sig result)))
((http) (db:obj->string (vector success/fail query-sig result)))
((fs) result)
(else
(debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
result)))
(define (server:get-client-signature)
;; (if *my-client-signature* *my-client-signature*
;; (let ((sig
(server:mk-signature)) ;; )
;; (set! *my-client-signature* sig)
;; *my-client-signature*)))
(define (server:kill servr)
(match-let (((mod-time hostname port start-time pid)
servr))
(tasks:kill-server hostname pid)))
;; (define server:sync-lock-token "SERVER_SYNC_LOCK")
;; (define (server:release-sync-lock)
;; (db:no-sync-del! *no-sync-db* server:sync-lock-token))
;; (define (server:have-sync-lock?)
;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token))
;; (have-lock? (car have-lock-pair))
;; (lock-time (cdr have-lock-pair))
;; (lock-age (- (current-seconds) lock-time)))
;; (cond
;; (have-lock? #t)
;; ((>lock-age
;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180)))
;; (server:release-sync-lock)
;; (server:have-sync-lock?))
;; (else #f))))
;; moving this here as it needs access to db and cannot be in common.
;;
(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
(let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh
(sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log")))
(tmp-area (common:get-db-tmp-area))
(tmp-db (conc tmp-area "/megatest.db"))
(staging-file (conc *toppath* "/.megatest.db"))
(mtdbfile (conc *toppath* "/megatest.db"))
(lockfile (common:get-sync-lock-filepath))
(sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log))
(sync-cmd (if fork-to-background
(conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"")
sync-cmd-core))
(default-min-intersync-delay 2)
(min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay))
(default-duty-cycle 0.1)
(duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle))
(last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle)
(calculate-off-time (lambda (work-duration duty-cycle)
(* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds)))
(off-time min-intersync-delay) ;; adjusted in closure below.
(do-a-sync
(lambda ()
#;(BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync)
(let* ((finalres
(let retry-loop ((num-tries 0))
(if (common:simple-file-lock lockfile)
(begin
(cond
((not (or fork-to-background persist-until-sync))
(debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay
" , off-time="off-time" seconds ]")
(thread-sleep! (max off-time min-intersync-delay)))
(else
(debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit...")))
(if (not (configf:lookup *configdat* "server" "disable-db-snapshot"))
(common:snapshot-file mtdbfile subdir: ".db-snapshot"))
(delete-file* staging-file)
(let* ((start-time (current-milliseconds))
(res (system sync-cmd))
(res2
(cond
((eq? 0 res)
(delete-file* (conc mtdbfile ".backup"))
(if (eq? 0 (file-size sync-log))
(delete-file sync-log))
(system (conc "/bin/mv " staging-file " " mtdbfile))
(set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000))
(set! off-time (calculate-off-time
last-sync-seconds
(cond
((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1))
duty-cycle)
(else
(debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle)
default-duty-cycle))))
(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec")
(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time)
'sync-completed)
(else
(system (conc "/bin/cp "sync-log" "sync-log".fail"))
(debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail")
(if (file-exists? (conc mtdbfile ".backup"))
(system (conc "/bin/cp "mtdbfile ".backup " mtdbfile)))
#f))))
(common:simple-file-release-lock lockfile)
#;(BB> "released lockfile: " lockfile)
#;(when (common:file-exists? lockfile)
(BB> "DID NOT ACTUALLY RELEASE LOCKFILE"))
res2) ;; end let
);; end begin
;; else
(cond
(persist-until-sync
(thread-sleep! 1)
(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)")
(retry-loop (add1 num-tries)))
(else
(thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay)))
(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.")
'parallel-sync-in-progress))
) ;; end if got lockfile
)
))
#;(BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres)
finalres)
) ;; end lambda
))
do-a-sync))
(define (server:writable-watchdog-bruteforce dbstruct)
(thread-sleep! 1) ;; delay for startup
(let* ((do-a-sync (server:get-bruteforce-syncer dbstruct))
(final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t)))
(when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync
(args:get-arg "-server"))
(let loop ()
(do-a-sync)
(if (not *time-to-exit*) (loop))) ;; keep going unless time to exit
;; time to exit, close the no-sync db here
(final-sync)
(if (common:low-noise-print 30)
(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)
)))))
(define (server:writable-watchdog-deltasync dbstruct)
(thread-sleep! 0.05) ;; delay for startup
(let ((legacy-sync (common:run-sync?))
(sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
(debug-mode (debug:debug-mode 1))
(last-time (current-seconds))
(no-sync-db (db:open-no-sync-db))
(sync-duration 0) ;; run time of the sync in milliseconds
;;(this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))
)
(set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
(debug:print-info 2 *default-log-port* "Periodic sync thread started.")
(debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num)
(if (and legacy-sync (not *time-to-exit*))
(let* (;;(dbstruct (db:setup))
(mtdb (dbr:dbstruct-mtdb dbstruct))
(mtpath (db:dbdat-get-path mtdb))
(tmp-area (common:get-db-tmp-area))
(start-file (conc tmp-area "/.start-sync"))
(end-file (conc tmp-area "/.end-sync")))
(debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
(let loop ()
;; sync for filesystem local db writes
;;
(mutex-lock! *db-multi-sync-mutex*)
(let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
(sync-in-progress *db-sync-in-progress*)
(min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
(should-sync (and (not *time-to-exit*)
(> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
(start-time (current-seconds))
(cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
(mt-mod-time (file-modification-time mtpath))
(last-sync-start (if (common:file-exists? start-file)
(file-modification-time start-file)
0))
(last-sync-end (if (common:file-exists? end-file)
(file-modification-time end-file)
10))
(sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
(recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
(< mt-mod-time last-sync-start)))
(sync-done (<= last-sync-start last-sync-end))
(sync-stale (> start-time (+ last-sync-start sync-stale-seconds)))
(will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting
(or need-sync should-sync)
(or sync-done sync-stale)
(not sync-in-progress)
(not recently-synced))))
(debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress
" should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
" sync-done=" sync-done " sync-period=" sync-period)
(if (and (> sync-period 5)
(common:low-noise-print 30 "sync-period"))
(debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
(if will-sync (set! *db-sync-in-progress* #t))
(mutex-unlock! *db-multi-sync-mutex*)
(if will-sync
(let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
(sync-start (current-milliseconds)))
(with-output-to-file start-file (lambda ()(print (current-process-id))))
;; put lock here
;; (if (or (not max-sync-duration)
;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
(let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
(set! sync-duration (- (current-milliseconds) sync-start))
(if (> res 0) ;; some records were transferred, keep the db alive
(begin
(mutex-lock! *heartbeat-mutex*)
(set! *db-last-access* (current-seconds))
(mutex-unlock! *heartbeat-mutex*)
(debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
(debug:print-info 2 *default-log-port* "sync called but zero records transferred")))))
;; ;; TODO: factor this next routine out into a function
;; (with-input-from-pipe ;; this should not block other threads but need to verify this
;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*)
;; (lambda ()
;; (let loop ((inl (read-line))
;; (res #f))
;; (if (eof-object? inl)
;; (begin
;; (set! sync-duration (- (current-milliseconds) sync-start))
;; (cond
;; ((not res)
;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\""))
;; ((> res 0)
;; (mutex-lock! *heartbeat-mutex*)
;; (set! *db-last-access* (current-seconds))
;; (mutex-unlock! *heartbeat-mutex*))))
;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl)))
;; (if matches
;; (string->number (cadr matches))
;; #f))))
;; (loop (read-line)
;; (or num-synced res))))))))))
(if will-sync
(begin
(mutex-lock! *db-multi-sync-mutex*)
(set! *db-sync-in-progress* #f)
(set! *db-last-sync* start-time)
(with-output-to-file end-file (lambda ()(print (current-process-id))))
;; release lock here
(mutex-unlock! *db-multi-sync-mutex*)))
(if (and debug-mode
(> (- start-time last-time) 60))
(begin
(set! last-time start-time)
(debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
;; keep going unless time to exit
;;
(if (not *time-to-exit*)
(let delay-loop ((count 0))
;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
(if (and (not *time-to-exit*)
(< count 6)) ;; was 11, changing to 4.
(begin
(thread-sleep! 1)
(delay-loop (+ count 1))))
(if (not *time-to-exit*) (loop))))
;; time to exit, close the no-sync db here
(db:no-sync-close-db no-sync-db)
(if (common:low-noise-print 30)
(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num)))))))
)