@@ -16,20 +16,18 @@
;; along with Megatest. If not, see .
;;
(declare (unit server))
-(declare (uses commonmod))
-(declare (uses configfmod))
-(declare (uses debugprint))
(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
-;; (declare (uses synchash))
-;;(declare (uses rpc-transport))
+(declare (uses debugprint))
+(declare (uses commonmod))
+(declare (uses configfmod))
+(declare (uses rmtmod))
(declare (uses launch))
-;; (declare (uses daemon))
(declare (uses mtargs))
(use (srfi 18) extras s11n)
(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
(use directory-utils posix-extras matchable utils)
@@ -41,727 +39,5 @@
(prefix mtargs args:))
(include "common_records.scm")
(include "db_records.scm")
-(define (server:make-server-url hostport)
- (if (not hostport)
- #f
- (conc "http://" (car hostport) ":" (cadr hostport))))
-
-(define *server-loop-heart-beat* (current-seconds))
-
-;;======================================================================
-;; P K T S S T U F F
-;;======================================================================
-
-;; ???
-
-;;======================================================================
-;; P K T S S T U F F
-;;======================================================================
-
-;; ???
-
-;;======================================================================
-;; S E R V E R
-;;======================================================================
-
-;; Call this to start the actual server
-;;
-
-;;======================================================================
-;; S E R V E R U T I L I T I E S
-;;======================================================================
-
-;; Get the transport
-(define (server:get-transport)
- (if *transport-type*
- *transport-type*
- (let ((ttype (string->symbol
- (or (args:get-arg "-transport")
- (configf:lookup *configdat* "server" "transport")
- "rpc"))))
- (set! *transport-type* ttype)
- ttype)))
-
-;; Generate a unique signature for this server
-(define (server:mk-signature)
- (message-digest-string (md5-primitive)
- (with-output-to-string
- (lambda ()
- (write (list (current-directory)
- (current-process-id)
- (argv)))))))
-
-(define (server:get-client-signature)
- (if *my-client-signature* *my-client-signature*
- (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
- (set! *my-client-signature* sig)
- *my-client-signature*)))
-
-(define (server:get-server-id)
- (if *server-id* *server-id*
- (let ((sig (server:mk-signature))) ;; clients re-use the server:mk-signature logic
- (set! *server-id* sig)
- *server-id*)))
-
-;; ;; When using zmq this would send the message back (two step process)
-;; ;; with spiffy or rpc this simply returns the return data to be returned
-;; ;;
-;; (define (server:reply return-addr query-sig success/fail result)
-;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result)
-;; ;; (send-message pubsock target send-more: #t)
-;; ;; (send-message pubsock
-;; (case (server:get-transport)
-;; ((rpc) (db:obj->string (vector success/fail query-sig result)))
-;; ((http) (db:obj->string (vector success/fail query-sig result)))
-;; ((fs) result)
-;; (else
-;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*)
-;; result)))
-
-;; Given an area path, start a server process ### NOTE ### > file 2>&1
-;; if the target-host is set
-;; try running on that host
-;; incidental: rotate logs in logs/ dir.
-;;
-(define (server:run areapath) ;; areapath is *toppath* for a given testsuite area
- (let* ((testsuite (common:get-testsuite-name))
- (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
- (profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
- ""))
- (cmdln (conc (common:get-megatest-exe)
- " -server - ";; (or target-host "-")
- (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
- " -daemonize "
- "")
- ;; " -log " logfile
- " -m testsuite:" testsuite
- " " profile-mode
- )) ;; (conc " >> " logfile " 2>&1 &")))))
- (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!?
- (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
- ;; we want the remote server to start in *toppath* so push there
- (push-directory areapath)
- (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
- (thread-start! log-rotate)
-
- ;; host.domain.tld match host?
- ;; (if (and target-host
- ;; ;; look at target host, is it host.domain.tld or ip address and does it
- ;; ;; match current ip or hostname
- ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
- ;; (not (equal? curr-ip target-host)))
- ;; (begin
- ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
- ;; (setenv "TARGETHOST" target-host)))
- ;;
- (setenv "TARGETHOST_LOGF" logfile)
- (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time
- (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time))
- (system (conc "nbfake " cmdln))
- (unsetenv "TARGETHOST_LOGF")
- ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
- (thread-join! log-rotate)
- (pop-directory)))
-
-;; given a path to a server log return: host port startseconds server-id
-;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let
-;; example of what it's looking for in the log file:
-;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4
-
-(define (server:logf-get-start-info logf)
- (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id
- (dbprep-rx (regexp "^SERVER: dbprep"))
- (dbprep-found 0)
- (bad-dat (list #f #f #f #f #f)))
- (handle-exceptions
- exn
- (begin
- ;; WARNING: this is potentially dangerous to blanket ignore the errors
- (if (file-exists? logf)
- (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn))
- bad-dat) ;; no idea what went wrong, call it a bad server
- (with-input-from-file
- logf
- (lambda ()
- (let loop ((inl (read-line))
- (lnum 0))
- (if (not (eof-object? inl))
- (let ((mlst (string-match server-rx inl))
- (dbprep (string-match dbprep-rx inl)))
- (if dbprep (set! dbprep-found 1))
- (if (not mlst)
- (if (< lnum 500) ;; give up if more than 500 lines of server log read
- (loop (read-line)(+ lnum 1))
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
- bad-dat))
- (match mlst
- ((_ host port start server-id pid)
- (list host
- (string->number port)
- (string->number start)
- server-id
- (string->number pid)))
- (else
- (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst)
- bad-dat))))
- (begin
- (if dbprep-found
- (begin
- (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time))
- (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting?
- (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))))
- bad-dat))))))))
-
-;; ;; get a list of servers from the log files, with all relevant data
-;; ;; ( mod-time host port start-time pid )
-;; ;;
-;; (define (server:get-list areapath #!key (limit #f))
-;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
-;; (day-seconds (* 24 60 60)))
-;; ;; if the directory exists continue to get the list
-;; ;; otherwise attempt to create the logs dir and then
-;; ;; continue
-;; (if (if (directory-exists? (conc areapath "/logs"))
-;; '()
-;; (if (file-write-access? areapath)
-;; (begin
-;; (condition-case
-;; (create-directory (conc areapath "/logs") #t)
-;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
-;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn)))
-;; (directory-exists? (conc areapath "/logs")))
-;; '()))
-;;
-;; ;; Get the list of server logs.
-;; (let* (
-;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers.
-;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'")))
-;; (server-logs (glob (conc areapath "/logs/server-*-*.log")))
-;; (num-serv-logs (length server-logs)))
-;; (if (or (null? server-logs) (= num-serv-logs 0))
-;; (let ()
-;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time))
-;; '()
-;; )
-;; (let loop ((hed (string-chomp (car server-logs)))
-;; (tal (cdr server-logs))
-;; (res '()))
-;; (let* ((mod-time (handle-exceptions
-;; exn
-;; (begin
-;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn)
-;; (current-seconds)) ;; 0
-;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
-;; (down-time (- (current-seconds) mod-time))
-;; (serv-dat (if (or (< num-serv-logs 10)
-;; (< down-time 900)) ;; day-seconds))
-;; (server:logf-get-start-info hed)
-;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at
-;; (serv-rec (cons mod-time serv-dat))
-;; (fmatch (string-match fname-rx hed))
-;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f))
-;; (new-res (if (null? serv-dat)
-;; res
-;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let
-;; (if (null? tal)
-;; (if (and limit
-;; (> (length new-res) limit))
-;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work
-;; new-res)
-;; (loop (string-chomp (car tal)) (cdr tal) new-res)))))))))
-
-#;(define (server:get-num-alive srvlst)
- (let ((num-alive 0))
- (for-each
- (lambda (server)
- (handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn))
- (match-let (((mod-time host port start-time server-id pid)
- server))
- (let* ((uptime (- (current-seconds) mod-time))
- (runtime (if start-time
- (- mod-time start-time)
- 0)))
- (if (< uptime 5)(set! num-alive (+ num-alive 1)))))))
- srvlst)
- num-alive))
-
-;; ;; given a list of servers get a list of valid servers, i.e. at least
-;; ;; 10 seconds old, has started and is less than 1 hour old and is
-;; ;; active (i.e. mod-time < 10 seconds
-;; ;;
-;; ;; mod-time host port start-time pid
-;; ;;
-;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
-;; ;; and servers should stick around for about two hours or so.
-;; ;;
-;; (define (server:get-best srvlst)
-;; (let* ((nums (server:get-num-servers))
-;; (now (current-seconds))
-;; (slst (sort
-;; (filter (lambda (rec)
-;; (if (and (list? rec)
-;; (> (length rec) 2))
-;; (let ((start-time (list-ref rec 3))
-;; (mod-time (list-ref rec 0)))
-;; ;; (print "start-time: " start-time " mod-time: " mod-time)
-;; (and start-time mod-time
-;; (> (- now start-time) 0) ;; been running at least 0 seconds
-;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
-;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set
-;; (< (- now start-time)
-;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime"))
-;; 180)
-;; (random 360)))) ;; under one hour running time +/- 180
-;; ))
-;; #f))
-;; srvlst)
-;; (lambda (a b)
-;; (< (list-ref a 3)
-;; (list-ref b 3))))))
-;; (if (> (length slst) nums)
-;; (take slst nums)
-;; slst)))
-
-;; ;; switch from server:get-list to server:get-servers-info
-;; ;;
-;; (define (server:get-first-best areapath)
-;; (let ((srvrs (server:get-best (server:get-list areapath))))
-;; (if (and srvrs
-;; (not (null? srvrs)))
-;; (car srvrs)
-;; #f)))
-;;
-;; (define (server:get-rand-best areapath)
-;; (let ((srvrs (server:get-best (server:get-list areapath))))
-;; (if (and (list? srvrs)
-;; (not (null? srvrs)))
-;; (let* ((len (length srvrs))
-;; (idx (random len)))
-;; (list-ref srvrs idx))
-;; #f)))
-
-(define (server:record->id servr)
- (handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn)
- #f)
- (match-let (((host port start-time server-id pid)
- servr))
- (if server-id
- server-id
- #f))))
-
-(define (server:record->url servr)
- (handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn)
- #f)
- (match-let (((host port start-time server-id pid)
- servr))
- (if (and host port)
- (conc host ":" port)
- #f))))
-
-
-;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough.
-;; if it is old enough, overwrite it and wait 0.25 seconds.
-;; if it then has the wrong server key, wait + 1 and call this function recursively.
-;;
-#;(define (server:wait-for-server-start-last-flag areapath)
- (let* ((start-flag (conc areapath "/logs/server-start-last"))
- ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
- (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4))
- (server-key (conc (get-host-name) "-" (current-process-id))))
- (if (file-exists? start-flag)
- (let* ((fmodtime (file-modification-time start-flag))
- (delta (- (current-seconds) fmodtime))
- (old-enough (> delta idletime))
- (new-server-key ""))
- ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than seconds, and the new file still has the same server key as you just wrote, return #t.
- ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process.
- (if (and old-enough
- (begin
- (debug:print-info 2 *default-log-port* "Writing " start-flag)
- (with-output-to-file start-flag (lambda () (print server-key)))
- (thread-sleep! 0.25)
- (set! new-server-key (with-input-from-file start-flag (lambda () (read-line))))
- (equal? server-key new-server-key)))
- #t
- ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively.
- (begin
- (debug:print-info 0 *default-log-port* "Gating server start, last start: "
- (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server"))
-
- (thread-sleep! ( + 1 idletime))
- (server:wait-for-server-start-last-flag areapath)))))))
-
-;; oldest server alive determines host then choose random of youngest
-;; five servers on that host
-;;
-(define (server:get-servers-info areapath)
- ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.")
- (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo")))
- (if (not (file-exists? servinfodir))
- (create-directory servinfodir))
- (let* ((allfiles (glob (conc servinfodir"/*")))
- (res (make-hash-table)))
- (for-each
- (lambda (f)
- (let* ((hostport (pathname-strip-directory f))
- (serverdat (server:logf-get-start-info f)))
- (match serverdat
- ((host port start server-id pid)
- (if (and host port start server-id pid)
- (hash-table-set! res hostport serverdat)
- (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))
- (else
- (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))))
- allfiles)
- res)))
-
-;; check the .servinfo directory, are there other servers running on this
-;; or another host?
-;;
-;; returns #t => ok to start another server
-;; #f => not ok to start another server
-;;
-(define (server:minimal-check areapath)
- (server:clean-up-old areapath)
- (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo"))
- (servrs (glob (conc srvdir"/*")))
- (thishostip (server:get-best-guess-address (get-host-name)))
- (thisservrs (glob (conc srvdir"/"thishostip":*")))
- (homehostinf (server:choose-server areapath 'homehost))
- (havehome (car homehostinf))
- (wearehome (cdr homehostinf)))
- (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome
- ", numservers: "(length thisservrs))
- (cond
- ((not havehome) #t) ;; no homehost yet, go for it
- ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another
- ((and havehome (not wearehome)) #f) ;; we are not the home host
- ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running
- (else
- (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs)
- #t))))
-
-
-(define server-last-start 0)
-
-
-;; oldest server alive determines host then choose random of youngest
-;; five servers on that host
-;;
-;; mode:
-;; best - get best server (random of newest five)
-;; home - get home host based on oldest server
-;; info - print info
-(define (server:choose-server areapath #!optional (mode 'best))
- ;; age is current-starttime
- ;; find oldest alive
- ;; 1. sort by age ascending and ping until good
- ;; find alive rand from youngest
- ;; 1. sort by age descending
- ;; 2. take five
- ;; 3. check alive, discard if not and repeat
- ;; first we clean up old server files
- (assert (eq? (rmt:transport-mode) 'http) "FATAL: server:run called with rmt:transport-mode="(rmt:transport-mode))
- (server:clean-up-old areapath)
- (let* ((since-last (- (current-seconds) server-last-start))
- (server-start-delay 10))
- (if ( < (- (current-seconds) server-last-start) 10 )
- (begin
- (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
- (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
- (thread-sleep! server-start-delay)
- )
- (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
- )
- )
- (let* ((serversdat (server:get-servers-info areapath))
- (servkeys (hash-table-keys serversdat))
- (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
- (sort servkeys ;; list of "host:port"
- (lambda (a b)
- (>= (list-ref (hash-table-ref serversdat a) 2)
- (list-ref (hash-table-ref serversdat b) 2))))
- '())))
- (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
- (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
- (if (not (null? by-time-asc))
- (let* ((oldest (last by-time-asc))
- (oldest-dat (hash-table-ref serversdat oldest))
- (host (list-ref oldest-dat 0))
- (all-valid (filter (lambda (x)
- (equal? host (list-ref (hash-table-ref serversdat x) 0)))
- by-time-asc))
- (best-ten (lambda ()
- (if (> (length all-valid) 11)
- (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out
- (if (> (length all-valid) 8)
- (drop-right all-valid 1)
- all-valid))))
- (names->dats (lambda (names)
- (map (lambda (x)
- (hash-table-ref serversdat x))
- names)))
- (am-home? (lambda ()
- (let* ((currhost (get-host-name))
- (bestadrs (server:get-best-guess-address currhost)))
- (or (equal? host currhost)
- (equal? host bestadrs))))))
- (case mode
- ((info)
- (debug:print 0 *default-log-port* "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
- (debug:print 0 *default-log-port* "youngest: "(hash-table-ref serversdat (car all-valid))))
- ((home) host)
- ((homehost) (cons host (am-home?))) ;; shut up old code
- ((home?) (am-home?))
- ((best-ten)(names->dats (best-ten)))
- ((all-valid)(names->dats all-valid))
- ((best) (let* ((best-ten (best-ten))
- (len (length best-ten)))
- (hash-table-ref serversdat (list-ref best-ten (random len)))))
- ((count)(length all-valid))
- (else
- (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
- #f)))
- (begin
- (server:run areapath)
- (set! server-last-start (current-seconds))
- ;; (thread-sleep! 3)
- (case mode
- ((homehost) (cons #f #f))
- (else #f))))))
-
-(define (server:get-servinfo-dir areapath)
- (let* ((spath (conc areapath"/.servinfo")))
- (if (not (file-exists? spath))
- (create-directory spath #t))
- spath))
-
-(define (server:clean-up-old areapath)
- ;; any server file that has not been touched in ten minutes is effectively dead
- (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*"))))
- (for-each
- (lambda (sfile)
- (let* ((modtime (handle-exceptions
- exn
- (begin
- (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
- (current-seconds))
- (file-modification-time sfile))))
- (if (and (number? modtime)
- (> (- (current-seconds) modtime)
- 600))
- (begin
- (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.")
- (handle-exceptions
- exn
- (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
- (delete-file sfile))))))
- sfiles)))
-
-;; would like to eventually get rid of this
-;;
-(define (common:on-homehost?)
- (if (eq? (rmt:transport-mode) 'http)
- (server:choose-server *toppath* 'home?)
- #t)) ;; there is no homehost for tcp and nfs is always on home so #t should work
-
-;; kind start up of server, wait before allowing another server for a given
-;; area to be launched
-;;
-(define (server:kind-run areapath)
- ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
- ;; and wait for it to be at least seconds old
- ;; (server:wait-for-server-start-last-flag areapath)
- (let loop ()
- (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2)
- (begin
- (if (common:low-noise-print 30 "our-host-load")
- (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server."))
- (loop))))
- (if (< (server:choose-server areapath 'count) 20)
- (server:run areapath))
- #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
- (let* ((lock-file (conc areapath "/logs/server-start.lock")))
- (let* ((start-flag (conc areapath "/logs/server-start-last")))
- (common:simple-file-lock-and-wait lock-file expire-time: 25)
- (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag)
- (system (conc "touch " start-flag)) ;; lazy but safe
- (server:run areapath)
- (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".
- (common:simple-file-release-lock lock-file)))
- (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.")))
-
-;; this one seems to be the general entry point
-;;
-(define (server:start-and-wait areapath #!key (timeout 60))
- (let ((give-up-time (+ (current-seconds) timeout)))
- (let loop ((server-info (server:check-if-running areapath))
- (try-num 0))
- (if (or server-info
- (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
- (server:record->url server-info)
- (let* ( (servers (server:choose-server areapath 'all-valid))
- (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0)))
- (if (and (> try-num 0) ;; first time through simply wait a little while then try again
- (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one
- (server:run areapath))
- (thread-sleep! 5)
- (loop (server:check-if-running areapath)
- (+ try-num 1)))))))
-
-(define (server:get-num-servers #!key (numservers 2))
- (let ((ns (string->number
- (or (configf:lookup *configdat* "server" "numservers") "notanumber"))))
- (or ns numservers)))
-
-;; no longer care if multiple servers are started by accident. older servers will drop off in time.
-;;
-(define (server:check-if-running areapath) ;; #!key (numservers "2"))
- (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed
- (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath))))
- (if (or (and servers
- (null? servers))
- (not servers))
- ;; (and (list? servers)
- ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers
- #f
- (let loop ((hed (car servers))
- (tal (cdr servers)))
- (let ((res (server:check-server hed)))
- (if res
- hed
- (if (null? tal)
- #f
- (loop (car tal)(cdr tal)))))))))
-
-;; ping the given server
-;;
-(define (server:check-server server-record)
- (let* ((server-url (server:record->url server-record))
- (server-id (server:record->id server-record))
- (res (server:ping server-url server-id)))
- (if res
- server-url
- #f)))
-
-(define (server:kill servr)
- (handle-exceptions
- exn
- (begin
- (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn)
- #f)
- (match-let (((hostname port start-time server-id pid)
- servr))
- (tasks:kill-server hostname pid))))
-
-;; ;; called in megatest.scm, host-port is string hostname:port
-;; ;;
-;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running
-;; ;; in the same process as the server.
-;; ;;
-;; (define (server:ping host:port server-id #!key (do-exit #f))
-;; (let* ((host-port (cond
-;; ((string? host:port)
-;; (let ((slst (string-split host:port ":")))
-;; (if (eq? (length slst) 2)
-;; (list (car slst)(string->number (cadr slst)))
-;; #f)))
-;; (else
-;; #f))))
-;; (cond
-;; ((and (list? host-port)
-;; (eq? (length host-port) 2))
-;; (let* ((myrunremote (make-and-init-remote *toppath*))
-;; (iface (car host-port))
-;; (port (cadr host-port))
-;; (server-dat (client:connect iface port server-id myrunremote))
-;; (login-res (rmt:login-no-auto-client-setup myrunremote)))
-;; (http-transport:close-connections myrunremote)
-;; (if (and (list? login-res)
-;; (car login-res))
-;; (begin
-;; ;; (print "LOGIN_OK")
-;; (if do-exit (exit 0))
-;; #t)
-;; (begin
-;; ;; (print "LOGIN_FAILED")
-;; (if do-exit (exit 1))
-;; #f))))
-;; (else
-;; (if host:port
-;; (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port))
-;; (if do-exit
-;; (exit 1)
-;; #f)))))
-;;
-;; ;; run ping in separate process, safest way in some cases
-;; ;;
-;; (define (server:ping-server ifaceport)
-;; (with-input-from-pipe
-;; (conc (common:get-megatest-exe) " -ping " ifaceport)
-;; (lambda ()
-;; (let loop ((inl (read-line))
-;; (res "NOREPLY"))
-;; (if (eof-object? inl)
-;; (case (string->symbol res)
-;; ((NOREPLY) #f)
-;; ((LOGIN_OK) #t)
-;; (else #f))
-;; (loop (read-line) inl))))))
-;;
-;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
-;; ;;
-;; (define (server:login toppath)
-;; (lambda (toppath)
-;; (set! *db-last-access* (current-seconds)) ;; might not be needed.
-;; (if (equal? *toppath* toppath)
-;; #t
-;; #f)))
-
-;; timeout is hms string: 1h 5m 3s, default is 1 minute
-;; This is currently broken. Just use the number of hours with no unit.
-;; Default is 600 seconds.
-;;
-(define (server:expiration-timeout)
- (let* ((tmo (configf:lookup *configdat* "server" "timeout")))
- (if (string? tmo)
- (let* ((num (string->number tmo)))
- (if num
- (* 3600 num)
- (common:hms-string->seconds tmo)))
- 600 ;; this is the default
- )))
-
-(define (server:get-best-guess-address hostname)
- (let ((res #f))
- (for-each
- (lambda (adr)
- (if (not (eq? (u8vector-ref adr 0) 127))
- (set! res adr)))
- ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME
- (vector->list (hostinfo-addresses (hostname->hostinfo hostname))))
- (string-intersperse
- (map number->string
- (u8vector->list
- (if res res (hostname->ip hostname)))) ".")))
-
-;; moving this here as it needs access to db and cannot be in common.
-;;
-
-(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
- (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")
- (lambda ()
- (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")))
-