Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -260,10 +260,22 @@ $(PREFIX)/bin/mtexec : $(PREFIX)/bin/.$(ARCHSTR)/mtexec utils/mk_wrapper utils/mk_wrapper $(PREFIX) mtexec $(PREFIX)/bin/mtexec chmod a+x $(PREFIX)/bin/mtexec +# mtserv + +mtserv: $(OFILES) $(MOFILES) megatest-fossil-hash.scm mtserv.scm + csc $(CSCOPTS) $(OFILES) $(MOFILES) mtserv.scm -o mtserv + +$(PREFIX)/bin/.$(ARCHSTR)/mtserv : mtserv + $(INSTALL) mtserv $(PREFIX)/bin/.$(ARCHSTR)/mtserv + +$(PREFIX)/bin/mtserv : $(PREFIX)/bin/.$(ARCHSTR)/mtserv utils/mk_wrapper + utils/mk_wrapper $(PREFIX) mtserv $(PREFIX)/bin/mtserv + chmod a+x $(PREFIX)/bin/mtserv + # tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt : tcmt $(INSTALL) tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt ADDED mtserv.scm Index: mtserv.scm ================================================================== --- /dev/null +++ mtserv.scm @@ -0,0 +1,94 @@ +; Copyright 2006-2017, Matthew Welland. +;; +;; This file is part of Megatest. +;; +;; Megatest is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; Megatest is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with Megatest. If not, see . +;; + +;; (include "common.scm") +;; (include "megatest-version.scm") + +;; fake out readline usage of toplevel-command +(define (toplevel-command . a) #f) + +(use srfi-1 posix srfi-69 readline ;; regex regex-case srfi-69 apropos json http-client directory-utils rpc typed-records;; (srfi 18) extras) + srfi-19 srfi-18 extras format pkts regex regex-case + (prefix dbi dbi:) + ) + +;; (declare (uses common)) +(declare (uses margs)) +(declare (uses configf)) +;; (declare (uses rmt)) + +;; (use ducttape-lib) +(include "megatest-version.scm") +(include "megatest-fossil-hash.scm") + +;; (require-library stml) + +(define help (conc " +mtutil, part of the Megatest tool suite, documentation at http://www.kiatoa.com/fossils/megatest + version " megatest-version " + license GPL, Copyright Matt Welland 2006-2017 + +Usage: mtutil action [options] + -h : this help + -manual : show the Megatest user manual + -version : print megatest version (currently " megatest-version ") + +Examples: + +Called as " (string-intersperse (argv) " ") " +Version " megatest-version ", built from " megatest-fossil-hash )) + ;; first token is our action, but only if no leading dash + +(define *action* (if (and (> (length (argv)) 1) + (not (string-match "^\\-.*" (cadr (argv))))) + (cadr (argv)) + #f)) + +(define *remargs* + (args:get-args + (if *action* (cdr (argv)) (argv)) + '("-log") + '("-h") + args:arg-hash + 0)) + +(if (args:get-arg "-h") + (begin + (print help) + (exit))) + +(if (or (args:get-arg "-repl") + (args:get-arg "-load")) + (begin + (import extras) ;; might not be needed + ;; (import csi) + (import readline) + (import apropos) + ;; (import (prefix sqlite3 sqlite3:)) ;; doesn't work ... + + (install-history-file (get-environment-variable "HOME") ".mtutil_history") ;; [homedir] [filename] [nlines]) + (current-input-port (make-readline-port "mtutil> ")) + (if (args:get-arg "-repl") + (repl) + (load (args:get-arg "-load"))))) + +#| +(define mtconf (car (simple-setup #f))) +(define dat (common:with-queue-db mtconf (lambda (conn)(get-pkts conn '())))) +(pp (pkts#flatten-all dat '((cmd . ((parent . P)(url . M)))(runtype . ((parent . P)))) 'id 'group-id 'uuid 'parent 'pkt-type 'pkt 'processed)) +|# Index: servermod.scm ================================================================== --- servermod.scm +++ servermod.scm @@ -18,7 +18,804 @@ (declare (unit servermod)) (module servermod * + +(import scheme + chicken + + md5 + message-digest + ports + posix + ) + +(define *client-server-id* #f) + +;;====================================================================== +;; S E R V E R U T I L I T I E S +;;====================================================================== + +;; Generate a unique signature for this server +(define (mk-signature) + (message-digest-string (md5-primitive) + (with-output-to-string + (lambda () + (write (list (current-directory) + (current-process-id) + (argv))))))) + +(define (get-client-server-id) + (if *client-server-id* *client-server-id* + (let ((sig (mk-signature))) ;; clients re-use the server:mk-signature logic + (set! *client-server-id* sig) + *client-server-id*))) + +;; ;; When using zmq this would send the message back (two step process) +;; ;; with spiffy or rpc this simply returns the return data to be returned +;; ;; +;; (define (server:reply return-addr query-sig success/fail result) +;; (debug:print-info 11 *default-log-port* "server:reply return-addr=" return-addr ", result=" result) +;; ;; (send-message pubsock target send-more: #t) +;; ;; (send-message pubsock +;; (case (server:get-transport) +;; ((rpc) (db:obj->string (vector success/fail query-sig result))) +;; ((http) (db:obj->string (vector success/fail query-sig result))) +;; ((fs) result) +;; (else +;; (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) +;; result))) +;; +;; ;; Given an area path, start a server process ### NOTE ### > file 2>&1 +;; ;; if the target-host is set +;; ;; try running on that host +;; ;; incidental: rotate logs in logs/ dir. +;; ;; +;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area +;; (let* ((testsuite (common:get-testsuite-name)) +;; (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) +;; (profile-mode (or (configf:lookup *configdat* "misc" "profilesw") +;; "")) +;; (cmdln (conc (common:get-megatest-exe) +;; " -server - ";; (or target-host "-") +;; (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") +;; " -daemonize " +;; "") +;; ;; " -log " logfile +;; " -m testsuite:" testsuite +;; " " profile-mode +;; )) ;; (conc " >> " logfile " 2>&1 &"))))) +;; (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!? +;; (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) +;; ;; we want the remote server to start in *toppath* so push there +;; (push-directory areapath) +;; (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") +;; (thread-start! log-rotate) +;; +;; ;; host.domain.tld match host? +;; ;; (if (and target-host +;; ;; ;; look at target host, is it host.domain.tld or ip address and does it +;; ;; ;; match current ip or hostname +;; ;; (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) +;; ;; (not (equal? curr-ip target-host))) +;; ;; (begin +;; ;; (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) +;; ;; (setenv "TARGETHOST" target-host))) +;; ;; +;; (setenv "TARGETHOST_LOGF" logfile) +;; (thread-sleep! (/ (random 3000) 1000)) ;; add a random initial delay. It seems pretty common that many running tests request a server at the same time +;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) +;; (system (conc "nbfake " cmdln)) +;; (unsetenv "TARGETHOST_LOGF") +;; ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) +;; (thread-join! log-rotate) +;; (pop-directory))) +;; +;; ;; given a path to a server log return: host port startseconds server-id +;; ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let +;; ;; example of what it's looking for in the log file: +;; ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 +;; +;; (define (server:logf-get-start-info logf) +;; (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id +;; (dbprep-rx (regexp "^SERVER: dbprep")) +;; (dbprep-found 0) +;; (bad-dat (list #f #f #f #f #f))) +;; (handle-exceptions +;; exn +;; (begin +;; ;; WARNING: this is potentially dangerous to blanket ignore the errors +;; (if (file-exists? logf) +;; (debug:print-info 2 *default-log-port* "Unable to get server info from "logf", exn=" exn)) +;; bad-dat) ;; no idea what went wrong, call it a bad server +;; (with-input-from-file +;; logf +;; (lambda () +;; (let loop ((inl (read-line)) +;; (lnum 0)) +;; (if (not (eof-object? inl)) +;; (let ((mlst (string-match server-rx inl)) +;; (dbprep (string-match dbprep-rx inl))) +;; (if dbprep (set! dbprep-found 1)) +;; (if (not mlst) +;; (if (< lnum 500) ;; give up if more than 500 lines of server log read +;; (loop (read-line)(+ lnum 1)) +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) +;; bad-dat)) +;; (match mlst +;; ((_ host port start server-id pid) +;; (list host +;; (string->number port) +;; (string->number start) +;; server-id +;; (string->number pid))) +;; (else +;; (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst) +;; bad-dat)))) +;; (begin +;; (if dbprep-found +;; (begin +;; (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) +;; (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting? +;; (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds)))) +;; bad-dat)))))))) +;; +;; ;; ;; get a list of servers from the log files, with all relevant data +;; ;; ;; ( mod-time host port start-time pid ) +;; ;; ;; +;; ;; (define (server:get-list areapath #!key (limit #f)) +;; ;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) +;; ;; (day-seconds (* 24 60 60))) +;; ;; ;; if the directory exists continue to get the list +;; ;; ;; otherwise attempt to create the logs dir and then +;; ;; ;; continue +;; ;; (if (if (directory-exists? (conc areapath "/logs")) +;; ;; '() +;; ;; (if (file-write-access? areapath) +;; ;; (begin +;; ;; (condition-case +;; ;; (create-directory (conc areapath "/logs") #t) +;; ;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) +;; ;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn))) +;; ;; (directory-exists? (conc areapath "/logs"))) +;; ;; '())) +;; ;; +;; ;; ;; Get the list of server logs. +;; ;; (let* ( +;; ;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers. +;; ;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'"))) +;; ;; (server-logs (glob (conc areapath "/logs/server-*-*.log"))) +;; ;; (num-serv-logs (length server-logs))) +;; ;; (if (or (null? server-logs) (= num-serv-logs 0)) +;; ;; (let () +;; ;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time)) +;; ;; '() +;; ;; ) +;; ;; (let loop ((hed (string-chomp (car server-logs))) +;; ;; (tal (cdr server-logs)) +;; ;; (res '())) +;; ;; (let* ((mod-time (handle-exceptions +;; ;; exn +;; ;; (begin +;; ;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn) +;; ;; (current-seconds)) ;; 0 +;; ;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted +;; ;; (down-time (- (current-seconds) mod-time)) +;; ;; (serv-dat (if (or (< num-serv-logs 10) +;; ;; (< down-time 900)) ;; day-seconds)) +;; ;; (server:logf-get-start-info hed) +;; ;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at +;; ;; (serv-rec (cons mod-time serv-dat)) +;; ;; (fmatch (string-match fname-rx hed)) +;; ;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) +;; ;; (new-res (if (null? serv-dat) +;; ;; res +;; ;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let +;; ;; (if (null? tal) +;; ;; (if (and limit +;; ;; (> (length new-res) limit)) +;; ;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work +;; ;; new-res) +;; ;; (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) +;; +;; #;(define (server:get-num-alive srvlst) +;; (let ((num-alive 0)) +;; (for-each +;; (lambda (server) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn)) +;; (match-let (((mod-time host port start-time server-id pid) +;; server)) +;; (let* ((uptime (- (current-seconds) mod-time)) +;; (runtime (if start-time +;; (- mod-time start-time) +;; 0))) +;; (if (< uptime 5)(set! num-alive (+ num-alive 1))))))) +;; srvlst) +;; num-alive)) +;; +;; ;; ;; given a list of servers get a list of valid servers, i.e. at least +;; ;; ;; 10 seconds old, has started and is less than 1 hour old and is +;; ;; ;; active (i.e. mod-time < 10 seconds +;; ;; ;; +;; ;; ;; mod-time host port start-time pid +;; ;; ;; +;; ;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off +;; ;; ;; and servers should stick around for about two hours or so. +;; ;; ;; +;; ;; (define (server:get-best srvlst) +;; ;; (let* ((nums (server:get-num-servers)) +;; ;; (now (current-seconds)) +;; ;; (slst (sort +;; ;; (filter (lambda (rec) +;; ;; (if (and (list? rec) +;; ;; (> (length rec) 2)) +;; ;; (let ((start-time (list-ref rec 3)) +;; ;; (mod-time (list-ref rec 0))) +;; ;; ;; (print "start-time: " start-time " mod-time: " mod-time) +;; ;; (and start-time mod-time +;; ;; (> (- now start-time) 0) ;; been running at least 0 seconds +;; ;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds +;; ;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set +;; ;; (< (- now start-time) +;; ;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime")) +;; ;; 180) +;; ;; (random 360)))) ;; under one hour running time +/- 180 +;; ;; )) +;; ;; #f)) +;; ;; srvlst) +;; ;; (lambda (a b) +;; ;; (< (list-ref a 3) +;; ;; (list-ref b 3)))))) +;; ;; (if (> (length slst) nums) +;; ;; (take slst nums) +;; ;; slst))) +;; +;; ;; ;; switch from server:get-list to server:get-servers-info +;; ;; ;; +;; ;; (define (server:get-first-best areapath) +;; ;; (let ((srvrs (server:get-best (server:get-list areapath)))) +;; ;; (if (and srvrs +;; ;; (not (null? srvrs))) +;; ;; (car srvrs) +;; ;; #f))) +;; ;; +;; ;; (define (server:get-rand-best areapath) +;; ;; (let ((srvrs (server:get-best (server:get-list areapath)))) +;; ;; (if (and (list? srvrs) +;; ;; (not (null? srvrs))) +;; ;; (let* ((len (length srvrs)) +;; ;; (idx (random len))) +;; ;; (list-ref srvrs idx)) +;; ;; #f))) +;; +;; (define (server:record->id servr) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn) +;; #f) +;; (match-let (((host port start-time server-id pid) +;; servr)) +;; (if server-id +;; server-id +;; #f)))) +;; +;; (define (server:record->url servr) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn) +;; #f) +;; (match-let (((host port start-time server-id pid) +;; servr)) +;; (if (and host port) +;; (conc host ":" port) +;; #f)))) +;; +;; +;; ;; if server-start-last exists, and wasn't old enough, wait + 1, then call this function recursively until it is old enough. +;; ;; if it is old enough, overwrite it and wait 0.25 seconds. +;; ;; if it then has the wrong server key, wait + 1 and call this function recursively. +;; ;; +;; #;(define (server:wait-for-server-start-last-flag areapath) +;; (let* ((start-flag (conc areapath "/logs/server-start-last")) +;; ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) +;; (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4)) +;; (server-key (conc (get-host-name) "-" (current-process-id)))) +;; (if (file-exists? start-flag) +;; (let* ((fmodtime (file-modification-time start-flag)) +;; (delta (- (current-seconds) fmodtime)) +;; (old-enough (> delta idletime)) +;; (new-server-key "")) +;; ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than seconds, and the new file still has the same server key as you just wrote, return #t. +;; ;; the intention is to make sure nfs can read the file we just wrote, and make sure it was written by us, and not another process. +;; (if (and old-enough +;; (begin +;; (debug:print-info 2 *default-log-port* "Writing " start-flag) +;; (with-output-to-file start-flag (lambda () (print server-key))) +;; (thread-sleep! 0.25) +;; (set! new-server-key (with-input-from-file start-flag (lambda () (read-line)))) +;; (equal? server-key new-server-key))) +;; #t +;; ;; If either of the above conditions is not true, print a "Gating server start" message, wait + 1, then call this function recursively. +;; (begin +;; (debug:print-info 0 *default-log-port* "Gating server start, last start: " +;; (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server")) +;; +;; (thread-sleep! ( + 1 idletime)) +;; (server:wait-for-server-start-last-flag areapath))))))) +;; +;; ;; oldest server alive determines host then choose random of youngest +;; ;; five servers on that host +;; ;; +;; (define (server:get-servers-info areapath) +;; ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.") +;; (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo"))) +;; (if (not (file-exists? servinfodir)) +;; (create-directory servinfodir)) +;; (let* ((allfiles (glob (conc servinfodir"/*"))) +;; (res (make-hash-table))) +;; (for-each +;; (lambda (f) +;; (let* ((hostport (pathname-strip-directory f)) +;; (serverdat (server:logf-get-start-info f))) +;; (match serverdat +;; ((host port start server-id pid) +;; (if (and host port start server-id pid) +;; (hash-table-set! res hostport serverdat) +;; (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))) +;; (else +;; (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat))))) +;; allfiles) +;; res))) +;; +;; ;; check the .servinfo directory, are there other servers running on this +;; ;; or another host? +;; ;; +;; ;; returns #t => ok to start another server +;; ;; #f => not ok to start another server +;; ;; +;; (define (server:minimal-check areapath) +;; (server:clean-up-old areapath) +;; (let* ((srvdir (server:get-servinfo-dir areapath)) ;; (conc areapath"/.servinfo")) +;; (servrs (glob (conc srvdir"/*"))) +;; (thishostip (server:get-best-guess-address (get-host-name))) +;; (thisservrs (glob (conc srvdir"/"thishostip":*"))) +;; (homehostinf (server:choose-server areapath 'homehost)) +;; (havehome (car homehostinf)) +;; (wearehome (cdr homehostinf))) +;; (debug:print-info 0 *default-log-port* thishostip", have homehost: "havehome", we are homehost: "wearehome +;; ", numservers: "(length thisservrs)) +;; (cond +;; ((not havehome) #t) ;; no homehost yet, go for it +;; ((and havehome wearehome (< (length thisservrs) 20)) #t) ;; we are home and less than 20 servers, ok to start another +;; ((and havehome (not wearehome)) #f) ;; we are not the home host +;; ((and havehome wearehome (>= (length thisservrs) 20)) #f) ;; have enough running +;; (else +;; (debug:print 0 *default-log-port* "WARNING: Unrecognised scenario, servrs="servrs", thishostip="thishostip", thisservrs="thisservrs) +;; #t)))) +;; +;; +;; (define server-last-start 0) +;; +;; +;; ;; oldest server alive determines host then choose random of youngest +;; ;; five servers on that host +;; ;; +;; ;; mode: +;; ;; best - get best server (random of newest five) +;; ;; home - get home host based on oldest server +;; ;; info - print info +;; (define (server:choose-server areapath #!optional (mode 'best)) +;; ;; age is current-starttime +;; ;; find oldest alive +;; ;; 1. sort by age ascending and ping until good +;; ;; find alive rand from youngest +;; ;; 1. sort by age descending +;; ;; 2. take five +;; ;; 3. check alive, discard if not and repeat +;; ;; first we clean up old server files +;; (server:clean-up-old areapath) +;; (let* ((since-last (- (current-seconds) server-last-start)) +;; (server-start-delay 10)) +;; (if ( < (- (current-seconds) server-last-start) 10 ) +;; (begin +;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) +;; (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds") +;; (thread-sleep! server-start-delay) +;; ) +;; (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start)) +;; ) +;; ) +;; (let* ((serversdat (server:get-servers-info areapath)) +;; (servkeys (hash-table-keys serversdat)) +;; (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last +;; (sort servkeys ;; list of "host:port" +;; (lambda (a b) +;; (>= (list-ref (hash-table-ref serversdat a) 2) +;; (list-ref (hash-table-ref serversdat b) 2)))) +;; '()))) +;; (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat) +;; (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys) +;; (if (not (null? by-time-asc)) +;; (let* ((oldest (last by-time-asc)) +;; (oldest-dat (hash-table-ref serversdat oldest)) +;; (host (list-ref oldest-dat 0)) +;; (all-valid (filter (lambda (x) +;; (equal? host (list-ref (hash-table-ref serversdat x) 0))) +;; by-time-asc)) +;; (best-ten (lambda () +;; (if (> (length all-valid) 11) +;; (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out +;; (if (> (length all-valid) 8) +;; (drop-right all-valid 1) +;; all-valid)))) +;; (names->dats (lambda (names) +;; (map (lambda (x) +;; (hash-table-ref serversdat x)) +;; names))) +;; (am-home? (lambda () +;; (let* ((currhost (get-host-name)) +;; (bestadrs (server:get-best-guess-address currhost))) +;; (or (equal? host currhost) +;; (equal? host bestadrs)))))) +;; (case mode +;; ((info) +;; (print "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid) +;; (print "youngest: "(hash-table-ref serversdat (car all-valid)))) +;; ((home) host) +;; ((homehost) (cons host (am-home?))) ;; shut up old code +;; ((home?) (am-home?)) +;; ((best-ten)(names->dats (best-ten))) +;; ((all-valid)(names->dats all-valid)) +;; ((best) (let* ((best-ten (best-ten)) +;; (len (length best-ten))) +;; (hash-table-ref serversdat (list-ref best-ten (random len))))) +;; ((count)(length all-valid)) +;; (else +;; (debug:print 0 *default-log-port* "ERROR: invalid command "mode) +;; #f))) +;; (begin +;; (server:run areapath) +;; (set! server-last-start (current-seconds)) +;; ;; (thread-sleep! 3) +;; (case mode +;; ((homehost) (cons #f #f)) +;; (else #f)))))) +;; +;; (define (server:get-servinfo-dir areapath) +;; (let* ((spath (conc areapath"/.servinfo"))) +;; (if (not (file-exists? spath)) +;; (create-directory spath #t)) +;; spath)) +;; +;; (define (server:clean-up-old areapath) +;; ;; any server file that has not been touched in ten minutes is effectively dead +;; (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*")))) +;; (for-each +;; (lambda (sfile) +;; (let* ((modtime (handle-exceptions +;; exn +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile) +;; (current-seconds)) +;; (file-modification-time sfile)))) +;; (if (and (number? modtime) +;; (> (- (current-seconds) modtime) +;; 600)) +;; (begin +;; (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.") +;; (handle-exceptions +;; exn +;; (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile) +;; (delete-file sfile)))))) +;; sfiles))) +;; +;; ;; would like to eventually get rid of this +;; ;; +;; (define (common:on-homehost?) +;; (server:choose-server *toppath* 'home?)) +;; +;; ;; kind start up of server, wait before allowing another server for a given +;; ;; area to be launched +;; ;; +;; (define (server:kind-run areapath) +;; ;; look for $MT_RUN_AREA_HOME/logs/server-start-last +;; ;; and wait for it to be at least seconds old +;; ;; (server:wait-for-server-start-last-flag areapath) +;; (let loop () +;; (if (> (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)) 2) +;; (begin +;; (if (common:low-noise-print 30 "our-host-load") +;; (debug:print 0 *default-log-port* "WARNING: system load is high, waiting to start server.")) +;; (loop)))) +;; (if (< (server:choose-server areapath 'count) 20) +;; (server:run areapath)) +;; #;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running? +;; (let* ((lock-file (conc areapath "/logs/server-start.lock"))) +;; (let* ((start-flag (conc areapath "/logs/server-start-last"))) +;; (common:simple-file-lock-and-wait lock-file expire-time: 25) +;; (debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag) +;; (system (conc "touch " start-flag)) ;; lazy but safe +;; (server:run areapath) +;; (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED". +;; (common:simple-file-release-lock lock-file))) +;; (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another."))) +;; +;; ;; this one seems to be the general entry point +;; ;; +;; (define (server:start-and-wait areapath #!key (timeout 60)) +;; (let ((give-up-time (+ (current-seconds) timeout))) +;; (let loop ((server-info (server:check-if-running areapath)) +;; (try-num 0)) +;; (if (or server-info +;; (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. +;; (server:record->url server-info) +;; (let* ( (servers (server:choose-server areapath 'all-valid)) +;; (num-ok (if servers (length (server:choose-server areapath 'all-valid)) 0))) +;; (if (and (> try-num 0) ;; first time through simply wait a little while then try again +;; (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one +;; (server:run areapath)) +;; (thread-sleep! 5) +;; (loop (server:check-if-running areapath) +;; (+ try-num 1))))))) +;; +;; (define (server:get-num-servers #!key (numservers 2)) +;; (let ((ns (string->number +;; (or (configf:lookup *configdat* "server" "numservers") "notanumber")))) +;; (or ns numservers))) +;; +;; ;; no longer care if multiple servers are started by accident. older servers will drop off in time. +;; ;; +;; (define (server:check-if-running areapath) ;; #!key (numservers "2")) +;; (let* ((ns (server:get-num-servers)) ;; get the setting the for maximum number of servers allowed +;; (servers (server:choose-server areapath 'best-ten))) ;; (server:get-best (server:get-list areapath)))) +;; (if (or (and servers +;; (null? servers)) +;; (not servers)) +;; ;; (and (list? servers) +;; ;; (< (length servers) (+ 1 (random ns))))) ;; somewhere between 1 and numservers +;; #f +;; (let loop ((hed (car servers)) +;; (tal (cdr servers))) +;; (let ((res (server:check-server hed))) +;; (if res +;; hed +;; (if (null? tal) +;; #f +;; (loop (car tal)(cdr tal))))))))) +;; +;; ;; ping the given server +;; ;; +;; (define (server:check-server server-record) +;; (let* ((server-url (server:record->url server-record)) +;; (server-id (server:record->id server-record)) +;; (res (server:ping server-url server-id))) +;; (if res +;; server-url +;; #f))) +;; +;; (define (server:kill servr) +;; (handle-exceptions +;; exn +;; (begin +;; (debug:print-info 0 *default-log-port* "Unable to get host and/or port from " servr ", exn=" exn) +;; #f) +;; (match-let (((mod-time hostname port start-time server-id pid) +;; servr)) +;; (tasks:kill-server hostname pid)))) +;; +;; ;; called in megatest.scm, host-port is string hostname:port +;; ;; +;; ;; NOTE: This is NOT called directly from clients as not all transports support a client running +;; ;; in the same process as the server. +;; ;; +;; (define (server:ping host:port server-id #!key (do-exit #f)) +;; (let* ((host-port (cond +;; ((string? host:port) +;; (let ((slst (string-split host:port ":"))) +;; (if (eq? (length slst) 2) +;; (list (car slst)(string->number (cadr slst))) +;; #f))) +;; (else +;; #f)))) +;; (cond +;; ((and (list? host-port) +;; (eq? (length host-port) 2)) +;; (let* ((myrunremote (make-remote)) +;; (iface (car host-port)) +;; (port (cadr host-port)) +;; (server-dat (client:connect iface port server-id myrunremote)) +;; (login-res (rmt:login-no-auto-client-setup myrunremote))) +;; (if (and (list? login-res) +;; (car login-res)) +;; (begin +;; ;; (print "LOGIN_OK") +;; (if do-exit (exit 0)) +;; #t) +;; (begin +;; ;; (print "LOGIN_FAILED") +;; (if do-exit (exit 1)) +;; #f)))) +;; (else +;; (if host:port +;; (debug:print 0 *default-log-port* "ERROR: bad host:port "host:port)) +;; (if do-exit +;; (exit 1) +;; #f))))) +;; +;; ;; run ping in separate process, safest way in some cases +;; ;; +;; (define (server:ping-server ifaceport) +;; (with-input-from-pipe +;; (conc (common:get-megatest-exe) " -ping " ifaceport) +;; (lambda () +;; (let loop ((inl (read-line)) +;; (res "NOREPLY")) +;; (if (eof-object? inl) +;; (case (string->symbol res) +;; ((NOREPLY) #f) +;; ((LOGIN_OK) #t) +;; (else #f)) +;; (loop (read-line) inl)))))) +;; +;; ;; NOT USED (well, ok, reference in rpc-transport but otherwise not used). +;; ;; +;; (define (server:login toppath) +;; (lambda (toppath) +;; (set! *db-last-access* (current-seconds)) ;; might not be needed. +;; (if (equal? *toppath* toppath) +;; #t +;; #f))) +;; +;; ;; timeout is hms string: 1h 5m 3s, default is 1 minute +;; ;; This is currently broken. Just use the number of hours with no unit. +;; ;; Default is 60 seconds. +;; ;; +;; (define (server:expiration-timeout) +;; (let ((tmo (configf:lookup *configdat* "server" "timeout"))) +;; (if (and (string? tmo) +;; (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below +;; (* 3600 (string->number tmo)) +;; 600))) +;; +;; (define (server:get-best-guess-address hostname) +;; (let ((res #f)) +;; (for-each +;; (lambda (adr) +;; (if (not (eq? (u8vector-ref adr 0) 127)) +;; (set! res adr))) +;; ;; NOTE: This can fail when there is no mention of the host in /etc/hosts. FIXME +;; (vector->list (hostinfo-addresses (hostname->hostinfo hostname)))) +;; (string-intersperse +;; (map number->string +;; (u8vector->list +;; (if res res (hostname->ip hostname)))) "."))) +;; +;; ;; (define server:sync-lock-token "SERVER_SYNC_LOCK") +;; ;; (define (server:release-sync-lock) +;; ;; (db:no-sync-del! *no-sync-db* server:sync-lock-token)) +;; ;; (define (server:have-sync-lock?) +;; ;; (let* ((have-lock-pair (db:no-sync-get-lock *no-sync-db* server:sync-lock-token)) +;; ;; (have-lock? (car have-lock-pair)) +;; ;; (lock-time (cdr have-lock-pair)) +;; ;; (lock-age (- (current-seconds) lock-time))) +;; ;; (cond +;; ;; (have-lock? #t) +;; ;; ((>lock-age +;; ;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) +;; ;; (server:release-sync-lock) +;; ;; (server:have-sync-lock?)) +;; ;; (else #f)))) +;; +;; ;; moving this here as it needs access to db and cannot be in common. +;; ;; +;; +;; (define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f)) +;; (debug:print "WARNING: bruteforce-syncer is called but has been disabled!") +;; (lambda () +;; (debug:print "WARNING: bruteforce-syncer is called but has been disabled!")) +;; #;(let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh +;; (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) +;; (tmp-area (common:get-db-tmp-area)) +;; (tmp-db (conc tmp-area "/megatest.db")) +;; (staging-file (conc *toppath* "/.megatest.db")) +;; (mtdbfile (conc *toppath* "/megatest.db")) +;; (lockfile (common:get-sync-lock-filepath)) +;; (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) +;; (sync-cmd (if fork-to-background +;; (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake \""sync-cmd-core" && /bin/mv -f " staging-file " " mtdbfile" \"") +;; sync-cmd-core)) +;; (default-min-intersync-delay 2) +;; (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: default-min-intersync-delay)) +;; (default-duty-cycle 0.1) +;; (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) +;; (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) +;; (calculate-off-time (lambda (work-duration duty-cycle) +;; (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) +;; (off-time min-intersync-delay) ;; adjusted in closure below. +;; (do-a-sync +;; (lambda () +;; (BB> "Start do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync) +;; (let* ((finalres +;; (let retry-loop ((num-tries 0)) +;; (if (common:simple-file-lock lockfile) +;; (begin +;; (cond +;; ((not (or fork-to-background persist-until-sync)) +;; (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for max of (server.minimum-intersync-delay="min-intersync-delay +;; " , off-time="off-time" seconds ]") +;; (thread-sleep! (max off-time min-intersync-delay))) +;; (else +;; (debug:print 0 *default-log-port* "INFO: syncer thread NOT sleeping ; maybe time-to-exit..."))) +;; +;; (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) +;; (common:snapshot-file mtdbfile subdir: ".db-snapshot")) +;; (delete-file* staging-file) +;; (let* ((start-time (current-milliseconds)) +;; (res (system sync-cmd)) +;; (dbbackupfile (conc mtdbfile ".backup")) +;; (res2 +;; (cond +;; ((eq? 0 res ) +;; (handle-exceptions +;; exn +;; #f +;; (if (file-exists? dbbackupfile) +;; (delete-file* dbbackupfile) +;; ) +;; (if (eq? 0 (file-size sync-log)) +;; (delete-file* sync-log)) +;; (system (conc "/bin/mv " staging-file " " mtdbfile)) +;; +;; (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) +;; (set! off-time (calculate-off-time +;; last-sync-seconds +;; (cond +;; ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) +;; duty-cycle) +;; (else +;; (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) +;; default-duty-cycle)))) +;; +;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") +;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec ; with duty-cycle of "duty-cycle" off time is now "off-time) +;; 'sync-completed)) +;; (else +;; (system (conc "/bin/cp "sync-log" "sync-log".fail")) +;; (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") +;; (if (file-exists? (conc mtdbfile ".backup")) +;; (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) +;; #f)))) +;; (common:simple-file-release-lock lockfile) +;; (BB> "released lockfile: " lockfile) +;; (when (common:file-exists? lockfile) +;; (BB> "DID NOT ACTUALLY RELEASE LOCKFILE")) +;; res2) ;; end let +;; );; end begin +;; ;; else +;; (cond +;; (persist-until-sync +;; (thread-sleep! 1) +;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; we're in a fork-to-background so we need to succeed. Let's wait a jiffy and and try again. num-tries="num-tries" (waiting for lockfile="lockfile" to disappear)") +;; (retry-loop (add1 num-tries))) +;; (else +;; (thread-sleep! (max off-time (+ last-sync-seconds min-intersync-delay))) +;; (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") +;; 'parallel-sync-in-progress)) +;; ) ;; end if got lockfile +;; ) +;; )) +;; (BB> "End do-a-sync with fork-to-background="fork-to-background" persist-until-sync="persist-until-sync" and result="finalres) +;; finalres) +;; ) ;; end lambda +;; )) +;; do-a-sync)) +;; +;; )