Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -47,11 +47,11 @@ ;; Call this to start the actual server ;; (define *db:process-queue-mutex* (make-mutex)) -(define (http-transport:run hostn run-id server-id) +(define (http-transport:run hostn) (debug:print 2 *default-log-port* "Attempting to start the server ...") (let* ((db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily (hostname (get-host-name)) (ipaddrstr (let ((ipstr (if (string=? "-" hostn) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") @@ -104,18 +104,17 @@ ((equal? (uri-path (request-uri (current-request))) '(/ "hey")) (send-response body: "hey there!\n" headers: '((content-type text/plain)))) (else (continue)))))))) - (http-transport:try-start-server run-id ipaddrstr start-port server-id))) + (http-transport:try-start-server ipaddrstr start-port))) ;; This is recursively run by http-transport:run until sucessful ;; -(define (http-transport:try-start-server run-id ipaddrstr portnum server-id) - (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) - (tdbdat (tasks:open-db))) - (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " run-id=" run-id " ipaddrsstr=" ipaddrstr " portnum=" portnum " server-id=" server-id " config-hostname=" config-hostname) +(define (http-transport:try-start-server ipaddrstr portnum) + (let ((config-hostname (configf:lookup *configdat* "server" "hostname"))) + (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname) (handle-exceptions exn (begin (print-error-message exn) (if (< portnum 64000) @@ -126,34 +125,26 @@ (portlogger:open-run-close portlogger:set-failed portnum) (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port") (thread-sleep! 0.1) ;; get_next_port goes here - (http-transport:try-start-server run-id - ipaddrstr - (portlogger:open-run-close portlogger:find-port) - server-id)) + (http-transport:try-start-server ipaddrstr + (portlogger:open-run-close portlogger:find-port))) (begin - (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id ipaddrstr portnum " http-transport:try-start-server") (print "ERROR: Tried and tried but could not start the server")))) ;; any error in following steps will result in a retry (set! *server-info* (list ipaddrstr portnum)) - (tasks:server-set-interface-port - (db:delay-if-busy tdbdat) - server-id - ipaddrstr portnum) (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) ;; This starts the spiffy server ;; NEED WAY TO SET IP TO #f TO BIND ALL ;; (start-server bind-address: ipaddrstr port: portnum) (if config-hostname ;; this is a hint to bind directly (start-server port: portnum bind-address: (if (equal? config-hostname "-") ipaddrstr config-hostname)) (start-server port: portnum)) - ;; (portlogger:open-run-close portlogger:set-port portnum "released") - (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id ipaddrstr portnum " http-transport:try-start-server") + (portlogger:open-run-close portlogger:set-port portnum "released") (debug:print 1 *default-log-port* "INFO: server has been stopped")))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== @@ -341,17 +332,16 @@ server-dat)) ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; -(define (http-transport:keep-running server-id run-id) +(define (http-transport:keep-running) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server for run-id=" run-id) - (let* ((tdbdat (tasks:open-db)) - (server-start-time (current-seconds)) + (let* ((server-start-time (current-seconds)) (server-info (let loop ((start-time (current-seconds)) (changed #t) (last-sdat "not this")) (let ((sdat #f)) (thread-sleep! 0.01) @@ -369,11 +359,11 @@ (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) (sleep 4) (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes (begin (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server " server-id " for run " run-id) - (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature") + ;; (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature") (exit)) (loop start-time (equal? sdat last-sdat) sdat))))))) (iface (car server-info)) @@ -383,33 +373,18 @@ (server-going #f)) (let loop ((count 0) (server-state 'available) (bad-sync-count 0) (start-time (current-milliseconds))) - ;;(BB> "http-transport: top of loop; count="count" server-state="server-state" bad-sync-count="bad-sync-count" server-going="server-going) ;; Use this opportunity to sync the tmp db to megatest.db (if (not server-going) ;; *dbstruct-db* - ;; Removed code is pasted below (keeping it around until we are clear it is not needed). - ;; no *dbstruct-db* yet, set running after our first pass through and start the db - (if (eq? server-state 'available) - (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers - (if (equal? new-server-id server-id) - (begin - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep") - ;;(BB> "http-transport: ->dbprep") - (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access - (set! *dbstruct-db* (db:setup)) ;; run-id)) - (set! server-going #t) - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running") - ;;(BB> "http-transport: ->running") - (server:write-dotserver *toppath* iface port (current-process-id) 'http) - (thread-start! *watchdog*) - (server:complete-attempt *toppath*)) - (begin ;; gotta exit nicely - ;;(BB> "http-transport: ->collision") - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision") - (http-transport:server-shutdown server-id port)))))) + (begin + (debug:print 0 *default-log-port* "SERVER: dbprep") + (set! *dbstruct-db* (db:setup)) ;; run-id)) + (set! server-going #t) + (debug:print 0 *default-log-port* "SERVER: running") ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. + (thread-start! *watchdog*))) ;; when things go wrong we don't want to be doing the various queries too often ;; so we strive to run this stuff only every four seconds or so. (let* ((sync-time (- (current-milliseconds) start-time)) (rem-time (quotient (- 4000 sync-time) 1000))) @@ -425,181 +400,108 @@ (set! sdat *server-info*) (mutex-unlock! *heartbeat-mutex*) (if (or (not (equal? sdat (list iface port))) (not server-id)) - (begin - (debug:print-info 0 *default-log-port* "interface changed, refreshing iface and port info") - (set! iface (car sdat)) - (set! port (cadr sdat)) - (server:write-dotserver *toppath* iface port (current-process-id) 'http))) + (let ((new-iface (car sdat)) + (new-port (cadr sdat))) + (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") + (set! iface new-iface) + (set! port new-port) + (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " " (current-seconds)))) ;; Transfer *db-last-access* to last-access to use in checking that we are still alive (mutex-lock! *heartbeat-mutex*) (set! last-access *db-last-access*) (mutex-unlock! *heartbeat-mutex*) + + (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) + (debug:print-info 0 *default-log-port* "SERVER STARTED: " iface ":" port " " (current-seconds))) - ;; (debug:print 11 *default-log-port* "last-access=" last-access ", server-timeout=" server-timeout) - ;; - ;; no_traffic, no running tests, if server 0, no running servers - ;; - ;; (let ((wait-on-running (configf:lookup *configdat* "server" b"wait-on-running"))) ;; wait on running tasks (if not true then exit on time out) - ;; (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)) (adjusted-timeout (if (> hrs-since-start 1) (- server-timeout (inexact->exact (round (* hrs-since-start 60)))) ;; subtract 60 seconds per hour server-timeout))) (if (common:low-noise-print 120 "server timeout") (debug:print-info 0 *default-log-port* "Adjusted server timeout: " adjusted-timeout)) (cond - ((not (server:confirm-dotserver *toppath* iface port (current-process-id) 'http)) - (debug:print-info 0 *default-log-port* "Server .server file does not exist or contents do not match. Initiate server shutdown.") - (http-transport:server-shutdown server-id port)) ((and *server-run* (> (+ last-access server-timeout) (current-seconds))) (if (common:low-noise-print 120 "server continuing") (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) - ;; - ;; Consider implementing some smarts here to re-insert the record or kill self is - ;; the db indicates so - ;; - ;; (if (tasks:server-am-i-the-server? tdb run-id) - ;; (tasks:server-set-state! tdb server-id "running")) - ;; (loop 0 server-state bad-sync-count (current-milliseconds))) (else (debug:print-info 0 *default-log-port* "Server timeed out. seconds since last db access: " (- (current-seconds) last-access)) (http-transport:server-shutdown server-id port))))))) -;; code cut out from above -;; -;; (condition-case -;; ;; (if (and (member (mutex-state *db-sync-mutex*) '(abandoned not-abandoned)) -;; ;; (> (- (current-seconds) *db-last-sync*) 5)) ;; if not currently being synced nor recently synced -;; (db:sync-touched *dbstruct-db* *run-id* force-sync: #t) ;; usually done in the watchdog, not here. -;; ((sync-failed)(cond -;; ((> bad-sync-count 10) ;; time to give up -;; (http-transport:server-shutdown server-id port)) -;; (else ;; (> bad-sync-count 0) ;; we've had a fail or two, delay and loop -;; (thread-sleep! 5) -;; (loop count server-state (+ bad-sync-count 1))))) -;; ((exn) -;; (debug:print-error 0 *default-log-port* "error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server") -;; (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed") -;; (exit))) -;; (set! sync-time (- (current-milliseconds) start-time)) -;; (set! rem-time (quotient (- 4000 sync-time) 1000)) -;; (debug:print 4 *default-log-port* "SYNC: time= " sync-time ", rem-time=" rem-time) -;; -;; (if (and (<= rem-time 4) -;; (> rem-time 0)) -;; (thread-sleep! rem-time) -;; (thread-sleep! 4))) ;; fallback for if the math is changed ... - (define (http-transport:server-shutdown server-id port) (let ((tdbdat (tasks:open-db))) ;;(BB> "http-transport:server-shutdown called") (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) ;; ;; start_shutdown ;; - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") + ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up (portlogger:open-run-close portlogger:set-port port "released") - (thread-sleep! 5) -;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) -;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) -;; (debug:print-info 0 *default-log-port* "Average cached write time " -;; (if (eq? *number-of-writes* 0) -;; "n/a (no writes)" -;; (/ *writes-total-delay* -;; *number-of-writes*)) -;; " ms") -;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) -;; (debug:print-info 0 *default-log-port* "Average non-cached time " -;; (if (eq? *number-non-write-queries* 0) -;; "n/a (no queries)" -;; (/ *total-non-write-delay* -;; *number-non-write-queries*)) - ;; " ms") - + (thread-sleep! 1) + + ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) + ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) + ;; (debug:print-info 0 *default-log-port* "Average cached write time " + ;; (if (eq? *number-of-writes* 0) + ;; "n/a (no writes)" + ;; (/ *writes-total-delay* + ;; *number-of-writes*)) + ;; " ms") + ;; (debug:print-info 0 *default-log-port* "Number non-cached queries " *number-non-write-queries*) + ;; (debug:print-info 0 *default-log-port* "Average non-cached time " + ;; (if (eq? *number-non-write-queries* 0) + ;; "n/a (no queries)" + ;; (/ *total-non-write-delay* + ;; *number-non-write-queries*)) + ;; " ms") + (db:print-current-query-stats) (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting") - (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running complete") + ;; (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running complete") ;; if the .server file contained :myport then we can remove it - (server:remove-dotserver-file *toppath* port) + ;; (server:remove-dotserver-file *toppath* port) ;;(BB> "http-transport:server-shutdown -> exit") (exit))) ;; all routes though here end in exit ... ;; ;; start_server? ;; -(define (http-transport:launch run-id) - (server:attempting-start *toppath*) - (let* ((tdbdat (tasks:open-db))) - (set! *run-id* run-id) - (if (args:get-arg "-daemonize") - (begin - (daemon:ize) - (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it - (begin - (current-error-port *alt-log-file*) - (current-output-port *alt-log-file*))))) - (if (and (server:read-dotserver *toppath*) - (server:check-if-running run-id)) - (begin - (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running") - (exit 0)) - (begin ;; ok, no server detected, clean out any lingering records - (tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id "notresponding"))) - (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)) - (remtries 4)) - (if (not server-id) - (if (> remtries 0) - (begin - (thread-sleep! 2) - (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id) - (- remtries 1))) - (begin - ;; since we didn't get the server lock we are going to clean up and bail out - (debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") - (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " http-transport:launch") - (server:complete-attempt *toppath*) - )) - (let* ((th2 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server run thread started") - (http-transport:run - (if (args:get-arg "-server") - (args:get-arg "-server") - "-") - run-id - server-id)) "Server run")) - (th3 (make-thread (lambda () - (debug:print-info 0 *default-log-port* "Server monitor thread started") - (http-transport:keep-running server-id run-id)) - "Keep running"))) - (thread-start! th2) - (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. - (thread-start! th3) - (set! *didsomething* #t) - (thread-join! th2) - (exit)))))) - -;; (define (http:ping run-id host-port) -;; (let* ((server-dat (http-transport:client-connect (car host-port)(cadr host-port))) -;; (login-res (rmt:login-no-auto-client-setup server-dat run-id))) -;; (if (and (list? login-res) -;; (car login-res)) -;; (begin -;; (print "LOGIN_OK") -;; (exit 0)) -;; (begin -;; (print "LOGIN_FAILED") -;; (exit 1))))) +(define (http-transport:launch) + (if (args:get-arg "-daemonize") + (begin + (daemon:ize) + (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it + (begin + (current-error-port *alt-log-file*) + (current-output-port *alt-log-file*))))) + (let* ((th2 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server run thread started") + (http-transport:run + (if (args:get-arg "-server") + (args:get-arg "-server") + "-") + )) "Server run")) + (th3 (make-thread (lambda () + (debug:print-info 0 *default-log-port* "Server monitor thread started") + (http-transport:keep-running) + "Keep running")))) + (thread-start! th2) + (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. + (thread-start! th3) + (set! *didsomething* #t) + (thread-join! th2) + (exit))) (define (http-transport:server-signal-handler signum) (signal-mask! signum) (handle-exceptions exn Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -61,11 +61,11 @@ (debug:print-info 0 *default-log-port* "Server already running (=> "dotserver-url"<=). Aborting server launch attempt in this process ("(current-process-id)")") (exit) )) (case transport-type - ((http)(http-transport:launch run-id)) + ((http)(http-transport:launch)) ;;((nmsg)(nmsg-transport:launch run-id)) ((rpc) (rpc-transport:launch run-id)) (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type))) ;; is this a good place to print server exit stats? @@ -122,31 +122,31 @@ ;; try running on that host ;; incidental: rotate logs in logs/ dir. ;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area (let* ((curr-host (get-host-name)) - (attempt-in-progress (server:start-attempted? areapath)) - (dot-server-url (server:check-if-running areapath)) + ;; (attempt-in-progress (server:start-attempted? areapath)) + ;; (dot-server-url (server:check-if-running areapath)) (curr-ip (server:get-best-guess-address curr-host)) (curr-pid (current-process-id)) (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) (target-host (car homehost)) (testsuite (common:get-testsuite-name)) - (logfile (conc areapath "/logs/server.log")) + (logfile (conc areapath "/logs/server-" curr-pid "-" target-host ".log")) (cmdln (conc (common:get-megatest-exe) " -server " (or target-host "-") " -run-id " 0 (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") (conc " -daemonize -log " logfile) "") " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &"))))) (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread"))) ;; we want the remote server to start in *toppath* so push there (push-directory areapath) (cond - (attempt-in-progress - (debug:print 0 *default-log-port* "INFO: Not trying to start server because attempt is in progress: "attempt-in-progress)) - (dot-server-url - (debug:print 0 *default-log-port* "INFO: Not trying to start server because one is already running : "dot-server-url)) + ;; (attempt-in-progress + ;; (debug:print 0 *default-log-port* "INFO: Not trying to start server because attempt is in progress: "attempt-in-progress)) + ;; (dot-server-url + ;; (debug:print 0 *default-log-port* "INFO: Not trying to start server because one is already running : "dot-server-url)) (else (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") (thread-start! log-rotate) ;; host.domain.tld match host? @@ -164,11 +164,46 @@ (system (conc "nbfake " cmdln)) (unsetenv "TARGETHOST_LOGF") (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) (thread-join! log-rotate) (pop-directory))))) - + +;; given a path to a server log return: host port startseconds +;; +(define (server:logf-get-start-info logf) + (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT (\\d+)"))) ;; SERVER STARTED: host:port AT timesecs + (with-input-from-file + logf + (lambda () + (let loop ((inl (read-line)) + (lnum 0)) + (if (not (eof-object? inl)) + (let ((mlst (string-match rx inl))) + (if (not mlst) + (if (< lnum 500) ;; give up if more than 500 lines of server log read + (loop (read-line)(+ lnum 1)) + (list #f #f #f)) + (cdr mlst))) + (list #f #f #F))))))) + +;; get a list of servers with all relevant data +;; +(define (server:get-list areapath) + (if (directory-exists? areapath) + (let ((server-logs (glob (conc areapath "/logs/server-*.log")))) + (if (null? server-logs) + '() + (let loop ((hed (car server-logs)) + (tal (cdr server-logs)) + (res '())) + (let* ((mod-time (file-modification-time hed)) + (serv-rec (cons mod-time (server:logf-get-start-info hed))) + (new-res (cons res serv-rec))) + (if (null? tal) + new-res + (loop (car tal)(cdr tal) new-res)))))))) + (define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value. (if *my-client-signature* *my-client-signature* (let ((sig (server:mk-signature))) (set! *my-client-signature* sig) *my-client-signature*))) @@ -183,116 +218,116 @@ (server:run areapath) (hash-table-set! *server-kind-run* areapath (current-seconds)))))) (define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG. -(define (server:attempting-start areapath) - (with-output-to-file - (conc areapath "/.starting-server") - (lambda () - (print (current-process-id) " on " (get-host-name))))) - -(define (server:complete-attempt areapath) - (delete-file* (conc areapath "/.starting-server"))) - -(define (server:start-attempted? areapath) - (let ((flagfile (conc areapath "/.starting-server"))) - (handle-exceptions - exn - #f ;; if things go wrong pretend we can't see the file - (cond - ((and (file-exists? flagfile) - (< (- (current-seconds) - (file-modification-time flagfile)) - 15)) ;; exists and less than 15 seconds old - (with-input-from-file flagfile (lambda () (read-line)))) - ((file-exists? flagfile) ;; it is stale. - (server:complete-attempt areapath) - #f) - (else #f))))) - -(define (server:read-dotserver areapath) - (let ((dotfile (conc areapath "/.server"))) - (handle-exceptions - exn - #f ;; if things go wrong pretend we can't see the file - (cond - ((not (file-exists? dotfile)) - #f) - ((not (file-read-access? dotfile)) - #f) - ((> (server:dotserver-age-seconds areapath) (+ 5 (server:get-timeout))) - (server:remove-dotserver-file areapath ".*") - #f) - (else - (let* ((line - (with-input-from-file - dotfile - (lambda () - (read-line)))) - (tokens (if (string? line) (string-split line ":") #f))) - (cond - ((eq? 4 (length tokens)) - tokens) - (else #f)))))))) - -(define (server:read-dotserver->url areapath) - (let ((dotserver-tokens (server:read-dotserver areapath))) - (if dotserver-tokens - (conc (list-ref dotserver-tokens 0) ":" (list-ref dotserver-tokens 1)) - #f))) - -;; write a .server file in *toppath* with hostport -;; return #t on success, #f otherwise -;; -(define (server:write-dotserver areapath host port pid transport) - (let ((lock-file (conc areapath "/.server.lock")) - (server-file (conc areapath "/.server"))) - (if (common:simple-file-lock lock-file) - (let ((res (handle-exceptions - exn - #f ;; failed for some reason, for the moment simply return #f - (with-output-to-file server-file - (lambda () - (print (conc host ":" port ":" pid ":" transport)))) - #t))) - (debug:print-info 0 *default-log-port* "server file " server-file " for " host ":" port " created pid="pid) - (common:simple-file-release-lock lock-file) - res) - #f))) - - -;; this will check that the .server file present matches the server calling this procedure. -;; if parameters match (this-pid and transport) the file will be touched and #t returned -;; otherwise #f will be returned. -(define (server:confirm-dotserver areapath this-iface this-port this-pid this-transport) - (let* ((tokens (server:read-dotserver areapath))) - (cond - ((not tokens) - (debug:print-info 0 *default-log-port* "INFO: .server file does not exist.") - #f) - ((not (eq? 4 (length tokens))) - (debug:print-info 0 *default-log-port* "INFO: .server file is corrupt. There are not 4 tokens as expeted; there are "(length tokens)".") - #f) - ((not (equal? this-iface (list-ref tokens 0))) - (debug:print-info 0 *default-log-port* "INFO: .server file mismatch. for iface, server has value >"(list-ref tokens 0)"< but this server's value is >"this-iface"<") - #f) - ((not (equal? (->string this-port) (list-ref tokens 1))) - (debug:print-info 0 *default-log-port* "INFO: .server file mismatch. for port, .server has value >"(list-ref tokens 1)"< but this server's value is >"(->string this-port)"<") - #f) - ((not (equal? (->string this-pid) (list-ref tokens 2))) - (debug:print-info 0 *default-log-port* "INFO: .server file mismatch. for pid, .server has value >"(list-ref tokens 2)"< but this server's value is >"(->string this-pid)"<") - #f) - ((not (equal? (->string this-transport) (->string (list-ref tokens 3)))) - (debug:print-info 0 *default-log-port* "INFO: .server file mismatch. for transport, .server has value >"(list-ref tokens 3)"< but this server's value is >"this-transport"<") - #f) - (else (server:touch-dotserver areapath) - #t)))) - -(define (server:touch-dotserver areapath) - (let ((server-file (conc areapath "/.server"))) - (change-file-times server-file (current-seconds) (current-seconds)))) +;; (define (server:attempting-start areapath) +;; (with-output-to-file +;; (conc areapath "/.starting-server") +;; (lambda () +;; (print (current-process-id) " on " (get-host-name))))) +;; +;; (define (server:complete-attempt areapath) +;; (delete-file* (conc areapath "/.starting-server"))) +;; +;; (define (server:start-attempted? areapath) +;; (let ((flagfile (conc areapath "/.starting-server"))) +;; (handle-exceptions +;; exn +;; #f ;; if things go wrong pretend we can't see the file +;; (cond +;; ((and (file-exists? flagfile) +;; (< (- (current-seconds) +;; (file-modification-time flagfile)) +;; 15)) ;; exists and less than 15 seconds old +;; (with-input-from-file flagfile (lambda () (read-line)))) +;; ((file-exists? flagfile) ;; it is stale. +;; (server:complete-attempt areapath) +;; #f) +;; (else #f))))) +;; +;; (define (server:read-dotserver areapath) +;; (let ((dotfile (conc areapath "/.server"))) +;; (handle-exceptions +;; exn +;; #f ;; if things go wrong pretend we can't see the file +;; (cond +;; ((not (file-exists? dotfile)) +;; #f) +;; ((not (file-read-access? dotfile)) +;; #f) +;; ((> (server:dotserver-age-seconds areapath) (+ 5 (server:get-timeout))) +;; (server:remove-dotserver-file areapath ".*") +;; #f) +;; (else +;; (let* ((line +;; (with-input-from-file +;; dotfile +;; (lambda () +;; (read-line)))) +;; (tokens (if (string? line) (string-split line ":") #f))) +;; (cond +;; ((eq? 4 (length tokens)) +;; tokens) +;; (else #f)))))))) +;; +;; (define (server:read-dotserver->url areapath) +;; (let ((dotserver-tokens (server:read-dotserver areapath))) +;; (if dotserver-tokens +;; (conc (list-ref dotserver-tokens 0) ":" (list-ref dotserver-tokens 1)) +;; #f))) +;; +;; ;; write a .server file in *toppath* with hostport +;; ;; return #t on success, #f otherwise +;; ;; +;; (define (server:write-dotserver areapath host port pid transport) +;; (let ((lock-file (conc areapath "/.server.lock")) +;; (server-file (conc areapath "/.server"))) +;; (if (common:simple-file-lock lock-file) +;; (let ((res (handle-exceptions +;; exn +;; #f ;; failed for some reason, for the moment simply return #f +;; (with-output-to-file server-file +;; (lambda () +;; (print (conc host ":" port ":" pid ":" transport)))) +;; #t))) +;; (debug:print-info 0 *default-log-port* "server file " server-file " for " host ":" port " created pid="pid) +;; (common:simple-file-release-lock lock-file) +;; res) +;; #f))) +;; +;; +;; ;; this will check that the .server file present matches the server calling this procedure. +;; ;; if parameters match (this-pid and transport) the file will be touched and #t returned +;; ;; otherwise #f will be returned. +;; (define (server:confirm-dotserver areapath this-iface this-port this-pid this-transport) +;; (let* ((tokens (server:read-dotserver areapath))) +;; (cond +;; ((not tokens) +;; (debug:print-info 0 *default-log-port* "INFO: .server file does not exist.") +;; #f) +;; ((not (eq? 4 (length tokens))) +;; (debug:print-info 0 *default-log-port* "INFO: .server file is corrupt. There are not 4 tokens as expeted; there are "(length tokens)".") +;; #f) +;; ((not (equal? this-iface (list-ref tokens 0))) +;; (debug:print-info 0 *default-log-port* "INFO: .server file mismatch. for iface, server has value >"(list-ref tokens 0)"< but this server's value is >"this-iface"<") +;; #f) +;; ((not (equal? (->string this-port) (list-ref tokens 1))) +;; (debug:print-info 0 *default-log-port* "INFO: .server file mismatch. for port, .server has value >"(list-ref tokens 1)"< but this server's value is >"(->string this-port)"<") +;; #f) +;; ((not (equal? (->string this-pid) (list-ref tokens 2))) +;; (debug:print-info 0 *default-log-port* "INFO: .server file mismatch. for pid, .server has value >"(list-ref tokens 2)"< but this server's value is >"(->string this-pid)"<") +;; #f) +;; ((not (equal? (->string this-transport) (->string (list-ref tokens 3)))) +;; (debug:print-info 0 *default-log-port* "INFO: .server file mismatch. for transport, .server has value >"(list-ref tokens 3)"< but this server's value is >"this-transport"<") +;; #f) +;; (else (server:touch-dotserver areapath) +;; #t)))) +;; +;; (define (server:touch-dotserver areapath) +;; (let ((server-file (conc areapath "/.server"))) +;; (change-file-times server-file (current-seconds) (current-seconds)))) (define (server:dotserver-age-seconds areapath) (let ((server-file (conc areapath "/.server"))) (begin (handle-exceptions @@ -299,25 +334,25 @@ exn #f (- (current-seconds) (file-modification-time server-file)))))) -(define (server:remove-dotserver-file areapath hostport) - (let ((dotserver-url (server:read-dotserver->url areapath)) - (server-file (conc areapath "/.server")) - (lock-file (conc areapath "/.server.lock"))) - (if (and dotserver-url (string-match (conc ".*:" hostport "$") dotserver-url)) ;; port matches, good enough info to decide to remove the file - (if (common:simple-file-lock lock-file) - (begin - (handle-exceptions - exn - #f - (delete-file* server-file)) - (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " removed") - (common:simple-file-release-lock lock-file)) - (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " NOT removed - could not get lock.")) - (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " NOT removed - dotserver-url("dotserver-url") did not match hostport pattern ("hostport")")))) +;; (define (server:remove-dotserver-file areapath hostport) +;; (let ((dotserver-url (server:read-dotserver->url areapath)) +;; (server-file (conc areapath "/.server")) +;; (lock-file (conc areapath "/.server.lock"))) +;; (if (and dotserver-url (string-match (conc ".*:" hostport "$") dotserver-url)) ;; port matches, good enough info to decide to remove the file +;; (if (common:simple-file-lock lock-file) +;; (begin +;; (handle-exceptions +;; exn +;; #f +;; (delete-file* server-file)) +;; (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " removed") +;; (common:simple-file-release-lock lock-file)) +;; (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " NOT removed - could not get lock.")) +;; (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " NOT removed - dotserver-url("dotserver-url") did not match hostport pattern ("hostport")")))) ;; no longer care if multiple servers are started by accident. older servers will drop off in time. ;; (define (server:check-if-running areapath) (let* ((dotserver-url (server:read-dotserver->url areapath))) ;; tdbdat (tasks:open-db)))