Index: tcp-transportmod.scm ================================================================== --- tcp-transportmod.scm +++ tcp-transportmod.scm @@ -478,37 +478,68 @@ ;; ;; Server viability is checked in keep-running. Blindly start and run here. ;; (define (tt:start-server areapath run-id dbfname-in handler keys) (assert areapath "FATAL: areapath not provided for tt:start-server") - ;; is there already a server for this dbfile? Then exit. - (debug:print 2 *default-log-port* "tt:start-server: " dbfname-in) (let* ((ttdat (make-tt areapath: areapath)) - (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) - (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead - (debug:print 0 *default-log-port* "Found " (length servers) " already running for " dbfname) - (if (> (length servers) 0) - (begin - (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.") - (exit)) - (let* ((dbstruct (dbmod:open-dbmoddb areapath run-id dbfname (dbfile:db-init-proc) keys))) - (tt-handler-set! ttdat (handler dbstruct)) - (let* ((tcp-thread (make-thread - (lambda () - (tt:start-tcp-server ttdat)) ;; start the tcp-server which applies handler to incoming data - "tcp-server-thread")) - (run-thread (make-thread - (lambda () - (tt:keep-running ttdat dbfname dbstruct))))) - (thread-start! tcp-thread) - (thread-start! run-thread) - - (let* ((areapath (tt-areapath ttdat)) - (nosyncdbpath (conc areapath"/.mtdb"))) - ;; this didn't seem to work, is port not available yet? - (let loop ((count 0)) - (if (tt-port ttdat) + (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id)))) + (let* ((dbstruct (dbmod:open-dbmoddb areapath run-id dbfname (dbfile:db-init-proc) keys))) + (tt-handler-set! ttdat (handler dbstruct)) + (let* ((tcp-thread (make-thread + (lambda () + (tt:start-tcp-server ttdat)) ;; start the tcp-server which applies handler to incoming data + "tcp-server-thread")) + (run-thread (make-thread + (lambda () + (tt:keep-running ttdat dbfname dbstruct))))) + (thread-start! tcp-thread) + (thread-start! run-thread) + + (let* ((areapath (tt-areapath ttdat)) + (nosyncdbpath (conc areapath"/.mtdb")) + (servers ;; (tt:find-server areapath dbfname))) + (tt:get-server-info-sorted ttdat dbfname))) ;; (host port startseconds server-id servinfofile) + ;; contact servers via ping, if no response remove the .servinfo file + (for-each (lambda (servdat) + (match servdat + ((host port startseconds server-id servinfofile) + + ;; ping + + ;; remove servinfofile if no response from ping + + + ;; copied from keep-running + + (debug:print-info 0 *default-log-port* "I'm not the lead server: "servers) + (let* ((leadsrv (car servers))) + (match leadsrv + ((host port startseconds server-id pid dbfname servinfofile) + (let* ((result (tt:timed-ping host port server-id)) + (res (car result)) + (ping (cdr result))) + (debug:print-info 0 *default-log-port* "Ping to "host":"port", with server-id "server-id + ", and file "servinfofile" returned "res) + (if res + #f ;; not the server, but all good, want to exit + (if (and (file-exists? servinfofile) + (> (- (current-seconds)(file-modification-time servinfofile)) 30)) + (begin + ;; can't ping and file has been on disk 15 seconds, go ahead and try to remove it + (debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile) + (handle-exceptions + exn + (debug:print-info 0 *default-log-port* "Error removing server info file: "servinfofile) + (delete-file* servinfofile) + ) + #t) ;; not the server but the server is not reachable + + ;; + + ;; this didn't seem to work, is port not available yet? + (let loop ((count 0)) + (if (tt-port ttdat) (begin (procinf-port-set! *procinf* (tt-port ttdat)) (procinf-dbname-set! *procinf* dbfname) (dbfile:with-no-sync-db nosyncdbpath