@@ -185,17 +185,26 @@ (thread-sleep! 0.5) (debug:print-info 0 *default-log-port* "server for " dbfname " is in starting state, retrying connect") (tt:client-connect-to-server ttdat dbfname run-id testsuite server-start-proc)) (else (let* ((curr-secs (current-seconds))) + (if (not ping-res) ;; the server is actually dead, remove the .servinfo file + (begin + (debug:print-info 0 *default-log-port* "Unreachable server at " + host":"port" with servinfo file "servinffile", removing it") + (if (file-exists? servinffile) + (handle-exceptions + exn + #f + (delete-file servinffile))))) ;; rm the (last server) would go here (if (> (- curr-secs (tt-last-serv-start ttdat)) 10) (begin (tt-last-serv-start-set! ttdat curr-secs) (server-start-proc))) ;; start server if 10 sec since last attempt (thread-sleep! 1) - (debug:print-info 0 *default-log-port* "server ping result was neither running nor starting. Retrying connect") + (debug:print-info 0 *default-log-port* "server ping result was "ping-res" neither running nor starting. Retrying connect") (tt:client-connect-to-server ttdat dbfname run-id testsuite server-start-proc))))))) (else ;; no good server found, if haven't started server in > 5 secs, start another (if (> (- (current-seconds) (tt-last-serv-start ttdat)) 3) ;; BUG - grow this number really do not want to swamp the machine with servers (begin (debug:print-info 0 *default-log-port* "Starting server for "dbfname) @@ -566,10 +575,12 @@ (dbfile:insert-or-update-process nsdb *procinf*))) (thread-start! run-thread) (thread-join! run-thread) ;; run thread will exit on timeout or other conditions + ;; (tcp-close (tt-socket ttdat)) ;; close up ports here + ;; replace with call to (dbfile:set-process-done nsdb host pid reason) (procinf-status-set! *procinf* "done") (procinf-end-set! *procinf* (current-seconds)) ;; either convert this to use set-process-done or get rid of set-process-done (dbfile:with-no-sync-db @@ -629,11 +640,11 @@ (if (< (- (current-seconds) (tt-last-access ttdat)) (tt-server-timeout-param)) (begin (thread-sleep! 5) (loop))))) - ;; (cleanup) ;; all done by tt:shutdown-server + (tt:shutdown-server ttdat) (debug:print 0 *default-log-port* "INFO: Server timed out, exiting from tt:keep-running."))) (define (tt:shutdown-server ttdat) (let* ((host (tt-host ttdat)) @@ -641,11 +652,10 @@ (sinf (tt-servinf-file ttdat))) (tt-state-set! ttdat 'shutdown) (portlogger:open-run-close portlogger:set-port port "released") (if (file-exists? sinf) (delete-file* sinf)) - (tcp-close (tt-socket ttdat)) ;; close up ports here )) ;; return servid ;; side-effects: ;; ttdat-cleanup-proc is populated with function to remove the serverinfo file @@ -772,24 +782,29 @@ #f) (else (if (not (file-exists? (conc areapath"/logs"))) (create-directory (conc areapath"/logs") #t)) (let* ((logfile (conc areapath "/logs/server-"dbfname"-"(current-process-id)".log")) ;; -" curr-pid "-" target-host ".log")) - (cmdln (conc + (cmdln (conc mtexe " -startdir "areapath " -server - ";; (or target-host "-") " -m testsuite:"testsuite " -db "dbfname ;; (dbmod:run-id->dbfname run-id) " " profile-mode - (conc " >> " logfile " 2>&1 &")))) + #;(conc " >> " logfile " 2>&1 &")))) ;; we want the remote server to start in *toppath* so push there ;; (push-directory areapath) ;; use cd in the command line instead (debug:print 2 *default-log-port* "INFO: Trying to start server in tcp mode (" cmdln ") at "(common:human-time)" for "areapath) ;; (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) - (system cmdln) + (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ... + (setenv "NBFAKE_LOG" logfile) + (system (conc "cd "areapath" ; nbfake " cmdln)) + (unsetenv "NBFAKE_QUIET") + (unsetenv "NBFAKE_LOG") + ;; (system cmdln) (hash-table-set! *last-server-start* dbfname (current-seconds)) ;; ;; use below to go back to nbfake - nbfake does cause trouble ... ;; (setenv "NBFAKE_QUIET" "yes") ;; BUG: change to with-environment-variable ... ;; (setenv "NBFAKE_LOG" logfile) ;; (system (conc "cd "areapath" ; nbfake " cmdln))