Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -738,14 +738,15 @@ ;; Server? Start up here. ;; (let ((tl (launch:setup)) (run-id (and (args:get-arg "-run-id") - (string->number (args:get-arg "-run-id"))))) + (string->number (args:get-arg "-run-id")))) + (transport-type (string->symbol (or (args:get-arg "-transport") "http")))) (if run-id (begin - (server:launch run-id) + (server:launch run-id transport-type) (set! *didsomething* #t)) (debug:print-error 0 *default-log-port* "server requires run-id be specified with -run-id"))) ;; Not a server? This section will decide how to communicate ;; @@ -753,10 +754,11 @@ (if (null? (lset-intersection equal? (hash-table-keys args:arg-hash) '("-list-servers" "-stop-server" + "-kill-server" "-show-cmdinfo" "-list-runs" "-ping"))) (if (launch:setup) (let ((run-id (and (args:get-arg "-run-id") @@ -775,18 +777,20 @@ ;; MAY STILL NEED THIS ;; (set! *megatest-db* (make-dbr:dbstruct path: *toppath* local: #t)))))))))) (if (or (args:get-arg "-list-servers") - (args:get-arg "-stop-server")) + (args:get-arg "-stop-server") + (args:get-arg "-kill-server")) (let ((tl (launch:setup))) (if tl (let* ((tdbdat (tasks:open-db)) (servers (tasks:get-all-servers (db:delay-if-busy tdbdat))) (fmtstr "~5a~12a~8a~20a~24a~10a~10a~10a~10a\n") (servers-to-kill '()) - (killinfo (args:get-arg "-stop-server")) + (kill-switch (if (args:get-arg "-kill-server") "-9" "")) + (killinfo (or (args:get-arg "-stop-server") (args:get-arg "-kill-server") )) (khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f)) (sid (if killinfo (if (substring-index ":" killinfo) #f (string->number killinfo)) #f))) (format #t fmtstr "Id" "MTver" "Pid" "Host" "Interface:OutPort" "InPort" "LastBeat" "State" "Transport") (format #t fmtstr "==" "=====" "===" "====" "=================" "======" "========" "=====" "=========") (for-each @@ -816,12 +820,12 @@ (format #t fmtstr id mt-ver pid hostname (conc interface ":" pullport) pubport last-update (if status "alive" "dead") transport) (if (or (equal? id sid) (equal? sid 0)) ;; kill all/any (begin - (debug:print-info 0 *default-log-port* "Attempting to stop server with pid " pid) - (tasks:kill-server hostname pid))))) + (debug:print-info 0 *default-log-port* "Attempting to kill "kill-switch" server with pid " pid) + (tasks:kill-server hostname pid kill-switch: kill-switch))))) servers) (debug:print-info 1 *default-log-port* "Done with listservers") (set! *didsomething* #t) (exit)) ;; must do, would have to add checks to many/all calls below (exit)))) Index: rpc-transport.scm ================================================================== --- rpc-transport.scm +++ rpc-transport.scm @@ -38,32 +38,34 @@ ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (rpc-transport:launch run-id) - (set! *run-id* run-id) - (if (args:get-arg "-daemonize") - (daemon:ize)) - (if (server:check-if-running run-id) - (begin - (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running") - (exit 0))) - (let loop ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id)) - (remtries 4)) - (if (not server-id) - (if (> remtries 0) - (begin - (thread-sleep! 2) - (loop (open-run-close tasks:server-lock-slot tasks:open-db run-id) - (- remtries 1))) - (begin - ;; since we didn't get the server lock we are going to clean up and bail out - (debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") - (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db " rpc-transport:launch"))) - (begin - (rpc-transport:run (if (args:get-arg "-server")(args:get-arg "-server") "-") run-id server-id) - (exit))))) + (let* ((tdbdat (tasks:open-db))) + (BB> "rpc-transport:launch fired for run-id="run-id) + (set! *run-id* run-id) + (if (args:get-arg "-daemonize") + (daemon:ize)) + (if (server:check-if-running run-id) + (begin + (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running") + (exit 0))) + (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)) + (remtries 4)) + (if (not server-id) + (if (> remtries 0) + (begin + (thread-sleep! 2) + (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id) + (- remtries 1))) + (begin + ;; since we didn't get the server lock we are going to clean up and bail out + (debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") + (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " rpc-transport:launch"))) + (begin + (rpc-transport:run (if (args:get-arg "-server")(args:get-arg "-server") "-") run-id server-id) + (exit)))))) (define (rpc-transport:run hostn run-id server-id) (debug:print 2 *default-log-port* "Attempting to start the rpc server ...") ;; (trace rpc:publish-procedure!) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -47,16 +47,17 @@ ;; all routes though here end in exit ... ;; ;; start_server ;; -(define (server:launch run-id) - (case *transport-type* +(define (server:launch run-id transport-type) + (BB> "server:launch fired for run-id="run-id" transport-type="transport-type) + (case transport-type ((http)(http-transport:launch run-id)) ;;((nmsg)(nmsg-transport:launch run-id)) ((rpc) (rpc-transport:launch run-id)) - (else (debug:print-error 0 *default-log-port* "unknown server type " *transport-type*)))) + (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type)))) ;; (else (debug:print-error 0 *default-log-port* "No known transport set, transport=" transport ", using rpc") ;; (rpc-transport:launch run-id))))) ;;====================================================================== ;; S E R V E R U T I L I T I E S Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -431,15 +431,15 @@ run-id) (reverse res))) ;; no elegance here ... ;; -(define (tasks:kill-server hostname pid) +(define (tasks:kill-server hostname pid #!key (kill-switch "")) (debug:print-info 0 *default-log-port* "Attempting to kill server process " pid " on host " hostname) (setenv "TARGETHOST" hostname) (setenv "TARGETHOST_LOGF" "server-kills.log") - (system (conc "nbfake kill " pid)) + (system (conc "nbfake kill "kill-switch" "pid)) (unsetenv "TARGETHOST_LOGF") (unsetenv "TARGETHOST")) ;; look up a server by run-id and send it a kill, also delete the record for that server ;;