Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -53,10 +53,12 @@
 (define *server-id* #f)
 (define *server-info* #f)
 (define *time-to-exit* #f)
 (define *received-response* #f)
 (define *default-numtries* 10)
+(define *server-run* #t) ;; set to #f by the killserver command; the server loop shuts down once it is #f
+
 
 (define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
 (define *keys* (make-hash-table)) ;; cache the keys here
 (define *keyvals* (make-hash-table))
 (define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here
Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -1345,12 +1345,12 @@
   (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path))
 
 (define (cdb:flush-queue serverdat)
   (cdb:client-call serverdat 'flush #f *default-numtries*))
 
-(define (cdb:kill-server serverdat)
-  (cdb:client-call serverdat 'killserver #t *default-numtries*))
+(define (cdb:kill-server serverdat pid) ;; pid is forwarded to the server's killserver handler
+  (cdb:client-call serverdat 'killserver #t *default-numtries* pid))
 
 (define (cdb:roll-up-pass-fail-counts serverdat run-id test-name item-path status)
   (cdb:client-call serverdat 'immediate #f *default-numtries* open-run-close db:roll-up-pass-fail-counts #f run-id test-name item-path status))
 
 (define (cdb:get-test-info serverdat run-id test-name item-path)
@@ -1584,16 +1584,30 @@
               (server:reply return-address qry-sig #t 1)) ;; (length data)))
            ((set-verbosity)
             (set! *verbosity* (car params))
             (server:reply return-address qry-sig #t '(#t *verbosity*)))
            ((killserver)
-            (debug:print 0 "WARNING: Server going down in 15 seconds by user request!")
-            (open-run-close tasks:server-deregister tasks:open-db
-                            (car *runremote*)
-                            pullport: (cadr *runremote*))
-            (thread-start! (make-thread (lambda ()(thread-sleep! 15)(exit))))
-            (server:reply return-address qry-sig #t '(#t "exit process started")))
+            ;; Deregister this server, stop the server loop and reply; the hard kill is deferred.
+            (let ((hostname (car *runremote*))
+                  (port     (cadr *runremote*))
+                  ;; Fall back to our own pid when the client sent no usable pid.
+                  ;; NOTE(review): pid is client-supplied; consider always using (current-process-id).
+                  (pid      (if (and (pair? params) (number? (car params)))
+                                (car params)
+                                (current-process-id))))
+              (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!")
+              (debug:print-info 1 "current pid=" (current-process-id))
+              (open-run-close tasks:server-deregister tasks:open-db
+                              hostname
+                              port: port)
+              (set! *server-run* #f)
+              ;; Signal from a separate thread after a 3 second grace period: killing
+              ;; our own pid inline would end the process before the reply below is sent.
+              (thread-start! (make-thread (lambda ()
+                                            (thread-sleep! 3)
+                                            (process-signal pid signal/kill))))
+              (server:reply return-address qry-sig #t '(#t "exit process started"))))
            (else ;; not a command, i.e. is a query
             (debug:print 0 "ERROR: Unrecognised query/command " stmt-key)
             (server:reply return-address qry-sig #f 'failed)))))
         (else
          (debug:print-info 11 "Executing " stmt-key " for " params)
Index: http-transport.scm
==================================================================
--- http-transport.scm
+++ http-transport.scm
@@ -34,11 +34,11 @@
 (define (http-transport:make-server-url hostport)
   (if (not hostport)
       #f
       (conc "http://" (car hostport) ":" (cadr hostport))))
 
-(define *server-loop-heart-beat* (current-seconds))
+(define *server-loop-heart-beat* (current-seconds))
 (define *heartbeat-mutex* (make-mutex))
 
 ;;======================================================================
 ;; S E R V E R
 ;;======================================================================
@@ -273,14 +273,15 @@
                ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
                (mutex-lock! *heartbeat-mutex*)
                (set! last-access *last-db-access*)
                (mutex-unlock! *heartbeat-mutex*)
                ;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout)
-               (if (> (+ last-access server-timeout)
-                      (current-seconds))
+               (if (and *server-run*
+                        (> (+ last-access server-timeout)
+                           (current-seconds)))
                    (begin
-                     (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
+                     (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
                      (loop 0))
                    (begin
                      (debug:print-info 0 "Starting to shutdown the server.")
                      ;; need to delete only *my* server entry (future use)
                      (set! *time-to-exit* #t)
Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -70,11 +70,11 @@
           (conc testname (if (equal? itempath "") "" (conc "(" itempath ")")))))
 
 ;; This is the *new* methodology. One record to inform them and in the chaos, organise them.
 ;;
 (define (runs:create-run-record)
-  (let* ((mconfig (if *configdat*
+  (let* ((mconfig (if *configdat*
                       *configdat*
                       (if (setup-for-run)
                           *configdat*
                           (begin
                             (debug:print 0 "ERROR: Called setup in a non-megatest area, exiting")
Index: tasks.scm
==================================================================
--- tasks.scm
+++ tasks.scm
@@ -103,20 +103,20 @@
    pubport
    transport
    ))
 
 ;; NB// two servers with same pid on different hosts will be removed from the list if pid: is used!
-(define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)(action 'markdead))
+(define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)(action 'delete))
   (debug:print-info 11 "server-deregister " hostname ", port " port ", pid " pid)
   (if pid
       (case action
         ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE pid=?;" pid))
         (else (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE pid=?;" pid)))
       (if port
           (case action
-            ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND port=?;" hostname port))
-            (else (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE hostname=? AND port=?;" hostname port)))
+            ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE (interface=? or hostname=?) AND port=?;" hostname hostname port))
+            (else (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE (interface=? or hostname=?) AND port=?;" hostname hostname port)))
           (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified"))))
 
 (define (tasks:server-deregister-self mdb hostname)
   (tasks:server-deregister mdb hostname pid: (current-process-id)))
 
@@ -141,11 +141,11 @@
                             "SELECT id FROM servers WHERE pid=-999;")))
      (if hostname hostname iface)(if pid pid port))
     res))
 
 (define (tasks:server-update-heartbeat mdb server-id)
-  (debug:print-info 0 "Heart beat update of server id=" server-id)
+  (debug:print-info 1 "Heart beat update of server id=" server-id)
   (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id))
 
 ;; alive servers keep the heartbeat field upto date with seconds every 6 or so seconds
 (define (tasks:server-alive? mdb server-id #!key (iface #f)(hostname #f)(port #f)(pid #f))
   (let* ((server-id (if server-id
@@ -250,17 +250,17 @@
                     (process-signal pid signal/term)
                     (thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill
                     ;;(process-signal pid signal/kill)
                     ) ;; local machine, send sig term
                   (begin
-                    (debug:print-info 1 "Stopping remote servers not yet supported."))))
-              ;; (debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide")
-              ;; (let ((serverdat (list hostname port)))
-              ;;   (case (string->symbol transport)
-              ;;     ((http)(http-transport:client-connect hostname port))
-              ;;     (else (debug:print "ERROR: remote stopping servers of type " transport " not supported yet")))
-              ;;   (cdb:kill-server serverdat))))) ;; remote machine, try telling server to commit suicide
+                    ;;(debug:print-info 1 "Stopping remote servers not yet supported."))))
+                    (debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide")
+                    (let ((serverdat (list hostname port)))
+                      (case (if (string? transport) (string->symbol transport) transport)
+                        ((http)(http-transport:client-connect hostname port))
+                        (else (debug:print 0 "ERROR: remote stopping servers of type " transport " not supported yet")))
+                      (cdb:kill-server serverdat pid))))) ;; remote machine, try telling server to commit suicide
             (begin
               (if status
                   (if (equal? hostname (get-host-name))
                       (begin
                         (debug:print-info 1 "Sending signal/term to " pid " on " hostname)
Index: tests/tests.scm
==================================================================
--- tests/tests.scm
+++ tests/tests.scm
@@ -81,34 +81,38 @@
          (string? (getenv "MT_RUN_AREA_HOME"))))
 
 (test "server-register, get-best-server" #t (let ((res #f))
                                               (open-run-close tasks:server-register tasks:open-db 1 "bob" 1234 100 'live 'http)
                                               (set! res (open-run-close tasks:get-best-server tasks:open-db))
-                                              (number? (cadddr res))))
+                                              (number? (vector-ref res 3))))
+
+(test "de-register server" #f (let ((res #f))
+                                (open-run-close tasks:server-deregister tasks:open-db "bob" port: 1234)
+                                (open-run-close tasks:get-best-server tasks:open-db)))
 
-(test "de-register server" #t (let ((res #f))
-                                (open-run-close tasks:server-deregister tasks:open-db "bob" pullport: 1234)
-                                (list? (open-run-close tasks:get-best-server tasks:open-db))))
+(define server-pid #f)
+(test "launch server" #t (let ((pid (process-fork (lambda ()
+                                                     ;; (daemon:ize)
+                                                     (server:launch 'http)))))
+                           (set! server-pid pid)
+                           (print "pid=" server-pid)
+                           (number? pid)))
 
-(define hostinfo #f)
+(thread-sleep! 3) ;; need to wait for server to start. Yes, a better way is needed.
 (test "get-best-server" #t (let ((dat (open-run-close tasks:get-best-server tasks:open-db)))
-                             (set! hostinfo dat) ;; host ip pullport pubport
-                             (and (string? (car dat))
-                                  (number? (caddr dat)))))
-
-(test #f #t (let ((zmq-socket (server:client-connect
-                               (cadr hostinfo)
-                               (caddr hostinfo)
-                               ;; (cadddr hostinfo)
-                               )))
-              (set! *runremote* zmq-socket)
-              (string? (car *runremote*))))
-
-(test #f #t (let ((res (server:client-login *runremote*)))
+                             (set! *runremote* (list (vector-ref dat 1)(vector-ref dat 2))) ;; host ip pullport pubport
+                             (and (string? (car *runremote*))
+                                  (number? (cadr *runremote*)))))
+
+(test #f #t (car (cdb:login *runremote* *toppath* *my-client-signature*)))
+(test #f #t (let ((res (client:login *runremote*)))
               (car res)))
 
-(test #f #t (car (cdb:login *runremote* *toppath* *my-client-signature*)))
+(test "server stop" #f (let ((hostname (car *runremote*))
+                             (port (cadr *runremote*)))
+                         (tasks:kill-server #t hostname port server-pid 'http)
+                         (open-run-close tasks:get-best-server tasks:open-db)))
 
 (exit 1)
 
 ;;======================================================================
 ;; C O N F I G F I L E S