Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -70,11 +70,11 @@ ((http)(http-transport:client-connect iface port)) ((nmsg) host-info) ;; (http-transport:server-dat-get-socket host-info)) (else #f))) (ping-res (case *transport-type* ((http)(rmt:login-no-auto-client-setup start-res run-id)) - ((nmsg)(nmsg-transport:ping iface port timeout: 2 socket: )) + ((nmsg)(nmsg-transport:ping iface port timeout: 2 socket: #t)) (else #f)))) (if ping-res ;; sucessful login? (begin (debug:print-info 2 "client:setup, ping is good using host-info=" host-info ", remaining-tries=" remaining-tries) start-res) ;; return the server info Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -446,18 +446,12 @@ ;; no_traffic, no running tests, if server 0, no running servers ;; ;; (let ((wait-on-running (configf:lookup *configdat* "server" "wait-on-running"))) ;; wait on running tasks (if not true then exit on time out) ;; (if (and *server-run* - ;; (or (> (+ last-access server-timeout) (current-seconds))) -;; (and (eq? run-id 0) -;; (> (tasks:num-servers-non-zero-running tdb) 0)) -;; (and (not (eq? run-id 0)) ;; only makes sense in non-zero run-id servers -;; (> (db:get-count-tests-actually-running *inmemdb* run-id) 0)) -;; )) (begin (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) ;; ;; Consider implementing some smarts here to re-insert the record or kill self is ;; the db indicates so Index: nmsg-transport.scm ================================================================== --- nmsg-transport.scm +++ nmsg-transport.scm @@ -245,11 +245,19 @@ (thread-sleep! 0.5) (loop)))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) - (tdbdat (tasks:open-db))) + (tdbdat (tasks:open-db)) + (server-timeout (let ((tmo (configf:lookup *configdat* "server" "timeout"))) + (if (and (string? tmo) + (string->number tmo)) + (* 60 60 (string->number tmo)) + ;; (* 3 24 60 60) ;; default to three days + (* 60 1) ;; default to one minute + ;; (* 60 60 25) ;; default to 25 hours + )))) (print "Keep-running got server pid " server-id ", using iface " iface " and port " port) (let loop ((count 0)) (thread-sleep! 4) ;; no need to do this very often ;; NB// sync currently does NOT return queue-length (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1))) @@ -259,26 +267,22 @@ ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access (mutex-lock! *heartbeat-mutex*) (set! last-access *last-db-access*) (mutex-unlock! *heartbeat-mutex*) - (if (> (+ last-access - ;; (* 50 60 60) ;; 48 hrs - ;; 60 ;; one minute - ;; (* 60 60) ;; one hour - (* 45 60) ;; 45 minutes, until the db deletion bug is fixed. - ) - (current-seconds)) + (if (and *server-run* + (> (+ last-access server-timeout) + (current-seconds))) (begin (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (loop 0)) (begin (debug:print-info 0 "Starting to shutdown the server.") (set! *time-to-exit* #t) (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running") (debug:print-info 0 "Server shutdown complete. Exiting") - ;; (exit) + (exit) )))))) ;;====================================================================== ;; C L I E N T S ;;====================================================================== @@ -286,14 +290,17 @@ (define (nmsg-transport:client-connect iface portnum) (let* ((reqsoc (nmsg-transport:ping iface portnum return-socket: #t))) (vector iface portnum #f #f #f (current-seconds) reqsoc))) (define (nmsg-transport:client-api-send-receive run-id connection-info cmd param) + (mutex-lock! *http-mutex*) (let ((packet (vector cmd param)) (reqsoc (http-transport:server-dat-get-socket connection-info))) (nn-send reqsoc (db:obj->string packet transport: 'nmsg)) - (db:string->obj (nn-recv reqsoc) transport: 'nmsg))) + (let ((res (db:string->obj (nn-recv reqsoc) transport: 'nmsg))) + (mutex-unlock! *http-mutex*) + res))) ;;====================================================================== ;; J U N K ;;====================================================================== Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -140,13 +140,17 @@ ;; note: client:start will set *runremote*. this needs to be changed ;; also, client:start will login to the server, also need to change that. ;; ;; client:start returns #t if login was successful. ;; - (let ((res (server:ping-server run-id - (tasks:hostinfo-get-interface server) - (tasks:hostinfo-get-port server)))) + (let ((res (case *transport-type* + ((http)(server:ping-server run-id + (tasks:hostinfo-get-interface server) + (tasks:hostinfo-get-port server))) + ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server) + (tasks:hostinfo-get-port server) + timeout: 2))))) ;; if the server didn't respond we must remove the record (if res #t (begin (debug:print-info 0 "server at " server " not responding, removing record") Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -116,27 +116,27 @@ test9 : minsetup test9a test9b test9c test9d test9e test9a : @echo Run super-simple mintest e, no waitons. cd mintest;$(DASHBOARD)& - cd mintest;$(MEGATEST) -preclean -runtests e -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(MEGATEST) -preclean -runtests e -target $(VER) -runname $(shell date +%H.%M.%S) -debug $(DEBUG) test9b : @echo Run simple mintest d with one waiton c - cd mintest;$(MEGATEST) -preclean -runtests d -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(MEGATEST) -preclean -runtests d -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG) test9c : @echo Run mintest a with full waiton chain a -> b -> c -> d -> e - cd mintest;$(MEGATEST) -preclean -runtests a -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(MEGATEST) -preclean -runtests a -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG) test9d : @echo Run an itemized test with no items - cd mintest;$(MEGATEST) -preclean -runtests g -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(MEGATEST) -preclean -runtests g -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG) test9e : @echo Run mintest a1 with full waiton chain with d1fail: a1 -> b1 -> c1 -> d1fail -> e1 - cd mintest;$(MEGATEST) -preclean -runtests a1 -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG) + cd mintest;$(MEGATEST) -preclean -runtests a1 -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG) test10 : @echo Run a bunch of different targets simultaneously (cd fullrun;$(MEGATEST) -server - ;sleep 2)& for targ in mint/btrfs/mintdir sunos/sshfs/loc; do \ Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -136,14 +136,15 @@ # Three minutes is 0.05 hours # timeout 0.025 timeout 0.01 # Server is required - slower but more resistant to Sqlite issues. -# required yes +required yes # Start server when average query takes longer than this -server-query-threshold 55500 +# server-query-threshold 55500 +server-query-threshold -1 # daemonize yes # hostname #{scheme (get-host-name)} ## disks are: Index: tests/mintest/megatest.config ================================================================== --- tests/mintest/megatest.config +++ tests/mintest/megatest.config @@ -1,11 +1,11 @@ [fields] X TEXT [setup] max_concurrent_jobs 50 -linktree #{getenv PWD}/linktree +linktree #{getenv MT_RUN_AREA_HOME}/linktree transport http [server] port 8090