Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -74,10 +74,19 @@ (if res (cadr res)(if (null? default) #f (car default))))) ;;====================================================================== ;; Misc utils ;;====================================================================== + +;; one-of args defined +(define (args-defined? . param) + (let ((res #f)) + (for-each + (lambda (arg) + (if (args:get-arg arg)(set! res #t))) + param) + res)) ;; convert stuff to a number if possible (define (any->number val) (cond ((number? val) val) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -321,19 +321,18 @@ (format #t fmtstr id pid hostname port start-time priority status numclients))) servers) (set! *didsomething* #t)))) ;; if not list or kill then start a client (if appropriate) - (if (or (let ((res #f)) - (for-each - (lambda (key) - (if (args:get-arg key)(set! res #t))) - (list "-h" "-version" "-gen-megatest-area" "-gen-megatest-test")) - res) + (if (or (args-defined? "-h" "-version" "-gen-megatest-area" "-gen-megatest-test") (eq? (length (hash-table-keys args:arg-hash)) 0)) (debug:print-info 1 "Server connection not needed") - (server:client-launch))) + ;; ping servers only if -runall -runtests + (let ((ping (args-defined? "-runall" "-runtests" "-remove-runs" + "-set-state-status" "-rerun" "-rollup" "-lock" "-unlock" + "-set-values" "-list-runs"))) + (server:client-launch do-ping: ping)))) ;;====================================================================== ;; Remove old run(s) ;;====================================================================== Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -137,14 +137,16 @@ (let ((sig (server:mk-signature))) (set! *my-client-signature* sig) *my-client-signature*))) ;; -(define (server:client-connect host port) +(define (server:client-connect host port #!key (context #f)) (debug:print 3 "client-connect " host ":" port) (let ((connect-ok #f) - (zmq-socket (make-socket 'req)) + (zmq-socket (if context + (make-socket 'req context) + (make-socket 'req))) (conurl (server:make-server-url (list host port)))) (if (socket? zmq-socket) (begin (connect-socket zmq-socket conurl) zmq-socket) @@ -159,48 +161,48 @@ (cdb:logout zmq-socket *toppath* (server:get-client-signature))))) ;; (close-socket zmq-socket) ok)) ;; Do all the connection work, start a server if not already running -(define (server:client-setup #!key (numtries 10)) +(define (server:client-setup #!key (numtries 10)(do-ping #f)) (if (not *toppath*)(setup-for-run)) - (let ((hostinfo (open-run-close tasks:get-best-server tasks:open-db))) + (let ((hostinfo (open-run-close tasks:get-best-server tasks:open-db do-ping: do-ping))) (if hostinfo - (let* ((host (car hostinfo)) - (port (cadr hostinfo))) - (debug:print-info 2 "Setting up to connect to " hostinfo) - (handle-exceptions - exn - (begin - (debug:print 0 "ERROR: Failed to open a connection to the server at: " hostinfo) - (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) - (debug:print 0 " perhaps jobs killed with -9? Removing server records") - (open-run-close tasks:server-deregister tasks:open-db host port: port) - #f) - (let* ((zmq-socket (server:client-connect host port)) - (login-res (server:client-login zmq-socket)) - (connect-ok (if (null? login-res) #f (car login-res))) - (conurl (server:make-server-url hostinfo))) - (if connect-ok - (begin - (debug:print-info 2 "Logged in and connected to " conurl) - (set! *runremote* zmq-socket) - #t) - (begin - (debug:print-info 2 "Failed to login or connect to " conurl) - (set! *runremote* #f) - #f))))) + (let ((host (car hostinfo)) + (port (cadr hostinfo)) + (zsocket (caddr hostinfo))) + ;; (set! *runremote* zsocket)) + (let* ((host (car hostinfo)) + (port (cadr hostinfo))) + (debug:print-info 2 "Setting up to connect to " hostinfo) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to open a connection to the server at: " hostinfo) + (debug:print 0 " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 " perhaps jobs killed with -9? Removing server records") + (open-run-close tasks:server-deregister tasks:open-db host port: port) + #f) + (let* ((zmq-socket (server:client-connect host port)) + (login-res (server:client-login zmq-socket)) + (connect-ok (if (null? login-res) #f (car login-res))) + (conurl (server:make-server-url hostinfo))) + (if connect-ok + (begin + (debug:print-info 2 "Logged in and connected to " conurl) + (set! *runremote* zmq-socket) + #t) + (begin + (debug:print-info 2 "Failed to login or connect to " conurl) + (set! *runremote* #f) + #f)))))) (if (> numtries 0) (let ((exe (car (argv)))) (debug:print-info 1 "No server available, attempting to start one...") (process-run exe (list "-server" "-" "-debug" (conc *verbosity*))) - ;; (system (conc " -server - " (if (args:get-arg "-debug") - ;; (conc "-debug " (args:get-arg "-debug")) - ;; "") - ;; " &")) - (sleep 10) - (server:client-setup numtries: (- numtries 1))) + (sleep 5) + (server:client-setup numtries: (- numtries 1) do-ping: do-ping)) (debug:print-info 1 "Too many retries, giving up"))))) (define (server:launch) (let* ((toppath (setup-for-run))) (debug:print-info 0 "Starting the standalone server") @@ -216,46 +218,49 @@ (thread-start! th3) (set! *didsomething* #t) (thread-join! th3)) (debug:print 0 "ERROR: Failed to setup for megatest")))))) -(define (server:client-launch) - (if (server:client-setup) +(define (server:client-launch #!key (do-ping #f)) + (if (server:client-setup do-ping: do-ping) (debug:print-info 0 "connected as client") (begin (debug:print 0 "ERROR: Failed to connect as client") (exit)))) ;; ping a server and return number of clients or #f (if no response) -(define (server:ping host port #!key (secs 10)) +(define (server:ping host port #!key (secs 10)(return-socket #f)) (cdb:use-non-blocking-mode (lambda () (let* ((res #f) (th1 (make-thread (lambda () - (let ((zmq-socket (server:client-connect host port))) + (let* ((zmq-context (make-context 1)) + (zmq-socket (server:client-connect host port context: zmq-context))) (if zmq-socket (if (server:client-login zmq-socket) (let ((numclients (cdb:num-clients zmq-socket))) - (server:client-logout zmq-socket) - (close-socket zmq-socket) - (set! res (list #t numclients))) + (if (not return-socket) + (begin + (server:client-logout zmq-socket) + (close-socket zmq-socket))) + (set! res (list #t numclients (if return-socket zmq-socket #f)))) (begin ;; (close-socket zmq-socket) - (set! res (list #f "CAN'T LOGIN")))) - (set! res (list #f "CAN'T CONNECT"))))))) + (set! res (list #f "CAN'T LOGIN" #f)))) + (set! res (list #f "CAN'T CONNECT" #f))))))) (th2 (make-thread (lambda () (let loop ((count 1)) (debug:print-info 1 "Ping " count " server on " host " at port " port) (thread-sleep! 2) (if (< count (/ secs 2)) (loop (+ count 1)))) ;; (thread-terminate! th1) - (set! res (list #f "TIMED OUT")))))) + (set! res (list #f "TIMED OUT" #f)))))) (thread-start! th2) (thread-start! th1) (handle-exceptions exn - (set! res (list #f "TIMED OUT")) + (set! res (list #f "TIMED OUT" #f)) (thread-join! th1 secs)) res)))) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -120,11 +120,11 @@ (define (tasks:have-clients? mdb server-id) (null? (tasks:get-logged-in-clients mdb server-id))) ;; ping each server in the db and return first found that responds. ;; remove any others. will not necessarily remove all! -(define (tasks:get-best-server mdb) +(define (tasks:get-best-server mdb #!key (do-ping #f)) (let ((res '()) (best #f)) (sqlite3:for-each-row (lambda (id hostname port) (set! res (cons (list hostname port) res)) @@ -134,14 +134,17 @@ ;; (print "res=" res) (if (null? res) #f (let loop ((hed (car res)) (tal (cdr res))) ;; (print "hed=" hed ", tal=" tal) - (let* ((host (car hed)) - (port (cadr hed)) - (ping-res (server:ping host port))) - (if ping-res hed + (let* ((host (car hed)) + (port (cadr hed)) + (ping-res (if do-ping (server:ping host port return-socket: #f) '(#t "NO PING" #f))) + (alive (car ping-res)) + (reason (cadr ping-res)) + (zsocket (caddr ping-res))) + (if alive (list host port zsocket) ;; remove defunct server from table (begin (open-run-close tasks:server-deregister tasks:open-db host port: port) (if (null? tal) #f Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -29,29 +29,27 @@ mkdir -p simplelinks simpleruns cd simplerun;cp ../../*_records.scm .;perl -pi.bak -e 's/define-inline/define/' *_records.scm cd simplerun;echo '(load "../tests.scm")' | $(MEGATEST) -repl -debug $(DEBUG) test2 : fullprep - cd fullrun;$(MEGATEST) -runtests ez_pass,runfirst/a/% -reqtarg ubuntu/nfs/none :runname $(RUNNAME) $(SERVER) -debug $(DEBUG) $(LOGGING) + cd fullrun;$(MEGATEST) -runtests ez_pass,runfirst/a/% -reqtarg ubuntu/nfs/none :runname $(RUNNAME) -debug $(DEBUG) $(LOGGING) cd fullrun;megatest -runall -target ubunut/nfs/none :runname $(RUNNAME)_01 -testpatt %/,%/ai -debug $(DEBUG) cd fullrun;megatest -runtests %/,%/ai -target ubunut/nfs/none :runname $(RUNAME)_02 -debug $(DEBUG) cd fullrun;megatest -runtests runfirst/%,%/ai -target ubunut/nfs/none :runname $(RUNNAME)_02 -debug $(DEBUG) cd fullrun;megatest -runtests %/,%/winter -target ubunut/nfs/none :runname $(RUNNAME)_03 -debug $(DEBUG) sleep 40;cd fullrun;megatest -target ubuntu/nfs/none :runname $(RUNNAME) -set-state-status COMPLETED,FORCED :state COMPLETED :status PASS -testpatt ez_p%s,runfirst/ -debug $(DEBUG) $(LOGGING) test3 : fullprep - cd fullrun;$(MEGATEST) -runtests runfirst -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b $(SERVER) -debug 10 + cd fullrun;$(MEGATEST) -runtests runfirst -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -debug 10 test4 : fullprep - cd fullrun;$(MEGATEST) $(SERVER) $(LOGGING) -debug $(DEBUG) & - cd fullrun;sleep 5;$(MEGATEST) -debug $(DEBUG) -runall -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) + cd fullrun;$(MEGATEST) -debug $(DEBUG) -runall -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) # NOTE: Only one instance can be a server test5 : fullprep - cd fullrun;$(MEGATEST) $(SERVER) $(LOGGING) & - cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log & + cd fullrun;sleep 0;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & cd fullrun;sleep 10;$(MEGATEST) -runall $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & @@ -61,19 +59,17 @@ cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10 cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10 cleanprep : ../*.scm Makefile */*.config - # if [ -e fullrun/megatest.db ]; then sqlite3 fullrun/megatest.db "delete from metadat where var='SERVER';";fi mkdir -p /tmp/mt_runs /tmp/mt_links cd ..;make install rm -f */logging.db */monitor.db touch cleanprep fullprep : cleanprep - cd fullrun;$(MEGATEST) -server - -debug $(DEBUG) & - sleep 5;cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt %/% + cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt %/% cd fullrun;$(BINPATH)/dboard -rows 15 & dashboard : cleanprep cd fullrun && $(BINPATH)/dashboard -rows 25 & Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -18,12 +18,12 @@ # Max retries allows megatest to re-check that a tests status has changed # as tests can have transient FAIL status occasionally maxretries 20 [validvalues] -state start end -status pass fail n/a 0 1 running +state start end 0 1 - 2 +status pass fail n/a 0 1 running - 2 # These are set before all tests, override them # in the testconfig [pre-launch-env-overrides] section [env-override] SPECIAL_ENV_VARS overide them here - should be seen at launch and in the runs