Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -1,10 +1,10 @@ # make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)' # rm .o ; make install CSCOPTS='-profile' ; ... ; chicken-profile | less PREFIX=$(PWD) -CSCOPTS=-O4 +CSCOPTS= INSTALL=install SRCFILES = common.scm items.scm launch.scm \ ods.scm runconfig.scm server.scm configf.scm \ db.scm keys.scm margs.scm megatest-version.scm \ process.scm runs.scm tasks.scm tests.scm genexample.scm \ Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -430,11 +430,20 @@ (if (file-exists? fname) (with-input-from-file fname (lambda () (equal? key-string (read-line)))) #f)))) - + +(define (common:simple-file-lock-and-wait fname #!key (expire-time 300)) + (let ((end-time (+ expire-time (current-seconds)))) + (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time))) + (if got-lock + #t + (if (> end-time (current-seconds)) + (loop (common:simple-file-lock fname expire-time: expire-time)) + #f))))) + (define (common:simple-file-release-lock fname) (delete-file* fname)) ;;====================================================================== ;; S T A T E S A N D S T A T U S E S @@ -1044,18 +1053,21 @@ (equal? (configf:lookup *configdat* "setup" "use-cache") "no"))))) ;; force use of server? ;; (define (common:force-server?) - (let* ((force-setting (configf:lookup "server" "force")) - (force-type (if force-setting (string->symbol force-setting) #f))) + (let* ((force-setting (configf:lookup *configdat* "server" "force")) + (force-type (if force-setting (string->symbol force-setting) #f))) (case force-type ((#f) #f) ((always) #t) ((test) (if (args:get-arg "-execute") ;; we are in a test #t - #f))))) + #f)) + (else + (debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.") + #t)))) ;; default to requiring server ;;====================================================================== ;; M I S C L I S T S ;;====================================================================== Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -95,11 +95,11 @@ (remote-conndat-set! runremote #f) (mutex-unlock! *rmt-mutex*) (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; ensure we have a record for our connection for given area ((not runremote) - (set! *runremote* (make-remote)) + (set! *runremote* (make-remote)) ;; new runremote will come from this on next iteration (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 1") (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; ensure we have a homehost record ((not (pair? (remote-hh-dat runremote))) ;; not on homehost @@ -142,21 +142,26 @@ (not (member cmd api:read-only-queries))) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") (let ((server-url (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call (if server-url (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed - (server:kind-run *toppath*))) + (if (common:force-server?) + (server:start-and-wait *toppath*) + (server:kind-run *toppath*)))) + (remote-force-server-set! runremote (common:force-server?)) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5.1") (rmt:open-qry-close-locally cmd 0 params)) - ((and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost - (not (remote-conndat runremote))) ;; and no connection + ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one + (not (remote-conndat runremote))) + (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost + (not (remote-conndat runremote)))) ;; and no connection (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6 hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) (mutex-unlock! *rmt-mutex*) (server:start-and-wait *toppath*) - (if (common:force-server?)(remote-force-server-set! runremote #t)) + (remote-force-server-set! runremote (common:force-server?)) (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as ;; all set up if get this far, dispatch the query ((and (not (remote-force-server runremote)) (cdr (remote-hh-dat runremote))) ;; we are on homehost Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -270,11 +270,11 @@ (else 600)) (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously (lock-file (conc areapath "/logs/server-start.lock"))) (if (> (- (current-seconds) when-run) run-delay) (begin - (common:simple-file-lock lock-file expire-time: 15) + (common:simple-file-lock-and-wait lock-file expire-time: 15) (server:run areapath) (thread-sleep! 5) ;; don't release the lock for at least a few seconds (common:simple-file-release-lock lock-file))) (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))) Index: tests/fdktestqa/testqa/megatest.config ================================================================== --- tests/fdktestqa/testqa/megatest.config +++ tests/fdktestqa/testqa/megatest.config @@ -1,13 +1,10 @@ [setup] testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log # launchwait no launch-delay 0 -[server] -runtime 180 - # All these are overridden in ../fdk.config # [jobtools] # launcher nbfake # launcher bsub -q priority -o $MT_TEST_RUN_DIR/openlava.log