Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -44,24 +44,12 @@ #;(define (client:logout serverdat) (let ((ok (and (socket? serverdat) (cdb:logout serverdat *toppath* (client:get-signature))))) ok)) -#;(define (client:connect iface port) - (http-transport:client-connect iface port) - #;(case (server:get-transport) - ((rpc) (rpc:client-connect iface port)) - ((http) (http:client-connect iface port)) - ((zmq) (zmq:client-connect iface port)) - (else (rpc:client-connect iface port)))) - (define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0)) - (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects) - #;(case (server:get-transport) - ((rpc) (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id)) - ((http)(client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects)) - (else (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id)))) + (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects)) ;; Do all the connection work, look up the transport type and set up the ;; connection if required. ;; ;; There are two scenarios. Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -1015,11 +1015,11 @@ (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load") (thread-sleep! 0.2))) (if (and use-mutex (common:low-noise-print 120 "over-50-parallel-api-requests")) (dbfile:print-err *api-process-request-count* " parallel api requests being processed in process " - (current-process-id) ", throttling access")) + (current-process-id))) ;; ", throttling access")) (condition-case (begin (if use-mutex (mutex-lock! *db-with-db-mutex*)) (let ((res (apply proc dbdat db params))) ;; the actual call is here. (if use-mutex (mutex-unlock! *db-with-db-mutex*)) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -500,13 +500,14 @@ (debug:print 0 *default-log-port* "SERVER: dbprep") (set! *dbstruct-dbs* (db:setup #t)) ;; run-id)) FIXME!!! (set! server-going #t) (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version))) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. (if (and no-sync-db - (common:low-noise-print 5 "sync-all")) ;; cheesy way to reduce frequency of running sync :) + (common:low-noise-print 10 "sync-all")) ;; cheesy way to reduce frequency of running sync :) (begin - (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S")) + (if (common:low-noise-print 120 "sync-all-print") + (debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S"))) (db:all-db-sync *dbstruct-dbs*)))) ;; when things go wrong we don't want to be doing the various queries too often ;; so we strive to run this stuff only every four seconds or so. (let* ((sync-time (- (current-milliseconds) start-time)) @@ -558,19 +559,24 @@ (if (common:low-noise-print 120 "server continuing") (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (let ((curr-time (current-seconds))) (handle-exceptions exn - (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk? exn=" exn) + (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on info file " servinfofile ". Are you out of space on that disk? exn=" exn) (if (and (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter - (not *server-overloaded*)) + (not *server-overloaded*) + (file-exists? servinfofile)) (change-file-times servinfofile curr-time curr-time))) (if (or (common:low-noise-print 120 "start new server") (> *api-process-request-count* 50)) ;; if this server is kind of busy start up another (begin - (debug:print-info 0 *default-log-port* "Server is busy, start another if possible...") - (server:kind-run *toppath*))))) + (debug:print-info 0 *default-log-port* "Server is busy, parallel-api-count "*api-process-request-count*", start another if possible...") + (server:kind-run *toppath*) + (if (> *api-process-request-count* 100) + (begin + (debug:print-info 0 *default-log-port* "Server is overloaded at parallel-api-count="*api-process-request-count*", removing "servinfofile) + (delete-file* servinfofile))))))) (loop 0 server-state bad-sync-count (current-milliseconds))) (else (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) (http-transport:server-shutdown port))))))) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -668,11 +668,11 @@ (define (server:expiration-timeout) (let ((tmo (configf:lookup *configdat* "server" "timeout"))) (if (and (string? tmo) (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below (* 3600 (string->number tmo)) - 1200))) + 60))) (define (server:get-best-guess-address hostname) (let ((res #f)) (for-each (lambda (adr)