Overview
Comment: | merged in protection against too demanding clients |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.63 |
Files: | files | file ages | folders |
SHA1: |
57e72d2e764ae10d38e3187fbdab7d23 |
User & Date: | bjbarcla on 2017-03-24 15:39:13 |
Other Links: | branch diff | manifest | tags |
Context
2017-03-24
| ||
22:39 | Merged v1.63 changes into v1.64 check-in: b2e6452e2f user: matt tags: v1.64 | |
15:39 | merged in protection against too demanding clients Leaf check-in: 57e72d2e76 user: bjbarcla tags: v1.63 | |
15:18 | Reduced threshold for server pushback on clients to 20 api calls/parallel. Added couple more calls to check if server is running before starting a new one. check-in: 37e149d637 user: matt tags: multi-server-hack | |
11:19 | Added some error handling on the locking calls. Silenced a dashboard message. check-in: a5dbcdd2ac user: matt tags: v1.63 | |
Changes
Modified api.scm from [4067424284] to [466ca51286].
︙ | |||
123 124 125 126 127 128 129 130 131 132 133 134 135 136 | 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 | + + | (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer") (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens (cond ((not (vector? dat)) ;; it is an error to not receive a vector (vector #f #f "remote must be called with a vector") ) ((> *api-process-request-count* 20) (vector #f 'overloaded)) (else (let* ((cmd-in (vector-ref dat 0)) (cmd (if (symbol? cmd-in) cmd-in (string->symbol cmd-in))) (params (vector-ref dat 1)) (start-t (current-milliseconds)) |
︙ | |||
269 270 271 272 273 274 275 | 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 | - + - - + + + | ((ping) (current-process-id)) ;; TESTMETA ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) ;; TASKS ((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params)))))) |
︙ |
Modified client.scm from [3c66569adb] to [5611deb23d].
︙ | |||
76 77 78 79 80 81 82 | 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 | - + | (begin (debug:print-error 0 *default-log-port* "failed to start or connect to server") (exit 1)) ;; ;; Alternatively here, we can get the list of candidate servers and work our way ;; through them searching for a good one. ;; |
︙ |
Modified common.scm from [4f52eb87fd] to [9d413de21a].
︙ | |||
148 149 150 151 152 153 154 | 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | - - + + + + | (defstruct remote (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) (server-url (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive (conndat #f) (transport *transport-type*) |
︙ | |||
1060 1061 1062 1063 1064 1065 1066 | 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 | - - + + + + + + + | (and *configdat* (equal? (configf:lookup *configdat* "setup" "use-cache") "no"))))) ;; force use of server? ;; (define (common:force-server?) (let* ((force-setting (configf:lookup *configdat* "server" "force")) |
︙ |
Modified db.scm from [df76958ce7] to [feaa389617].
︙ | |||
2094 2095 2096 2097 2098 2099 2100 | 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 | - + + + - + + - + + | (define (db:print-current-query-stats) ;; generate stats from *db-api-call-time* (let ((ordered-keys (sort (hash-table-keys *db-api-call-time*) (lambda (a b) (let ((sum-a (common:sum (hash-table-ref *db-api-call-time* a))) (sum-b (common:sum (hash-table-ref *db-api-call-time* b)))) |
︙ |
Modified http-transport.scm from [44c2ce6eea] to [9751cbc3b5].
︙ | |||
416 417 418 419 420 421 422 | 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 | - + + + + | (set! last-access *db-last-access*) (mutex-unlock! *heartbeat-mutex*) (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) (begin (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds)) (flush-output *default-log-port*))) |
︙ |
Modified launch.scm from [442d86a53d] to [7ecb4d1b9c].
whitespace changes only
Modified rmt.scm from [1adf35b1f4] to [a6b31e3da3].
︙ | |||
56 57 58 59 60 61 62 | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | + + + + + - - - + + + + + + + + + | ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; 3. do the query, if on homehost use local access ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas (runremote (or area-dat *runremote*)) (readonly-mode (if (and runremote (remote-ro-mode-checked runremote)) (remote-ro-mode runremote) |
︙ | |||
92 93 94 95 96 97 98 | 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 | - + | (let ((expire-time (+ (- start-time (remote-server-timeout runremote))(random 30)))) ;; add 30 seconds of noise so that not all running tests expire at the same time causing a storm of server starts (< (http-transport:server-dat-get-last-access (remote-conndat runremote)) expire-time))) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") (remote-conndat-set! runremote #f) (mutex-unlock! *rmt-mutex*) (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; ensure we have a record for our connection for given area |
︙ | |||
154 155 156 157 158 159 160 | 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 | + - + | ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one (not (remote-conndat runremote))) (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost (not (remote-conndat runremote)))) ;; and no connection (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6 hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) (mutex-unlock! *rmt-mutex*) (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? |
︙ | |||
193 194 195 196 197 198 199 200 201 202 203 204 205 | 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 | + + + + + + - - + + | ((http) (mutex-unlock! *rmt-mutex*) res) (else (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " is unknown") (mutex-unlock! *rmt-mutex*) (exit 1))) (if (eq? res 'overloaded) (let ((wait-delay (+ attemptnum (* attemptnum 10)))) (debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.") (thread-sleep! wait-delay) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) (begin (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum) (remote-conndat-set! runremote #f) (remote-server-url-set! runremote #f) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1") (mutex-unlock! *rmt-mutex*) (if (not (server:check-if-running *toppath*)) |
︙ |
Modified server.scm from [ba8be5ee9a] to [65b45e968a].
︙ | |||
165 166 167 168 169 170 171 | 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 | - + - + | (define (server:get-list areapath #!key (limit #f)) (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) (day-seconds (* 24 60 60))) ;; if the directory exists continue to get the list ;; otherwise attempt to create the logs dir and then ;; continue (if (if (directory-exists? (conc areapath "/logs")) |
︙ | |||
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 | 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 | + + - + + + + + + + + + + + + | ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off ;; and servers should stick around for about two hours or so. ;; (define (server:get-best srvlst) (let ((now (current-seconds))) (sort (filter (lambda (rec) (if (and (list? rec) (> (length rec) 2)) (let ((start-time (list-ref rec 3)) (mod-time (list-ref rec 0))) ;; (print "start-time: " start-time " mod-time: " mod-time) (and start-time mod-time (> (- now start-time) 0) ;; been running at least 0 seconds (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds (< (- now start-time) (+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600")) 180) (random 360))) ;; under one hour running time +/- 180 |
︙ | |||
290 291 292 293 294 295 296 | 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 | - - - + + + + + + + + + + | (thread-sleep! 5) (loop (server:check-if-running areapath))))))) (define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG. ;; no longer care if multiple servers are started by accident. older servers will drop off in time. ;; |
︙ |