Overview
Comment: | Protected calls to expensive ping with calls to cheap server:read-dotserver. This appears to 100% the run-away pings problem |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.62-no-rpc |
Files: | files | file ages | folders |
SHA1: |
bff9d56983e3732e5fd0c2915b594d55 |
User & Date: | mrwellan on 2016-12-05 12:58:47 |
Other Links: | branch diff | manifest | tags |
Context
2018-04-18
| ||
00:21 | No idea what this was. Commiting just in case it is interesting ... Leaf check-in: 33160354bc user: matt tags: dunno | |
2016-12-05
| ||
13:11 | Bumped version to v1.6301 check-in: fbf0a07a1d user: mrwellan tags: v1.63, v1.6301 | |
12:58 | Protected calls to expensive ping with calls to cheap server:read-dotserver. This appears to 100% the run-away pings problem Closed-Leaf check-in: bff9d56983 user: mrwellan tags: v1.62-no-rpc | |
11:05 | Correct expiration of server connections check-in: b168adb943 user: mrwellan tags: v1.62-no-rpc | |
Changes
Modified http-transport.scm from [344a092fb3] to [4d8eecbf3a].
︙ | ︙ | |||
531 532 533 534 535 536 537 | (if (args:get-arg "-daemonize") (begin (daemon:ize) (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it (begin (current-error-port *alt-log-file*) (current-output-port *alt-log-file*))))) | > | | 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 | (if (args:get-arg "-daemonize") (begin (daemon:ize) (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it (begin (current-error-port *alt-log-file*) (current-output-port *alt-log-file*))))) (if (and (server:read-dotserver *toppath*) (server:check-if-running run-id)) (begin (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running") (exit 0)) (begin ;; ok, no server detected, clean out any lingering records (tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id "notresponding"))) (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)) (remtries 4)) |
︙ | ︙ |
Modified rmt.scm from [3ee7bc9a52] to [5e992d9837].
︙ | ︙ | |||
24 25 26 27 28 29 30 | ;; generate entries for ~/.megatestrc with the following ;; ;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u (defstruct remote (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) | | | 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | ;; generate entries for ~/.megatestrc with the following ;; ;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u (defstruct remote (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) (server-url (if *toppath* (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive (conndat #f) (transport *transport-type*) (server-timeout (or (server:get-timeout) 100))) ;; default to 100 seconds ;;====================================================================== ;; S U P P O R T F U N C T I O N S |
︙ | ︙ | |||
109 110 111 112 113 114 115 | (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") (rmt:open-qry-close-locally cmd 0 params)) ;; no server contact made and this is a write, passively start a server ((and (not (remote-server-url *runremote*)) (not (member cmd api:read-only-queries))) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") | | | | 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 | (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") (rmt:open-qry-close-locally cmd 0 params)) ;; no server contact made and this is a write, passively start a server ((and (not (remote-server-url *runremote*)) (not (member cmd api:read-only-queries))) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") (let ((serverconn (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call (if serverconn (remote-server-url-set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed (if (not (server:start-attempted? *toppath*)) (server:kind-run *toppath*)))) (if (cdr (remote-hh-dat *runremote*)) ;; we are on the homehost, just do the call (begin (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5.1") (rmt:open-qry-close-locally cmd 0 params)) (begin ;; not on homehost, start server and wait (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5.2") (tasks:start-and-wait-for-server (tasks:open-db) 0 15) (rmt:send-receive cmd rid params attemptnum: attemptnum)))) ;; if not on homehost ensure we have a connection to a live server ;; NOTE: we *have* a homehost record by now ((and (not (cdr (remote-hh-dat *runremote*))) ;; are we on a homehost? |
︙ | ︙ |
Modified tasks.scm from [285ca22d6e] to [b8a3c2af2e].
︙ | ︙ | |||
402 403 404 405 406 407 408 | (< delay-time delay-max-tries)) (begin (if (common:low-noise-print 60 "tasks:start-and-wait-for-server" run-id) (debug:print 0 *default-log-port* "Try starting server for run-id " run-id)) (thread-sleep! (/ (random 2000) 1000)) (server:kind-run run-id) (thread-sleep! (min delay-time 1)) | > > | > > | 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 | (< delay-time delay-max-tries)) (begin (if (common:low-noise-print 60 "tasks:start-and-wait-for-server" run-id) (debug:print 0 *default-log-port* "Try starting server for run-id " run-id)) (thread-sleep! (/ (random 2000) 1000)) (server:kind-run run-id) (thread-sleep! (min delay-time 1)) (if (not (or (server:start-attempted? *toppath*) (server:read-dotserver *toppath*))) ;; no point in trying (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1)) #f)) #f))) (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res))) |
︙ | ︙ |