Changes In Branch broken-fixes Excluding Merge-Ins
This is equivalent to a diff from 305a49c1be to b137ace97d
2014-03-05
| ||
22:19 | Completed support for homehost check-in: 1e96dcc57d user: matt tags: v1.60 | |
2014-03-03
| ||
08:56 | Bringing these changes forward to verify they were accounted for Closed-Leaf check-in: b137ace97d user: mrwellan tags: broken-fixes | |
2014-03-01
| ||
12:18 | Trying rpc transport again. check-in: a51ee25bce user: matt tags: multi-transport | |
2014-02-28
| ||
16:38 | Remove process id from debug print output. Change send-receive to operate on text url. Move set running to earlier. Update test run time in central db, not test db check-in: 305a49c1be user: mrwellan tags: v1.60 | |
10:21 | Added retries back into the http request call check-in: ad116f6360 user: mrwellan tags: v1.60 | |
2014-02-18
| ||
13:28 | Clean up that broke stuff :( - reapply needed check-in: 6e33de13e0 user: mrwellan tags: broken-fixes | |
Modified client.scm from [ef3271835b] to [cac0c05e6c].
︙ | ︙ | |||
52 53 54 55 56 57 58 | ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup run-id #!key (remaining-tries 10) (failed-connects 0)) | < > < < > | < < < < < > < < | 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 | ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup run-id #!key (remaining-tries 10) (failed-connects 0)) (if (<= remaining-tries 0) (begin (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) (exit 1)) (let ((host-info (hash-table-ref/default *runremote* run-id #f))) (debug:print-info 0 "client:setup host-info=" host-info ", remaining-tries=" remaining-tries) (thread-sleep! 1) ;; try to avoid race conditons (if host-info (let* ((iface (car host-info)) (port (cadr host-info)) (start-res (http-transport:client-connect iface port)) ;; (ping-res (server:ping-server run-id iface port)) (ping-res (rmt:login-no-auto-client-setup start-res run-id))) (if ping-res ;; sucessful login? start-res) ;; return the server info (if (member remaining-tries '(3 4 6)) (begin ;; login failed (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info) (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (car host-info) (cadr host-info) " client:setup (host-info=#t)") (thread-sleep! 5) (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) (begin (debug:print 25 "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info) (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1)))))) ;; YUK: rename server-dat here (if new-dat new-dat (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (let* ((iface (tasks:hostinfo-get-interface server-dat)) (port (tasks:hostinfo-get-port server-dat)) (start-res (http-transport:client-connect iface port)) ;; (ping-res (server:ping-server run-id iface port)) (ping-res (rmt:login-no-auto-client-setup start-res run-id))) (if (member remaining-tries '(2 5)) (begin ;; login failed (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) ;;(debug:print 0 "INFO: login failed in client:setup with no existing server-dat: " server-dat ", new-dat: " new-dat ", and server-info: " server-info ", cleaning out records and then trying again") (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat) " client:setup (server-dat = #t)") (server:try-running run-id) (thread-sleep! 10) ;; give server a little time to start up (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) (begin (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1)))))) (begin ;; no server registered (if (eq? remaining-tries 2) (begin ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") (client:setup run-id remaining-tries: 10)) (begin (debug:print 25 "INFO: client:setup start-res (not defined here), run-id=" run-id ", server-dat=" server-dat) (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 3) (begin ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") (server:try-running run-id))) (thread-sleep! 10) ;; give server a little time to start up (client:setup run-id remaining-tries: (- remaining-tries 1))))))))))) |
︙ | ︙ |
Modified http-transport.scm from [7a89e82f89] to [95daefe19d].
︙ | ︙ | |||
280 281 282 283 284 285 286 | ;; (uri-dat (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) (uri-api-dat (make-request method: 'POST uri: api-url)) ;; (uri-reference (conc "http://" iface ":" port "/api")))) ;; (uri-api-dat (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/api")))) (server-dat (list iface port uri-dat uri-api-dat api-url))) ;; (login-res (server:ping-server run-id server-dat))) ;; login-no-auto-client-setup server-dat run-id))) server-dat)) ;; (if (and (list? login-res) | | | | 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 | ;; (uri-dat (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) (uri-api-dat (make-request method: 'POST uri: api-url)) ;; (uri-reference (conc "http://" iface ":" port "/api")))) ;; (uri-api-dat (make-request method: 'GET uri: (uri-reference (conc "http://" iface ":" port "/api")))) (server-dat (list iface port uri-dat uri-api-dat api-url))) ;; (login-res (server:ping-server run-id server-dat))) ;; login-no-auto-client-setup server-dat run-id))) server-dat)) ;; (if (and (list? login-res) (hash-table-set! *runremote* run-id server-dat) server-dat) ;; (hash-table-set! *runremote* run-id server-dat) ;; (debug:print-info 2 "Logged in and connected to " iface ":" port) ;; (hash-table-set! *runremote* run-id server-dat) ;; server-dat) ;; (begin ;; (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port) ;; #f)))) |
︙ | ︙ |
Modified rmt.scm from [834deede3a] to [a875532c44].
︙ | ︙ | |||
41 42 43 44 45 46 47 | (let* ((run-id (if rid rid 0)) (connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f))) (if cinfo cinfo (let loop ((numtries 100)) (let ((res (client:setup run-id))) (if res | | | 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 | (let* ((run-id (if rid rid 0)) (connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f))) (if cinfo cinfo (let loop ((numtries 100)) (let ((res (client:setup run-id))) (if res (hash-table-ref *runremote* run-id) ;; client:setup filled this in (hopefully) (if (> numtries 0) (begin (thread-sleep! 10) (loop (- numtries 1))) (begin (debug:print 0 "ERROR: 100 tries and no server, giving up") (exit 1))))))))) |
︙ | ︙ |
Modified server.scm from [ae3fed0a96] to [c246afb03e].
︙ | ︙ | |||
107 108 109 110 111 112 113 | ;; (define (server:try-running run-id) (if (eq? run-id 0) (server:run run-id) (rmt:start-server run-id))) (define (server:check-if-running run-id) | | | | > > > | 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | ;; (define (server:try-running run-id) (if (eq? run-id 0) (server:run run-id) (rmt:start-server run-id))) (define (server:check-if-running run-id) (let loop ((server-info (open-run-close tasks:get-server tasks:open-db run-id)) (trycount 0)) (if server-info ;; note: client:start will set *runremote*. this needs to be changed ;; also, client:start will login to the server, also need to change that. ;; ;; client:start returns #t if login was successful. ;; (let ((res (server:ping-server run-id (vector-ref server 1)(vector-ref server 0)))) run-id (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info)))) ;; if the server didn't respond we must remove the record (if res #t (begin (open-run-close tasks:server-force-clean-running-records-for-run-id tasks:open-db run-id " server:check-if-running") res))) |
︙ | ︙ |
Modified tasks.scm from [677b9b3c1c] to [c243a4652b].
︙ | ︙ | |||
93 94 95 96 97 98 99 | (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) (define (tasks:server-lock-slot mdb run-id) (tasks:server-clean-out-old-records-for-run-id mdb run-id " tasks:server-lock-slot") (if (< (tasks:num-in-available-state mdb run-id) 4) (begin (tasks:server-set-available mdb run-id) | | | 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 | (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) (define (tasks:server-lock-slot mdb run-id) (tasks:server-clean-out-old-records-for-run-id mdb run-id " tasks:server-lock-slot") (if (< (tasks:num-in-available-state mdb run-id) 4) (begin (tasks:server-set-available mdb run-id) (thread-sleep! 0.2) ;; Try removing this. It may not be needed. (tasks:server-am-i-the-server? mdb run-id)) #f)) ;; register that this server may come online (first to register goes though with the process) (define (tasks:server-set-available mdb run-id) (sqlite3:execute mdb |
︙ | ︙ | |||
203 204 205 206 207 208 209 | (exit 1)) (car (db:get-rows all)))) (header (db:get-header all)) (id (db:get-value-by-header first header "id")) (hostname (db:get-value-by-header first header "hostname")) (pid (db:get-value-by-header first header "pid")) (priority (db:get-value-by-header first header "priority"))) | < > | | | | > > > | 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 | (exit 1)) (car (db:get-rows all)))) (header (db:get-header all)) (id (db:get-value-by-header first header "id")) (hostname (db:get-value-by-header first header "hostname")) (pid (db:get-value-by-header first header "pid")) (priority (db:get-value-by-header first header "priority"))) ;; for now a basic check. add tiebreaking by priority later (let* ((my-pid (current-process-id)) (res (if (and (equal? hostname (get-host-name)) (equal? pid my-pid)) id #f))) (debug:print 0 "INFO: am-i-the-server got record " first ", my-pid: " my-pid ", pid: " pid ", result: " res) res))) ;; Use: (db:get-value-by-header (car (db:get-rows dat)) (db:get-header dat) "fieldname") ;; to extract info from the structure returned ;; (define (tasks:server-get-servers-vying-for-run-id mdb run-id) (let* ((header (list "id" "hostname" "pid" "interface" "port" "pubport" "state" "run_id" "priority" "start_time")) (selstr (string-intersperse header ",")) |
︙ | ︙ |