Megatest

Check-in [2f1850785d]
Login
Overview
Comment:speculative fix
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.70-refactor-procedures
Files: files | file ages | folders
SHA1: 2f1850785d2ddb622035fa56f616e01896df5d8e
User & Date: matt on 2022-06-01 21:14:59
Other Links: branch diff | manifest | tags
Context
2022-06-06
12:29
quiet down the is-trigger-dropped messages check-in: 8a05ecdb52 user: matt tags: v1.70-refactor-procedures
2022-06-04
18:08
Fixed -import-megatest.db and -sync-to-megatest.db, fixed dashboard startup with no db Leaf check-in: 9a5898a74e user: mmgraham tags: 1.70-fixed-sync
2022-06-01
21:14
speculative fix check-in: 2f1850785d user: matt tags: v1.70-refactor-procedures
06:08
Add forced reconnect to allow old servers to gracefully die check-in: f1ba33210e user: matt tags: v1.70-refactor-procedures
Changes

Modified client.scm from [dc4c7b41e8] to [b582d33d7d].

69
70
71
72
73
74
75
76

77






78
79
80
81
82
83
84
85
86
87
88
89
90
91

92
93
94
95
96
97
98
69
70
71
72
73
74
75

76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96

97
98
99
100
101
102
103
104







-
+

+
+
+
+
+
+













-
+







;;   2. We are a run-tests, list-runs or other interactive process, and we must figure out
;;      *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;

 
(define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
  (mutex-lock! *rmt-mutex*)
  (let ((res (client:setup-http-baby areapath remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat)))
    (mutex-unlock! *rmt-mutex*)
    res))

(define (client:setup-http-baby areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
  (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
  (server:start-and-wait areapath)
  (if (<= remaining-tries 0)
      (begin
	(debug:print-error 0 *default-log-port* "failed to start or connect to server")
	(exit 1))
      ;;
      ;; Alternatively here, we can get the list of candidate servers and work our way
      ;; through them searching for a good one.
      ;;
      (let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath))
	     (runremote  (or area-dat *runremote*)))
	(if (not server-dat) ;; no server found
	    (client:setup-http areapath remaining-tries: (- remaining-tries 1))
	    (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
	    (let ((host  (cadr  server-dat))
		  (port  (caddr server-dat))
                  (server-id (caddr (cddr server-dat))))
	      (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	      (if (and (not area-dat)
		       (not *runremote*))
                  (begin       
106
107
108
109
110
111
112


113
114
115




116
117
118
119
120
121
122

123
124
125
126
127
128
129

130
112
113
114
115
116
117
118
119
120



121
122
123
124
125
126
127
128
129
130

131
132
133
134
135
136
137

138
139







+
+
-
-
-
+
+
+
+






-
+






-
+

		  (let* ((start-res (case *transport-type*
				      ((http)(http-transport:client-connect host port server-id))))
			 (ping-res  (case *transport-type* 
				      ((http)(rmt:login-no-auto-client-setup start-res)))))
		    (if (and start-res
			     ping-res)
			(let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
			  (if runremote
			      (begin
			  (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
			  start-res)
				(remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
				(debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
				start-res)
			      (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))
			(begin    ;; login failed but have a server record, clean out the record and try again
			  (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332
			  (case *transport-type* 
			    ((http)(http-transport:close-connections)))
			  (remote-conndat-set! runremote #f)  ;; (hash-table-delete! runremote run-id)
			  (thread-sleep! 1)
			  (client:setup-http areapath remaining-tries: (- remaining-tries 1))
			  (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))
			  )))
		  (begin    ;; no server registered
		    ;; (server:kind-run areapath)
		    (server:start-and-wait areapath)
		    (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
		    (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries))))  ;; give server a little time to start up, randomize a little to avoid start storms.
		    (client:setup-http areapath remaining-tries: (- remaining-tries 1)))))))))
		    (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))))))))