Megatest

Check-in [cb995a99df]
Login
Overview
Comment:Trying to get reliable server starts in the face of ports, hosts, kills etc. What fun
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: cb995a99dfc1c2978b6b9c6fefc7c58e58f118cd
User & Date: mrwellan on 2014-02-24 16:46:49
Other Links: branch diff | manifest | tags
Context
2014-02-24
19:44
Random port assignment, minimize re-use of ports check-in: bbd7cb0f5a user: matt tags: v1.60
16:46
Trying to get reliable server starts in the face of ports, hosts, kills etc. What fun check-in: cb995a99df user: mrwellan tags: v1.60
16:22
Trying to get reliable server starts in the face of ports, hosts, kills etc. What fun check-in: af4b1c4114 user: mrwellan tags: v1.60
Changes

Modified client.scm from [ad24ae52dc] to [3302993138].

51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
;;   2. We are a run tests, list runs or other interactive process and we must figure out
;;      *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;
(define (client:setup run-id #!key (remaining-tries 20) (failed-connects 0))
  (debug:print 0 "INFO: client:setup remaining-tries=" remaining-tries)
  (if (<= remaining-tries 0)
      (begin
	(debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
	(exit 1))
      (let ((server-dat (and run-id (hash-table-ref/default *runremote* run-id #f))))
	(if server-dat
	    (let ((start-res (http-transport:client-connect run-id ;; NB// confusion over server-dat and connection result!
							    (car  server-dat)
							    (cadr server-dat))))
	      (if start-res ;; sucessful login?
		  start-res
		  (if (eq? remaining-tries 6)
		      (begin    ;; login failed
			(hash-table-delete! *runremote* run-id)
			(open-run-close tasks:server-force-clean-run-record
			 		tasks:open-db
			 		run-id 
			 		(car  server-dat)
			 		(cadr server-dat))
			(thread-sleep! 5)
			(client:setup run-id remaining-tries: (- remaining-tries 1)))
		      (begin
			(thread-sleep! 5)
			(client:setup run-id remaining-tries: (- remaining-tries 1))))))
	    (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id)))
	      (if server-dat
		  (let ((start-res (http-transport:client-connect run-id
								  (tasks:hostinfo-get-interface server-dat)
								  (tasks:hostinfo-get-port      server-dat))))
		    (if start-res
			start-res
			(if (eq? remaining-tries 6)
			    (begin    ;; login failed
			      (hash-table-delete! *runremote* run-id)
			      (open-run-close tasks:server-force-clean-run-record
					      tasks:open-db
					      run-id 
					      (tasks:hostinfo-get-interface server-dat)
					      (tasks:hostinfo-get-port      server-dat))
			      (thread-sleep! 2)
			      (server:try-running run-id)
			      (thread-sleep! 10) ;; give server a little time to start up
			      (client:setup run-id remaining-tries: (- remaining-tries 1)))
			    (begin
			      (thread-sleep! 5)
			      (client:setup run-id remaining-tries: (- remaining-tries 1))))))
		  (begin    ;; no server registered
		    (thread-sleep! 2)
		    (server:try-running run-id)
		    (thread-sleep! 10) ;; give server a little time to start up







|












|








|










|










|







51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
;;   2. We are a run tests, list runs or other interactive process and we must figure out
;;      *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;
(define (client:setup run-id #!key (remaining-tries 10) (failed-connects 0))
  (debug:print 0 "INFO: client:setup remaining-tries=" remaining-tries)
  (if (<= remaining-tries 0)
      (begin
	(debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
	(exit 1))
      (let ((server-dat (and run-id (hash-table-ref/default *runremote* run-id #f))))
	(if server-dat
	    (let ((start-res (http-transport:client-connect run-id ;; NB// confusion over server-dat and connection result!
							    (car  server-dat)
							    (cadr server-dat))))
	      (if start-res ;; sucessful login?
		  start-res
		  (if (eq? remaining-tries 4)
		      (begin    ;; login failed
			(hash-table-delete! *runremote* run-id)
			(open-run-close tasks:server-force-clean-run-record
			 		tasks:open-db
			 		run-id 
			 		(car  server-dat)
			 		(cadr server-dat))
			(thread-sleep! 5)
			(client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1)))
		      (begin
			(thread-sleep! 5)
			(client:setup run-id remaining-tries: (- remaining-tries 1))))))
	    (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id)))
	      (if server-dat
		  (let ((start-res (http-transport:client-connect run-id
								  (tasks:hostinfo-get-interface server-dat)
								  (tasks:hostinfo-get-port      server-dat))))
		    (if start-res
			start-res
			(if (eq? remaining-tries 2)
			    (begin    ;; login failed
			      (hash-table-delete! *runremote* run-id)
			      (open-run-close tasks:server-force-clean-run-record
					      tasks:open-db
					      run-id 
					      (tasks:hostinfo-get-interface server-dat)
					      (tasks:hostinfo-get-port      server-dat))
			      (thread-sleep! 2)
			      (server:try-running run-id)
			      (thread-sleep! 10) ;; give server a little time to start up
			      (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1)))
			    (begin
			      (thread-sleep! 5)
			      (client:setup run-id remaining-tries: (- remaining-tries 1))))))
		  (begin    ;; no server registered
		    (thread-sleep! 2)
		    (server:try-running run-id)
		    (thread-sleep! 10) ;; give server a little time to start up