Megatest

Check-in [7c83ed2d8b]
Login
Overview
Comment:Experimental tweaks to address stuck server start issue
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: 7c83ed2d8bd87283ea37ca049316cb4541b717c8
User & Date: mrwellan on 2014-09-11 13:03:41
Other Links: branch diff | manifest | tags
Context
2014-09-11
13:40
fixed issue with portlocker, added more verbosity on server start check-in: 8cab551ef3 user: mrwellan tags: v1.60
13:03
Experimental tweaks to address stuck server start issue check-in: 7c83ed2d8b user: mrwellan tags: v1.60
11:44
Added server killing, cleaning out junk records. check-in: 71bd40f627 user: mrwellan tags: v1.60, v1.6001_beta2
Changes

Modified http-transport.scm from [4bbddddd32] to [23aae77ee3].

132
133
134
135
136
137
138
139

140
141
142
143
144
145
146
132
133
134
135
136
137
138

139
140
141
142
143
144
145
146







-
+







				  (else (continue))))))))
    (http-transport:try-start-server run-id ipaddrstr start-port server-id)))

;; This is recursively run by http-transport:run until sucessful
;;
(define (http-transport:try-start-server run-id ipaddrstr portnum server-id)
  (let ((config-hostname (configf:lookup *configdat* "server" "hostname")))
    (debug:print-info 2 "http-transport:try-start-server run-id=" run-id " ipaddrsstr=" ipaddrstr " portnum=" portnum " server-id=" server-id " config-hostname=" config-hostname)
    (debug:print-info 0 "http-transport:try-start-server run-id=" run-id " ipaddrsstr=" ipaddrstr " portnum=" portnum " server-id=" server-id " config-hostname=" config-hostname)
    (handle-exceptions
     exn
     (begin
       (print-error-message exn)
       (if (< portnum 64000)
	   (begin 
	     (debug:print 0 "WARNING: attempt to start server failed. Trying again ...")
160
161
162
163
164
165
166
167

168
169
170
171
172
173
174
160
161
162
163
164
165
166

167
168
169
170
171
172
173
174







-
+







	     (print "ERROR: Tried and tried but could not start the server"))))
     ;; any error in following steps will result in a retry
     (set! *server-info* (list ipaddrstr portnum))
     (open-run-close tasks:server-set-interface-port 
		     tasks:open-db 
		     server-id 
		     ipaddrstr portnum)
     (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum)
     (debug:print 0 "INFO: Trying to start server on " ipaddrstr ":" portnum)
     ;; This starts the spiffy server
     ;; NEED WAY TO SET IP TO #f TO BIND ALL
     ;; (start-server bind-address: ipaddrstr port: portnum)
     (if config-hostname ;; this is a hint to bind directly
	 (start-server port: portnum bind-address: (if (equal? config-hostname "-")
						       ipaddrstr
						       config-hostname))
335
336
337
338
339
340
341


342
343
344
345
346
347
348
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350







+
+







  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (let* ((server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (let ((sdat #f))
			  (thread-sleep! 0.01)
			  (debug:print-info 0 "Waiting for server alive signal")
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *server-info*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if (and sdat
				   (not changed)
				   (> (- (current-seconds) start-time) 2))
			      sdat
489
490
491
492
493
494
495

496
497
498
499
500
501
502

503
504
505
506
507
508
509
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513







+







+







		    (- remtries 1)))
	    (begin
	      ;; since we didn't get the server lock we are going to clean up and bail out
	      (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
	      (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db " http-transport:launch")
	      ))
	(let* ((th2 (make-thread (lambda ()
				   (debug:print-info 0 "Server run thread started")
				   (http-transport:run 
				    (if (args:get-arg "-server")
					(args:get-arg "-server")
					"-")
				    run-id
				    server-id)) "Server run"))
	       (th3 (make-thread (lambda ()
				   (debug:print-info 0 "Server monitor thread started")
				   (http-transport:keep-running server-id run-id))
				 "Keep running")))
	  ;; Database connection


	  ;; don't start the db here

Modified server.scm from [3b1ef21982] to [e660b98e2f].

85
86
87
88
89
90
91

92

93
94
95
96
97
98
99
85
86
87
88
89
90
91
92

93
94
95
96
97
98
99
100







+
-
+







  (let* ((curr-host   (get-host-name))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (target-host (configf:lookup *configdat* "server" "homehost" ))
	 (logfile     (conc *toppath* "/logs/" run-id ".log"))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") " -run-id " run-id (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
									      (conc " -daemonize -log " logfile)
									      "")
									      "")))) ;; (conc " >> " logfile " 2>&1 &")))))
		      " -debug 4 "))) ;; (conc " >> " logfile " 2>&1 &")))))
    (debug:print 0 "INFO: Starting server (" cmdln ") as none running ...")
    (push-directory *toppath*)
    (if (not (directory-exists? "logs"))(create-directory "logs"))
    ;; host.domain.tld match host?
    (if (and target-host 
	     ;; look at target host, is it host.domain.tld or ip address and does it 
	     ;; match current ip or hostname