Megatest

Diff
Login

Differences From Artifact [12633dcec7]:

To Artifact [d398b938c6]:


59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
  (let ((dotserver-url (server:check-if-running *toppath*))) ;; check for .server  
    (when dotserver-url
      (debug:print-info 0 *default-log-port* "Server already running (=> "dotserver-url"<=).  Aborting server launch attempt in this process ("(current-process-id)")")
      (exit)
      ))
  
  (case transport-type
    ((http)(http-transport:launch run-id))
    ;;((nmsg)(nmsg-transport:launch run-id))
    ((rpc)  (rpc-transport:launch run-id))
    (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type)))

  ;; is this a good place to print server exit stats?
  (debug:print 0 "SERVER: max parallel api requests: " *max-api-process-requests*)
  







|







59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
  (let ((dotserver-url (server:check-if-running *toppath*))) ;; check for .server  
    (when dotserver-url
      (debug:print-info 0 *default-log-port* "Server already running (=> "dotserver-url"<=).  Aborting server launch attempt in this process ("(current-process-id)")")
      (exit)
      ))
  
  (case transport-type
    ((http)(http-transport:launch))
    ;;((nmsg)(nmsg-transport:launch run-id))
    ((rpc)  (rpc-transport:launch run-id))
    (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type)))

  ;; is this a good place to print server exit stats?
  (debug:print 0 "SERVER: max parallel api requests: " *max-api-process-requests*)
  
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
;; Given a run id start a server process    ### NOTE ### > file 2>&1 
;; if the run-id is zero and the target-host is set 
;; try running on that host
;;   incidental: rotate logs in logs/ dir.
;;
(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
  (let* ((curr-host   (get-host-name))
         (attempt-in-progress (server:start-attempted? areapath))
         (dot-server-url (server:check-if-running areapath))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (curr-pid    (current-process-id))
	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
	 (target-host (car homehost))
	 (testsuite   (common:get-testsuite-name))
	 (logfile     (conc areapath "/logs/server.log"))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") " -run-id " 0 (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
									      (conc " -daemonize -log " logfile)
									      "")
		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread")))
    ;; we want the remote server to start in *toppath* so push there
    (push-directory areapath)
    (cond
     (attempt-in-progress
      (debug:print 0 *default-log-port* "INFO: Not trying to start server because attempt is in progress: "attempt-in-progress))
     (dot-server-url
            (debug:print 0 *default-log-port* "INFO: Not trying to start server because one is already running : "dot-server-url))
     (else
      (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
      (thread-start! log-rotate)

      ;; host.domain.tld match host?
      (if (and target-host 
               ;; look at target host, is it host.domain.tld or ip address and does it 







|
|





|









|
|
|
|







120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
;; Given a run id start a server process    ### NOTE ### > file 2>&1 
;; if the run-id is zero and the target-host is set 
;; try running on that host
;;   incidental: rotate logs in logs/ dir.
;;
(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
  (let* ((curr-host   (get-host-name))
         ;; (attempt-in-progress (server:start-attempted? areapath))
         ;; (dot-server-url (server:check-if-running areapath))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (curr-pid    (current-process-id))
	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
	 (target-host (car homehost))
	 (testsuite   (common:get-testsuite-name))
	 (logfile     (conc areapath "/logs/server-" curr-pid "-" target-host ".log"))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") " -run-id " 0 (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
									      (conc " -daemonize -log " logfile)
									      "")
		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread")))
    ;; we want the remote server to start in *toppath* so push there
    (push-directory areapath)
    (cond
     ;; (attempt-in-progress
     ;;  (debug:print 0 *default-log-port* "INFO: Not trying to start server because attempt is in progress: "attempt-in-progress))
     ;; (dot-server-url
     ;;        (debug:print 0 *default-log-port* "INFO: Not trying to start server because one is already running : "dot-server-url))
     (else
      (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
      (thread-start! log-rotate)

      ;; host.domain.tld match host?
      (if (and target-host 
               ;; look at target host, is it host.domain.tld or ip address and does it 
162
163
164
165
166
167
168
169



































170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
      (setenv "TARGETHOST_LOGF" logfile)
      (common:wait-for-normalized-load 4 " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever
      (system (conc "nbfake " cmdln))
      (unsetenv "TARGETHOST_LOGF")
      (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
      (thread-join! log-rotate)
      (pop-directory)))))
    



































(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*
      (let ((sig (server:mk-signature)))
        (set! *my-client-signature* sig)
        *my-client-signature*)))

;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
(define (server:kind-run areapath)
  (let ((last-run-time (hash-table-ref/default *server-kind-run* areapath #f)))
    (if (or (not last-run-time)
	    (> (- (current-seconds) last-run-time) 30))
	(begin
	  (server:run areapath)
	  (hash-table-set! *server-kind-run* areapath (current-seconds))))))

(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.

(define (server:attempting-start areapath)
  (with-output-to-file
      (conc areapath "/.starting-server")
    (lambda ()
      (print (current-process-id) " on " (get-host-name)))))
  
(define (server:complete-attempt areapath)
  (delete-file* (conc areapath "/.starting-server")))
  
(define (server:start-attempted? areapath)
  (let ((flagfile (conc areapath "/.starting-server")))
    (handle-exceptions
     exn
     #f  ;; if things go wrong pretend we can't see the file
     (cond
      ((and (file-exists? flagfile)
            (< (- (current-seconds)
                  (file-modification-time flagfile))
               15)) ;; exists and less than 15 seconds old
       (with-input-from-file flagfile (lambda () (read-line))))
      ((file-exists? flagfile) ;; it is stale.
       (server:complete-attempt areapath)
       #f)
      (else #f)))))

(define (server:read-dotserver areapath)
  (let ((dotfile (conc areapath "/.server")))
    (handle-exceptions
     exn
     #f  ;; if things go wrong pretend we can't see the file
     (cond
      ((not (file-exists? dotfile))
       #f)
      ((not (file-read-access? dotfile))
       #f)
      ((> (server:dotserver-age-seconds areapath) (+ 5 (server:get-timeout)))
       (server:remove-dotserver-file areapath ".*")
       #f)
      (else
       (let* ((line
               (with-input-from-file
                   dotfile
                 (lambda ()
                   (read-line))))
              (tokens (if (string? line) (string-split line ":") #f)))
         (cond
          ((eq? 4 (length tokens))
           tokens)
          (else #f))))))))
       
(define (server:read-dotserver->url areapath)
  (let ((dotserver-tokens (server:read-dotserver areapath)))
    (if dotserver-tokens
        (conc (list-ref dotserver-tokens 0) ":" (list-ref dotserver-tokens 1))
        #f)))

;; write a .server file in *toppath* with hostport
;; return #t on success, #f otherwise
;;
(define (server:write-dotserver areapath host port pid transport)
  (let ((lock-file   (conc areapath "/.server.lock"))
	(server-file (conc areapath "/.server")))
    (if (common:simple-file-lock lock-file)
	(let ((res (handle-exceptions
		    exn
		    #f ;; failed for some reason, for the moment simply return #f
		    (with-output-to-file server-file
		      (lambda ()
			(print (conc host ":" port ":" pid ":" transport))))
		    #t)))
	  (debug:print-info 0 *default-log-port* "server file " server-file " for " host ":" port " created pid="pid)
	  (common:simple-file-release-lock lock-file)
	  res)
	#f)))


;; this will check that the .server file present matches the server calling this procedure.
;; if parameters match (this-pid and transport) the file will be touched and #t returned
;; otherwise #f will be returned.
(define (server:confirm-dotserver areapath this-iface this-port this-pid this-transport)
  (let* ((tokens (server:read-dotserver areapath)))
    (cond
     ((not tokens)
      (debug:print-info 0 *default-log-port* "INFO: .server file does not exist.")
      #f)
     ((not (eq? 4 (length tokens)))
      (debug:print-info 0 *default-log-port* "INFO: .server file is corrupt.  There are not 4 tokens as expeted; there are "(length tokens)".")
      #f)
     ((not (equal? this-iface (list-ref tokens 0)))
      (debug:print-info 0 *default-log-port* "INFO: .server file mismatch.  for iface, server has value >"(list-ref tokens 0)"< but this server's value is >"this-iface"<")
      #f)
     ((not (equal? (->string this-port)  (list-ref tokens 1)))
      (debug:print-info 0 *default-log-port* "INFO: .server file mismatch.  for port, .server has value >"(list-ref tokens 1)"< but this server's value is >"(->string this-port)"<")
      #f)
     ((not (equal? (->string this-pid)   (list-ref tokens 2)))
      (debug:print-info 0 *default-log-port* "INFO: .server file mismatch.  for pid, .server has value >"(list-ref tokens 2)"< but this server's value is >"(->string this-pid)"<")
      #f)
     ((not (equal? (->string this-transport) (->string (list-ref tokens 3))))
      (debug:print-info 0 *default-log-port* "INFO: .server file mismatch.  for transport, .server has value >"(list-ref tokens 3)"< but this server's value is >"this-transport"<")
      #f)
     (else (server:touch-dotserver areapath)
      #t))))

(define (server:touch-dotserver areapath)
  (let ((server-file (conc areapath "/.server")))
    (change-file-times server-file (current-seconds) (current-seconds))))

(define (server:dotserver-age-seconds areapath)
  (let ((server-file (conc areapath "/.server")))
    (begin
      (handle-exceptions
       exn
       #f
       (- (current-seconds)
          (file-modification-time server-file))))))
    
(define (server:remove-dotserver-file areapath hostport)
  (let ((dotserver-url   (server:read-dotserver->url areapath))
	(server-file (conc areapath "/.server"))
	(lock-file   (conc areapath "/.server.lock")))
    (if (and dotserver-url (string-match (conc ".*:" hostport "$") dotserver-url)) ;; port matches, good enough info to decide to remove the file
	(if (common:simple-file-lock lock-file)
	    (begin
	      (handle-exceptions
	       exn
	       #f
	       (delete-file* server-file))
	      (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " removed")
	      (common:simple-file-release-lock lock-file))
            (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " NOT removed - could not get lock."))
        (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " NOT removed - dotserver-url("dotserver-url") did not match hostport pattern ("hostport")"))))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath)
  (let* ((dotserver-url (server:read-dotserver->url areapath))) ;; tdbdat (tasks:open-db)))
    (if dotserver-url
	(let* ((res (case *transport-type*







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


















|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|










|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
      (setenv "TARGETHOST_LOGF" logfile)
      (common:wait-for-normalized-load 4 " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever
      (system (conc "nbfake " cmdln))
      (unsetenv "TARGETHOST_LOGF")
      (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
      (thread-join! log-rotate)
      (pop-directory)))))

;; given a path to a server log return: host port startseconds
;;
(define (server:logf-get-start-info logf)
  (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT (\\d+)"))) ;; SERVER STARTED: host:port AT timesecs
    (with-input-from-file
	logf
      (lambda ()
	(let loop ((inl  (read-line))
		   (lnum 0))
	  (if (not (eof-object? inl))
	      (let ((mlst (string-match rx inl)))
		(if (not mlst)
		    (if (< lnum 500) ;; give up if more than 500 lines of server log read
			(loop (read-line)(+ lnum 1))
			(list #f #f #f))
		    (cdr mlst)))
	      (list #f #f #F)))))))

;; get a list of servers with all relevant data
;;
(define (server:get-list areapath)
  (if (directory-exists? areapath)
      (let ((server-logs (glob (conc areapath "/logs/server-*.log"))))
	(if (null? server-logs)
	    '()
	    (let loop ((hed  (car server-logs))
		       (tal  (cdr server-logs))
		       (res '()))
	      (let* ((mod-time (file-modification-time hed))
		     (serv-rec (cons mod-time (server:logf-get-start-info hed)))
		     (new-res  (cons res serv-rec)))
		(if (null? tal)
		    new-res
		    (loop (car tal)(cdr tal) new-res))))))))

(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*
      (let ((sig (server:mk-signature)))
        (set! *my-client-signature* sig)
        *my-client-signature*)))

;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
(define (server:kind-run areapath)
  (let ((last-run-time (hash-table-ref/default *server-kind-run* areapath #f)))
    (if (or (not last-run-time)
	    (> (- (current-seconds) last-run-time) 30))
	(begin
	  (server:run areapath)
	  (hash-table-set! *server-kind-run* areapath (current-seconds))))))

(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.

;; (define (server:attempting-start areapath)
;;   (with-output-to-file
;;       (conc areapath "/.starting-server")
;;     (lambda ()
;;       (print (current-process-id) " on " (get-host-name)))))
;;   
;; (define (server:complete-attempt areapath)
;;   (delete-file* (conc areapath "/.starting-server")))
;;   
;; (define (server:start-attempted? areapath)
;;   (let ((flagfile (conc areapath "/.starting-server")))
;;     (handle-exceptions
;;      exn
;;      #f  ;; if things go wrong pretend we can't see the file
;;      (cond
;;       ((and (file-exists? flagfile)
;;             (< (- (current-seconds)
;;                   (file-modification-time flagfile))
;;                15)) ;; exists and less than 15 seconds old
;;        (with-input-from-file flagfile (lambda () (read-line))))
;;       ((file-exists? flagfile) ;; it is stale.
;;        (server:complete-attempt areapath)
;;        #f)
;;       (else #f)))))
;; 
;; (define (server:read-dotserver areapath)
;;   (let ((dotfile (conc areapath "/.server")))
;;     (handle-exceptions
;;      exn
;;      #f  ;; if things go wrong pretend we can't see the file
;;      (cond
;;       ((not (file-exists? dotfile))
;;        #f)
;;       ((not (file-read-access? dotfile))
;;        #f)
;;       ((> (server:dotserver-age-seconds areapath) (+ 5 (server:get-timeout)))
;;        (server:remove-dotserver-file areapath ".*")
;;        #f)
;;       (else
;;        (let* ((line
;;                (with-input-from-file
;;                    dotfile
;;                  (lambda ()
;;                    (read-line))))
;;               (tokens (if (string? line) (string-split line ":") #f)))
;;          (cond
;;           ((eq? 4 (length tokens))
;;            tokens)
;;           (else #f))))))))
;;        
;; (define (server:read-dotserver->url areapath)
;;   (let ((dotserver-tokens (server:read-dotserver areapath)))
;;     (if dotserver-tokens
;;         (conc (list-ref dotserver-tokens 0) ":" (list-ref dotserver-tokens 1))
;; #f)))
;; 
;; ;; write a .server file in *toppath* with hostport
;; ;; return #t on success, #f otherwise
;; ;;
;; (define (server:write-dotserver areapath host port pid transport)
;;   (let ((lock-file   (conc areapath "/.server.lock"))
;; 	(server-file (conc areapath "/.server")))
;;     (if (common:simple-file-lock lock-file)
;; 	(let ((res (handle-exceptions
;; 		    exn
;; 		    #f ;; failed for some reason, for the moment simply return #f
;; 		    (with-output-to-file server-file
;; 		      (lambda ()
;; 			(print (conc host ":" port ":" pid ":" transport))))
;; 		    #t)))
;; 	  (debug:print-info 0 *default-log-port* "server file " server-file " for " host ":" port " created pid="pid)
;; 	  (common:simple-file-release-lock lock-file)
;; 	  res)
;; 	#f)))
;; 
;; 
;; ;; this will check that the .server file present matches the server calling this procedure.
;; ;; if parameters match (this-pid and transport) the file will be touched and #t returned
;; ;; otherwise #f will be returned.
;; (define (server:confirm-dotserver areapath this-iface this-port this-pid this-transport)
;;   (let* ((tokens (server:read-dotserver areapath)))
;;     (cond
;;      ((not tokens)
;;       (debug:print-info 0 *default-log-port* "INFO: .server file does not exist.")
;;       #f)
;;      ((not (eq? 4 (length tokens)))
;;       (debug:print-info 0 *default-log-port* "INFO: .server file is corrupt.  There are not 4 tokens as expeted; there are "(length tokens)".")
;;       #f)
;;      ((not (equal? this-iface (list-ref tokens 0)))
;;       (debug:print-info 0 *default-log-port* "INFO: .server file mismatch.  for iface, server has value >"(list-ref tokens 0)"< but this server's value is >"this-iface"<")
;;       #f)
;;      ((not (equal? (->string this-port)  (list-ref tokens 1)))
;;       (debug:print-info 0 *default-log-port* "INFO: .server file mismatch.  for port, .server has value >"(list-ref tokens 1)"< but this server's value is >"(->string this-port)"<")
;;       #f)
;;      ((not (equal? (->string this-pid)   (list-ref tokens 2)))
;;       (debug:print-info 0 *default-log-port* "INFO: .server file mismatch.  for pid, .server has value >"(list-ref tokens 2)"< but this server's value is >"(->string this-pid)"<")
;;       #f)
;;      ((not (equal? (->string this-transport) (->string (list-ref tokens 3))))
;;       (debug:print-info 0 *default-log-port* "INFO: .server file mismatch.  for transport, .server has value >"(list-ref tokens 3)"< but this server's value is >"this-transport"<")
;;       #f)
;;      (else (server:touch-dotserver areapath)
;;       #t))))
;; 
;; (define (server:touch-dotserver areapath)
;;   (let ((server-file (conc areapath "/.server")))
;;     (change-file-times server-file (current-seconds) (current-seconds))))

(define (server:dotserver-age-seconds areapath)
  (let ((server-file (conc areapath "/.server")))
    (begin
      (handle-exceptions
       exn
       #f
       (- (current-seconds)
          (file-modification-time server-file))))))
    
;; (define (server:remove-dotserver-file areapath hostport)
;;   (let ((dotserver-url   (server:read-dotserver->url areapath))
;; 	(server-file (conc areapath "/.server"))
;; 	(lock-file   (conc areapath "/.server.lock")))
;;     (if (and dotserver-url (string-match (conc ".*:" hostport "$") dotserver-url)) ;; port matches, good enough info to decide to remove the file
;; 	(if (common:simple-file-lock lock-file)
;; 	    (begin
;; 	      (handle-exceptions
;; 	       exn
;; 	       #f
;; 	       (delete-file* server-file))
;; 	      (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " removed")
;; 	      (common:simple-file-release-lock lock-file))
;;             (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " NOT removed - could not get lock."))
;;         (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " NOT removed - dotserver-url("dotserver-url") did not match hostport pattern ("hostport")"))))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath)
  (let* ((dotserver-url (server:read-dotserver->url areapath))) ;; tdbdat (tasks:open-db)))
    (if dotserver-url
	(let* ((res (case *transport-type*