Megatest

Check-in [9f8cd866ea]
Login
Overview
Comment:wip
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.6584-ck5
Files: files | file ages | folders
SHA1: 9f8cd866ea52530294fb8e1cb2bb96b85de8b0db
User & Date: matt on 2021-05-02 15:41:35
Other Links: branch diff | manifest | tags
Context
2021-05-02
22:49
wip check-in: 621ec7fe98 user: matt tags: v1.6584-ck5
15:41
wip check-in: 9f8cd866ea user: matt tags: v1.6584-ck5
2021-05-01
12:10
beginnings of basic client check-in: 4ab7adb0ad user: matt tags: v1.6584-ck5
Changes

Modified http-transportmod.scm from [1d4bd0c79a] to [1f587f4eee].

95
96
97
98
99
100
101
102
103
104
105


106
107
108
109



110
111
112
113
114
115
116
;; (use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing)
;; 
;; Configurations for server
(tcp-buffer-size 2048)
(max-connections 2048) 

(defstruct servdat
  host
  port
  uuid
  dbfile


  )

(define (servdat->url sdat)
  (conc (srvdat-host sdat)":"(srvdat-port srvdat)))




(define (http-transport:make-server-url hostport)
  (if (not hostport)
      #f
      (conc "http://" (car hostport) ":" (cadr hostport))))

;;======================================================================







|
|
|
|
>
>
|


|
>
>
>







95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
;; (use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing)
;; 
;; Configurations for server
(tcp-buffer-size 2048)
(max-connections 2048) 

(defstruct servdat
  (host #f)
  (port #f)
  (uuid #f)
  (dbfile #f)
  (api-url #f)
  (api-uri #f)
  (api-req #f))

(define (servdat->url sdat)
  (conc (servdat-host sdat)":"(servdat-port sdat)))

;; this must be set up prior to making calls
(define api-proc (make-parameter conc))

(define (http-transport:make-server-url hostport)
  (if (not hostport)
      #f
      (conc "http://" (car hostport) ":" (cadr hostport))))

;;======================================================================
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
    (handle-directory spiffy-directory-listing)
    (handle-exception (lambda (exn chain)
			(signal (make-composite-condition
				 (make-property-condition 
				  'server
				  'message "server error")))))

    ;; http-transport:handle-directory) ;; simple-directory-handler)
    ;; Setup the web server and a /ctrl interface
    ;;
    (vhost-map `(((* any) . ,(lambda (continue)
			       ;; open the db on the first call 
				 ;; This is were we set up the database connections
			       (let* (($   (request-vars source: 'both))
				      (dat ($ 'dat))
				      (res #f))
				 (cond
				  ((equal? (uri-path (request-uri (current-request)))
					   '(/ "api"))
				   (send-response ;; the $ is the request vars proc
				    body: ((http-get-function 'api:process-request) *dbstruct-db* $)
				    headers: '((content-type text/plain)))
				   (mutex-lock! *heartbeat-mutex*)
				   (set! *db-last-access* (current-seconds))
				   (mutex-unlock! *heartbeat-mutex*))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ ""))
				   (send-response body: ((http-get-function 'http-transport:main-page))))







<












|







153
154
155
156
157
158
159

160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
    (handle-directory spiffy-directory-listing)
    (handle-exception (lambda (exn chain)
			(signal (make-composite-condition
				 (make-property-condition 
				  'server
				  'message "server error")))))


    ;; Setup the web server and a /ctrl interface
    ;;
    (vhost-map `(((* any) . ,(lambda (continue)
			       ;; open the db on the first call 
				 ;; This is were we set up the database connections
			       (let* (($   (request-vars source: 'both))
				      (dat ($ 'dat))
				      (res #f))
				 (cond
				  ((equal? (uri-path (request-uri (current-request)))
					   '(/ "api"))
				   (send-response ;; the $ is the request vars proc
				    body: ((api-proc) *dbstruct-db* $)
				    headers: '((content-type text/plain)))
				   (mutex-lock! *heartbeat-mutex*)
				   (set! *db-last-access* (current-seconds))
				   (mutex-unlock! *heartbeat-mutex*))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ ""))
				   (send-response body: ((http-get-function 'http-transport:main-page))))
291
292
293
294
295
296
297




298
299
300

301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
  (set! *http-connections-next-cleanup* (+ (current-seconds) 10))
  (mutex-unlock! *http-mutex*))

(define (http-transport:inc-requests-and-prep-to-close-all-connections)
  (mutex-lock! *http-mutex*)
  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))





;; serverdat contains uuid to be used for connection validation
;;
;; TODO: translate http-transport:client-connect to use servdat

;;
(define (http-transport:basic-send-receive serverdat cmd params #!key (numretries 3))
  (let* ((fullurl    (servdat->url serverdat))
	 (res        #f)
	 (success    #t)
         (server-id  (servdat-uuid serverdat)))
       ;; set up the http-client here
    (max-retry-attempts 1)
    ;; consider all requests indempotent
    (retry-request? (lambda (request)
		      #f))
    ;; send the data and get the response extract the needed info from
    ;; the http data and process and return it.
    (let* ((send-recieve (lambda ()
			   (set! res
				 (vector
				  #t ;; success
				  (with-input-from-request
				   fullurl 
				   (list (cons 'key server-id)
					 (cons 'cmd cmd)
					 (cons 'params sparams))
				   read)))))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!")
			      #f))
	      (th1 (make-thread send-recieve "with-input-from-request"))
	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
	 (close-idle-connections!)
	 (thread-terminate! th2)
	 (if (vector? res)
	     (if (vector-ref res 0) ;; this is the first flag or the second flag? 
                 (let* ((res-dat (vector-ref res 1)))
                    (if (and (string? res-dat) (string-contains res-dat "server-id mismatch"))
                     (signal (make-composite-condition
		          (make-property-condition 
		       'servermismatch
		       'message  (vector-ref res 1))))       
		      res)) ;; this is the *inner* vector? seriously? why?
                 (if (debug:debug-mode 11)
                     (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
                       (print-call-chain (current-error-port))
                       (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn))
                       (debug:print 11 *default-log-port* " server call chain:")
                       (pp (vector-ref res 1) (current-error-port))
                       (signal (vector-ref res 0)))
                     res))
	     (signal (make-composite-condition
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; Send "cmd" with json payload "params" to serverdat and receive result
;;
(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(area-dat #f))
  (let* ((fullurl    (if (vector? serverdat)
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        (vector #f "uninitialized"))
	 (success    #t)
	 (sparams    (db:obj->string params transport: 'http))
	 (runremote  (or area-dat *runremote*))
         (server-id   (if (vector? serverdat) 
                           (http-transport:server-dat-get-server-id serverdat)
                           (begin
			     (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			     (exit 1)))))
       (debug:print-info 11 *default-log-port* "cmd=" cmd " fullurl=" fullurl " server-id=" server-id " current time:" (current-seconds)) 

       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 
       ;; process and return it.
       (let* ((send-recieve (lambda ()
			      (mutex-lock! *http-mutex*)
			      ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
			      ;;					       ((exn http client-error) e (print e)))
			      (set! res (vector                ;;; DON'T FORGET - THIS IS THE CLIENT SIDE! NOTE: consider moving this to client.scm since we are only supporting http transport at this time.
					 success
					 (db:string->obj 
					  (handle-exceptions
					      exn
					      (let ((call-chain (get-call-chain))
						    (msg        ((condition-property-accessor 'exn 'message) exn)))
						(set! success #f)
                                                (if (debug:debug-mode 1)
                                                    (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...")
                                                    (begin
                                                      (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
                                                      (debug:print 0 *default-log-port* " message: " msg ", exn=" exn)
                                                      (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey"))
                                                      (debug:print 0 *default-log-port* " call-chain: " call-chain)))
                                                (if runremote
						    (remote-conndat-set! runremote #f))
						;; Killing associated server to allow clean retry.")
						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
						(mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
						(db:obj->string #f))
					    (with-input-from-request ;; was dat
					     fullurl 
					     (list (cons 'key (or server-id   "thekey"))
						   (cons 'cmd cmd)
						   (cons 'params sparams))
					     read-string))
					  transport: 'http)
                                         0)) ;; added this speculatively
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-idle-connections!)
			      (mutex-unlock! *http-mutex*)
			      ))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!")
			      #f))
	      (th1 (make-thread send-recieve "with-input-from-request"))
	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
          (vector-set! res 0 success)
	 (thread-terminate! th2)
	 (if (vector? res)
	     (if (vector-ref res 0) ;; this is the first flag or the second flag? 
                 (let* ((res-dat (vector-ref res 1)))
                    (if (and (string? res-dat) (string-contains res-dat "server-id mismatch"))
                     (signal (make-composite-condition
		          (make-property-condition 
		       'servermismatch
		       'message  (vector-ref res 1))))       
		      res)) ;; this is the *inner* vector? seriously? why?
                 (if (debug:debug-mode 11)
                     (let ((call-chain (get-call-chain))) ;; note: this code also called in nmsg-transport - consider consolidating it
                       (print-call-chain (current-error-port))
                       (debug:print-error 11 *default-log-port* "error above occured at server, res=" res) ;; " message: " ((condition-property-accessor 'exn 'message) exn))
                       (debug:print 11 *default-log-port* " server call chain:")
                       (pp (vector-ref res 1) (current-error-port))
                       (signal (vector-ref res 0)))
                     res))
	     (signal (make-composite-condition
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections #!key (area-dat #f))
  (let* ((runremote  (or area-dat *runremote*))
	 (server-dat (if runremote
                         (remote-conndat runremote)







>
>
>
>


<
>

|
|


















|












<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







295
296
297
298
299
300
301
302
303
304
305
306
307

308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
















343








































































































344
345
346
347
348
349
350
  (set! *http-connections-next-cleanup* (+ (current-seconds) 10))
  (mutex-unlock! *http-mutex*))

(define (http-transport:inc-requests-and-prep-to-close-all-connections)
  (mutex-lock! *http-mutex*)
  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))

;; Turn off proxy handling
(define (http-transport:client-turn-off-proxy)
  (determine-proxy (constantly #f))) ;; From (chicken base)

;; serverdat contains uuid to be used for connection validation
;;

;; NOTE: serverdat must be initialized once by servdat-init
;;
(define (http-transport:send-receive serverdat cmd params #!key (numretries 3))
  (let* ((fullurl    (servdat-api-uri serverdat)) ;; gets uri for /api
	 (res        #f)
	 (success    #t)
         (server-id  (servdat-uuid serverdat)))
       ;; set up the http-client here
    (max-retry-attempts 1)
    ;; consider all requests indempotent
    (retry-request? (lambda (request)
		      #f))
    ;; send the data and get the response extract the needed info from
    ;; the http data and process and return it.
    (let* ((send-recieve (lambda ()
			   (set! res
				 (vector
				  #t ;; success
				  (with-input-from-request
				   fullurl 
				   (list (cons 'key server-id)
					 (cons 'cmd cmd)
					 (cons 'params params))
				   read)))))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!")
			      #f))
	      (th1 (make-thread send-recieve "with-input-from-request"))
	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
	 (close-idle-connections!)
	 (thread-terminate! th2)
















      res)))









































































































;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections #!key (area-dat #f))
  (let* ((runremote  (or area-dat *runremote*))
	 (server-dat (if runremote
                         (remote-conndat runremote)
505
506
507
508
509
510
511












512
513
514
515









516
517
518
519
520
521
522
;;
(define (http-transport:client-connect iface port server-id) 
  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
	 (api-req      (make-request method: 'POST uri: api-uri))
	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds) server-id)))
    server-dat))













;;======================================================================
;; NEW SERVER METHOD
;;======================================================================










(define *srvpktspec*
  `((server (host    . h)
	    (port    . p)
	    (servkey . k)
	    (pid     . i)
	    (ipaddr  . a)







>
>
>
>
>
>
>
>
>
>
>
>




>
>
>
>
>
>
>
>
>







393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
;;
(define (http-transport:client-connect iface port server-id) 
  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
	 (api-req      (make-request method: 'POST uri: api-uri))
	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds) server-id)))
    server-dat))

;; initialize servdat for client side
(define (servdat-init sdat-in iface port uuid)
  (let* ((sdat (or sdat-in (make-servdat))))
    (if uuid (servdat-uuid-set! sdat uuid))
    (servdat-host-set! sdat iface)
    (servdat-port-set! sdat port)
    (servdat-api-url-set! sdat (conc "http://" iface ":" port "/api"))
    (servdat-api-uri-set! sdat (uri-reference (servdat->url sdat)))
    (servdat-api-req-set! sdat (make-request method: 'POST
					     uri: (servdat-api-uri sdat)))
    sdat))

;;======================================================================
;; NEW SERVER METHOD
;;======================================================================

;; only use for main.db - need to re-write some of this :(
;;
(define (get-lock-db sdat dbfile)
  (let* ((dbh (db:open-run-db dbfile db:initialize-db))
	 (res (db:get-iam-server-lock dbh dbfile)))
    (sqlite3:finalize! dbh)
    res))


(define *srvpktspec*
  `((server (host    . h)
	    (port    . p)
	    (servkey . k)
	    (pid     . i)
	    (ipaddr  . a)
627
628
629
630
631
632
633
































































634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
		  (loop (cdr tail) new-best)))))))
	  
	  

;;======================================================================
;; END NEW SERVER METHOD
;;======================================================================

































































;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running dbname) 
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
  (let* ((run-id            (let ((rid (args:get-arg "-run-id"))) ;; consider getting rid of the -run-id mechanism
			      (if rid
				  (string->number rid)
				  #f)))
	 (db-file           (if dbname
				(db:dbname->path *toppath* dbname)
				(db:run-id->path *toppath* run-id)))
	 (sdat              #f)
	 ;; (tmp-area          (common:get-db-tmp-area))
	 (server-start-time (current-seconds))
	 (pkts-dir          (get-pkts-dir))
	 (server-key        (server:mk-signature))
	 (server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (begin ;; let ((sdat #f))
			  (thread-sleep! 0.01)
			  (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *server-info*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if (and sdat
				   (not changed)
				   (> (- (current-seconds) start-time) 2))
			      (begin
				(debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server")
				;; create a server pkt in *toppath*/.meta/srvpkts

				;; TODO:
				;;   1. change sdat to stuct
				;;   2. add uuid to struct
				;;   3. update uuid in sdat here
				;;
				(servdat-uuid-set! sdat
						(register-server
						 pkts-dir *srvpktspec*
						 (get-host-name)
						 (servdat-port sdat) server-key
						 (servdat-host sdat) db-file))

				;; now read pkts and see if we are a contender
				(let* ((all-pkts     (get-all-server-pkts pkts-dir *srvpktspec*))
				       (viables      (get-viable-servers all-pkts db-file))
				       (best-srv     (get-best-candidate viables db-file))
				       (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)))
				  (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key)
				  ;; am I the best-srv, compare server-keys to know
				  (if (equal? best-srv-key server-key)
				      (if (db:get-main-lock db-file)
					  (begin
					    (debug:print 0 *default-log-port* "I'm the server!")
					    (servdat-dbfile-set! sdat db-file))
					  (begin
					    (debug:print 0 *default-log-port* "I'm not the server, exiting.")
					    (bdat-time-to-exit-set! *bdat* #t)
					    (thread-sleep! 0.2)
					    (exit)))
				      (begin
					(debug:print 0 *default-log-port*
						     "Keys do not match "best-srv-key", "server-key", exiting.")
					(bdat-time-to-exit-set! *bdat* #t)
					(thread-sleep! 0.2)
					(exit)))
				  sdat))
                              (begin
				(debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
                                (sleep 4)
				(if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
				    (begin
				      (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
                                      #;(common:save-pkt `((action . died)
                                                         (T      . server)
                                                         (pid    . ,(current-process-id))
                                                         (ipaddr . ,(car sdat))
                                                         (port   . ,(cadr sdat))
                                                         (msg    . "Transport died?"))
						       *configdat* #t)
				      (exit))
				    (loop start-time
					  (equal? sdat last-sdat)
					  sdat)))))))
	 (iface       (servdat-host server-info))
         (port        (servdat-port server-info))
         (last-access 0)
	 (server-timeout (server:expiration-timeout))
	 (server-log-file (args:get-arg "-log"))) ;; always set when we are a server

    (let loop ((count         0)







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>










|










<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627















628





















































629
630
631
632
633
634
635
		  (loop (cdr tail) new-best)))))))
	  
	  

;;======================================================================
;; END NEW SERVER METHOD
;;======================================================================

(define (http-transport:wait-for-server pkts-dir db-file server-key)
  (let* ((sdat *server-info*))
    (let loop ((start-time (current-seconds))
	       (changed    #t)
	       (last-sdat  "not this"))
      (begin ;; let ((sdat #f))
	(thread-sleep! 0.01)
	(debug:print-info 0 *default-log-port* "Waiting for server alive signature")
	(mutex-lock! *heartbeat-mutex*)
	(set! sdat *server-info*)
	(mutex-unlock! *heartbeat-mutex*)
	(if (and sdat
		 (not changed)
		 (> (- (current-seconds) start-time) 2))
	    (begin
	      (debug:print-info 0 *default-log-port* "Received server alive signature, now attempting to lock in server")
	      ;; create a server pkt in *toppath*/.meta/srvpkts
	      
	      ;; TODO:
	      ;;   1. change sdat to stuct
	      ;;   2. add uuid to struct
	      ;;   3. update uuid in sdat here
	      ;;
	      (servdat-uuid-set! sdat
				 (register-server
				  pkts-dir *srvpktspec*
				  (get-host-name)
				  (servdat-port sdat) server-key
				  (servdat-host sdat) db-file))
	      
	      ;; now read pkts and see if we are a contender
	      (let* ((all-pkts     (get-all-server-pkts pkts-dir *srvpktspec*))
		     (viables      (get-viable-servers all-pkts db-file))
		     (best-srv     (get-best-candidate viables db-file))
		     (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)))
		(debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key)
		;; am I the best-srv, compare server-keys to know
		(if (equal? best-srv-key server-key)
		    (if (get-lock-db sdat db-file) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id)
			(debug:print 0 *default-log-port* "I'm the server!")
			(servdat-dbfile-set! sdat db-file))
		    (begin
		      (debug:print 0 *default-log-port* "I'm not the server, exiting.")
		      (bdat-time-to-exit-set! *bdat* #t)
		      (thread-sleep! 0.2)
		      (exit)))
		(begin
		  (debug:print 0 *default-log-port*
			       "Keys do not match "best-srv-key", "server-key", exiting.")
		  (bdat-time-to-exit-set! *bdat* #t)
		  (thread-sleep! 0.2)
		  (exit)))
	      sdat))
	(begin
	  (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
	  (sleep 4)
	  (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
	      (begin
		(debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
		(exit))
	      (loop start-time
		    (equal? sdat last-sdat)
		    sdat)))))))

;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running dbname) 
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
  (let* ((run-id            (let ((rid (args:get-arg "-run-id"))) ;; consider getting rid of the -run-id mechanism
			      (if rid                             ;; replace with -db
				  (string->number rid)
				  #f)))
	 (db-file           (if dbname
				(db:dbname->path *toppath* dbname)
				(db:run-id->path *toppath* run-id)))
	 (sdat              #f)
	 ;; (tmp-area          (common:get-db-tmp-area))
	 (server-start-time (current-seconds))
	 (pkts-dir          (get-pkts-dir))
	 (server-key        (server:mk-signature))















	 (server-info       (http-transport:wait-for-server pkts-dir db-file server-key ))





















































	 (iface       (servdat-host server-info))
         (port        (servdat-port server-info))
         (last-access 0)
	 (server-timeout (server:expiration-timeout))
	 (server-log-file (args:get-arg "-log"))) ;; always set when we are a server

    (let loop ((count         0)
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
                               "Keep running"))))
      (thread-start! th2)
      (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor.
      (thread-start! th3)
      (set! *didsomething* #t)
      (thread-join! th2)
      (exit))))

;; (define (http-transport:server-signal-handler signum)
;;   (signal-mask! signum)
;;   (handle-exceptions
;;    exn
;;    (debug:print 0 *default-log-port* " ... exiting ...")
;;    (let ((th1 (make-thread (lambda ()
;; 			     (thread-sleep! 1))
;; 			   "eat response"))
;; 	 (th2 (make-thread (lambda ()
;; 			     (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
;; 			     (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
;; 			     (debug:print 0 *default-log-port* "       Done.")
;; 			     (exit 4))
;; 			   "exit on ^C timer")))
;;      (thread-start! th2)
;;      (thread-start! th1)
;;      (thread-join! th2))))

	    
;; Generate a unique signature for this server
(define (server:mk-signature)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







790
791
792
793
794
795
796



















797
798
799
800
801
802
803
                               "Keep running"))))
      (thread-start! th2)
      (thread-sleep! 0.252) ;; give the server time to settle before starting the keep-running monitor.
      (thread-start! th3)
      (set! *didsomething* #t)
      (thread-join! th2)
      (exit))))



















	    
;; Generate a unique signature for this server
(define (server:mk-signature)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)

Modified rmtmod.scm from [681d0db458] to [13d1c5f978].

474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
		    ;; caused a lot of
		    ;; problems. However it
		    ;; is needed to deal with
		    ;; attemtped
		    ;; communication to
		    ;; servers that have gone
		    ;; away
		    (http-transport:client-api-send-receive 0 conninfo cmd params)
		    ((servermismatch)  (vector #f "Server id mismatch" ))
		    ((commfail)(vector #f "communications fail"))
		    ((exn)(vector #f "other fail" (print-call-chain)))))
	 (dat      (if (and (vector? dat-in) ;; ... check it is a correct size
			    (> (vector-length dat-in) 1))
		       dat-in
		       (vector #f (conc "communications fail (type 2), dat-in=" dat-in))))







|







474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
		    ;; caused a lot of
		    ;; problems. However it
		    ;; is needed to deal with
		    ;; attemtped
		    ;; communication to
		    ;; servers that have gone
		    ;; away
		    (http-transport:send-receive 0 conninfo cmd params)
		    ((servermismatch)  (vector #f "Server id mismatch" ))
		    ((commfail)(vector #f "communications fail"))
		    ((exn)(vector #f "other fail" (print-call-chain)))))
	 (dat      (if (and (vector? dat-in) ;; ... check it is a correct size
			    (> (vector-length dat-in) 1))
		       dat-in
		       (vector #f (conc "communications fail (type 2), dat-in=" dat-in))))
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
       )
  (if (common:api-changed?)
      (common:set-last-run-version)))

;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
(define (rmt:login-no-auto-client-setup connection-info)
  (rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version *my-client-signature*)))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
         (res      (handle-exceptions
                    exn
                    #f
                    (http-transport:client-api-send-receive run-id connection-info cmd params))))
    (if (and res (vector-ref res 0))
        (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
        #f)))

(define (std-exit-procedure)
  ;;(common:telemetry-log-close)
  (on-exit (lambda () 0))
  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
  (let ((no-hurry  (if (bdat-time-to-exit *bdat*) ;; hurry up
		       #f







|
|
<
<
<
<
<
<
<
<
<
<







1739
1740
1741
1742
1743
1744
1745
1746
1747










1748
1749
1750
1751
1752
1753
1754
       )
  (if (common:api-changed?)
      (common:set-last-run-version)))

;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
;; (define (rmt:login-no-auto-client-setup connection-info)
;;   (rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version *my-client-signature*)))











(define (std-exit-procedure)
  ;;(common:telemetry-log-close)
  (on-exit (lambda () 0))
  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
  (let ((no-hurry  (if (bdat-time-to-exit *bdat*) ;; hurry up
		       #f
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955


;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host-port-in server-id #!key (do-exit #f))
  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
		       #f ;; (server:check-if-running *toppath*)
		;; (if (number? host-port-in) ;; we were handed a server-id
		;; 	   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
		;; 	     ;; (print "srec: " srec " host-port-in: " host-port-in)
		;; 	     (if srec
		;; 		 (conc (vector-ref srec 3) ":" (vector-ref srec 4))
		;; 		 (conc "no such server-id " host-port-in)))
		       host-port-in))) ;; )
    (let* ((host-port (if host:port
			  (let ((slst (string-split   host:port ":")))
			    (if (eq? (length slst) 2)
				(list (car slst)(string->number (cadr slst)))
				#f))
			  #f)))
;;	   (toppath       (launch:setup)))
      ;; (print "host-port=" host-port)
      (if (not host-port)
	  (begin
	    (if host-port-in
		(debug:print 0 *default-log-port*  "ERROR: bad host:port"))
	    (if do-exit (exit 1))
	    #f)
	  (let* ((iface      (car host-port))
		 (port       (cadr host-port))
		 (server-dat (http-transport:client-connect iface port server-id))
		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
	    (if (and (list? login-res)
		     (car login-res))
		(begin
		  ;; (print "LOGIN_OK")
		  (if do-exit (exit 0))
		  #t)
		(begin
		  ;; (print "LOGIN_FAILED")
		  (if do-exit (exit 1))
		  #f)))))))

;; ping the given server
;;
(define (server:check-server server-record)
  (let* ((server-url (server:record->url server-record))
         (server-id (server:record->id server-record)) 
         (res        (case *transport-type*
                       ((http)(server:ping server-url server-id))
                       ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
                       )))
    (if res
        server-url
	#f)))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath) ;;  #!key (numservers "2"))







|
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<






<
|
<
<







1884
1885
1886
1887
1888
1889
1890
1891









1892















1893











1894
1895
1896
1897
1898
1899

1900


1901
1902
1903
1904
1905
1906
1907


;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host port server-id #!key (do-exit #f))









  (let* ((sdat       (servdat-init #f host port server-id)))















    (http-transport:send-receive sdat 'ping '())))












;; ping the given server
;;
(define (server:check-server server-record)
  (let* ((server-url (server:record->url server-record))
         (server-id (server:record->id server-record)) 

         (res       (server:ping server-url server-id)))


    (if res
        server-url
	#f)))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath) ;;  #!key (numservers "2"))
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986

;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
;;
(define (server:kind-run areapath)
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least 3 seconds old
  (server:wait-for-server-start-last-flag areapath)
  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
	     (call-num     (car last-run-dat))
	     (when-run     (cadr last-run-dat))
	     (run-delay    (+ (case call-num
				((0)    0)
				((1)   20)







|







1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938

;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
;;
(define (server:kind-run areapath)
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least 3 seconds old
  ;; (server:wait-for-server-start-last-flag areapath)
  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
	     (call-num     (car last-run-dat))
	     (when-run     (cadr last-run-dat))
	     (run-delay    (+ (case call-num
				((0)    0)
				((1)   20)
2003
2004
2005
2006
2007
2008
2009

2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
  #;(case (server:get-transport)
    ((rpc)  (rpc:client-connect  iface port))
    ((http) (http:client-connect iface port))
    ((zmq)  (zmq:client-connect  iface port))
    (else   (rpc:client-connect  iface port))))

(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))

  (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects)
  #;(case (server:get-transport)
    ((rpc) (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id))
    ((http)(client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects))
    (else  (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id))))

;; Do all the connection work, look up the transport type and set up the
;; connection if required.
;;
;; There are two scenarios. 
;;   1. We are a test manager and we received *transport-type* and *runremote* via cmdline
;;   2. We are a run tests, list runs or other interactive process and we must figure out
;;      *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;

(define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
  (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
  (server:start-and-wait areapath)
  (if (<= remaining-tries 0)
      (begin
	(debug:print-error 0 *default-log-port* "failed to start or connect to server")
	(exit 1))
      ;;







>
|


















|







1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
  #;(case (server:get-transport)
    ((rpc)  (rpc:client-connect  iface port))
    ((http) (http:client-connect iface port))
    ((zmq)  (zmq:client-connect  iface port))
    (else   (rpc:client-connect  iface port))))

(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))
  (print "got here")
  ;; (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects)
  #;(case (server:get-transport)
    ((rpc) (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id))
    ((http)(client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects))
    (else  (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id))))

;; Do all the connection work, look up the transport type and set up the
;; connection if required.
;;
;; There are two scenarios. 
;;   1. We are a test manager and we received *transport-type* and *runremote* via cmdline
;;   2. We are a run tests, list runs or other interactive process and we must figure out
;;      *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;

#;(define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f))
  (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries)
  (server:start-and-wait areapath)
  (if (<= remaining-tries 0)
      (begin
	(debug:print-error 0 *default-log-port* "failed to start or connect to server")
	(exit 1))
      ;;

Modified servermod.scm from [ea2cb26e2d] to [e99a19822e].

60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
	)

(define (server:make-server-url hostport)
  (if (not hostport)
      #f
      (conc "http://" (car hostport) ":" (cadr hostport))))

;;======================================================================
;; logic for getting homehost. Returns (host . at-home)
;; IF *toppath* is not set, wait up to five seconds trying every two seconds
;; (this is to accomodate the watchdog)
;;
#;(define (common:get-homehost #!key (trynum 5))
  (assert *toppath* "ERROR: common:get-homehost called before launch:setup. This is fatal.")
  ;; called often especially at start up. use mutex to eliminate collisions
  (mutex-lock! *homehost-mutex*)
  (cond
   (*home-host*
    (mutex-unlock! *homehost-mutex*)
    *home-host*)
   ((not *toppath*)
    (mutex-unlock! *homehost-mutex*)
    ;; (launch:setup) ;; safely mutexed now
    (if (> trynum 0)
	(begin
	  (thread-sleep! 2)
	  (common:get-homehost trynum: (- trynum 1)))
	#f))
   (else
    (let* ((currhost (get-host-name))
	   (bestadrs (server:get-best-guess-address currhost))
	   ;; first look in config, then look in file .homehost, create it if not found
	   (homehost (or (configf:lookup *configdat* "server" "homehost" )
			 (handle-exceptions
			     exn
			     (if (> trynum 0)
				 (let ((delay-time (* (- 5 trynum) 5)))
				   (mutex-unlock! *homehost-mutex*)
				   (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying "
						delay-time " seconds and trying again, message: "  ((condition-property-accessor 'exn 'message) exn)
						", exn=" exn)
				   (thread-sleep! delay-time)
				   (common:get-homehost trynum: (- trynum 1)))
				 (begin
				   (mutex-unlock! *homehost-mutex*)
				   (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)
						"] Failed to read .homehost file after trying five times. Giving up and exiting, message: "
						((condition-property-accessor 'exn 'message) exn))
				   (exit 1)))
			   (let ((hhf (conc *toppath* "/.homehost")))
			     (if (common:file-exists? hhf)
				 (with-input-from-file hhf read-line)
				 (if (file-writable? *toppath*)
				     (begin
				       (with-output-to-file hhf
					 (lambda ()
					   (print bestadrs)))
				       (begin
					 (mutex-unlock! *homehost-mutex*)
					 (car (common:get-homehost))))
				     #f))))))
	   (at-home  (or (equal? homehost currhost)
			 (equal? homehost bestadrs))))
      (set! *home-host* (cons homehost at-home))
      (mutex-unlock! *homehost-mutex*)
      *home-host*))))

;;======================================================================
;; am I on the homehost?
;;
#;(define (common:on-homehost?)
  (let ((hh (common:get-homehost)))
    (if hh
	(cdr hh)
	#f)))

#;(define (common:wait-for-homehost-load maxnormload msg)
  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
                     #f
                     (common:get-homehost)))
         (hh     (if hh-dat (car hh-dat) #f)))
    (common:wait-for-normalized-load maxnormload msg hh)))


;; Given a run id start a server process    ### NOTE ### > file 2>&1 
;; if the run-id is zero and the target-host is set 
;; try running on that host
;;   incidental: rotate logs in logs/ dir.
;;
(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
  (let* ((curr-host   (get-host-name))
         ;; (attempt-in-progress (server:start-attempted? areapath))
         ;; (dot-server-url (server:check-if-running areapath))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (curr-pid    (current-process-id))
	 ;; (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
	 ;; (target-host (car homehost))
	 (testsuite   (common:get-area-name))
	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
	 (profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
			   ""))
	 (cmdln (conc
		 (common:get-megatest-exe)
		 " -server " (or (get-host-name) "-")
		 (if (equal? (configf:lookup *configdat* "server" "daemonize")
			     "yes")
		     " -daemonize "
		     "")
		 ;; " -log " logfile
		 " -m testsuite:" testsuite
		 " " profile-mode
		 ))
	 ;; (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!?
         (load-limit  (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
    ;; we want the remote server to start in *toppath* so push there
    (push-directory areapath)
    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
    ;; (thread-start! log-rotate)
    
    ;; host.domain.tld match host?
    #;(if (and target-host 
	     ;; look at target host, is it host.domain.tld or ip address and does it 
	     ;; match current ip or hostname
	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
	     (not (equal? curr-ip target-host)))
	(begin
	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
	  (setenv "TARGETHOST" target-host)))
      
    (setenv "NBFAKE_LOG" logfile)
    ;; (thread-sleep! (/ (pseudo-random-integer 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time
    ;; (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever
    #;(common:wait-for-homehost-load load-limit (conc " delaying server start due to load on homehost. limit is " load-limit))
    (system (conc "nbfake " cmdln))
    (unsetenv "NBFAKE_LOG")
    ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
    ;; (thread-join! log-rotate)
    (pop-directory)))

(define (server:record->url servr)
  (handle-exceptions
   exn
   (begin 
     (debug:print-info 0 *default-log-port*  "Unable to get server url from " servr ", exn=" exn)     
   #f)
  (match-let (((mod-time host port start-time server-id pid)
	       servr))
    (if (and host port)
	(conc host ":" port)
	#f))))

;; wait for server=start-last to be three seconds old
;;
(define (server:wait-for-server-start-last-flag areapath)
  (let* ((start-flag (conc areapath "/logs/server-start-last"))
	 ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
	 (reftime    (configf:lookup-number *configdat* "server" "idletime" default: 4))
	 (server-key (conc (get-host-name) "-" (current-process-id))))
    (if (file-exists? start-flag)
	(let* ((fmodtime (file-modification-time start-flag))
	       (delta    (- (current-seconds) fmodtime))
	       (all-go   (> delta reftime)))
	  (if (and all-go
		   (begin
                     (debug:print-info 0 *default-log-port* "Writing " start-flag)
		     (with-output-to-file start-flag
		       (lambda ()
			 (print server-key)))
		     (thread-sleep! 0.254)
		     (let ((res (with-input-from-file start-flag
				  (lambda ()
				    (read-line)))))
		       (equal? server-key res))))
	      #t ;; (system (conc "touch " start-flag)) ;; lazy but safe
	      (begin
		(debug:print-info 0 *default-log-port* "Gating server start, last start: "
				  fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go)
		(thread-sleep! reftime)
		(server:wait-for-server-start-last-flag areapath)))))))

(define (server:kill servr)
  (handle-exceptions
    exn
    (begin 
      (debug:print-info 0 *default-log-port*  "Unable to get host and/or port from " servr ", exn=" exn)     
    #f)
  (match-let (((mod-time hostname port start-time server-id pid)
	       servr))
    (tasks:kill-server hostname pid))))


)







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<

<
<
<
<
<
<















<
<


<
<

|













<




<
<
<
<
<
<
<
<
<
<
<
<

<
<
<


<
<














<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<













60
61
62
63
64
65
66



























































67


68






69
70
71
72
73
74
75
76
77
78
79
80
81
82
83


84
85


86
87
88
89
90
91
92
93
94
95
96
97
98
99
100

101
102
103
104












105



106
107


108
109
110
111
112
113
114
115
116
117
118
119
120
121




























122
123
124
125
126
127
128
129
130
131
132
133
134
	)

(define (server:make-server-url hostport)
  (if (not hostport)
      #f
      (conc "http://" (car hostport) ":" (cadr hostport))))




























































;; reuse this for server load?


;;






#;(define (common:wait-for-homehost-load maxnormload msg)
  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
                     #f
                     (common:get-homehost)))
         (hh     (if hh-dat (car hh-dat) #f)))
    (common:wait-for-normalized-load maxnormload msg hh)))


;; Given a run id start a server process    ### NOTE ### > file 2>&1 
;; if the run-id is zero and the target-host is set 
;; try running on that host
;;   incidental: rotate logs in logs/ dir.
;;
(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
  (let* ((curr-host   (get-host-name))


	 (curr-ip     (server:get-best-guess-address curr-host))
	 (curr-pid    (current-process-id))


	 (testsuite   (common:get-area-name))
	 (logfile     (conc areapath "/logs/server.log"))
	 (profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
			   ""))
	 (cmdln (conc
		 (common:get-megatest-exe)
		 " -server " (or (get-host-name) "-")
		 (if (equal? (configf:lookup *configdat* "server" "daemonize")
			     "yes")
		     " -daemonize "
		     "")
		 ;; " -log " logfile
		 " -m testsuite:" testsuite
		 " " profile-mode
		 ))

         (load-limit  (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
    ;; we want the remote server to start in *toppath* so push there
    (push-directory areapath)
    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")












    (setenv "NBFAKE_LOG" logfile)



    (system (conc "nbfake " cmdln))
    (unsetenv "NBFAKE_LOG")


    (pop-directory)))

(define (server:record->url servr)
  (handle-exceptions
   exn
   (begin 
     (debug:print-info 0 *default-log-port*  "Unable to get server url from " servr ", exn=" exn)     
   #f)
  (match-let (((mod-time host port start-time server-id pid)
	       servr))
    (if (and host port)
	(conc host ":" port)
	#f))))






























(define (server:kill servr)
  (handle-exceptions
    exn
    (begin 
      (debug:print-info 0 *default-log-port*  "Unable to get host and/or port from " servr ", exn=" exn)     
    #f)
  (match-let (((mod-time hostname port start-time server-id pid)
	       servr))
    (tasks:kill-server hostname pid))))


)