Megatest

Check-in [8a4205f90b]
Login
Overview
Comment:Most needed changes made for multi-server with log handshaking. However there is a segfault ...
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | server-log-handshaking
Files: files | file ages | folders
SHA1: 8a4205f90b698a30371e28480f9e0717d440d2c7
User & Date: matt on 2017-01-27 22:47:55
Other Links: branch diff | manifest | tags
Context
2017-01-28
12:25
Fixed couple missing changes needed for server log handshaking check-in: 5a64939577 user: matt tags: server-log-handshaking
2017-01-27
22:47
Most needed changes made for multi-server with log handshaking. However there is a segfault ... check-in: 8a4205f90b user: matt tags: server-log-handshaking
2017-01-25
23:48
Get list of servers now works check-in: f2556f210b user: matt tags: server-log-handshaking
Changes

Modified common.scm from [040477db58] to [9ceabb42c1].

139
140
141
142
143
144
145
146
147
148
149
150
151
152
153

(define *run-info-cache*     (make-hash-table)) ;; run info is stable, no need to reget
(define *launch-setup-mutex* (make-mutex))     ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set
(define *homehost-mutex*     (make-mutex))

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        (if *toppath* (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (or (server:get-timeout) 100))) ;; default to 100 seconds

;; launching and hosts
(defstruct host







|







139
140
141
142
143
144
145
146
147
148
149
150
151
152
153

(define *run-info-cache*     (make-hash-table)) ;; run info is stable, no need to reget
(define *launch-setup-mutex* (make-mutex))     ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set
(define *homehost-mutex*     (make-mutex))

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (or (server:get-timeout) 100))) ;; default to 100 seconds

;; launching and hosts
(defstruct host

Modified dashboard.scm from [29cdcf21f3] to [bb7acd661f].

97
98
99
100
101
102
103




104
105
106
107
108
109
110
      (exit)))

(if (args:get-arg "-h")
    (begin
      (print help)
      (exit)))





;; TODO: Move this inside (main)
;;
(if (not (launch:setup))
    (begin
      (print "Failed to find megatest.config, exiting") 
      (exit 1)))








>
>
>
>







97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
      (exit)))

(if (args:get-arg "-h")
    (begin
      (print help)
      (exit)))

(if (not (common:on-homehost?))
    (begin
      (debug:print 0 "ERROR: Current policy requires running dashboard on homehost: " (common:get-homehost))))
    
;; TODO: Move this inside (main)
;;
(if (not (launch:setup))
    (begin
      (print "Failed to find megatest.config, exiting") 
      (exit 1)))

Modified db.scm from [a6e4ada759] to [0ef09f1d37].

12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
;;======================================================================
;; Database access
;;======================================================================

;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc

(use (srfi 18) extras tcp stack) ;; RADT => use of require-extension?
(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format dot-locking z3 typed-records)
(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:)) ;; RADT => prefix??

(declare (unit db))
(declare (uses common))
(declare (uses keys))
(declare (uses ods))







|







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
;;======================================================================
;; Database access
;;======================================================================

;; dbstruct vector containing all the relevant dbs like main.db, megatest.db, run.db etc

(use (srfi 18) extras tcp stack) ;; RADT => use of require-extension?
(use sqlite3 srfi-1 posix regex regex-case srfi-69 csv-xml s11n md5 message-digest base64 format dot-locking z3 typed-records matchable)
(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:)) ;; RADT => prefix??

(declare (unit db))
(declare (uses common))
(declare (uses keys))
(declare (uses ods))
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838

839
840
841
842
843
844
845
846
(define (db:multi-db-sync dbstruct . options)
  (if (not (launch:setup))
      (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.")
      (let* ((mtdb     (dbr:dbstruct-mtdb dbstruct))
	     (tmpdb    (db:get-db dbstruct))
             (refndb   (dbr:dbstruct-refndb dbstruct))
	     (allow-cleanup #t) ;; (if run-ids #f #t))
	     (tdbdat  (tasks:open-db))
	     (servers (tasks:get-all-servers (db:delay-if-busy tdbdat)))
	     (data-synced 0)) ;; count of changed records (I hope)
    
	;; kill servers
	(if (member 'killservers options)
	    (for-each
	     (lambda (server)
	       (tasks:server-delete-record (db:delay-if-busy tdbdat) (vector-ref server 0) "dbmigration")

	       (tasks:kill-server (vector-ref server 2)(vector-ref server 1)))
	     servers))

	;; clear out junk records
	;;
	(if (member 'dejunk options)
	    (begin
	      (db:delay-if-busy mtdb) ;; ok to delay on mtdb







|
|






|
>
|







823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
(define (db:multi-db-sync dbstruct . options)
  (if (not (launch:setup))
      (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.")
      (let* ((mtdb     (dbr:dbstruct-mtdb dbstruct))
	     (tmpdb    (db:get-db dbstruct))
             (refndb   (dbr:dbstruct-refndb dbstruct))
	     (allow-cleanup #t) ;; (if run-ids #f #t))
	     ;; (tdbdat  (tasks:open-db))
	     (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat)))
	     (data-synced 0)) ;; count of changed records (I hope)
    
	;; kill servers
	(if (member 'killservers options)
	    (for-each
	     (lambda (server)
	       (match-let (((mod-time host port start-time pid) server))
		 (if (and host pid)
		     (tasks:kill-server host pid))))
	     servers))

	;; clear out junk records
	;;
	(if (member 'dejunk options)
	    (begin
	      (db:delay-if-busy mtdb) ;; ok to delay on mtdb

Modified megatest.scm from [d7a15310c1] to [9fc67a21aa].

350
351
352
353
354
355
356
357
358

359



360

361
362
363
364
365
366
367
368

;; The watchdog is to keep an eye on things like db sync etc.
;;
(define *watchdog* (make-thread common:watchdog "Watchdog thread"))

(if (not (args:get-arg "-server"))
    (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog
;;(BB> "thread-start! watchdog")


(if (args:get-arg "-log")



    (let ((oup (open-output-file (args:get-arg "-log"))))

      (debug:print-info 0 *default-log-port* "Sending log output to " (args:get-arg "-log"))
      (set! *default-log-port* oup)))

(if (or (args:get-arg "-h")
	(args:get-arg "-help")
	(args:get-arg "--help"))
    (begin
      (print help)







<

>
|
>
>
>
|
>
|







350
351
352
353
354
355
356

357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372

;; The watchdog is to keep an eye on things like db sync etc.
;;
(define *watchdog* (make-thread common:watchdog "Watchdog thread"))

(if (not (args:get-arg "-server"))
    (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog


(if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server
    (let* ((tl   (or (args:get-arg "-log")(launch:setup)))   ;; run launch:setup if -server
	   (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name
		     (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log")))
	   (oup  (open-output-file logf)))
      (if (not (args:get-arg "-log"))
	  (hash-table-set! args:arg-hash "-log" logf)) ;; fake out future queries of -log
      (debug:print-info 0 *default-log-port* "Sending log output to " logf)
      (set! *default-log-port* oup)))

(if (or (args:get-arg "-h")
	(args:get-arg "-help")
	(args:get-arg "--help"))
    (begin
      (print help)
697
698
699
700
701
702
703
704
705
706
707

708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
	    (debug:print-error 0 *default-log-port* "Parameter to -envdelta should be new=star-end")))))

;;======================================================================
;; Start the server - can be done in conjunction with -runall or -runtests (one day...)
;;   we start the server if not running else start the client thread
;;======================================================================

(if (args:get-arg "-server")

    ;; Server? Start up here.
    ;;

    (let ((tl        (launch:setup))
	;; (run-id    (and (args:get-arg "-run-id")
	;; 		  (string->number (args:get-arg "-run-id"))))
          (transport-type (string->symbol (or (args:get-arg "-transport") "http"))))
      ;; (if run-id
      ;;   (begin
      (server:launch 0 transport-type)
      (set! *didsomething* #t)))
;;     ;; (debug:print-error 0 *default-log-port* "server requires run-id be specified with -run-id")))
;; 
;;     ;; Not a server? This section will decide how to communicate
;;     ;;
;;     ;;  Setup client for all expect listed here
;;     (if (null? (lset-intersection 
;; 		equal?
;; 		(hash-table-keys args:arg-hash)
;; 		'("-list-servers"
;; 		  "-stop-server"
;;                   "-kill-server"
;; 		  "-show-cmdinfo"
;; 		  "-list-runs"
;; 		  "-ping")))
;; 	(if (launch:setup)
;; 	    (let ((run-id    (and (args:get-arg "-run-id")
;; 				  (string->number (args:get-arg "-run-id")))))
;; 	      ;; (set! *fdb*   (filedb:open-db (conc *toppath* "/db/paths.db")))
;; 	      ;; if not list or kill then start a client (if appropriate)
;; 	      (if (or (args-defined? "-h" "-version" "-create-megatest-area" "-create-test")
;; 		      (eq? (length (hash-table-keys args:arg-hash)) 0))
;; 		  (debug:print-info 1 *default-log-port* "Server connection not needed")
;; 		  (begin
;; 		    ;; (if run-id 
;; 		    ;;     (client:launch run-id) 
;; 		    ;;     (client:launch 0)      ;; without run-id we'll start a server for "0"
;; 		    #t
;; 		    ))))))

(if (or (args:get-arg "-list-servers")
	(args:get-arg "-stop-server")
        (args:get-arg "-kill-server"))
    (let ((tl (launch:setup)))
      (if tl 
	  (let* ((tdbdat  (tasks:open-db))







<
<
|
|
>

<
<

<
<


<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







701
702
703
704
705
706
707


708
709
710
711


712


713
714




























715
716
717
718
719
720
721
	    (debug:print-error 0 *default-log-port* "Parameter to -envdelta should be new=star-end")))))

;;======================================================================
;; Start the server - can be done in conjunction with -runall or -runtests (one day...)
;;   we start the server if not running else start the client thread
;;======================================================================



;; Server? Start up here.
;;
(if (args:get-arg "-server")
    (let ((tl        (launch:setup))


          (transport-type (string->symbol (or (args:get-arg "-transport") "http"))))


      (server:launch 0 transport-type)
      (set! *didsomething* #t)))





























(if (or (args:get-arg "-list-servers")
	(args:get-arg "-stop-server")
        (args:get-arg "-kill-server"))
    (let ((tl (launch:setup)))
      (if tl 
	  (let* ((tdbdat  (tasks:open-db))

Modified rmt.scm from [e2d5384f31] to [0b9221a215].

90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  3")
      (rmt:open-qry-close-locally cmd 0 params))

     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url *runremote*)           ;; have a server
           (not (server:read-dotserver *toppath*)))  ;; server has died.
      (set! *runremote* #f)
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))

     ;; on homehost and this is a write, we already have a server
     ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url *runremote*))          ;; have a server
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4")
      (rmt:open-qry-close-locally cmd 0 params))

     ;; commented by bb; this was blocking server passive start on write on homehost (case 5)
     ;; ;; on homehost and this is a write, we have a server (we know because case 4 checked)
     ;; ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
     ;;       (not (member cmd api:read-only-queries)))
     ;;  (mutex-unlock! *rmt-mutex*)
     ;;  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
     ;;  (rmt:open-qry-close-locally cmd 0 params))

     
     ;;  on homehost, no server contact made and this is a write, passively start a server 
     ((and (cdr (remote-hh-dat *runremote*)) ; new
           (not (remote-server-url *runremote*))
	   (not (member cmd api:read-only-queries)))
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
      (let ((server-url (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if server-url
	    (remote-server-url-set! *runremote* server-url) ;; the string can be consumed by the client setup if needed
	    (if (not (server:start-attempted? *toppath*))
		(server:kind-run *toppath*))))
             (mutex-unlock! *rmt-mutex*)
             (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5.1")
             (rmt:open-qry-close-locally cmd 0 params))



     ;;;
           ;;     (begin                            ;; not on homehost, start server and wait
            ;; (mutex-unlock! *rmt-mutex*)
	    ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5.2")
	    ;; (tasks:start-and-wait-for-server (tasks:open-db) 0 15)
            ;; (rmt:send-receive cmd rid params attemptnum: attemptnum))   ;)  ;)
;;;;
     
     ;; if not on homehost ensure we have a connection to a live server
     ;; NOTE: we *have* a homehost record by now

     ;; ((and (not (cdr (remote-hh-dat *runremote*)))        ;; not on a homehost 
     ;;       (not (remote-conndat *runremote*))             ;; and no connection
     ;;       (server:read-dotserver *toppath*))             ;; .server file exists
     ;;  ;; something caused the server entry in tdb to disappear, but the server is still running
     ;;  (server:remove-dotserver-file *toppath* ".*")
     ;;  (mutex-unlock! *rmt-mutex*)
     ;;  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  20")
     ;;  (rmt:send-receive cmd rid params attemptnum: (add1 attemptnum)))

     ((and (not (cdr (remote-hh-dat *runremote*)))        ;; not on a homehost 
           (not (remote-conndat *runremote*)))            ;; and no connection
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6  hh-dat: " (remote-hh-dat *runremote*) " conndat: " (remote-conndat *runremote*))
      (mutex-unlock! *rmt-mutex*)
      (tasks:start-and-wait-for-server (tasks:open-db) 0 15)
      (remote-conndat-set! *runremote* (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http







|













<
<
<
<
<
<
<
<
<





|


<
|
|
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110









111
112
113
114
115
116
117
118

119
120
121
122






















123
124
125
126
127
128
129
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  3")
      (rmt:open-qry-close-locally cmd 0 params))

     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url *runremote*)           ;; have a server
           (not (server:check-if-running *toppath*)))  ;; server has died.
      (set! *runremote* #f)
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))

     ;; on homehost and this is a write, we already have a server
     ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url *runremote*))          ;; have a server
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4")
      (rmt:open-qry-close-locally cmd 0 params))










     ;;  on homehost, no server contact made and this is a write, passively start a server 
     ((and (cdr (remote-hh-dat *runremote*)) ; new
           (not (remote-server-url *runremote*))
	   (not (member cmd api:read-only-queries)))
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
      (let ((server-url  (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if server-url
	    (remote-server-url-set! *runremote* server-url) ;; the string can be consumed by the client setup if needed

	    (server:kind-run *toppath*)))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5.1")
      (rmt:open-qry-close-locally cmd 0 params))























     ((and (not (cdr (remote-hh-dat *runremote*)))        ;; not on a homehost 
           (not (remote-conndat *runremote*)))            ;; and no connection
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  6  hh-dat: " (remote-hh-dat *runremote*) " conndat: " (remote-conndat *runremote*))
      (mutex-unlock! *rmt-mutex*)
      (tasks:start-and-wait-for-server (tasks:open-db) 0 15)
      (remote-conndat-set! *runremote* (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http

Modified server.scm from [e489317c03] to [81e0620ae8].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20

;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(require-extension (srfi 18) extras tcp s11n)

(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest directory-utils posix-extras)
;; (use zmq)

(use spiffy uri-common intarweb http-client spiffy-request-vars)

(declare (unit server))

(declare (uses common))












|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20

;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(require-extension (srfi 18) extras tcp s11n)

(use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest directory-utils posix-extras matchable)
;; (use zmq)

(use spiffy uri-common intarweb http-client spiffy-request-vars)

(declare (unit server))

(declare (uses common))
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
         ;; (attempt-in-progress (server:start-attempted? areapath))
         ;; (dot-server-url (server:check-if-running areapath))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (curr-pid    (current-process-id))
	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
	 (target-host (car homehost))
	 (testsuite   (common:get-testsuite-name))
	 (logfile     (conc areapath "/logs/server-" curr-pid "-" target-host ".log"))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
							   " -daemonize "
							   "")
		      " -log " logfile
		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread")))
    ;; we want the remote server to start in *toppath* so push there
    (push-directory areapath)
    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
    (thread-start! log-rotate)
    







|




|







107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
         ;; (attempt-in-progress (server:start-attempted? areapath))
         ;; (dot-server-url (server:check-if-running areapath))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (curr-pid    (current-process-id))
	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
	 (target-host (car homehost))
	 (testsuite   (common:get-testsuite-name))
	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
							   " -daemonize "
							   "")
		      ;; " -log " logfile
		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread")))
    ;; we want the remote server to start in *toppath* so push there
    (push-directory areapath)
    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
    (thread-start! log-rotate)
    
163
164
165
166
167
168
169

170
171
172
173
174
175
176
177
178
179


180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210






211
212
213
214
215
216
217
			    (string->number (caddr dat))))))
	      (list #f #f #f)))))))

;; get a list of servers with all relevant data
;; ( mod-time host port start-time )
;;
(define (server:get-list areapath)

  (if (directory-exists? areapath)
      (let ((server-logs (glob (conc areapath "/logs/server-*.log"))))
	(if (null? server-logs)
	    '()
	    (let loop ((hed  (car server-logs))
		       (tal  (cdr server-logs))
		       (res '()))
	      (let* ((mod-time (file-modification-time hed))
		     (serv-dat (server:logf-get-start-info hed))
		     (serv-rec (cons mod-time serv-dat))


		     (new-res  (cons serv-rec res)))
		(if (null? tal)
		    new-res
		    (loop (car tal)(cdr tal) new-res))))))))

;; given a list of servers get a list of valid servers, i.e. at least
;; 10 seconds old, has started and is less than 1 hour old and is
;; active (i.e. mod-time < 10 seconds
;;
;; mod-time host port start-time
;;
;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
;; and servers should stick around for about two hours or so.
;;
(define (server:get-best srvlst)
  (let ((now (current-seconds)))
    (sort
     (filter (lambda (rec)
	       (let ((start-time (list-ref rec 3))
		     (mod-time   (list-ref rec 0)))
		 (print "start-time: " start-time " mod-time: " mod-time)
		 (and start-time mod-time
		      (> (- now start-time) 1)    ;; been running at least 1 seconds
		      (< (- now mod-time)   10)   ;; still alive - file touched in last 10 seconds
		      (< (- now start-time) 3600) ;; under one hour running time
		      )))
	     srvlst)
     (lambda (a b)
       (< (list-ref a 3)
	  (list-ref b 3))))))
		







(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*
      (let ((sig (server:mk-signature)))
        (set! *my-client-signature* sig)
        *my-client-signature*)))








>
|
|
|
|
|
|
|
|
|
|
>
>
|
|
|
|





|










|









|
>
>
>
>
>
>







163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
			    (string->number (caddr dat))))))
	      (list #f #f #f)))))))

;; get a list of servers with all relevant data
;; ( mod-time host port start-time )
;;
(define (server:get-list areapath)
  (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")))
    (if (directory-exists? areapath)
	(let ((server-logs (glob (conc areapath "/logs/server-*.log"))))
	  (if (null? server-logs)
	      '()
	      (let loop ((hed  (car server-logs))
			 (tal  (cdr server-logs))
			 (res '()))
		(let* ((mod-time (file-modification-time hed))
		       (serv-dat (server:logf-get-start-info hed))
		       (serv-rec (cons mod-time serv-dat))
		       (fmatch   (string-match fname-rx hed))
		       (pid      (if fmatch (string->number (list-ref fmatch 2)) #f))
		       (new-res  (cons (append serv-rec (list pid)) res)))
		  (if (null? tal)
		      new-res
		      (loop (car tal)(cdr tal) new-res)))))))))

;; given a list of servers get a list of valid servers, i.e. at least
;; 10 seconds old, has started and is less than 1 hour old and is
;; active (i.e. mod-time < 10 seconds
;;
;; mod-time host port start-time pid
;;
;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
;; and servers should stick around for about two hours or so.
;;
(define (server:get-best srvlst)
  (let ((now (current-seconds)))
    (sort
     (filter (lambda (rec)
	       (let ((start-time (list-ref rec 3))
		     (mod-time   (list-ref rec 0)))
		 ;; (print "start-time: " start-time " mod-time: " mod-time)
		 (and start-time mod-time
		      (> (- now start-time) 1)    ;; been running at least 1 seconds
		      (< (- now mod-time)   10)   ;; still alive - file touched in last 10 seconds
		      (< (- now start-time) 3600) ;; under one hour running time
		      )))
	     srvlst)
     (lambda (a b)
       (< (list-ref a 3)
	  (list-ref b 3))))))

(define (server:record->url servr)
  (match-let (((mod-time host port start-time pid)
	       servr))
    (if (and host port)
	(conc host ":" port)
	#f)))

(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*
      (let ((sig (server:mk-signature)))
        (set! *my-client-signature* sig)
        *my-client-signature*)))

358
359
360
361
362
363
364




365
366
367
368
369
370
371
372
373
374
375
376
377





378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
;; 	      (common:simple-file-release-lock lock-file))
;;             (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " NOT removed - could not get lock."))
;;         (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " NOT removed - dotserver-url("dotserver-url") did not match hostport pattern ("hostport")"))))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath)




  (let* ((dotserver-url (server:read-dotserver->url areapath))) ;; tdbdat (tasks:open-db)))
    (if dotserver-url
	(let* ((res (case *transport-type*
		      ((http)(server:ping-server dotserver-url))
		      ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
		      )))
	  (if res
	      dotserver-url
	      (begin
                (server:remove-dotserver-file areapath ".*") ;; remove stale dotserver
                #f)))
	#f)))






;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host-port-in #!key (do-exit #f))
  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
		       (server:read-dotserver->url *toppath*)
		       (if (number? host-port-in) ;; we were handed a server-id
			   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
			     ;; (print "srec: " srec " host-port-in: " host-port-in)
			     (if srec
				 (conc (vector-ref srec 3) ":" (vector-ref srec 4))
				 (conc "no such server-id " host-port-in)))
			   host-port-in))))







>
>
>
>
|








|



>
>
>
>
>







|







367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
;; 	      (common:simple-file-release-lock lock-file))
;;             (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " NOT removed - could not get lock."))
;;         (debug:print-info 0 *default-log-port* "server file " server-file " for " hostport " NOT removed - dotserver-url("dotserver-url") did not match hostport pattern ("hostport")"))))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath)
  (let* ((servers       (server:get-best (server:get-list areapath)))
	 (best-server   (if (null? servers) #f (car servers)))
	 (dotserver-url (if best-server
			    (server:record->url best-server)
			    #f))) ;; (server:read-dotserver->url areapath))) ;; tdbdat (tasks:open-db)))
    (if dotserver-url
	(let* ((res (case *transport-type*
		      ((http)(server:ping-server dotserver-url))
		      ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
		      )))
	  (if res
	      dotserver-url
	      (begin
		(server:kill best-server)
                #f)))
	#f)))

(define (server:kill servr)
  (match-let (((mod-time hostname port start-time pid)
	       servr))
    (tasks:kill-server hostname pid)))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host-port-in #!key (do-exit #f))
  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
		       (server:check-if-running *toppath*)
		       (if (number? host-port-in) ;; we were handed a server-id
			   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
			     ;; (print "srec: " srec " host-port-in: " host-port-in)
			     (if srec
				 (conc (vector-ref srec 3) ":" (vector-ref srec 4))
				 (conc "no such server-id " host-port-in)))
			   host-port-in))))

Modified utils/mk_wrapper from [de6ec68c4a] to [9bb7f8caf7].

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# fi
# echo "exec $prefix/bin/.\$(lsb_release -sr)/$cmd \"\$@\"" >> $target
echo "#!/bin/bash" > $target

if [[ $cmd =~ dboard ]]; then
    cat >> $target <<'EOF'

# disable if not running on homehost
if [[ -e .homehost ]]; then
  homehostname=$( host `cat .homehost` | awk '{print $NF}' | sed 's/\.$//' )
  hostname=$( hostname -f )

  if [[ ! ($homehostname == $hostname) ]]; then
    echo "ERROR: this host ($hostname) is not the homehost ($homehostname) for this megatest run area.  Cannot start dashboard."
    echo "       Please log into homehost before launching dashboard."
    exit 1
  fi
fi

# check that $DISPLAY is set
if [[ -z $DISPLAY ]]; then
   echo 'ERROR: $DISPLAY environment variable is not set; megatest dashboard requires X display address to be set in $DISPLAY.'
   exit 1
fi








|
|
|
|
|
|
|
|
|
|
|







26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# fi
# echo "exec $prefix/bin/.\$(lsb_release -sr)/$cmd \"\$@\"" >> $target
echo "#!/bin/bash" > $target

if [[ $cmd =~ dboard ]]; then
    cat >> $target <<'EOF'

# # disable if not running on homehost
# if [[ -e .homehost ]]; then
#   homehostname=$( host `cat .homehost` | awk '{print $NF}' | sed 's/\.$//' )
#   hostname=$( hostname -f )
# 
#   if [[ ! ($homehostname == $hostname) ]]; then
#     echo "ERROR: this host ($hostname) is not the homehost ($homehostname) for this megatest run area.  Cannot start dashboard."
#     echo "       Please log into homehost before launching dashboard."
#     exit 1
#   fi
# fi

# check that $DISPLAY is set
if [[ -z $DISPLAY ]]; then
   echo 'ERROR: $DISPLAY environment variable is not set; megatest dashboard requires X display address to be set in $DISPLAY.'
   exit 1
fi