Megatest

Check-in [837646d7e8]
Login
Overview
Comment:Centralize server starts to server 0 and pace out same-run-id starts by 40 seconds
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: 837646d7e82ead63c83bdee17c13db73ee22b69f
User & Date: matt on 2014-02-25 23:11:08
Other Links: branch diff | manifest | tags
Context
2014-02-26
14:36
Partial fix to server start issue (breaks startup when db contains dead server). check-in: 05fa3869fb user: mrwellan tags: v1.60
2014-02-25
23:11
Centralize server starts to server 0 and pace out same-run-id starts by 40 seconds check-in: 837646d7e8 user: matt tags: v1.60
20:53
Added some missing checks for deleted in queries for runs check-in: 8f59526787 user: matt tags: v1.60
Changes

Modified api.scm from [7b83b1f06b] to [be6d379826].

13
14
15
16
17
18
19




















20
21
22
23
24
25
26
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







(declare (uses rmt))
(declare (uses db))

;; These are called by the server on recipt of /api calls

(define (api:execute-requests dbstruct cmd params)
  (case (string->symbol cmd)
    ;; SERVERS
    ((start-server)                (apply server:kind-run params))
    ;; ((kill-server)
    ;;  (db:sync-tables (db:tbls *inmemdb*) *inmemdb* *db*)  ;; (db:sync-to *inmemdb* *db*)
    ;;  (let ((hostname (car  *runremote*))
    ;;        (port     (cadr *runremote*))
    ;;        (pid      (if (null? params) #f (car params)))
    ;;        (th1      (make-thread (lambda ()(thread-sleep! 3)(debug:print 0 "Server exiting!")(exit 0)) "Server exit thread")))
    ;;    (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!")
    ;;    (debug:print-info 1 "current pid=" (current-process-id))
    ;;    (open-run-close tasks:server-deregister tasks:open-db 
    ;;     	       hostname
    ;;     	       port: port)
    ;;    (set! *server-run* #f)
    ;;    (thread-sleep! 3)
    ;;    (if pid 
    ;;        (process-signal pid signal/kill)
    ;;        (thread-start! th1))
    ;;    '(#t "exit process started")))

    ;; KEYS
    ((get-key-val-pairs)            (apply db:get-key-val-pairs dbstruct params))
    ((get-keys)                     (db:get-keys dbstruct))

    ;; TESTS
    ;; json doesn't do vectors, convert to list
    ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
92
93
94
95
96
97
98

















99
100
101
102
103
104
105







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







    ((general-call)                 (let ((stmtname   (car params))
					  (run-id     (cadr params))
					  (realparams (cddr params)))
				      (db:with-db dbstruct run-id #t ;; these are all for modifying the db
						  (lambda (db)
						    (db:general-call db stmtname realparams)))))
    ((sync-inmem->db)               (db:sync-touched dbstruct run-id force-sync: #t))
    ;; ((kill-server)
    ;;  (db:sync-tables (db:tbls *inmemdb*) *inmemdb* *db*)  ;; (db:sync-to *inmemdb* *db*)
    ;;  (let ((hostname (car  *runremote*))
    ;;        (port     (cadr *runremote*))
    ;;        (pid      (if (null? params) #f (car params)))
    ;;        (th1      (make-thread (lambda ()(thread-sleep! 3)(debug:print 0 "Server exiting!")(exit 0)) "Server exit thread")))
    ;;    (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!")
    ;;    (debug:print-info 1 "current pid=" (current-process-id))
    ;;    (open-run-close tasks:server-deregister tasks:open-db 
    ;;     	       hostname
    ;;     	       port: port)
    ;;    (set! *server-run* #f)
    ;;    (thread-sleep! 3)
    ;;    (if pid 
    ;;        (process-signal pid signal/kill)
    ;;        (thread-start! th1))
    ;;    '(#t "exit process started")))
    ((sdb-qry)                      (apply sdb:qry params))

    ;; TESTMETA
    ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
    ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
    ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
    (else

Modified client.scm from [c34489a609] to [470e0682b1].

102
103
104
105
106
107
108
109
110
111
112









113
114
115
116
117
118
119
102
103
104
105
106
107
108




109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124







-
-
-
-
+
+
+
+
+
+
+
+
+







			      (server:try-running run-id)
			      (thread-sleep! 10) ;; give server a little time to start up
			      (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1)))
			    (begin
			      (thread-sleep! 5)
			      (client:setup run-id remaining-tries: (- remaining-tries 1))))))
		  (begin    ;; no server registered
		    (thread-sleep! 2)
		    (server:try-running run-id)
		    (thread-sleep! 10) ;; give server a little time to start up
		    (client:setup run-id remaining-tries: (- remaining-tries 1)))))))))
		    (thread-sleep! 2) 
		    (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 2)
			(begin
			  (server:try-running run-id)
			  (thread-sleep! 10) ;; give server a little time to start up
			  (client:setup run-id remaining-tries: (- remaining-tries 1)))
			(begin
			  (thread-sleep! 10)
			  (client:setup run-id remaining-tries: remainint-tries))))))))))

;; keep this as a function to ease future 
(define (client:start run-id server-info)
  (http-transport:client-connect run-id 
				 (tasks:hostinfo-get-interface server-info)
				 (tasks:hostinfo-get-port server-info)))

Modified common.scm from [98ab0c8aab] to [8ab78fa41e].

65
66
67
68
69
70
71

72
73
74
75
76
77
78
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79







+







(define *time-to-exit*      #f)
(define *received-response* #f)
(define *default-numtries*  10)
(define *server-run*        #t)
(define *db-write-access*   #t)
(define *inmemdb*           #f)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))

(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id

Modified http-transport.scm from [e1da68c36b] to [c4fe5c65f6].

142
143
144
145
146
147
148


149

150
151
152
153
154
155
156
142
143
144
145
146
147
148
149
150

151
152
153
154
155
156
157
158







+
+
-
+







	 (begin 
	   (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port")
	   (thread-sleep! 0.1)

	   ;; get_next_port goes here

	   (http-transport:try-start-server ipaddrstr (+ portnum 1) server-id))
	 (begin
	   (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum " http-transport:try-start-server")
	 (print "ERROR: Tried and tried but could not start the server")))
	   (print "ERROR: Tried and tried but could not start the server"))))
   ;; any error in following steps will result in a retry
   (set! *server-info* (list ipaddrstr portnum))
   (open-run-close tasks:server-set-interface-port 
		   tasks:open-db 
		   server-id 
		   ipaddrstr portnum)
   (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum)

Modified rmt.scm from [a00bf53eda] to [3da7dfaf01].

81
82
83
84
85
86
87










88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110



111
112
113
114
115
116
117







+
+
+
+
+
+
+
+
+
+













-
-
-







      (json-read))))

;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
;;
;;======================================================================

;;======================================================================
;;  S E R V E R
;;======================================================================

(define (rmt:kill-server run-id)
  (rmt:send-receive 'kill-server run-id (list run-id)))

(define (rmt:start-server run-id)
  (rmt:send-receive 'start-server 0 (list run-id)))

;;======================================================================
;;  M I S C
;;======================================================================

(define (rmt:login run-id)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))

;; This login does no retries under the hood - it acts a bit like a ping.
;;
(define (rmt:login-no-auto-client-setup connection-info run-id)
  (rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))
  
(define (rmt:kill-server run-id)
  (rmt:send-receive 'kill-server run-id (list run-id)))

;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible 
;;
(define (rmt:general-call stmtname run-id . params)
  (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))

(define (rmt:sync-inmem->db run-id)

Modified server.scm from [9a921aa4ae] to [c4fc1a172b].

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110






111
112
113
114
115




116

117

















118
119
120
121
122
123
124
44
45
46
47
48
49
50



51
52





53
54
55
56
57
58
59





















60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78



79
80
81
82
83
84
85

86
87
88
89
90
91
92

93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118







-
-
-


-
-
-
-
-







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-



















-
-
-
+
+
+
+
+
+

-



+
+
+
+
-
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







;;

;; all routes though here end in exit ...
;;
;; start_server
;;
(define (server:launch run-id)
  ;; (if (server:check-if-running run-id)
  ;; a server is already running
  ;; (exit)
  (http-transport:launch run-id))

;; (define (server:launch-no-exit run-id)
;;   (if (server:check-if-running run-id)
;;       #t ;; if running
;;       (http-transport:launch run-id)))

;;======================================================================
;; Q U E U E   M A N A G E M E N T
;;======================================================================

;; We don't want to flush the queue if it was just flushed
(define *server:last-write-flush* (current-milliseconds))

;; Flush the queue every third of a second. Can we assume that setup-for-run 
;; has already been done?
;; (define (server:write-queue-handler)
;;   (if (setup-for-run)
;;       (let ((db (open-db)))
;; 	(let loop ()
;; 	  (let ((last-write-flush-time #f))
;; 	    (mutex-lock! *incoming-mutex*)
;; 	    (set! last-write-flush-time *server:last-write-flush*)
;; 	    (mutex-unlock! *incoming-mutex*)
;; 	    (if (> (- (current-milliseconds) last-write-flush-time) 10)
;; 		(begin
;; 		  (mutex-lock! *db:process-queue-mutex*)
;; 		  (db:process-cached-writes db)
;; 		  (mutex-unlock! *db:process-queue-mutex*)
;; 		  (thread-sleep! 0.005))))
;; 	  (loop)))
;;       (begin
;; 	(debug:print 0 "ERROR: failed to setup for Megatest in server:write-queue-handler")
;; 	(exit 1))))
    
;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;; Generate a unique signature for this server
(define (server:mk-signature)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)
					  (argv)))))))


;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;; 
(define (server:reply return-addr query-sig success/fail result)
  (db:obj->string (vector success/fail query-sig result)))

;; > file 2>&1 
(define (server:try-running run-id)
  (let* ((rand-name (random 100))
;; Given a run id start a server process    ### NOTE ### > file 2>&1 
;; if the run-id is zero and the target-host is set 
;; try running on that host
;;
(define  (server:run run-id)
  (let* ((target-host (configf:lookup *configdat* "server" "homehost" ))
	 (cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest")
		    ;; " -server - -run-id " run-id " name=" rand-name " > " *toppath* "/db/" run-id "-" rand-name ".log 2>&1 &")))
		      " -server - -run-id " run-id " >> " *toppath* "/db/" run-id ".log 2>&1 &")))
    (debug:print 0 "INFO: Starting server (" cmdln ") as none running ...")
    (push-directory *toppath*)
    (if target-host
	(begin
	  (set-environment-variable "TARGETHOST" target-host)
	  (system (conc "nbfake " cmdln)))
    (system cmdln)
	(system cmdln))
    (pop-directory)))

;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
(define (server:kind-run run-id)
  (let ((last-run-time (hash-table-ref/default *server-kind-run* run-id #f)))
    (if (or (not last-run-time)
	    (> (- (current-seconds) last-run-time) 40))
	(begin
	  (server:run run-id)
	  (hash-table-set! *server-kind-run* run-id (current-seconds))))))

;; The generic run a server command. Dispatches the call to server 0 if run-id != 0
;; 
(define (server:try-running run-id)
  (if (eq? run-id 0)
      (server:run run-id)
      (rmt:start-server run-id)))

(define (server:check-if-running run-id)
  (let loop ((server (open-run-close tasks:get-server tasks:open-db run-id))
	     (trycount 0))
    (thread-sleep! 2)
    (if server
	;; note: client:start will set *runremote*. this needs to be changed