Megatest

Check-in [837646d7e8]
Login
Overview
Comment:Centralize server starts to server 0 and pace out same-run-id starts by 40 seconds
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: 837646d7e82ead63c83bdee17c13db73ee22b69f
User & Date: matt on 2014-02-25 23:11:08
Other Links: branch diff | manifest | tags
Context
2014-02-26
14:36
Partial fix to server start issue (breaks startup when db contains dead server). check-in: 05fa3869fb user: mrwellan tags: v1.60
2014-02-25
23:11
Centralize server starts to server 0 and pace out same-run-id starts by 40 seconds check-in: 837646d7e8 user: matt tags: v1.60
20:53
Added some missing checks for deleted in queries for runs check-in: 8f59526787 user: matt tags: v1.60
Changes

Modified api.scm from [7b83b1f06b] to [be6d379826].

13
14
15
16
17
18
19




















20
21
22
23
24
25
26
(declare (uses rmt))
(declare (uses db))

;; These are called by the server on recipt of /api calls

(define (api:execute-requests dbstruct cmd params)
  (case (string->symbol cmd)




















    ;; KEYS
    ((get-key-val-pairs)            (apply db:get-key-val-pairs dbstruct params))
    ((get-keys)                     (db:get-keys dbstruct))

    ;; TESTS
    ;; json doesn't do vectors, convert to list
    ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
(declare (uses rmt))
(declare (uses db))

;; These are called by the server on recipt of /api calls

(define (api:execute-requests dbstruct cmd params)
  (case (string->symbol cmd)
    ;; SERVERS
    ((start-server)                (apply server:kind-run params))
    ;; ((kill-server)
    ;;  (db:sync-tables (db:tbls *inmemdb*) *inmemdb* *db*)  ;; (db:sync-to *inmemdb* *db*)
    ;;  (let ((hostname (car  *runremote*))
    ;;        (port     (cadr *runremote*))
    ;;        (pid      (if (null? params) #f (car params)))
    ;;        (th1      (make-thread (lambda ()(thread-sleep! 3)(debug:print 0 "Server exiting!")(exit 0)) "Server exit thread")))
    ;;    (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!")
    ;;    (debug:print-info 1 "current pid=" (current-process-id))
    ;;    (open-run-close tasks:server-deregister tasks:open-db 
    ;;     	       hostname
    ;;     	       port: port)
    ;;    (set! *server-run* #f)
    ;;    (thread-sleep! 3)
    ;;    (if pid 
    ;;        (process-signal pid signal/kill)
    ;;        (thread-start! th1))
    ;;    '(#t "exit process started")))

    ;; KEYS
    ((get-key-val-pairs)            (apply db:get-key-val-pairs dbstruct params))
    ((get-keys)                     (db:get-keys dbstruct))

    ;; TESTS
    ;; json doesn't do vectors, convert to list
    ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
    ((general-call)                 (let ((stmtname   (car params))
					  (run-id     (cadr params))
					  (realparams (cddr params)))
				      (db:with-db dbstruct run-id #t ;; these are all for modifying the db
						  (lambda (db)
						    (db:general-call db stmtname realparams)))))
    ((sync-inmem->db)               (db:sync-touched dbstruct run-id force-sync: #t))
    ;; ((kill-server)
    ;;  (db:sync-tables (db:tbls *inmemdb*) *inmemdb* *db*)  ;; (db:sync-to *inmemdb* *db*)
    ;;  (let ((hostname (car  *runremote*))
    ;;        (port     (cadr *runremote*))
    ;;        (pid      (if (null? params) #f (car params)))
    ;;        (th1      (make-thread (lambda ()(thread-sleep! 3)(debug:print 0 "Server exiting!")(exit 0)) "Server exit thread")))
    ;;    (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!")
    ;;    (debug:print-info 1 "current pid=" (current-process-id))
    ;;    (open-run-close tasks:server-deregister tasks:open-db 
    ;;     	       hostname
    ;;     	       port: port)
    ;;    (set! *server-run* #f)
    ;;    (thread-sleep! 3)
    ;;    (if pid 
    ;;        (process-signal pid signal/kill)
    ;;        (thread-start! th1))
    ;;    '(#t "exit process started")))
    ((sdb-qry)                      (apply sdb:qry params))

    ;; TESTMETA
    ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
    ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
    ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
    (else







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







92
93
94
95
96
97
98

















99
100
101
102
103
104
105
    ((general-call)                 (let ((stmtname   (car params))
					  (run-id     (cadr params))
					  (realparams (cddr params)))
				      (db:with-db dbstruct run-id #t ;; these are all for modifying the db
						  (lambda (db)
						    (db:general-call db stmtname realparams)))))
    ((sync-inmem->db)               (db:sync-touched dbstruct run-id force-sync: #t))

















    ((sdb-qry)                      (apply sdb:qry params))

    ;; TESTMETA
    ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
    ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
    ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
    (else

Modified client.scm from [c34489a609] to [470e0682b1].

102
103
104
105
106
107
108
109


110
111
112



113
114
115
116
117
118
119
			      (server:try-running run-id)
			      (thread-sleep! 10) ;; give server a little time to start up
			      (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1)))
			    (begin
			      (thread-sleep! 5)
			      (client:setup run-id remaining-tries: (- remaining-tries 1))))))
		  (begin    ;; no server registered
		    (thread-sleep! 2)


		    (server:try-running run-id)
		    (thread-sleep! 10) ;; give server a little time to start up
		    (client:setup run-id remaining-tries: (- remaining-tries 1)))))))))




;; keep this as a function to ease future 
(define (client:start run-id server-info)
  (http-transport:client-connect run-id 
				 (tasks:hostinfo-get-interface server-info)
				 (tasks:hostinfo-get-port server-info)))








|
>
>
|
|
|
>
>
>







102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
			      (server:try-running run-id)
			      (thread-sleep! 10) ;; give server a little time to start up
			      (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1)))
			    (begin
			      (thread-sleep! 5)
			      (client:setup run-id remaining-tries: (- remaining-tries 1))))))
		  (begin    ;; no server registered
		    (thread-sleep! 2) 
		    (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 2)
			(begin
			  (server:try-running run-id)
			  (thread-sleep! 10) ;; give server a little time to start up
			  (client:setup run-id remaining-tries: (- remaining-tries 1)))
			(begin
			  (thread-sleep! 10)
			  (client:setup run-id remaining-tries: remainint-tries))))))))))

;; keep this as a function to ease future 
(define (client:start run-id server-info)
  (http-transport:client-connect run-id 
				 (tasks:hostinfo-get-interface server-info)
				 (tasks:hostinfo-get-port server-info)))

Modified common.scm from [98ab0c8aab] to [8ab78fa41e].

65
66
67
68
69
70
71

72
73
74
75
76
77
78
(define *time-to-exit*      #f)
(define *received-response* #f)
(define *default-numtries*  10)
(define *server-run*        #t)
(define *db-write-access*   #t)
(define *inmemdb*           #f)
(define *run-id*            #f)


(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id







>







65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
(define *time-to-exit*      #f)
(define *received-response* #f)
(define *default-numtries*  10)
(define *server-run*        #t)
(define *db-write-access*   #t)
(define *inmemdb*           #f)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))

(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id

Modified http-transport.scm from [e1da68c36b] to [c4fe5c65f6].

142
143
144
145
146
147
148


149
150
151
152
153
154
155
156
	 (begin 
	   (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port")
	   (thread-sleep! 0.1)

	   ;; get_next_port goes here

	   (http-transport:try-start-server ipaddrstr (+ portnum 1) server-id))


	 (print "ERROR: Tried and tried but could not start the server")))
   ;; any error in following steps will result in a retry
   (set! *server-info* (list ipaddrstr portnum))
   (open-run-close tasks:server-set-interface-port 
		   tasks:open-db 
		   server-id 
		   ipaddrstr portnum)
   (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum)







>
>
|







142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
	 (begin 
	   (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port")
	   (thread-sleep! 0.1)

	   ;; get_next_port goes here

	   (http-transport:try-start-server ipaddrstr (+ portnum 1) server-id))
	 (begin
	   (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum " http-transport:try-start-server")
	   (print "ERROR: Tried and tried but could not start the server"))))
   ;; any error in following steps will result in a retry
   (set! *server-info* (list ipaddrstr portnum))
   (open-run-close tasks:server-set-interface-port 
		   tasks:open-db 
		   server-id 
		   ipaddrstr portnum)
   (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum)

Modified rmt.scm from [a00bf53eda] to [3da7dfaf01].

81
82
83
84
85
86
87










88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
      (json-read))))

;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
;;
;;======================================================================











;;======================================================================
;;  M I S C
;;======================================================================

(define (rmt:login run-id)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))

;; This login does no retries under the hood - it acts a bit like a ping.
;;
(define (rmt:login-no-auto-client-setup connection-info run-id)
  (rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))
  
(define (rmt:kill-server run-id)
  (rmt:send-receive 'kill-server run-id (list run-id)))

;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible 
;;
(define (rmt:general-call stmtname run-id . params)
  (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))

(define (rmt:sync-inmem->db run-id)







>
>
>
>
>
>
>
>
>
>













<
<
<







81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110



111
112
113
114
115
116
117
      (json-read))))

;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
;;
;;======================================================================

;;======================================================================
;;  S E R V E R
;;======================================================================

(define (rmt:kill-server run-id)
  (rmt:send-receive 'kill-server run-id (list run-id)))

(define (rmt:start-server run-id)
  (rmt:send-receive 'start-server 0 (list run-id)))

;;======================================================================
;;  M I S C
;;======================================================================

(define (rmt:login run-id)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))

;; This login does no retries under the hood - it acts a bit like a ping.
;;
(define (rmt:login-no-auto-client-setup connection-info run-id)
  (rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))
  



;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible 
;;
(define (rmt:general-call stmtname run-id . params)
  (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))

(define (rmt:sync-inmem->db run-id)

Modified server.scm from [9a921aa4ae] to [c4fc1a172b].

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108



109
110
111
112
113
114
115




116
117

















118
119
120
121
122
123
124
;;

;; all routes though here end in exit ...
;;
;; start_server
;;
(define (server:launch run-id)
  ;; (if (server:check-if-running run-id)
  ;; a server is already running
  ;; (exit)
  (http-transport:launch run-id))

;; (define (server:launch-no-exit run-id)
;;   (if (server:check-if-running run-id)
;;       #t ;; if running
;;       (http-transport:launch run-id)))

;;======================================================================
;; Q U E U E   M A N A G E M E N T
;;======================================================================

;; We don't want to flush the queue if it was just flushed
(define *server:last-write-flush* (current-milliseconds))

;; Flush the queue every third of a second. Can we assume that setup-for-run 
;; has already been done?
;; (define (server:write-queue-handler)
;;   (if (setup-for-run)
;;       (let ((db (open-db)))
;; 	(let loop ()
;; 	  (let ((last-write-flush-time #f))
;; 	    (mutex-lock! *incoming-mutex*)
;; 	    (set! last-write-flush-time *server:last-write-flush*)
;; 	    (mutex-unlock! *incoming-mutex*)
;; 	    (if (> (- (current-milliseconds) last-write-flush-time) 10)
;; 		(begin
;; 		  (mutex-lock! *db:process-queue-mutex*)
;; 		  (db:process-cached-writes db)
;; 		  (mutex-unlock! *db:process-queue-mutex*)
;; 		  (thread-sleep! 0.005))))
;; 	  (loop)))
;;       (begin
;; 	(debug:print 0 "ERROR: failed to setup for Megatest in server:write-queue-handler")
;; 	(exit 1))))
    
;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;; Generate a unique signature for this server
(define (server:mk-signature)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)
					  (argv)))))))


;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;; 
(define (server:reply return-addr query-sig success/fail result)
  (db:obj->string (vector success/fail query-sig result)))

;; > file 2>&1 



(define (server:try-running run-id)
  (let* ((rand-name (random 100))
	 (cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest")
		    ;; " -server - -run-id " run-id " name=" rand-name " > " *toppath* "/db/" run-id "-" rand-name ".log 2>&1 &")))
		      " -server - -run-id " run-id " >> " *toppath* "/db/" run-id ".log 2>&1 &")))
    (debug:print 0 "INFO: Starting server (" cmdln ") as none running ...")
    (push-directory *toppath*)




    (system cmdln)
    (pop-directory)))


















(define (server:check-if-running run-id)
  (let loop ((server (open-run-close tasks:get-server tasks:open-db run-id))
	     (trycount 0))
    (thread-sleep! 2)
    (if server
	;; note: client:start will set *runremote*. this needs to be changed







<
<
<


<
<
<
<
<







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<



















|
>
>
>
|
|

<



>
>
>
>
|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







44
45
46
47
48
49
50



51
52





53
54
55
56
57
58
59





















60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85

86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
;;

;; all routes though here end in exit ...
;;
;; start_server
;;
(define (server:launch run-id)



  (http-transport:launch run-id))






;;======================================================================
;; Q U E U E   M A N A G E M E N T
;;======================================================================

;; We don't want to flush the queue if it was just flushed
(define *server:last-write-flush* (current-milliseconds))






















;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;; Generate a unique signature for this server
(define (server:mk-signature)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)
					  (argv)))))))


;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;; 
(define (server:reply return-addr query-sig success/fail result)
  (db:obj->string (vector success/fail query-sig result)))

;; Given a run id start a server process    ### NOTE ### > file 2>&1 
;; if the run-id is zero and the target-host is set 
;; try running on that host
;;
(define  (server:run run-id)
  (let* ((target-host (configf:lookup *configdat* "server" "homehost" ))
	 (cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest")

		      " -server - -run-id " run-id " >> " *toppath* "/db/" run-id ".log 2>&1 &")))
    (debug:print 0 "INFO: Starting server (" cmdln ") as none running ...")
    (push-directory *toppath*)
    (if target-host
	(begin
	  (set-environment-variable "TARGETHOST" target-host)
	  (system (conc "nbfake " cmdln)))
	(system cmdln))
    (pop-directory)))

;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
(define (server:kind-run run-id)
  (let ((last-run-time (hash-table-ref/default *server-kind-run* run-id #f)))
    (if (or (not last-run-time)
	    (> (- (current-seconds) last-run-time) 40))
	(begin
	  (server:run run-id)
	  (hash-table-set! *server-kind-run* run-id (current-seconds))))))

;; The generic run a server command. Dispatches the call to server 0 if run-id != 0
;; 
(define (server:try-running run-id)
  (if (eq? run-id 0)
      (server:run run-id)
      (rmt:start-server run-id)))

(define (server:check-if-running run-id)
  (let loop ((server (open-run-close tasks:get-server tasks:open-db run-id))
	     (trycount 0))
    (thread-sleep! 2)
    (if server
	;; note: client:start will set *runremote*. this needs to be changed