Megatest

Check-in [dc2468ee23]
Login
Overview
Comment:Re-enabled remote server stop. Addressing issues found by test1
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | refactor
Files: files | file ages | folders
SHA1: dc2468ee231f46c1d2eafaca090c8a59f3ea7f7a
User & Date: matt on 2013-05-06 00:04:01
Other Links: branch diff | manifest | tags
Context
2013-05-06
00:44
Clean up related to test1 and refactoring check-in: 5c6d9dac45 user: matt tags: refactor
00:04
Re-enabled remote server stop. Addressing issues found by test1 check-in: dc2468ee23 user: matt tags: refactor
2013-05-05
22:32
Second pass on refactoring keys handling. Created runs:create-run-record which will eventually unify all actions check-in: f4e1a69886 user: matt tags: refactor
Changes

Modified common.scm from [3109b21887] to [585cd1b3a7].

51
52
53
54
55
56
57


58
59
60
61
62
63
64
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66







+
+







(define *logged-in-clients* (make-hash-table))
(define *client-non-blocking-mode* #f)
(define *server-id*         #f)
(define *server-info*       #f)
(define *time-to-exit*      #f)
(define *received-response* #f)
(define *default-numtries*  10)
(define *server-run*        #t)


(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id

Modified db.scm from [7d1a9f4b1b] to [17c4d5f356].

1343
1344
1345
1346
1347
1348
1349
1350
1351


1352
1353
1354
1355
1356
1357
1358
1343
1344
1345
1346
1347
1348
1349


1350
1351
1352
1353
1354
1355
1356
1357
1358







-
-
+
+








(define (cdb:tests-register-test serverdat run-id test-name item-path)
  (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path))

(define (cdb:flush-queue serverdat)
  (cdb:client-call serverdat 'flush #f *default-numtries*))

(define (cdb:kill-server serverdat)
  (cdb:client-call serverdat 'killserver #t *default-numtries*))
(define (cdb:kill-server serverdat pid)
  (cdb:client-call serverdat 'killserver #t *default-numtries* pid))

(define (cdb:roll-up-pass-fail-counts serverdat run-id test-name item-path status)
  (cdb:client-call serverdat 'immediate #f *default-numtries* open-run-close db:roll-up-pass-fail-counts #f run-id test-name item-path status))

(define (cdb:get-test-info serverdat run-id test-name item-path)
  (cdb:client-call serverdat 'immediate #f *default-numtries* open-run-close db:get-test-info #f run-id test-name item-path))

1582
1583
1584
1585
1586
1587
1588



1589
1590



1591
1592
1593
1594






1595
1596
1597
1598
1599
1600
1601
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591


1592
1593
1594




1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607







+
+
+
-
-
+
+
+
-
-
-
-
+
+
+
+
+
+







			   (server:reply return-address qry-sig #f (list #f (conc "Login failed due to mismatch paths: " calling-path ", " *toppath*)))))))
		((flush sync)
		 (server:reply return-address qry-sig #t 1)) ;; (length data)))
		((set-verbosity)
		 (set! *verbosity* (car params))
		 (server:reply return-address qry-sig #t '(#t *verbosity*)))
		((killserver)
		 (let ((hostname (car  *runremote*))
		       (port     (cadr *runremote*))
		       (pid      (car params)))
		 (debug:print 0 "WARNING: Server going down in 15 seconds by user request!")
		 (open-run-close tasks:server-deregister tasks:open-db 
		   (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!")
		   (debug:print-info 1 "current pid=" (current-process-id))
		   (open-run-close tasks:server-deregister tasks:open-db 
				 (car *runremote*)
				 pullport: (cadr *runremote*))
		 (thread-start! (make-thread (lambda ()(thread-sleep! 15)(exit))))
		 (server:reply return-address qry-sig #t '(#t "exit process started")))
				   hostname
				   port: port)
		   (set! *server-run* #f)
		   (thread-sleep! 3)
		   (process-signal pid signal/kill)
		   (server:reply return-address qry-sig #t '(#t "exit process started"))))
		(else ;; not a command, i.e. is a query
		 (debug:print 0 "ERROR: Unrecognised query/command " stmt-key)
		 (server:reply return-address qry-sig #f 'failed)))))
	   (else
	    (debug:print-info 11 "Executing " stmt-key " for " params)
	    (apply sqlite3:execute (hash-table-ref queries stmt-key) params)
	    (server:reply return-address qry-sig #t #t)))))))

Modified http-transport.scm from [fe6673ff15] to [5696601f09].

32
33
34
35
36
37
38
39

40
41
42
43
44
45
46
32
33
34
35
36
37
38

39
40
41
42
43
44
45
46







-
+







(include "db_records.scm")

(define (http-transport:make-server-url hostport)
  (if (not hostport)
      #f
      (conc "http://" (car hostport) ":" (cadr hostport))))

(define  *server-loop-heart-beat* (current-seconds))
(define *server-loop-heart-beat* (current-seconds))
(define *heartbeat-mutex* (make-mutex))

;;======================================================================
;; S E R V E R
;;======================================================================

;; Call this to start the actual server
271
272
273
274
275
276
277

278
279


280
281

282
283
284
285
286
287
288
271
272
273
274
275
276
277
278


279
280
281

282
283
284
285
286
287
288
289







+
-
-
+
+

-
+







        (tasks:server-update-heartbeat tdb spid)
      
        ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
        (mutex-lock! *heartbeat-mutex*)
        (set! last-access *last-db-access*)
        (mutex-unlock! *heartbeat-mutex*)
	;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout)
        (if (and *server-run*
        (if (> (+ last-access server-timeout)
               (current-seconds))
		 (> (+ last-access server-timeout)
		    (current-seconds)))
            (begin
              (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
              (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
              (loop 0))
            (begin
              (debug:print-info 0 "Starting to shutdown the server.")
              ;; need to delete only *my* server entry (future use)
              (set! *time-to-exit* #t)
              (open-run-close tasks:server-deregister-self tasks:open-db (get-host-name))
              (thread-sleep! 1)

Modified runs.scm from [b1cb11f80b] to [1164e5fe02].

68
69
70
71
72
73
74
75

76
77
78
79
80
81
82
68
69
70
71
72
73
74

75
76
77
78
79
80
81
82







-
+







  (let* ((testname (db:test-get-testname   test))
	 (itempath (db:test-get-item-path test)))
    (conc testname (if (equal? itempath "") "" (conc "(" itempath ")")))))

;; This is the *new* methodology. One record to inform them and in the chaos, organise them.
;;
(define (runs:create-run-record)
  (let* ((mconfig      (if *configdat* 
  (let* ((mconfig      (if *configdat*
		           *configdat*
		           (if (setup-for-run)
		               *configdat*
		               (begin
		                 (debug:print 0 "ERROR: Called setup in a non-megatest area, exiting")
		                 (exit 1)))))
	  (runrec      (runs:runrec-make-record))

Modified tasks.scm from [9d3053bc48] to [6678d1eaa1].

101
102
103
104
105
106
107
108

109
110
111
112
113
114
115
116
117


118
119
120
121
122
123
124
101
102
103
104
105
106
107

108
109
110
111
112
113
114
115


116
117
118
119
120
121
122
123
124







-
+







-
-
+
+







   interface
   port
   pubport
   transport
   ))

;; NB// two servers with same pid on different hosts will be removed from the list if pid: is used!
(define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)(action 'markdead))
(define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)(action 'delete))
  (debug:print-info 11 "server-deregister " hostname ", port " port ", pid " pid)
  (if pid
      (case action
	((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE pid=?;" pid))
	(else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE pid=?;" pid)))
      (if port
	  (case action
	    ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE  hostname=? AND port=?;" hostname port))
	    (else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE hostname=? AND port=?;" hostname port)))
	    ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE (interface=? or hostname=?) AND port=?;" hostname hostname port))
	    (else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE (interface=? or hostname=?) AND port=?;" hostname hostname port)))
	  (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified"))))

(define (tasks:server-deregister-self mdb hostname)
  (tasks:server-deregister mdb hostname pid: (current-process-id)))

;; need a simple call for robustly removing records given host and port
(define (tasks:server-delete mdb hostname port)
139
140
141
142
143
144
145
146

147
148
149
150
151
152
153
139
140
141
142
143
144
145

146
147
148
149
150
151
152
153







-
+







       (begin
	 (debug:print 0 "ERROR: tasks:server-get-server-id needs (hostname and pid) OR (iface and port) OR (hostname and port)")
	 "SELECT id FROM servers WHERE pid=-999;")))
     (if hostname hostname iface)(if pid pid port))
    res))

(define (tasks:server-update-heartbeat mdb server-id)
  (debug:print-info 0 "Heart beat update of server id=" server-id)
  (debug:print-info 1 "Heart beat update of server id=" server-id)
  (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id))

;; alive servers keep the heartbeat field upto date with seconds every 6 or so seconds
(define (tasks:server-alive? mdb server-id #!key (iface #f)(hostname #f)(port #f)(pid #f))
  (let* ((server-id  (if server-id 
			 server-id
			 (tasks:server-get-server-id mdb hostname iface port pid)))
248
249
250
251
252
253
254
255
256
257
258
259
260
261







262
263
264
265
266
267
268
248
249
250
251
252
253
254







255
256
257
258
259
260
261
262
263
264
265
266
267
268







-
-
-
-
-
-
-
+
+
+
+
+
+
+







			       "  EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 1 "Sending signal/term to " pid " on " hostname)
	     (process-signal pid signal/term)
	     (thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill
	     ;;(process-signal pid signal/kill)
	     ) ;; local machine, send sig term
	    (begin
		(debug:print-info 1 "Stopping remote servers not yet supported."))))
	;;      (debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide")
	;;      (let ((serverdat (list hostname port)))
	;;	(case (string->symbol transport)
	;;	  ((http)(http-transport:client-connect hostname port))
	;;	  (else  (debug:print "ERROR: remote stopping servers of type " transport " not supported yet")))
	;;	(cdb:kill-server serverdat)))))    ;; remote machine, try telling server to commit suicide
	      ;;(debug:print-info 1 "Stopping remote servers not yet supported."))))
	      (debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide")
	      (let ((serverdat (list hostname port)))
	      	(case (if (string? transport) (string->symbol transport) transport)
	      	  ((http)(http-transport:client-connect hostname port))
	      	  (else  (debug:print "ERROR: remote stopping servers of type " transport " not supported yet")))
	      	(cdb:kill-server serverdat pid)))))    ;; remote machine, try telling server to commit suicide
      (begin
	(if status 
	    (if (equal? hostname (get-host-name))
		(begin
		  (debug:print-info 1 "Sending signal/term to " pid " on " hostname)
		  (process-signal pid signal/term)  ;; local machine, send sig term
		  (thread-sleep! 5)                 ;; give it five seconds to die peacefully then do a brutal kill

Modified tests/tests.scm from [8c3cd1cc08] to [340a51275a].

79
80
81
82
83
84
85
86

87
88
89
90



91
92









93
94
95
96



97
98
99
100
101
102
103
104
105
106


107
108

109



110
111
112
113
114
115
116
79
80
81
82
83
84
85

86
87



88
89
90
91

92
93
94
95
96
97
98
99
100
101



102
103
104
105









106
107
108
109
110

111
112
113
114
115
116
117
118
119
120







-
+

-
-
-
+
+
+

-
+
+
+
+
+
+
+
+
+

-
-
-
+
+
+

-
-
-
-
-
-
-
-
-
+
+


+
-
+
+
+








(test "setup for run" #t (begin (setup-for-run)
				(string? (getenv "MT_RUN_AREA_HOME"))))

(test "server-register, get-best-server" #t (let ((res #f))
					      (open-run-close tasks:server-register tasks:open-db 1 "bob" 1234 100 'live 'http)
					      (set! res (open-run-close tasks:get-best-server tasks:open-db))
					      (number? (cadddr res))))
					      (number? (vector-ref res 3))))

(test "de-register server" #t (let ((res #f))
				(open-run-close tasks:server-deregister tasks:open-db "bob" pullport: 1234)
				(list? (open-run-close tasks:get-best-server tasks:open-db))))
(test "de-register server" #f (let ((res #f))
				(open-run-close tasks:server-deregister tasks:open-db "bob" port: 1234)
				(open-run-close tasks:get-best-server tasks:open-db)))

(define hostinfo #f)
(define server-pid #f)
(test "launch server" #t (let ((pid (process-fork (lambda ()
						    ;; (daemon:ize)
						    (server:launch 'http)))))
			   (set! server-pid pid)
			   (print "pid=" server-pid)
			   (number? pid)))

(thread-sleep! 3) ;; need to wait for server to start. Yes, a better way is needed.
(test "get-best-server" #t (let ((dat (open-run-close tasks:get-best-server tasks:open-db)))
			     (set! hostinfo dat) ;; host ip pullport pubport
			     (and (string? (car dat))
				  (number? (caddr dat)))))
			     (set! *runremote* (list (vector-ref dat 1)(vector-ref dat 2))) ;; host ip pullport pubport
			     (and (string? (car  *runremote*))
			     	  (number? (cadr *runremote*)))))

(test #f #t (let ((zmq-socket (server:client-connect
			       (cadr hostinfo)
			       (caddr hostinfo)
			       ;; (cadddr hostinfo)
			       )))
	      (set! *runremote* zmq-socket)
	      (string? (car *runremote*))))

(test #f #t (let ((res (server:client-login *runremote*)))
(test #f #t (car (cdb:login *runremote* *toppath* *my-client-signature*)))
(test #f #t (let ((res (client:login *runremote*)))
	      (car res)))

(test "server stop" #f (let ((hostname (car  *runremote*))
(test #f #t (car (cdb:login *runremote* *toppath* *my-client-signature*)))
			     (port     (cadr *runremote*)))
			 (tasks:kill-server #t hostname port server-pid 'http)
			 (open-run-close tasks:get-best-server tasks:open-db)))

(exit 1)

;;======================================================================
;; C O N F I G   F I L E S 
;;======================================================================