Megatest

Check-in [bc256de3e4]
Login
Overview
Comment:Made *runremote* overrideable via a local var
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | multi-area
Files: files | file ages | folders
SHA1: bc256de3e4fe99d7170afd605fec4411247ba28f
User & Date: matt on 2015-04-04 23:18:38
Other Links: branch diff | manifest | tags
Context
2015-04-05
01:01
The big refactor to eliminate globals (at least those that interfere with the multi-area viewer) continues. check-in: b5e26242ac user: matt tags: multi-area
2015-04-04
23:18
Made *runremote* overrideable via a local var check-in: bc256de3e4 user: matt tags: multi-area
11:57
Moved dashboard to olddashboard and newdashboard to dashboard check-in: 53c81338a0 user: matt tags: multi-area
Changes

Modified client.scm from [c1867a27a6] to [2015bd1f07].

64
65
66
67
68
69
70
71

72
73
74
75
76
77
78
79
80
81

82
83
84
85
86

87
88
89
90
91
92
93
64
65
66
67
68
69
70

71
72
73
74
75
76
77
78
79
80

81
82
83
84
85

86
87
88
89
90
91
92
93







-
+









-
+




-
+







;; 
;; (define (client:setup-rpc run-id)
;;   (debug:print 0 "INFO: client:setup remaining-tries=" remaining-tries)
;;   (if (<= remaining-tries 0)
;;       (begin
;; 	(debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
;; 	(exit 1))
;;       (let ((host-info (hash-table-ref/default *runremote* run-id #f)))
;;       (let ((host-info (hash-table-ref/default (common:get-remote remote) run-id #f)))
;; 	(debug:print-info 0 "client:setup host-info=" host-info ", remaining-tries=" remaining-tries)
;; 	(if host-info
;; 	    (let* ((iface     (car  host-info))
;; 		   (port      (cadr host-info))
;; 		   (start-res (client:connect iface port))
;; 		   ;; (ping-res  (server:ping-server run-id iface port))
;; 		   (ping-res  (client:login-no-auto-setup start-res run-id)))
;; 	      (if ping-res   ;; sucessful login?
;; 		  (begin
;; 		    (hash-table-set! *runremote* run-id start-res)
;; 		    (hash-table-set! (common:get-remote remote) run-id start-res)
;; 		    start-res)  ;; return the server info
;; 		  (if (member remaining-tries '(3 4 6))
;; 		      (begin    ;; login failed
;; 			(debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info)
;; 			(hash-table-delete! *runremote* run-id)
;; 			(hash-table-delete! (common:get-remote remote) run-id)
;; 			(open-run-close tasks:server-force-clean-run-record
;; 			 		tasks:open-db
;; 			 		run-id 
;; 			 		(car  host-info)
;; 			 		(cadr host-info)
;; 					" client:setup (host-info=#t)")
;; 			(thread-sleep! 5)
103
104
105
106
107
108
109
110

111
112
113
114
115

116
117
118
119
120
121
122
103
104
105
106
107
108
109

110
111
112
113
114

115
116
117
118
119
120
121
122







-
+




-
+







;; 		  (let* ((iface     (tasks:hostinfo-get-interface server-dat))
;; 			 (port      (tasks:hostinfo-get-port      server-dat))
;; 			 (start-res (http-transport:client-connect iface port))
;; 			 ;; (ping-res  (server:ping-server run-id iface port))
;; 			 (ping-res  (rmt:login-no-auto-client-setup start-res run-id)))
;; 		    (if start-res
;; 			(begin
;; 			  (hash-table-set! *runremote* run-id start-res)
;; 			  (hash-table-set! (common:get-remote remote) run-id start-res)
;; 			  start-res)
;; 			(if (member remaining-tries '(2 5))
;; 			    (begin    ;; login failed
;; 			      (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
;; 			      (hash-table-delete! *runremote* run-id)
;; 			      (hash-table-delete! (common:get-remote remote) run-id)
;; 			      (open-run-close tasks:server-force-clean-run-record
;; 					      tasks:open-db
;; 					      run-id 
;; 					      (tasks:hostinfo-get-interface server-dat)
;; 					      (tasks:hostinfo-get-port      server-dat)
;; 					      " client:setup (server-dat = #t)")
;; 			      (thread-sleep! 2)
142
143
144
145
146
147
148
149

150
151

152
153
154
155

156
157

158
159
160
161
162
163
164
142
143
144
145
146
147
148

149
150

151
152
153
154

155
156

157
158
159
160
161
162
163
164







-
+

-
+



-
+

-
+







;; 			  (thread-sleep! 10) ;; give server a little time to start up
;; 			  (client:setup run-id remaining-tries: (- remaining-tries 1)))))))))))

;; Do all the connection work, look up the transport type and set up the
;; connection if required.
;;
;; There are two scenarios. 
;;   1. We are a test manager and we received *transport-type* and *runremote* via cmdline
;;   1. We are a test manager and we received *transport-type* and (common:get-remote remote) via cmdline
;;   2. We are a run tests, list runs or other interactive process and we must figure out
;;      *transport-type* and *runremote* from the monitor.db
;;      *transport-type* and (common:get-remote remote) from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;; lookup_server, need to remove (common:get-remote remote) stuff
;;
(define (client:setup-http run-id #!key (remaining-tries 10) (failed-connects 0))
(define (client:setup-http run-id #!key (remaining-tries 10) (failed-connects 0)(remote #f))
  (debug:print-info 2 "client:setup remaining-tries=" remaining-tries)
  (let* ((tdbdat (tasks:open-db)))
    (if (<= remaining-tries 0)
	(begin
	  (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
	  (exit 1))
	(let* ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id)))
175
176
177
178
179
180
181
182

183
184
185
186
187
188
189

190
191
192
193
194
195
196
175
176
177
178
179
180
181

182
183
184
185
186
187
188

189
190
191
192
193
194
195
196







-
+






-
+







				  ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id)))
 					   (if logininfo
 					       (car (vector-ref logininfo 1))
 					       #f))))))
		(if (and start-res
			 ping-res)
		    (begin
		      (hash-table-set! *runremote* run-id start-res)
		      (common:set-remote! remote run-id start-res)
		      (debug:print-info 2 "connected to " (http-transport:server-dat-make-url start-res))
		      start-res)
		    (begin    ;; login failed but have a server record, clean out the record and try again
		      (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
		      (case *transport-type* 
			((http)(http-transport:close-connections run-id)))
		      (hash-table-delete! *runremote* run-id)
		      (common:del-remote! remote run-id)
		      (tasks:kill-server-run-id run-id)
		      (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat)
							   run-id 
							   (tasks:hostinfo-get-interface server-dat)
							   (tasks:hostinfo-get-port      server-dat)
							   " client:setup (server-dat = #t)")
		      (if (> remaining-tries 8)

Modified common.scm from [163b8623d2] to [61d7d81521].

66
67
68
69
70
71
72























73
74
75
76
77
78
79
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







(define *db-access-mutex*     (make-mutex))

;; SERVER
(define *my-client-signature* #f)
(define *transport-type*    'http)
(define *transport-type*    'http)             ;; override with [server] transport http|rpc|nmsg
(define *runremote*         (make-hash-table)) ;; if set up for server communication this will hold <host port>

(define (common:get-remote remote run-id)
  (let ((ht (or remote *runremote*)))
    (if ht
	(hash-table-ref/default ht run-id #f)
	#f)))

(define (common:set-remote! remote run-id value)
  (let ((ht (or remote *runremote*)))
    (if ht
	(hash-table-set! ht run-id value))))

(define (common:del-remote! remote run-id)
  (let ((ht (or remote *runremote*)))
    (if ht
	(hash-table-delete! ht run-id))))

(define (common:get-remote-all remote)
  (let ((ht (or remote *runremote*)))
    (if ht
	(hash-table-keys ht)
	'())))

(define *max-cache-size*    0)
(define *logged-in-clients* (make-hash-table))
(define *client-non-blocking-mode* #f)
(define *server-id*         #f)
(define *server-info*       #f)
(define *time-to-exit*      #f)
(define *received-response* #f)

Modified dashboard.scm from [8ecdd4ecf2] to [99bb436233].

72
73
74
75
76
77
78
79

80
81
82
83
84
85
86
72
73
74
75
76
77
78

79
80
81
82
83
84
85
86







-
+







(if (not (launch:setup-for-run))
    (begin
      (print "Failed to find megatest.config, exiting") 
      (exit 1)))

;; (if (args:get-arg "-host")
;;     (begin
;;       (set! *runremote* (string-split (args:get-arg "-host" ":")))
;;       (set! (common:get-remote remote) (string-split (args:get-arg "-host" ":")))
;;       (client:launch))
;;     (client:launch))

;; ease debugging by loading ~/.dashboardrc
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc")))
  (if (file-exists? debugcontrolf)
      (load debugcontrolf)))
598
599
600
601
602
603
604
605

606
607

608
609
610
611
612
613
614
598
599
600
601
602
603
604

605


606
607
608
609
610
611
612
613







-
+
-
-
+







     (iup:attribute-set! tabtop "TABTITLE2" "Run Control")
     (iup:attribute-set! tabtop "TABTITLE3" "megatest.config") 
     (iup:attribute-set! tabtop "TABTITLE4" "runconfigs.config")
     tabtop)))

(define *current-window-id* 0)

(define (newdashboard dbstruct)
(define (newdashboard data)
  (let* ((data     (make-hash-table))
	 (keys     (db:get-keys dbstruct))
  (let* ((keys     (db:get-keys dbstruct))
	 (runname  "%")
	 (testpatt "%")
	 (keypatts (map (lambda (k)(list k "%")) keys))
	 (states   '())
	 (statuses '())
	 (nextmintime (current-milliseconds))
	 (my-window-id *current-window-id*))
625
626
627
628
629
630
631
632
633
634
635





624
625
626
627
628
629
630




631
632
633
634
635







-
-
-
-
+
+
+
+
+
			 (if (< nextmintime (current-milliseconds))
			     (let* ((starttime (current-milliseconds))
				    (changes   (dcommon:run-update keys data runname keypatts testpatt states statuses 'full my-window-id))
				    (endtime   (current-milliseconds)))
			       (set! nextmintime (+ endtime (* 2 (- endtime starttime))))
			       (debug:print 11 "CHANGE(S): " (car changes) "..."))
			     (debug:print-info 11 "Server overloaded"))))))

(dboard:data-set-updaters! *data* (make-hash-table))
(newdashboard *dbstruct-local*)    
(iup:main-loop)
;;; main
;;;
(let ((data (make-hash-table))) ;; data will have "areaname" => "area record" entries
  (newdashboard data)
  (iup:main-loop))

Modified dcommon.scm from [a30707a8d1] to [8dd00f0fd1].

30
31
32
33
34
35
36
37



38
39
40
41
42
43
44
30
31
32
33
34
35
36

37
38
39
40
41
42
43
44
45
46







-
+
+
+







(define dashboard:update-summary-tab #f)
(define dashboard:update-servers-table #f)

;;======================================================================
;; C O M M O N   D A T A   S T R U C T U R E
;;======================================================================
;; 
;; A single data structure for all the data used in a dashboard.
;; A single data structure for all the data used in a dashboard for
;; a given area.
;;
;; Share this structure between newdashboard and dashboard with the 
;; intent of converging on a single app.
;;
;; (define *data* (make-vector 25 #f))
(define (dboard:data-get-runs          vec)    (vector-ref  vec 0))
(define (dboard:data-get-tests         vec)    (vector-ref  vec 1))
(define (dboard:data-get-runs-matrix   vec)    (vector-ref  vec 2))
61
62
63
64
65
66
67

68
69
70
71
72
73
74
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77







+







(define (dboard:data-get-command-tb    vec)    (vector-ref vec 17))
(define (dboard:data-get-target        vec)    (vector-ref vec 18))
(define (dboard:data-get-target-string vec)
  (let ((targ (dboard:data-get-target vec)))
    (if (list? targ)(string-intersperse targ "/") "no-target-specified")))
(define (dboard:data-get-run-name      vec)    (vector-ref vec 19))
(define (dboard:data-get-runs-listbox  vec)    (vector-ref vec 20))
(define (dboard:data-get-area-path     vec)    (vector-ref vec 21))

(define (dboard:data-set-runs!          vec val)(vector-set! vec 0 val))
(define (dboard:data-set-tests!         vec val)(vector-set! vec 1 val))
(define (dboard:data-set-runs-matrix!   vec val)(vector-set! vec 2 val))
(define (dboard:data-set-tests-tree!    vec val)(vector-set! vec 3 val))
(define (dboard:data-set-run-keys!      vec val)(vector-set! vec 4 val))
(define (dboard:data-set-curr-test-ids! vec val)(vector-set! vec 5 val))
85
86
87
88
89
90
91

92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107


108
109
110
111















































112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176

177
178
179
180
181
182
183







+
















+
+




+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+












-







(define (dboard:data-set-statuses!      vec val)(vector-set! vec 14 val))
(define (dboard:data-set-logs-textbox!  vec val)(vector-set! vec 15 val))
(define (dboard:data-set-command!       vec val)(vector-set! vec 16 val))
(define (dboard:data-set-command-tb!    vec val)(vector-set! vec 17 val))
(define (dboard:data-set-target!        vec val)(vector-set! vec 18 val))
(define (dboard:data-set-run-name!      vec val)(vector-set! vec 19 val))
(define (dboard:data-set-runs-listbox!  vec val)(vector-set! vec 20 val))
(define (dboard:data-set-area-path!     vec val)(vector-set! vec 21 val))

(dboard:data-set-run-keys! *data* (make-hash-table))

;; List of test ids being viewed in various panels
(dboard:data-set-curr-test-ids! *data* (make-hash-table))

;; Look up test-ids by (key1 key2 ... testname [itempath])
(dboard:data-set-path-test-ids! *data* (make-hash-table))

;; Look up run-ids by ??
(dboard:data-set-path-run-ids! *data* (make-hash-table))

;;======================================================================
;; D O T F I L E
;;======================================================================

;; write a sexp list to fname
;;
(define (dcommon:write-dotfile fname dat)
  (with-output-to-file fname
    (lambda ()
      (pp dat))))

(define (dcommon:read-dotfile fname)
  (if (file-exists? fname)
      (with-input-from-file fname
	(lambda ()
	  (read)))
      '()))       

;; gets the name for the file ~/.megatest/<name>
;; creates .megatest dir if not there
;;
(define (dcommon:get-dot-file-pathn name)
  (let* ((dot-dir (conc (get-environment-variable "HOME") "/.megatest"))
	 (dfile   (conc dot-dir "/" name)))
    (if (not (file-exists? dot-dir))
	(create-directory dot-dir))
    dfile))

;; dat is the top level data stucture that contains all the info being 
;; displayed in all runs etc.
;;
(define (dcommon:dotfiles-save-areas data)
  (let* ((areas-dat   (dcommon:data-get-areas data))
	 (areas-dfile (dcommon:get-dot-file-pathn "areas")))
    (dcommon:write-dotfile areas-dfile areas-dat)))

;; returns alist of area => path
;;
(define (dcommon:data-get-areas data)
  (let ((area-names (hash-table-keys data)))
    (map (lambda (area-name)
	   (cons area-name 
		 (dboard:data-get-area-path (hash-table-ref data area-name))))
	 area-names)))

;; Fill the hash table data with area => area-record
;;
(define (dcommon:read-areas-init-data data)
  (let* ((dfile       (dcommon:get-dot-file-pathn "areas"))
	 (areas-dfile (dcommon:read-dotfile dfile)))
    (for-each
     (lambda (area)
       (let ((rec (vector 25 #f)))
	 (dboard:data-set-area-path! rec (cdr area))
	 (dboard:data-set-updaters!  rec (make-hash-table))
	 (hash-table-set! data (car area) rec)))
     areas-dfile)))

;;======================================================================
;; TARGET AND PATTERN MANIPULATIONS
;;======================================================================

;; Convert to and from list of lines (for a text box)
;; "," => "\n"
(define (dboard:test-patt->lines test-patt)
  (string-substitute (regexp ",") "\n" test-patt))

(define (dboard:lines->test-patt lines)
  (string-substitute (regexp "\n") "," lines #t))


;;======================================================================
;; P R O C E S S   R U N S
;;======================================================================

;; MOVE THIS INTO *data*
(define *cachedata* (make-hash-table))
547
548
549
550
551
552
553
554

555
556
557
558
559
560
561
599
600
601
602
603
604
605

606
607
608
609
610
611
612
613







-
+







   ;;      		       (system cmd)))))
   ;;    servers-matrix
   ;;   )))
    servers-matrix
    ))

;; The main menu
(define (dcommon:main-menu)
(define (dcommon:main-menu data)
  (iup:menu ;; a menu is a special attribute to a dialog (think Gnome putting the menu at screen top)
   (iup:menu-item "Files" (iup:menu   ;; Note that you can use either #:action or action: for options
			   (iup:menu-item "Open"  action: (lambda (obj)
							    (let* ((area-name (iup:textbox #:expand "HORIZONTAL"))
								   (fd        (iup:file-dialog #:dialogtype "DIR"))
								   (top       (iup:show fd #:modal? "YES")))
							      (iup:attribute-set! source-tb "VALUE"

Modified ezsteps.scm from [18ab86f9c8] to [b686d17683].

162
163
164
165
166
167
168
169

170
171
172
173
174
175
162
163
164
165
166
167
168

169
170
171
172
173
174
175







-
+






		  (debug:print-info 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status)
		  (tests:test-set-status! test-id 
					  new-state
					  new-status
					  (args:get-arg "-m") #f)
		  ;; need to update the top test record if PASS or FAIL and this is a subtest
		  (if (not (equal? item-path ""))
		      (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path new-status))))
		      (cdb:roll-up-pass-fail-counts (common:get-remote remote run-id) run-id test-name item-path new-status))))
	    ;; for automated creation of the rollup html file this is a good place...
	    (if (not (equal? item-path ""))
		(tests:summarize-items #f run-id test-id test-name #f)) ;; don't force - just update if no
	    )))
    (pop-directory)
    rollup-status))

Modified http-transport.scm from [e2a8a6ff8e] to [4b37e6ffd7].

219
220
221
222
223
224
225
226

227
228
229
230
231
232
233
219
220
221
222
223
224
225

226
227
228
229
230
231
232
233







-
+








(define (http-transport:inc-requests-and-prep-to-close-all-connections)
  (mutex-lock! *http-mutex*)
  (set! *http-requests-in-progress* (+ 1 *http-requests-in-progress*)))

;; Send "cmd" with json payload "params" to serverdat and receive result
;;
(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3))
(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3)(remote #f))
  (let* ((fullurl    (if (vector? serverdat)
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        #f)
	 (success    #t)
268
269
270
271
272
273
274
275

276
277
278
279
280
281
282
268
269
270
271
272
273
274

275
276
277
278
279
280
281
282







-
+







					 (db:string->obj 
					  (handle-exceptions
					   exn
					   (begin
					     (set! success #f)
					     (debug:print 0 "WARNING: failure in with-input-from-request to " fullurl ".")
					     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
					     (hash-table-delete! *runremote* run-id)
					     (common:del-remote! remote run-id)
					     ;; Killing associated server to allow clean retry.")
					     ;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
					     (mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
					     (db:obj->string #f))
312
313
314
315
316
317
318
319

320
321
322

323
324
325
326
327
328
329
312
313
314
315
316
317
318

319
320
321

322
323
324
325
326
327
328
329







-
+


-
+







		   (pp (vector-ref res 1) (current-error-port))
		   (signal (vector-ref result 0))))
	     (signal (make-composite-condition
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *runremote*
;; careful closing of connections stored in (common:get-remote remote)
;;
(define (http-transport:close-connections run-id)
  (let* ((server-dat (hash-table-ref/default *runremote* run-id #f)))
  (let* ((server-dat (common:get-remote remote run-id)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (close-connection! api-dat)
	  #t)
	#f)))


Modified megatest.scm from [0e40d16995] to [275b002dfa].

1521
1522
1523
1524
1525
1526
1527



1528

1529
1530
1531
1532
1533
1534
1535
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530

1531
1532
1533
1534
1535
1536
1537
1538







+
+
+
-
+







       )
      (set! *didsomething* #t)))

;;======================================================================
;; Exit and clean up
;;======================================================================

;; if *runremote* is defined, close connections, otherwise - trust that it was
;; taken care of.
;;
(if *runremote* (close-all-connections!))
(if (common:get-remote #f #f)(close-all-connections!))

(if (not *didsomething*)
    (debug:print 0 help))

(set! *time-to-exit* #t)
(thread-join! *watchdog*)

Modified newdashboard.scm from [8ecdd4ecf2] to [9c6040697d].

72
73
74
75
76
77
78
79

80
81
82
83
84
85
86
72
73
74
75
76
77
78

79
80
81
82
83
84
85
86







-
+







(if (not (launch:setup-for-run))
    (begin
      (print "Failed to find megatest.config, exiting") 
      (exit 1)))

;; (if (args:get-arg "-host")
;;     (begin
;;       (set! *runremote* (string-split (args:get-arg "-host" ":")))
;;       (set! (common:get-remote remote) (string-split (args:get-arg "-host" ":")))
;;       (client:launch))
;;     (client:launch))

;; ease debugging by loading ~/.dashboardrc
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc")))
  (if (file-exists? debugcontrolf)
      (load debugcontrolf)))

Modified nmsg-transport.scm from [c28712df60] to [0e4df34ad1].

340
341
342
343
344
345
346
347

348
349
350
351
352
353
354
340
341
342
343
344
345
346

347
348
349
350
351
352
353
354







-
+







;;
(define (nmsg-transport:client-signal-handler signum)
  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     (if (not *received-response*)
				 (receive-message* *runremote*))) ;; flush out last call if applicable
				 (receive-message* (common:get-remote remote #f)))) ;; flush out last call if applicable
			   "eat response"))
	 (th2 (make-thread (lambda ()
			     (debug:print 0 "ERROR: Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
			     (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
			     (debug:print 0 "       Done.")
			     (exit 4))
			   "exit on ^C timer")))

Modified rmt.scm from [e57d73e7b7] to [867c7f8d2d].

68
69
70
71
72
73
74
75
76


77
78
79
80
81
82

83
84
85
86

87
88
89
90
91
92

93
94
95
96
97
98
99
100
101
102



103
104
105
106
107
108
109
68
69
70
71
72
73
74


75
76
77
78
79
80
81

82
83
84
85

86
87
88
89
90
91

92
93
94
95
96
97
98
99



100
101
102
103
104
105
106
107
108
109







-
-
+
+





-
+



-
+





-
+







-
-
-
+
+
+







	       (debug:print-info 1 "db write rate too high, starting a server, count=" count " start=" start " run-id=" run-id " queries-per-second=" queries-per-second)
	       #t)
	     #f))))

;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info run-id)
  (let ((cinfo (hash-table-ref/default *runremote* run-id #f)))
(define (rmt:get-connection-info run-id #!key (remote #f))
  (let ((cinfo (common:get-remote remote run-id)))
    (if cinfo
	cinfo
	;; NB// can cache the answer for server running for 10 seconds ...
	;;  ;; (and (not (rmt:write-frequency-over-limit? cmd run-id))
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    (client:setup run-id remote: remote)
	    #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(remote #f)) ;; start attemptnum at 1 so the modulo below works as expected
  ;; clean out old connections
  (mutex-lock! *db-multi-sync-mutex*)
  (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref/default *runremote* run-id #f)))
       (let ((connection (common:get-remote remote run-id)))
         (if (and (vector? connection)
        	  (< (http-transport:server-dat-get-last-access connection) expire-time))
             (begin
               (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")
               ;; SHOULD CLOSE THE CONNECTION HERE
	       (case *transport-type*
		 ((nmsg)(nn-close (http-transport:server-dat-get-socket 
				   (hash-table-ref *runremote* run-id)))))
               (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))
				   (common:get-remote remote run-id)))))
               (common:del-remote! remote run-id)))))
     (common:get-remote-all remote)))
  (mutex-unlock! *db-multi-sync-mutex*)
  ;; (mutex-lock! *send-receive-mutex*)
  (let* ((run-id          (if rid rid 0))
	 (connection-info (rmt:get-connection-info run-id)))
    ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)
    (if connection-info
	;; use the server if have connection info
125
126
127
128
129
130
131
132

133
134
135
136
137
138
139
125
126
127
128
129
130
131

132
133
134
135
136
137
138
139







-
+







		(case *transport-type* 
		  ((http) res) ;; (db:string->obj res))
		  ((nmsg) res))) ;; (vector-ref res 1)))
	      (begin ;; let ((new-connection-info (client:setup run-id)))
		(debug:print 0 "WARNING: Communication failed, trying call to rmt:send-receive again.")
		;; (case *transport-type*
		;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info))))
		(hash-table-delete! *runremote* run-id) ;; don't keep using the same connection
		(common:del-remote! remote run-id) ;; don't keep using the same connection
		;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. 
		;; (if (eq? (modulo attemptnum 5) 0)
		;;     (tasks:kill-server-run-id run-id tag: "api-send-receive-failed"))
		;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications
		(tasks:start-and-wait-for-server (tasks:open-db) run-id 15)
		;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1))))))

147
148
149
150
151
152
153
154

155
156
157
158
159
160
161
147
148
149
150
151
152
153

154
155
156
157
158
159
160
161







-
+







	;; server and the query is read-only
	;;
	;; Note: The tasks db was checked for a server in starting mode in the rmt:get-connection-info call
	;;
	(if (and (< attemptnum 15)
		 (member cmd api:write-queries))
	    (let ((faststart (configf:lookup *configdat* "server" "faststart")))
	      (hash-table-delete! *runremote* run-id)
	      (common:del-remote! remote run-id)
	      ;; (mutex-unlock! *send-receive-mutex*)
	      (if (and faststart (equal? faststart "no"))
		  (begin
		    (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10)
		    (thread-sleep! (random 5)) ;; give some time to settle and minimize collison?
		    (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
		  (begin

Modified rpc-transport.scm from [1e1f685d67] to [a54662d800].

27
28
29
30
31
32
33
34

35
36
37
38
39
40
41
27
28
29
30
31
32
33

34
35
36
37
38
39
40
41







-
+







;; procstr is the name of the procedure to be called as a string
(define (rpc-transport:autoremote procstr params)
  (handle-exceptions
   exn
   (begin
     (debug:print 1 "Remote failed for " proc " " params)
     (apply (eval (string->symbol procstr)) params))
   ;; (if *runremote*
   ;; (if (common:get-remote remote)
   ;;    (apply (eval (string->symbol (conc "remote:" procstr))) params)
   (apply (eval (string->symbol procstr)) params)))

;; all routes though here end in exit ...
;;
;; start_server? 
;;
158
159
160
161
162
163
164
165

166
167
168
169

170
171
172
173
174
175
176

177
178
179
180
181
182
183
184
185
186
187
188
189
190
191

192
193
194
195
196
197
198
158
159
160
161
162
163
164

165
166
167
168

169
170
171
172
173
174
175

176
177
178
179
180
181
182
183
184
185
186
187
188
189
190

191
192
193
194
195
196
197
198







-
+



-
+






-
+














-
+







	   (print "LOGIN_OK")
	   (exit 0))
	 (begin
	   (print "LOGIN_FAILED")
	   (exit 1))))))

(define (rpc-transport:client-setup run-id #!key (remtries 10))
  (if *runremote*
  (if (common:get-remote remote run-id)
      (begin
	(debug:print 0 "ERROR: Attempt to connect to server but already connected")
	#f)
      (let* ((host-info (hash-table-ref/default *runremote* run-id #f))) ;; (open-run-close db:get-var #f "SERVER"))
      (let* ((host-info (common:get-remote remote run-id))) ;; (open-run-close db:get-var #f "SERVER"))
	(if host-info
	    (let ((iface    (car host-info))
		  (port     (cadr host-info))
		  (ping-res ((rpc:procedure 'server:login host port) *toppath*)))
	      (if ping-res
		  (let ((server-dat (list iface port #f #f #f)))
		    (hash-table-set! *runremote* run-id server-dat)
		    (common:set-remote! remote run-id server-dat)
		    server-dat)
		  (begin
		    (server:try-running run-id)
		    (thread-sleep! 2)
		    (rpc-transport:client-setup run-id (- remtries 1)))))
 	    (let* ((server-db-info (open-run-close tasks:get-server tasks:open-db run-id)))
 	      (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	      (if server-db-info
 		  (let* ((iface     (tasks:hostinfo-get-interface server-db-info))
 			 (port      (tasks:hostinfo-get-port      server-db-info))
			 (server-dat (list iface port #f #f #f))
 			 (ping-res  ((rpc:procedure 'server:login host port) *toppath*)))
 		    (if start-res
 			(begin
 			  (hash-table-set! *runremote* run-id server-dat)
 			  (common:set-remote! remote run-id server-dat)
			  server-dat)
			(begin
			  (server:try-running run-id)
			  (thread-sleep! 2)
			  (rpc-transport:client-setup run-id (- remtries 1)))))
		  (begin
		    (server:try-running run-id)
209
210
211
212
213
214
215
216

217
218
219
220
221

222
223
224

225
226
209
210
211
212
213
214
215

216
217
218
219
220

221
222
223

224
225
226







-
+




-
+


-
+


;; 	       (begin
;; 		 (debug:print 0 "ERROR: Failed to open a connection to the server at host: " host " port: " port)
;; 		 (debug:print 0 "   EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
;; 		 ;; (open-run-close 
;; 		 ;;  (lambda (db . param) 
;; 		 ;;    (sqlite3:execute db "DELETE FROM metadat WHERE var='SERVER'"))
;; 		 ;;  #f)
;; 		 (set! *runremote* #f))
;; 		 (set! (common:get-remote remote) #f))
;; 	       (if (and (not (args:get-arg "-server")) ;; no point in the server using the server using the server
;; 			((rpc:procedure 'server:login host portn) *toppath*))
;; 		   (begin
;; 		     (debug:print-info 2 "Logged in and connected to " host ":" port)
;; 		     (set! *runremote* (vector host portn)))
;; 		     (set! (common:get-remote remote) (vector host portn)))
;; 		   (begin
;; 		     (debug:print-info 2 "Failed to login or connect to " host ":" port)
;; 		     (set! *runremote* #f)))))
;; 		     (set! (common:get-remote remote) #f)))))
;; 	    (debug:print-info 2 "no server available")))))

Modified runs.scm from [61baf1971a] to [1e4666ba35].

33
34
35
36
37
38
39
40

41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
33
34
35
36
37
38
39

40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58



59
60
61
62
63
64
65







-
+


















-
-
-







(define (runs:test-get-full-path test)
  (let* ((testname (db:test-get-testname   test))
	 (itempath (db:test-get-item-path test)))
    (conc testname (if (equal? itempath "") "" (conc "(" itempath ")")))))

;; This is the *new* methodology. One record to inform them and in the chaos, organise them.
;;
(define (runs:create-run-record)
(define (runs:create-run-record #!key (remote #f))
  (let* ((mconfig      (if *configdat*
		           *configdat*
		           (if (launch:setup-for-run)
		               *configdat*
		               (begin
		                 (debug:print 0 "ERROR: Called setup in a non-megatest area, exiting")
		                 (exit 1)))))
	  (runrec      (runs:runrec-make-record))
	  (target      (common:args-get-target))
	  (runname     (or (args:get-arg "-runname")
		           (args:get-arg ":runname")))
	  (testpatt    (or (args:get-arg "-testpatt")
		           (args:get-arg "-runtests")))
	  (keys        (keys:config-get-fields mconfig))
	  (keyvals     (keys:target->keyval keys target))
	  (toppath     *toppath*)
	  (envdat      keyvals) ;; initial values start with keyvals
	  (runconfig   #f)
	  (serverdat   (if (args:get-arg "-server")
			   *runremote*
			   #f)) ;; to be used later
	  (transport   (or (args:get-arg "-transport") 'http))
	  (run-id      #f))
    ;; Set all the environment vars we know so far, start with keys
    (for-each (lambda (keyval)
		(setenv (car keyval)(cadr keyval)))
	      keyvals)
    ;; Set up various and sundry known vars here
86
87
88
89
90
91
92
93

94
95
96
97
98
99
100
83
84
85
86
87
88
89

90
91
92
93
94
95
96
97







-
+







    ;; Now have runconfigs data loaded, set environment vars
    (for-each (lambda (section)
		(for-each (lambda (varval)
			    (set! envdat (append envdat (list varval)))
			    (safe-setenv (car varval)(cadr varval)))
			  (configf:get-section runconfig section)))
	      (list "default" target))
    (vector target runname testpatt keys keyvals envdat mconfig runconfig serverdat transport db toppath run-id)))
    (vector target runname testpatt keys keyvals envdat mconfig runconfig (common:get-remote remote run-id) transport db toppath run-id)))

(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f))
  (let* ((target    (or (common:args-get-target)
			(get-environment-variable "MT_TARGET")))
	 (keys    (if inkeys    inkeys    (rmt:get-keys)))
	 (keyvals   (if inkeyvals inkeyvals (keys:target->keyval keys target)))
	 (vals      (hash-table-ref/default *env-vars-by-run-id* run-id #f))

Modified server.scm from [d9e8792ebb] to [a2547ad8cd].

80
81
82
83
84
85
86
87

88
89
90
91
92
93
94
95

96
97
98
99
100
101
102
80
81
82
83
84
85
86

87
88
89
90
91
92
93
94

95
96
97
98
99
100
101
102







-
+







-
+







			   (lambda ()
			     (write (list (current-directory)
					  (argv)))))))

;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;; 
(define (server:reply return-addr query-sig success/fail result)
(define (server:reply return-addr query-sig success/fail result #!key (remote #f))
  (debug:print-info 11 "server:reply return-addr=" return-addr ", result=" result)
  ;; (send-message pubsock target send-more: #t)
  ;; (send-message pubsock 
  (case (server:get-transport)
    ((rpc)  (db:obj->string (vector success/fail query-sig result)))
    ((http) (db:obj->string (vector success/fail query-sig result)))
    ((zmq)
     (let ((pub-socket (vector-ref *runremote* 1)))
     (let ((pub-socket (vector-ref (common:get-remote remote #f) 1)))
       (send-message pub-socket return-addr send-more: #t)
       (send-message pub-socket (db:obj->string (vector success/fail query-sig result)))))
    ((fs)   result)
    (else 
     (debug:print 0 "ERROR: unrecognised transport type: " *transport-type*)
     result)))

158
159
160
161
162
163
164
165

166
167
168
169
170
171
172
158
159
160
161
162
163
164

165
166
167
168
169
170
171
172







-
+







      (rmt:start-server run-id)))

(define (server:check-if-running run-id)
  (let ((tdbdat (tasks:open-db)))
    (let loop ((server (tasks:get-server (db:delay-if-busy tdbdat) run-id))
	       (trycount 0))
    (if server
	;; note: client:start will set *runremote*. this needs to be changed
	;; note: client:start will set (common:get-remote remote). this needs to be changed
	;;       also, client:start will login to the server, also need to change that.
	;;
	;; client:start returns #t if login was successful.
	;;
	(let ((res (case *transport-type*
		     ((http)(server:ping-server run-id 
						(tasks:hostinfo-get-interface server)

Modified tests.scm from [1b02d35c8f] to [f04a81c96a].

649
650
651
652
653
654
655
656

657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673

674
675
676
677
678
679
680
649
650
651
652
653
654
655

656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672

673
674
675
676
677
678
679
680







-
+
















-
+







       (let* ((test-record (hash-table-ref testrecordshash testkeyname))
	      (test-name   (tests:testqueue-get-testname  test-record))
	      (itemdat     (tests:testqueue-get-itemdat   test-record))
	      (item-path   (tests:testqueue-get-item_path test-record))
	      (waitons     (tests:testqueue-get-waitons   test-record))
	      (keep-test   #t)
	      (test-id     (rmt:get-test-id run-id test-name item-path))
	      (tdat        (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id)))
	      (tdat        (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id (common:get-remote remote) test-id)))
	 (if tdat
	     (begin
	       ;; Look at the test state and status
	       (if (or (and (member (db:test-get-status tdat) 
				    '("PASS" "WARN" "WAIVED" "CHECK" "SKIP"))
			    (equal? (db:test-get-state tdat) "COMPLETED"))
		       (member (db:test-get-state tdat)
				    '("INCOMPLETE" "KILLED")))
		   (set! keep-test #f))

	       ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test
	       ;; from the runnable list
	       (if keep-test
		   (for-each (lambda (waiton)
			       ;; for now we are waiting only on the parent test
			       (let* ((parent-test-id (rmt:get-test-id run-id waiton ""))
				      (wtdat          (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id *runremote* test-id)))
				      (wtdat          (rmt:get-testinfo-state-status run-id test-id))) ;; (cdb:get-test-info-by-id (common:get-remote remote) test-id)))
				 (if (or (and (equal? (db:test-get-state wtdat) "COMPLETED")
					      (member (db:test-get-status wtdat) '("FAIL")))
					 (member (db:test-get-status wtdat)  '("KILLED"))
					 (member (db:test-get-state wtdat)   '("INCOMPETE")))
				 ;; (if (or (member (db:test-get-status wtdat)
				 ;;        	 '("FAIL" "KILLED"))
				 ;;         (member (db:test-get-state wtdat)

Modified zmq-transport.scm from [e1f3152a02] to [07f0ba479b].

102
103
104
105
106
107
108
109

110
111
112
113
114
115
116
102
103
104
105
106
107
108

109
110
111
112
113
114
115
116







-
+







    
    (set! zmq-sdat2    (cadr  zmq-sockets-dat))
    (set! pub-socket   (cadr  zmq-sdat2))
    (set! p2           (caddr zmq-sdat2))

    (set! *cache-on* #t)

    (set! *runremote* (vector pull-socket pub-socket)) ;; overloading the use of *runremote* BUG!?
    (set! *runremote* (vector pull-socket pub-socket)) ;; overloading the use of (common:get-remote remote) BUG!?

    ;; what to do when we quit
    ;;
;;     (on-exit (lambda ()
;; 	       (if (and *toppath* *server-info*)
;; 		   (open-run-close tasks:server-deregister-self tasks:open-db (car *server-info*))
;; 		   (let loop () 
475
476
477
478
479
480
481
482

483
484
485
486
487
488
489
490
491
492
493
475
476
477
478
479
480
481

482
483
484
485
486
487
488
489
490
491
492
493







-
+











;;   ;; server-info: server-id interface pullport pubport
;;   (let ((iface    (list-ref server-info 1))
;; 	(pullport (list-ref server-info 2))
;; 	(pubport  (list-ref server-info 3)))
;;     (zmq-transport:client-connect iface pullport pubport)
;;     (let loop ()
;;       (thread-sleep! 2)
;;       (cdb:client-call *runremote* 'ping #t)
;;       (cdb:client-call (common:get-remote remote) 'ping #t)
;;       (debug:print 4 "zmq-transport:self-ping - I'm alive on " iface ":" pullport "/" pubport "!")
;;       (mutex-lock! *heartbeat-mutex*)
;;       (set! *server-loop-heart-beat* (current-seconds))
;;       (mutex-unlock! *heartbeat-mutex*)
;;       (loop))))
    
(define (zmq-transport:reply pubsock target query-sig success/fail result)
  (debug:print-info 11 "zmq-transport:reply target=" target ", result=" result)
  (send-message pubsock target send-more: #t)
  (send-message pubsock (db:obj->string (vector success/fail query-sig result))))