Megatest

Check-in [c33db3cfa5]
Login
Overview
Comment:ketchup
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.62-rpc
Files: files | file ages | folders
SHA1: c33db3cfa59b1380c3b03dc1428e78a8dd865b33
User & Date: bjbarcla on 2016-12-02 12:46:48
Other Links: branch diff | manifest | tags
Context
2016-12-02
14:35
grafted rpc code check-in: 59b73e1fc2 user: bjbarcla tags: v1.62-rpc
12:46
ketchup check-in: c33db3cfa5 user: bjbarcla tags: v1.62-rpc
12:46
bugfix check-in: 694eaeea2e user: bjbarcla tags: v1.62-rpc
2016-12-01
23:00
Fixed some server functions. Misc. cleanup check-in: 632d7c9f79 user: matt tags: v1.62-no-rpc
Changes

Modified Makefile from [83b5fe2a28] to [1b85fc3382].

70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
tests.o db.o launch.o runs.o dashboard-tests.o dashboard-guimonitor.o dashboard-main.o monitor.o dashboard.o  \
archive.o megatest.o : db_records.scm
tests.o runs.o dashboard.o dashboard-tests.o dashboard-main.o  : run_records.scm
db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm
tests.o tasks.o dashboard-tasks.o : task_records.scm
runs.o : test_records.scm
megatest.o : megatest-fossil-hash.scm
client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm rpc-transport.scm
common_records.scm : altdb.scm
vg.o dashboard.o : vg_records.scm
dcommon.o : run_records.scm
# Temporary while transitioning to new routine
# runs.o : run-tests-queue-classic.scm  run-tests-queue-new.scm

megatest-fossil-hash.scm : $(SRCFILES) megatest.scm *_records.scm







|







70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
tests.o db.o launch.o runs.o dashboard-tests.o dashboard-guimonitor.o dashboard-main.o monitor.o dashboard.o  \
archive.o megatest.o : db_records.scm
tests.o runs.o dashboard.o dashboard-tests.o dashboard-main.o  : run_records.scm
db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm
tests.o tasks.o dashboard-tasks.o : task_records.scm
runs.o : test_records.scm
megatest.o : megatest-fossil-hash.scm
rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm rpc-transport.scm
common_records.scm : altdb.scm
vg.o dashboard.o : vg_records.scm
dcommon.o : run_records.scm
# Temporary while transitioning to new routine
# runs.o : run-tests-queue-classic.scm  run-tests-queue-new.scm

megatest-fossil-hash.scm : $(SRCFILES) megatest.scm *_records.scm

Modified client.scm from [5c6ba40366] to [50265f350f].

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
		     (hostname  (tasks:hostinfo-get-hostname  server-dat))
		     (port      (tasks:hostinfo-get-port      server-dat))
		     (start-res (case *transport-type*
				  ((http)(http-transport:client-connect iface port))
				  ;;((nmsg)(nmsg-transport:client-connect hostname port))
                                  ))
		     (ping-res  (case *transport-type* 
				  ((http)(rmt:login-no-auto-client-setup start-res run-id))
				  ;; ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id)))
 				  ;;          (if logininfo
 				  ;;              (car (vector-ref logininfo 1))
 				  ;;              #f)))

                                  )))
		(if (and start-res
			 ping-res)
		    (begin
		      (set! *runremote* start-res) ;; (hash-table-set! *runremote* run-id start-res)
		      (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
		      start-res)
		    (begin    ;; login failed but have a server record, clean out the record and try again
		      (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
		      (case *transport-type* 
			((http)(http-transport:close-connections run-id)))
		      (set! *runremote* #f)  ;; (hash-table-delete! *runremote* run-id)
		      (tasks:kill-server-run-id run-id)
		      (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat)
							   run-id 
							   (tasks:hostinfo-get-interface server-dat)
							   (tasks:hostinfo-get-port      server-dat)
							   " client:setup (server-dat = #t)")
		      (if (> remaining-tries 8)







|




|




|






|







168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
		     (hostname  (tasks:hostinfo-get-hostname  server-dat))
		     (port      (tasks:hostinfo-get-port      server-dat))
		     (start-res (case *transport-type*
				  ((http)(http-transport:client-connect iface port))
				  ;;((nmsg)(nmsg-transport:client-connect hostname port))
                                  ))
		     (ping-res  (case *transport-type* 
				  ((http)(rmt:login-no-auto-client-setup start-res))
				  ;; ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id)))
 				  ;;          (if logininfo
 				  ;;              (car (vector-ref logininfo 1))
 				  ;;              #f)))
                                  
                                  )))
		(if (and start-res
			 ping-res)
		    (begin
		      (remote-conndat-set! *runremote* start-res) ;; (hash-table-set! *runremote* run-id start-res)
		      (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
		      start-res)
		    (begin    ;; login failed but have a server record, clean out the record and try again
		      (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
		      (case *transport-type* 
			((http)(http-transport:close-connections run-id)))
		      (remote-conndat-set! *runremote* #f)  ;; (hash-table-delete! *runremote* run-id)
		      (tasks:kill-server-run-id run-id)
		      (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat)
							   run-id 
							   (tasks:hostinfo-get-interface server-dat)
							   (tasks:hostinfo-get-port      server-dat)
							   " client:setup (server-dat = #t)")
		      (if (> remaining-tries 8)

Modified common.scm from [91bf9bb241] to [1c3ede54f0].

127
128
129
130
131
132
133



134
135
136
137
138
139
140
(define *server-run*        #t)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))
(define *home-host*         #f)
(define *total-non-write-delay* 0)
(define *heartbeat-mutex*   (make-mutex))




;; RPC transport
(define *rpc:listener*      #f)

;; KEY info
(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))







>
>
>







127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
(define *server-run*        #t)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))
(define *home-host*         #f)
(define *total-non-write-delay* 0)
(define *heartbeat-mutex*   (make-mutex))

;; client
(define *rmt-mutex*         (make-mutex))     ;; remote access calls mutex 

;; RPC transport
(define *rpc:listener*      #f)

;; KEY info
(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
624
625
626
627
628
629
630
631

632
633
634
635
636
637
638
			      (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
			      (if *task-db*    
				  (let ((db (cdr *task-db*)))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  (vector-set! *task-db* 0 #f)))))

			      (close-output-port *default-log-port*)
			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
	  (th2 (make-thread (lambda ()
			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
			      (if no-hurry
				  (thread-sleep! 5) ;; give the clean up few seconds to do it's stuff
				  (thread-sleep! 2))







|
>







627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
			      (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
			      (if *task-db*    
				  (let ((db (cdr *task-db*)))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  ;; (vector-set! *task-db* 0 #f)
					  (set! *task-db* #f)))))
			      (close-output-port *default-log-port*)
			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
	  (th2 (make-thread (lambda ()
			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
			      (if no-hurry
				  (thread-sleep! 5) ;; give the clean up few seconds to do it's stuff
				  (thread-sleep! 2))

Modified common_records.scm from [9b8dfbfc6d] to [0e6990e6a2].

41
42
43
44
45
46
47









48
49
50
51
52
53
54
     (print-call-chain (current-error-port))
     (with-output-to-port (current-error-port)
       (lambda ()
	 (print ((condition-property-accessor 'exn 'message) exn))
	 (print "Callback error in " procname)
	 (print "Full condition info:\n" (condition->list exn)))))
   (proc)))










;; this was cached based on results from profiling but it turned out the profiling
;; somehow went wrong - perhaps too many processes writing to it. Leaving the caching
;; in for now but can probably take it out later.
;;
(define (debug:calc-verbosity vstr)
  (or (hash-table-ref/default *verbosity-cache* vstr #f)







>
>
>
>
>
>
>
>
>







41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
     (print-call-chain (current-error-port))
     (with-output-to-port (current-error-port)
       (lambda ()
	 (print ((condition-property-accessor 'exn 'message) exn))
	 (print "Callback error in " procname)
	 (print "Full condition info:\n" (condition->list exn)))))
   (proc)))

;; Need a mutex protected way to get and set values
;; or use (define-simple-syntax ??
;;
(define-inline (with-mutex mtx accessor record . val)
  (mutex-lock! mtx)
  (let ((res (apply accessor record val)))
    (mutex-unlock! mtx)
    res))

;; this was cached based on results from profiling but it turned out the profiling
;; somehow went wrong - perhaps too many processes writing to it. Leaving the caching
;; in for now but can probably take it out later.
;;
(define (debug:calc-verbosity vstr)
  (or (hash-table-ref/default *verbosity-cache* vstr #f)

Modified db.scm from [77088c1205] to [fef1f3f2e3].

37
38
39
40
41
42
43


44
45
46
47




48
49
50
51
52
53
54
(define *number-non-write-queries* 0)

;;======================================================================
;;  R E C O R D S
;;======================================================================

;; each db entry is a pair ( db . dbfilepath )


(defstruct dbr:dbstruct 
  (tmpdb  #f)
  (mtdb   #f)
  (refndb #f))





;; record for keeping state,status and count for doing roll-ups in
;; iterated tests
;;
(defstruct dbr:counts
  (state #f)
  (status #f)







>
>

|
|
|
>
>
>
>







37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
(define *number-non-write-queries* 0)

;;======================================================================
;;  R E C O R D S
;;======================================================================

;; each db entry is a pair ( db . dbfilepath )
;; I propose this record evolves into the area record
;;
(defstruct dbr:dbstruct 
  (tmpdb       #f)
  (mtdb        #f)
  (refndb      #f)
  (homehost    #f) ;; not used yet
  (on-homehost #f) ;; not used yet
  )                ;; goal is to converge on one struct for an area but for now it is too confusing
  

;; record for keeping state,status and count for doing roll-ups in
;; iterated tests
;;
(defstruct dbr:counts
  (state #f)
  (status #f)

Modified http-transport.scm from [a93ed3a08e] to [a2d6254d62].

24
25
26
27
28
29
30

31
32
33
34
35
36
37
(declare (uses common))
(declare (uses db))
(declare (uses tests))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
(declare (uses server))
(declare (uses daemon))
(declare (uses portlogger))


(include "common_records.scm")
(include "db_records.scm")

(define (http-transport:make-server-url hostport)
  (if (not hostport)
      #f







>







24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
(declare (uses common))
(declare (uses db))
(declare (uses tests))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
(declare (uses server))
(declare (uses daemon))
(declare (uses portlogger))
(declare (uses rmt))

(include "common_records.scm")
(include "db_records.scm")

(define (http-transport:make-server-url hostport)
  (if (not hostport)
      #f
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        #f)
	 (success    #t)
	 (sparams    (db:obj->string params transport: 'http)))
;;    (condition-case
;;     handle-exceptions
;;     exn
;;     (if (> numretries 0)
;;	 (begin
;;	   (mutex-unlock! *http-mutex*)
;;	   (thread-sleep! 1)
;;	   (handle-exceptions
;;	    exn
;;	    (debug:print 0 *default-log-port* "WARNING: closing connections failed. Server at " fullurl " almost certainly dead")
;;	    (close-all-connections!))
;;	   (debug:print 0 *default-log-port* "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
;;	   (http-transport:client-api-send-receive run-id serverdat cmd sparams numretries: (- numretries 1)))
;;	 (begin
;;	   (mutex-unlock! *http-mutex*)
;;	   (tasks:kill-server-run-id run-id)
;;	   #f))
;;     (begin
       (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







218
219
220
221
222
223
224


















225
226
227
228
229
230
231
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        #f)
	 (success    #t)
	 (sparams    (db:obj->string params transport: 'http)))


















       (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response
257
258
259
260
261
262
263

264
265
266
267
268
269
270
271
272
273
274
275
276
277
278

279
280
281
282
283
284
285
					 (db:string->obj 
					  (handle-exceptions
					   exn
					   (begin
					     (set! success #f)
					     (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
					     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))

					     (set! *runremote* #f) ;; (hash-table-delete! *runremote* run-id)
					     ;; Killing associated server to allow clean retry.")
					     ;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
					     (mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
					     (db:obj->string #f))
					   (with-input-from-request ;; was dat
					    fullurl 
					    (list (cons 'key "thekey")
						  (cons 'cmd cmd)
						  (cons 'params sparams))
					    read-string))
					  transport: 'http)))

			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)
			      (mutex-unlock! *http-mutex*)
			      ))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      #f))







>
|












|
|
>







240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
					 (db:string->obj 
					  (handle-exceptions
					   exn
					   (begin
					     (set! success #f)
					     (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
					     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
					     (if *runremote*
                                                 (remote-conndat-set! *runremote* #f))
					     ;; Killing associated server to allow clean retry.")
					     ;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
					     (mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
					     (db:obj->string #f))
					   (with-input-from-request ;; was dat
					    fullurl 
					    (list (cons 'key "thekey")
						  (cons 'cmd cmd)
						  (cons 'params sparams))
					    read-string)
                                           transport: 'http))
                                         0)) ;; added this speculatively
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)
			      (mutex-unlock! *http-mutex*)
			      ))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      #f))
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310


311
312
313
314
315
316
317
318
		 res
		 (begin ;; note: this code also called in nmsg-transport - consider consolidating it
		   (debug:print-error 0 *default-log-port* "error occured at server, info=" (vector-ref res 2))
		   (debug:print 0 *default-log-port* " client call chain:")
		   (print-call-chain (current-error-port))
		   (debug:print 0 *default-log-port* " server call chain:")
		   (pp (vector-ref res 1) (current-error-port))
		   (signal (vector-ref result 0))))
	     (signal (make-composite-condition
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections run-id)


  (let* ((server-dat *runremote*)) ;; (hash-table-ref/default *runremote* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (close-connection! api-dat)
	  #t)
	#f)))









|








>
>
|







280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
		 res
		 (begin ;; note: this code also called in nmsg-transport - consider consolidating it
		   (debug:print-error 0 *default-log-port* "error occured at server, info=" (vector-ref res 2))
		   (debug:print 0 *default-log-port* " client call chain:")
		   (print-call-chain (current-error-port))
		   (debug:print 0 *default-log-port* " server call chain:")
		   (pp (vector-ref res 1) (current-error-port))
		   (signal (vector-ref res 0))))
	     (signal (make-composite-condition
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections run-id)
  (let* ((server-dat (if *runremote*
                         (remote-conndat *runremote*)
                         #f))) ;; (hash-table-ref/default *runremote* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (close-connection! api-dat)
	  #t)
	#f)))


496
497
498
499
500
501
502
503
504
505
506
507
508
509

510
511
512
513
514
515
516
;; 	     (> rem-time 0))
;; 	(thread-sleep! rem-time)
;; 	(thread-sleep! 4))) ;; fallback for if the math is changed ...

(define (http-transport:server-shutdown server-id port)
  (let ((tdbdat (tasks:open-db)))
    (debug:print-info 0 *default-log-port* "Starting to shutdown the server.")
    ;; need to delete only *my* server entry (future use)
    ;; (if *dbstruct-db* (db:sync-touched *dbstruct-db* *run-id* force-sync: #t)) ;; handled in the watchdog only
    (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
    ;;
    ;; start_shutdown
    ;;
    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down")

    (portlogger:open-run-close portlogger:set-port port "released")
    (thread-sleep! 5)
    (debug:print-info 0 *default-log-port* "Max cached queries was    " *max-cache-size*)
    (debug:print-info 0 *default-log-port* "Number of cached writes   " *number-of-writes*)
    (debug:print-info 0 *default-log-port* "Average cached write time "
		      (if (eq? *number-of-writes* 0)
			  "n/a (no writes)"







<
<
<




>







483
484
485
486
487
488
489



490
491
492
493
494
495
496
497
498
499
500
501
;; 	     (> rem-time 0))
;; 	(thread-sleep! rem-time)
;; 	(thread-sleep! 4))) ;; fallback for if the math is changed ...

(define (http-transport:server-shutdown server-id port)
  (let ((tdbdat (tasks:open-db)))
    (debug:print-info 0 *default-log-port* "Starting to shutdown the server.")



    ;;
    ;; start_shutdown
    ;;
    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down")
    (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
    (portlogger:open-run-close portlogger:set-port port "released")
    (thread-sleep! 5)
    (debug:print-info 0 *default-log-port* "Max cached queries was    " *max-cache-size*)
    (debug:print-info 0 *default-log-port* "Number of cached writes   " *number-of-writes*)
    (debug:print-info 0 *default-log-port* "Average cached write time "
		      (if (eq? *number-of-writes* 0)
			  "n/a (no writes)"

Modified megatest.scm from [60075c013d] to [46d15d3c2a].

640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
		(else
		 (pp data))))))
      (if out-file (close-output-port out-port))
      (exit) ;; yes, bending the rules here - need to exit since this is a utility
      ))

(if (args:get-arg "-ping")
    (let* (;; (run-id        (string->number (args:get-arg "-run-id")))
	   (host:port     (args:get-arg "-ping")))
      (server:ping host:port)))

;;======================================================================
;; Capture, save and manipulate environments
;;======================================================================

;; NOTE: Keep these above the section where the server or client code is setup








|

|







640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
		(else
		 (pp data))))))
      (if out-file (close-output-port out-port))
      (exit) ;; yes, bending the rules here - need to exit since this is a utility
      ))

(if (args:get-arg "-ping")
    (let* ((server-id     (string->number (args:get-arg "-ping"))) ;; extract run-id (i.e. no ":"
	   (host:port     (args:get-arg "-ping")))
      (server:ping (or server-id host:port) do-exit: #t)))

;;======================================================================
;; Capture, save and manipulate environments
;;======================================================================

;; NOTE: Keep these above the section where the server or client code is setup

1861
1862
1863
1864
1865
1866
1867



1868
1869
1870
1871
1872
1873
1874
1875
1876
;; fakeout readline
(include "readline-fix.scm")

(if (or (getenv "MT_RUNSCRIPT")
	(args:get-arg "-repl")
	(args:get-arg "-load"))
    (let* ((toppath (launch:setup))



	   (dbstruct (if toppath (db:setup)))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f)))
      (if dbstruct
	  (cond
	   ((getenv "MT_RUNSCRIPT")
	    ;; How to run megatest scripts
	    ;;
	    ;; #!/bin/bash
	    ;;
	    ;; export MT_RUNSCRIPT=yes







>
>
>
|
|







1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
;; fakeout readline
(include "readline-fix.scm")

(if (or (getenv "MT_RUNSCRIPT")
	(args:get-arg "-repl")
	(args:get-arg "-load"))
    (let* ((toppath (launch:setup))
	   (dbstruct (if (and toppath
                              (common:on-homehost?))
                         (db:setup)
                         #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f)))
      (if *toppath*
	  (cond
	   ((getenv "MT_RUNSCRIPT")
	    ;; How to run megatest scripts
	    ;;
	    ;; #!/bin/bash
	    ;;
	    ;; export MT_RUNSCRIPT=yes

Modified remotediff-nmsg.scm from [90308a45f2] to [50100144d4].

36
37
38
39
40
41
42





43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
                  (mutex-unlock! mtx)
                  (car (string-split result)))
                #f)
            (loop (read-line inp)))))))

(define *max-running* 40)






(define (gather-dir-info path)
  (let ((mtx1     (make-mutex))
        (threads  (make-hash-table))
        (last-num 0)
        (req      (nn-socket 'req)))
    (print "starting client with pid " (current-process-id))
    (nn-connect req
                ;; "tcp://localhost:5559")
                "ipc:///tmp/test-ipc")
    (find-files 
     path 
     ;; test: #t
     action: (lambda (p res)
               (let ((info (cond
                            ((not (file-read-access? p)) '(cant-read))
                            ((directory? p)              '(dir))
                            ((symbolic-link? p)          (list 'symlink (read-symbolic-link p)))
                            (else                        '(data)))))
                 (if (eq? (car info) 'data)
                     (let loop ((start-time (current-seconds)))
                       (mutex-lock! mtx1)
                       (let* ((num-threads (hash-table-size threads))
                              (ok-to-run   (> *max-running* num-threads)))
                         ;; (if (> (abs (- num-threads last-num)) 2)
                         ;;     (begin
                         ;;       ;; (print "num-threads:" num-threads)
                         ;;       (set! last-num num-threads)))
                         (mutex-unlock! mtx1)
                         (if ok-to-run
                             (let ((run-time-start (current-seconds)))
                               ;; (print "num threads: " num-threads)
                               (let ((th1  (make-thread
                                            (lambda ()
                                              (let ((cksum (checksum mtx1 p cmd: "md5sum"))
                                                    (run-time (- (current-seconds) run-time-start)))
                                                (mutex-lock! mtx1)
                                                (client-send-receive req (conc p " " cksum))
                                                (mutex-unlock! mtx1))
                                              (let loop2 ()
                                                (mutex-lock! mtx1)
                                                (let ((registered (hash-table-exists? threads p)))
                                                  (if registered
                                                      (begin
                                                        ;; (print "deleting thread reference for " p)
                                                        (hash-table-delete! threads p))) ;; delete myself
                                                  (mutex-unlock! mtx1)
                                                  (if (not registered)
                                                      (begin
                                                        (thread-sleep! 0.5)
                                                        (loop2))))))
                                            p)))
                                 (thread-start! th1)
                                 ;; (thread-sleep! 0.05) ;; give things a little time to get going
                                 ;; (thread-join! th1) ;; 
                                 (mutex-lock! mtx1)
                                 (hash-table-set! threads p th1)
                                 (mutex-unlock! mtx1)
                                 )) ;; thread is launched
                             (let ((run-time (- (current-seconds) start-time))) ;; couldn't launch yet
                               (cond
                                ((< run-time 5)) ;; blast on through
                                ((< run-time 30)(thread-sleep! 0.1))
                                ((< run-time 60)(thread-sleep! 2))
                                ((< run-time 120)(thread-sleep! 3))







>
>
>
>
>




















|






|







|

|

|





|








|

|







36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
                  (mutex-unlock! mtx)
                  (car (string-split result)))
                #f)
            (loop (read-line inp)))))))

(define *max-running* 40)

(define my-mutex-lock! conc)
(define my-mutex-unlock! conc)
;; (define my-mutex-lock! mutex-lock!)
;; (define my-mutex-unlock! mutex-unlock!)

(define (gather-dir-info path)
  (let ((mtx1     (make-mutex))
        (threads  (make-hash-table))
        (last-num 0)
        (req      (nn-socket 'req)))
    (print "starting client with pid " (current-process-id))
    (nn-connect req
                ;; "tcp://localhost:5559")
                "ipc:///tmp/test-ipc")
    (find-files 
     path 
     ;; test: #t
     action: (lambda (p res)
               (let ((info (cond
                            ((not (file-read-access? p)) '(cant-read))
                            ((directory? p)              '(dir))
                            ((symbolic-link? p)          (list 'symlink (read-symbolic-link p)))
                            (else                        '(data)))))
                 (if (eq? (car info) 'data)
                     (let loop ((start-time (current-seconds)))
                       (my-mutex-lock! mtx1)
                       (let* ((num-threads (hash-table-size threads))
                              (ok-to-run   (> *max-running* num-threads)))
                         ;; (if (> (abs (- num-threads last-num)) 2)
                         ;;     (begin
                         ;;       ;; (print "num-threads:" num-threads)
                         ;;       (set! last-num num-threads)))
                         (my-mutex-unlock! mtx1)
                         (if ok-to-run
                             (let ((run-time-start (current-seconds)))
                               ;; (print "num threads: " num-threads)
                               (let ((th1  (make-thread
                                            (lambda ()
                                              (let ((cksum (checksum mtx1 p cmd: "md5sum"))
                                                    (run-time (- (current-seconds) run-time-start)))
                                                (my-mutex-lock! mtx1)
                                                (client-send-receive req (conc p " " cksum))
                                                (my-mutex-unlock! mtx1))
                                              (let loop2 ()
                                                (my-mutex-lock! mtx1)
                                                (let ((registered (hash-table-exists? threads p)))
                                                  (if registered
                                                      (begin
                                                        ;; (print "deleting thread reference for " p)
                                                        (hash-table-delete! threads p))) ;; delete myself
                                                  (my-mutex-unlock! mtx1)
                                                  (if (not registered)
                                                      (begin
                                                        (thread-sleep! 0.5)
                                                        (loop2))))))
                                            p)))
                                 (thread-start! th1)
                                 ;; (thread-sleep! 0.05) ;; give things a little time to get going
                                 ;; (thread-join! th1) ;; 
                                 (my-mutex-lock! mtx1)
                                 (hash-table-set! threads p th1)
                                 (my-mutex-unlock! mtx1)
                                 )) ;; thread is launched
                             (let ((run-time (- (current-seconds) start-time))) ;; couldn't launch yet
                               (cond
                                ((< run-time 5)) ;; blast on through
                                ((< run-time 30)(thread-sleep! 0.1))
                                ((< run-time 60)(thread-sleep! 2))
                                ((< run-time 120)(thread-sleep! 3))

Modified rmt.scm from [cfa1ca3a75] to [3618123224].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35







36
37
38
39
40
41
42
;;======================================================================
;; Copyright 2006-2013, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
;;======================================================================
;;
(use json format) ;; RADT => purpose of json format??

(declare (unit rmt))
(declare (uses api))
(declare (uses tdb))
(declare (uses http-transport))
;;(declare (uses nmsg-transport))


;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;

;; ;; For debugging add the following to ~/.megatestrc
;;
;; (require-library trace)
;; (import trace)
;; (trace
;; rmt:send-receive
;; api:execute-requests
;; )

;; generate entries for ~/.megatestrc with the following
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u









;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; thread-safe interface to *runremote* hash
(define *rrr-mutex* (make-mutex))











|






>
>




<
<
<
<
<
<
<
<
<




>
>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24









25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
;;======================================================================
;; Copyright 2006-2013, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
;;======================================================================
;;
(use format typed-records) ;; RADT => purpose of json format??

(declare (unit rmt))
(declare (uses api))
(declare (uses tdb))
(declare (uses http-transport))
;;(declare (uses nmsg-transport))
(include "common_records.scm")

;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;










;; generate entries for ~/.megatestrc with the following
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        (if *toppath* (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (or (server:get-timeout) 100))) ;; default to 100 seconds

;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; thread-safe interface to *runremote* hash
(define *rrr-mutex* (make-mutex))
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

84
85
86
87
88
89
90







91


























92

93
94
95
96
97
98



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116


117
118
119
120
121
122

123
124
125
126


127
128
129
130
131
132
133
134
135
136
137
138
139
140
141

142
143
144

145
146
147
148


149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177

178
179
180
181
182
183

184
185
186
187
188
189
190
191
192

193


194
195
196
197
198
199


200
201
202
203
204
205
206
207



;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info run-id)
  (let ((cinfo *runremote*)) ;; (hash-table-ref/default *runremote* run-id #f)))
    (if cinfo
	cinfo
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
  ;; clean out old connections

  ;; (mutex-lock! *db-multi-sync-mutex*)

  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in *runremote*
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;
  (if (and ;; #f  ;; FORCE NO GO FOR RIGHT NOW







	   (not *runremote*)                         ;; we trust *runremote* to reflect that a server was found previously


























	   (not (member cmd api:read-only-queries))) ;; we don't trust so much the list of write queries

      (let ((serverconn (server:check-if-running *toppath*)))
	(if serverconn
	    (set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed
	    (if (not (server:start-attempted? *toppath*))
		(server:kind-run *toppath*)))))
  



  (rmt:open-qry-close-locally cmd (if rid rid 0) params))

;;   (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
;;     (for-each 
;;      (lambda (run-id)
;;        (let ((connection (hash-table-ref/default *runremote* run-id #f)))
;;          (if (and (vector? connection)
;;         	  (< (http-transport:server-dat-get-last-access connection) expire-time))
;;              (begin
;;                (debug:print-info 0 *default-log-port* "Discarding connection to server for run-id " run-id ", too long between accesses")
;;                ;; bb- disabling nanomsg
;;                ;; SHOULD CLOSE THE CONNECTION HERE 
;; 	       ;; (case *transport-type*
;; 	       ;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket 
;; 	       ;;  		   (hash-table-ref *runremote* run-id)))))
;;                (hash-table-delete! *runremote* run-id)))))
;;      (hash-table-keys *runremote*)))
;;   ;; (mutex-unlock! *db-multi-sync-mutex*)


;;   ;; (mutex-lock! *send-receive-mutex*)
;;   (let* ((run-id          (if rid rid 0))
;; 	 (home-host       (common:get-homehost))
;; 	 (connection-info (if (cdr home-host) ;; we are on the home-host
;; 			      #f
;; 			      (rmt:get-connection-info run-id))))

;;     (cond
;;      (home-host        (rmt:open-qry-close-locally cmd run-id params))
;;      (connection-info
;;       ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)


;;       ;; use the server if have connection info
;;       (let* ((dat     (case *transport-type*
;; 			((http)(condition-case
;; 				(http-transport:client-api-send-receive run-id connection-info cmd params)
;; 				((commfail)(vector #f "communications fail"))
;; 				((exn)(vector #f "other fail"))))
;; 			;; ((nmsg)(condition-case
;; 			;;         (nmsg-transport:client-api-send-receive run-id connection-info cmd params)
;; 			;;         ((timeout)(vector #f "timeout talking to server"))))
;; 			(else  (exit))))
;; 	     (success (if (vector? dat) (vector-ref dat 0) #f))
;; 	     (res     (if (vector? dat) (vector-ref dat 1) #f)))
;; 	(if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info))
;; 	(if success
;; 	    (begin

;; 	      ;; (mutex-unlock! *send-receive-mutex*)
;; 	      (case *transport-type* 
;; 		((http) res) ;; (db:string->obj res))

;; 		;; ((nmsg) res)
;; 		)) ;; (vector-ref res 1)))
;; 	    (begin ;; let ((new-connection-info (client:setup run-id)))
;; 	      (debug:print 0 *default-log-port* "WARNING: Communication failed, trying call to rmt:send-receive again.")


;; 	      ;; (case *transport-type*
;; 	      ;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info))))
;; 	      (hash-table-delete! *runremote* run-id) ;; don't keep using the same connection
;; 	      ;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. 
;; 	      ;; (if (eq? (modulo attemptnum 5) 0)
;; 	      ;;     (tasks:kill-server-run-id run-id tag: "api-send-receive-failed"))
;; 	      ;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications
;; 	      (tasks:start-and-wait-for-server (tasks:open-db) run-id 15)
;; 	      ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1))))))
;; 	      
;; 	      ;; no longer killing the server in http-transport:client-api-send-receive
;; 	      ;; may kill it here but what are the criteria?
;; 	      ;; start with three calls then kill server
;; 	      ;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id))
;; 	      ;; (thread-sleep! 2)
;; 	      (rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1))))))
;;      (else
;;      ;; no connection info? try to start a server, or access locally if no
;;       ;; server and the query is read-only
;;       ;;
;;       ;; Note: The tasks db was checked for a server in starting mode in the rmt:get-connection-info call
;;       ;;
;;       (if (and (< attemptnum 15)
;; 	       (member cmd api:write-queries))
;; 	  (let ((homehost  (common:get-homehost))) ;; faststart (configf:lookup *configdat* "server" "faststart")))
;; 	      (hash-table-delete! *runremote* run-id)
;; 	      ;; (mutex-unlock! *send-receive-mutex*)
;; 	      (if (not (cdr homehost)) ;; we always require a server if not on homehost ;; (and faststart (equal? faststart "no"))
;; 		  (begin

;; 		    (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10)
;; 		    (thread-sleep! (random 5)) ;; give some time to settle and minimize collison?
;; 		    (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
;; 		  ;; NB - probably can remove the query time stuff but need to discuss it ....
;; 		  (let ((start-time (current-milliseconds))
;; 			(max-query  (string->number (or (configf:lookup *configdat* "server" "server-query-threshold")

;; 							"300")))
;; 			(newres     (rmt:open-qry-close-locally cmd run-id params)))
;; 		    (let ((delta (- (current-milliseconds) start-time)))
;; 		      (if (> delta max-query)
;; 			  (begin
;; 			    (debug:print-info 0 *default-log-port* "WARNING: long query times, you may have an overloaded homehost.") ;; Starting server as query time " delta " is over the limit of " max-query)
;; 			    ;; (server:kind-run run-id)))
;; 			    ))
;; 		      ;; return the result!

;; 		      newres)


;; 		    )))
;; 	  (begin
;; 	    ;; (debug:print-error 0 *default-log-port* "Communication failed!")
;; 	    ;; (mutex-unlock! *send-receive-mutex*)
;; 	    ;; (exit)
;; 	    (rmt:open-qry-close-locally cmd run-id params)


;; 	    ))))))

(define (rmt:update-db-stats run-id rawcmd params duration)
  (mutex-lock! *db-stats-mutex*)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")







|











|
>
|





<
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>


|

|
|
>
>
>
|
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
|
>
>
|
<
<
|
|
<
>
<
<
|
<
>
>
|
<
<
|
<
<
<
|
<
<
<
<
<
<
|
>
|
|
<
>
|
<
|
<
>
>
|
<
|
<
<
<
<
<
<
|
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
|
<
|
>
|
|
|
|
|
|
>
|
<
|
|
|
|
<
<
|
>
|
>
>
|
|
|
<
|
|
>
>
|







64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90

91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136







137








138
139
140
141


142
143

144


145

146
147
148


149



150






151
152
153
154

155
156

157

158
159
160

161






162






163









164

165
166
167
168
169
170
171
172
173
174

175
176
177
178


179
180
181
182
183
184
185
186

187
188
189
190
191
192
193
194
195
196
197
198



;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info run-id)
  (let ((cinfo (remote-conndat *runremote*)))
    (if cinfo
	cinfo
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected

  ;; do all the prep locked under the rmt-mutex
  (mutex-lock! *rmt-mutex*)

  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in *runremote*
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;

  (let* ((start-time (current-seconds))) ;; snapshot time so all use cases get same value
    (cond
     ;; give up if more than 15 attempts
     ((> attemptnum 15)
      (debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.")
      (exit 1))
     ;; ensure we have a record for our connection for given area
     ((not *runremote*)                     
      (set! *runremote* (make-remote))
      (mutex-unlock! *rmt-mutex*)
      (print "case 1")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     ;; ensure we have a homehost record
     ((not (pair? (remote-hh-dat *runremote*)))  ;; have a homehost record?
      (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
      (remote-hh-dat-set! *runremote* (common:get-homehost))
      (mutex-unlock! *rmt-mutex*)
      (print "case 2")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     ;; on homehost and this is a read
     ((and (cdr (remote-hh-dat *runremote*))   ;; on homehost
           (member cmd api:read-only-queries)) ;; this is a read
      (mutex-unlock! *rmt-mutex*)
      (print "case 3")
      (rmt:open-qry-close-locally cmd 0 params))
     ;; on homehost and this is a write, we already have a server
     ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url *runremote*))          ;; have a server
      (mutex-unlock! *rmt-mutex*)
      (print "case 4")
      (rmt:open-qry-close-locally cmd 0 params))
     ;; no server contact made and this is a write, passively start a server 
     ((and (not (remote-server-url *runremote*))
	   (not (member cmd api:read-only-queries)))
      (print "case 5")
      (let ((serverconn (server:check-if-running *toppath*)))
	(if serverconn
	    (remote-server-url-set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed
	    (if (not (server:start-attempted? *toppath*))
		(server:kind-run *toppath*))))
      (if (cdr (remote-hh-dat *runremote*)) ;; we are on the homehost, just do the call
          (begin
            (mutex-unlock! *rmt-mutex*)
	    (print "case 5.1")
            (rmt:open-qry-close-locally cmd 0 params))







          (begin








            (mutex-unlock! *rmt-mutex*)
	    (print "case 5.2")
	    (tasks:start-and-wait-for-server (tasks:open-db) 0 15)
            (rmt:send-receive cmd rid params attemptnum: attemptnum))))


     ;; if not on homehost ensure we have a connection to a live server
     ;; NOTE: we *have* a homehost record by now

     ((and (not (cdr (remote-hh-dat *runremote*)))        ;; are we on a homehost?


           (not (remote-conndat *runremote*)))            ;; and no connection

      (print "case 6  hh-dat: " (remote-hh-dat *runremote*) " conndat: " (remote-conndat *runremote*))
      (mutex-unlock! *rmt-mutex*)
      (tasks:start-and-wait-for-server (tasks:open-db) 0 15)


      (remote-conndat-set! *runremote* (rmt:get-connection-info 0))



      (rmt:send-receive cmd rid params attemptnum: attemptnum))






     ;; all set up if get this far, dispatch the query
     ((cdr (remote-hh-dat *runremote*)) ;; we are on homehost
      (mutex-unlock! *rmt-mutex*)
      (print "case 7")

      (rmt:open-qry-close-locally cmd (if rid rid 0) params))
     ;; reset the connection if it has been unused too long

     ((and (remote-conndat *runremote*)

	   (let ((expire-time (- start-time (remote-server-timeout *runremote*))))
	     (< (http-transport:server-dat-get-last-access (remote-conndat *runremote*)) expire-time)))
      (print "case 8")

      (remote-conndat-set! *runremote* #f))






     ;; not on homehost, do server query






     (else









      (mutex-unlock! *rmt-mutex*)

      (print "case 9")
      (let* ((conninfo (remote-conndat *runremote*))
	     (dat      (case (remote-transport *runremote*)
			 ((http) (condition-case ;; handling here has caused a lot of problems. However it is needed to deal with attemtped communication to servers that have gone away
                                  (http-transport:client-api-send-receive 0 conninfo cmd params)
                                  ((commfail)(vector #f "communications fail"))
                                  ((exn)(vector #f "other fail" (print-call-chain)))))
			 (else
			  (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " not supported")
			  (exit))))

	     (success  (if (vector? dat) (vector-ref dat 0) #f))
	     (res      (if (vector? dat) (vector-ref dat 1) #f)))
	(if (vector? conninfo)(http-transport:server-dat-update-last-access conninfo)) ;; refresh access time
        (print "case 9. conninfo=" conninfo " dat=" dat)


	(if success
	    (case (remote-transport *runremote*)
	      ((http) res)
	      (else
	       (debug:print 0 *default-log-port* "ERROR: transport " (remote-transport *runremote*) " is unknown")
	       (exit 1)))
	    (begin
	      (debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)

	      (remote-conndat-set!    *runremote* #f)
	      (remote-server-url-set! *runremote* #f)
              (print "case 9.1")
	      (tasks:start-and-wait-for-server (tasks:open-db) 0 15)
	      (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))))))))

(define (rmt:update-db-stats run-id rawcmd params duration)
  (mutex-lock! *db-stats-mutex*)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
		(mutex-lock! *db-multi-sync-mutex*)
		(set! *db-last-write* start-time) ;; the oldest "write"
                (mutex-unlock! *db-multi-sync-mutex*)))))
    res))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 ;; (jparams  (db:obj->string params)) ;; (rmt:dat->json-str params))
	 (res  	   (handle-exceptions
		    exn
		    #f
		    (http-transport:client-api-send-receive run-id connection-info cmd params))))
;;		    ((commfail) (vector #f "communications fail")))))
    (if (and res (vector-ref res 0))
	(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
	#f)))
;; 	(db:string->obj (vector-ref dat 1))
;; 	(begin
;; 	  (debug:print-error 0 *default-log-port* "rmt:send-receive-no-auto-client-setup failed, attempting to continue. Got " dat)
;; 	  dat))))

;; Wrap json library for strings (why the ports crap in the first place?)
(define (rmt:dat->json-str dat)
  (with-output-to-string 
    (lambda ()
      (json-write dat))))

(define (rmt:json-str->dat json-str)
  (with-input-from-string json-str
    (lambda ()
      (json-read))))

;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
;;
;;======================================================================








<




<



<
<
<
<

|
|
|
|
|
|
|
|
|
|







278
279
280
281
282
283
284

285
286
287
288

289
290
291




292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
		(mutex-lock! *db-multi-sync-mutex*)
		(set! *db-last-write* start-time) ;; the oldest "write"
                (mutex-unlock! *db-multi-sync-mutex*)))))
    res))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))

	 (res  	   (handle-exceptions
		    exn
		    #f
		    (http-transport:client-api-send-receive run-id connection-info cmd params))))

    (if (and res (vector-ref res 0))
	(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
	#f)))





;; ;; Wrap json library for strings (why the ports crap in the first place?)
;; (define (rmt:dat->json-str dat)
;;   (with-output-to-string 
;;     (lambda ()
;;       (json-write dat))))
;; 
;; (define (rmt:json-str->dat json-str)
;;   (with-input-from-string json-str
;;     (lambda ()
;;       (json-read))))

;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
;;
;;======================================================================

333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
  (rmt:send-receive 'start-server 0 (list run-id)))

;;======================================================================
;;  M I S C
;;======================================================================

(define (rmt:login run-id)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))

;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
(define (rmt:login-no-auto-client-setup connection-info)
  (case *transport-type* ;; run-id of 0 is just a placeholder
    ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version *my-client-signature*)))







|







318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
  (rmt:send-receive 'start-server 0 (list run-id)))

;;======================================================================
;;  M I S C
;;======================================================================

(define (rmt:login run-id)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version *my-client-signature*)))

;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
(define (rmt:login-no-auto-client-setup connection-info)
  (case *transport-type* ;; run-id of 0 is just a placeholder
    ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version *my-client-signature*)))

Modified server.scm from [15a5983a03] to [9384560fe7].

164
165
166
167
168
169
170



171
172
173
174
175
176
177



178
179
180
181
182
183
184
185
186
187
188
189
190
191
;;    (if (eq? run-id 0)
;;        (server:run run-id)
;;        (rmt:start-server run-id)))
(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.

(define (server:start-attempted? areapath)
  (let ((flagfile (conc areapath "/.starting-server")))



    (and (file-exists? flagfile)
	 (< (- (current-seconds)
	       (file-modification-time flagfile))
	    15)))) ;; exists and less than 15 seconds old
    
(define (server:read-dotserver areapath)
  (let ((dotfile (conc areapath "/.server")))



    (if (and (file-exists? dotfile)
	     (file-read-access? dotfile))
	(with-input-from-file
	    dotfile
	  (lambda ()
	    (read-line)))
	#f)))

;; write a .server file in *toppath* with hostport
;; return #t on success, #f otherwise
;;
(define (server:write-dotserver areapath hostport)
  (let ((lock-file   (conc areapath "/.server.lock"))
	(server-file (conc areapath "/.server")))







>
>
>
|
|
|
|



>
>
>
|
|
|
|
|
|
|







164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
;;    (if (eq? run-id 0)
;;        (server:run run-id)
;;        (rmt:start-server run-id)))
(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.

(define (server:start-attempted? areapath)
  (let ((flagfile (conc areapath "/.starting-server")))
    (handle-exceptions
     exn
     #f  ;; if things go wrong pretend we can't see the file
     (and (file-exists? flagfile)
	  (< (- (current-seconds)
		(file-modification-time flagfile))
	     15))))) ;; exists and less than 15 seconds old
    
(define (server:read-dotserver areapath)
  (let ((dotfile (conc areapath "/.server")))
    (handle-exceptions
     exn
     #f  ;; if things go wrong pretend we can't see the file
     (if (and (file-exists? dotfile)
	      (file-read-access? dotfile))
	 (with-input-from-file
	     dotfile
	   (lambda ()
	     (read-line)))
	 #f))))

;; write a .server file in *toppath* with hostport
;; return #t on success, #f otherwise
;;
(define (server:write-dotserver areapath hostport)
  (let ((lock-file   (conc areapath "/.server.lock"))
	(server-file (conc areapath "/.server")))
231
232
233
234
235
236
237
238

239





240
241
242
243
244

245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
	#f)))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host:port)

  (let ((tdbdat (tasks:open-db)))





    (let* ((host-port (let ((slst (string-split   host:port ":")))
			(if (eq? (length slst) 2)
			    (list (car slst)(string->number (cadr slst)))
			    #f)))
	   (toppath       (launch:setup)))

      (if (not host-port)
	  (begin
	    (print "ERROR: bad host:port")
	    (exit 1))
	  (let* ((iface      (if host-port (car host-port) (tasks:hostinfo-get-interface server-db-dat)))
		 (port       (if host-port (cadr host-port)(tasks:hostinfo-get-port      server-db-dat)))
		 (server-dat (http-transport:client-connect iface port))
		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
	    (if (and (list? login-res)
		     (car login-res))
		(begin
		  (print "LOGIN_OK")
		  (exit 0))
		(begin
		  (print "LOGIN_FAILED")
		  (exit 1))))))))

;; run ping in separate process, safest way in some cases
;;
(define (server:ping-server ifaceport)
  (with-input-from-pipe 
   (conc (common:get-megatest-exe) " -ping " ifaceport)
   (lambda ()







|
>
|
>
>
>
>
>





>


|
|
|
|






|


|







237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
	#f)))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host-port-in #!key (do-exit #f))
  (let ((host:port (if (number? host-port-in) ;; we were handed a server-id
		       (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
			 ;; (print "srec: " srec " host-port-in: " host-port-in)
			 (if srec
			     (conc (vector-ref srec 3) ":" (vector-ref srec 4))
			     (conc "no such server-id " host-port-in)))
		       host-port-in)))
    (let* ((host-port (let ((slst (string-split   host:port ":")))
			(if (eq? (length slst) 2)
			    (list (car slst)(string->number (cadr slst)))
			    #f)))
	   (toppath       (launch:setup)))
      ;; (print "host-port=" host-port)
      (if (not host-port)
	  (begin
	    (debug:print 0 *default-log-port*  "ERROR: bad host:port")
	    (if do-exit (exit 1)))
	  (let* ((iface      (car host-port))
		 (port       (cadr host-port))
		 (server-dat (http-transport:client-connect iface port))
		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
	    (if (and (list? login-res)
		     (car login-res))
		(begin
		  (print "LOGIN_OK")
		  (if do-exit (exit 0)))
		(begin
		  (print "LOGIN_FAILED")
		  (if do-exit (exit 1)))))))))

;; run ping in separate process, safest way in some cases
;;
(define (server:ping-server ifaceport)
  (with-input-from-pipe 
   (conc (common:get-megatest-exe) " -ping " ifaceport)
   (lambda ()

Modified tasks.scm from [a06114a2ac] to [285ca22d6e].

414
415
416
417
418
419
420












421
422
423
424
425
426
427
     (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
       ;;                       0  1     2         3      4     5          6        7     8          9          10        11     12
       (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res)))
     mdb
     "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id 
        FROM servers WHERE state NOT LIKE 'defunct%' ORDER BY start_time DESC;")
    res))













(define (tasks:get-server-records mdb run-id)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
       ;;                       0  1     2         3      4     5          6        7     8          9          10        11     12
       (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res)))







>
>
>
>
>
>
>
>
>
>
>
>







414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
     (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
       ;;                       0  1     2         3      4     5          6        7     8          9          10        11     12
       (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res)))
     mdb
     "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id 
        FROM servers WHERE state NOT LIKE 'defunct%' ORDER BY start_time DESC;")
    res))

(define (tasks:get-server-by-id mdb id)
  (let ((res #f))
    (sqlite3:for-each-row
     (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
       ;;                       0  1     2         3      4     5          6        7     8          9          10        11     12
       (set! res (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)))
     mdb
     "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id 
        FROM servers WHERE id=?;"
     id)
    res))

(define (tasks:get-server-records mdb run-id)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
       ;;                       0  1     2         3      4     5          6        7     8          9          10        11     12
       (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res)))

Modified tests.scm from [8ec0971889] to [5514a2a23d].

1016
1017
1018
1019
1020
1021
1022


1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
		      (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath)
		      (configf:write-alist tcfg tpath)))
		tcfg))))))
  
;; sort tests by priority and waiton
;; Move test specific stuff to a test unit FIXME one of these days
(define (tests:sort-by-priority-and-waiton test-records)


  (let* ((mungepriority (lambda (priority)
			  (if priority
			      (let ((tmp (any->number priority)))
				(if tmp tmp (begin (debug:print-error 0 *default-log-port* "bad priority value " priority ", using 0") 0)))
			      0)))
	 (all-tests      (hash-table-keys test-records))
	 (all-waited-on  (let loop ((hed (car all-tests))
				    (tal (cdr all-tests))
				    (res '()))
			   (let* ((trec    (hash-table-ref test-records hed))
				  (waitons (or (tests:testqueue-get-waitons trec) '())))
			     (if (null? tal)
				 (append res waitons)
				 (loop (car tal)(cdr tal)(append res waitons))))))
	 (sort-fn1 
	  (lambda (a b)
	    (let* ((a-record   (hash-table-ref test-records a))
		   (b-record   (hash-table-ref test-records b))
		   (a-waitons  (or (tests:testqueue-get-waitons a-record) '()))
		   (b-waitons  (or (tests:testqueue-get-waitons b-record) '()))
		   (a-config   (tests:testqueue-get-testconfig  a-record))
		   (b-config   (tests:testqueue-get-testconfig  b-record))
		   (a-raw-pri  (config-lookup a-config "requirements" "priority"))
		   (b-raw-pri  (config-lookup b-config "requirements" "priority"))
		   (a-priority (mungepriority a-raw-pri))
		   (b-priority (mungepriority b-raw-pri)))
	      (tests:testqueue-set-priority! a-record a-priority)
	      (tests:testqueue-set-priority! b-record b-priority)
	      ;; (debug:print 0 *default-log-port* "a=" a ", b=" b ", a-waitons=" a-waitons ", b-waitons=" b-waitons)
	      (cond
	       ;; is 
	       ((member a b-waitons)          ;; is b waiting on a?
		;; (debug:print 0 *default-log-port* "case1")
		#t)
	       ((member b a-waitons)          ;; is a waiting on b?
		;; (debug:print 0 *default-log-port* "case2")
		#f)
	       ((and (not (null? a-waitons))  ;; both have waitons - do not disturb
		     (not (null? b-waitons)))
		;; (debug:print 0 *default-log-port* "case2.1")
		#t)
	       ((and (null? a-waitons)        ;; no waitons for a but b has waitons
		     (not (null? b-waitons)))
		;; (debug:print 0 *default-log-port* "case3")
		#f)
	       ((and (not (null? a-waitons))  ;; a has waitons but b does not
		     (null? b-waitons)) 
		;; (debug:print 0 *default-log-port* "case4")
		#t)
	       ((not (eq? a-priority b-priority)) ;; use
		(> a-priority b-priority))
	       (else
		;; (debug:print 0 *default-log-port* "case5")
		(string>? a b))))))
	 
	 (sort-fn2
	  (lambda (a b)
	    (> (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records a)))
	       (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records b)))))))
    ;; (let ((dot-res (tests:run-dot (tests:tests->dot test-records) "plain")))
    ;;   (debug:print "dot-res=" dot-res))
    ;; (let ((data (map cdr (filter
    ;;     		  (lambda (x)(equal? "node" (car x)))
    ;;     		  (map string-split (tests:easy-dot test-records "plain"))))))
    ;;   (map car (sort data (lambda (a b)
    ;;     		    (> (string->number (caddr a))(string->number (caddr b)))))))
    ;; ))
    (sort all-tests sort-fn1))) ;; avoid dealing with deleted tests, look at the hash table

(define (tests:easy-dot test-records outtype)
  (let-values (((fd temp-path) (file-mkstemp (conc "/tmp/" (current-user-name) ".XXXXXX"))))
    (let ((all-testnames (hash-table-keys test-records))
	  (temp-port     (open-output-file* fd)))
      ;; (format temp-port "This file is ~A.~%" temp-path)
      (format temp-port "digraph tests {\n")







>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
		      (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath)
		      (configf:write-alist tcfg tpath)))
		tcfg))))))
  
;; sort tests by priority and waiton
;; Move test specific stuff to a test unit FIXME one of these days
(define (tests:sort-by-priority-and-waiton test-records)
  (if (eq? (hash-table-size test-records) 0)
      '()
      (let* ((mungepriority (lambda (priority)
			      (if priority
				  (let ((tmp (any->number priority)))
				    (if tmp tmp (begin (debug:print-error 0 *default-log-port* "bad priority value " priority ", using 0") 0)))
				  0)))
	     (all-tests      (hash-table-keys test-records))
	     (all-waited-on  (let loop ((hed (car all-tests))
					(tal (cdr all-tests))
					(res '()))
			       (let* ((trec    (hash-table-ref test-records hed))
				      (waitons (or (tests:testqueue-get-waitons trec) '())))
				 (if (null? tal)
				     (append res waitons)
				     (loop (car tal)(cdr tal)(append res waitons))))))
	     (sort-fn1 
	      (lambda (a b)
		(let* ((a-record   (hash-table-ref test-records a))
		       (b-record   (hash-table-ref test-records b))
		       (a-waitons  (or (tests:testqueue-get-waitons a-record) '()))
		       (b-waitons  (or (tests:testqueue-get-waitons b-record) '()))
		       (a-config   (tests:testqueue-get-testconfig  a-record))
		       (b-config   (tests:testqueue-get-testconfig  b-record))
		       (a-raw-pri  (config-lookup a-config "requirements" "priority"))
		       (b-raw-pri  (config-lookup b-config "requirements" "priority"))
		       (a-priority (mungepriority a-raw-pri))
		       (b-priority (mungepriority b-raw-pri)))
		  (tests:testqueue-set-priority! a-record a-priority)
		  (tests:testqueue-set-priority! b-record b-priority)
		  ;; (debug:print 0 *default-log-port* "a=" a ", b=" b ", a-waitons=" a-waitons ", b-waitons=" b-waitons)
		  (cond
		   ;; is 
		   ((member a b-waitons)          ;; is b waiting on a?
		    ;; (debug:print 0 *default-log-port* "case1")
		    #t)
		   ((member b a-waitons)          ;; is a waiting on b?
		    ;; (debug:print 0 *default-log-port* "case2")
		    #f)
		   ((and (not (null? a-waitons))  ;; both have waitons - do not disturb
			 (not (null? b-waitons)))
		    ;; (debug:print 0 *default-log-port* "case2.1")
		    #t)
		   ((and (null? a-waitons)        ;; no waitons for a but b has waitons
			 (not (null? b-waitons)))
		    ;; (debug:print 0 *default-log-port* "case3")
		    #f)
		   ((and (not (null? a-waitons))  ;; a has waitons but b does not
			 (null? b-waitons)) 
		    ;; (debug:print 0 *default-log-port* "case4")
		    #t)
		   ((not (eq? a-priority b-priority)) ;; use
		    (> a-priority b-priority))
		   (else
		    ;; (debug:print 0 *default-log-port* "case5")
		    (string>? a b))))))
	     
	     (sort-fn2
	      (lambda (a b)
		(> (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records a)))
		   (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records b)))))))
	;; (let ((dot-res (tests:run-dot (tests:tests->dot test-records) "plain")))
	;;   (debug:print "dot-res=" dot-res))
	;; (let ((data (map cdr (filter
	;;     		  (lambda (x)(equal? "node" (car x)))
	;;     		  (map string-split (tests:easy-dot test-records "plain"))))))
	;;   (map car (sort data (lambda (a b)
	;;     		    (> (string->number (caddr a))(string->number (caddr b)))))))
	;; ))
	(sort all-tests sort-fn1)))) ;; avoid dealing with deleted tests, look at the hash table

(define (tests:easy-dot test-records outtype)
  (let-values (((fd temp-path) (file-mkstemp (conc "/tmp/" (current-user-name) ".XXXXXX"))))
    (let ((all-testnames (hash-table-keys test-records))
	  (temp-port     (open-output-file* fd)))
      ;; (format temp-port "This file is ~A.~%" temp-path)
      (format temp-port "digraph tests {\n")

Modified tests/rununittest.sh from [751af2da02] to [a3ce11ff80].

1
2
3
4

5
6
7
8
9
10
11
12

13
14
15
16
17
18
19
20
21
#!/bin/bash

# Usage: rununittest.sh testname debuglevel
#


# put megatest on path from correct location
mtbindir=$(readlink -f ../bin)

export PATH="${mtbindir}:$PATH"

# Clean setup
#

dbdir=$(cd simplerun;megatest -show-config -section setup -var linktree)/.db
rm -f simplerun/megatest.db simplerun/monitor.db simplerun/db/monitor.db $dbdir/*.db
rm -rf simplelinks/ simpleruns/ simplerun/db/ $dbdir
mkdir -p simplelinks simpleruns
(cd simplerun;cp ../../*_records.scm .;perl -pi.bak -e 's/define-inline/define/' *_records.scm)
(cd simplerun;cp ../../altdb.scm .)

# Run the test $1 is the unit test to run
cd simplerun;echo '(load "../tests.scm")' | ../../bin/megatest -repl -debug $2 $1




>








>
|
|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#!/bin/bash

# Usage: rununittest.sh testname debuglevel
#
banner $1

# put megatest on path from correct location
mtbindir=$(readlink -f ../bin)

export PATH="${mtbindir}:$PATH"

# Clean setup
#
dbdir=$(echo /tmp/$USER/megatest_localdb/simplerun/.[a-zA-Z]*/)
echo "dbdir=$dbdir"
rm -f simplerun/megatest.db simplerun/monitor.db simplerun/db/monitor.db $dbdir
rm -rf simplelinks/ simpleruns/ simplerun/db/ $dbdir
mkdir -p simplelinks simpleruns
(cd simplerun;cp ../../*_records.scm .;perl -pi.bak -e 's/define-inline/define/' *_records.scm)
(cd simplerun;cp ../../altdb.scm .)

# Run the test $1 is the unit test to run
cd simplerun;echo '(load "../tests.scm")' | ../../bin/megatest -repl -debug $2 $1

Modified tests/unittests/basicserver.scm from [85fa769c5b] to [723ba8b37f].

8
9
10
11
12
13
14

15
16






17




18







19
20

21







22





































23

24
25
26
27
28
29
30

(delete-file* "logs/1.log")
(define run-id 1)

(test "setup for run" #t (begin (launch:setup)
 				(string? (getenv "MT_RUN_AREA_HOME"))))


;; NON Server tests go here







(test #f #f (db:dbdat-get-path *db*))




(test #f #f (db:get-run-name-from-id *db* run-id))







;; (test #f '("SYSTEM" "RELEASE") (rmt:get-keys))


;; (exit)













































;; Server tests go here 

(for-each
 (lambda (run-id)
   (test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id))
   (server:kind-run run-id)
   (test "did server start within 20 seconds?"
	 #t
	 (let loop ((remtries 20)







>
|

>
>
>
>
>
>
|
>
>
>
>
|
>
>
>
>
>
>
>
|

>
|
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94

(delete-file* "logs/1.log")
(define run-id 1)

(test "setup for run" #t (begin (launch:setup)
 				(string? (getenv "MT_RUN_AREA_HOME"))))

(test #f #t (and (server:kind-run *toppath*) #t))


(define user    (current-user-name))
(define runname "mytestrun")
(define keys    (rmt:get-keys))
(define runinfo #f)
(define keyvals '(("SYSTEM" "abc")("RELEASE" "def")))
(define header  (list "SYSTEM" "RELEASE" "id" "runname" "state" "status" "owner" "event_time"))

;; Setup
;;
;; (test #f #f  (not (client:setup run-id)))
;; (test #f #f  (not (hash-table-ref/default *runremote* run-id #f)))

;; Login
;;
(test #f'(#t "successful login")
      (rmt:login run-id))

;; Keys
;;
(test #f '("SYSTEM" "RELEASE")  (rmt:get-keys))

;; No data in db
;;
(test #f '() (rmt:get-all-run-ids))
(test #f #f  (rmt:get-run-name-from-id run-id))
(test #f 
      (vector
       header
       (vector #f #f #f #f))
      (rmt:get-run-info run-id))

;; Insert data into db
;;
(test #f 1 (rmt:register-run keyvals runname "new" "n/a" user))
;; (test #f #f (rmt:get-runs-by-patt keys runname))
(test #f #t (rmt:general-call 'register-test run-id run-id "test-one" ""))
(define test-one-id #f)
(test #f 1  (let ((test-id (rmt:get-test-id run-id "test-one" "")))
	      (set! test-one-id test-id)
	      test-id))
(define test-one-rec #f)
(test #f "test-one" (let ((test-rec (rmt:get-test-info-by-id run-id test-one-id)))
		      (set! test-one-rec test-rec)
		      (vector-ref test-rec 2)))

;; With data in db
;;
(print "Using runame=" runname)
(test #f '(1)    (rmt:get-all-run-ids))
(test #f runname (rmt:get-run-name-from-id run-id))
(test #f 
      runname
      (let ((run-info (rmt:get-run-info run-id)))
	(db:get-value-by-header (db:get-rows run-info)
				(db:get-header run-info)
				"runname")))

;; test killing server
;;
(for-each
 (lambda (run-id)
   (test #f #t (and (tasks:kill-server-run-id run-id) #t))
   (test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)))
 (list 0 1))

;; Tests to assess reading/writing while servers are starting/stopping
;; NO LONGER APPLICABLE

;; Server tests go here
(define (server-tests-dont-run-right-now)
(for-each
 (lambda (run-id)
   (test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id))
   (server:kind-run run-id)
   (test "did server start within 20 seconds?"
	 #t
	 (let loop ((remtries 20)
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127

128
129
130
131
132
133
134
	       (begin
		 (if (> remtries 0)
		     (begin
		       (thread-sleep! 1.1)
		       (loop (- remtries 1)(tasks:get-server (db:delay-if-busy (tasks:open-db)) run-id)))
		     res)))))
   )
 (list 0 1))

(define user    (current-user-name))
(define runname "mytestrun")
(define keys    (rmt:get-keys))
(define runinfo #f)
(define keyvals '(("SYSTEM" "abc")("RELEASE" "def")))
(define header  (list "SYSTEM" "RELEASE" "id" "runname" "state" "status" "owner" "event_time"))

;; Setup
;;
(test #f #f  (not (client:setup run-id)))
(test #f #f  (not (hash-table-ref/default *runremote* run-id #f)))

;; Login
;;
(test #f'(#t "successful login")
      (rmt:login-no-auto-client-setup (hash-table-ref/default *runremote* run-id #f) run-id))
(test #f '(#t "successful login")
      (rmt:login run-id))

;; Keys
;;
(test #f '("SYSTEM" "RELEASE")  (rmt:get-keys))

;; No data in db
;;
(test #f '() (rmt:get-all-run-ids))
(test #f #f  (rmt:get-run-name-from-id run-id))
(test #f 
      (vector
       header
       (vector #f #f #f #f))
      (rmt:get-run-info run-id))

;; Insert data into db
;;
(test #f 1 (rmt:register-run keyvals runname "new" "n/a" user))
;; (test #f #f (rmt:get-runs-by-patt keys runname))
(test #f #t (rmt:general-call 'register-test run-id run-id "test-one" ""))
(define test-one-id #f)
(test #f 30001  (let ((test-id (rmt:get-test-id run-id "test-one" "")))
	      (set! test-one-id test-id)
	      test-id))
(define test-one-rec #f)
(test #f "test-one" (let ((test-rec (rmt:get-test-info-by-id run-id test-one-id)))
		      (set! test-one-rec test-rec)
		      (vector-ref test-rec 2)))

;; With data in db
;;
(print "Using runame=" runname)
(test #f '(1)    (rmt:get-all-run-ids))
(test #f runname (rmt:get-run-name-from-id run-id))
(test #f 
      runname
      (let ((run-info (rmt:get-run-info run-id)))
	(db:get-value-by-header (db:get-rows run-info)
				(db:get-header run-info)
				"runname")))

(for-each (lambda (run-id)
;; test killing server
;;
(tasks:kill-server-run-id run-id)

(test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id))
)
(list 0 1))

;; Tests to assess reading/writing while servers are starting/stopping
(define start-time (current-seconds))

(let loop ((test-state 'start))
  (let* ((server-dats (tasks:get-server-records (db:delay-if-busy (tasks:open-db)) run-id))
	 (first-dat   (if (not (null? server-dats))
			  (car server-dats)
			  #f)))
    (map (lambda (dat)
	   (apply print (intersperse (vector->list dat) ", ")))







|

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

>







113
114
115
116
117
118
119
120
121





































































122
123
124
125
126
127
128
129
130
	       (begin
		 (if (> remtries 0)
		     (begin
		       (thread-sleep! 1.1)
		       (loop (- remtries 1)(tasks:get-server (db:delay-if-busy (tasks:open-db)) run-id)))
		     res)))))
   )
 (list 0 1)))






































































(define start-time (current-seconds))
(define (reading-writing-while-server-starting-stopping-dont-run-now)
(let loop ((test-state 'start))
  (let* ((server-dats (tasks:get-server-records (db:delay-if-busy (tasks:open-db)) run-id))
	 (first-dat   (if (not (null? server-dats))
			  (car server-dats)
			  #f)))
    (map (lambda (dat)
	   (apply print (intersperse (vector->list dat) ", ")))
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
	  (rmt:kill-server run-id)
	  (loop 'server-shutdown))
	 ((shutting-down)
	  (loop test-state))
	 (else (print "Don't know what to do if get here"))))
      ((server-shutdown)
       (loop test-state)))))

;;======================================================================
;; END OF TESTS
;;======================================================================


;; (test #f #f (client:setup run-id))








|







143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
	  (rmt:kill-server run-id)
	  (loop 'server-shutdown))
	 ((shutting-down)
	  (loop test-state))
	 (else (print "Don't know what to do if get here"))))
      ((server-shutdown)
       (loop test-state)))))
)
;;======================================================================
;; END OF TESTS
;;======================================================================


;; (test #f #f (client:setup run-id))

Modified tests/unittests/runs.scm from [75d6997ca7] to [fb0f09ae17].

1


2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
(define keys (rmt:get-keys))



(test "get all legal tests" (list "test1" "test2") (sort (hash-table-keys (tests:get-all)) string<=?))

(test "register-run" #t (number?
			 (rmt:register-run 
					  '(("SYSTEM" "key1")("RELEASE" "key2"))
					  "myrun" 
					  "new"
					  "n/a" 
					  "bob")))

(test #f #t             (rmt:register-test 1 "nada" ""))
(test #f 30001          (rmt:get-test-id 1 "nada" ""))
(test #f "NOT_STARTED"  (vector-ref (rmt:get-test-info-by-id 1 30001) 3)) ;; "nada" "") 3))

(test #f "FOO LIKE 'abc%def'" (db:patt->like "FOO" "abc%def"))
(test #f "key2" (vector-ref (car (vector-ref (mt:get-runs-by-patt '("SYSTEM" "RELEASE") "%" "key1/key2") 1)) 1))

(test #f "SYSTEM,RELEASE,id,runname,state,status,owner,event_time" (car (runs:get-std-run-fields keys '("id" "runname" "state" "status" "owner" "event_time"))))
(test #f #t (runs:operate-on 'print "%" "%" "%"))


>
>












|
|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
(define keys (rmt:get-keys))

(test #f #t (and (server:kind-run *toppath*) #t))

(test "get all legal tests" (list "test1" "test2") (sort (hash-table-keys (tests:get-all)) string<=?))

(test "register-run" #t (number?
			 (rmt:register-run 
					  '(("SYSTEM" "key1")("RELEASE" "key2"))
					  "myrun" 
					  "new"
					  "n/a" 
					  "bob")))

(test #f #t             (rmt:register-test 1 "nada" ""))
(test #f 1              (rmt:get-test-id 1 "nada" ""))
(test #f "NOT_STARTED"  (vector-ref (rmt:get-test-info-by-id 1 1) 3))

(test #f "FOO LIKE 'abc%def'" (db:patt->like "FOO" "abc%def"))
(test #f "key2" (vector-ref (car (vector-ref (mt:get-runs-by-patt '("SYSTEM" "RELEASE") "%" "key1/key2") 1)) 1))

(test #f "SYSTEM,RELEASE,id,runname,state,status,owner,event_time" (car (runs:get-std-run-fields keys '("id" "runname" "state" "status" "owner" "event_time"))))
(test #f #t (runs:operate-on 'print "%" "%" "%"))

47
48
49
50
51
52
53
54
55
56
57
58
59
60
61

;; force keepgoing
; (hash-table-set! args:arg-hash "-keepgoing" #t)
(hash-table-set! args:arg-hash "-itempatt" "%")
(hash-table-set! args:arg-hash "-testpatt" "%")
(hash-table-set! args:arg-hash "-target" "ubuntu/r1.2") ;; SYSTEM/RELEASE
(hash-table-set! args:arg-hash "-runname" "testrun")
(test "Setup for a run"       #t (begin (launch:setup-for-run) #t))

(define *tdb* #f)
(define keyvals #f)
(test "target->keyval" #t (let ((kv (keys:target->keyval keys (args:get-arg "-target"))))
			    (print "keyvals=" kv ", keys=" keys)
			    (set! keyvals kv)(list? keyvals)))








|







49
50
51
52
53
54
55
56
57
58
59
60
61
62
63

;; force keepgoing
; (hash-table-set! args:arg-hash "-keepgoing" #t)
(hash-table-set! args:arg-hash "-itempatt" "%")
(hash-table-set! args:arg-hash "-testpatt" "%")
(hash-table-set! args:arg-hash "-target" "ubuntu/r1.2") ;; SYSTEM/RELEASE
(hash-table-set! args:arg-hash "-runname" "testrun")
(test "Setup for a run"       #t (string? (launch:setup)))

(define *tdb* #f)
(define keyvals #f)
(test "target->keyval" #t (let ((kv (keys:target->keyval keys (args:get-arg "-target"))))
			    (print "keyvals=" kv ", keys=" keys)
			    (set! keyvals kv)(list? keyvals)))

149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173





174
175
176
177
178
179
180
181
182
183
   '("ABORT"     "FAIL"      "FAIL"       "FAIL"       "PASS"    "FAIL"    "ABORT"     "AUTO")))


(test "launch-test" #t
      (string? 
       (file-exists?
	;; (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
	(launch-test 30001 1 rinfo keyvals "run1" tconfig "test1" test-path '() (make-hash-table)))))

;;======================================================================
;; M O R E   R E M O T E   C A L  L S
;;======================================================================

(test #f '("COMPLETED" "PASS")
      (begin
	(rmt:set-tests-state-status 1 '("rollup") "COMPLETED" "AUTO" "COMPLETED" "PASS")
	(get-state-status 1 "rollup" "")))
(test #f #t (rmt:top-test-set-per-pf-counts 1 "rollup"))

;;======================================================================
;; T E S T   I T E M M A P
;;======================================================================

(test #f "a/b/c"       (db:multi-pattern-apply   "d/e/f" "d a\ne b\nf c"))
(test #f "blah/foo/bar/baz" (db:convert-test-itempath "blah/baz/bar/foo" "^([^/]+)/([^/]+)/([^/]+)$ \\3/\\2/\\1"))





(test #f #t (db:compare-itempaths "abc/def/123" "abc/ghi/123" "ghi def"))
(test #f #f (db:compare-itempaths "some/5" "item/5" ".*/"))
(test #f #t (db:compare-itempaths "some/5" "item/5" ".*/ some/"))

(test #f '() (rmt:get-prereqs-not-met 1 '("rollup") "some/5" mode: '(toplevel)  itemmap: ".*/" "/"))
(test #f '() (rmt:get-prereqs-not-met 1 '("rollup") "some/5" mode: '(normal)    itemmap: ".*/" "/"))
(test #f '() (rmt:get-prereqs-not-met 1 '("rollup") "some/5" mode: '(itemmatch) itemmap: ".*/" "/"))
(test #f '() (rmt:get-prereqs-not-met 1 '("rollup") "some/5" mode: '(itemwait)  itemmap: ".*/" "/"))

(exit 1)







|

















>
>
>
>
>
|
|
|







151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
   '("ABORT"     "FAIL"      "FAIL"       "FAIL"       "PASS"    "FAIL"    "ABORT"     "AUTO")))


(test "launch-test" #t
      (string? 
       (file-exists?
	;; (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
	(launch-test 1 1 rinfo keyvals "run1" tconfig "test1" test-path '() (make-hash-table)))))

;;======================================================================
;; M O R E   R E M O T E   C A L  L S
;;======================================================================

(test #f '("COMPLETED" "PASS")
      (begin
	(rmt:set-tests-state-status 1 '("rollup") "COMPLETED" "AUTO" "COMPLETED" "PASS")
	(get-state-status 1 "rollup" "")))
(test #f #t (rmt:top-test-set-per-pf-counts 1 "rollup"))

;;======================================================================
;; T E S T   I T E M M A P
;;======================================================================

(test #f "a/b/c"       (db:multi-pattern-apply   "d/e/f" "d a\ne b\nf c"))
(test #f "blah/foo/bar/baz" (db:convert-test-itempath "blah/baz/bar/foo" "^([^/]+)/([^/]+)/([^/]+)$ \\3/\\2/\\1"))
(define itemmaps (alist->hash-table
		  '(("test1" "ghi def")
		    ("test2" ".*/")
		    ("test3" ".*/ some/"))))

(test #f #t (db:compare-itempaths "test1" "abc/def/123" "abc/ghi/123" itemmaps))
(test #f #f (db:compare-itempaths "test2" "some/5" "item/5" ".*/" itemmaps))
(test #f #t (db:compare-itempaths "test3" "some/5" "item/5" ".*/ some/" itemmaps))

(test #f '() (rmt:get-prereqs-not-met 1 '("rollup") "some/5" mode: '(toplevel)  itemmap: ".*/" "/"))
(test #f '() (rmt:get-prereqs-not-met 1 '("rollup") "some/5" mode: '(normal)    itemmap: ".*/" "/"))
(test #f '() (rmt:get-prereqs-not-met 1 '("rollup") "some/5" mode: '(itemmatch) itemmap: ".*/" "/"))
(test #f '() (rmt:get-prereqs-not-met 1 '("rollup") "some/5" mode: '(itemwait)  itemmap: ".*/" "/"))

(exit 1)