Megatest

Check-in [de910838a1]
Login
Overview
Comment:Stripped server stuff out to get db access down to bare metal
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.62-no-rpc
Files: files | file ages | folders
SHA1: de910838a10459be40dc944ea75a0a39c47a87e0
User & Date: matt on 2016-11-21 20:06:03
Other Links: branch diff | manifest | tags
Context
2016-11-21
22:18
Mostly working after stripping even more junk out ... check-in: 01fd2fa26b user: matt tags: v1.62-no-rpc
20:06
Stripped server stuff out to get db access down to bare metal check-in: de910838a1 user: matt tags: v1.62-no-rpc
14:37
Experimental removal of most of the inmem db stuff check-in: fdb279df36 user: mrwellan tags: v1.62-no-rpc
Changes

Modified common.scm from [7c2ea3f6ac] to [521f7397cb].

112
113
114
115
116
117
118

119
120
121
122
123
124
125
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126







+







(define *server-info*       #f)
(define *time-to-exit*      #f)
(define *received-response* #f)
(define *default-numtries*  10)
(define *server-run*        #t)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))
(define *home-host*         #f)

(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id
626
627
628
629
630
631
632


633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649


















650
651
652
653
654
655
656
627
628
629
630
631
632
633
634
635

















636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660







+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







	      (debug:print-error 0 *default-log-port* "Invalid target, spaces or blanks not allowed \"" target "\", target should be: " (string-intersperse keys "/") ", have " tlist " for elements")
	      #f)
	    #f))))

;; logic for getting homehost. Returns (host . at-home)
;;
(define (common:get-homehost)
  (if *home-host*
      *home-host*
  (let* ((currhost (get-host-name))
	 (bestadrs (server:get-best-guess-address currhost))
	 ;; first look in config, then look in file .homehost, create it if not found
	 (homehost (or (configf:lookup *configdat* "server" "homehost" )
		       (let ((hhf (conc *toppath* "/.homehost")))
			 (if (file-exists? hhf)
			     (with-input-from-file hhf read-line)
			     (if (file-write-access? *toppath*)
				 (begin
				   (with-output-to-file hhf
				     (lambda ()
				       (print bestadrs)))
				   (common:get-homehost))
				 #f)))))
	 (at-home  (or (equal? homehost currhost)
		       (equal? homehost bestadrs))))
    (cons homehost at-home)))
      (let* ((currhost (get-host-name))
	     (bestadrs (server:get-best-guess-address currhost))
	     ;; first look in config, then look in file .homehost, create it if not found
	     (homehost (or (configf:lookup *configdat* "server" "homehost" )
			   (let ((hhf (conc *toppath* "/.homehost")))
			     (if (file-exists? hhf)
				 (with-input-from-file hhf read-line)
				 (if (file-write-access? *toppath*)
				     (begin
				       (with-output-to-file hhf
					 (lambda ()
					   (print bestadrs)))
				       (common:get-homehost))
				     #f)))))
	     (at-home  (or (equal? homehost currhost)
			   (equal? homehost bestadrs))))
	(set! *home-host* (cons homehost at-home))
	*home-host*)))

;;======================================================================
;; M I S C   L I S T S
;;======================================================================

;; items in lista are matched value and position in listb
;; return the remaining items in listb or #f

Modified dashboard.scm from [ef050cd64e] to [df1301b5c8].

81
82
83
84
85
86
87

88
89
90
91
92
93
94
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95







+







			"-use-server"
			"-guimonitor"
			"-main"
			"-v"
			"-q"
			"-use-local"
			"-skip-version-check"
			"-repl"
			)
		 args:arg-hash
		 0))

(if (not (null? remargs))
    (begin
      (print "Unrecognised arguments: " (string-intersperse remargs " "))
3353
3354
3355
3356
3357
3358
3359


3360

3361
3354
3355
3356
3357
3358
3359
3360
3361
3362

3363
3364







+
+
-
+

	(thread-join! th2)))))

;; ease debugging by loading ~/.dashboardrc
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc")))
  (if (file-exists? debugcontrolf)
      (load debugcontrolf)))

(if (args:get-arg "-repl")
    (repl)
(main)
    (main))

Modified db.scm from [dad3fc289f] to [dbf520bf9b].

240
241
242
243
244
245
246
247

248
249
250
251
252
253
254
240
241
242
243
244
245
246

247
248
249
250
251
252
253
254







-
+







;;     (dbr:dbstruct-rundb-set!  dbstruct (cons db dbfile))
;;     (dbr:dbstruct-inuse-set!  dbstruct #t)
;;     (dbr:dbstruct-olddb-set!  dbstruct olddb)
;;     ;;; (mutex-unlock! *rundb-mutex*) ;;; why did we need a mutex on opening db's?
;;     (db:sync-tables db:sync-tests-only *megatest-db* db)
;;     db))

;; This routine creates the db if not already present. It is only called if the db is not already ls opened
;; This routine creates the db if not already present. It is only called if the db is not already opened
;;
(define (db:open-db dbstruct) ;;  (conc *toppath* "/megatest.db") (car *configinfo*))) 
  (let ((mdb (dbr:dbstruct-tmpdb dbstruct))) ;; RA => Returns the first reference in dbstruct
    (if mdb
	mdb
        ;; (mutex-lock! *rundb-mutex*)
        (let* ((dbpath       (db:dbfile-path)) ;;  0))

Modified rmt.scm from [db1278b247] to [80a7990c07].

52
53
54
55
56
57
58


59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77






















78
79

80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152













































































153
154
155
156
157
158
159
52
53
54
55
56
57
58
59
60



















61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82


83









































































84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167







+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
  ;; clean out old connections
  ;; (mutex-lock! *db-multi-sync-mutex*)
  (rmt:open-qry-close-locally cmd (if rid rid 0) params))

  (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref/default *runremote* run-id #f)))
         (if (and (vector? connection)
        	  (< (http-transport:server-dat-get-last-access connection) expire-time))
             (begin
               (debug:print-info 0 *default-log-port* "Discarding connection to server for run-id " run-id ", too long between accesses")
               ;; bb- disabling nanomsg
               ;; SHOULD CLOSE THE CONNECTION HERE 
	       ;; (case *transport-type*
	       ;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket 
	       ;;  		   (hash-table-ref *runremote* run-id)))))
               (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))
  ;; (mutex-unlock! *db-multi-sync-mutex*)
  ;; (mutex-lock! *send-receive-mutex*)
  (let* ((run-id          (if rid rid 0))
	 (connection-info (rmt:get-connection-info run-id)))
;;   (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
;;     (for-each 
;;      (lambda (run-id)
;;        (let ((connection (hash-table-ref/default *runremote* run-id #f)))
;;          (if (and (vector? connection)
;;         	  (< (http-transport:server-dat-get-last-access connection) expire-time))
;;              (begin
;;                (debug:print-info 0 *default-log-port* "Discarding connection to server for run-id " run-id ", too long between accesses")
;;                ;; bb- disabling nanomsg
;;                ;; SHOULD CLOSE THE CONNECTION HERE 
;; 	       ;; (case *transport-type*
;; 	       ;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket 
;; 	       ;;  		   (hash-table-ref *runremote* run-id)))))
;;                (hash-table-delete! *runremote* run-id)))))
;;      (hash-table-keys *runremote*)))
;;   ;; (mutex-unlock! *db-multi-sync-mutex*)
;;   ;; (mutex-lock! *send-receive-mutex*)
;;   (let* ((run-id          (if rid rid 0))
;; 	 (home-host       (common:get-homehost))
;; 	 (connection-info (if (cdr home-host) ;; we are on the home-host
;; 			      #f
;; 			      (rmt:get-connection-info run-id))))
    ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)
    (if connection-info
;;     (cond
	;; use the server if have connection info
	(let* ((dat     (case *transport-type*
			  ((http)(condition-case
				  (http-transport:client-api-send-receive run-id connection-info cmd params)
				  ((commfail)(vector #f "communications fail"))
				  ((exn)(vector #f "other fail"))))
			  ;; ((nmsg)(condition-case
			  ;;         (nmsg-transport:client-api-send-receive run-id connection-info cmd params)
			  ;;         ((timeout)(vector #f "timeout talking to server"))))
			  (else  (exit))))
	       (success (if (vector? dat) (vector-ref dat 0) #f))
	       (res     (if (vector? dat) (vector-ref dat 1) #f)))
	  (if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info))
	  (if success
	      (begin
		;; (mutex-unlock! *send-receive-mutex*)
		(case *transport-type* 
		  ((http) res) ;; (db:string->obj res))
		  ;; ((nmsg) res)
                  )) ;; (vector-ref res 1)))
	      (begin ;; let ((new-connection-info (client:setup run-id)))
		(debug:print 0 *default-log-port* "WARNING: Communication failed, trying call to rmt:send-receive again.")
		;; (case *transport-type*
		;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info))))
		(hash-table-delete! *runremote* run-id) ;; don't keep using the same connection
		;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. 
		;; (if (eq? (modulo attemptnum 5) 0)
		;;     (tasks:kill-server-run-id run-id tag: "api-send-receive-failed"))
		;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications
		(tasks:start-and-wait-for-server (tasks:open-db) run-id 15)
		;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1))))))

		;; no longer killing the server in http-transport:client-api-send-receive
		;; may kill it here but what are the criteria?
		;; start with three calls then kill server
		;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id))
		;; (thread-sleep! 2)
		(rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1)))))
	;; no connection info? try to start a server, or access locally if no
	;; server and the query is read-only
	;;
	;; Note: The tasks db was checked for a server in starting mode in the rmt:get-connection-info call
	;;
	(if (and (< attemptnum 15)
		 (member cmd api:write-queries))
	    (let ((homehost  (common:get-homehost))) ;; faststart (configf:lookup *configdat* "server" "faststart")))
	      (hash-table-delete! *runremote* run-id)
	      ;; (mutex-unlock! *send-receive-mutex*)
	      (if (not (cdr homehost)) ;; we always require a server if not on homehost ;; (and faststart (equal? faststart "no"))
		  (begin
		    (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10)
		    (thread-sleep! (random 5)) ;; give some time to settle and minimize collison?
		    (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
		  ;; NB - probably can remove the query time stuff but need to discuss it ....
		  (let ((start-time (current-milliseconds))
			(max-query  (string->number (or (configf:lookup *configdat* "server" "server-query-threshold")
							"300")))
			(newres     (rmt:open-qry-close-locally cmd run-id params)))
		    (let ((delta (- (current-milliseconds) start-time)))
		      (if (> delta max-query)
			  (begin
			    (debug:print-info 0 *default-log-port* "WARNING: long query times, you may have an overloaded homehost.") ;; Starting server as query time " delta " is over the limit of " max-query)
			    ;; (server:kind-run run-id)))
			    ))
		      ;; return the result!
		      newres)
		    )))
	    (begin
	      ;; (debug:print-error 0 *default-log-port* "Communication failed!")
	      ;; (mutex-unlock! *send-receive-mutex*)
	      ;; (exit)
	      (rmt:open-qry-close-locally cmd run-id params)
	      )))))
;;      (home-host        (rmt:open-qry-close-locally cmd run-id params))
;;      (connection-info
;;       ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)
;;       ;; use the server if have connection info
;;       (let* ((dat     (case *transport-type*
;; 			((http)(condition-case
;; 				(http-transport:client-api-send-receive run-id connection-info cmd params)
;; 				((commfail)(vector #f "communications fail"))
;; 				((exn)(vector #f "other fail"))))
;; 			;; ((nmsg)(condition-case
;; 			;;         (nmsg-transport:client-api-send-receive run-id connection-info cmd params)
;; 			;;         ((timeout)(vector #f "timeout talking to server"))))
;; 			(else  (exit))))
;; 	     (success (if (vector? dat) (vector-ref dat 0) #f))
;; 	     (res     (if (vector? dat) (vector-ref dat 1) #f)))
;; 	(if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info))
;; 	(if success
;; 	    (begin
;; 	      ;; (mutex-unlock! *send-receive-mutex*)
;; 	      (case *transport-type* 
;; 		((http) res) ;; (db:string->obj res))
;; 		;; ((nmsg) res)
;; 		)) ;; (vector-ref res 1)))
;; 	    (begin ;; let ((new-connection-info (client:setup run-id)))
;; 	      (debug:print 0 *default-log-port* "WARNING: Communication failed, trying call to rmt:send-receive again.")
;; 	      ;; (case *transport-type*
;; 	      ;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info))))
;; 	      (hash-table-delete! *runremote* run-id) ;; don't keep using the same connection
;; 	      ;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. 
;; 	      ;; (if (eq? (modulo attemptnum 5) 0)
;; 	      ;;     (tasks:kill-server-run-id run-id tag: "api-send-receive-failed"))
;; 	      ;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications
;; 	      (tasks:start-and-wait-for-server (tasks:open-db) run-id 15)
;; 	      ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1))))))
;; 	      
;; 	      ;; no longer killing the server in http-transport:client-api-send-receive
;; 	      ;; may kill it here but what are the criteria?
;; 	      ;; start with three calls then kill server
;; 	      ;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id))
;; 	      ;; (thread-sleep! 2)
;; 	      (rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1))))))
;;      (else
;;      ;; no connection info? try to start a server, or access locally if no
;;       ;; server and the query is read-only
;;       ;;
;;       ;; Note: The tasks db was checked for a server in starting mode in the rmt:get-connection-info call
;;       ;;
;;       (if (and (< attemptnum 15)
;; 	       (member cmd api:write-queries))
;; 	  (let ((homehost  (common:get-homehost))) ;; faststart (configf:lookup *configdat* "server" "faststart")))
;; 	      (hash-table-delete! *runremote* run-id)
;; 	      ;; (mutex-unlock! *send-receive-mutex*)
;; 	      (if (not (cdr homehost)) ;; we always require a server if not on homehost ;; (and faststart (equal? faststart "no"))
;; 		  (begin
;; 		    (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10)
;; 		    (thread-sleep! (random 5)) ;; give some time to settle and minimize collison?
;; 		    (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
;; 		  ;; NB - probably can remove the query time stuff but need to discuss it ....
;; 		  (let ((start-time (current-milliseconds))
;; 			(max-query  (string->number (or (configf:lookup *configdat* "server" "server-query-threshold")
;; 							"300")))
;; 			(newres     (rmt:open-qry-close-locally cmd run-id params)))
;; 		    (let ((delta (- (current-milliseconds) start-time)))
;; 		      (if (> delta max-query)
;; 			  (begin
;; 			    (debug:print-info 0 *default-log-port* "WARNING: long query times, you may have an overloaded homehost.") ;; Starting server as query time " delta " is over the limit of " max-query)
;; 			    ;; (server:kind-run run-id)))
;; 			    ))
;; 		      ;; return the result!
;; 		      newres)
;; 		    )))
;; 	  (begin
;; 	    ;; (debug:print-error 0 *default-log-port* "Communication failed!")
;; 	    ;; (mutex-unlock! *send-receive-mutex*)
;; 	    ;; (exit)
;; 	    (rmt:open-qry-close-locally cmd run-id params)
;; 	    ))))))

(define (rmt:update-db-stats run-id rawcmd params duration)
  (mutex-lock! *db-stats-mutex*)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")