Megatest

Check-in [de910838a1]
Login
Overview
Comment:Stripped server stuff out to get db access down to bare metal
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.62-no-rpc
Files: files | file ages | folders
SHA1: de910838a10459be40dc944ea75a0a39c47a87e0
User & Date: matt on 2016-11-21 20:06:03
Other Links: branch diff | manifest | tags
Context
2016-11-21
22:18
Mostly working after stripping even more junk out ... check-in: 01fd2fa26b user: matt tags: v1.62-no-rpc
20:06
Stripped server stuff out to get db access down to bare metal check-in: de910838a1 user: matt tags: v1.62-no-rpc
14:37
Experimental removal of most of the inmem db stuff check-in: fdb279df36 user: mrwellan tags: v1.62-no-rpc
Changes

Modified common.scm from [7c2ea3f6ac] to [521f7397cb].

112
113
114
115
116
117
118

119
120
121
122
123
124
125
(define *server-info*       #f)
(define *time-to-exit*      #f)
(define *received-response* #f)
(define *default-numtries*  10)
(define *server-run*        #t)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))


(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id







>







112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
(define *server-info*       #f)
(define *time-to-exit*      #f)
(define *received-response* #f)
(define *default-numtries*  10)
(define *server-run*        #t)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))
(define *home-host*         #f)

(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id
626
627
628
629
630
631
632


633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649

650
651
652
653
654
655
656
	      (debug:print-error 0 *default-log-port* "Invalid target, spaces or blanks not allowed \"" target "\", target should be: " (string-intersperse keys "/") ", have " tlist " for elements")
	      #f)
	    #f))))

;; logic for getting homehost. Returns (host . at-home)
;;
(define (common:get-homehost)


  (let* ((currhost (get-host-name))
	 (bestadrs (server:get-best-guess-address currhost))
	 ;; first look in config, then look in file .homehost, create it if not found
	 (homehost (or (configf:lookup *configdat* "server" "homehost" )
		       (let ((hhf (conc *toppath* "/.homehost")))
			 (if (file-exists? hhf)
			     (with-input-from-file hhf read-line)
			     (if (file-write-access? *toppath*)
				 (begin
				   (with-output-to-file hhf
				     (lambda ()
				       (print bestadrs)))
				   (common:get-homehost))
				 #f)))))
	 (at-home  (or (equal? homehost currhost)
		       (equal? homehost bestadrs))))
    (cons homehost at-home)))


;;======================================================================
;; M I S C   L I S T S
;;======================================================================

;; items in lista are matched value and position in listb
;; return the remaining items in listb or #f







>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>







627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
	      (debug:print-error 0 *default-log-port* "Invalid target, spaces or blanks not allowed \"" target "\", target should be: " (string-intersperse keys "/") ", have " tlist " for elements")
	      #f)
	    #f))))

;; logic for getting homehost. Returns (host . at-home)
;;
(define (common:get-homehost)
  (if *home-host*
      *home-host*
      (let* ((currhost (get-host-name))
	     (bestadrs (server:get-best-guess-address currhost))
	     ;; first look in config, then look in file .homehost, create it if not found
	     (homehost (or (configf:lookup *configdat* "server" "homehost" )
			   (let ((hhf (conc *toppath* "/.homehost")))
			     (if (file-exists? hhf)
				 (with-input-from-file hhf read-line)
				 (if (file-write-access? *toppath*)
				     (begin
				       (with-output-to-file hhf
					 (lambda ()
					   (print bestadrs)))
				       (common:get-homehost))
				     #f)))))
	     (at-home  (or (equal? homehost currhost)
			   (equal? homehost bestadrs))))
	(set! *home-host* (cons homehost at-home))
	*home-host*)))

;;======================================================================
;; M I S C   L I S T S
;;======================================================================

;; items in lista are matched value and position in listb
;; return the remaining items in listb or #f

Modified dashboard.scm from [ef050cd64e] to [df1301b5c8].

81
82
83
84
85
86
87

88
89
90
91
92
93
94
			"-use-server"
			"-guimonitor"
			"-main"
			"-v"
			"-q"
			"-use-local"
			"-skip-version-check"

			)
		 args:arg-hash
		 0))

(if (not (null? remargs))
    (begin
      (print "Unrecognised arguments: " (string-intersperse remargs " "))







>







81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
			"-use-server"
			"-guimonitor"
			"-main"
			"-v"
			"-q"
			"-use-local"
			"-skip-version-check"
			"-repl"
			)
		 args:arg-hash
		 0))

(if (not (null? remargs))
    (begin
      (print "Unrecognised arguments: " (string-intersperse remargs " "))
3353
3354
3355
3356
3357
3358
3359


3360
3361
	(thread-join! th2)))))

;; ease debugging by loading ~/.dashboardrc
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc")))
  (if (file-exists? debugcontrolf)
      (load debugcontrolf)))



(main)








>
>
|

3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
	(thread-join! th2)))))

;; ease debugging by loading ~/.dashboardrc
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc")))
  (if (file-exists? debugcontrolf)
      (load debugcontrolf)))

(if (args:get-arg "-repl")
    (repl)
    (main))

Modified db.scm from [dad3fc289f] to [dbf520bf9b].

240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
;;     (dbr:dbstruct-rundb-set!  dbstruct (cons db dbfile))
;;     (dbr:dbstruct-inuse-set!  dbstruct #t)
;;     (dbr:dbstruct-olddb-set!  dbstruct olddb)
;;     ;;; (mutex-unlock! *rundb-mutex*) ;;; why did we need a mutex on opening db's?
;;     (db:sync-tables db:sync-tests-only *megatest-db* db)
;;     db))

;; This routine creates the db if not already present. It is only called if the db is not already ls opened
;;
(define (db:open-db dbstruct) ;;  (conc *toppath* "/megatest.db") (car *configinfo*))) 
  (let ((mdb (dbr:dbstruct-tmpdb dbstruct))) ;; RA => Returns the first reference in dbstruct
    (if mdb
	mdb
        ;; (mutex-lock! *rundb-mutex*)
        (let* ((dbpath       (db:dbfile-path)) ;;  0))







|







240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
;;     (dbr:dbstruct-rundb-set!  dbstruct (cons db dbfile))
;;     (dbr:dbstruct-inuse-set!  dbstruct #t)
;;     (dbr:dbstruct-olddb-set!  dbstruct olddb)
;;     ;;; (mutex-unlock! *rundb-mutex*) ;;; why did we need a mutex on opening db's?
;;     (db:sync-tables db:sync-tests-only *megatest-db* db)
;;     db))

;; This routine creates the db if not already present. It is only called if the db is not already opened
;;
(define (db:open-db dbstruct) ;;  (conc *toppath* "/megatest.db") (car *configinfo*))) 
  (let ((mdb (dbr:dbstruct-tmpdb dbstruct))) ;; RA => Returns the first reference in dbstruct
    (if mdb
	mdb
        ;; (mutex-lock! *rundb-mutex*)
        (let* ((dbpath       (db:dbfile-path)) ;;  0))

Modified rmt.scm from [db1278b247] to [80a7990c07].

52
53
54
55
56
57
58


59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76



77
78
79
80




81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117

118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
  ;; clean out old connections
  ;; (mutex-lock! *db-multi-sync-mutex*)


  (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref/default *runremote* run-id #f)))
         (if (and (vector? connection)
        	  (< (http-transport:server-dat-get-last-access connection) expire-time))
             (begin
               (debug:print-info 0 *default-log-port* "Discarding connection to server for run-id " run-id ", too long between accesses")
               ;; bb- disabling nanomsg
               ;; SHOULD CLOSE THE CONNECTION HERE 
	       ;; (case *transport-type*
	       ;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket 
	       ;;  		   (hash-table-ref *runremote* run-id)))))
               (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))
  ;; (mutex-unlock! *db-multi-sync-mutex*)
  ;; (mutex-lock! *send-receive-mutex*)
  (let* ((run-id          (if rid rid 0))



	 (connection-info (rmt:get-connection-info run-id)))
    ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)
    (if connection-info
	;; use the server if have connection info




	(let* ((dat     (case *transport-type*
			  ((http)(condition-case
				  (http-transport:client-api-send-receive run-id connection-info cmd params)
				  ((commfail)(vector #f "communications fail"))
				  ((exn)(vector #f "other fail"))))
			  ;; ((nmsg)(condition-case
			  ;;         (nmsg-transport:client-api-send-receive run-id connection-info cmd params)
			  ;;         ((timeout)(vector #f "timeout talking to server"))))
			  (else  (exit))))
	       (success (if (vector? dat) (vector-ref dat 0) #f))
	       (res     (if (vector? dat) (vector-ref dat 1) #f)))
	  (if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info))
	  (if success
	      (begin
		;; (mutex-unlock! *send-receive-mutex*)
		(case *transport-type* 
		  ((http) res) ;; (db:string->obj res))
		  ;; ((nmsg) res)
                  )) ;; (vector-ref res 1)))
	      (begin ;; let ((new-connection-info (client:setup run-id)))
		(debug:print 0 *default-log-port* "WARNING: Communication failed, trying call to rmt:send-receive again.")
		;; (case *transport-type*
		;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info))))
		(hash-table-delete! *runremote* run-id) ;; don't keep using the same connection
		;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. 
		;; (if (eq? (modulo attemptnum 5) 0)
		;;     (tasks:kill-server-run-id run-id tag: "api-send-receive-failed"))
		;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications
		(tasks:start-and-wait-for-server (tasks:open-db) run-id 15)
		;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1))))))

		;; no longer killing the server in http-transport:client-api-send-receive
		;; may kill it here but what are the criteria?
		;; start with three calls then kill server
		;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id))
		;; (thread-sleep! 2)
		(rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1)))))

	;; no connection info? try to start a server, or access locally if no
	;; server and the query is read-only
	;;
	;; Note: The tasks db was checked for a server in starting mode in the rmt:get-connection-info call
	;;
	(if (and (< attemptnum 15)
		 (member cmd api:write-queries))
	    (let ((homehost  (common:get-homehost))) ;; faststart (configf:lookup *configdat* "server" "faststart")))
	      (hash-table-delete! *runremote* run-id)
	      ;; (mutex-unlock! *send-receive-mutex*)
	      (if (not (cdr homehost)) ;; we always require a server if not on homehost ;; (and faststart (equal? faststart "no"))
		  (begin
		    (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10)
		    (thread-sleep! (random 5)) ;; give some time to settle and minimize collison?
		    (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
		  ;; NB - probably can remove the query time stuff but need to discuss it ....
		  (let ((start-time (current-milliseconds))
			(max-query  (string->number (or (configf:lookup *configdat* "server" "server-query-threshold")
							"300")))
			(newres     (rmt:open-qry-close-locally cmd run-id params)))
		    (let ((delta (- (current-milliseconds) start-time)))
		      (if (> delta max-query)
			  (begin
			    (debug:print-info 0 *default-log-port* "WARNING: long query times, you may have an overloaded homehost.") ;; Starting server as query time " delta " is over the limit of " max-query)
			    ;; (server:kind-run run-id)))
			    ))
		      ;; return the result!
		      newres)
		    )))
	    (begin
	      ;; (debug:print-error 0 *default-log-port* "Communication failed!")
	      ;; (mutex-unlock! *send-receive-mutex*)
	      ;; (exit)
	      (rmt:open-qry-close-locally cmd run-id params)
	      )))))

(define (rmt:update-db-stats run-id rawcmd params duration)
  (mutex-lock! *db-stats-mutex*)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")







>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
|
<
|
<
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

83

84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
  ;; clean out old connections
  ;; (mutex-lock! *db-multi-sync-mutex*)
  (rmt:open-qry-close-locally cmd (if rid rid 0) params))

;;   (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
;;     (for-each 
;;      (lambda (run-id)
;;        (let ((connection (hash-table-ref/default *runremote* run-id #f)))
;;          (if (and (vector? connection)
;;         	  (< (http-transport:server-dat-get-last-access connection) expire-time))
;;              (begin
;;                (debug:print-info 0 *default-log-port* "Discarding connection to server for run-id " run-id ", too long between accesses")
;;                ;; bb- disabling nanomsg
;;                ;; SHOULD CLOSE THE CONNECTION HERE 
;; 	       ;; (case *transport-type*
;; 	       ;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket 
;; 	       ;;  		   (hash-table-ref *runremote* run-id)))))
;;                (hash-table-delete! *runremote* run-id)))))
;;      (hash-table-keys *runremote*)))
;;   ;; (mutex-unlock! *db-multi-sync-mutex*)
;;   ;; (mutex-lock! *send-receive-mutex*)
;;   (let* ((run-id          (if rid rid 0))
;; 	 (home-host       (common:get-homehost))
;; 	 (connection-info (if (cdr home-host) ;; we are on the home-host
;; 			      #f
;; 			      (rmt:get-connection-info run-id))))

;;     (cond

;;      (home-host        (rmt:open-qry-close-locally cmd run-id params))
;;      (connection-info
;;       ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)
;;       ;; use the server if have connection info
;;       (let* ((dat     (case *transport-type*
;; 			((http)(condition-case
;; 				(http-transport:client-api-send-receive run-id connection-info cmd params)
;; 				((commfail)(vector #f "communications fail"))
;; 				((exn)(vector #f "other fail"))))
;; 			;; ((nmsg)(condition-case
;; 			;;         (nmsg-transport:client-api-send-receive run-id connection-info cmd params)
;; 			;;         ((timeout)(vector #f "timeout talking to server"))))
;; 			(else  (exit))))
;; 	     (success (if (vector? dat) (vector-ref dat 0) #f))
;; 	     (res     (if (vector? dat) (vector-ref dat 1) #f)))
;; 	(if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info))
;; 	(if success
;; 	    (begin
;; 	      ;; (mutex-unlock! *send-receive-mutex*)
;; 	      (case *transport-type* 
;; 		((http) res) ;; (db:string->obj res))
;; 		;; ((nmsg) res)
;; 		)) ;; (vector-ref res 1)))
;; 	    (begin ;; let ((new-connection-info (client:setup run-id)))
;; 	      (debug:print 0 *default-log-port* "WARNING: Communication failed, trying call to rmt:send-receive again.")
;; 	      ;; (case *transport-type*
;; 	      ;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info))))
;; 	      (hash-table-delete! *runremote* run-id) ;; don't keep using the same connection
;; 	      ;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. 
;; 	      ;; (if (eq? (modulo attemptnum 5) 0)
;; 	      ;;     (tasks:kill-server-run-id run-id tag: "api-send-receive-failed"))
;; 	      ;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications
;; 	      (tasks:start-and-wait-for-server (tasks:open-db) run-id 15)
;; 	      ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1))))))
;; 	      
;; 	      ;; no longer killing the server in http-transport:client-api-send-receive
;; 	      ;; may kill it here but what are the criteria?
;; 	      ;; start with three calls then kill server
;; 	      ;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id))
;; 	      ;; (thread-sleep! 2)
;; 	      (rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1))))))
;;      (else
;;      ;; no connection info? try to start a server, or access locally if no
;;       ;; server and the query is read-only
;;       ;;
;;       ;; Note: The tasks db was checked for a server in starting mode in the rmt:get-connection-info call
;;       ;;
;;       (if (and (< attemptnum 15)
;; 	       (member cmd api:write-queries))
;; 	  (let ((homehost  (common:get-homehost))) ;; faststart (configf:lookup *configdat* "server" "faststart")))
;; 	      (hash-table-delete! *runremote* run-id)
;; 	      ;; (mutex-unlock! *send-receive-mutex*)
;; 	      (if (not (cdr homehost)) ;; we always require a server if not on homehost ;; (and faststart (equal? faststart "no"))
;; 		  (begin
;; 		    (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10)
;; 		    (thread-sleep! (random 5)) ;; give some time to settle and minimize collison?
;; 		    (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
;; 		  ;; NB - probably can remove the query time stuff but need to discuss it ....
;; 		  (let ((start-time (current-milliseconds))
;; 			(max-query  (string->number (or (configf:lookup *configdat* "server" "server-query-threshold")
;; 							"300")))
;; 			(newres     (rmt:open-qry-close-locally cmd run-id params)))
;; 		    (let ((delta (- (current-milliseconds) start-time)))
;; 		      (if (> delta max-query)
;; 			  (begin
;; 			    (debug:print-info 0 *default-log-port* "WARNING: long query times, you may have an overloaded homehost.") ;; Starting server as query time " delta " is over the limit of " max-query)
;; 			    ;; (server:kind-run run-id)))
;; 			    ))
;; 		      ;; return the result!
;; 		      newres)
;; 		    )))
;; 	  (begin
;; 	    ;; (debug:print-error 0 *default-log-port* "Communication failed!")
;; 	    ;; (mutex-unlock! *send-receive-mutex*)
;; 	    ;; (exit)
;; 	    (rmt:open-qry-close-locally cmd run-id params)
;; 	    ))))))

(define (rmt:update-db-stats run-id rawcmd params duration)
  (mutex-lock! *db-stats-mutex*)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")