Megatest

Check-in [7ca86d7e47]
Login
Overview
Comment:Handle errors in data transmission (e.g. corruption in flight).
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: 7ca86d7e47951b59248cad7407ccf7d4fc62ae4e
User & Date: matt on 2014-11-16 08:30:29
Other Links: branch diff | manifest | tags
Context
2014-11-16
17:37
Added channel for result codes to http communication. Fixed some unit tests check-in: 98ffd649ba user: matt tags: v1.60
08:30
Handle errors in data transmission (e.g. corruption in flight). check-in: 7ca86d7e47 user: matt tags: v1.60
07:49
Added print-call-chain to exception handler of open-db. Fixed typo. Switched back to not requiring server in self QA check-in: 7fe971f6e5 user: matt tags: v1.60
Changes

Modified db.scm from [e22b875bd7] to [f81dab27b4].

2244
2245
2246
2247
2248
2249
2250


2251
2252
2253
2254
2255
2256
2257
2258
     (if (string? msg)
	 (with-input-from-string 
	     (z3:decode-buffer
	      (base64:base64-decode
	       (string-substitute 
		(regexp "_") "=" msg #t)))
	   (lambda ()(deserialize)))


	 (vector #f #f #f))) ;; crude reply for when things go awry
    ((zmq)(with-input-from-string msg (lambda ()(deserialize))))
    (else msg)))

(define (db:test-set-status-state dbstruct run-id test-id status state msg)
  (let ((dbdat  (db:get-db dbstruct run-id)))
    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
	(db:general-call dbdat 'set-test-start-time (list test-id)))







>
>
|







2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
     (if (string? msg)
	 (with-input-from-string 
	     (z3:decode-buffer
	      (base64:base64-decode
	       (string-substitute 
		(regexp "_") "=" msg #t)))
	   (lambda ()(deserialize)))
	 (begin
	   (debug:print 0 "ERROR: reception failed. Received " msg " but cannot translate it.")
	   #f))) ;; crude reply for when things go awry
    ((zmq)(with-input-from-string msg (lambda ()(deserialize))))
    (else msg)))

(define (db:test-set-status-state dbstruct run-id test-id status state msg)
  (let ((dbdat  (db:get-db dbstruct run-id)))
    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
	(db:general-call dbdat 'set-test-start-time (list test-id)))

Modified rmt.scm from [667f107f6b] to [a3b0e53bfe].

87
88
89
90
91
92
93
94



95
96
97
98
99
100
101
					  #f))
				    #f))))
	 (jparams         (db:obj->string params)))
    (if connection-info
	(let ((res             (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
	  (http-transport:server-dat-update-last-access connection-info)
	  (if res
	      (db:string->obj res)



	      (begin ;; let ((new-connection-info (client:setup run-id)))
		(debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.")
		(hash-table-delete! *runremote* run-id) ;; don't keep using the same connection

		;; no longer killing the server in http-transport:client-api-send-receive
		;; may kill it here but what are the criteria?
		;; start with three calls then kill server







|
>
>
>







87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
					  #f))
				    #f))))
	 (jparams         (db:obj->string params)))
    (if connection-info
	(let ((res             (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
	  (http-transport:server-dat-update-last-access connection-info)
	  (if res
	      (or(db:string->obj res)
		 (begin
		   (thread-sleep! 0.5)
		   (rmt:send-receive cmd rid params attempnum: (+ attemptnum 1))))
	      (begin ;; let ((new-connection-info (client:setup run-id)))
		(debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.")
		(hash-table-delete! *runremote* run-id) ;; don't keep using the same connection

		;; no longer killing the server in http-transport:client-api-send-receive
		;; may kill it here but what are the criteria?
		;; start with three calls then kill server

Modified tasks.scm from [c9b5a9471c] to [6b278cc270].

71
72
73
74
75
76
77

78
79
80
81
82
83
84
	   (begin
	     (print-call-chain)
	     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	     (print "exn=" (condition->list exn))
	     (thread-sleep! 1)
	     (tasks:open-db numretries (- numretries 1)))
	   (begin

	     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	     (print "exn=" (condition->list exn))))
       (let* ((dbpath       (tasks:get-task-db-path))
	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
	      (exists       (file-exists? dbpath))
	      (write-access (file-write-access? dbpath))
	      (mdb          (cond







>







71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
	   (begin
	     (print-call-chain)
	     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	     (print "exn=" (condition->list exn))
	     (thread-sleep! 1)
	     (tasks:open-db numretries (- numretries 1)))
	   (begin
	     (print-call-chain)
	     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	     (print "exn=" (condition->list exn))))
       (let* ((dbpath       (tasks:get-task-db-path))
	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
	      (exists       (file-exists? dbpath))
	      (write-access (file-write-access? dbpath))
	      (mdb          (cond