Megatest

Check-in [1510977b0a]
Login
Overview
Comment:Added few more defensive layers to calls that *may* be part of the crash-on-startup-at-weird-random-times bug
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: 1510977b0ae39301047a763817992800228e8fb9
User & Date: mrwellan on 2014-12-08 17:36:05
Other Links: branch diff | manifest | tags
Context
2014-12-08
23:31
Added most of what is needed for archiving check-in: e9d174e213 user: matt tags: v1.60
17:36
Added few more defensive layers to calls that *may* be part of the crash-on-startup-at-weird-random-times bug check-in: 1510977b0a user: mrwellan tags: v1.60
13:28
Added checks for vector instead of just true before accessing some data stucts that are generated on the fly and can fail due to communication errors check-in: 4e1162ffe9 user: mrwellan tags: v1.60
Changes

Modified http-transport.scm from [ef8d9caccb] to [7c1ad3a799].

340
341
342
343
344
345
346

347




348
349
350
351
352
353
354
340
341
342
343
344
345
346
347

348
349
350
351
352
353
354
355
356
357
358







+
-
+
+
+
+







      (conc "http://" 
	    (http-transport:server-dat-get-iface vec)
	    ":"
	    (http-transport:server-dat-get-port  vec))
      #f))

(define (http-transport:server-dat-update-last-access vec)
  (if (vector? vec)
  (vector-set! vec 5 (current-seconds)))
      (vector-set! vec 5 (current-seconds))
      (begin
	(print-call-chain (current-error-port))
	(debug:print 0 "ERROR: call to http-transport:server-dat-update-last-access with non-vector!!"))))

;;
;; connect
;;
(define (http-transport:client-connect iface port)
  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))

Modified rmt.scm from [c506e20989] to [7d7a03f6b1].

159
160
161
162
163
164
165
166

167
168
169
170
171
172
173
159
160
161
162
163
164
165

166
167
168
169
170
171
172
173







-
+







   (begin
     (debug:print 0 "WARNING: stats collection failed in update-db-stats")
     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
     (print "exn=" (condition->list exn))
     #f) ;; if this fails we don't care, it is just stats
   (let* ((cmd      (conc "run-id=" run-id " " (if (eq? rawcmd 'general-call) (car params) rawcmd)))
	  (stat-vec (hash-table-ref/default *db-stats* cmd #f)))
     (if (not stat-vec)
     (if (not (vector? stat-vec))
	 (let ((newvec (vector 0 0)))
	   (hash-table-set! *db-stats* cmd newvec)
	   (set! stat-vec newvec)))
     (vector-set! stat-vec 0 (+ (vector-ref stat-vec 0) 1))
     (vector-set! stat-vec 1 (+ (vector-ref stat-vec 1) duration))))
  (mutex-unlock! *db-stats-mutex*))

Modified runs.scm from [4f08d413e2] to [52961e3bf4].

367
368
369
370
371
372
373





374

375
376
377
378
379
380
381
367
368
369
370
371
372
373
374
375
376
377
378

379
380
381
382
383
384
385
386







+
+
+
+
+
-
+







	(debug:print-info 1 "Adding " required-tests " to the run queue"))
    ;; NOTE: these are all parent tests, items are not expanded yet.
    (debug:print-info 4 "test-records=" (hash-table->alist test-records))
    (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
      (if (> (length (hash-table-keys test-records)) 0)
	  (let* ((keep-going #t)
		 (th1        (make-thread (lambda ()
					    (handle-exceptions
					     exn
					     (begin
					       (print-call-chain (current-error-port))
					       (debug:print 0 "ERROR: failure in runs:run-tests-queue thread, error: " ((condition-property-accessor 'exn 'message) exn)))
					    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))
					     (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)))
					  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()				    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (rmt:find-and-mark-incomplete run-id #f))) ;; ovr-deadtime)))