Megatest

Check-in [086bd3226a]
Login
Overview
Comment:Added more exception and signal handlers
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: 086bd3226ae1e1e2ccc699002948df9369cbb455
User & Date: matt on 2014-10-23 23:51:02
Other Links: branch diff | manifest | tags
Context
2014-10-24
00:25
Merged rerun-behavior-fixes check-in: ef02bf7a14 user: matt tags: v1.60
2014-10-23
23:53
Merged v1.60 Closed-Leaf check-in: db227d3471 user: matt tags: rerun-behavior-fixes
23:51
Added more exception and signal handlers check-in: 086bd3226a user: matt tags: v1.60
09:17
Added error handler for dashboard where it looks for recent accesses to db check-in: 46182ccbf0 user: mrwellan tags: v1.60
Changes

Modified db.scm from [5aaa9292b2] to [6aed320697].

283
284
285
286
287
288
289


290
291

292
293




294
295
296
297
298
299
300
301
302
  (sqlite3:finalize! (db:get-db dbstruct #f))
  (let* ((local (dbr:dbstruct-get-local dbstruct))
	 (rundb (dbr:dbstruct-get-rundb dbstruct)))
    (if local
	(for-each
	 (lambda (db)
	   (if (sqlite3:database? db)


	       (sqlite3:finalize! db)))
	 (hash-table-values (dbr:dbstruct-get-locdbs dbstruct))))

    (if rundb
	(if (sqlite3:database? rundb)




	    (sqlite3:finalize! rundb)
	    (debug:print 2 "WARNING: attempting to close databases but got " rundb " instead of a database")))))

(define (db:open-inmem-db)
  (let* ((db      (sqlite3:open-database ":memory:"))
	 (handler (make-busy-timeout 3600)))
    (db:initialize-run-id-db db)
    (sqlite3:set-busy-handler! db handler)
    db))







>
>
|

>
|
|
>
>
>
>
|
<







283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301

302
303
304
305
306
307
308
  (sqlite3:finalize! (db:get-db dbstruct #f))
  (let* ((local (dbr:dbstruct-get-local dbstruct))
	 (rundb (dbr:dbstruct-get-rundb dbstruct)))
    (if local
	(for-each
	 (lambda (db)
	   (if (sqlite3:database? db)
	       (begin
		 (sqlite3:interrupt! db)
		 (sqlite3:finalize! db #t))))
	 (hash-table-values (dbr:dbstruct-get-locdbs dbstruct))))
    (thread-sleep! 3)
    (if (and rundb
	     (sqlite3:database? rundb))
	(handle-exceptions
	 exn
	 (debug:print 0 "WARNING: database files may not have been closed correctly. Consider running -cleanup-db")
	 (sqlite3:interrupt! rundb)
	 (sqlite3:finalize! rundb #t)))))


(define (db:open-inmem-db)
  (let* ((db      (sqlite3:open-database ":memory:"))
	 (handler (make-busy-timeout 3600)))
    (db:initialize-run-id-db db)
    (sqlite3:set-busy-handler! db handler)
    db))

Modified http-transport.scm from [457c02e647] to [cf3cf50511].

246
247
248
249
250
251
252



253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272





273
274
275
276
277
278
279
280
281
282
283
284
285
	 (res        #f))
    (handle-exceptions
     exn
     (if (> numretries 0)
	 (begin
	   (mutex-unlock! *http-mutex*)
	   (thread-sleep! 1)



	   (close-all-connections!)
	   (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
	   (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1)))
	 (begin
	   (mutex-unlock! *http-mutex*)
	   #f))
     (begin
       (debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 
       ;; process and return it.
       (let* ((send-recieve (lambda ()
			      (mutex-lock! *http-mutex*)
			      ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
			      ;;					       ((exn http client-error) e (print e)))





			      (set! res (with-input-from-request ;; was dat
					 fullurl 
					 (list (cons 'key "thekey")
					       (cons 'cmd cmd)
					       (cons 'params params))
					 read-string))
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)
			      (mutex-unlock! *http-mutex*)
			      ))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      #f))







>
>
>
|



















>
>
>
>
>
|
|
|
|
|
|







246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
	 (res        #f))
    (handle-exceptions
     exn
     (if (> numretries 0)
	 (begin
	   (mutex-unlock! *http-mutex*)
	   (thread-sleep! 1)
	   (handle-exceptions
	    exn
	    (debug:print 0 "WARNING: closing connections failed. Server at " fullurl " almost certainly dead")
	    (close-all-connections!))
	   (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
	   (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1)))
	 (begin
	   (mutex-unlock! *http-mutex*)
	   #f))
     (begin
       (debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 
       ;; process and return it.
       (let* ((send-recieve (lambda ()
			      (mutex-lock! *http-mutex*)
			      ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
			      ;;					       ((exn http client-error) e (print e)))
			      (set! res (handle-exceptions
					 exn
					 (begin
					   (debug:print 0 "ERROR: failure in with-input-from-request. Giving up.")
					   #f)
					 (with-input-from-request ;; was dat
					  fullurl 
					  (list (cons 'key "thekey")
						(cons 'cmd cmd)
						(cons 'params params))
					  read-string)))
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)
			      (mutex-unlock! *http-mutex*)
			      ))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      #f))

Modified launch.scm from [0c1efd6507] to [09c74e18be].

89
90
91
92
93
94
95


96
97
98
99
100
101
102
	                                  (if (and (file-exists? fulln)
                                                   (file-execute-access? fulln))
                                              fulln
                                              runscript))))) ;; assume it is on the path
	       (rollup-status 0))
	  (change-directory top-path)



	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  (let ((test-info (rmt:get-testinfo-state-status run-id test-id)))
	    (if (not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
		(tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
		(debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed")))







>
>







89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
	                                  (if (and (file-exists? fulln)
                                                   (file-execute-access? fulln))
                                              fulln
                                              runscript))))) ;; assume it is on the path
	       (rollup-status 0))
	  (change-directory top-path)

	  ;; (set-signal-handler! signal/int (lambda ()
					    
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  (let ((test-info (rmt:get-testinfo-state-status run-id test-id)))
	    (if (not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
		(tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
		(debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed")))

Modified megatest.scm from [edf456cc83] to [456389bec5].

277
278
279
280
281
282
283






















284
285
286
287
288
289
290
			"-logging"
			"-v" ;; verbose 2, more than normal (normal is 1)
			"-q" ;; quiet 0, errors/warnings only
		       )
		 args:arg-hash
		 0))























(if (args:get-arg "-log")
    (let ((oup (open-output-file (args:get-arg "-log"))))
      (debug:print-info 0 "Sending log output to " (args:get-arg "-log"))
      (current-error-port oup)
      (current-output-port oup)))

(if (or (args:get-arg "-h")







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
			"-logging"
			"-v" ;; verbose 2, more than normal (normal is 1)
			"-q" ;; quiet 0, errors/warnings only
		       )
		 args:arg-hash
		 0))

(define (std-exit-procedure)
  (rmt:print-db-stats)
  (let ((run-ids (hash-table-keys *db-local-sync*)))
    (if (not (null? run-ids))
	(db:multi-db-sync run-ids 'new2old)))
  (if *dbstruct-db* (db:close-all *dbstruct-db*))
  (if *megatest-db* (begin
		      (sqlite3:interrupt! *megatest-db*)
		      (sqlite3:finalize! *megatest-db* #t)))
  (if *task-db*     (let ((db (vector-ref *task-db* 0)))
		      (sqlite3:interrupt! db)
		      (sqlite3:finalize! db #t))))

(define (std-signal-handler signum)
  (signal-mask! signum)
  (debug:print 0 "ERROR: Received signal " signum " exiting promptly")
  (std-exit-procedure)
  (exit))

(set-signal-handler! signal/int std-signal-handler)
(set-signal-handler! signal/term std-signal-handler)

(if (args:get-arg "-log")
    (let ((oup (open-output-file (args:get-arg "-log"))))
      (debug:print-info 0 "Sending log output to " (args:get-arg "-log"))
      (current-error-port oup)
      (current-output-port oup)))

(if (or (args:get-arg "-h")
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366

(if (args:get-arg "-itempatt")
    (let ((newval (conc (args:get-arg "-testpatt") "/" (args:get-arg "-itempatt"))))
      (debug:print 0 "WARNING: -itempatt has been deprecated, please use -testpatt testpatt/itempatt method, new testpatt is "newval)
      (hash-table-set! args:arg-hash "-testpatt" newval)
      (hash-table-delete! args:arg-hash "-itempatt")))

(on-exit (lambda ()
	   (rmt:print-db-stats)
	   (let ((run-ids (hash-table-keys *db-local-sync*)))
	     (if (not (null? run-ids))
		 (db:multi-db-sync run-ids 'new2old)))
	   (if *dbstruct-db* (db:close-all *dbstruct-db*))
	   (if *megatest-db* (sqlite3:finalize! *megatest-db*))
	   (if *task-db*     (sqlite3:finalize! (vector-ref *task-db* 0)))))

;;======================================================================
;; Misc general calls
;;======================================================================

(if (args:get-arg "-env2file")
    (begin







|
<
<
<
<
<
<
<







367
368
369
370
371
372
373
374







375
376
377
378
379
380
381

(if (args:get-arg "-itempatt")
    (let ((newval (conc (args:get-arg "-testpatt") "/" (args:get-arg "-itempatt"))))
      (debug:print 0 "WARNING: -itempatt has been deprecated, please use -testpatt testpatt/itempatt method, new testpatt is "newval)
      (hash-table-set! args:arg-hash "-testpatt" newval)
      (hash-table-delete! args:arg-hash "-itempatt")))

(on-exit std-exit-procedure)








;;======================================================================
;; Misc general calls
;;======================================================================

(if (args:get-arg "-env2file")
    (begin

Modified runs.scm from [0a93b5efd0] to [83650e6e32].

215
216
217
218
219
220
221

222
223

224
225
226
227
228
229
230
231
232
	 (test-names         #f)  ;; (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 (tasks-db           (tasks:open-db)))

    (set-signal-handler! signal/int
			 (lambda (signum)

			   (let ((tdb (tasks:open-db)))
			     (tasks:set-state-given-param-key tdb task-key "killed")

			     (sqlite3:finalize! tdb))
			   (print "Killed by sigint. Exiting")
			   (exit)))

    ;; register this run in monitor.db
    (tasks:add tasks-db "run-tests" user target runname test-patts task-key) ;; params)
    (tasks:set-state-given-param-key tasks-db task-key "running")
    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (if (file-exists? runconfigf)







>


>

|







215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
	 (test-names         #f)  ;; (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 (tasks-db           (tasks:open-db)))

    (set-signal-handler! signal/int
			 (lambda (signum)
			   (signal-mask! signum)
			   (let ((tdb (tasks:open-db)))
			     (tasks:set-state-given-param-key tdb task-key "killed")
			     ;; (sqlite3:interrupt! tdb) ;; seems silly?
			     (sqlite3:finalize! tdb))
			   (print "Killed by signal " signum ". Exiting")
			   (exit)))

    ;; register this run in monitor.db
    (tasks:add tasks-db "run-tests" user target runname test-patts task-key) ;; params)
    (tasks:set-state-given-param-key tasks-db task-key "running")
    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (if (file-exists? runconfigf)

tests/installall/config/megatest.config.dat became a regular file with contents [736a5da885].

tests/installall/config/runconfigs.config.dat became a regular file with contents [3b8f260acb].