Megatest

Check-in [c101fd0ec9]
Login
Overview
Comment:Force start on servers before doing write heavy stuff. The transition when doing writes from direct access -> server needs improving. This is a work-around (but a good feature anyway.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: c101fd0ec953a1bcdaa9de83f07e29a52466a7bd
User & Date: mrwellan on 2014-11-18 09:32:59
Other Links: branch diff | manifest | tags
Context
2014-11-18
10:18
reduce print noise. re-start server in long running loop check-in: 127cbef166 user: mrwellan tags: v1.60
09:32
Force start on servers before doing write heavy stuff. The transition when doing writes from direct access -> server needs improving. This is a work-around (but a good feature anyway. check-in: c101fd0ec9 user: mrwellan tags: v1.60
00:15
Fixed param order issue that was causing some server crashes. Cleaned up some exception handing check-in: 3a0e887fa8 user: matt tags: v1.60
Changes

Modified db.scm from [d047d1e2d5] to [b24a0c30a3].

542
543
544
545
546
547
548
549

550
551
552
553
554
555
556
			      (if (and same
				       (< i (- num-fields 1)))
				  (loop (+ i 1))))
			    (if (not same)
				(begin
				  (apply sqlite3:execute stmth (vector->list fromrow))
				  (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))))))
			fromdat-lst))))

		  fromdats)
		 (sqlite3:finalize! stmth)))
	     (append (list todb) slave-dbs))))
	tbls)
       (let* ((runtime      (- (current-milliseconds) start-time))
	      (should-print (common:low-noise-print 30 "db sync" (> runtime 500)))) ;; low and high sync times treated as separate.
	 (if should-print (debug:print 0 "INFO: db sync, total run time " runtime " ms"))







|
>







542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
			      (if (and same
				       (< i (- num-fields 1)))
				  (loop (+ i 1))))
			    (if (not same)
				(begin
				  (apply sqlite3:execute stmth (vector->list fromrow))
				  (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))))))
			fromdat-lst))
		  ))
		  fromdats)
		 (sqlite3:finalize! stmth)))
	     (append (list todb) slave-dbs))))
	tbls)
       (let* ((runtime      (- (current-milliseconds) start-time))
	      (should-print (common:low-noise-print 30 "db sync" (> runtime 500)))) ;; low and high sync times treated as separate.
	 (if should-print (debug:print 0 "INFO: db sync, total run time " runtime " ms"))

Modified runs.scm from [7981f5c942] to [a8e4b28ff5].

214
215
216
217
218
219
220


221
222
223
224
225
226
227
	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
	 (test-names         #f)  ;; (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 (tdbdat             (tasks:open-db)))



    (set-signal-handler! signal/int
			 (lambda (signum)
			   (signal-mask! signum)
			   (print "Received signal " signum ", cleaning up before exit. Please wait...")
			   (let ((tdbdat (tasks:open-db)))
			     (tasks:set-state-given-param-key (db:delay-if-busy tdbdat) task-key "killed"))
			   (print "Killed by signal " signum ". Exiting")







>
>







214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
	 (test-names         #f)  ;; (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 (tdbdat             (tasks:open-db)))

    (tasks:start-and-wait-for-server tdbdat run-id 10)

    (set-signal-handler! signal/int
			 (lambda (signum)
			   (signal-mask! signum)
			   (print "Received signal " signum ", cleaning up before exit. Please wait...")
			   (let ((tdbdat (tasks:open-db)))
			     (tasks:set-state-given-param-key (db:delay-if-busy tdbdat) task-key "killed"))
			   (print "Killed by signal " signum ". Exiting")
1424
1425
1426
1427
1428
1429
1430

1431
1432
1433
1434
1435
1436

1437
1438
1439
1440
1441
1442
1443
			       '()))
		(lasttpath "/does/not/exist/I/hope"))
	   (debug:print-info 4 "runs:operate-on run=" run ", header=" header)
	   (if (not (null? tests))
	       (begin
		 (case action
		   ((remove-runs)

		    ;; seek and kill in flight -runtests with % as testpatt here
		    (if (equal? testpatt "%")
			(tasks:kill-runner (db:delay-if-busy tdbdat) target run-name)
			(debug:print 0 "not attempting to kill any run launcher processes as testpatt is " testpatt))
		    (debug:print 1 "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((set-state-status)

		    (debug:print 1 "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((print-run)
		    (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
		    action)
		   ((run-wait)
		    (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete"))
		   (else







>






>







1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
			       '()))
		(lasttpath "/does/not/exist/I/hope"))
	   (debug:print-info 4 "runs:operate-on run=" run ", header=" header)
	   (if (not (null? tests))
	       (begin
		 (case action
		   ((remove-runs)
		    (tasks:start-and-wait-for-server tdbdat run-id 10)
		    ;; seek and kill in flight -runtests with % as testpatt here
		    (if (equal? testpatt "%")
			(tasks:kill-runner (db:delay-if-busy tdbdat) target run-name)
			(debug:print 0 "not attempting to kill any run launcher processes as testpatt is " testpatt))
		    (debug:print 1 "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((set-state-status)
		    (tasks:start-and-wait-for-server tdbdat run-id 10)
		    (debug:print 1 "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((print-run)
		    (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
		    action)
		   ((run-wait)
		    (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete"))
		   (else

Modified tasks.scm from [d12c4149c9] to [792641d7ed].

343
344
345
346
347
348
349















350
351
352
353
354
355
356
    (sqlite3:for-each-row
     (lambda (id)
       (set! res id))
     mdb ;; NEEDS dbprep ADDED
     "SELECT id FROM servers WHERE run_id=? AND (state = 'running' OR (state = 'dbprep' AND  (strftime('%s','now') - start_time) < 60));" run-id)
    res))
















(define (tasks:get-all-servers mdb)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
       ;;                       0  1     2         3      4     5          6        7     8          9          10        11     12
       (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res)))
     mdb







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
    (sqlite3:for-each-row
     (lambda (id)
       (set! res id))
     mdb ;; NEEDS dbprep ADDED
     "SELECT id FROM servers WHERE run_id=? AND (state = 'running' OR (state = 'dbprep' AND  (strftime('%s','now') - start_time) < 60));" run-id)
    res))

;; try to start a server and wait for it to be available
;;
(define (tasks:start-and-wait-for-server tdbdat run-id delay-max-tries)
  (tdbdat             (tasks:open-db))
  ;; ensure a server is running for this run
  (let loop ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id))
	     (delay-time 0))
      (if (and (not server-dat)
	       (< delay-time delay-max-tries))
	  (begin
	    (debug:print 0 "Try starting server")
	    (server:kind-run run-id)
	    (thread-sleep! (min delay-time 5))
	    (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1))))))

(define (tasks:get-all-servers mdb)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
       ;;                       0  1     2         3      4     5          6        7     8          9          10        11     12
       (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res)))
     mdb