Megatest

Check-in [c101fd0ec9]
Login
Overview
Comment:Force start on servers before doing write heavy stuff. The transition when doing writes from direct access -> server needs improving. This is a work-around (but a good feature anyway.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: c101fd0ec953a1bcdaa9de83f07e29a52466a7bd
User & Date: mrwellan on 2014-11-18 09:32:59
Other Links: branch diff | manifest | tags
Context
2014-11-18
10:18
reduce print noise. re-start server in long running loop check-in: 127cbef166 user: mrwellan tags: v1.60
09:32
Force start on servers before doing write heavy stuff. The transition when doing writes from direct access -> server needs improving. This is a work-around (but a good feature anyway. check-in: c101fd0ec9 user: mrwellan tags: v1.60
00:15
Fixed param order issue that was causing some server crashes. Cleaned up some exception handing check-in: 3a0e887fa8 user: matt tags: v1.60
Changes

Modified db.scm from [d047d1e2d5] to [b24a0c30a3].

542
543
544
545
546
547
548
549


550
551
552
553
554
555
556
542
543
544
545
546
547
548

549
550
551
552
553
554
555
556
557







-
+
+







			      (if (and same
				       (< i (- num-fields 1)))
				  (loop (+ i 1))))
			    (if (not same)
				(begin
				  (apply sqlite3:execute stmth (vector->list fromrow))
				  (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))))))
			fromdat-lst))))
			fromdat-lst))
		  ))
		  fromdats)
		 (sqlite3:finalize! stmth)))
	     (append (list todb) slave-dbs))))
	tbls)
       (let* ((runtime      (- (current-milliseconds) start-time))
	      (should-print (common:low-noise-print 30 "db sync" (> runtime 500)))) ;; low and high sync times treated as separate.
	 (if should-print (debug:print 0 "INFO: db sync, total run time " runtime " ms"))

Modified runs.scm from [7981f5c942] to [a8e4b28ff5].

214
215
216
217
218
219
220


221
222
223
224
225
226
227
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229







+
+







	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
	 (test-names         #f)  ;; (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 (tdbdat             (tasks:open-db)))

    (tasks:start-and-wait-for-server tdbdat run-id 10)

    (set-signal-handler! signal/int
			 (lambda (signum)
			   (signal-mask! signum)
			   (print "Received signal " signum ", cleaning up before exit. Please wait...")
			   (let ((tdbdat (tasks:open-db)))
			     (tasks:set-state-given-param-key (db:delay-if-busy tdbdat) task-key "killed"))
			   (print "Killed by signal " signum ". Exiting")
1424
1425
1426
1427
1428
1429
1430

1431
1432
1433
1434
1435
1436

1437
1438
1439
1440
1441
1442
1443
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447







+






+







			       '()))
		(lasttpath "/does/not/exist/I/hope"))
	   (debug:print-info 4 "runs:operate-on run=" run ", header=" header)
	   (if (not (null? tests))
	       (begin
		 (case action
		   ((remove-runs)
		    (tasks:start-and-wait-for-server tdbdat run-id 10)
		    ;; seek and kill in flight -runtests with % as testpatt here
		    (if (equal? testpatt "%")
			(tasks:kill-runner (db:delay-if-busy tdbdat) target run-name)
			(debug:print 0 "not attempting to kill any run launcher processes as testpatt is " testpatt))
		    (debug:print 1 "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((set-state-status)
		    (tasks:start-and-wait-for-server tdbdat run-id 10)
		    (debug:print 1 "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((print-run)
		    (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
		    action)
		   ((run-wait)
		    (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete"))
		   (else

Modified tasks.scm from [d12c4149c9] to [792641d7ed].

343
344
345
346
347
348
349















350
351
352
353
354
355
356
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







    (sqlite3:for-each-row
     (lambda (id)
       (set! res id))
     mdb ;; NEEDS dbprep ADDED
     "SELECT id FROM servers WHERE run_id=? AND (state = 'running' OR (state = 'dbprep' AND  (strftime('%s','now') - start_time) < 60));" run-id)
    res))

;; try to start a server and wait for it to be available
;;
(define (tasks:start-and-wait-for-server tdbdat run-id delay-max-tries)
  (tdbdat             (tasks:open-db))
  ;; ensure a server is running for this run
  (let loop ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id))
	     (delay-time 0))
      (if (and (not server-dat)
	       (< delay-time delay-max-tries))
	  (begin
	    (debug:print 0 "Try starting server")
	    (server:kind-run run-id)
	    (thread-sleep! (min delay-time 5))
	    (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1))))))

(define (tasks:get-all-servers mdb)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
       ;;                       0  1     2         3      4     5          6        7     8          9          10        11     12
       (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res)))
     mdb