Megatest

Check-in [39d1713c91]
Login
Overview
Comment:stopped calls to set state and status where the test-id was not known (test not actually registered yet). Added safety net to some calls to print call chain if called with bad data, moved couple mutexes around to prevent contention
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | streamline-exception-handling
Files: files | file ages | folders
SHA1: 39d1713c912a74a702cc71ed773e31bd652434bd
User & Date: matt on 2014-11-02 09:24:19
Other Links: branch diff | manifest | tags
Context
2014-11-02
09:25
Merged streamline exception handling branch into v1.60 check-in: 8fd7d261b7 user: matt tags: v1.60
09:24
stopped calls to set state and status where the test-id was not known (test not actually registered yet). Added safety net to some calls to print call chain if called with bad data, moved couple mutexes around to prevent contention Closed-Leaf check-in: 39d1713c91 user: matt tags: streamline-exception-handling
2014-11-01
22:37
Moved the db sync for local access to watchdog thread. check-in: 39fc3591a8 user: matt tags: streamline-exception-handling
Changes

Modified common.scm from [1306ff927d] to [51076419ec].

554
555
556
557
558
559
560











561
562
563
564
565
566
567
(define (seconds->year-work-week/day sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w"))

(define (seconds->year-work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w %H:%M"))












;;======================================================================
;; Colors
;;======================================================================
      
(define (common:name->iup-color name)
  (case (string->symbol (string-downcase name))







>
>
>
>
>
>
>
>
>
>
>







554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
(define (seconds->year-work-week/day sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w"))

(define (seconds->year-work-week/day-time sec)
  (time->string
   (seconds->local-time sec) "%yww%V.%w %H:%M"))

(define (seconds->quarter sec)
  (case (string->number
	 (time->string 
	  (seconds->local-time sec)
	  "%m"))
    ((1 2 3) 1)
    ((4 5 6) 2)
    ((7 8 9) 3)
    ((10 11 12) 4)
    (else #f)))

;;======================================================================
;; Colors
;;======================================================================
      
(define (common:name->iup-color name)
  (case (string->symbol (string-downcase name))

Modified mt.scm from [77d6104e75] to [fdb95af183].

163
164
165
166
167
168
169






170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187

;;======================================================================
;;  S T A T E   A N D   S T A T U S   F O R   T E S T S 
;;======================================================================

;; speed up for common cases with a little logic
(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)






  (cond
   ((and newstate newstatus newcomment)
    (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id))
   ((and newstate newstatus)
    (rmt:general-call 'state-status run-id newstate newstatus test-id))
   (else
    (if newstate   (rmt:general-call 'set-test-state   run-id newstate   test-id))
    (if newstatus  (rmt:general-call 'set-test-status  run-id newstatus  test-id))
    (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id))))
   (mt:process-triggers run-id test-id newstate newstatus)
   #t)

(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment)
  (let ((test-id (cdb:remote-run db:get-test-id-cached #f run-id test-name item-path)))
    (mt:test-set-state-status-by-id test-id new-state new-status new-comment)))

(define (mt:lazy-read-test-config test-name)
  (let ((tconf (hash-table-ref/default *testconfigs* test-name #f)))







>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|







163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193

;;======================================================================
;;  S T A T E   A N D   S T A T U S   F O R   T E S T S 
;;======================================================================

;; speed up for common cases with a little logic
(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
  (if (not (and run-id test-id))
      (begin
	(debug:print 0 "ERROR: bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate)
	(print-call-chain)
	#f)
      (begin
	(cond
	 ((and newstate newstatus newcomment)
	  (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id))
	 ((and newstate newstatus)
	  (rmt:general-call 'state-status run-id newstate newstatus test-id))
	 (else
	  (if newstate   (rmt:general-call 'set-test-state   run-id newstate   test-id))
	  (if newstatus  (rmt:general-call 'set-test-status  run-id newstatus  test-id))
	  (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id))))
	(mt:process-triggers run-id test-id newstate newstatus)
	#t)))

(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment)
  (let ((test-id (cdb:remote-run db:get-test-id-cached #f run-id test-name item-path)))
    (mt:test-set-state-status-by-id test-id new-state new-status new-comment)))

(define (mt:lazy-read-test-config test-name)
  (let ((tconf (hash-table-ref/default *testconfigs* test-name #f)))

Modified rmt.scm from [8820dec9a5] to [ef41e52830].

56
57
58
59
60
61
62

63
64
65
66

67
68
69
70
71

72
73
74
75
76
77
78
	     #f))))

;; cmd is a symbol
;; vars is a json string encoding the parameters for the call
;;
(define (rmt:send-receive cmd rid params)
  ;; clean out old connections

  (let ((expire-time (- (current-seconds) 60)))
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref *runremote* run-id)))

	 (if (< (http-transport:server-dat-get-last-access connection) expire-time)
	     (begin
	       (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")
	       (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))

  (let* ((run-id          (if rid rid 0))
	 (connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f)))
			    (if cinfo
				cinfo
				;; NB// can cache the answer for server running for 10 seconds ...
				;;  ;; (and (not (rmt:write-frequency-over-limit? cmd run-id))
				(if (tasks:server-running-or-starting? (tasks:get-db) run-id)







>



|
>
|




>







56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
	     #f))))

;; cmd is a symbol
;; vars is a json string encoding the parameters for the call
;;
(define (rmt:send-receive cmd rid params)
  ;; clean out old connections
  (mutex-lock! *db-multi-sync-mutex*)
  (let ((expire-time (- (current-seconds) 60)))
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref/default *runremote* run-id #f)))
	 (if ;; (and connection 
		  (< (http-transport:server-dat-get-last-access connection) expire-time) ; )
	     (begin
	       (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")
	       (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))
  (mutex-unlock! *db-multi-sync-mutex*)
  (let* ((run-id          (if rid rid 0))
	 (connection-info (let ((cinfo (hash-table-ref/default *runremote* run-id #f)))
			    (if cinfo
				cinfo
				;; NB// can cache the answer for server running for 10 seconds ...
				;;  ;; (and (not (rmt:write-frequency-over-limit? cmd run-id))
				(if (tasks:server-running-or-starting? (tasks:get-db) run-id)
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
(define (rmt:get-test-id run-id testname item-path)
  (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))

(define (rmt:get-test-info-by-id run-id test-id)
  (if (and (number? run-id)(number? test-id))
      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
      (begin
	(debug:print 0 "ERROR: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
	(print-call-chain)
	#f)))

(define (rmt:test-get-rundir-from-test-id run-id test-id)
  (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))

(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))







|







262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
(define (rmt:get-test-id run-id testname item-path)
  (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))

(define (rmt:get-test-info-by-id run-id test-id)
  (if (and (number? run-id)(number? test-id))
      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
      (begin
	(debug:print 0 "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
	(print-call-chain)
	#f)))

(define (rmt:test-get-rundir-from-test-id run-id test-id)
  (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))

(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))

Modified runs.scm from [e52279daef] to [31523ae98e].

497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
	(setenv "MT_RUNNAME"   runname)
	(runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
	(let ((items-list (items:get-items-from-config tconfig)))
	  (if (list? items-list)
	      (begin
		(if (null? items-list)
		    (let ((test-id (rmt:get-test-id run-id test-name "")))
		      (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites")))
		(tests:testqueue-set-items! test-record items-list)
		(list hed tal reg reruns))
	      (begin
		(debug:print 0 "ERROR: The proc from reading the items table did not yield a list - please report this")
		(exit 1))))))

     ((and (null? fails)







|







497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
	(setenv "MT_RUNNAME"   runname)
	(runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
	(let ((items-list (items:get-items-from-config tconfig)))
	  (if (list? items-list)
	      (begin
		(if (null? items-list)
		    (let ((test-id (rmt:get-test-id run-id test-name "")))
		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites"))))
		(tests:testqueue-set-items! test-record items-list)
		(list hed tal reg reruns))
	      (begin
		(debug:print 0 "ERROR: The proc from reading the items table did not yield a list - please report this")
		(exit 1))))))

     ((and (null? fails)
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
	(if (and give-up
		 (not (and (null? tal)(null? reg))))
	    (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
		  (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
	      (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue")

	      (let ((test-id (rmt:get-test-id run-id hed "")))
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites"))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
		  (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-reg trimmed-tal trimmed-reg reglen regfull)







|







535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
	(if (and give-up
		 (not (and (null? tal)(null? reg))))
	    (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
		  (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
	      (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue")

	      (let ((test-id (rmt:get-test-id run-id hed "")))
		(if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
		  (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-reg trimmed-tal trimmed-reg reglen regfull)
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579

580
581
582
583
584
585
586
587
588
589
	    (debug:print-info 1 "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0))
	    ;; num-retries code was here
	    ;; we use this opportunity to move contents of reg to tal
	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
	  (begin
	    (debug:print-info 1 "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
	    (let ((test-id (rmt:get-test-id run-id hed "")))
	      (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while."))
	    (list (runs:queue-next-hed tal reg reglen regfull)
		  (runs:queue-next-tal tal reg reglen regfull)
		  (runs:queue-next-reg tal reg reglen regfull)
		  reruns))))

     ((and 
       (or (not (null? fails))
	   (not (null? prereq-fails)))
       (member 'normal testmode))
      (debug:print-info 1 "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
			(string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
			", removing it from to-do list")
      (let ((test-id (rmt:get-test-id run-id hed "")))

	(if (not (null? prereq-fails))
	    (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
	    (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites")))
      (if (or (not (null? reg))(not (null? tal)))
	  (begin
	    (hash-table-set! test-registry hed 'CANNOTRUN)
	    (list (runs:queue-next-hed tal reg reglen regfull)
		  (runs:queue-next-tal tal reg reglen regfull)
		  (runs:queue-next-reg tal reg reglen regfull)
		  (cons hed reruns)))







|













>
|
|
|







559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
	    (debug:print-info 1 "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0))
	    ;; num-retries code was here
	    ;; we use this opportunity to move contents of reg to tal
	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
	  (begin
	    (debug:print-info 1 "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
	    (let ((test-id (rmt:get-test-id run-id hed "")))
	      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
	    (list (runs:queue-next-hed tal reg reglen regfull)
		  (runs:queue-next-tal tal reg reglen regfull)
		  (runs:queue-next-reg tal reg reglen regfull)
		  reruns))))

     ((and 
       (or (not (null? fails))
	   (not (null? prereq-fails)))
       (member 'normal testmode))
      (debug:print-info 1 "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
			(string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
			", removing it from to-do list")
      (let ((test-id (rmt:get-test-id run-id hed "")))
	(if test-id
	    (if (not (null? prereq-fails))
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
		(mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites"))))
      (if (or (not (null? reg))(not (null? tal)))
	  (begin
	    (hash-table-set! test-registry hed 'CANNOTRUN)
	    (list (runs:queue-next-hed tal reg reglen regfull)
		  (runs:queue-next-tal tal reg reglen regfull)
		  (runs:queue-next-reg tal reg reglen regfull)
		  (cons hed reruns)))
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
	  ;; the waiton is FAIL so no point in trying to run hed ever again
	  (if (or (not (null? reg))(not (null? tal)))
	      (if (vector? hed)
		  (begin
		    (debug:print 1 "WARNING: Dropping test " test-name "/" item-path
				 " from the launch list as it has prerequistes that are FAIL")
		    (let ((test-id (rmt:get-test-id run-id hed "")))
		      (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites"))
		    (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)
		    ;; This next is for the items
		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
		    (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed)
		    (list (runs:queue-next-hed tal reg reglen regfull)
			  (runs:queue-next-tal tal reg reglen regfull)







|







771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
	  ;; the waiton is FAIL so no point in trying to run hed ever again
	  (if (or (not (null? reg))(not (null? tal)))
	      (if (vector? hed)
		  (begin
		    (debug:print 1 "WARNING: Dropping test " test-name "/" item-path
				 " from the launch list as it has prerequistes that are FAIL")
		    (let ((test-id (rmt:get-test-id run-id hed "")))
		      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
		    (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)
		    ;; This next is for the items
		    (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
		    (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed)
		    (list (runs:queue-next-hed tal reg reglen regfull)
			  (runs:queue-next-tal tal reg reglen regfull)

Modified tests/Makefile from [25eaa21107] to [60c8f28493].

172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
	cd fullrun;$(MEGATEST) -remove-runs :runname $(RUN)  -testpatt % -itempatt % :sysname % :fsname % :datapath %

clean  : 
	rm cleanprep

kill :
	killall -v mtest main.sh dboard || true
	rm -rf *run/db/* */megatest.db */logging.db */monitor.db fullrun/tmp/mt_*/* fullrun/tmp/mt_*/.db* fullrun/logs/*.log || true
	killall -v mtest dboard || true

hardkill : kill
	sleep 2;killall -v mtest main.sh dboard -9

listservers :
	cd fullrun;$(MEGATEST) -list-servers







|







172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
	cd fullrun;$(MEGATEST) -remove-runs :runname $(RUN)  -testpatt % -itempatt % :sysname % :fsname % :datapath %

clean  : 
	rm cleanprep

kill :
	killall -v mtest main.sh dboard || true
	rm -rf *run/db/* */megatest.db */logging.db */monitor.db fullrun/tmp/mt_*/* fullrun/tmp/mt_*/.db* fullrun/logs/*.log fullrun/*.log || true
	killall -v mtest dboard || true

hardkill : kill
	sleep 2;killall -v mtest main.sh dboard -9

listservers :
	cd fullrun;$(MEGATEST) -list-servers

tests/installall/config/megatest.config.dat became a regular file with contents [736a5da885].

tests/installall/config/runconfigs.config.dat became a regular file with contents [3b8f260acb].