Megatest

Check-in [2fe9144186]
Login
Overview
Comment:Merged latest from v1.61
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fix-chained-waiton
Files: files | file ages | folders
SHA1: 2fe914418666a83305e3cfbd47c931fed450f9d4
User & Date: mrwellan on 2016-05-13 11:32:12
Other Links: branch diff | manifest | tags
Context
2016-05-16
09:05
Speculative change to track tests that were waitons of a stuck or non-runnable test check-in: 0b57bca235 user: mrwellan tags: fix-chained-waiton
2016-05-13
11:32
Merged latest from v1.61 check-in: 2fe9144186 user: mrwellan tags: fix-chained-waiton
00:37
Added more instrumentation to help debug the test2 issue check-in: eabab4fa80 user: matt tags: v1.61
2016-05-12
22:35
caught up with v1.61 check-in: 0868158f0b user: bb tags: fix-chained-waiton
Changes

Modified common.scm from [7b10355f34] to [ba84b1bce0].

787
788
789
790
791
792
793
794

795
796
797
798

799
800
801
802

803
804
805
806
807
808
809
787
788
789
790
791
792
793

794
795
796
797

798
799
800
801

802
803
804
805
806
807
808
809







-
+



-
+



-
+







  (let ((best     #f)
	(bestsize 0))
    (for-each 
     (lambda (disk-num)
       (let* ((dirpath    (cadr (assoc disk-num disks)))
	      (freespc    (cond
			   ((not (directory? dirpath))
			    (if (common:low-noise-print 50 "disks not a dir " disk-num)
			    (if (common:low-noise-print 300 "disks not a dir " disk-num)
				(debug:print 0 "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it."))
			    -1)
			   ((not (file-write-access? dirpath))
			    (if (common:low-noise-print 50 "disks not writeable " disk-num)
			    (if (common:low-noise-print 300 "disks not writeable " disk-num)
				(debug:print 0 "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it."))
			    -1)
			   ((not (eq? (string-ref dirpath 0) #\/))
			    (if (common:low-noise-print 50 "disks not a proper path " disk-num)
			    (if (common:low-noise-print 300 "disks not a proper path " disk-num)
				(debug:print 0 "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it."))
			    -1)
			   (else
			    (get-df dirpath)))))
	 (if (> freespc bestsize)
	     (begin
	       (set! best     (cons disk-num dirpath))

Added debugger.scm version [f446c83fb1].










































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
(use iup)

(define *debugger-control* #f)
(define *debugger-rownum*  0)
(define *debugger-matrix*  #f)
(define *debugger*         #f)

(define (debugger)
  (if (not *debugger*)
      (set! *debugger* 
	    (thread-start!
	     (make-thread
	      (lambda ()
		(show
		 (dialog
		  (let ((pause #f)
			(mtrx  (matrix
				#:expand "YES"
				#:numlin 30
				#:numcol 3
				#:numlin-visible 20
				#:numcol-visible 2
				#:alignment1 "ALEFT"
				)))
		    (set! pause (button "Pause" 
					#:action (lambda (obj)
						   (set! *debugger-control* (not *debugger-control*))
						   (attribute-set! pause "BGCOLOR" (if *debugger-control*
										       "200 0 0"
										       "0 0 200")))))
		    (set! *debugger-matrix* mtrx)
		    (attribute-set! mtrx "WIDTH1" "300")
		    (vbox
		     mtrx
		     (hbox
		      pause)))))
		(main-loop)))))))

(define (debugger-start #!key (start 2))
  (set! *debugger-rownum* start))

(define (debugger-trace-var varname varval)
  (let ((oldval (attribute *debugger-matrix* (conc *debugger-rownum* ":1")))
	(newval (conc varval)))
    (if (not (equal? oldval newval))
	(begin
	  ;; (print "DEBUG: " varname " = " newval)
	  (attribute-set! *debugger-matrix* (conc *debugger-rownum* ":0") varname)
	  (attribute-set! *debugger-matrix* (conc *debugger-rownum* ":1") (conc varval))
	  ;; (attribute-set! *debugger-matrix* "FITTOTEXT" "C1")
	  ))
    (set! *debugger-rownum* (+ *debugger-rownum* 1))))


(define (debugger-pauser)
  (debugger)
  (attribute-set! *debugger-matrix* "REDRAW" "ALL")
  (let loop ()
    (if *debugger-control*
	(begin
	  (print "PAUSED!")
	  (thread-sleep! 1)
	  (loop))
	;;(thread-sleep! 0.01)
	)))
		  
;;    ;; lets use the debugger eh?
;;    (debugger-start)
;;    (debugger-trace-var "can-run-more"     can-run-more)
;;    (debugger-trace-var "hed"              hed)
;;    (debugger-trace-var "prereqs-not-met"  (runs:pretty-string prereqs-not-met))
;;    (debugger-pauser)

Modified rmt.scm from [b7f6f86358] to [b3e339430d].

71
72
73
74
75
76
77
78

79
80
81
82
83
84
85
86
87
88
89
90
91
92
93

94
95
96
97
98
99
100
71
72
73
74
75
76
77

78
79
80
81
82
83
84
85
86
87
88
89
90
91
92

93
94
95
96
97
98
99
100







-
+














-
+







	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
  ;; clean out old connections
  (mutex-lock! *db-multi-sync-mutex*)
  ;; (mutex-lock! *db-multi-sync-mutex*)
  (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref/default *runremote* run-id #f)))
         (if (and (vector? connection)
        	  (< (http-transport:server-dat-get-last-access connection) expire-time))
             (begin
               (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")
               ;; SHOULD CLOSE THE CONNECTION HERE
	       (case *transport-type*
		 ((nmsg)(nn-close (http-transport:server-dat-get-socket 
				   (hash-table-ref *runremote* run-id)))))
               (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))
  (mutex-unlock! *db-multi-sync-mutex*)
  ;; (mutex-unlock! *db-multi-sync-mutex*)
  ;; (mutex-lock! *send-receive-mutex*)
  (let* ((run-id          (if rid rid 0))
	 (connection-info (rmt:get-connection-info run-id)))
    ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)
    (if connection-info
	;; use the server if have connection info
	(let* ((dat     (case *transport-type*

Modified runs.scm from [0c516bb8a4] to [29ad163503].

25
26
27
28
29
30
31


32
33
34
35
36
37
38
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40







+
+







;; (declare (uses filedb))

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")

(include "debugger.scm")

(define (runs:test-get-full-path test)
  (let* ((testname (db:test-get-testname   test))
	 (itempath (db:test-get-item-path test)))
    (conc testname (if (equal? itempath "") "" (conc "(" itempath ")")))))


157
158
159
160
161
162
163








164
165
166
167
168
169
170
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180







+
+
+
+
+
+
+
+







				 ((and job-group-limit
				       (>= num-running-in-jobgroup job-group-limit))
				  (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60)
				      (debug:print 1 "WARNING: number of jobs " num-running-in-jobgroup 
						   " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit))
				  #t)
				 (else #f))))
	  ;; lets use the debugger eh?
	  (debugger-start start: 15)
	  (debugger-trace-var "runs:can-run-more-tests" "")
	  (debugger-trace-var "can-not-run-more"         can-not-run-more)
	  (debugger-trace-var "num-running"              num-running)
	  (debugger-trace-var "num-running-in-jobgroup"  num-running-in-jobgroup)
	  (debugger-trace-var "job-group-limit"          job-group-limit)
	  (debugger-pauser)
	  (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)))))


;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
;;              FIXME: error out if /patt specified
;;            
518
519
520
521
522
523
524








525
526
527
528
529
530
531
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549







+
+
+
+
+
+
+
+







		      "\n fails:           " (runs:pretty-string fails)
		      "\n testmode:        " testmode
		      "\n (member 'toplevel testmode): " (member 'toplevel testmode)
		      "\n (null? non-completed):    " (null? non-completed)
		      "\n reruns:          " reruns
		      "\n items:           " items
		      "\n can-run-more:    " can-run-more)

    ;; lets use the debugger eh?
    (debugger-start start: 2)
    (debugger-trace-var "runs:expand-items" "")
    (debugger-trace-var "can-run-more"     can-run-more)
    (debugger-trace-var "hed"              hed)
    (debugger-trace-var "prereqs-not-met"  (runs:pretty-string prereqs-not-met))
    (debugger-pauser)

    (cond
     ;; all prereqs met, fire off the test
     ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch

     ((and (not (member 'toplevel testmode))
	   (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a)
1062
1063
1064
1065
1066
1067
1068











1069
1070
1071
1072
1073
1074
1075
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104







+
+
+
+
+
+
+
+
+
+
+







		     "\n  num-retries: " num-retries
		     "\n  tal:         " tal
		     "\n  reruns:      " reruns
		     "\n  regfull:     " regfull
		     "\n  reglen:      " reglen
		     "\n  length reg:  " (length reg)
		     "\n  reg:         " reg)

	;; lets use the debugger eh?
	(debugger-start start: 7)
	(debugger-trace-var "runs:run-tests-queue" "")
	(debugger-trace-var "hed"              hed)
	(debugger-trace-var "tal"              tal)
	(debugger-trace-var "items"            items)
	(debugger-trace-var "item-path"        item-path)
	(debugger-trace-var "waitons"          waitons) 
	(debugger-pauser)


	;; check for hed in waitons => this would be circular, remove it and issue an
	;; error
	(if (member test-name waitons)
	    (begin
	      (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!")
	      (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons))))