Megatest

Check-in [2fe9144186]
Login
Overview
Comment:Merged latest from v1.61
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fix-chained-waiton
Files: files | file ages | folders
SHA1: 2fe914418666a83305e3cfbd47c931fed450f9d4
User & Date: mrwellan on 2016-05-13 11:32:12
Other Links: branch diff | manifest | tags
Context
2016-05-16
09:05
Speculative change to track tests that were waitons of a stuck or non-runnable test check-in: 0b57bca235 user: mrwellan tags: fix-chained-waiton
2016-05-13
11:32
Merged latest from v1.61 check-in: 2fe9144186 user: mrwellan tags: fix-chained-waiton
00:37
Added more instrumentation to help debug the test2 issue check-in: eabab4fa80 user: matt tags: v1.61
2016-05-12
22:35
caught up with v1.61 check-in: 0868158f0b user: bb tags: fix-chained-waiton
Changes

Modified common.scm from [7b10355f34] to [ba84b1bce0].

787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
  (let ((best     #f)
	(bestsize 0))
    (for-each 
     (lambda (disk-num)
       (let* ((dirpath    (cadr (assoc disk-num disks)))
	      (freespc    (cond
			   ((not (directory? dirpath))
			    (if (common:low-noise-print 50 "disks not a dir " disk-num)
				(debug:print 0 "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it."))
			    -1)
			   ((not (file-write-access? dirpath))
			    (if (common:low-noise-print 50 "disks not writeable " disk-num)
				(debug:print 0 "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it."))
			    -1)
			   ((not (eq? (string-ref dirpath 0) #\/))
			    (if (common:low-noise-print 50 "disks not a proper path " disk-num)
				(debug:print 0 "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it."))
			    -1)
			   (else
			    (get-df dirpath)))))
	 (if (> freespc bestsize)
	     (begin
	       (set! best     (cons disk-num dirpath))







|



|



|







787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
  (let ((best     #f)
	(bestsize 0))
    (for-each 
     (lambda (disk-num)
       (let* ((dirpath    (cadr (assoc disk-num disks)))
	      (freespc    (cond
			   ((not (directory? dirpath))
			    (if (common:low-noise-print 300 "disks not a dir " disk-num)
				(debug:print 0 "WARNING: disk " disk-num " at path \"" dirpath "\" is not a directory - ignoring it."))
			    -1)
			   ((not (file-write-access? dirpath))
			    (if (common:low-noise-print 300 "disks not writeable " disk-num)
				(debug:print 0 "WARNING: disk " disk-num " at path \"" dirpath "\" is not writeable - ignoring it."))
			    -1)
			   ((not (eq? (string-ref dirpath 0) #\/))
			    (if (common:low-noise-print 300 "disks not a proper path " disk-num)
				(debug:print 0 "WARNING: disk " disk-num " at path \"" dirpath "\" is not a fully qualified path - ignoring it."))
			    -1)
			   (else
			    (get-df dirpath)))))
	 (if (> freespc bestsize)
	     (begin
	       (set! best     (cons disk-num dirpath))

Added debugger.scm version [f446c83fb1].



















































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
(use iup)

(define *debugger-control* #f)
(define *debugger-rownum*  0)
(define *debugger-matrix*  #f)
(define *debugger*         #f)

(define (debugger)
  (if (not *debugger*)
      (set! *debugger* 
	    (thread-start!
	     (make-thread
	      (lambda ()
		(show
		 (dialog
		  (let ((pause #f)
			(mtrx  (matrix
				#:expand "YES"
				#:numlin 30
				#:numcol 3
				#:numlin-visible 20
				#:numcol-visible 2
				#:alignment1 "ALEFT"
				)))
		    (set! pause (button "Pause" 
					#:action (lambda (obj)
						   (set! *debugger-control* (not *debugger-control*))
						   (attribute-set! pause "BGCOLOR" (if *debugger-control*
										       "200 0 0"
										       "0 0 200")))))
		    (set! *debugger-matrix* mtrx)
		    (attribute-set! mtrx "WIDTH1" "300")
		    (vbox
		     mtrx
		     (hbox
		      pause)))))
		(main-loop)))))))

(define (debugger-start #!key (start 2))
  (set! *debugger-rownum* start))

(define (debugger-trace-var varname varval)
  (let ((oldval (attribute *debugger-matrix* (conc *debugger-rownum* ":1")))
	(newval (conc varval)))
    (if (not (equal? oldval newval))
	(begin
	  ;; (print "DEBUG: " varname " = " newval)
	  (attribute-set! *debugger-matrix* (conc *debugger-rownum* ":0") varname)
	  (attribute-set! *debugger-matrix* (conc *debugger-rownum* ":1") (conc varval))
	  ;; (attribute-set! *debugger-matrix* "FITTOTEXT" "C1")
	  ))
    (set! *debugger-rownum* (+ *debugger-rownum* 1))))


(define (debugger-pauser)
  (debugger)
  (attribute-set! *debugger-matrix* "REDRAW" "ALL")
  (let loop ()
    (if *debugger-control*
	(begin
	  (print "PAUSED!")
	  (thread-sleep! 1)
	  (loop))
	;;(thread-sleep! 0.01)
	)))
		  
;;    ;; lets use the debugger eh?
;;    (debugger-start)
;;    (debugger-trace-var "can-run-more"     can-run-more)
;;    (debugger-trace-var "hed"              hed)
;;    (debugger-trace-var "prereqs-not-met"  (runs:pretty-string prereqs-not-met))
;;    (debugger-pauser)

Modified rmt.scm from [b7f6f86358] to [b3e339430d].

71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
  ;; clean out old connections
  (mutex-lock! *db-multi-sync-mutex*)
  (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref/default *runremote* run-id #f)))
         (if (and (vector? connection)
        	  (< (http-transport:server-dat-get-last-access connection) expire-time))
             (begin
               (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")
               ;; SHOULD CLOSE THE CONNECTION HERE
	       (case *transport-type*
		 ((nmsg)(nn-close (http-transport:server-dat-get-socket 
				   (hash-table-ref *runremote* run-id)))))
               (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))
  (mutex-unlock! *db-multi-sync-mutex*)
  ;; (mutex-lock! *send-receive-mutex*)
  (let* ((run-id          (if rid rid 0))
	 (connection-info (rmt:get-connection-info run-id)))
    ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)
    (if connection-info
	;; use the server if have connection info
	(let* ((dat     (case *transport-type*







|














|







71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
  ;; clean out old connections
  ;; (mutex-lock! *db-multi-sync-mutex*)
  (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref/default *runremote* run-id #f)))
         (if (and (vector? connection)
        	  (< (http-transport:server-dat-get-last-access connection) expire-time))
             (begin
               (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")
               ;; SHOULD CLOSE THE CONNECTION HERE
	       (case *transport-type*
		 ((nmsg)(nn-close (http-transport:server-dat-get-socket 
				   (hash-table-ref *runremote* run-id)))))
               (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))
  ;; (mutex-unlock! *db-multi-sync-mutex*)
  ;; (mutex-lock! *send-receive-mutex*)
  (let* ((run-id          (if rid rid 0))
	 (connection-info (rmt:get-connection-info run-id)))
    ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)
    (if connection-info
	;; use the server if have connection info
	(let* ((dat     (case *transport-type*

Modified runs.scm from [0c516bb8a4] to [29ad163503].

25
26
27
28
29
30
31


32
33
34
35
36
37
38
;; (declare (uses filedb))

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")



(define (runs:test-get-full-path test)
  (let* ((testname (db:test-get-testname   test))
	 (itempath (db:test-get-item-path test)))
    (conc testname (if (equal? itempath "") "" (conc "(" itempath ")")))))









>
>







25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
;; (declare (uses filedb))

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")

(include "debugger.scm")

(define (runs:test-get-full-path test)
  (let* ((testname (db:test-get-testname   test))
	 (itempath (db:test-get-item-path test)))
    (conc testname (if (equal? itempath "") "" (conc "(" itempath ")")))))


157
158
159
160
161
162
163








164
165
166
167
168
169
170
				 ((and job-group-limit
				       (>= num-running-in-jobgroup job-group-limit))
				  (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60)
				      (debug:print 1 "WARNING: number of jobs " num-running-in-jobgroup 
						   " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit))
				  #t)
				 (else #f))))








	  (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)))))


;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
;;              FIXME: error out if /patt specified
;;            







>
>
>
>
>
>
>
>







159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
				 ((and job-group-limit
				       (>= num-running-in-jobgroup job-group-limit))
				  (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60)
				      (debug:print 1 "WARNING: number of jobs " num-running-in-jobgroup 
						   " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit))
				  #t)
				 (else #f))))
	  ;; lets use the debugger eh?
	  (debugger-start start: 15)
	  (debugger-trace-var "runs:can-run-more-tests" "")
	  (debugger-trace-var "can-not-run-more"         can-not-run-more)
	  (debugger-trace-var "num-running"              num-running)
	  (debugger-trace-var "num-running-in-jobgroup"  num-running-in-jobgroup)
	  (debugger-trace-var "job-group-limit"          job-group-limit)
	  (debugger-pauser)
	  (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)))))


;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
;;              FIXME: error out if /patt specified
;;            
518
519
520
521
522
523
524








525
526
527
528
529
530
531
		      "\n fails:           " (runs:pretty-string fails)
		      "\n testmode:        " testmode
		      "\n (member 'toplevel testmode): " (member 'toplevel testmode)
		      "\n (null? non-completed):    " (null? non-completed)
		      "\n reruns:          " reruns
		      "\n items:           " items
		      "\n can-run-more:    " can-run-more)









    (cond
     ;; all prereqs met, fire off the test
     ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch

     ((and (not (member 'toplevel testmode))
	   (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a)







>
>
>
>
>
>
>
>







528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
		      "\n fails:           " (runs:pretty-string fails)
		      "\n testmode:        " testmode
		      "\n (member 'toplevel testmode): " (member 'toplevel testmode)
		      "\n (null? non-completed):    " (null? non-completed)
		      "\n reruns:          " reruns
		      "\n items:           " items
		      "\n can-run-more:    " can-run-more)

    ;; lets use the debugger eh?
    (debugger-start start: 2)
    (debugger-trace-var "runs:expand-items" "")
    (debugger-trace-var "can-run-more"     can-run-more)
    (debugger-trace-var "hed"              hed)
    (debugger-trace-var "prereqs-not-met"  (runs:pretty-string prereqs-not-met))
    (debugger-pauser)

    (cond
     ;; all prereqs met, fire off the test
     ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch

     ((and (not (member 'toplevel testmode))
	   (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a)
1062
1063
1064
1065
1066
1067
1068











1069
1070
1071
1072
1073
1074
1075
		     "\n  num-retries: " num-retries
		     "\n  tal:         " tal
		     "\n  reruns:      " reruns
		     "\n  regfull:     " regfull
		     "\n  reglen:      " reglen
		     "\n  length reg:  " (length reg)
		     "\n  reg:         " reg)












	;; check for hed in waitons => this would be circular, remove it and issue an
	;; error
	(if (member test-name waitons)
	    (begin
	      (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!")
	      (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons))))







>
>
>
>
>
>
>
>
>
>
>







1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
		     "\n  num-retries: " num-retries
		     "\n  tal:         " tal
		     "\n  reruns:      " reruns
		     "\n  regfull:     " regfull
		     "\n  reglen:      " reglen
		     "\n  length reg:  " (length reg)
		     "\n  reg:         " reg)

	;; lets use the debugger eh?
	(debugger-start start: 7)
	(debugger-trace-var "runs:run-tests-queue" "")
	(debugger-trace-var "hed"              hed)
	(debugger-trace-var "tal"              tal)
	(debugger-trace-var "items"            items)
	(debugger-trace-var "item-path"        item-path)
	(debugger-trace-var "waitons"          waitons) 
	(debugger-pauser)


	;; check for hed in waitons => this would be circular, remove it and issue an
	;; error
	(if (member test-name waitons)
	    (begin
	      (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!")
	      (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons))))