Megatest

Check-in [63c4fcc524]
Login
Overview
Comment:no transport option basically working (db locks after a while).
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v2.0001-configurable-transport
Files: files | file ages | folders
SHA1: 63c4fcc5243929781a17bb35b809807f1cd9e373
User & Date: matt on 2022-01-16 12:24:56
Other Links: branch diff | manifest | tags
Context
2022-01-17
14:06
wip, ulex-simple progressing a bit check-in: 337ae6b713 user: matt tags: v2.0001-configurable-transport
2022-01-16
12:24
no transport option basically working (db locks after a while). check-in: 63c4fcc524 user: matt tags: v2.0001-configurable-transport
08:55
Added none as transport option check-in: d8959da4df user: matt tags: v2.0001-configurable-transport
Changes

Modified commonmod.scm from [ae707007da] to [875119b082].

182
183
184
185
186
187
188

189
190
191
192
193
194
195
test:get-item-path
test:test-get-fullname
make-and-init-bigdata
call-with-environment-variables
common:simple-file-lock
common:simple-file-lock-and-wait
common:simple-file-release-lock

common:fail-safe
get-file-descriptor-count
common:get-this-exe-fullpath
common:get-sync-lock-filepath
common:find-local-megatest
common:logpro-exit-code->status-sym
common:worse-status-sym







>







182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
test:get-item-path
test:test-get-fullname
make-and-init-bigdata
call-with-environment-variables
common:simple-file-lock
common:simple-file-lock-and-wait
common:simple-file-release-lock
common:with-simple-file-lock
common:fail-safe
get-file-descriptor-count
common:get-this-exe-fullpath
common:get-sync-lock-filepath
common:find-local-megatest
common:logpro-exit-code->status-sym
common:worse-status-sym
1240
1241
1242
1243
1244
1245
1246







1247
1248
1249
1250
1251
1252
1253
	      #f)))))

(define (common:simple-file-release-lock fname)
  (handle-exceptions
      exn
      #f ;; I don't really care why this failed (at least for now)
    (delete-file* fname)))








;;======================================================================
;; PUlled below from common.scm
;;======================================================================

;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message







>
>
>
>
>
>
>







1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
	      #f)))))

(define (common:simple-file-release-lock fname)
  (handle-exceptions
      exn
      #f ;; I don't really care why this failed (at least for now)
    (delete-file* fname)))

(define (common:with-simple-file-lock fname proc)
  (let* ((lkfname (conc fname ".lock")))
    (common:simple-file-lock-and-wait lkfname)
    (let ((res (proc)))
      (common:simple-file-release-lock lkfname)
      res)))

;;======================================================================
;; PUlled below from common.scm
;;======================================================================

;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message

Modified configfmod.scm from [b4853bf0ef] to [91463d0a8b].

47
48
49
50
51
52
53

54
55
56
57
58
59
60
	 configf:set-section-var
	 configf:var-is?
	 configf:write-alist
	 configf:write-config
	 find-config
	 getenv
	 mytarget

	 nice-path
	 process:cmd-run->list
	 runconfig:read
	 runconfigs-get
	 safe-setenv
	 setenv
	 configf:eval-string-in-environment







>







47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
	 configf:set-section-var
	 configf:var-is?
	 configf:write-alist
	 configf:write-config
	 find-config
	 getenv
	 mytarget
	 my-with-lock
	 nice-path
	 process:cmd-run->list
	 runconfig:read
	 runconfigs-get
	 safe-setenv
	 setenv
	 configf:eval-string-in-environment
112
113
114
115
116
117
118



119
120


121
122
123
124
125
126
127
128
129
;;======================================================================
;; parameters
;;======================================================================

;; while targets are Megatest specific they are a useful concept
(define mytarget (make-parameter #f))




;; locking is optional, many environments don't care (e.g. running on one machine)
;; NOTE: the locker must follow the same syntax as with-dot-lock*


;;
(define my-with-lock (make-parameter with-dot-lock*))

;;======================================================================
;; move debug stuff to separate module then put these back where they belong
;;======================================================================
;;======================================================================
;; lookup routines - replicated from configf
;;======================================================================







>
>
>


>
>

|







113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
;;======================================================================
;; parameters
;;======================================================================

;; while targets are Megatest specific they are a useful concept
(define mytarget (make-parameter #f))

;; fake locker
(define (fake-locker fname proc)(proc))

;; locking is optional, many environments don't care (e.g. running on one machine)
;; NOTE: the locker must follow the same syntax as with-dot-lock*
;;       with-dot-lock* has problems if /tmp and the file being
;;       locked are not on the same filesystem
;;
(define my-with-lock (make-parameter fake-locker)) ;; with-dot-lock*))

;;======================================================================
;; move debug stuff to separate module then put these back where they belong
;;======================================================================
;;======================================================================
;; lookup routines - replicated from configf
;;======================================================================
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207


1208
1209
1210
1211
1212
1213
1214
1215
1216

1217
1218
1219

1220
1221
1222
1223
1224
1225
1226
1227
    (configf:alist->config
     (with-input-from-file fname read))))

;;======================================================================
;; DO THE LOCKING AROUND THE CALL
;;======================================================================
;;
(define (configf:write-alist cdat fname)
  ;; (if (not (common:faux-lock fname))
  ;; (debug:print 0 *default-log-port* "INFO: NEED LOCKING ADDED HERE " fname)
  ((my-with-lock)
   fname
   (lambda ()
     (let* ((dat  (configf:config->alist cdat))
            (res
             (begin
               (with-output-to-file fname ;; first write out the file
		 (lambda ()
                   (pp dat)))
               ;; I don't like this. It makes write-alist opaque and complicated. -mrw-


               (if (file-exists? fname)   ;; now verify it is readable
                   (if (configf:read-alist fname)
                       #t ;; data is good.
                       (begin
			 (handle-exceptions
			  exn
			  (begin
			    (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn)
			    #f)

			  (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.")
			  (delete-file fname))
			 #f))

                   #f))))
       res))))
  
(define (common:get-fields cfgdat)
  (let ((fields (hash-table-ref/default cfgdat "fields" '())))
    (map car fields)))

)







|











|
>
>
|
|
|
<
|
|
|
|
<
>
|
|
<
>
|







1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218

1219
1220
1221
1222

1223
1224
1225

1226
1227
1228
1229
1230
1231
1232
1233
1234
    (configf:alist->config
     (with-input-from-file fname read))))

;;======================================================================
;; DO THE LOCKING AROUND THE CALL
;;======================================================================
;;
(define (configf:write-alist cdat fname #!optional (check-written #f))
  ;; (if (not (common:faux-lock fname))
  ;; (debug:print 0 *default-log-port* "INFO: NEED LOCKING ADDED HERE " fname)
  ((my-with-lock)
   fname
   (lambda ()
     (let* ((dat  (configf:config->alist cdat))
            (res
             (begin
               (with-output-to-file fname ;; first write out the file
		 (lambda ()
                   (pp dat)))
               ;; I don't like this. It makes write-alist complicated
	       ;; move to something like write-and-verify-alist. -mrw-
               (if check-written
		   (if (file-exists? fname)   ;; now verify it is readable
		       (if (configf:read-alist fname)
			   'data-good ;; data is good.

			   (handle-exceptions
			       exn
			       (begin
				 (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn)

				 'data-bad)
			     (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.")
			     (delete-file fname)))

		       'data-not-there)
		   'data-not-checked))))
       res))))
  
(define (common:get-fields cfgdat)
  (let ((fields (hash-table-ref/default cfgdat "fields" '())))
    (map car fields)))

)

Modified dbmod.scm from [ac637164a6] to [2dc9d5a75d].

3452
3453
3454
3455
3456
3457
3458
3459
3460


3461
3462
3463
3464
3465
3466
3467
(define (db:get-count-tests-running-for-run-id dbstruct run-id) 
  (let* ((qry  "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;")) ;; )
    (db:with-db
     dbstruct
     run-id
     #f
     (lambda (db)
       (let* ((stmth (db:get-cache-stmth dbstruct db qry)))
	 (sqlite3:first-result stmth run-id))))))



;; For a given testname how many items are running? Used to determine
;; probability for regenerating html
;;
(define (db:get-count-tests-running-for-testname dbstruct run-id testname)
  (db:with-db
   dbstruct







|
|
>
>







3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
(define (db:get-count-tests-running-for-run-id dbstruct run-id) 
  (let* ((qry  "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;")) ;; )
    (db:with-db
     dbstruct
     run-id
     #f
     (lambda (db)
       (let* (#;(stmth (db:get-cache-stmth dbstruct db qry)))
	 #;(sqlite3:first-result stmth run-id)
	 (sqlite3:first-result db qry run-id)
	 )))))

;; For a given testname how many items are running? Used to determine
;; probability for regenerating html
;;
(define (db:get-count-tests-running-for-testname dbstruct run-id testname)
  (db:with-db
   dbstruct

Modified launchmod.scm from [a3891cff5d] to [b8e458a09e].

661
662
663
664
665
666
667
668

669
670
671
672
673
674
675
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  ;; now is also a good time to write the .testconfig file
	  (let* ((tconfig-fname   (conc work-area "/.testconfig"))
		 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
		 (scripts (configf:get-section tconfig "scripts")))
	    ;; create .testconfig file
	    (configf:write-alist tconfig tconfig-tmpfile)

	    (move-file tconfig-tmpfile tconfig-fname #t)
	    (delete-file* ".final-status")

	    ;; extract scripts from testconfig and write them to files in test run dir
	    (for-each
	     (lambda (scriptdat)
	       (match scriptdat







|
>







661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  ;; now is also a good time to write the .testconfig file
	  (let* ((tconfig-fname   (conc work-area "/.testconfig"))
		 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
		 (scripts (configf:get-section tconfig "scripts")))
	    ;; create .testconfig file
	    (configf:write-alist tconfig tconfig-tmpfile #t) ;; the #t forces a check of the written data
	    (assert (file-exists? tconfig-tmpfile) "FATAL: We just wrote the dang file, how can it not exist?")
	    (move-file tconfig-tmpfile tconfig-fname #t)
	    (delete-file* ".final-status")

	    ;; extract scripts from testconfig and write them to files in test run dir
	    (for-each
	     (lambda (scriptdat)
	       (match scriptdat

Modified megatest.scm from [2a4403e0b1] to [09a47e551f].

167
168
169
170
171
172
173


174
175
176
177
178
179
180
;;   ;; ulex parameters
;;   (work-method 'direct)
;;   (return-method 'direct)
  
  ;; ulex parameters
  ;; (work-method   'mailbox)
  ;; (return-method 'mailbox)


  
;; fake out readline usage of toplevel-command
(define (toplevel-command . a) #f)
(define *didsomething* #f)  
(define *db* #f) ;; this is only for the repl, do not use in general!!!!

;; (include "common_records.scm")







>
>







167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
;;   ;; ulex parameters
;;   (work-method 'direct)
;;   (return-method 'direct)
  
  ;; ulex parameters
  ;; (work-method   'mailbox)
  ;; (return-method 'mailbox)

(my-with-lock common:with-simple-file-lock)
  
;; fake out readline usage of toplevel-command
(define (toplevel-command . a) #f)
(define *didsomething* #f)  
(define *db* #f) ;; this is only for the repl, do not use in general!!!!

;; (include "common_records.scm")

Modified runsmod.scm from [b535942743] to [f7fd47400d].

242
243
244
245
246
247
248













249
250
251
252
253
254
255
	(currtime (current-seconds)))
    (if (> (- currtime lasttime) waitval)
	(begin
	  (hash-table-set! *runs:denoise* key currtime)
	  #t)
	#f)))














(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)

  ;; Take advantage of a good place to exit if running the one-pass methodology
  (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20)
	   (args:get-arg "-one-pass"))
      (exit 0))








>
>
>
>
>
>
>
>
>
>
>
>
>







242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
	(currtime (current-seconds)))
    (if (> (- currtime lasttime) waitval)
	(begin
	  (hash-table-set! *runs:denoise* key currtime)
	  #t)
	#f)))

(define *too-soon-delays* (make-hash-table))

;; to-soon delay, when matching event happened in less than dseconds delay wseconds
;;
(define (runs:too-soon-delay key dseconds wseconds)
  (let* ((last-time (hash-table-ref/default *too-soon-delays* key #f)))
    (if (and last-time
	     (< (- (current-seconds) last-time) dseconds))
	(begin
	  (debug:print-info 0 *default-log-port* "Whoa, slow down there ... "key" has been too recently seen.")
	  (thread-sleep! wseconds)))
    (hash-table-set! *too-soon-delays* key (current-seconds))))

(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)

  ;; Take advantage of a good place to exit if running the one-pass methodology
  (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20)
	   (args:get-arg "-one-pass"))
      (exit 0))

1465
1466
1467
1468
1469
1470
1471
1472


1473
1474
1475
1476
1477
1478
1479
			   waitons:     waitons
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)
    


	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))








|
>
>







1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
			   waitons:     waitons
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)

	(runs:too-soon-delay (conc "loop delay " hed) 1 1)
	
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))

1492
1493
1494
1495
1496
1497
1498

1499
1500
1501
1502
1503
1504
1505
	      (if (runs:lownoise (conc "been marked do not run " tfullname) 60)
		  (debug:print-info 0 *default-log-port* "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable"))
	      (if (or (not (null? tal))(not (null? reg)))
		  (loop (runs:queue-next-hed tal reg reglen regfull)
			(runs:queue-next-tal tal reg reglen regfull)
			(runs:queue-next-reg tal reg reglen regfull)
			reruns))))

        ;; (loop (car tal)(cdr tal) reg reruns))))

	(runs:incremental-print-results run-id)
	(debug:print 4 *default-log-port* "TOP OF LOOP => "
		     "test-name: " test-name
		     "\n  hed:         " hed
		     "\n  tal:         " (runs:pretty-long-list tal)







>







1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
	      (if (runs:lownoise (conc "been marked do not run " tfullname) 60)
		  (debug:print-info 0 *default-log-port* "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable"))
	      (if (or (not (null? tal))(not (null? reg)))
		  (loop (runs:queue-next-hed tal reg reglen regfull)
			(runs:queue-next-tal tal reg reglen regfull)
			(runs:queue-next-reg tal reg reglen regfull)
			reruns))))
	
        ;; (loop (car tal)(cdr tal) reg reruns))))

	(runs:incremental-print-results run-id)
	(debug:print 4 *default-log-port* "TOP OF LOOP => "
		     "test-name: " test-name
		     "\n  hed:         " hed
		     "\n  tal:         " (runs:pretty-long-list tal)
1723
1724
1725
1726
1727
1728
1729

1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))
	  (begin
	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
	    ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))

	    (if (> (current-seconds)(+ last-time-incomplete 900))
		(let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id)))
		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
				    ". Running as pid " (current-process-id) " on " (get-host-name))
		  (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
		  (runs:find-and-mark-incomplete-and-check-end-of-run run-id #f)
		  (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
				    " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at "
				    (time->string (seconds->local-time (current-seconds))))))
	    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
	    (thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1))
	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id)
		       num-running))))
    ;; LET* ((test-record
    ;; we get here on "drop through". All done!
    ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. 
    ;; (debug:print-info 0 *default-log-port* "Calling Post Hook")    
    ;; (runs:run-post-hook run-id)







>










<







1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756

1757
1758
1759
1760
1761
1762
1763
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))
	  (begin
	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
	    ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
	    (thread-sleep! 5) ;; let's always sleep, prevents abutting calls to rum:get-count-tests-running-for-run-id - didn't help
	    (if (> (current-seconds)(+ last-time-incomplete 900))
		(let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id)))
		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
				    ". Running as pid " (current-process-id) " on " (get-host-name))
		  (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
		  (runs:find-and-mark-incomplete-and-check-end-of-run run-id #f)
		  (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
				    " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at "
				    (time->string (seconds->local-time (current-seconds))))))
	    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))

	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id)
		       num-running))))
    ;; LET* ((test-record
    ;; we get here on "drop through". All done!
    ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. 
    ;; (debug:print-info 0 *default-log-port* "Calling Post Hook")    
    ;; (runs:run-post-hook run-id)

tests/simplerun/tests/test1/step1.sh became executable with contents [b36e5b3e1b].

tests/simplerun/tests/test1/step2.sh became executable with contents [8cc537c8e5].