Megatest

Check-in [63c4fcc524]
Login
Overview
Comment:no transport option basically working (db locks after a while).
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v2.0001-configurable-transport
Files: files | file ages | folders
SHA1: 63c4fcc5243929781a17bb35b809807f1cd9e373
User & Date: matt on 2022-01-16 12:24:56
Other Links: branch diff | manifest | tags
Context
2022-01-17
14:06
wip, ulex-simple progressing a bit check-in: 337ae6b713 user: matt tags: v2.0001-configurable-transport
2022-01-16
12:24
no transport option basically working (db locks after a while). check-in: 63c4fcc524 user: matt tags: v2.0001-configurable-transport
08:55
Added none as transport option check-in: d8959da4df user: matt tags: v2.0001-configurable-transport
Changes

Modified commonmod.scm from [ae707007da] to [875119b082].

182
183
184
185
186
187
188

189
190
191
192
193
194
195
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196







+







test:get-item-path
test:test-get-fullname
make-and-init-bigdata
call-with-environment-variables
common:simple-file-lock
common:simple-file-lock-and-wait
common:simple-file-release-lock
common:with-simple-file-lock
common:fail-safe
get-file-descriptor-count
common:get-this-exe-fullpath
common:get-sync-lock-filepath
common:find-local-megatest
common:logpro-exit-code->status-sym
common:worse-status-sym
1240
1241
1242
1243
1244
1245
1246







1247
1248
1249
1250
1251
1252
1253
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261







+
+
+
+
+
+
+







	      #f)))))

(define (common:simple-file-release-lock fname)
  (handle-exceptions
      exn
      #f ;; I don't really care why this failed (at least for now)
    (delete-file* fname)))

(define (common:with-simple-file-lock fname proc)
  (let* ((lkfname (conc fname ".lock")))
    (common:simple-file-lock-and-wait lkfname)
    (let ((res (proc)))
      (common:simple-file-release-lock lkfname)
      res)))

;;======================================================================
;; PUlled below from common.scm
;;======================================================================

;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message

Modified configfmod.scm from [b4853bf0ef] to [91463d0a8b].

47
48
49
50
51
52
53

54
55
56
57
58
59
60
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61







+







	 configf:set-section-var
	 configf:var-is?
	 configf:write-alist
	 configf:write-config
	 find-config
	 getenv
	 mytarget
	 my-with-lock
	 nice-path
	 process:cmd-run->list
	 runconfig:read
	 runconfigs-get
	 safe-setenv
	 setenv
	 configf:eval-string-in-environment
112
113
114
115
116
117
118



119
120


121
122

123
124
125
126
127
128
129
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127

128
129
130
131
132
133
134
135







+
+
+


+
+

-
+







;;======================================================================
;; parameters
;;======================================================================

;; while targets are Megatest specific they are a useful concept
(define mytarget (make-parameter #f))

;; fake locker
(define (fake-locker fname proc)(proc))

;; locking is optional, many environments don't care (e.g. running on one machine)
;; NOTE: the locker must follow the same syntax as with-dot-lock*
;;       with-dot-lock* has problems if /tmp and the file being
;;       locked are not on the same filesystem
;;
(define my-with-lock (make-parameter with-dot-lock*))
(define my-with-lock (make-parameter fake-locker)) ;; with-dot-lock*))

;;======================================================================
;; move debug stuff to separate module then put these back where they belong
;;======================================================================
;;======================================================================
;; lookup routines - replicated from configf
;;======================================================================
1188
1189
1190
1191
1192
1193
1194
1195

1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210






1211
1212
1213
1214
1215




1216
1217
1218



1219
1220


1221
1222
1223
1224
1225
1226
1227
1194
1195
1196
1197
1198
1199
1200

1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212




1213
1214
1215
1216
1217
1218





1219
1220
1221
1222



1223
1224
1225


1226
1227
1228
1229
1230
1231
1232
1233
1234







-
+











-
-
-
-
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
-
-
-
+
+
+
-
-
+
+







    (configf:alist->config
     (with-input-from-file fname read))))

;;======================================================================
;; DO THE LOCKING AROUND THE CALL
;;======================================================================
;;
(define (configf:write-alist cdat fname)
(define (configf:write-alist cdat fname #!optional (check-written #f))
  ;; (if (not (common:faux-lock fname))
  ;; (debug:print 0 *default-log-port* "INFO: NEED LOCKING ADDED HERE " fname)
  ((my-with-lock)
   fname
   (lambda ()
     (let* ((dat  (configf:config->alist cdat))
            (res
             (begin
               (with-output-to-file fname ;; first write out the file
		 (lambda ()
                   (pp dat)))
               ;; I don't like this. It makes write-alist opaque and complicated. -mrw-
               (if (file-exists? fname)   ;; now verify it is readable
                   (if (configf:read-alist fname)
                       #t ;; data is good.
               ;; I don't like this. It makes write-alist complicated
	       ;; move to something like write-and-verify-alist. -mrw-
               (if check-written
		   (if (file-exists? fname)   ;; now verify it is readable
		       (if (configf:read-alist fname)
			   'data-good ;; data is good.
                       (begin
			 (handle-exceptions
			  exn
			  (begin
			    (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn)
			   (handle-exceptions
			       exn
			       (begin
				 (debug:print 0 *default-log-port* "deleting " fname " failed, exn=" exn)
			    #f)
			  (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.")
			  (delete-file fname))
				 'data-bad)
			     (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.")
			     (delete-file fname)))
			 #f))
                   #f))))
		       'data-not-there)
		   'data-not-checked))))
       res))))
  
(define (common:get-fields cfgdat)
  (let ((fields (hash-table-ref/default cfgdat "fields" '())))
    (map car fields)))

)

Modified dbmod.scm from [ac637164a6] to [2dc9d5a75d].

3452
3453
3454
3455
3456
3457
3458
3459
3460




3461
3462
3463
3464
3465
3466
3467
3452
3453
3454
3455
3456
3457
3458


3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469







-
-
+
+
+
+







(define (db:get-count-tests-running-for-run-id dbstruct run-id) 
  (let* ((qry  "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;")) ;; )
    (db:with-db
     dbstruct
     run-id
     #f
     (lambda (db)
       (let* ((stmth (db:get-cache-stmth dbstruct db qry)))
	 (sqlite3:first-result stmth run-id))))))
       (let* (#;(stmth (db:get-cache-stmth dbstruct db qry)))
	 #;(sqlite3:first-result stmth run-id)
	 (sqlite3:first-result db qry run-id)
	 )))))

;; For a given testname how many items are running? Used to determine
;; probability for regenerating html
;;
(define (db:get-count-tests-running-for-testname dbstruct run-id testname)
  (db:with-db
   dbstruct

Modified launchmod.scm from [a3891cff5d] to [b8e458a09e].

661
662
663
664
665
666
667
668


669
670
671
672
673
674
675
661
662
663
664
665
666
667

668
669
670
671
672
673
674
675
676







-
+
+







	  ;; (db:test-remove-steps db run-id testname itemdat)
	  ;; now is also a good time to write the .testconfig file
	  (let* ((tconfig-fname   (conc work-area "/.testconfig"))
		 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
		 (scripts (configf:get-section tconfig "scripts")))
	    ;; create .testconfig file
	    (configf:write-alist tconfig tconfig-tmpfile)
	    (configf:write-alist tconfig tconfig-tmpfile #t) ;; the #t forces a check of the written data
	    (assert (file-exists? tconfig-tmpfile) "FATAL: We just wrote the dang file, how can it not exist?")
	    (move-file tconfig-tmpfile tconfig-fname #t)
	    (delete-file* ".final-status")

	    ;; extract scripts from testconfig and write them to files in test run dir
	    (for-each
	     (lambda (scriptdat)
	       (match scriptdat

Modified megatest.scm from [2a4403e0b1] to [09a47e551f].

167
168
169
170
171
172
173


174
175
176
177
178
179
180
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182







+
+







;;   ;; ulex parameters
;;   (work-method 'direct)
;;   (return-method 'direct)
  
  ;; ulex parameters
  ;; (work-method   'mailbox)
  ;; (return-method 'mailbox)

(my-with-lock common:with-simple-file-lock)
  
;; fake out readline usage of toplevel-command
(define (toplevel-command . a) #f)
(define *didsomething* #f)  
(define *db* #f) ;; this is only for the repl, do not use in general!!!!

;; (include "common_records.scm")

Modified runsmod.scm from [b535942743] to [f7fd47400d].

242
243
244
245
246
247
248













249
250
251
252
253
254
255
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268







+
+
+
+
+
+
+
+
+
+
+
+
+







	(currtime (current-seconds)))
    (if (> (- currtime lasttime) waitval)
	(begin
	  (hash-table-set! *runs:denoise* key currtime)
	  #t)
	#f)))

(define *too-soon-delays* (make-hash-table))

;; to-soon delay, when matching event happened in less than dseconds delay wseconds
;;
(define (runs:too-soon-delay key dseconds wseconds)
  (let* ((last-time (hash-table-ref/default *too-soon-delays* key #f)))
    (if (and last-time
	     (< (- (current-seconds) last-time) dseconds))
	(begin
	  (debug:print-info 0 *default-log-port* "Whoa, slow down there ... "key" has been too recently seen.")
	  (thread-sleep! wseconds)))
    (hash-table-set! *too-soon-delays* key (current-seconds))))

(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)

  ;; Take advantage of a good place to exit if running the one-pass methodology
  (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20)
	   (args:get-arg "-one-pass"))
      (exit 0))

1465
1466
1467
1468
1469
1470
1471
1472



1473
1474
1475
1476
1477
1478
1479
1478
1479
1480
1481
1482
1483
1484

1485
1486
1487
1488
1489
1490
1491
1492
1493
1494







-
+
+
+







			   waitons:     waitons
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)
    

	(runs:too-soon-delay (conc "loop delay " hed) 1 1)
	
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))

1492
1493
1494
1495
1496
1497
1498

1499
1500
1501
1502
1503
1504
1505
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521







+







	      (if (runs:lownoise (conc "been marked do not run " tfullname) 60)
		  (debug:print-info 0 *default-log-port* "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable"))
	      (if (or (not (null? tal))(not (null? reg)))
		  (loop (runs:queue-next-hed tal reg reglen regfull)
			(runs:queue-next-tal tal reg reglen regfull)
			(runs:queue-next-reg tal reg reglen regfull)
			reruns))))
	
        ;; (loop (car tal)(cdr tal) reg reruns))))

	(runs:incremental-print-results run-id)
	(debug:print 4 *default-log-port* "TOP OF LOOP => "
		     "test-name: " test-name
		     "\n  hed:         " hed
		     "\n  tal:         " (runs:pretty-long-list tal)
1723
1724
1725
1726
1727
1728
1729

1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756

1757
1758
1759
1760
1761
1762
1763







+










-







      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))
	  (begin
	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
	    ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
	    (thread-sleep! 5) ;; let's always sleep, prevents abutting calls to rum:get-count-tests-running-for-run-id - didn't help
	    (if (> (current-seconds)(+ last-time-incomplete 900))
		(let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id)))
		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
				    ". Running as pid " (current-process-id) " on " (get-host-name))
		  (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
		  (runs:find-and-mark-incomplete-and-check-end-of-run run-id #f)
		  (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
				    " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at "
				    (time->string (seconds->local-time (current-seconds))))))
	    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
	    (thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1))
	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id)
		       num-running))))
    ;; LET* ((test-record
    ;; we get here on "drop through". All done!
    ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. 
    ;; (debug:print-info 0 *default-log-port* "Calling Post Hook")    
    ;; (runs:run-post-hook run-id)

tests/simplerun/tests/test1/step1.sh became executable with contents [b36e5b3e1b].

tests/simplerun/tests/test1/step2.sh became executable with contents [8cc537c8e5].