Megatest

Changes On Branch b612b353ea05f528
Login

Changes In Branch v2.0001-matt-test-edits Through [b612b353ea] Excluding Merge-Ins

This is equivalent to a diff from 3d2d201a06 to b612b353ea

2022-02-10
12:19
changed the config hash key for toppath from empty string to toppath check-in: 366b1b75fd user: mmgraham tags: v2.0001
2022-02-09
08:02
Initialize placeholder record to correct length in db:get-run-info check-in: dbdd2c33cc user: mrwellan tags: v2.0001-matt-test-edits
2022-02-06
19:53
Reduced server expiration to 5sec. Fixed typo (extra paren) check-in: b612b353ea user: matt tags: v2.0001-matt-test-edits
2022-02-03
18:05
tweak waits in runconfigs check-in: 6c303b59b4 user: mrwellan tags: v2.0001-matt-test-edits
2022-02-02
18:07
corrected *configdat* to *runconfigdat* check-in: 3d2d201a06 user: mmgraham tags: v2.0001
16:08
changed to send unquoted cmd to runconfigs-get. When quoted, configf:lookup could not find the entry check-in: 2896749a24 user: mmgraham tags: v2.0001

Modified commonmod.scm from [875119b082] to [6aaa47a003].

4436
4437
4438
4439
4440
4441
4442
4443



4444
4445
4446
4447
4448
4449
4450
4436
4437
4438
4439
4440
4441
4442

4443
4444
4445
4446
4447
4448
4449
4450
4451
4452







-
+
+
+







;; timeout is hms string: 1h 5m 3s, default is 10 minutes
;;
(define (server:expiration-timeout)
  (let ((tmo (configf:lookup *configdat* "server" "timeout")))
    (if (and (string? tmo)
	     (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
        (* 3600 (string->number tmo))
	60))) ;; default is one minute
	;; 60 ;; default is one minute
	5
	)))

(define (runs:get-mt-env-alist run-id runname target testname itempath)
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  `(("MT_TEST_NAME"     . ,testname)
    
    ("MT_ITEMPATH"      . ,itempath)

Modified dashboard.scm from [d302c30c66] to [5c46200846].

875
876
877
878
879
880
881
882
883
884
885
886
887






888
889
890
891
892
893
894
895
896
897
875
876
877
878
879
880
881






882
883
884
885
886
887



888
889
890
891
892
893
894







-
-
-
-
-
-
+
+
+
+
+
+
-
-
-







	      (if (null? all-test-ids)
		  (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id)
		  (hash-table-set!    (dboard:tabdat-allruns-by-id tabdat) run-id run-struct))
	      (if (or (null? tal)
		      (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update
		  (begin
		    (when (> elapsed-time 2)   
                      (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")
                      (let* ((old-val (iup:attribute *tim* "TIME"))
                             (new-val (number->string (inexact->exact (floor (* 2  (string->number old-val)))))))
                        (if (< (string->number new-val) 5000)
                            ((debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val)
                            (iup:attribute-set! *tim* "TIME" new-val))))
		      (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")
		      (let* ((old-val (iup:attribute *tim* "TIME"))
			     (new-val (number->string (inexact->exact (floor (* 2  (string->number old-val)))))))
			(if (< (string->number new-val) 5000)
			    (debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val)
			    (iup:attribute-set! *tim* "TIME" new-val))))


                      )
		    (dboard:tabdat-allruns-set! tabdat new-res)
		    maxtests)
		  (if (> (dboard:rundat-run-data-offset run-struct) 0)
		      (loop run tal new-res newmaxtests) ;; not done getting data for this run
		      (loop (car tal)(cdr tal) new-res newmaxtests)))))))
    (dboard:tabdat-filters-changed-set! tabdat #f)
    (dboard:update-tree tabdat runs-hash header tb)))

Modified runsmod.scm from [727372ff23] to [26ea23059a].

1482
1483
1484
1485
1486
1487
1488
1489

1490
1491
1492
1493
1494
1495
1496
1482
1483
1484
1485
1486
1487
1488

1489
1490
1491
1492
1493
1494
1495
1496







-
+







			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)

	(if (> (- (current-seconds) *last-test-launch*) 5)        ;; be pretty aggressive for five seconds after
	    (runs:too-soon-delay (conc "loop delay " hed) 1 1)    ;; starting a test then apply more delay
	    (runs:too-soon-delay (conc "loop delay " hed) 1 0.6)    ;; starting a test then apply more delay
	    (runs:too-soon-delay (conc "loop delay " hed) 1 0.1)) 
	
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
1732
1733
1734
1735
1736
1737
1738
1739

1740
1741
1742
1743
1744
1745
1746
1732
1733
1734
1735
1736
1737
1738

1739
1740
1741
1742
1743
1744
1745
1746







-
+







	 ))) ;; end loop on sorted test names
    ;; this is the point where everything is launched and now you can mark the run in metadata table as all launched 
    (rmt:set-var run-id (conc "launch-complete-" run-id) "yes")  
        
    ;; now *if* -run-wait we wait for all tests to be done
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
    (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle
    (thread-sleep! 0.1) ;; I think there is a race condition here. Let states/statuses settle
    
    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
		    (prev-num-running 0))
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))

Modified ulex-dual/dbmgr.scm from [53b181f4c9] to [9a6a086d09].

331
332
333
334
335
336
337

338
339
340
341
342
343
344
345













346
347
348
349
350
351
352
331
332
333
334
335
336
337
338








339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358







+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+








;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future
;;
(define (rmt:send-receive-real sinfo apath dbname cmd params)
  (let* ((cdat (rmt:get-conn sinfo apath dbname)))
    (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")
    (condition-case
    (let* ((uconn    (servdat-uconn sinfo)) ;; get the interface to ulex
           ;; then send-receive using the ulex layer to host-port stored in cdat
           (res      (send-receive uconn (conndat-hostport cdat) cmd params)))
      ;; since we accessed the server we can bump the expires time up
      (conndat-expires-set! cdat (+ (current-seconds)
                                    (server:expiration-timeout)
                                    -2)) ;; two second margin for network time misalignments etc.
      res)))
     (let* ((uconn    (servdat-uconn sinfo)) ;; get the interface to ulex
	    (hostport (conndat-hostport cdat))
            ;; then send-receive using the ulex layer to host-port stored in cdat
            (res      (send-receive uconn hostport cmd params)))
       ;; since we accessed the server we can bump the expires time up
       (conndat-expires-set! cdat (+ (current-seconds)
                                     (server:expiration-timeout)
                                     -2)) ;; two second margin for network time misalignments etc.
       res)
     ((exn i/o net)
      (debug:print-info 0 *default-log-port* "IO failure in connection to "hostport
			", resetting connection.")
      

;
;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future.
;;
;; Purpose - call the main.db server and request a server be started
;; for the given area path and dbname

Modified ulex-dual/ulex.scm from [ba1e2ab076] to [12cecae7cb].

258
259
260
261
262
263
264













265

266
267
268
269
270
271
272
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277

278
279
280
281
282
283
284
285







+
+
+
+
+
+
+
+
+
+
+
+
+
-
+







	 (dat          (list `(host-port . ,my-host-port)
			     `(qrykey . qrykey)
			     `(cmd . ,cmd)
			     `(params . ,params))))
    (cond
     (isme (do-work udata dat)) ;; no transmission needed
     (else
      (let-values (((inp oup)(tcp-connect host port)))
	(let ((res (if (and inp oup)
		       (begin
			 (write (obj->string dat) oup)
			 (close-output-port oup)
			 (string->obj (read inp)))
		       (begin
			 (print "ERROR: send called but no receiver has been setup. Please call setup first!")
			 #f))))
	  (close-input-port inp)))
	
	
	     
      (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
      #;(handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
       exn
       (begin
         (print "ULEX send-receive: "cmd", "params", exn="exn)
         (message exn))
       (begin
	 ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
	 (let-values (((inp oup)(tcp-connect host port)))