Megatest

Check-in [262ea69800]
Login
Overview
Comment:Redo inter-test-delay.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | v1.65-real-redo-inter-test-delay
Files: files | file ages | folders
SHA1: 262ea69800340938cfc4ff01668a93943a5597a8
User & Date: matt on 2021-03-14 05:41:11
Other Links: branch diff | manifest | tags
Context
2021-03-14
05:41
Redo inter-test-delay. Leaf check-in: 262ea69800 user: matt tags: v1.65-real-redo-inter-test-delay
2021-03-09
21:03
Very odd, missing egg in server.scm, util. check-in: 57b5fb07d6 user: matt tags: v1.65-real
Changes

Modified common.scm from [82673dacdb] to [167de94e7f].

357
358
359
360
361
362
363

364
365
366
367
368
369
370

;; Generic string database
(define sdb:qry #f) ;; (make-sdb:qry)) ;;  'init #f)
;; Generic path database
(define *fdb* #f)

(define *last-launch* (current-seconds)) ;; use for throttling the launch rate. Would be better to use the db and last time of a test in LAUNCHED state.


;;======================================================================
;; V E R S I O N
;;======================================================================

(define (common:get-full-version)
  (conc megatest-version "-" megatest-fossil-hash))







>







357
358
359
360
361
362
363
364
365
366
367
368
369
370
371

;; Generic string database
(define sdb:qry #f) ;; (make-sdb:qry)) ;;  'init #f)
;; Generic path database
(define *fdb* #f)

(define *last-launch* (current-seconds)) ;; use for throttling the launch rate. Would be better to use the db and last time of a test in LAUNCHED state.
(define *last-test-start* 0)             ;; same as above but done differently

;;======================================================================
;; V E R S I O N
;;======================================================================

(define (common:get-full-version)
  (conc megatest-version "-" megatest-fossil-hash))

Modified launch.scm from [7e65ac64d4] to [f314aea3df].

1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387





1388
1389
1390
1391
1392
1393
1394
1395













1396
1397
1398
1399
1400
1401
1402
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (let* ( ;; (lock-key        (conc "test-" test-id))
	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin
	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
	;; 			    (begin
	;; 			      (thread-sleep! 1)
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
	 (item-path       (item-list->path itemdat))
	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))





    (let loop ((delta        (- (current-seconds) *last-launch*))
	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 0)))
      (if (> launch-delay delta)
	  (begin
	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
	    (thread-sleep! (- launch-delay delta))
	    (loop (- (current-seconds) *last-launch*) launch-delay))))













    (change-directory *toppath*)
    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
     (append
      (list
       (list "MT_RUN_AREA_HOME" *toppath*)
       (list "MT_TEST_NAME" test-name)
       (list "MT_RUNNAME"   runname)







|
<
<
<
<
<
<
<
<
<
<
<
<


>
>
>
>
>





|


>
>
>
>
>
>
>
>
>
>
>
>
>







1366
1367
1368
1369
1370
1371
1372
1373












1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (let* (;; NOTE: There used to be a test start lock here.












	 (item-path       (item-list->path itemdat))
	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))

    ;; this is the same idea as inter-test-delay but it seems bady implemented, why a loop and does it really make sense to set *last-launch*
    ;; further down in the code?
    ;; I don't think this is used and it should be removed.
    ;;
    (let loop ((delta        (- (current-seconds) *last-launch*))
	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 0)))
      (if (> launch-delay delta)
	  (begin
	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust."))
	    (thread-sleep! (- launch-delay delta))
	    (loop (- (current-seconds) *last-launch*) launch-delay))))

    ;; this is nearly the same as setup launch-delay!!
    (let* ((inter-test-delay (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.0))
	   (last-delay       (- (current-seconds) *last-test-start*)))
      (if (and (> 0 inter-test-delay)
	       (< last-delay inter-test-delay))
	  (begin
	    (if (common:low-noise-print 1200 "inter test delay") ;; every two hours or so remind the user about launch delay.
		(debug:print-info 0 *default-log-port* "NOTE: test starts are delayed by " inter-test-delay
				  " seconds. Check megatest.config inter-test-delay in [settings] to adjust."))
	    (thread-sleep! (- inter-test-delay last-delay))))
      (set! *last-test-start* (current-seconds)))
    
    (change-directory *toppath*)
    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
     (append
      (list
       (list "MT_RUN_AREA_HOME" *toppath*)
       (list "MT_TEST_NAME" test-name)
       (list "MT_RUNNAME"   runname)
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
	      ;; NB// Is this still needed? Should be safe to go back to "exit" now?
	      (process-signal (current-process-id) signal/kill)
	      ))
	(alist->env-vars miscprevvals)
	(alist->env-vars testprevvals)
	(alist->env-vars commonprevvals)
	launch-results))
    (change-directory *toppath*)
    (thread-sleep! (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.0))))

;; recover a test where the top controlling mtest may have died
;;
(define (launch:recover-test run-id test-id)
  ;; this function is called on the test run host via ssh
  ;;
  ;; 1. look at the process from pid







|
<







1597
1598
1599
1600
1601
1602
1603
1604

1605
1606
1607
1608
1609
1610
1611
	      ;; NB// Is this still needed? Should be safe to go back to "exit" now?
	      (process-signal (current-process-id) signal/kill)
	      ))
	(alist->env-vars miscprevvals)
	(alist->env-vars testprevvals)
	(alist->env-vars commonprevvals)
	launch-results))
    (change-directory *toppath*)))


;; recover a test where the top controlling mtest may have died
;;
(define (launch:recover-test run-id test-id)
  ;; this function is called on the test run host via ssh
  ;;
  ;; 1. look at the process from pid