Megatest

Check-in [71819d884d]
Login
Overview
Comment:added loop to wait for testconfig to show up (combat nfs cache latency) ; added testconfig path derived from launch:execute payload to search path in case tests:get-all has gaps due to nfs cache latency
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65-telemetry
Files: files | file ages | folders
SHA1: 71819d884d3058bbca02485b797d3f3c54d9e17b
User & Date: bjbarcla on 2019-02-20 16:21:27
Original Comment: wip
Other Links: branch diff | manifest | tags
Context
2019-02-20
18:22
added zombie test sweep to launch:execute check-in: 0db6a22974 user: bjbarcla tags: v1.65-telemetry
16:21
added loop to wait for testconfig to show up (combat nfs cache latency) ; added testconfig path derived from launch:execute payload to search path in case tests:get-all has gaps due to nfs cache latency check-in: 71819d884d user: bjbarcla tags: v1.65-telemetry
2019-02-14
15:04
pulled in docs/manual changes check-in: 3797a8bab4 user: bjbarcla tags: v1.65-telemetry
Changes

Modified common.scm from [91f2f49c15] to [7fb4515436].

3112
3113
3114
3115
3116
3117
3118

       (define *common:telemetry-log-state* 'closed-fail)
       (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure")
       )
     (begin
       (define *common:telemetry-log-state* 'closed)
       (udp-close-socket *common:telemetry-log-socket*)
       (set! *common:telemetry-log-socket* #f)))))








>
3112
3113
3114
3115
3116
3117
3118
3119
       (define *common:telemetry-log-state* 'closed-fail)
       (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure")
       )
     (begin
       (define *common:telemetry-log-state* 'closed)
       (udp-close-socket *common:telemetry-log-socket*)
       (set! *common:telemetry-log-socket* #f)))))

Modified db.scm from [da7bac24de] to [7ee427f00f].

1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
	 (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) ;; FIXME suspect test run time & deadtime are not well matched; resulting in COMPLETED/DEAD status of an a-ok running test
	 (deadtime     (if (and deadtime-str
				(string->number deadtime-str))
			   (string->number deadtime-str)
			   7200))) ;; two hours
    (db:with-db 
     dbstruct #f #f
     (lambda (db)
       (if (number? ovr-deadtime)(set! deadtime ovr-deadtime))
       
       ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
       ;;







|







1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
	 (deadtime-str (configf:lookup *configdat* "setup" "deadtime")) ;; FIXME suspect test run time & deadtime are not well matched; resulting in COMPLETED/DEAD status of an a-ok running test
	 (deadtime     (if (and deadtime-str
				(string->number deadtime-str))
			   (string->number deadtime-str)
			   120))) ;; two minutes (30 seconds between updates, this leaves 3x grace period)
    (db:with-db 
     dbstruct #f #f
     (lambda (db)
       (if (number? ovr-deadtime)(set! deadtime ovr-deadtime))
       
       ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
       ;;

Modified launch.scm from [6dd1993f7c] to [3d75421951].

571
572
573
574
575
576
577
578
579

580

581
582
583
584
585
586
587
588
589
590
591
592
          
	  (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path
	  (let ((sighand (lambda (signum)
			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
			   (if (eq? signum signal/stop)
			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
			   (set! *time-to-exit* #t)
			   (print "Received signal " signum ", cleaning up before exit. Please wait...")
			   (let ((th1 (make-thread (lambda ()

						     (rmt:test-set-state-status run-id test-id "INCOMPLETE" "KILLED" #f)

						     (print "Killed by signal " signum ". Exiting")
						     (thread-sleep! 1)
						     (exit 1))))
				 (th2 (make-thread (lambda ()
						     (thread-sleep! 2)
						     (debug:print 0 *default-log-port* "Done")
						     (exit 4)))))
			     (thread-start! th2)
			     (thread-start! th1)
			     (thread-join! th2)))))
	    (set-signal-handler! signal/int sighand)
	    (set-signal-handler! signal/term sighand)







|

>
|
>

<


|







571
572
573
574
575
576
577
578
579
580
581
582
583

584
585
586
587
588
589
590
591
592
593
          
	  (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path
	  (let ((sighand (lambda (signum)
			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
			   (if (eq? signum signal/stop)
			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
			   (set! *time-to-exit* #t)
			   (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...")
			   (let ((th1 (make-thread (lambda ()
                                                     (print "set test to COMPLETED/ABORT begin.")
						     (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal")
                                                     (print "set test to COMPLETED/ABORT complete.")
						     (print "Killed by signal " signum ". Exiting")

						     (exit 1))))
				 (th2 (make-thread (lambda ()
						     (thread-sleep! 20)
						     (debug:print 0 *default-log-port* "Done")
						     (exit 4)))))
			     (thread-start! th2)
			     (thread-start! th1)
			     (thread-join! th2)))))
	    (set-signal-handler! signal/int sighand)
	    (set-signal-handler! signal/term sighand)

Modified tests.scm from [82f5f9277a] to [c7490d0df8].

1543
1544
1545
1546
1547
1548
1549








1550
1551
1552
1553

1554
1555

1556
1557
1558

1559
1560

1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
	  (if (and  dat ;; have a locally cached version
		    (hash-table-ref/default dat "have fulldata" #f)) ;; marked as good data?
	      dat
	      ;; no cached data available
	      (let* ((treg         (or test-registry
				       (tests:get-all)))
		     (test-path    (or (hash-table-ref/default treg test-name #f)








				       (conc *toppath* "/tests/" test-name)))
		     (test-configf (conc test-path "/testconfig"))
		     (testexists (let loop ((tries-left 7))
                                   (cond

                                    ( (and (common:file-exists? test-configf)(file-read-access? test-configf))
                                      #t)

                                    ( (common:file-exists? test-configf)
                                      (debug:print 0 *default-log-port* "WARNING: Cannot read testconfig file: "test-configf)
                                      #f)

                                    ( (and wait-a-minute (> tries-left 0))
                                      (thread-sleep! 10)

                                      (loop (sub1 tries-left)))
                                    (else
                                     (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf)
                                     #f))))
		     (tcfg         (if testexists
				       (read-config test-configf #f system-allowed
						    environ-patt: (if system-allowed
								      "pre-launch-env-vars"
								      #f))
				       #f)))
		(if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data







>
>
>
>
>
>
>
>


|
|
>
|
|
>
|
|
|
>
|
|
>
|
|
|
|







1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
	  (if (and  dat ;; have a locally cached version
		    (hash-table-ref/default dat "have fulldata" #f)) ;; marked as good data?
	      dat
	      ;; no cached data available
	      (let* ((treg         (or test-registry
				       (tests:get-all)))
		     (test-path    (or (hash-table-ref/default treg test-name #f)
                                       (let* ((local-tcdir (conc (getenv "MT_LINKTREE") "/"
                                                                 (getenv "MT_TARGET") "/"
                                                                 (getenv "MT_RUNNAME") "/"
                                                                 test-name "/" item-path))
                                              (local-tcfg (conc local-tcdir "/testconfig")))
                                         (if (common:file-exists? local-tcfg)
                                             local-tcdir
                                             #f))
				       (conc *toppath* "/tests/" test-name)))
		     (test-configf (conc test-path "/testconfig"))
		     (testexists   (let loopa ((tries-left 30))
                                     (cond
                                      (
                                       (and (common:file-exists? test-configf)(file-read-access? test-configf))
                                       #t)
                                      (
                                       (common:file-exists? test-configf)
                                       (debug:print 0 *default-log-port* "WARNING: Cannot read testconfig file: "test-configf)
                                       #f)
                                      (
                                       (and wait-a-minute (> tries-left 0))
                                       (thread-sleep! 10)
                                       (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf" will retry in 10 seconds.  Tries left: "tries-left) ;; BB: this fires
                                       (loopa (sub1 tries-left)))
                                      (else
                                       (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf) ;; BB: this fires
                                       #f))))
		     (tcfg         (if testexists
				       (read-config test-configf #f system-allowed
						    environ-patt: (if system-allowed
								      "pre-launch-env-vars"
								      #f))
				       #f)))
		(if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data