Megatest

Check-in [f5657ea556]
Login
Overview
Comment:Added support for profiling, moved inter-test-delay to better location so initial registration is faster.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.65
Files: files | file ages | folders
SHA1: f5657ea556d0c2df26b40be9e2edfcc133dd7a68
User & Date: matt on 2020-08-17 18:58:38
Other Links: branch diff | manifest | tags
Context
2020-08-18
12:13
updates and fixes to archive megatestdb check-in: 47e78ace28 user: pjhatwal tags: v1.65
2020-08-17
18:58
Added support for profiling, moved inter-test-delay to better location so initial registration is faster. check-in: f5657ea556 user: matt tags: v1.65
14:17
Replaced with-output-to-file with an explicit open/close due to posible file handles being left open. Fixed bug in runner time sharing. check-in: a1963bd5a9 user: mrwellan tags: v1.65, v1.6561
Changes

Modified dashboard.scm from [2679042d5f] to [24d9a3f097].

95
96
97
98
99
100
101



102
103
104
105
106
107
108
			"-main"
			"-v"
			"-q"
			"-use-db-cache"
			"-skip-version-check"
			"-repl"
                        "-rh5.11" ;; fix to allow running on rh5.11



			)
		 args:arg-hash
		 0))

;; check for MT_* environment variables and exit if found
(if (not (args:get-arg "-test"))
    (begin







>
>
>







95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
			"-main"
			"-v"
			"-q"
			"-use-db-cache"
			"-skip-version-check"
			"-repl"
                        "-rh5.11" ;; fix to allow running on rh5.11

			;; placeholder
			;; "-:p"
			)
		 args:arg-hash
		 0))

;; check for MT_* environment variables and exit if found
(if (not (args:get-arg "-test"))
    (begin

Modified launch.scm from [56ecab23d3] to [747c3edf8a].

17
18
19
20
21
22
23
24

25
26
27
28
29
30
31
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================
;; launch a task - this runs on the originating host, tests themselves
;;
;;======================================================================

(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3 call-with-environment-variables csv)

(use typed-records pathname-expand matchable)

(import (prefix base64 base64:))
(import (prefix sqlite3 sqlite3:))

(declare (unit launch))
(declare (uses subrun))







|
>







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================
;; launch a task - this runs on the originating host, tests themselves
;;
;;======================================================================

(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3
     call-with-environment-variables csv)
(use typed-records pathname-expand matchable)

(import (prefix base64 base64:))
(import (prefix sqlite3 sqlite3:))

(declare (unit launch))
(declare (uses subrun))
1608
1609
1610
1611
1612
1613
1614
1615



1616
1617
1618
1619
1620
1621
1622
	   (diskpath   #f)
	   (cmdparms   #f)
	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	   (mt-bindir-path #f)
	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
				(if (args:get-arg "-logging")(list "-logging") '()))))



      ;; (if hosts (set! hosts (string-split hosts)))
      ;; set the megatest to be called on the remote host
      (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
      (set! mt-bindir-path (pathname-directory remote-megatest))
      (if launcher (set! launcher (string-split launcher)))
      ;; set up the run work area for this test
      (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run







|
>
>
>







1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
	   (diskpath   #f)
	   (cmdparms   #f)
	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	   (mt-bindir-path #f)
	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
				(if (args:get-arg "-logging")(list "-logging") '())
				(if (configf:lookup *configdat* "misc" "profilesw")
				    (list (configf:lookup *configdat* "misc" "profilesw"))
				    '()))))
      ;; (if hosts (set! hosts (string-split hosts)))
      ;; set the megatest to be called on the remote host
      (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
      (set! mt-bindir-path (pathname-directory remote-megatest))
      (if launcher (set! launcher (string-split launcher)))
      ;; set up the run work area for this test
      (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
1750
1751
1752
1753
1754
1755
1756
1757

1758
1759
1760
1761
1762
1763
1764
	      ;; NB// Is this still needed? Should be safe to go back to "exit" now?
	      (process-signal (current-process-id) signal/kill)
	      ))
	(alist->env-vars miscprevvals)
	(alist->env-vars testprevvals)
	(alist->env-vars commonprevvals)
	launch-results))
    (change-directory *toppath*)))


;; recover a test where the top controlling mtest may have died
;;
(define (launch:recover-test run-id test-id)
  ;; this function is called on the test run host via ssh
  ;;
  ;; 1. look at the process from pid







|
>







1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
	      ;; NB// Is this still needed? Should be safe to go back to "exit" now?
	      (process-signal (current-process-id) signal/kill)
	      ))
	(alist->env-vars miscprevvals)
	(alist->env-vars testprevvals)
	(alist->env-vars commonprevvals)
	launch-results))
    (change-directory *toppath*)
    (thread-sleep! (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.0))))

;; recover a test where the top controlling mtest may have died
;;
(define (launch:recover-test run-id test-id)
  ;; this function is called on the test run host via ssh
  ;;
  ;; 1. look at the process from pid

Modified megatest.scm from [c7191390c1] to [5f0ce788d2].

441
442
443
444
445
446
447




448
449
450
451
452
453
454
			"-sync-to-megatest.db"
                        "-sync-brute-force"
			"-logging"
			"-v" ;; verbose 2, more than normal (normal is 1)
			"-q" ;; quiet 0, errors/warnings only

                        "-diff-rep"




                        )
		 args:arg-hash
		 0))

;; Add args that use remargs here
;;
(if (and (not (null? remargs))







>
>
>
>







441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
			"-sync-to-megatest.db"
                        "-sync-brute-force"
			"-logging"
			"-v" ;; verbose 2, more than normal (normal is 1)
			"-q" ;; quiet 0, errors/warnings only

                        "-diff-rep"

			;; junk placeholder
			;; "-:p"
			
                        )
		 args:arg-hash
		 0))

;; Add args that use remargs here
;;
(if (and (not (null? remargs))

Modified runs.scm from [54e6c40a87] to [4d33de1e71].

515
516
517
518
519
520
521
522
523
524

525
526
527
528
529
530
531
		       ;; to cond clauses below where we determine we
		       ;; have too many jobs running rather than each
		       ;; time the and condition above is true (which
		       ;; seems like always)?
        	  ((> (runs:dat-can-run-more-tests-count runsdat) 20) ;; original intent was - save cycles, wait a long time
		   (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ..."))
		   10)  ;; obviously haven't had any work to do for a while
		  (else
		   ;; if have a number for inter-test-delay, use it, else don't delay much, maybe even zero?
		   (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.01))))

  
  (let* ((num-running             (rmt:get-count-tests-running run-id #f)) ;; fastmode=no
	 (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
	 (job-group-limit         (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup)))
				    (if (string? jobg-count)
					(string->number jobg-count)
					jobg-count))))







|
|
|
>







515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
		       ;; to cond clauses below where we determine we
		       ;; have too many jobs running rather than each
		       ;; time the and condition above is true (which
		       ;; seems like always)?
        	  ((> (runs:dat-can-run-more-tests-count runsdat) 20) ;; original intent was - save cycles, wait a long time
		   (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ..."))
		   10)  ;; obviously haven't had any work to do for a while
		  (else 0)))
;;		   ;; if have a number for inter-test-delay, use it, else don't delay much, maybe even zero?
;;		   (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.01)
;;		   )))
  
  (let* ((num-running             (rmt:get-count-tests-running run-id #f)) ;; fastmode=no
	 (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
	 (job-group-limit         (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup)))
				    (if (string? jobg-count)
					(string->number jobg-count)
					jobg-count))))

Modified server.scm from [b1a1de6e53] to [d954a8194e].

123
124
125
126
127
128
129


130
131
132
133
134
135


136
137

138
139
140
141
142
143
144
         ;; (dot-server-url (server:check-if-running areapath))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (curr-pid    (current-process-id))
	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
	 (target-host (car homehost))
	 (testsuite   (common:get-testsuite-name))
	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))


	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
							   " -daemonize "
							   "")
		      ;; " -log " logfile
		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))


	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread"))
         (load-limit  (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))

    ;; we want the remote server to start in *toppath* so push there
    (push-directory areapath)
    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
    (thread-start! log-rotate)
    
    ;; host.domain.tld match host?
    (if (and target-host 







>
>





|
>
>

|
>







123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
         ;; (dot-server-url (server:check-if-running areapath))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (curr-pid    (current-process-id))
	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
	 (target-host (car homehost))
	 (testsuite   (common:get-testsuite-name))
	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
	 (profile-mode (or (configf:lookup *configdat* "misc" "profilesw")
			   ""))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
							   " -daemonize "
							   "")
		      ;; " -log " logfile
		      " -m testsuite:" testsuite
		      " " profile-mode
		      )) ;; (conc " >> " logfile " 2>&1 &")))))
	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread"))
         (load-limit  (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))
	 )
    ;; we want the remote server to start in *toppath* so push there
    (push-directory areapath)
    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
    (thread-start! log-rotate)
    
    ;; host.domain.tld match host?
    (if (and target-host