Megatest

Check-in [505cde89e4]
Login
Overview
Comment:Some cleanup on the run-away open files fix
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.70
Files: files | file ages | folders
SHA1: 505cde89e4310cd5871c28bbc24bc0401d6865f2
User & Date: matt on 2022-11-04 02:12:41
Other Links: branch diff | manifest | tags
Context
2022-11-05
21:04
Removed or updated prints to get bare-prints passing in ext-tests. check-in: 429f76ae7e user: matt tags: v1.70
2022-11-04
12:09
Changed megatest version to 1.7009 check-in: b9e8eb19c3 user: mmgraham tags: v1.70, v1.7009
02:12
Some cleanup on the run-away open files fix check-in: 505cde89e4 user: matt tags: v1.70
2022-11-03
20:36
Possible fixes to run-away open files check-in: 38e7dc2f46 user: matt tags: v1.70
Changes

Modified common.scm from [3b2eb62756] to [8364d1a2fe].

1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
      (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn)
      '(-99 -99 -99))
    (let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
      (or (common:get-cached-info actual-hostname "cpu-load")
	  (let ((result (if remote-host
			    (map (lambda (res)
				   (if (eof-object? res) 9e99 res))
			         (with-input-from-pipe 
				  (conc "ssh " remote-host " cat /proc/loadavg")
				  (lambda ()(list (read)(read)(read)))))
			    (with-input-from-file "/proc/loadavg" 
			      (lambda ()(list (read)(read)(read)))))))
	    (match
		result
	      ((l1 l2 l3)
	       (if (and (number? l1)
		      (number? l2)







<
|
<







1803
1804
1805
1806
1807
1808
1809

1810

1811
1812
1813
1814
1815
1816
1817
      (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn)
      '(-99 -99 -99))
    (let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
      (or (common:get-cached-info actual-hostname "cpu-load")
	  (let ((result (if remote-host
			    (map (lambda (res)
				   (if (eof-object? res) 9e99 res))

			         (common:raw-get-remote-host-load remote-host))

			    (with-input-from-file "/proc/loadavg" 
			      (lambda ()(list (read)(read)(read)))))))
	    (match
		result
	      ((l1 l2 l3)
	       (if (and (number? l1)
		      (number? l2)
1848
1849
1850
1851
1852
1853
1854




1855

1856
1857


1858
1859
1860
1861
1862
1863
1864
     ((eq? res #f) default)   ;; this would be the #eof
     (else default))))

(define (common:get-normalized-cpu-load-raw remote-host)
  (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost
    (or (common:get-cached-info actual-host "normalized-load")
	(let ((data (if remote-host




			(with-input-from-pipe 

			    (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\"")
			  read-lines)


			(append 
			 (with-input-from-file "/proc/loadavg" 
			   read-lines)
			 (with-input-from-file "/proc/cpuinfo"
			   read-lines)
			 (list "end"))))
	      (load-rx  (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$"))







>
>
>
>
|
>
|
|
>
>







1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
     ((eq? res #f) default)   ;; this would be the #eof
     (else default))))

(define (common:get-normalized-cpu-load-raw remote-host)
  (let* ((actual-host (or remote-host (get-host-name)))) ;; #f is localhost
    (or (common:get-cached-info actual-host "normalized-load")
	(let ((data (if remote-host
			(let ((inp #f))
			  (handle-exceptions
			      exn
			    (begin
			      (close-input-port inp)
			      '())
			    (set! inp (open-input-port (conc "ssh " remote-host " \"cat /proc/loadavg;cat /proc/cpuinfo;echo end\"")))
			    (let* ((res (read-lines inp)))
			      (close-input-port inp)
			      res)))
			(append 
			 (with-input-from-file "/proc/loadavg" 
			   read-lines)
			 (with-input-from-file "/proc/cpuinfo"
			   read-lines)
			 (list "end"))))
	      (load-rx  (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$"))
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067

















2068
2069
2070
2071
2072
2073
2074
						       numcpu
						       #f) ;; if zero return #f so caller knows that things are not working
						   (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
							     (+ numcpu 1)
							     numcpu)
							 (read-line))))))
				   (result (if remote-host
					       (with-input-from-pipe 
						(conc "ssh " remote-host " cat /proc/cpuinfo")
						proc)
					       (with-input-from-file "/proc/cpuinfo" proc))))
			      (if (and (number? result)
				       (> result 0))
				  (common:write-cached-info actual-host "num-cpus" result))
			      result))))
	  (hash-table-set! *numcpus-cache* actual-host numcpus)
	  numcpus))))


















;;======================================================================
;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5))
  (let ((num-cpus (common:get-num-cpus remote-host)))
    (if num-cpus







|

|







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
						       numcpu
						       #f) ;; if zero return #f so caller knows that things are not working
						   (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
							     (+ numcpu 1)
							     numcpu)
							 (read-line))))))
				   (result (if remote-host
					       (common:generic-ssh
						(conc "ssh " remote-host " cat /proc/cpuinfo")
						proc -1)
					       (with-input-from-file "/proc/cpuinfo" proc))))
			      (if (and (number? result)
				       (> result 0))
				  (common:write-cached-info actual-host "num-cpus" result))
			      result))))
	  (hash-table-set! *numcpus-cache* actual-host numcpus)
	  numcpus))))

(define (common:generic-ssh ssh-command proc default #!optional (msg-proc #f))
  (let ((inp #f))
    (handle-exceptions
	exn
      (begin
	(close-input-port inp)
	(if msg-proc
	    (msg-proc)
	    (debug:print 0 *default-log-port* "Command: \""ssh-command"\" failed. exn="exn))
	default)
      (set! inp (open-input-pipe ssh-command))
      (with-input-from-port inp
	(lambda ()
	  (let ((res (proc)))
	    (close-input-port inp)
	    res))))))

;;======================================================================
;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5))
  (let ((num-cpus (common:get-num-cpus remote-host)))
    (if num-cpus

Modified launch.scm from [c3fcd3bfc9] to [9b744a8e91].

815
816
817
818
819
820
821




822
823
824
825
826
827
828
829

830

831
832
833
834
835
836
837
	     (output (with-input-from-pipe cmd read-lines)))
	(debug:print 2 *default-log-port* "Running " cmd " received " output)
	(if (eq? (length output) 0)
	   #f
	   #t))
      #t))





(define (launch:is-test-alive host pid)
  (let* ((same-host (equal? host (get-host-name)))
	 (cmd (conc 
	       (if same-host "" (conc "ssh "host" "))
	       "pstree -A "pid)))
    (if (and host pid
	     (not (equal? host "n/a")))
	

	(let* ((output (with-input-from-pipe cmd read-lines)))

	  (debug:print 2 *default-log-port* "Running " cmd " received " output)
	  (if (eq? (length output) 0)
	      #f
	      #t))
	#t))) ;; assuming bad query is about a live test is likely not the right thing to do?

(define (launch:kill-tests-if-dead run-id)







>
>
>
>








>
|
>







815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
	     (output (with-input-from-pipe cmd read-lines)))
	(debug:print 2 *default-log-port* "Running " cmd " received " output)
	(if (eq? (length output) 0)
	   #f
	   #t))
      #t))

;; this is a close duplicate of:
;;    process:alist-on-host?
;;    process:alive
;;
(define (launch:is-test-alive host pid)
  (let* ((same-host (equal? host (get-host-name)))
	 (cmd (conc 
	       (if same-host "" (conc "ssh "host" "))
	       "pstree -A "pid)))
    (if (and host pid
	     (not (equal? host "n/a")))
	
	(let* ((output (if same-host
			   (with-input-from-pipe cmd read-lines)
			   (common:generic-ssh cmd read-lines '())))) ;; (with-input-from-pipe cmd read-lines)))
	  (debug:print 2 *default-log-port* "Running " cmd " received " output)
	  (if (eq? (length output) 0)
	      #f
	      #t))
	#t))) ;; assuming bad query is about a live test is likely not the right thing to do?

(define (launch:kill-tests-if-dead run-id)

Modified process.scm from [f9dfbe5500] to [63f5286965].

193
194
195
196
197
198
199



200
201
202
203
204
205
206
207
208
209
210
211
212
213
214





215
216
217
218
219
220
221
   (common:file-exists? (conc "/proc/" pid))
   (let-values (((rpid exit-type exit-signal)(process-wait pid #t)))
       (and (number? rpid)
	    (equal? rpid pid)))))

(define (process:alive-on-host? host pid)
  (let ((cmd (conc "ssh " host " ps -o pid= -p " pid)))



    (handle-exceptions
	exn
      (begin
	(debug:print 0 *default-log-port* "failed to identify if process " pid ", on host " host " is alive. exn=" exn)
	#f) ;; anything goes wrong - assume the process in NOT running.
     (with-input-from-pipe 
      cmd
      (lambda ()
	(let loop ((inl (read-line)))
	  (if (eof-object? inl)
	      #f
	      (let* ((clean-str (string-substitute "^[^\\d]*([0-9]+)[^\\d]*$" "\\1" inl))
		     (innum     (string->number clean-str)))
		(and innum
		     (eq? pid innum))))))))))






(define (process:get-sub-pids pid)
  (with-input-from-pipe
   (conc "pstree -A -p " pid) ;; | tr 'a-z\\-+`()\\.' ' ' " pid)
   (lambda ()
     (let loop ((inl (read-line))
		(res '()))







>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>







193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
   (common:file-exists? (conc "/proc/" pid))
   (let-values (((rpid exit-type exit-signal)(process-wait pid #t)))
       (and (number? rpid)
	    (equal? rpid pid)))))

(define (process:alive-on-host? host pid)
  (let ((cmd (conc "ssh " host " ps -o pid= -p " pid)))
    (common:generic-ssh
     cmd
   ;; 
   ;; handle-exceptions
   ;; 	exn
   ;;  (begin
   ;; 	(debug:print 0 *default-log-port* "failed to identify if process " pid ", on host " host " is alive. exn=" exn)
   ;; 	#f) ;; anything goes wrong - assume the process in NOT running.
   ;;  (with-input-from-pipe 
   ;;   cmd
     (lambda ()
       (let loop ((inl (read-line)))
	 (if (eof-object? inl)
	     #f
	     (let* ((clean-str (string-substitute "^[^\\d]*([0-9]+)[^\\d]*$" "\\1" inl))
		    (innum     (string->number clean-str)))
	       (and innum
		    (eq? pid innum))))))
     #f
     (lambda ()
       (debug:print 0 *default-log-port* "failed to identify if process "
		    pid", on host "host" is alive. exn="exn)))))


(define (process:get-sub-pids pid)
  (with-input-from-pipe
   (conc "pstree -A -p " pid) ;; | tr 'a-z\\-+`()\\.' ' ' " pid)
   (lambda ()
     (let loop ((inl (read-line))
		(res '()))