Megatest

Diff
Login

Differences From Artifact [22542b9de5]:

To Artifact [e484b86e7d]:


18
19
20
21
22
23
24
25
26
27
28
29
30
31


32
33
34
35
36
37
38
39

;;======================================================================

(use srfi-1 data-structures posix regex-case (prefix base64 base64:)
     format dot-locking csv-xml z3 udp ;; sql-de-lite
     hostinfo md5 message-digest typed-records directory-utils stack
     matchable regex posix (srfi 18) extras ;; tcp 
     (prefix nanomsg nmsg:)
     (prefix sqlite3 sqlite3:)
     pkts (prefix dbi dbi:)
     )

(declare (unit common))
(declare (uses commonmod))


(import commonmod)

(include "common_records.scm")


;; (require-library margs)
;; (include "margs.scm")








|

|




>
>
|







18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41

;;======================================================================

(use srfi-1 data-structures posix regex-case (prefix base64 base64:)
     format dot-locking csv-xml z3 udp ;; sql-de-lite
     hostinfo md5 message-digest typed-records directory-utils stack
     matchable regex posix (srfi 18) extras ;; tcp 
     ;; (prefix nanomsg nmsg:)
     (prefix sqlite3 sqlite3:)
     (prefix dbi dbi:)
     )

(declare (unit common))
(declare (uses commonmod))
(import (prefix commonmod cmod:))

(import pkts)

(include "common_records.scm")


;; (require-library margs)
;; (include "margs.scm")

224
225
226
227
228
229
230






















231
232
233
234
235
236
237
            (caddr argv))
           (else (car argv))))
         (fullpath (realpath this-script)))
    fullpath))
(define *common:this-exe-fullpath* (common:get-this-exe-fullpath))
(define *common:this-exe-dir*      (pathname-directory *common:this-exe-fullpath*))
(define *common:this-exe-name*     (pathname-strip-directory *common:this-exe-fullpath*))























(define *common:logpro-exit-code->status-sym-alist*
  '( ( 0 . pass )
     ( 1 . fail )
     ( 2 . warn )
     ( 3 . check )
     ( 4 . waived )







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
            (caddr argv))
           (else (car argv))))
         (fullpath (realpath this-script)))
    fullpath))
(define *common:this-exe-fullpath* (common:get-this-exe-fullpath))
(define *common:this-exe-dir*      (pathname-directory *common:this-exe-fullpath*))
(define *common:this-exe-name*     (pathname-strip-directory *common:this-exe-fullpath*))

;; when called from a wrapper I need sometimes to find the calling
;; wrapper, this is for dashboard to find the correct megatest.
;;
(define (common:find-local-megatest #!optional (progname "megatest"))
  (let ((res (filter file-exists?
		     (map (lambda (updir)
			    (let* ((lm  (car (argv)))
				   (dir (pathname-directory lm))
				   (exe (pathname-strip-directory lm)))
			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
				      ((dboard)    (conc updir progname))
				      ((mtest)     (conc updir progname))
				      ((dashboard) progname)
				      (else exe)))))
			  '("../../" "../")))))
    (if (null? res)
	(begin
	  (debug:print 0 *current-log-port* "Failed to find this executable! Using what can be found on the path")
	  progname)
	(car res))))

(define *common:logpro-exit-code->status-sym-alist*
  '( ( 0 . pass )
     ( 1 . fail )
     ( 2 . warn )
     ( 3 . check )
     ( 4 . waived )
482
483
484
485
486
487
488
489
490
491

492
493
494
495
496
497
498
	(max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age
    (if (not (directory-exists? "logs"))(create-directory "logs"))
    (directory-fold 
     (lambda (file rem)
       (handle-exceptions
	exn
	(begin
	  (debug:print-info 0 *default-log-port* "unable to rotate log " file ", probably handled by another process.")
	  (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	  (print-call-chain (current-error-port)))

	(let* ((fullname  (conc "logs/" file))
	       (mod-time  (file-modification-time fullname))
	       (file-age  (- (current-seconds) mod-time)))
	  (hash-table-set! all-files file mod-time)
	  (if (or (and (string-match "^.*.log" file)
		       (> (file-size fullname) 200000))
		  (and (string-match "^server-.*.log" file)







|
|
|
>







506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
	(max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age
    (if (not (directory-exists? "logs"))(create-directory "logs"))
    (directory-fold 
     (lambda (file rem)
       (handle-exceptions
	exn
	(begin
	  (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore.")
	  (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	  ;; (print-call-chain (current-error-port)) ;; 
	  )
	(let* ((fullname  (conc "logs/" file))
	       (mod-time  (file-modification-time fullname))
	       (file-age  (- (current-seconds) mod-time)))
	  (hash-table-set! all-files file mod-time)
	  (if (or (and (string-match "^.*.log" file)
		       (> (file-size fullname) 200000))
		  (and (string-match "^server-.*.log" file)
871
872
873
874
875
876
877
878
879









880

881
882









883

884
885
886
887
888
889
890
    (if val val default)))

(define (assoc/default key lst . default)
  (let ((res (assoc key lst)))
    (if res (cadr res)(if (null? default) #f (car default)))))

(define (common:get-testsuite-name)
  (or (configf:lookup *configdat* "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description.
      (configf:lookup *configdat* "setup" "testsuite" )









      (getenv "MT_TESTSUITE_NAME")

      (if (string? *toppath* )
          (pathname-file *toppath*)









          #f))) ;; (pathname-file (current-directory)))))


(define common:get-area-name common:get-testsuite-name)

(define (common:get-db-tmp-area . junk)
  (if *db-cache-path*
      *db-cache-path*
      (if *toppath* ;; common:get-create-writeable-dir







|
|
>
>
>
>
>
>
>
>
>
|
>
|
|
>
>
>
>
>
>
>
>
>
|
>







896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
    (if val val default)))

(define (assoc/default key lst . default)
  (let ((res (assoc key lst)))
    (if res (cadr res)(if (null? default) #f (car default)))))

(define (common:get-testsuite-name)
  (cmod:get-testsuite-name *toppath* *configdat*))

;; safe getting of toppath
(define (common:get-toppath areapath)
  (or *toppath*
      (if areapath
	  (begin
	    (set! *toppath* areapath)
	    (setenv "MT_RUN_AREA_HOME" areapath)
	    areapath)
	  #f)
      (if (getenv "MT_RUN_AREA_HOME")
	  (begin
	    (set! *toppath* (getenv "MT_RUN_AREA_HOME"))
	    *toppath*)
	  #f)
      ;; last resort, look for megatest.config
      (let loop ((thepath (realpath ".")))
	(if (file-exists? (conc thepath "/megatest.config"))
	    thepath
	    (if (equal? thepath "/")
		(begin
		  (debug:print-error 0 *default-log-port* "Unable to find megatest home directory.")
		  #f)
		(loop (pathname-directory thepath)))))
      ))

(define common:get-area-name common:get-testsuite-name)

(define (common:get-db-tmp-area . junk)
  (if *db-cache-path*
      *db-cache-path*
      (if *toppath* ;; common:get-create-writeable-dir
1170
1171
1172
1173
1174
1175
1176
1177












1178
1179
1180
1181
1182
1183
1184

;; use bash to expand a glob. Does NOT handle paths with spaces!
;;
(define (common:bash-glob instr)
  (string-split
   (with-input-from-pipe
       (conc "/bin/bash -c \"echo " instr "\"")
     read-line)))












  
;;======================================================================
;; T A R G E T S  ,   S T A T E ,   S T A T U S ,   
;;                    R U N N A M E    A N D   T E S T P A T T
;;======================================================================

;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t))))







|
>
>
>
>
>
>
>
>
>
>
>
>







1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241

;; use bash to expand a glob. Does NOT handle paths with spaces!
;;
(define (common:bash-glob instr)
  (string-split
   (with-input-from-pipe
       (conc "/bin/bash -c \"echo " instr "\"")
       read-line)))

;;======================================================================
;; Some safety net stuff
;;======================================================================

;; return input if it is a list or return null
(define (common:list-or-null inlst #!key (ovrd #f)(message #f))
  (if (list? inlst)
      inlst
      (begin
	(if message (debug:print-error 0 *default-log-port* message))
	(or ovrd '()))))
  
;;======================================================================
;; T A R G E T S  ,   S T A T E ,   S T A T U S ,   
;;                    R U N N A M E    A N D   T E S T P A T T
;;======================================================================

;; (map print (map car (hash-table->alist (read-config "runconfigs.config" #f #t))))
1275
1276
1277
1278
1279
1280
1281

1282
1283
1284




1285
1286
1287
1288
1289
1290
1291
       path-string
       #f)))

(define (common:get-linktree)
  (or (getenv "MT_LINKTREE")
      (if *configdat*
	  (configf:lookup *configdat* "setup" "linktree")

	  (if *toppath*
	      (conc *toppath* "/lt")
	      #f))))





(define (common:args-get-runname)
  (let ((res (or (args:get-arg "-runname")
		 (args:get-arg ":runname")
		 (getenv "MT_RUNNAME"))))
    ;; (if res (set-environment-variable "MT_RUNNAME" res)) ;; not sure if this is a good idea. side effect and all ...
    res))







>
|
|
|
>
>
>
>







1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
       path-string
       #f)))

(define (common:get-linktree)
  (or (getenv "MT_LINKTREE")
      (if *configdat*
	  (configf:lookup *configdat* "setup" "linktree")
	  #f)
      (if (or *toppath* (getenv "MT_RUN_AREA_HOME"))
	  (conc (or *toppath* (getenv "MT_RUN_AREA_HOME")) "/lt")
	  #f)
      (let* ((tp (common:get-toppath #f))
	     (lt (conc tp "/lt")))
	(if (not tp)(debug:print 0 *default-log-port* "WARNING: cannot calculate best path for linktree, using " lt))
	lt)))

(define (common:args-get-runname)
  (let ((res (or (args:get-arg "-runname")
		 (args:get-arg ":runname")
		 (getenv "MT_RUNNAME"))))
    ;; (if res (set-environment-variable "MT_RUNNAME" res)) ;; not sure if this is a good idea. side effect and all ...
    res))
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
		    (if (> curr-rownum rownum) curr-rownum rownum)
		    (if (> curr-colnum colnum) curr-colnum colnum)
		    ))))))

;; if it looks like a number -> convert it to a number, else return it
;;
(define (common:lazy-convert inval)
  (let* ((as-num (if (string? inval)(string->number inval) #f)))
    (or as-num inval)))

;; convert string a=1; b=2; c=a silly thing; d=
;; to '((a . 1)(b . 2)(c . "a silly thing")(d . ""))
;;
(define (common:val->alist val #!key (convert #f))
  (let ((val-list (string-split-fields ";\\s*" val #:infix)))
    (if val-list
	(map (lambda (x)
	       (let ((f (string-split-fields "\\s*=\\s*" x #:infix)))
		 (case (length f)
		   ((0) `(,#f))  ;; null string case
		   ((1) `(,(string->symbol (car f))))
		   ((2) `(,(string->symbol (car f)) . ,(let ((inval (cadr f)))
							 (if convert (common:lazy-convert inval) inval))))
		   (else f))))
	     val-list)
	'())))

;;======================================================================
;; S Y S T E M   S T U F F
;;======================================================================

;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
;;







<
|





<
<
<
<
<
<
<
<
|
<
<
<







1646
1647
1648
1649
1650
1651
1652

1653
1654
1655
1656
1657
1658








1659



1660
1661
1662
1663
1664
1665
1666
		    (if (> curr-rownum rownum) curr-rownum rownum)
		    (if (> curr-colnum colnum) curr-colnum colnum)
		    ))))))

;; if it looks like a number -> convert it to a number, else return it
;;
(define (common:lazy-convert inval)

  (cmod:lazy-convert inval))

;; convert string a=1; b=2; c=a silly thing; d=
;; to '((a . 1)(b . 2)(c . "a silly thing")(d . ""))
;;
(define (common:val->alist val #!key (convert #f))








  (cmod:val->alist val #!key (convert #f)))




;;======================================================================
;; S Y S T E M   S T U F F
;;======================================================================

;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
;;
1670
1671
1672
1673
1674
1675
1676
1677

1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693

1694
1695

1696
1697
1698
1699
1700
1701
1702









1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719







1720
1721


1722
1723
1724
1725
1726
1727
1728
;; 			    (set! cpu-load newval))))))
;; 	      (car load-res))
;;     cpu-load))

;; get values from cached info from dropping file in logs dir
;;  e.g. key is host and dtype is normalized-load
;;
(define (common:get-cached-info key dtype #!key (age 5))

  (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log")))
    (if (and (file-exists? fullpath)
	     (file-read-access? fullpath))
	(handle-exceptions
	 exn
	 #f
	 (debug:print 2 *default-log-port* "reading file " fullpath)
	 (let ((real-age (- (current-seconds)(file-change-time fullpath)))) 
	   (if (< real-age age)
	       (with-input-from-file fullpath read)
	       (begin
		 (debug:print 1 *default-log-port* "file " fullpath " is too old (" real-age" seconds) to trust, skipping reading it")
		 #f))))
	(begin
	  (debug:print 2 *default-log-port* "not reading file " fullpath)
	  #f))))

 
(define (common:write-cached-info key dtype dat)

  (let* ((fulldir  (conc *toppath* "/.sysdata"))
	 (fullpath (conc fulldir "/" key "-" dtype ".log")))
    (if (not (file-exists? fulldir))(create-directory fulldir #t))
    (handle-exceptions
     exn
     #f
     (with-output-to-file fullpath (lambda ()(pp dat))))))










;; get cpu load by reading from /proc/loadavg, return all three values
;;
(define (common:get-cpu-load remote-host)
  (handle-exceptions
   exn
   '(99 99 99)
   (let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
     (or (common:get-cached-info actual-hostname "cpu-load")
	 (let ((result (if remote-host
			   (map (lambda (res)
				  (if (eof-object? res) 9e99 res))
			        (with-input-from-pipe 
				 (conc "ssh " remote-host " cat /proc/loadavg")
				 (lambda ()(list (read)(read)(read)))))
			   (with-input-from-file "/proc/loadavg" 
			     (lambda ()(list (read)(read)(read)))))))







	   (common:write-cached-info actual-hostname "cpu-load" result)
	                     result)))))



;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads
;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc.
;;  keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load
;;
(define (common:get-normalized-cpu-load remote-host)
  (let ((res (common:get-normalized-cpu-load-raw remote-host))







|
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>


>
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>






|










>
>
>
>
>
>
>
|
|
>
>







1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
;; 			    (set! cpu-load newval))))))
;; 	      (car load-res))
;;     cpu-load))

;; get values from cached info from dropping file in logs dir
;;  e.g. key is host and dtype is normalized-load
;;
(define (common:get-cached-info key dtype #!key (age 10))
  (if *toppath*
      (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log")))
	(if (and (file-exists? fullpath)
		 (file-read-access? fullpath))
	    (handle-exceptions
	     exn
	     #f
	     (debug:print 2 *default-log-port* "reading file " fullpath)
	     (let ((real-age (- (current-seconds)(file-change-time fullpath)))) 
	       (if (< real-age age)
		   (with-input-from-file fullpath read)
		   (begin
		     (debug:print-info 2 *default-log-port* "file " fullpath " is too old (" real-age" seconds) to trust, skipping reading it")
		     #f))))
	    (begin
	      (debug:print 2 *default-log-port* "not reading file " fullpath)
	      #f)))
      #f))
 
(define (common:write-cached-info key dtype dat)
  (if *toppath*
      (let* ((fulldir  (conc *toppath* "/.sysdata"))
	     (fullpath (conc fulldir "/" key "-" dtype ".log")))
	(if (not (file-exists? fulldir))(create-directory fulldir #t))
	(handle-exceptions
	 exn
	 #f
	 (with-output-to-file fullpath (lambda ()(pp dat)))))
      #f))

(define (common:raw-get-remote-host-load remote-host)
  (handle-exceptions
   exn
   #f ;; more specific handling of errors needed
   (with-input-from-pipe 
    (conc "ssh " remote-host " cat /proc/loadavg")
    (lambda ()(list (read)(read)(read))))))

;; get cpu load by reading from /proc/loadavg, return all three values
;;
(define (common:get-cpu-load remote-host)
  (handle-exceptions
   exn
   '(-99 -99 -99)
   (let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
     (or (common:get-cached-info actual-hostname "cpu-load")
	 (let ((result (if remote-host
			   (map (lambda (res)
				  (if (eof-object? res) 9e99 res))
			        (with-input-from-pipe 
				 (conc "ssh " remote-host " cat /proc/loadavg")
				 (lambda ()(list (read)(read)(read)))))
			   (with-input-from-file "/proc/loadavg" 
			     (lambda ()(list (read)(read)(read)))))))
	   (match
	    result
	    ((l1 l2 l3)
	     (if (and (number? l1)
		      (number? l2)
		      (number? l3))
		 (begin
		   (common:write-cached-info actual-hostname "cpu-load" result)
		   result)
		 '(-1 -1 -1))) ;; -1 is bad result
	    (else '(-2 -2 -2))))))))

;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads
;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc.
;;  keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load
;;
(define (common:get-normalized-cpu-load remote-host)
  (let ((res (common:get-normalized-cpu-load-raw remote-host))
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942

1943
1944
1945
1946
1947
1948
1949
1950
1951


1952
1953
1954
1955
1956
1957
1958

1959





1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973




1974


1975






1976
1977
1978






1979
1980
1981





1982
1983
1984
1985
1986
1987
1988




1989
1990
1991
1992
1993
1994
1995
(define (common:get-num-cpus remote-host)
  (let* ((actual-host (or remote-host (get-host-name))))
    (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) ;; hosts had better not be changing the number of cpus too often!
	(let* ((proc   (lambda ()
			 (let loop ((numcpu 0)
				    (inl    (read-line)))
			   (if (eof-object? inl)
			       (begin
				 (common:write-cached-info actual-host "num-cpus" numcpu)
				 numcpu)

			       (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
					 (+ numcpu 1)
					 numcpu)
				     (read-line))))))
	       (result (if remote-host
			   (with-input-from-pipe 
			       (conc "ssh " remote-host " cat /proc/cpuinfo")
			     proc)
			   (with-input-from-file "/proc/cpuinfo" proc))))


	  (common:write-cached-info actual-host "num-cpus" result)
	  result))))

;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxload msg remote-host)
  (let ((num-cpus (common:get-num-cpus remote-host)))

    (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host)))






;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load
;;
(define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f))
  (let* ((loadavg (common:get-cpu-load remote-host))
	 (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero.  If we get 1, it's possible that we got the previous default, and we should check again
		      (common:get-num-cpus remote-host)
		      numcpus-in))
	 (maxload (if force-maxload
		      maxload-in
		      (max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
	 (first   (car loadavg))
	 (next    (cadr loadavg))
	 (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1




	 (loadjmp (- first next))


         (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)(/ (- 1000 count) 10) waitdelay) (- first adjload) ))  )));; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously






    (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
		      ", load: " first ", adjload: " adjload ", loadjmp: " loadjmp)
    (cond






     ((and (> first adjload)
	   (> count 0))
      (debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg ""))





      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
     ((and (> loadjmp numcpus)
	   (> count 0))
      (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)))))





(define (get-uname . params)
  (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params)))))
	 (uname #f))
    (if (null? (car uname-res))
	"unknown"
	(caar uname-res))))







<
|
|
>









>
>
|




|

>
|
>
>
>
>
>



|









|
>
>
>
>

>
>
|
>
>
>
>
>
>
|
|

>
>
>
>
>
>
|

|
>
>
>
>
>






|
>
>
>
>







2004
2005
2006
2007
2008
2009
2010

2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
(define (common:get-num-cpus remote-host)
  (let* ((actual-host (or remote-host (get-host-name))))
    (or (common:get-cached-info actual-host "num-cpus" age: (+ 2592000 (random 3600))) ;; hosts had better not be changing the number of cpus too often!
	(let* ((proc   (lambda ()
			 (let loop ((numcpu 0)
				    (inl    (read-line)))
			   (if (eof-object? inl)

			       (if (> numcpu 0)
				   numcpu
				   #f) ;; if zero return #f so caller knows that things are not working
			       (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
					 (+ numcpu 1)
					 numcpu)
				     (read-line))))))
	       (result (if remote-host
			   (with-input-from-pipe 
			       (conc "ssh " remote-host " cat /proc/cpuinfo")
			     proc)
			   (with-input-from-file "/proc/cpuinfo" proc))))
	  (if (and (number? result)
		   (> result 0))
	      (common:write-cached-info actual-host "num-cpus" result))
	  result))))

;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxload msg remote-host #!optional (rem-tries 5))
  (let ((num-cpus (common:get-num-cpus remote-host)))
    (if num-cpus
	(common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host)
	(begin
	  (thread-sleep! (random 60)) ;; we failed to get num cpus. wait a bit and try again
	  (if (> rem-tries 0)
	      (common:wait-for-normalized-load maxload msg remote-host (- rem-tries 1))
	      #f)))))

;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load
;;
(define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)(num-tries 5))
  (let* ((loadavg (common:get-cpu-load remote-host))
	 (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero.  If we get 1, it's possible that we got the previous default, and we should check again
		      (common:get-num-cpus remote-host)
		      numcpus-in))
	 (maxload (if force-maxload
		      maxload-in
		      (max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
	 (first   (car loadavg))
	 (next    (cadr loadavg))
	 (adjmaxload (* maxload (max 1 numcpus))) ;; possible bug where
					       ;; numcpus (or could be
					       ;; maxload) is zero,
					       ;; crude fallback is to
					       ;; at least use 1
	 (loadjmp (- first next))
	 ;; add some randomness to the time to break any alignment
	 ;; where netbatch dumps many jobs to machines simultaneously
         (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)
						      (/ (- 1000 count) 10)
						      waitdelay)
						   (- first adjmaxload) ))  )))
    ;; let's let the user know once in a long while that load checking
    ;; is happening but not constantly report it
    (if (> (random 100) 75) ;; about 25% of the time
	(debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
			  ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp))
    (cond
     ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable
	   (> num-tries 0))
      (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " first ", we'll sleep 10s and try " num-tries " more times.")
      (thread-sleep! 10)
      (common:wait-for-cpuload maxload-in numcpus-in waitdelay
			       count: count remote-host: remote-host force-maxload: force-maxload num-tries: (- num-tries 1)))
     ((and (> first adjmaxload)
	   (> count 0))
      (debug:print-info 0 *default-log-port*
			"server start delayed " adjwait
			" seconds due to load " first
			" exceeding max of " adjmaxload
			" on server " (or remote-host (get-host-name))
			" (normalized load-limit: " maxload ") " (if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
     ((and (> loadjmp numcpus)
	   (> count 0))
      (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
     (else
      (if (> num-tries 0)
	  (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") " is acceptable at " first " continuing.")
	  (debug:print 0 *default-log-port* "Load on " (or remote-host "localhost") ", "first" could not be retrieved. Giving up and continuing."))))))

(define (get-uname . params)
  (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params)))))
	 (uname #f))
    (if (null? (car uname-res))
	"unknown"
	(caar uname-res))))