Megatest

Diff
Login

Differences From Artifact [d232b0060b]:

To Artifact [d95f6303fc]:


47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

66

67
68
69
70
71
72
73
74
75
76
77
78
79

;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message
(define (common:fail-safe thunk warning-message-on-exception)
  (handle-exceptions
   exn
   (begin
     (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception)
     (debug:print-info 0 *default-log-port*
                       (string-substitute "\n?Error:" "nonfatal condition:"
                                          (with-output-to-string
                                            (lambda ()
                                              (print-error-message exn) ))))
     (debug:print-info 0 *default-log-port* "    -- continuing after nonfatal condition...")
     #f)
   (thunk)))

(define getenv get-environment-variable)
(define (safe-setenv key val)

  (if (or (substring-index "!" key) (substring-index ":" key)) ;; variables containing : are for internal use and cannot be environment variables.

      (debug:print-error 4 *default-log-port* "skip setting internal use only variables containing \":\" or starting with \"!\"")
      (if (and (string? val)
	       (string? key))
	  (handle-exceptions
	      exn
	      (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)
	    (setenv key val))
	  (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val))))

(define home (getenv "HOME"))
(define user (getenv "USER"))









|











>
|
>





|







47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message
(define (common:fail-safe thunk warning-message-on-exception)
  (handle-exceptions
   exn
   (begin
     (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception", exn=" exn)
     (debug:print-info 0 *default-log-port*
                       (string-substitute "\n?Error:" "nonfatal condition:"
                                          (with-output-to-string
                                            (lambda ()
                                              (print-error-message exn) ))))
     (debug:print-info 0 *default-log-port* "    -- continuing after nonfatal condition...")
     #f)
   (thunk)))

(define getenv get-environment-variable)
(define (safe-setenv key val)
  (if (or (substring-index "!" key)
	  (substring-index ":" key)  ;; variables containing : are for internal use and cannot be environment variables.
	  (substring-index "." key)) ;; periods are not allowed in environment variables
      (debug:print-error 4 *default-log-port* "skip setting internal use only variables containing \":\" or starting with \"!\"")
      (if (and (string? val)
	       (string? key))
	  (handle-exceptions
	      exn
	      (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val ", exn=" exn)
	    (setenv key val))
	  (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val))))

(define home (getenv "HOME"))
(define user (getenv "USER"))


504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
	(max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age
    (if (not (directory-exists? "logs"))(create-directory "logs"))
    (directory-fold 
     (lambda (file rem)
       (handle-exceptions
	exn
	(begin
	  (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore.")
	  (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	  ;; (print-call-chain (current-error-port)) ;; 
	  )
	(let* ((fullname  (conc "logs/" file))
	       (mod-time  (file-modification-time fullname))
	       (file-age  (- (current-seconds) mod-time))
	       (file-old  (> file-age (* 48 60 60)))







|







506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
	(max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age
    (if (not (directory-exists? "logs"))(create-directory "logs"))
    (directory-fold 
     (lambda (file rem)
       (handle-exceptions
	exn
	(begin
	  (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn)
	  (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	  ;; (print-call-chain (current-error-port)) ;; 
	  )
	(let* ((fullname  (conc "logs/" file))
	       (mod-time  (file-modification-time fullname))
	       (file-age  (- (current-seconds) mod-time))
	       (file-old  (> file-age (* 48 60 60)))
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
	      (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600))
		       (file-exists? fullname)) ;; just in case it was gzipped - will get it next time
		  (handle-exceptions
		   exn
		   #f
		   (if (directory? fullname)
		       (begin
			 (debug:print-error 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
			 (inc-stat "directories"))
		       (begin
			 (delete-file* fullname)
			 (inc-stat "deleted")))
		   (hash-table-delete! all-files file)))))))
     '()
     "logs")







|







541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
	      (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600))
		       (file-exists? fullname)) ;; just in case it was gzipped - will get it next time
		  (handle-exceptions
		   exn
		   #f
		   (if (directory? fullname)
		       (begin
			 (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
			 (inc-stat "directories"))
		       (begin
			 (delete-file* fullname)
			 (inc-stat "deleted")))
		   (hash-table-delete! all-files file)))))))
     '()
     "logs")
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
				   (lambda (a b)
				     (< (hash-table-ref all-files a)(hash-table-ref all-files b))))
			     (- num-logs max-allowed))))
	    (for-each
	     (lambda (file)
	       (let* ((fullname (conc "logs/" file)))
		 (if (directory? fullname)
		     (debug:print-error 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
		     (handle-exceptions
		      exn
		      (debug:print-error 0 *default-log-port* "failed to remove " fullname)
		      (delete-file* fullname)))))
	     files)
	    (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))
  
;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;; Do NOT check if not on homehost!
;;







|


|







565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
				   (lambda (a b)
				     (< (hash-table-ref all-files a)(hash-table-ref all-files b))))
			     (- num-logs max-allowed))))
	    (for-each
	     (lambda (file)
	       (let* ((fullname (conc "logs/" file)))
		 (if (directory? fullname)
		     (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
		     (handle-exceptions
		      exn
		      (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn)
		      (delete-file* fullname)))))
	     files)
	    (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))
  
;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;; Do NOT check if not on homehost!
;;
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
             ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
             ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
                   (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db
              (debug:print 0 *default-log-port* "   I see you are the owner of megatest.config, attempting to cleanup and reset to new version")
              (handle-exceptions
               exn
               (begin
                 (debug:print 0 *default-log-port* "Failed to switch versions.")
                 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
                 (print-call-chain (current-error-port))
                 (exit 1))
               (common:cleanup-db dbstruct)))
             ((not (common:file-exists? mtconf))
              (debug:print 0 *default-log-port* "   megatest.config does not exist in this area.  Cannot proceed with megatest version migration.")
              (exit 1))







|







595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
             ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
             ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
                   (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db
              (debug:print 0 *default-log-port* "   I see you are the owner of megatest.config, attempting to cleanup and reset to new version")
              (handle-exceptions
               exn
               (begin
                 (debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn)
                 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
                 (print-call-chain (current-error-port))
                 (exit 1))
               (common:cleanup-db dbstruct)))
             ((not (common:file-exists? mtconf))
              (debug:print 0 *default-log-port* "   megatest.config does not exist in this area.  Cannot proceed with megatest version migration.")
              (exit 1))
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718

(define (common:read-encoded-string instr)
  (handle-exceptions
   exn
   (handle-exceptions
    exn
    (begin
      (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn))
      (print-call-chain (current-error-port))
      #f)
    (read (open-input-string (base64:base64-decode instr))))
   (read (open-input-string (z3:decode-buffer (base64:base64-decode instr))))))

;; dot-locking egg seems not to work, using this for now
;; if lock is older than expire-time then remove it and try again







|







706
707
708
709
710
711
712
713
714
715
716
717
718
719
720

(define (common:read-encoded-string instr)
  (handle-exceptions
   exn
   (handle-exceptions
    exn
    (begin
      (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
      (print-call-chain (current-error-port))
      #f)
    (read (open-input-string (base64:base64-decode instr))))
   (read (open-input-string (z3:decode-buffer (base64:base64-decode instr))))))

;; dot-locking egg seems not to work, using this for now
;; if lock is older than expire-time then remove it and try again
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
(define (common:get-db-tmp-area . junk)
  (if *db-cache-path*
      *db-cache-path*
      (if *toppath* ;; common:get-create-writeable-dir
	  (handle-exceptions
	      exn
	      (begin
		(debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path*)
		(exit 1))
	    (let ((dbpath (common:get-create-writeable-dir
			   (list (conc "/tmp/" (current-user-name)
				       "/megatest_localdb/"
				       (common:get-testsuite-name) "/"
				       (string-translate *toppath* "/" ".")))))) ;;  #t))))
	      (set! *db-cache-path* dbpath)







|







944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
(define (common:get-db-tmp-area . junk)
  (if *db-cache-path*
      *db-cache-path*
      (if *toppath* ;; common:get-create-writeable-dir
	  (handle-exceptions
	      exn
	      (begin
		(debug:print-error 0 *default-log-port* "Couldn't create path to " *db-cache-path* ", exn=" exn)
		(exit 1))
	    (let ((dbpath (common:get-create-writeable-dir
			   (list (conc "/tmp/" (current-user-name)
				       "/megatest_localdb/"
				       (common:get-testsuite-name) "/"
				       (string-translate *toppath* "/" ".")))))) ;;  #t))))
	      (set! *db-cache-path* dbpath)
1187
1188
1189
1190
1191
1192
1193
1194

1195
1196
1197
1198
1199
1200
1201
		 (tal (cdr dirs)))
	(let ((res (or (and (directory? hed)
			    (file-write-access? hed)
			    hed)
		       (handle-exceptions
			   exn
			   (begin
			     (debug:print-info 0 *default-log-port* "could not create " hed ", this might cause problems down the road.")

			     #f)
			(create-directory hed #t)))))
	  (if (and (string? res)
		   (directory? res))
	      res
	      (if (null? tal)
		  #f







|
>







1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
		 (tal (cdr dirs)))
	(let ((res (or (and (directory? hed)
			    (file-write-access? hed)
			    hed)
		       (handle-exceptions
			   exn
			   (begin
			     (debug:print-info 0 *default-log-port* "could not create " hed
					       ", this might cause problems down the road. exn=" exn)
			     #f)
			(create-directory hed #t)))))
	  (if (and (string? res)
		   (directory? res))
	      res
	      (if (null? tal)
		  #f
1331
1332
1333
1334
1335
1336
1337
1338


1339
1340
1341
1342
1343
1344
1345
1346

;; does the directory exist and do we have write access?
;;
;;    returns the directory or #f
;;
(define (common:directory-writable? path-string)
  (handle-exceptions
   exn


   #f
   (if (and (directory-exists? path-string)
            (file-write-access? path-string))
       path-string
       #f)))

(define (common:get-linktree)
  (or (getenv "MT_LINKTREE")







|
>
>
|







1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351

;; does the directory exist and do we have write access?
;;
;;    returns the directory or #f
;;
(define (common:directory-writable? path-string)
  (handle-exceptions
      exn
    (begin
      (debug:print 0 *default-log-port* "Failed to identify access to " path-string ", exn=" exn)
      #f)
   (if (and (directory-exists? path-string)
            (file-write-access? path-string))
       path-string
       #f)))

(define (common:get-linktree)
  (or (getenv "MT_LINKTREE")
1425
1426
1427
1428
1429
1430
1431
1432


1433
1434
1435
1436
1437


1438
1439
1440
1441
1442
1443
1444
	   ;; first look in config, then look in file .homehost, create it if not found
	   (homehost (or (configf:lookup *configdat* "server" "homehost" )
			 (handle-exceptions
			     exn
			     (if (> trynum 0)
				 (let ((delay-time (* (- 5 trynum) 5)))
				   (mutex-unlock! *homehost-mutex*)
				   (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: "  ((condition-property-accessor 'exn 'message) exn))


				   (thread-sleep! delay-time)
				   (common:get-homehost trynum: (- trynum 1)))
				 (begin
				   (mutex-unlock! *homehost-mutex*)
				   (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file after trying five times. Giving up and exiting, message: "  ((condition-property-accessor 'exn 'message) exn))


				   (exit 1)))
			   (let ((hhf (conc *toppath* "/.homehost")))
			     (if (common:file-exists? hhf)
				 (with-input-from-file hhf read-line)
				 (if (file-write-access? *toppath*)
				     (begin
				       (with-output-to-file hhf







|
>
>




|
>
>







1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
	   ;; first look in config, then look in file .homehost, create it if not found
	   (homehost (or (configf:lookup *configdat* "server" "homehost" )
			 (handle-exceptions
			     exn
			     (if (> trynum 0)
				 (let ((delay-time (* (- 5 trynum) 5)))
				   (mutex-unlock! *homehost-mutex*)
				   (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Failed to read .homehost file, delaying "
						delay-time " seconds and trying again, message: "  ((condition-property-accessor 'exn 'message) exn)
						", exn=" exn)
				   (thread-sleep! delay-time)
				   (common:get-homehost trynum: (- trynum 1)))
				 (begin
				   (mutex-unlock! *homehost-mutex*)
				   (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)
						"] Failed to read .homehost file after trying five times. Giving up and exiting, message: "
						((condition-property-accessor 'exn 'message) exn))
				   (exit 1)))
			   (let ((hhf (conc *toppath* "/.homehost")))
			     (if (common:file-exists? hhf)
				 (with-input-from-file hhf read-line)
				 (if (file-write-access? *toppath*)
				     (begin
				       (with-output-to-file hhf
1685
1686
1687
1688
1689
1690
1691


1692

1693

1694
1695
1696
1697
1698


1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
;;======================================================================

;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
;;
(define (common:lazy-modification-time fpath)
  (handle-exceptions
      exn


      0

    (file-modification-time fpath)))


;; find timestamp of newest file associated with a sqlite db file
(define (common:lazy-sqlite-db-modification-time fpath)
  (let* ((glob-list (handle-exceptions
			exn


			`(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn)))
		      (glob (conc fpath "*"))))
         (file-list (if (eq? 0 (length glob-list))
			'("/no/such/file")
			glob-list)))
  (apply max
   (map
    common:lazy-modification-time 
    file-list))))

;; return a nice clean pathname made absolute
(define (common:nice-path dir)
  (let ((match (string-match "^(~[^\\/]*)(\\/.*|)$" dir)))
    (if match ;; using ~ for home?
	(common:nice-path (conc (common:read-link-f (cadr match)) "/" (caddr match)))
	(normalize-pathname (if (absolute-pathname? dir)
				dir
				(conc (current-directory) "/" dir))))))

;; make "nice-path" available in config files and the repl
(define nice-path common:nice-path)

(define (common:read-link-f path)
  (handle-exceptions
      exn
      (begin
	(debug:print-error 0 *default-log-port* "command \"/bin/readlink -f " path "\" failed.")
	path) ;; just give up
    (with-input-from-pipe
	(conc "/bin/readlink -f " path)
      (lambda ()
	(read-line)))))

(define (get-cpu-load #!key (remote-host #f))







>
>
|
>
|
>





>
>
|





|
|
|

















|







1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
;;======================================================================

;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
;;
(define (common:lazy-modification-time fpath)
  (handle-exceptions
      exn
    (begin
      (debug:print 0 *default-log-port* "Failed to get modifcation time for " fpath ", treating it as zero. exn=" exn)
      0)
    (if (file-exists? fpath)
	(file-modification-time fpath)
	0)))

;; find timestamp of newest file associated with a sqlite db file
(define (common:lazy-sqlite-db-modification-time fpath)
  (let* ((glob-list (handle-exceptions
			exn
		      (begin
			(debug:print 0 *default-log-port* "Failed to glob " fpath "*, exn=" exn)
			`(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn))))
		      (glob (conc fpath "*"))))
         (file-list (if (eq? 0 (length glob-list))
			'("/no/such/file")
			glob-list)))
  (apply max
	 (map
	  common:lazy-modification-time 
	  file-list))))

;; return a nice clean pathname made absolute
(define (common:nice-path dir)
  (let ((match (string-match "^(~[^\\/]*)(\\/.*|)$" dir)))
    (if match ;; using ~ for home?
	(common:nice-path (conc (common:read-link-f (cadr match)) "/" (caddr match)))
	(normalize-pathname (if (absolute-pathname? dir)
				dir
				(conc (current-directory) "/" dir))))))

;; make "nice-path" available in config files and the repl
(define nice-path common:nice-path)

(define (common:read-link-f path)
  (handle-exceptions
      exn
      (begin
	(debug:print-error 0 *default-log-port* "command \"/bin/readlink -f " path "\" failed. exn=" exn)
	path) ;; just give up
    (with-input-from-pipe
	(conc "/bin/readlink -f " path)
      (lambda ()
	(read-line)))))

(define (get-cpu-load #!key (remote-host #f))
1761
1762
1763
1764
1765
1766
1767
1768

1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781


1782
1783
1784
1785
1786
1787
1788


1789
1790
1791
1792
1793
1794
1795
1796
1797
1798


1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
		       exn
		     (begin
		       (debug:print-info 1 *default-log-port* " removing bad file " fullpath)
		       (delete-file* fullpath)
		       #f)
		     (with-input-from-file fullpath read))
		   (begin
		     (debug:print-info 2 *default-log-port* "file " fullpath " is too old (" real-age" seconds) to trust, skipping reading it")

		     #f))))
	    (begin
	      (debug:print 2 *default-log-port* "not reading file " fullpath)
	      #f)))
      #f))
 
(define (common:write-cached-info key dtype dat)
  (if *toppath*
      (let* ((fulldir  (conc *toppath* "/.sysdata"))
	     (fullpath (conc fulldir "/" key "-" dtype ".log")))
	(if (not (file-exists? fulldir))(create-directory fulldir #t))
	(handle-exceptions
	 exn


	 #f
	 (with-output-to-file fullpath (lambda ()(pp dat)))))
      #f))

(define (common:raw-get-remote-host-load remote-host)
  (handle-exceptions
   exn


   #f ;; more specific handling of errors needed
   (with-input-from-pipe 
    (conc "ssh " remote-host " cat /proc/loadavg")
    (lambda ()(list (read)(read)(read))))))

;; get cpu load by reading from /proc/loadavg, return all three values
;;
(define (common:get-cpu-load remote-host)
  (handle-exceptions
   exn


   '(-99 -99 -99)
   (let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
     (or (common:get-cached-info actual-hostname "cpu-load")
	 (let ((result (if remote-host
			   (map (lambda (res)
				  (if (eof-object? res) 9e99 res))
			        (with-input-from-pipe 
				 (conc "ssh " remote-host " cat /proc/loadavg")
				 (lambda ()(list (read)(read)(read)))))
			   (with-input-from-file "/proc/loadavg" 
			     (lambda ()(list (read)(read)(read)))))))
	   (match
	    result
	    ((l1 l2 l3)
	     (if (and (number? l1)
		      (number? l2)
		      (number? l3))
		 (begin
		   (common:write-cached-info actual-hostname "cpu-load" result)
		   result)
		 '(-1 -1 -1))) ;; -1 is bad result
	    (else '(-2 -2 -2))))))))

;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads
;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc.
;;  keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load
;;
(define (common:get-normalized-cpu-load remote-host)
  (let ((res (common:get-normalized-cpu-load-raw remote-host))







|
>












|
>
>
|
|




|
>
>
|
|
|
|





|
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|


|
|
|
|
|







1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
		       exn
		     (begin
		       (debug:print-info 1 *default-log-port* " removing bad file " fullpath)
		       (delete-file* fullpath)
		       #f)
		     (with-input-from-file fullpath read))
		   (begin
		     (debug:print-info 2 *default-log-port* "file " fullpath
				       " is too old (" real-age" seconds) to trust, skipping reading it")
		     #f))))
	    (begin
	      (debug:print 2 *default-log-port* "not reading file " fullpath)
	      #f)))
      #f))
 
(define (common:write-cached-info key dtype dat)
  (if *toppath*
      (let* ((fulldir  (conc *toppath* "/.sysdata"))
	     (fullpath (conc fulldir "/" key "-" dtype ".log")))
	(if (not (file-exists? fulldir))(create-directory fulldir #t))
	(handle-exceptions
	    exn
	  (begin
	    (debug:print 0 *default-log-path* "failed to write file " fullpath ", exn=" exn)
	    #f)
	  (with-output-to-file fullpath (lambda ()(pp dat)))))
      #f))

(define (common:raw-get-remote-host-load remote-host)
  (handle-exceptions
      exn
    (begin
      (debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn)
      #f) ;; more specific handling of errors needed
    (with-input-from-pipe 
     (conc "ssh " remote-host " cat /proc/loadavg")
     (lambda ()(list (read)(read)(read))))))

;; get cpu load by reading from /proc/loadavg, return all three values
;;
(define (common:get-cpu-load remote-host)
  (handle-exceptions
      exn
    (begin
      (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn)
      '(-99 -99 -99))
    (let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
      (or (common:get-cached-info actual-hostname "cpu-load")
	  (let ((result (if remote-host
			    (map (lambda (res)
				   (if (eof-object? res) 9e99 res))
			         (with-input-from-pipe 
				  (conc "ssh " remote-host " cat /proc/loadavg")
				  (lambda ()(list (read)(read)(read)))))
			    (with-input-from-file "/proc/loadavg" 
			      (lambda ()(list (read)(read)(read)))))))
	    (match
		result
	      ((l1 l2 l3)
	       (if (and (number? l1)
		      (number? l2)
		      (number? l3))
		   (begin
		     (common:write-cached-info actual-hostname "cpu-load" result)
		     result)
		   '(-1 -1 -1))) ;; -1 is bad result
	      (else '(-2 -2 -2))))))))

;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads
;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc.
;;  keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load
;;
(define (common:get-normalized-cpu-load remote-host)
  (let ((res (common:get-normalized-cpu-load-raw remote-host))
2095
2096
2097
2098
2099
2100
2101
2102

2103
2104
2105
2106
2107
2108
2109
    ;; is happening but not constantly report it
    (if (> (random 100) 75) ;; about 25% of the time
	(debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
			  ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp))
    (cond
     ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable
	   (> num-tries 0))
      (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load " first ", we'll sleep 10s and try " num-tries " more times.")

      (thread-sleep! 10)
      (common:wait-for-cpuload maxload-in numcpus-in waitdelay
			       count: count remote-host: remote-host force-maxload: force-maxload num-tries: (- num-tries 1)))
     ((and (> first adjmaxload)
	   (> count 0))
      (debug:print-info 0 *default-log-port*
			"server start delayed " adjwait







|
>







2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
    ;; is happening but not constantly report it
    (if (> (random 100) 75) ;; about 25% of the time
	(debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
			  ", load: " first ", adjmaxload: " adjmaxload ", loadjmp: " loadjmp))
    (cond
     ((and (< first 0) ;; this indicates the loadavg data is bad - machine may not be reachable
	   (> num-tries 0))
      (debug:print 0 *default-log-port* "WARNING: received bad data from get-cpu-load "
		   first ", we'll sleep 10s and try " num-tries " more times.")
      (thread-sleep! 10)
      (common:wait-for-cpuload maxload-in numcpus-in waitdelay
			       count: count remote-host: remote-host force-maxload: force-maxload num-tries: (- num-tries 1)))
     ((and (> first adjmaxload)
	   (> count 0))
      (debug:print-info 0 *default-log-port*
			"server start delayed " adjwait
2431
2432
2433
2434
2435
2436
2437


2438
2439
2440
2441
2442
2443
2444
  "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches"
  (let ((default '(("tag-expr"  . "-tagexpr")
                   ("mode-patt" . "-modepatt")
                   ("run-name"  . "-runname")
                   ("contour"   . "-contour")
                   ("target"    . "-target")
                   ("test-patt" . "-testpatt")


                   ("msg"       . "-m")
                   ("log"       . "-log")
                   ("start-dir" . "-start-dir")
                   ("new"       . "-set-state-status"))))
    (if (eq? flavor 'switch-symbol)
        (map (lambda (x)
               (cons (string->symbol (conc "-" (car x))) (cdr x)))







>
>







2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
  "returns alist mapping string keys in testconfig/subrun to megatest command line switches; if flavor is switch-symbol, maps tcmt symbolic switches to megatest switches"
  (let ((default '(("tag-expr"  . "-tagexpr")
                   ("mode-patt" . "-modepatt")
                   ("run-name"  . "-runname")
                   ("contour"   . "-contour")
                   ("target"    . "-target")
                   ("test-patt" . "-testpatt")
		   ("rerun"     . "-rerun")
		   ("setvars"   . "-setvars")
                   ("msg"       . "-m")
                   ("log"       . "-log")
                   ("start-dir" . "-start-dir")
                   ("new"       . "-set-state-status"))))
    (if (eq? flavor 'switch-symbol)
        (map (lambda (x)
               (cons (string->symbol (conc "-" (car x))) (cdr x)))
3016
3017
3018
3019
3020
3021
3022

3023
3024
3025
3026
3027
3028
3029
				     (launcher-exe   (car launcher-parts)))
				(if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline
				    (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat))
						    (count     100))
				      (if targ-host
					  (conc "remrun " targ-host)
					  (if (> count 0)

					      (begin
						(debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type)
						(thread-sleep! (- 101 count))
						(host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)
							   (- count 1)))
					      (begin
						(debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type)







>







3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
				     (launcher-exe   (car launcher-parts)))
				(if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline
				    (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat))
						    (count     100))
				      (if targ-host
					  (conc "remrun " targ-host)
					  (if (> count 0)
					      
					      (begin
						(debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type)
						(thread-sleep! (- 101 count))
						(host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)
							   (- count 1)))
					      (begin
						(debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type)
3340
3341
3342
3343
3344
3345
3346
3347


3348
3349
3350
3351
3352
3353
3354
3355
3356
(define (common:join-backgrounded-threads)
  ;; may need to trap and ignore exceptions -- dunno how atomic threads are...
  (for-each
   (lambda (thread-name)
     (let* ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f)))
       (if thread
           (handle-exceptions
           exn


           #t ;; just ignore it, it might have died in the meantime so joining it will throw an exception
           (thread-join! thread))
           )))
   (hash-table-keys *common:thread-punchlist*)))

(define *common:telemetry-log-state* 'startup)
(define *common:telemetry-log-socket* #f)

(define (common:telemetry-log-open)







|
>
>
|
|







3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
(define (common:join-backgrounded-threads)
  ;; may need to trap and ignore exceptions -- dunno how atomic threads are...
  (for-each
   (lambda (thread-name)
     (let* ((thread (hash-table-ref/default *common:thread-punchlist* thread-name #f)))
       (if thread
           (handle-exceptions
               exn
	     (begin
	       (debug:print 0 *default-log-port* "joining threads failed. exn=" exn)
               #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception
             (thread-join! thread))
           )))
   (hash-table-keys *common:thread-punchlist*)))

(define *common:telemetry-log-state* 'startup)
(define *common:telemetry-log-socket* #f)

(define (common:telemetry-log-open)