Megatest

Diff
Login

Differences From Artifact [e5653c6a69]:

To Artifact [f38a54bc63]:


1
2
3
4
5
6
7
8
9
10
11
12

13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32

















33
34
35
36
37
38
39
40
41
42
43
44
45
46










47
48
49
50
51
52
53
1
2
3
4
5
6
7
8
9
10
11

12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80











-
+




















+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+














+
+
+
+
+
+
+
+
+
+







;;======================================================================
;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
;;======================================================================

(use srfi-1 posix regex-case base64 format dot-locking csv-xml z3 sql-de-lite hostinfo md5 message-digest typed-records directory-utils stack
(use srfi-1 data-structures posix regex-case base64 format dot-locking csv-xml z3 sql-de-lite hostinfo md5 message-digest typed-records directory-utils stack
     matchable regex posix srfi-18 extras
     pkts (prefix dbi dbi:))

(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))

(declare (unit common))

(include "common_records.scm")

;; (require-library margs)
;; (include "margs.scm")

;; (define old-exit exit)
;; 
;; (define (exit . code)
;;   (if (null? code)
;;       (old-exit)
;;       (old-exit code)))


;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message
(define (common:fail-safe thunk warning-message-on-exception)
  (handle-exceptions
   exn
   (begin
     (debug:print-info 0 *default-log-port* "notable but nonfatal condition - "warning-message-on-exception)
     (debug:print-info 0 *default-log-port*
                       (string-substitute "\n?Error:" "nonfatal condition:"
                                          (with-output-to-string
                                            (lambda ()
                                              (print-error-message exn) ))))
     (debug:print-info 0 *default-log-port* "    -- continuing after nonfatal condition...")
     #f)
   (thunk)))

(define getenv get-environment-variable)
(define (safe-setenv key val)
  (if (substring-index ":" key) ;; variables containing : are for internal use and cannot be environment variables.
      (debug:print-error 4 *default-log-port* "skip setting internal use only variables containing \":\"")
      (if (and (string? val)
	       (string? key))
	  (handle-exceptions
	      exn
	      (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)
	    (setenv key val))
	  (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val))))

(define home (getenv "HOME"))
(define user (getenv "USER"))


;; returns list of fd count, socket count
(define (get-file-descriptor-count #!key  (pid (current-process-id )))
  (list
    (length (glob (conc "/proc/" pid "/fd/*")))
    (length  (filter identity (map socket? (glob (conc "/proc/" pid "/fd/*")))))
  )
)


;; GLOBALS

;; CONTEXTS
(defstruct cxt
  (taskdb #f)
  (cmutex (make-mutex)))
85
86
87
88
89
90
91

92
93
94
95
96
97
98
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126







+







(define *test-meta-updated* (make-hash-table))
(define *globalexitstatus*  0) ;; attempt to work around possible thread issues
(define *passnum*           0) ;; when running track calls to run-tests or similar
;; (define *alt-log-file* #f)  ;; used by -log
(define *common:denoise*    (make-hash-table)) ;; for low noise printing
(define *default-log-port*  (current-error-port))
(define *time-zero* (current-seconds)) ;; for the watchdog
(define *default-area-tag* "local")

;; DATABASE
(define *dbstruct-db*         #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this.
;; db stats
(define *db-stats*            (make-hash-table)) ;; hash of vectors < count duration-total >
(define *db-stats-mutex*      (make-mutex))
;; db access
115
116
117
118
119
120
121
122

123
124
125
126
127
128
129
143
144
145
146
147
148
149

150
151
152
153
154
155
156
157







-
+








;; SERVER
(define *my-client-signature* #f)
(define *transport-type*    'http)             ;; override with [server] transport http|rpc|nmsg
(define *runremote*         #f)                ;; if set up for server communication this will hold <host port>
;; (define *max-cache-size*    0)
(define *logged-in-clients* (make-hash-table))
;; (define *server-id*         #f)
(define *server-id*         #f)
(define *server-info*       #f)  ;; good candidate for easily convert to non-global
(define *time-to-exit*      #f)
(define *server-run*        #t)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))
(define *home-host*         #f)
;; (define *total-non-write-delay* 0)
156
157
158
159
160
161
162
163

164
165
166
167
168
169
170
184
185
186
187
188
189
190

191
192
193
194
195
196
197
198







-
+








(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (server:get-timeout)) ;; default from server:get-timeout
  (server-timeout    (server:expiration-timeout))
  (force-server      #f)
  (ro-mode           #f)  
  (ro-mode-checked   #f)) ;; flag that indicates we have checked for ro-mode

;; launching and hosts
(defstruct host
  (reachable    #f)
251
252
253
254
255
256
257
258

259
260


261
262
263
264
265
266
267
279
280
281
282
283
284
285

286
287

288
289
290
291
292
293
294
295
296







-
+

-
+
+







   dbstruct
   'schema
   ;; 'new2old
   'killservers
   'adj-target
   ;; 'old2new
   'new2old
   (if full
   ;; (if full
       '(dejunk)
       '()))
       ;; '())
       )
  (if (common:api-changed?)
      (common:set-last-run-version)))

;; Rotate logs, logic: 
;;                 if > 500k and older than 1 week:
;;                     remove previous compressed log and compress this log
;; WARNING: This proc operates assuming that it is in the directory above the
497
498
499
500
501
502
503

504
505



506
507
508
509
510
511
512
526
527
528
529
530
531
532
533


534
535
536
537
538
539
540
541
542
543







+
-
-
+
+
+







    (1 "n/a")
    (2 "PASS")
    (3 "SKIP")
    (4 "WARN")
    (5 "WAIVED")
    (6 "CHECK")
    (7 "STUCK/DEAD")
    (8 "DEAD")
    (8 "FAIL")
    (9 "ABORT")))
    (9 "FAIL")
    (10 "PREQ_FAIL")
    (11 "ABORT")))

(define *common:ended-states*       ;; states which indicate the test is stopped and will not proceed
  '("COMPLETED" "ARCHIVED" "KILLED" "KILLREQ" "STUCK" "INCOMPLETE"))

(define *common:badly-ended-states* ;; these roll up as CHECK, i.e. results need to be checked
  '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD"))

827
828
829
830
831
832
833
834
835




836
837
838
839
840
841
842
858
859
860
861
862
863
864


865
866
867
868
869
870
871
872
873
874
875







-
-
+
+
+
+







      #f
      (let loop ((hed (car dirs))
		 (tal (cdr dirs)))
	(let ((res (or (and (directory? hed)
			    (file-write-access? hed)
			    hed)
		       (handle-exceptions
			exn
			#f
			   exn
			   (begin
			     (debug:print-info 0 *default-log-port* "could not create " hed ", this might cause problems down the road.")
			     #f)
			(create-directory hed #t)))))
	  (if (and (string? res)
		   (directory? res))
	      res
	      (if (null? tal)
		  #f
		  (loop (car tal)(cdr tal))))))))
1481
1482
1483
1484
1485
1486
1487
1488

1489




1490
1491
1492

1493
1494
1495
1496
1497

1498
1499

1500
1501
1502
1503
1504

1505
1506
1507
1508
1509
1510
1511
1512

1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532

1533
1534
1535
1536
1537
1538
1539
1514
1515
1516
1517
1518
1519
1520

1521
1522
1523
1524
1525
1526
1527
1528

1529
1530
1531
1532
1533

1534
1535

1536
1537
1538
1539
1540

1541
1542
1543
1544
1545
1546
1547
1548

1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568

1569
1570
1571
1572
1573
1574
1575
1576







-
+

+
+
+
+


-
+




-
+

-
+




-
+







-
+



















-
+







          ((< (+ load (/ (random 250) 1000))         ;; add a random factor to keep from getting in a rut
              (+ best-load (/ (random 250) 1000))  )
           (set! best-load load)
           (set! best-host hostname)))))
     hosts)
    best-host))

(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f))
(define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f))
  (let* ((loadavg (common:get-cpu-load remote-host))
	 (numcpus (if (< 1 numcpus-in) ;; not possible
		      (common:get-num-cpus remote-host)
		      numcpus-in))
	 (maxload (max maxload-in 0.5)) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
	 (first   (car loadavg))
	 (next    (cadr loadavg))
	 (adjload (* maxload numcpus))
	 (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1
	 (loadjmp (- first next)))
    (cond
     ((and (> first adjload)
	   (> count 0))
      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload " " (if msg msg ""))
      (debug:print-info 0 *default-log-port* "server start delayed " waitdelay " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg ""))
      (thread-sleep! waitdelay)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1)))
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
     ((and (> loadjmp numcpus)
	   (> count 0))
      (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
      (thread-sleep! waitdelay)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1))))))
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)))))

(define (common:wait-for-homehost-load maxload msg)
  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
                     #f
                     (common:get-homehost)))
         (hh     (if hh-dat (car hh-dat) #f))
         (numcpus (common:get-num-cpus hh)))
    (common:wait-for-normalized-load maxload msg: msg remote-host: hh)))
    (common:wait-for-normalized-load maxload msg hh)))

(define (common:get-num-cpus remote-host)
  (let ((proc (lambda ()
		(let loop ((numcpu 0)
			   (inl    (read-line)))
		  (if (eof-object? inl)
		      numcpu
		      (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
				(+ numcpu 1)
				numcpu)
			    (read-line)))))))
    (if remote-host
	(with-input-from-pipe 
	 (conc "ssh " remote-host " cat /proc/cpuinfo")
	 proc)
	(with-input-from-file "/proc/cpuinfo" proc))))

;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxload #!key (msg #f)(remote-host #f))
(define (common:wait-for-normalized-load maxload msg remote-host)
  (let ((num-cpus (common:get-num-cpus remote-host)))
    (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host)))

(define (get-uname . params)
  (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params)))))
	 (uname #f))
    (if (null? (car uname-res))
1767
1768
1769
1770
1771
1772
1773
1774

1775
1776
1777


1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789





1790
1791
1792
1793
1794
1795
1796
1804
1805
1806
1807
1808
1809
1810

1811
1812


1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824


1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836







-
+

-
-
+
+










-
-
+
+
+
+
+







		  
;;======================================================================
;; T I M E   A N D   D A T E
;;======================================================================

;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
(define (common:hms-string->seconds tstr)
  (let ((parts     (string-split tstr))
  (let ((parts     (string-split-fields "\\w+" tstr))
	(time-secs 0)
	;; s=seconds, m=minutes, h=hours, d=days
	(trx       (regexp "(\\d+)([smhd])")))
	;; s=seconds, m=minutes, h=hours, d=days, M=months, y=years, w=weeks
	(trx       (regexp "(\\d+)([smhdMyw])")))
    (for-each (lambda (part)
		(let ((match  (string-match trx part)))
		  (if match
		      (let ((val (string->number (cadr match)))
			    (unt (caddr match)))
			(if val 
			    (set! time-secs (+ time-secs (* val
							    (case (string->symbol unt)
							      ((s) 1)
							      ((m) 60)
							      ((h) (* 60 60))
							      ((d) (* 24 60 60))
							      ((h) 3600)
							      ((d) 86400)
							      ((2) 604800)
							      ((M) 2628000) ;; aproximately one month
							      ((y) 31536000)
							      (else 0))))))))))
	      parts)
    time-secs))
		       
(define (seconds->hr-min-sec secs)
  (let* ((hrs (quotient secs 3600))
	 (min (quotient (- secs (* hrs 3600)) 60))
2070
2071
2072
2073
2074
2075
2076






2077

2078
2079
2080
2081
2082
2083
2084
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122

2123
2124
2125
2126
2127
2128
2129
2130







+
+
+
+
+
+
-
+







  (string-intersperse 
   (map (lambda (x)
          (number->string x 16))
        (map string->number
             (string-split instr)))
   "/"))

;;======================================================================
;; L O C K I N G   M E C H A N I S M S 
;;======================================================================

;; faux-lock is deprecated. Please use simple-lock below
;;
(define (common:faux-lock keyname #!key (wait-time 8))
(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t))
  (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count
      (if (> wait-time 0)
	  (begin
	    (thread-sleep! 1)
	    (if (eq? wait-time 1) ;; only one second left, steal the lock
		(begin
		  (debug:print-info 0 *default-log-port* "stealing lock for " keyname)
2092
2093
2094
2095
2096
2097
2098




2099





2100
2101
2102
2103
2104
2105
2106
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148

2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160







+
+
+
+
-
+
+
+
+
+







(define (common:faux-unlock keyname #!key (force #f))
  (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))
      (begin
        (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname))
        #t)
      #f))

;; simple lock. improve and converge on this one.
;;
(define (common:simple-lock keyname)
  (rmt:no-sync-get-lock keyname))
  

;;======================================================================
;;
;;======================================================================

(define (common:in-running-test?)
  (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO")))

(define (common:get-color-from-status status)
  (cond
   ((equal? status "PASS")    "green")
   ((equal? status "FAIL")    "red")