Megatest

Check-in [8ca2b352b3]
Login
Overview
Comment: Sync from test to server only when the CPU load has changed, the free disk space has changed by more than 10%, or more than 30 seconds have passed since the last sync.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.64
Files: files | file ages | folders
SHA1: 8ca2b352b3dfd988f26bcf5e13b0edb17722d753
User & Date: mrwellan on 2017-05-24 10:48:00
Other Links: branch diff | manifest | tags
Context
2017-05-24
14:48
Check disk usage every 30 seconds instead of every 3 seconds check-in: b60108422e user: mrwellan tags: v1.64
10:48
Sync from test to server only when the CPU load has changed, the free disk space has changed by more than 10%, or more than 30 seconds have passed since the last sync. check-in: 8ca2b352b3 user: mrwellan tags: v1.64
2017-05-23
09:50
Fix for crash on -remove-runs when the toplevel test was removed. check-in: 3c96bce614 user: matt tags: v1.64, v1.6414
Changes

Modified launch.scm from [d6685cb69b] to [4b0e8bb2f3].

328
329
330
331
332
333
334
335

336
337
338
339
340
341
342

343
344
345


346
347
348
349
350
351
352
353

354
355
356
357
358
359
360
361
			     start-seconds)))))
	 (kill-tries 0))
    ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
    (let loop ((minutes   (calc-minutes))
	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
	       (disk-free (get-df (current-directory))))

      (let ((new-cpu-load (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
				 (delta (abs (- load cpu-load))))
			    (if (> delta 0.1) ;; don't bother updating with small changes
				load
				#f)))
	    (new-disk-free (let* ((df    (get-df (current-directory)))
				  (delta (abs (- df disk-free))))

			     (if (> delta 200) ;; ignore changes under 200 Meg
				 df
				 #f))))


	(set! kill-job? (or (test-get-kill-request run-id test-id) ;; run-id test-name itemdat))
			    (and runtlim (let* ((run-seconds   (- (current-seconds) start-seconds))
						(time-exceeded (> run-seconds runtlim)))
					   (if time-exceeded
					       (begin
						 (debug:print-info 0 *default-log-port* "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
						 #t)
					       #f)))))

	(tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
	(if kill-job? 
	    (begin
	      (mutex-lock! m)
	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
	      ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
	      ;;       between tries?
	      (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0))







|
>
|
|
|
|
|
|
|
>
|
|
|
>
>








>
|







328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
			     start-seconds)))))
	 (kill-tries 0))
    ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
    (let loop ((minutes   (calc-minutes))
	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
	       (disk-free (get-df (current-directory)))
               (last-sync (current-seconds)))
      (let* ((new-cpu-load (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
                                  (delta (abs (- load cpu-load))))
                             (if (> delta 0.1) ;; don't bother updating with small changes
                                 load
                                 #f)))
             (new-disk-free (let* ((df    (get-df (current-directory)))
                                   (delta (abs (- df disk-free))))
                              (if (and (> df 0)
                                       (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg
                                  df
                                  #f)))
             (do-sync       (or new-cpu-load new-disk-free (> (current-seconds) (+ last-sync 30)))))
        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
	(set! kill-job? (or (test-get-kill-request run-id test-id) ;; run-id test-name itemdat))
			    (and runtlim (let* ((run-seconds   (- (current-seconds) start-seconds))
						(time-exceeded (> run-seconds runtlim)))
					   (if time-exceeded
					       (begin
						 (debug:print-info 0 *default-log-port* "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
						 #t)
					       #f)))))
        (if do-sync
            (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f))
	(if kill-job? 
	    (begin
	      (mutex-lock! m)
	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
	      ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
	      ;;       between tries?
	      (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0))
395
396
397
398
399
400
401
402



403
404
405
406
407
408
409
	      (mutex-unlock! m)
	      ;; no point in sticking around. Exit now.
	      (exit)))
	(if (hash-table-ref/default misc-flags 'keep-going #f)
	    (begin
	      (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
		  (loop (calc-minutes) (or new-cpu-load cpu-load) (or new-disk-free disk-free)))))))



    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional


(define (launch:execute encoded-cmd)
  (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
	 (tconfigreg #f))
    (setenv "MT_CMDINFO" encoded-cmd)







|
>
>
>







400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
	      (mutex-unlock! m)
	      ;; no point in sticking around. Exit now.
	      (exit)))
	(if (hash-table-ref/default misc-flags 'keep-going #f)
	    (begin
	      (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
		  (loop (calc-minutes)
                        (or new-cpu-load cpu-load)
                        (or new-disk-free disk-free)
                        (if do-sync (current-seconds) last-sync)))))))
    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional


(define (launch:execute encoded-cmd)
  (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
	 (tconfigreg #f))
    (setenv "MT_CMDINFO" encoded-cmd)

Modified tests/tests.scm from [63be015a8f] to [9ef0e0195b].

15
16
17
18
19
20
21
















22
23
24
25
26
27
28
(require-extension posix)
(import posix)
(import srfi-18)
;; (require-extension zmq)
;; (import zmq)

(define test-work-dir (current-directory))

















;; read in all the _record files
(let ((files (glob "*_records.scm")))
  (for-each
   (lambda (file)
     (print "Loading " file)
     (load file))







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
(require-extension posix)
(import posix)
(import srfi-18)
;; (require-extension zmq)
;; (import zmq)

(define test-work-dir (current-directory))

;; Run one `test` check per spec against a single procedure.
;;
;;   proc      - procedure under test; applied to the params of each spec
;;   pname     - name prefixed (followed by a space) to each spec's message
;;   inlst     - list of specs, each of the form
;;                 ( msg expected param1 param2 ... )
;;   post-proc - optional keyword argument; when not #f it is applied to the
;;               value returned by proc and its result is what gets compared
;;               against expected
;;
(define (test-batch proc pname inlst #!key (post-proc #f))
  (for-each
   (lambda (spec)
     ;; msg    = "<pname> <first element of spec>"
     ;; result = expected value (second element of spec)
     ;; params = remaining elements, passed to proc as its arguments
     (let ((msg    (conc pname " " (car spec)))
           (result (cadr spec))
           (params (cddr spec)))
       ;; proc is called inside the test form in both branches
       (if post-proc
           (test msg result (post-proc (apply proc params)))
           (test msg result (apply proc params)))))
   inlst))

;; read in all the _record files
(let ((files (glob "*_records.scm")))
  (for-each
   (lambda (file)
     (print "Loading " file)
     (load file))

Modified tests/unittests/all-rmt.scm from [091111a6e5] to [a8b587a3b0].

24
25
26
27
28
29
30






31
32









33

34

35

36
37
38
39
40
41
42

(test #f #t (vector? (rmt:get-connection-info toppath))) ;; TODO: push areapath down.
(test #f #t (string? (server:check-if-running ".")))
;; DEF (test #f #f (rmt:send-receive-no-auto-client-setup *runremote* 'get-keys #f '()))
;; DEF (rmt:kill-server run-id)
;; DEF (rmt:start-server run-id)
(test #f '(#t "successful login")(rmt:login #f))






;; DEF (rmt:login-no-auto-client-setup connection-info)
(test #f #t (pair? (rmt:get-latest-host-load (get-host-name))))









(test #f #t (list? (rmt:get-changed-record-ids 0)))

(test #f #f (begin (runs:update-all-test_meta #f) #f))

(test #f '("test1" "test2")(sort (alist-ref "tagtwo" (hash-table->alist (rmt:get-tests-tags)) equal?) string<=))

(test #f '() (rmt:get-key-val-pairs 0))
(test #f '("SYSTEM" "RELEASE") (rmt:get-keys))
(test #f '("SYSTEM" "RELEASE") (rmt:get-keys-write)) ;; dummy query to force server start
(test #f '() (rmt:get-key-vals 1))
(test #f (vector '("SYSTEM" "RELEASE") '()) (rmt:get-targets))
(test #f "" (rmt:get-target 1))
(test #f #t (rmt:register-test 1 "foo" ""))







>
>
>
>
>
>


>
>
>
>
>
>
>
>
>

>

>

>







24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60

(test #f #t (vector? (rmt:get-connection-info toppath))) ;; TODO: push areapath down.
(test #f #t (string? (server:check-if-running ".")))
;; DEF (test #f #f (rmt:send-receive-no-auto-client-setup *runremote* 'get-keys #f '()))
;; DEF (rmt:kill-server run-id)
;; DEF (rmt:start-server run-id)
(test #f '(#t "successful login")(rmt:login #f))

(test-batch rmt:login
            "rmt:login"
            (list (list "good" (list #t "successful login") #f)
                  (list "bad"  (list #f "login failed")     #t)))

;; DEF (rmt:login-no-auto-client-setup connection-info)
(test #f #t (pair? (rmt:get-latest-host-load (get-host-name))))

;; get-latest-host-load does a lookup in the db; it won't return a useful value
;; unless a test ran recently on the host
(test-batch rmt:get-latest-host-load
            "rmt:get-latest-host-load"
            (list (list "localhost"  #t (get-host-name))
                  (list "not-a-host" #t "not-a-host"  ))
            post-proc: pair?)
                                           
(test #f #t (list? (rmt:get-changed-record-ids 0)))

(test #f #f (begin (runs:update-all-test_meta #f) #f))

(test #f '("test1" "test2")(sort (alist-ref "tagtwo" (hash-table->alist (rmt:get-tests-tags)) equal?) string<=))

(test #f '() (rmt:get-key-val-pairs 0))
(test #f '("SYSTEM" "RELEASE") (rmt:get-keys))
(test #f '("SYSTEM" "RELEASE") (rmt:get-keys-write)) ;; dummy query to force server start
(test #f '() (rmt:get-key-vals 1))
(test #f (vector '("SYSTEM" "RELEASE") '()) (rmt:get-targets))
(test #f "" (rmt:get-target 1))
(test #f #t (rmt:register-test 1 "foo" ""))
80
81
82
83
84
85
86

87















88
89
90
91
92
93
94
(test #f #t (vector? (rmt:get-runs "%" 10 0 keypatts)))
(test #f '(1)(rmt:get-all-run-ids))
(test #f '()(rmt:get-prev-run-ids 1))
(test #f #t (begin (rmt:lock/unlock-run 1 #t #f "mikey") #t))
(test #f "JUSTFINE" (rmt:get-run-status 1))
(test #f #t (begin (rmt:set-run-status 1 "NOTFINE" msg: "A message") #t))
(test #f #t (begin (rmt:update-run-event_time 1) #t))

;; (rmt:get-runs-by-patt  keys runnamepatt targpatt offset limit fields last-runs-update) ;; fields of #f uses default















;; (rmt:find-and-mark-incomplete run-id ovr-deadtime)
;; (rmt:get-main-run-stats run-id)
;; (rmt:get-var varname)
;; (rmt:set-var varname value)
;; (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f))
;; (rmt:get-previous-test-run-record run-id test-name item-path)
;; (rmt:get-run-stats)







>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
(test #f #t (vector? (rmt:get-runs "%" 10 0 keypatts)))
(test #f '(1)(rmt:get-all-run-ids))
(test #f '()(rmt:get-prev-run-ids 1))
(test #f #t (begin (rmt:lock/unlock-run 1 #t #f "mikey") #t))
(test #f "JUSTFINE" (rmt:get-run-status 1))
(test #f #t (begin (rmt:set-run-status 1 "NOTFINE" msg: "A message") #t))
(test #f #t (begin (rmt:update-run-event_time 1) #t))

;; (rmt:get-runs-by-patt  keys runnamepatt targpatt offset limit fields last-runs-update) ;; fields of #f uses default
;;
;; Exercise rmt:get-runs-by-patt with two values for the final argument
;; (last-runs-update per the signature above): 0 and a time 100 seconds in
;; the future. The offset, limit and fields arguments are all passed as #f.
;; NOTE(review): presumably last-runs-update restricts the result to runs
;; updated at or after that time -- confirm against rmt:get-runs-by-patt.
;;
(let ((keys (rmt:get-keys))
      (rnp  "%")    ;; run name patt
      (tpt  "%/%")) ;; target patt
  (test-batch rmt:get-runs-by-patt
              "rmt:get-runs-by-patt"
              ;; each spec is (msg expected param ...); expected is compared with the post-proc result
              (list (list "t=0" #t keys rnp tpt #f #f #f 0)
                    (list "t=current" #f keys rnp tpt #f #f #f (+ 100 (current-seconds))) ;; should be no records from the future
                    )
              ;; print the raw result, then reduce it to a boolean: #t only when
              ;; it is a vector whose element at index 1 is a non-empty list
              post-proc: (lambda (res)
                           (print "rmt:get-runs-by-patt returned: " res)
                           (and (vector? res)
                                (let ((rows (vector-ref res 1)))
                                  (> (length rows) 0))))))

;; (rmt:find-and-mark-incomplete run-id ovr-deadtime)
;; (rmt:get-main-run-stats run-id)
;; (rmt:get-var varname)
;; (rmt:set-var varname value)
;; (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f))
;; (rmt:get-previous-test-run-record run-id test-name item-path)
;; (rmt:get-run-stats)