Megatest

Check-in [c7ef1b27a4]
Login
Overview
Comment:Don't updated stored cpu/disk space unless changed more than 200 Meg or .6 load
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: c7ef1b27a474fb857361e4d3e794e4869db24f74
User & Date: mrwellan on 2015-11-02 09:36:50
Other Links: branch diff | manifest | tags
Context
2015-11-04
08:09
Improvements to -list-targets for RobertG check-in: b95936eb28 user: mrwellan tags: v1.60
2015-11-02
09:36
Don't updated stored cpu/disk space unless changed more than 200 Meg or .6 load check-in: c7ef1b27a4 user: mrwellan tags: v1.60
08:04
More robust handling of rget when dependent vars do not exist. Minor output cleanup check-in: e1476e429d user: mrwellan tags: v1.60
Changes

Modified launch.scm from [d1b54f5011] to [58b47c99b7].

433
434
435
436
437
438
439
440
441











442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
							   (- 
							    (current-seconds) 
							    start-seconds)))))
					(kill-tries 0))
				   ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
				   ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
				   (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
				   (let loop ((minutes   (calc-minutes)))
				     (begin











				       (set! kill-job? (or (test-get-kill-request run-id test-id) ;; run-id test-name itemdat))
							   (and runtlim (let* ((run-seconds   (- (current-seconds) start-seconds))
									       (time-exceeded (> run-seconds runtlim)))
									  (if time-exceeded
									      (begin
										(debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
										#t)
									      #f)))))
				       (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
					     ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
					     ;;       between tries?
					     (let* ((pid1 (vector-ref exit-info 0))







|
|
>
>
>
>
>
>
>
>
>
>
>








|







433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
							   (- 
							    (current-seconds) 
							    start-seconds)))))
					(kill-tries 0))
				   ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
				   ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
				   (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
				   (let loop ((minutes   (calc-minutes))
					      (cpu-load  (get-cpu-load))
					      (disk-free (get-df (current-directory))))
				     (let ((new-cpu-load (let* ((load  (get-cpu-load))
								(delta (abs (- load cpu-load))))
							   (if (> delta 0.6) ;; don't bother updating with small changes
							       load
							       #f)))
					   (new-disk-free (let* ((df    (get-df (current-directory)))
								 (delta (abs (- df disk-free))))
							    (if (> delta 200) ;; ignore changes under 200 Meg
								df
								#f))))
				       (set! kill-job? (or (test-get-kill-request run-id test-id) ;; run-id test-name itemdat))
							   (and runtlim (let* ((run-seconds   (- (current-seconds) start-seconds))
									       (time-exceeded (> run-seconds runtlim)))
									  (if time-exceeded
									      (begin
										(debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
										#t)
									      #f)))))
				       (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
					     ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
					     ;;       between tries?
					     (let* ((pid1 (vector-ref exit-info 0))
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
						     )))
					     (mutex-unlock! m)
					     ;; no point in sticking around. Exit now.
					     (exit)))
				       (if keep-going
					   (begin
					     (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
					     (if keep-going
						 (loop (calc-minutes)))))))
				   (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)))) ;; NOTE: Checking twice for keep-going is intentional
		 (th1          (make-thread monitorjob "monitor job"))
		 (th2          (make-thread runit "run job")))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)







|
|







501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
						     )))
					     (mutex-unlock! m)
					     ;; no point in sticking around. Exit now.
					     (exit)))
				       (if keep-going
					   (begin
					     (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
					     (if keep-going    ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
						 (loop (calc-minutes) (or new-cpu-load cpu-load) (or new-disk-free disk-free)))))))
				   (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)))) ;; NOTE: Checking twice for keep-going is intentional
		 (th1          (make-thread monitorjob "monitor job"))
		 (th2          (make-thread runit "run job")))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)

Modified tests.scm from [e114fdcd33] to [45bbca6979].

1071
1072
1073
1074
1075
1076
1077

1078
1079
1080
1081
1082
1083
1084
1085
	   (set! res count))
	 tdb
	 "SELECT count(id) FROM test_rundat;")
	res))
  0)

(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)

  (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)
  (if minutes 
      (rmt:general-call 'update-run-duration run-id minutes test-id))
  (if (and uname hostname)
      (rmt:general-call 'update-uname-host run-id uname hostname test-id)))
  
;; This one is for running with no db access (i.e. via rmt: internally)
(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries)







>
|







1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
	   (set! res count))
	 tdb
	 "SELECT count(id) FROM test_rundat;")
	res))
  0)

(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)
  (if (and cpuload diskfree)
      (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id))
  (if minutes 
      (rmt:general-call 'update-run-duration run-id minutes test-id))
  (if (and uname hostname)
      (rmt:general-call 'update-uname-host run-id uname hostname test-id)))
  
;; This one is for running with no db access (i.e. via rmt: internally)
(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries)