Megatest

Check-in [180ef4e375]
Login
Overview
Comment:Merged e7a3 from v1.55 branch into v1.60 branch
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: 180ef4e375500b4096aa095ae8725e7670072666
User & Date: mrwellan on 2014-06-02 10:49:06
Other Links: branch diff | manifest | tags
Context
2014-06-02
11:21
Merged 0f5d from v1.55 to v1.60 check-in: ce8b9e0b55 user: mrwellan tags: v1.60
10:49
Merged e7a3 from v1.55 branch into v1.60 branch check-in: 180ef4e375 user: mrwellan tags: v1.60
2014-05-22
13:46
Sucessful merge from 2b3c(v1.55) into v1.60 check-in: 23bb603800 user: mrwellan tags: v1.60
2014-03-28
00:08
Found couple more cases of poorly protected sqlite3 calls. Fixed. check-in: e7a3a54019 user: matt tags: v1.55
Changes

Modified db.scm from [95bd13385e] to [b980dbcc63].

497
498
499
500
501
502
503
504

505
506
507
508
509
510
511
497
498
499
500
501
502
503

504
505
506
507
508
509
510
511







-
+







	(debug:print-info 0 "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up")))
     (apply open-run-close-exception-handling proc idb params))
   (apply open-run-close-no-exception-handling proc idb params)))

;; (define open-run-close 
(define open-run-close open-run-close-exception-handling)
		;;	   open-run-close-no-exception-handling
			   open-run-close-exception-handling)
;;			   open-run-close-exception-handling)
;;)

(define (db:initialize-main-db db)
  (let* ((configdat (car *configinfo*))  ;; tut tut, global warning...
	 (keys     (keys:config-get-fields configdat))
	 (havekeys (> (length keys) 0))
	 (keystr   (keys->keystr keys))
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827







1828
1829
1830
1831
1832
1833
1834
1814
1815
1816
1817
1818
1819
1820







1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834







-
-
-
-
-
-
-
+
+
+
+
+
+
+







	    (db:general-call db 'top-test-set-running (list test-name))
	    (db:general-call db 'top-test-set-per-pf-counts (list test-name test-name test-name)))
	#f)
      #f))

(define (db:tests-register-test dbstruct run-id test-name item-path)
  (sqlite3:execute (db:get-db dbstruct run-id) 'register-test run-id test-name item-path))
       (let ((sleep-time (random 20))
	     (err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
	 (case err-status
	   ((busy)(thread-sleep! 4))
	   (else
	    (debug:print 0 "WARNING: possible problem with call to cdb:remote-run, database may be read-only and locked, waiting and trying again ...")
	    (thread-sleep! sleep-time)))
;;        (let ((sleep-time (random 20))
;; 	     (err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
;; 	 (case err-status
;; 	   ((busy)(thread-sleep! 4))
;; 	   (else
;; 	    (debug:print 0 "WARNING: possible problem with call to cdb:remote-run, database may be read-only and locked, waiting and trying again ...")
;; 	    (thread-sleep! sleep-time)))

(define (db:test-get-logfile-info dbstruct run-id test-name)
  (let ((res #f))
    (sqlite3:for-each-row 
     (lambda (path final_logf)
       ;; (let ((path       (sdb:qry 'getstr path-id))
       ;;       (final_logf (sdb:qry 'getstr final_logf-id)))

Modified launch.scm from [59d0d6772c] to [dd204b865e].

142
143
144
145
146
147
148

149


150
151
152
153
154
155
156
142
143
144
145
146
147
148
149

150
151
152
153
154
155
156
157
158







+
-
+
+







	  (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id))
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  ;; open-run-close not needed for test-set-meta-info
	  ;; (tests:set-full-meta-info #f test-id run-id 0 work-area)
	  (tests:set-full-meta-info test-id run-id 0 work-area)
	  ;; (tests:set-full-meta-info test-id run-id 0 work-area)
	  (tests:set-full-meta-info #f test-id run-id 0 work-area 10)

	  ;; (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	  (thread-sleep! 0.3) ;; NFS slowness has caused grief here

	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
300
301
302
303
304
305
306

307


308
309
310
311
312
313
314
315
316
317
318

319

320
321
322
323
324
325
326
302
303
304
305
306
307
308
309

310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332







+
-
+
+











+

+







					(calc-minutes  (lambda ()
							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 
							    start-seconds)))))
					(kill-tries 0))
				   ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
				   (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
				   ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
				   (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
				   (let loop ((minutes   (calc-minutes)))
				     (begin
				       (set! kill-job? (or (test-get-kill-request run-id test-id) ;; run-id test-name itemdat))
							   (and runtlim (let* ((run-seconds   (- (current-seconds) start-seconds))
									       (time-exceeded (> run-seconds runtlim)))
									  (if time-exceeded
									      (begin
										(debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
										#t)
									      #f)))))
				       ;; open-run-close not needed for test-set-meta-info
				       ;; (tests:set-partial-meta-info #f test-id run-id minutes work-area)
				       (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)
				       ;; (tests:set-partial-meta-info #f test-id run-id minutes work-area 10)
				       ;; (tests:set-partial-meta-info test-id run-id minutes work-area)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
					     ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
					     ;;       between tries?

Modified megatest.scm from [a6d2c3a647] to [f5cb1d64b3].

989
990
991
992
993
994
995
996




997
998
999
1000
1001
1002
1003
989
990
991
992
993
994
995

996
997
998
999
1000
1001
1002
1003
1004
1005
1006







-
+
+
+
+







		    (exit 6))
		  (let* ((stepname   (args:get-arg "-runstep"))
			 (logprofile (args:get-arg "-logpro"))
			 (logfile    (conc stepname ".log"))
			 (cmd        (if (null? remargs) #f (car remargs)))
			 (params     (if cmd (cdr remargs) '()))
			 (exitstat   #f)
			 (shell      (last (string-split (get-environment-variable "SHELL") "/")))
			 (shell      (let ((sh (get-environment-variable "SHELL") ))
				       (if sh 
					   (last (string-split sh "/"))
					   "bash")))
			 (redir      (case (string->symbol shell)
				       ((tcsh csh ksh)    ">&")
				       ((zsh bash sh ash) "2>&1 >")
				       (else ">&")))
			 (fullcmd    (conc "(" (string-intersperse 
						(cons cmd params) " ")
					   ") " redir " " logfile)))

Modified tests.scm from [6b6e58b833] to [c93debf593].

634
635
636
637
638
639
640

641
642
643
644
645
646
647
648
649
650


























651
652
653


















654
655
656
657
658
659
660
634
635
636
637
638
639
640
641










642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668


669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693







+
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







  (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)
  (if minutes 
      (rmt:general-call 'update-run-duration run-id minutes test-id))
  (if (and uname hostname)
      (rmt:general-call 'update-uname-host run-id uname hostname test-id)))
  
;; This one is for running with no db access (i.e. via rmt: internally)
(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries)
(define (tests:set-full-meta-info test-id run-id minutes work-area)
  (let* ((num-records 0)
	 (cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory)))
	 (uname    (get-uname "-srvpio"))
	 (hostname (get-host-name)))
    (tdb:remote-update-testdat-meta-info run-id test-id work-area cpuload diskfree minutes)
    (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)))
	  
(define (tests:set-partial-meta-info test-id run-id minutes work-area)
;; (define (tests:set-full-meta-info test-id run-id minutes work-area)
;;  (let ((remtries 10))
    (handle-exceptions
     exn
     (if (> remtries 0)
	 (begin
	   (set! remtries (- remtries 1))
	   (thread-sleep! 10)
	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
	   (debug:print 0 "ERROR: tried for over a minute to update meta info and failed. Giving up")
	   (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.")
	   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	   (print "exn=" (condition->list exn))
	   (debug:print 0 " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	   (print-call-chain)))
     (let* ((num-records 0) ;; (test:tdb-get-rundat-count tdb))
	    (cpuload  (get-cpu-load))
	    (diskfree (get-df (current-directory)))
	    (uname    (get-uname "-srvpio"))
	    (hostname (get-host-name)))
       (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
    (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname))))
    
;; (define (tests:set-partial-meta-info test-id run-id minutes work-area)
(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries)
  (let* ((cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory))))
    (tdb:remote-update-testdat-meta-info run-id test-id work-area cpuload diskfree minutes)))
	 (diskfree (get-df (current-directory)))
	 (remtries 10))
    (handle-exceptions
     exn
     (if (> remtries 0)
	 (begin
	   (set! remtries (- remtries 1))
	   (thread-sleep! 10)
	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
	   (debug:print 0 "ERROR: tried for over a minute to update meta info and failed. Giving up")
	   (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.")
	   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	   (print "exn=" (condition->list exn))
	   (debug:print 0 " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	   (print-call-chain)))
     (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
  )))
	 
;;======================================================================
;; A R C H I V I N G
;;======================================================================

(define (test:archive db test-id)
  #f)