Megatest

Check-in [180ef4e375]
Login
Overview
Comment:Merged e7a3 from v1.55 branch into v1.60 branch
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: 180ef4e375500b4096aa095ae8725e7670072666
User & Date: mrwellan on 2014-06-02 10:49:06
Other Links: branch diff | manifest | tags
Context
2014-06-02
11:21
Merged 0f5d from v1.55 to v1.60 check-in: ce8b9e0b55 user: mrwellan tags: v1.60
10:49
Merged e7a3 from v1.55 branch into v1.60 branch check-in: 180ef4e375 user: mrwellan tags: v1.60
2014-05-22
13:46
Sucessful merge from 2b3c(v1.55) into v1.60 check-in: 23bb603800 user: mrwellan tags: v1.60
2014-03-28
00:08
Found couple more cases of poorly protected sqlite3 calls. Fixed. check-in: e7a3a54019 user: matt tags: v1.55
Changes

Modified db.scm from [95bd13385e] to [b980dbcc63].

497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
	(debug:print-info 0 "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up")))
     (apply open-run-close-exception-handling proc idb params))
   (apply open-run-close-no-exception-handling proc idb params)))

;; (define open-run-close 
(define open-run-close open-run-close-exception-handling)
		;;	   open-run-close-no-exception-handling
			   open-run-close-exception-handling)
;;)

(define (db:initialize-main-db db)
  (let* ((configdat (car *configinfo*))  ;; tut tut, global warning...
	 (keys     (keys:config-get-fields configdat))
	 (havekeys (> (length keys) 0))
	 (keystr   (keys->keystr keys))







|







497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
	(debug:print-info 0 "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up")))
     (apply open-run-close-exception-handling proc idb params))
   (apply open-run-close-no-exception-handling proc idb params)))

;; (define open-run-close 
(define open-run-close open-run-close-exception-handling)
		;;	   open-run-close-no-exception-handling
;;			   open-run-close-exception-handling)
;;)

(define (db:initialize-main-db db)
  (let* ((configdat (car *configinfo*))  ;; tut tut, global warning...
	 (keys     (keys:config-get-fields configdat))
	 (havekeys (> (length keys) 0))
	 (keystr   (keys->keystr keys))
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
	    (db:general-call db 'top-test-set-running (list test-name))
	    (db:general-call db 'top-test-set-per-pf-counts (list test-name test-name test-name)))
	#f)
      #f))

(define (db:tests-register-test dbstruct run-id test-name item-path)
  (sqlite3:execute (db:get-db dbstruct run-id) 'register-test run-id test-name item-path))
       (let ((sleep-time (random 20))
	     (err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
	 (case err-status
	   ((busy)(thread-sleep! 4))
	   (else
	    (debug:print 0 "WARNING: possible problem with call to cdb:remote-run, database may be read-only and locked, waiting and trying again ...")
	    (thread-sleep! sleep-time)))

(define (db:test-get-logfile-info dbstruct run-id test-name)
  (let ((res #f))
    (sqlite3:for-each-row 
     (lambda (path final_logf)
       ;; (let ((path       (sdb:qry 'getstr path-id))
       ;;       (final_logf (sdb:qry 'getstr final_logf-id)))







|
|
|
|
|
|
|







1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
	    (db:general-call db 'top-test-set-running (list test-name))
	    (db:general-call db 'top-test-set-per-pf-counts (list test-name test-name test-name)))
	#f)
      #f))

(define (db:tests-register-test dbstruct run-id test-name item-path)
  (sqlite3:execute (db:get-db dbstruct run-id) 'register-test run-id test-name item-path))
;;        (let ((sleep-time (random 20))
;; 	     (err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
;; 	 (case err-status
;; 	   ((busy)(thread-sleep! 4))
;; 	   (else
;; 	    (debug:print 0 "WARNING: possible problem with call to cdb:remote-run, database may be read-only and locked, waiting and trying again ...")
;; 	    (thread-sleep! sleep-time)))

(define (db:test-get-logfile-info dbstruct run-id test-name)
  (let ((res #f))
    (sqlite3:for-each-row 
     (lambda (path final_logf)
       ;; (let ((path       (sdb:qry 'getstr path-id))
       ;;       (final_logf (sdb:qry 'getstr final_logf-id)))

Modified launch.scm from [59d0d6772c] to [dd204b865e].

142
143
144
145
146
147
148

149

150
151
152
153
154
155
156
	  (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id))
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  ;; open-run-close not needed for test-set-meta-info

	  (tests:set-full-meta-info test-id run-id 0 work-area)


	  ;; (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	  (thread-sleep! 0.3) ;; NFS slowness has caused grief here

	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")







>
|
>







142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
	  (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id))
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  ;; open-run-close not needed for test-set-meta-info
	  ;; (tests:set-full-meta-info #f test-id run-id 0 work-area)
	  ;; (tests:set-full-meta-info test-id run-id 0 work-area)
	  (tests:set-full-meta-info #f test-id run-id 0 work-area 10)

	  ;; (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	  (thread-sleep! 0.3) ;; NFS slowness has caused grief here

	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
300
301
302
303
304
305
306

307

308
309
310
311
312
313
314
315
316
317
318

319

320
321
322
323
324
325
326
					(calc-minutes  (lambda ()
							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 
							    start-seconds)))))
					(kill-tries 0))

				   (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)

				   (let loop ((minutes   (calc-minutes)))
				     (begin
				       (set! kill-job? (or (test-get-kill-request run-id test-id) ;; run-id test-name itemdat))
							   (and runtlim (let* ((run-seconds   (- (current-seconds) start-seconds))
									       (time-exceeded (> run-seconds runtlim)))
									  (if time-exceeded
									      (begin
										(debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
										#t)
									      #f)))))
				       ;; open-run-close not needed for test-set-meta-info

				       (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)

				       ;; (tests:set-partial-meta-info test-id run-id minutes work-area)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
					     ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
					     ;;       between tries?







>
|
>











>

>







302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
					(calc-minutes  (lambda ()
							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 
							    start-seconds)))))
					(kill-tries 0))
				   ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
				   ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
				   (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
				   (let loop ((minutes   (calc-minutes)))
				     (begin
				       (set! kill-job? (or (test-get-kill-request run-id test-id) ;; run-id test-name itemdat))
							   (and runtlim (let* ((run-seconds   (- (current-seconds) start-seconds))
									       (time-exceeded (> run-seconds runtlim)))
									  (if time-exceeded
									      (begin
										(debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
										#t)
									      #f)))))
				       ;; open-run-close not needed for test-set-meta-info
				       ;; (tests:set-partial-meta-info #f test-id run-id minutes work-area)
				       (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)
				       ;; (tests:set-partial-meta-info #f test-id run-id minutes work-area 10)
				       ;; (tests:set-partial-meta-info test-id run-id minutes work-area)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
					     ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
					     ;;       between tries?

Modified megatest.scm from [a6d2c3a647] to [f5cb1d64b3].

989
990
991
992
993
994
995
996



997
998
999
1000
1001
1002
1003
		    (exit 6))
		  (let* ((stepname   (args:get-arg "-runstep"))
			 (logprofile (args:get-arg "-logpro"))
			 (logfile    (conc stepname ".log"))
			 (cmd        (if (null? remargs) #f (car remargs)))
			 (params     (if cmd (cdr remargs) '()))
			 (exitstat   #f)
			 (shell      (last (string-split (get-environment-variable "SHELL") "/")))



			 (redir      (case (string->symbol shell)
				       ((tcsh csh ksh)    ">&")
				       ((zsh bash sh ash) "2>&1 >")
				       (else ">&")))
			 (fullcmd    (conc "(" (string-intersperse 
						(cons cmd params) " ")
					   ") " redir " " logfile)))







|
>
>
>







989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
		    (exit 6))
		  (let* ((stepname   (args:get-arg "-runstep"))
			 (logprofile (args:get-arg "-logpro"))
			 (logfile    (conc stepname ".log"))
			 (cmd        (if (null? remargs) #f (car remargs)))
			 (params     (if cmd (cdr remargs) '()))
			 (exitstat   #f)
			 (shell      (let ((sh (get-environment-variable "SHELL") ))
				       (if sh 
					   (last (string-split sh "/"))
					   "bash")))
			 (redir      (case (string->symbol shell)
				       ((tcsh csh ksh)    ">&")
				       ((zsh bash sh ash) "2>&1 >")
				       (else ">&")))
			 (fullcmd    (conc "(" (string-intersperse 
						(cons cmd params) " ")
					   ") " redir " " logfile)))

Modified tests.scm from [6b6e58b833] to [c93debf593].

634
635
636
637
638
639
640

641















642
643
644
645
646
647
648
649
650

651
652















653

654
655
656
657
658
659
660
  (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)
  (if minutes 
      (rmt:general-call 'update-run-duration run-id minutes test-id))
  (if (and uname hostname)
      (rmt:general-call 'update-uname-host run-id uname hostname test-id)))
  
;; This one is for running with no db access (i.e. via rmt: internally)

(define (tests:set-full-meta-info test-id run-id minutes work-area)















  (let* ((num-records 0)
	 (cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory)))
	 (uname    (get-uname "-srvpio"))
	 (hostname (get-host-name)))
    (tdb:remote-update-testdat-meta-info run-id test-id work-area cpuload diskfree minutes)
    (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)))
	  
(define (tests:set-partial-meta-info test-id run-id minutes work-area)

  (let* ((cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory))))















    (tdb:remote-update-testdat-meta-info run-id test-id work-area cpuload diskfree minutes)))

	 
;;======================================================================
;; A R C H I V I N G
;;======================================================================

(define (test:archive db test-id)
  #f)







>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
>

|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>







634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
  (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id)
  (if minutes 
      (rmt:general-call 'update-run-duration run-id minutes test-id))
  (if (and uname hostname)
      (rmt:general-call 'update-uname-host run-id uname hostname test-id)))
  
;; This one is for running with no db access (i.e. via rmt: internally)
(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries)
;; (define (tests:set-full-meta-info test-id run-id minutes work-area)
;;  (let ((remtries 10))
    (handle-exceptions
     exn
     (if (> remtries 0)
	 (begin
	   (set! remtries (- remtries 1))
	   (thread-sleep! 10)
	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
	   (debug:print 0 "ERROR: tried for over a minute to update meta info and failed. Giving up")
	   (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.")
	   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	   (print "exn=" (condition->list exn))
	   (debug:print 0 " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	   (print-call-chain)))
     (let* ((num-records 0) ;; (test:tdb-get-rundat-count tdb))
	    (cpuload  (get-cpu-load))
	    (diskfree (get-df (current-directory)))
	    (uname    (get-uname "-srvpio"))
	    (hostname (get-host-name)))
       (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
    (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname))))
    
;; (define (tests:set-partial-meta-info test-id run-id minutes work-area)
(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries)
  (let* ((cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory)))
	 (remtries 10))
    (handle-exceptions
     exn
     (if (> remtries 0)
	 (begin
	   (set! remtries (- remtries 1))
	   (thread-sleep! 10)
	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
	   (debug:print 0 "ERROR: tried for over a minute to update meta info and failed. Giving up")
	   (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.")
	   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	   (print "exn=" (condition->list exn))
	   (debug:print 0 " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	   (print-call-chain)))
     (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
  )))
	 
;;======================================================================
;; A R C H I V I N G
;;======================================================================

(define (test:archive db test-id)
  #f)