Megatest

Check-in [8013462115]
Login
Overview
Comment:Minor refactor for performance in test control panel
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.55
Files: files | file ages | folders
SHA1: 80134621156457b86ab876db959333a32366fa12
User & Date: mrwellan on 2013-10-31 17:30:30
Other Links: branch diff | manifest | tags
Context
2013-11-01
07:00
Moved call to set-sync to runs.scm check-in: 58bf63874d user: matt tags: v1.55
2013-10-31
17:30
Minor refactor for performance in test control panel check-in: 8013462115 user: mrwellan tags: v1.55
2013-10-30
07:41
Added placeholder for script runner mtrunscript check-in: 9890845462 user: matt tags: v1.55
Changes

Modified dashboard-tests.scm from [7817a2c78f] to [e5436ec9f2].

367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
















383
384
385
386
387
388
389
390


391
392



393
394
395
396
397
398
399
	       (refreshdat (lambda ()
			     (let* ((curr-mod-time (max (file-modification-time db-path)
							(if (file-exists? testdat-path)
							    (file-modification-time testdat-path)
							    (begin
							      (set! testdat-path (conc rundir "/testdat.db"))
							      0))))
				    (need-update   (or (and (>= curr-mod-time db-mod-time)
							    (> (current-milliseconds)(+ last-update 250))) ;; every half seconds if db touched
						       (> (current-milliseconds)(+ last-update 10000))     ;; force update even 10 seconds
						       request-update))
				    (newtestdat (if need-update 
						    (handle-exceptions
						     exn 
						     (debug:print-info 2 "test db access issue: " ((condition-property-accessor 'exn 'message) exn))
						     (open-run-close db:get-test-info-by-id db test-id )))))
















			       (cond
				((and need-update newtestdat)
				 (set! testdat newtestdat)
				 (set! teststeps    (db:get-compressed-steps test-id work-area: rundir))
				 (set! logfile      (conc (db:test-get-rundir testdat) "/" (db:test-get-final_logf testdat)))
				 (set! rundir       (db:test-get-rundir testdat))
				 (set! testfullname (db:test-get-fullname testdat))
				 ;; (debug:print 0 "INFO: teststeps=" (intersperse teststeps "\n    "))


				 (if (eq? curr-mod-time db-mod-time) ;; do only once if same
				     (set! db-mod-time (+ curr-mod-time 1))



				     (set! db-mod-time curr-mod-time))
				 (set! last-update (current-milliseconds))
				 (set! request-update #f) ;; met the need ...
				 )
				(need-update ;; if this was true and yet there is no data ....
				 (db:test-set-testname! testdat "DEAD OR DELETED TEST")))
			       (if need-update







|







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>








>
>
|
|
>
>
>







367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
	       (refreshdat (lambda ()
			     (let* ((curr-mod-time (max (file-modification-time db-path)
							(if (file-exists? testdat-path)
							    (file-modification-time testdat-path)
							    (begin
							      (set! testdat-path (conc rundir "/testdat.db"))
							      0))))
				    (need-update   (or (and (> curr-mod-time db-mod-time)
							    (> (current-milliseconds)(+ last-update 250))) ;; every half seconds if db touched
						       (> (current-milliseconds)(+ last-update 10000))     ;; force update even 10 seconds
						       request-update))
				    (newtestdat (if need-update 
						    (handle-exceptions
						     exn 
						     (debug:print-info 2 "test db access issue: " ((condition-property-accessor 'exn 'message) exn))
						     (let* ((newdat (open-run-close db:get-test-info-by-id db test-id ))
							    (tstdat (if newdat
									(open-run-close tests:testdat-get-testinfo db test-id #f)
									'())))
						       (if (and newdat 
								(not (null? tstdat))) ;; (update-time cpuload diskfree run-duration)
							   (let* ((rec      (car tstdat))
								  (cpuload  (vector-ref rec 1))
								  (diskfree (vector-ref rec 2))
								  (run-dur  (vector-ref rec 3)))
							     (db:test-set-run_duration! newdat run-dur)
							     (db:test-set-diskfree!     newdat diskfree)
							     (db:test-set-cpuload!      newdat cpuload)))
						       ;; (debug:print 0 "newdat=" newdat)
						       newdat))
						    #f)))
			       ;; (debug:print 0 "newtestdat=" newtestdat)
			       (cond
				((and need-update newtestdat)
				 (set! testdat newtestdat)
				 (set! teststeps    (db:get-compressed-steps test-id work-area: rundir))
				 (set! logfile      (conc (db:test-get-rundir testdat) "/" (db:test-get-final_logf testdat)))
				 (set! rundir       (db:test-get-rundir testdat))
				 (set! testfullname (db:test-get-fullname testdat))
				 ;; (debug:print 0 "INFO: teststeps=" (intersperse teststeps "\n    "))
				 
				 ;; I don't see why this was implemented this way. Please comment it ...
				 ;; (if (eq? curr-mod-time db-mod-time) ;; do only once if same
				 ;;     (set! db-mod-time (+ curr-mod-time 1))
				 ;;     (set! db-mod-time curr-mod-time))

				 (if (not (eq? curr-mod-time db-mod-time))
				     (set! db-mod-time curr-mod-time))
				 (set! last-update (current-milliseconds))
				 (set! request-update #f) ;; met the need ...
				 )
				(need-update ;; if this was true and yet there is no data ....
				 (db:test-set-testname! testdat "DEAD OR DELETED TEST")))
			       (if need-update

Modified launch.scm from [ae5ddfc81a] to [8df7772acd].

345
346
347
348
349
350
351
352

353
354
355
356
357
358
359
360

361
362
363
364
365
366
367
368
369
370
371

372
373
374
375
376
377
378
						   ;;      (car processes))
						   ;;     (system (conc "kill -9 -" pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (tests:test-set-status! test-id "KILLED"  "FAIL"
								     (args:get-arg "-m") #f)
						     (sqlite3:finalize! tdb)
						     (exit 1))))

					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       ;; (sqlite3:finalize! db)
				       (if keep-going
					   (begin
					     (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
					     (if keep-going
						 (loop (calc-minutes)))))))))) ;; NOTE: Checking twice for keep-going is intentional

		 (th1          (make-thread monitorjob "monitor job"))
		 (th2          (make-thread runit "run job")))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (set! keep-going #f)
	    (thread-join! th1)
	    ;; (thread-sleep! 1)
	    ;; (thread-terminate! th1) ;; Not sure if this is a good idea
	    (thread-sleep! 1)       ;; give thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec.

	    (mutex-lock! m)
	    (let* ((item-path (item-list->path itemdat))
		   (testinfo  (cdb:get-test-info-by-id *runremote* test-id))) ;; )) ;; run-id test-name item-path)))
	      ;; Am I completed?
	      (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (let ((new-state  (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status
				                                        ;; "COMPLETED"







|
>







|
>











>







345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
						   ;;      (car processes))
						   ;;     (system (conc "kill -9 -" pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (tests:test-set-status! test-id "KILLED"  "FAIL"
								     (args:get-arg "-m") #f)
						     (sqlite3:finalize! tdb)
						     (exit 1) ;; IS THIS NECESSARY OR WISE???
						     )))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       ;; (sqlite3:finalize! db)
				       (if keep-going
					   (begin
					     (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
					     (if keep-going
						 (loop (calc-minutes)))))))
				   (tests:update-central-meta-info test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)))) ;; NOTE: Checking twice for keep-going is intentional
		 (th1          (make-thread monitorjob "monitor job"))
		 (th2          (make-thread runit "run job")))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (set! keep-going #f)
	    (thread-join! th1)
	    ;; (thread-sleep! 1)
	    ;; (thread-terminate! th1) ;; Not sure if this is a good idea
	    (thread-sleep! 1)       ;; give thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec.
	    ;; (tests:update-central-meta-info test-id cpuload diskfree minutes #f #f)
	    (mutex-lock! m)
	    (let* ((item-path (item-list->path itemdat))
		   (testinfo  (cdb:get-test-info-by-id *runremote* test-id))) ;; )) ;; run-id test-name item-path)))
	      ;; Am I completed?
	      (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (let ((new-state  (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status
				                                        ;; "COMPLETED"

Modified tests.scm from [ed985ac2fe] to [b56060ee86].

718
719
720
721
722
723
724

725

726
727
728


729
730
731

732












733
734
735
736
737
738
739
	  
(define (tests:set-partial-meta-info db test-id run-id minutes work-area)
  ;; DOES cdb:remote-run under the hood!
  (let* ((cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory))))
    (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
    ;; Update central with uname and hostname = #f

    (tests:update-central-meta-info test-id cpuload diskfree minutes #f #f)))

	 
(define (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
  (let ((tdb         (db:open-test-db-by-test-id db test-id work-area: work-area)))


    (sqlite3:execute tdb "INSERT INTO test_rundat (update_time,cpuload,diskfree,run_duration) VALUES (strftime('%s','now'),?,?,?);"
		     cpuload diskfree minutes)
    (sqlite3:finalize! tdb)))

 












;;======================================================================
;; A R C H I V I N G
;;======================================================================

(define (test:archive db test-id)
  #f)








>
|
>



>
>
|
|
|
>
|
>
>
>
>
>
>
>
>
>
>
>
>







718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
	  
(define (tests:set-partial-meta-info db test-id run-id minutes work-area)
  ;; DOES cdb:remote-run under the hood!
  (let* ((cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory))))
    (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
    ;; Update central with uname and hostname = #f
    ;; Is this one of the performance problems? This info should come from testdat-meta anyway
    ;; (tests:update-central-meta-info test-id cpuload diskfree minutes #f #f)
  ))
	 
(define (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
  (let ((tdb         (db:open-test-db-by-test-id db test-id work-area: work-area)))
    (if tdb
	(begin
	  (sqlite3:execute tdb "INSERT INTO test_rundat (update_time,cpuload,diskfree,run_duration) VALUES (strftime('%s','now'),?,?,?);"
			   cpuload diskfree minutes)
	  (sqlite3:finalize! tdb))
	(debug:print 2 "Can't update testdat.db for test " test-id " read-only or non-existant"))))
    
(define (tests:testdat-get-testinfo db test-id work-area)
   (let ((tdb         (db:open-test-db-by-test-id db test-id work-area: work-area))
	 (res         '()))
     (if tdb
	 (sqlite3:for-each-row
	  (lambda (update-time cpuload diskfree run-duration)
	    (set! res (cons (vector update-time cpuload diskfree run-duration) res)))
	  tdb
	  "SELECT update_time,cpuload,diskfree,run_duration FROM test_rundat ORDER BY update_time ASC;")
	 (sqlite3:finalize! tdb))
     res))

;;======================================================================
;; A R C H I V I N G
;;======================================================================

(define (test:archive db test-id)
  #f)

Modified utils/installall.sh from [8cb233ef3b] to [067c450820].

56
57
58
59
60
61
62
63

64
65
66
67
68
69
70
71
72
if [[ $KTYPE == "" ]]; then
  echo 'Using KTYPE=26'
  export KTYPE=26
else
  echo Using KTYPE=$KTYPE
fi

export CHICKEN_VERSION=4.8.0

if ! [[ -e chicken-${CHICKEN_VERSION}.tar.gz ]]; then 
    wget http://code.call-cc.org/releases/${CHICKEN_VERSION}/chicken-${CHICKEN_VERSION}.tar.gz
fi 

BUILDHOME=$PWD
DEPLOYTARG=$BUILDHOME/deploy

if [[ $PREFIX == "" ]]; then
   PREFIX=$PWD/inst







|
>

|







56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
if [[ $KTYPE == "" ]]; then
  echo 'Using KTYPE=26'
  export KTYPE=26
else
  echo Using KTYPE=$KTYPE
fi

export CHICKEN_VERSION=4.8.0.5
export CHICKEN_BASEVER=4.8.0
if ! [[ -e chicken-${CHICKEN_VERSION}.tar.gz ]]; then 
    wget http://code.call-cc.org/releases/${CHICKEN_BASEVER}/chicken-${CHICKEN_VERSION}.tar.gz
fi 

BUILDHOME=$PWD
DEPLOYTARG=$BUILDHOME/deploy

if [[ $PREFIX == "" ]]; then
   PREFIX=$PWD/inst
122
123
124
125
126
127
128


129
130
131
132
133
134
135
    if [[ -e sqlite-autoconf-$SQLITE3_VERSION.tar.gz ]]; then
	tar xfz sqlite-autoconf-$SQLITE3_VERSION.tar.gz 
	(cd sqlite-autoconf-$SQLITE3_VERSION;./configure --prefix=$PREFIX;make;make install)
	# CSC_OPTIONS="-I$PREFIX/include -L$PREFIX/lib" $CHICKEN_INSTALL -prefix $DEPLOYTARG -deploy $PROX sqlite3
	CSC_OPTIONS="-I$PREFIX/include -L$PREFIX/lib" $CHICKEN_INSTALL $PROX sqlite3
    fi
fi



# $CHICKEN_INSTALL $PROX sqlite3

if [[ `uname -a | grep x86_64` == "" ]]; then 
    export ARCHSIZE=''
else
    export ARCHSIZE=64_







>
>







123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
    if [[ -e sqlite-autoconf-$SQLITE3_VERSION.tar.gz ]]; then
	tar xfz sqlite-autoconf-$SQLITE3_VERSION.tar.gz 
	(cd sqlite-autoconf-$SQLITE3_VERSION;./configure --prefix=$PREFIX;make;make install)
	# CSC_OPTIONS="-I$PREFIX/include -L$PREFIX/lib" $CHICKEN_INSTALL -prefix $DEPLOYTARG -deploy $PROX sqlite3
	CSC_OPTIONS="-I$PREFIX/include -L$PREFIX/lib" $CHICKEN_INSTALL $PROX sqlite3
    fi
fi

exit

# $CHICKEN_INSTALL $PROX sqlite3

if [[ `uname -a | grep x86_64` == "" ]]; then 
    export ARCHSIZE=''
else
    export ARCHSIZE=64_