Megatest

Check-in [598ddd3327]
Login
Overview
Comment:Starting point for server implemntation
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 598ddd3327c35d4a4ba783ff295d72b1a5b43f95
User & Date: matt on 2011-09-11 12:51:09
Other Links: manifest | tags
Context
2011-09-12
00:05
Rollup to test_data completed. Rebuild db reworked check-in: d406fee8c4 user: matt tags: trunk, v1.24
2011-09-11
12:51
Starting point for server implemntation check-in: 598ddd3327 user: matt tags: trunk
2011-09-10
23:03
Added lineitem data uploading and tests check-in: 1eb40d3a48 user: matt tags: trunk
Changes

Modified common.scm from [158dd112b2] to [219aacd413].

8
9
10
11
12
13
14


15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

35
36
37
38
39
40
41
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
;;======================================================================

(use sqlite3 srfi-1 posix regex-case base64 format dot-locking csv-xml)
(require-extension sqlite3 regex posix)



(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))

;; (require-library margs)
(include "margs.scm")

(define getenv get-environment-variable)

(define home (getenv "HOME"))
(define user (getenv "USER"))

;; global gletches
(define *configinfo* #f)
(define *configdat*  #f)
(define *toppath*    #f)
(define *already-seen-runconfig-info* #f)
(define *waiting-queue* (make-hash-table))
(define *globalexitstatus* 0) ;; attempt to work around possible thread issues
(define *passnum*     0) ;; when running track calls to run-tests or similar
(define *verbosity*   1)


(define-inline (get-with-default val default)
  (let ((val (args:get-arg val)))
    (if val val default)))

(define-inline (assoc/default key lst . default)
  (let ((res (assoc key lst)))







>
>




















>







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
;;======================================================================

(use sqlite3 srfi-1 posix regex-case base64 format dot-locking csv-xml)
(require-extension sqlite3 regex posix)

(require-extension (srfi 18) extras tcp rpc)

(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))

;; (require-library margs)
(include "margs.scm")

(define getenv get-environment-variable)

(define home (getenv "HOME"))
(define user (getenv "USER"))

;; global gletches
(define *configinfo* #f)
(define *configdat*  #f)
(define *toppath*    #f)
(define *already-seen-runconfig-info* #f)
(define *waiting-queue* (make-hash-table))
(define *globalexitstatus* 0) ;; attempt to work around possible thread issues
(define *passnum*     0) ;; when running track calls to run-tests or similar
(define *verbosity*   1)
(define *rpc:listener* #f) ;; if set up for server communication this will hold the tcp port

(define-inline (get-with-default val default)
  (let ((val (args:get-arg val)))
    (if val val default)))

(define-inline (assoc/default key lst . default)
  (let ((res (assoc key lst)))

Modified megatest.scm from [714eb0a317] to [53a5275b02].

445
446
447
448
449
450
451
452





453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
							   (inexact->exact 
							    (round 
							     (- 
							      (current-seconds) 
							      start-seconds)))))
					  (kill-tries 0))
				     (let loop ((minutes   (calc-minutes)))
				       (let ((db    (open-db)))





					 (set! kill-job? (test-get-kill-request db run-id test-name itemdat))
					 (test-update-meta-info db run-id test-name itemdat minutes)
					 (if kill-job? 
					     (begin
					       (mutex-lock! m)
					       (let* ((pid (vector-ref exit-info 0)))
						 (if (number? pid)
						     (begin
						       (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")")
						       ;;(cond
						       ;;((>   kill-tries 0) ; 2)
						       (let ((processes (cmd-run->list (conc "pgrep -l -P " pid))))
							 (for-each 
							  (lambda (p)
							    (let* ((parts  (string-split p))
								   (p-id   (if (> (length parts) 0)
									       (string->number (car parts))
									       #f)))
							      (if p-id
								  (begin
								    (debug:print 0 "Killing " (cadr parts) "; kill -9  " p-id)
								    (system (conc "kill -9 " p-id))))))
							  (car processes))
							 (system (conc "kill -9 " pid))))
						     ;;(let* ((ppid (process-group-id pid))
						     ;;       (kcmd (conc "pkill -9 -g " ppid)))
						     ;;  ;; (process-signal pid signal/term)
						     ;;  ;; (process-signal pid signal/kill)
						     ;;  (debug:print 0 "Attempting to kill pid " pid " and children in process group " ppid " with command:\n    " kcmd)
						     ;;  (debug:print 0 "Children:")
						     ;;  (system (conc "pgrep -g -l " ppid))
						     ;;  (system kcmd)
						     ;;  (sleep 1) ;; give it a rest
						     ;;  (test-set-status! db run-id test-name "KILLED"  "FAIL"
						     ;;       	     itemdat (args:get-arg "-m"))
						     ;;  (sqlite3:finalize! db)
						     ;;  (exit 1)))))
						     (begin
						       (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						       (test-set-status! db run-id test-name "KILLED"  "FAIL"
									 itemdat (args:get-arg "-m") #f)
						       (sqlite3:finalize! db)
						       (exit 1))))
					       ;;     (thread-terminate! job-thread)))
					       (set! kill-tries (+ 1 kill-tries))
					       (mutex-unlock! m)))
					 ;; (handle-exceptions
					       ;;  exn
					       ;;  (begin
					       ;;    (debug:print 0 "ERROR: Problem killing process " (vector-ref exit-info 0))
					       ;;    (abort exn))
					       ;;  (let* ((pid   (vector-ref exit-info 0))
					       ;;         ;; (pgid  (process-group-id pid))
					       ;;         ;; (cmd  (conc "pkill -9 -P " pgid))
					       ;;         )
					       ;;    ;; (debug:print 0 "Running \"" cmd "\"")
					       ;;    ;; (system cmd)
					       ;;    (debug:print 0 "Running \"kill -9 " pid "\"")
					       ;;    (system (conc "kill -9 " pid))
					       ;;    ;; (process-signal (vector-ref exit-info 0) signal/kill)
					       ;;    ))))
					 (sqlite3:finalize! db)
					 (thread-sleep! (+ 8 (random 4))) ;; add some jitter to the call home time to spread out the db accesses
					 (loop (calc-minutes)))))))
		   (th1          (make-thread monitorjob))
		   (th2          (make-thread runit)))
	      (set! job-thread th2)
	      (thread-start! th1)







|
>
>
>
>
>

|







<
<













<
<
<
<
<
<
<
<
<
<
<
<
<






<


<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466


467
468
469
470
471
472
473
474
475
476
477
478
479













480
481
482
483
484
485

486
487















488
489
490
491
492
493
494
							   (inexact->exact 
							    (round 
							     (- 
							      (current-seconds) 
							      start-seconds)))))
					  (kill-tries 0))
				     (let loop ((minutes   (calc-minutes)))
				       (let* ((db       (open-db))
					      (cpuload  (get-cpu-load))
					      (diskfree (get-df (current-directory)))
					      (tmpfree  (get-df "/tmp")))
					 (if (not cpuload)  (begin (debug:print 0 "WARNING: CPULOAD not found.")  (set! cpuload "n/a")))
					 (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a")))
					 (set! kill-job? (test-get-kill-request db run-id test-name itemdat))
					 (test-update-meta-info db run-id test-name itemdat minutes cpuload diskfree tmpfree)
					 (if kill-job? 
					     (begin
					       (mutex-lock! m)
					       (let* ((pid (vector-ref exit-info 0)))
						 (if (number? pid)
						     (begin
						       (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")")


						       (let ((processes (cmd-run->list (conc "pgrep -l -P " pid))))
							 (for-each 
							  (lambda (p)
							    (let* ((parts  (string-split p))
								   (p-id   (if (> (length parts) 0)
									       (string->number (car parts))
									       #f)))
							      (if p-id
								  (begin
								    (debug:print 0 "Killing " (cadr parts) "; kill -9  " p-id)
								    (system (conc "kill -9 " p-id))))))
							  (car processes))
							 (system (conc "kill -9 " pid))))













						     (begin
						       (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						       (test-set-status! db run-id test-name "KILLED"  "FAIL"
									 itemdat (args:get-arg "-m") #f)
						       (sqlite3:finalize! db)
						       (exit 1))))

					       (set! kill-tries (+ 1 kill-tries))
					       (mutex-unlock! m)))















					 (sqlite3:finalize! db)
					 (thread-sleep! (+ 8 (random 4))) ;; add some jitter to the call home time to spread out the db accesses
					 (loop (calc-minutes)))))))
		   (th1          (make-thread monitorjob))
		   (th2          (make-thread runit)))
	      (set! job-thread th2)
	      (thread-start! th1)

Modified runs.scm from [e4e9795012] to [77923710f4].

414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
		  diskfree
		  uname
		  runpath
		  run-id
		  testname
		  item-path)))

(define (test-update-meta-info db run-id testname itemdat minutes)
  (let ((item-path (item-list->path itemdat))
	(cpuload  (get-cpu-load))
	(diskfree (get-df (current-directory))))
    (if (not cpuload)  (begin (debug:print 0 "WARNING: CPULOAD not found.")  (set! cpuload "n/a")))
    (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a")))
    (if (not item-path)(begin (debug:print 0 "WARNING: ITEMPATH not set.")   (set! item-path "")))
    ;; (let ((testinfo (db:get-test-info db run-id testname item-path)))
    ;;   (if (and (not (equal? (db:test-get-status testinfo) "COMPLETED"))
    ;;            (not (equal? (db:test-get-status testinfo) "KILLREQ"))
    (sqlite3:execute
     db
     "UPDATE tests SET cpuload=?,diskfree=?,run_duration=?,state='RUNNING' WHERE run_id=? AND testname=? AND item_path=? AND state NOT IN ('COMPLETED','KILLREQ','KILLED');"







|
|
<
<
<
<







414
415
416
417
418
419
420
421
422




423
424
425
426
427
428
429
		  diskfree
		  uname
		  runpath
		  run-id
		  testname
		  item-path)))

(define (test-update-meta-info db run-id testname itemdat minutes cpuload diskfree tmpfree)
  (let ((item-path (item-list->path itemdat)))




    (if (not item-path)(begin (debug:print 0 "WARNING: ITEMPATH not set.")   (set! item-path "")))
    ;; (let ((testinfo (db:get-test-info db run-id testname item-path)))
    ;;   (if (and (not (equal? (db:test-get-status testinfo) "COMPLETED"))
    ;;            (not (equal? (db:test-get-status testinfo) "KILLREQ"))
    (sqlite3:execute
     db
     "UPDATE tests SET cpuload=?,diskfree=?,run_duration=?,state='RUNNING' WHERE run_id=? AND testname=? AND item_path=? AND state NOT IN ('COMPLETED','KILLREQ','KILLED');"

Added server.scm version [f6c984417d].





















































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58

;; Copyright 2006-2011, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

;; procstr is the name of the procedure to be called as a string
(define (server:autoremote procstr params)
  (handle-exceptions
   exn
   (begin
     (debug:print 1 "Remote failed for " proc " " params)
     (apply (eval (string->symbol proc)) params))
   (if *runremote*
       (apply (eval (string->symbol (conc "remote:" procstr))) params)
       (eval (string->symbol procstr) params))))

(define (server:start db)
  (debug:print 0 "Attempting to start the server ...")
  (let* ((rpc:listener (server:find-free-port-and-open (rpc:default-server-port)))
	 (th1          (make-thread
			(cute (rpc:make-server rpc:listener) "rpc:server")
			'rpc:server)))
    (db:set-var db "SERVER" (conc (get-host-name) ":" (rpc:default-server-port)))
    (rpc:publish-procedure! 
     'remote:run 
     (lambda (procstr . params)
       (server:autoremote procstr params)))
    (set! *rpc:listener* rpc:listener*)
    (thread-start! rpc:server)))

(define (server:find-free-port-and-open port)
  (handle-exceptions
   exn
   (begin
     (print "Failed to bind to port " (rpc:default-server-port) ", trying next port")
     (server:find-free-port-and-open (+ port 1)))
   (rpc:default-server-port port)
   (tcp-listen (rpc:default-server-port))))

(define (server:client-setup db)
  (let* ((hostinfo (db:get-var db "SERVER"))
	 (hostdat  (if hostinfo (string-split hostinfo ":")))
	 (host     (if hostinfo (car hostdat)))
	 (port     (if (and hostinfo (> (length hostdat) 1))(cadr hostdat) #f)))
    
    (rpc:publish-procedure!
     'query
     host
     (lambda (sql callback)
       (print "Executing query '" sql "' ...")
       (sqlite3:for-each-row
	callback
	db sql))))