Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -10,10 +10,12 @@ ;;====================================================================== (use sqlite3 srfi-1 posix regex-case base64 format dot-locking csv-xml) (require-extension sqlite3 regex posix) +(require-extension (srfi 18) extras tcp rpc) + (import (prefix sqlite3 sqlite3:)) (import (prefix base64 base64:)) ;; (require-library margs) (include "margs.scm") @@ -30,10 +32,11 @@ (define *already-seen-runconfig-info* #f) (define *waiting-queue* (make-hash-table)) (define *globalexitstatus* 0) ;; attempt to work around possible thread issues (define *passnum* 0) ;; when running track calls to run-tests or similar (define *verbosity* 1) +(define *rpc:listener* #f) ;; if set up for server communication this will hold the tcp port (define-inline (get-with-default val default) (let ((val (args:get-arg val))) (if val val default))) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -447,22 +447,25 @@ (- (current-seconds) start-seconds))))) (kill-tries 0)) (let loop ((minutes (calc-minutes))) - (let ((db (open-db))) + (let* ((db (open-db)) + (cpuload (get-cpu-load)) + (diskfree (get-df (current-directory))) + (tmpfree (get-df "/tmp"))) + (if (not cpuload) (begin (debug:print 0 "WARNING: CPULOAD not found.") (set! cpuload "n/a"))) + (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a"))) (set! kill-job? (test-get-kill-request db run-id test-name itemdat)) - (test-update-meta-info db run-id test-name itemdat minutes) + (test-update-meta-info db run-id test-name itemdat minutes cpuload diskfree tmpfree) (if kill-job? (begin (mutex-lock! m) (let* ((pid (vector-ref exit-info 0))) (if (number? pid) (begin (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")") - ;;(cond - ;;((> kill-tries 0) ; 2) (let ((processes (cmd-run->list (conc "pgrep -l -P " pid)))) (for-each (lambda (p) (let* ((parts (string-split p)) (p-id (if (> (length parts) 0) @@ -472,47 +475,18 @@ (begin (debug:print 0 "Killing " (cadr parts) "; kill -9 " p-id) (system (conc "kill -9 " p-id)))))) (car processes)) (system (conc "kill -9 " pid)))) - ;;(let* ((ppid (process-group-id pid)) - ;; (kcmd (conc "pkill -9 -g " ppid))) - ;; ;; (process-signal pid signal/term) - ;; ;; (process-signal pid signal/kill) - ;; (debug:print 0 "Attempting to kill pid " pid " and children in process group " ppid " with command:\n " kcmd) - ;; (debug:print 0 "Children:") - ;; (system (conc "pgrep -g -l " ppid)) - ;; (system kcmd) - ;; (sleep 1) ;; give it a rest - ;; (test-set-status! db run-id test-name "KILLED" "FAIL" - ;; itemdat (args:get-arg "-m")) - ;; (sqlite3:finalize! db) - ;; (exit 1))))) (begin (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process") (test-set-status! db run-id test-name "KILLED" "FAIL" itemdat (args:get-arg "-m") #f) (sqlite3:finalize! db) (exit 1)))) - ;; (thread-terminate! job-thread))) (set! kill-tries (+ 1 kill-tries)) (mutex-unlock! m))) - ;; (handle-exceptions - ;; exn - ;; (begin - ;; (debug:print 0 "ERROR: Problem killing process " (vector-ref exit-info 0)) - ;; (abort exn)) - ;; (let* ((pid (vector-ref exit-info 0)) - ;; ;; (pgid (process-group-id pid)) - ;; ;; (cmd (conc "pkill -9 -P " pgid)) - ;; ) - ;; ;; (debug:print 0 "Running \"" cmd "\"") - ;; ;; (system cmd) - ;; (debug:print 0 "Running \"kill -9 " pid "\"") - ;; (system (conc "kill -9 " pid)) - ;; ;; (process-signal (vector-ref exit-info 0) signal/kill) - ;; )))) (sqlite3:finalize! db) (thread-sleep! (+ 8 (random 4))) ;; add some jitter to the call home time to spread out the db accesses (loop (calc-minutes))))))) (th1 (make-thread monitorjob)) (th2 (make-thread runit))) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -416,16 +416,12 @@ runpath run-id testname item-path))) -(define (test-update-meta-info db run-id testname itemdat minutes) - (let ((item-path (item-list->path itemdat)) - (cpuload (get-cpu-load)) - (diskfree (get-df (current-directory)))) - (if (not cpuload) (begin (debug:print 0 "WARNING: CPULOAD not found.") (set! cpuload "n/a"))) - (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a"))) +(define (test-update-meta-info db run-id testname itemdat minutes cpuload diskfree tmpfree) + (let ((item-path (item-list->path itemdat))) (if (not item-path)(begin (debug:print 0 "WARNING: ITEMPATH not set.") (set! item-path ""))) ;; (let ((testinfo (db:get-test-info db run-id testname item-path))) ;; (if (and (not (equal? (db:test-get-status testinfo) "COMPLETED")) ;; (not (equal? (db:test-get-status testinfo) "KILLREQ")) (sqlite3:execute ADDED server.scm Index: server.scm ================================================================== --- /dev/null +++ server.scm @@ -0,0 +1,58 @@ + +;; Copyright 2006-2011, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;; procstr is the name of the procedure to be called as a string +(define (server:autoremote procstr params) + (handle-exceptions + exn + (begin + (debug:print 1 "Remote failed for " proc " " params) + (apply (eval (string->symbol proc)) params)) + (if *runremote* + (apply (eval (string->symbol (conc "remote:" procstr))) params) + (eval (string->symbol procstr) params)))) + +(define (server:start db) + (debug:print 0 "Attempting to start the server ...") + (let* ((rpc:listener (server:find-free-port-and-open (rpc:default-server-port))) + (th1 (make-thread + (cute (rpc:make-server rpc:listener) "rpc:server") + 'rpc:server))) + (db:set-var db "SERVER" (conc (get-host-name) ":" (rpc:default-server-port))) + (rpc:publish-procedure! + 'remote:run + (lambda (procstr . params) + (server:autoremote procstr params))) + (set! *rpc:listener* rpc:listener*) + (thread-start! rpc:server))) + +(define (server:find-free-port-and-open port) + (handle-exceptions + exn + (begin + (print "Failed to bind to port " (rpc:default-server-port) ", trying next port") + (server:find-free-port-and-open (+ port 1))) + (rpc:default-server-port port) + (tcp-listen (rpc:default-server-port)))) + +(define (server:client-setup db) + (let* ((hostinfo (db:get-var db "SERVER")) + (hostdat (if hostinfo (string-split hostinfo ":"))) + (host (if hostinfo (car hostdat))) + (port (if (and hostinfo (> (length hostdat) 1))(cadr hostdat) #f))) + + (rpc:publish-procedure! + 'query + host + (lambda (sql callback) + (print "Executing query '" sql "' ...") + (sqlite3:for-each-row + callback + db sql))))