ADDED ezsteps.scm Index: ezsteps.scm ================================================================== --- /dev/null +++ ezsteps.scm @@ -0,0 +1,26 @@ + +;; Copyright 2006-2011, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') + +(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking) +(import (prefix sqlite3 sqlite3:)) + +(declare (unit ezsteps)) +(declare (uses db)) +(declare (uses common)) +(declare (uses items)) +(declare (uses runconfig)) + +(include "common_records.scm") +; (include "key_records.scm") +; (include "db_records.scm") +; (include "run_records.scm") + Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -23,10 +23,170 @@ (declare (uses db)) (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") + +(define (launch:execute encoded-cmd) + (let* ((cmdinfo (read (open-input-string (base64:base64-decode encoded-cmd))))) + (setenv "MT_CMDINFO" encoded-cmd) + (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) + (let* ((testpath (assoc/default 'testpath cmdinfo)) + (work-area (assoc/default 'work-area cmdinfo)) + (test-name (assoc/default 'test-name cmdinfo)) + (runscript (assoc/default 'runscript cmdinfo)) + (db-host (assoc/default 'db-host cmdinfo)) + (run-id (assoc/default 'run-id cmdinfo)) + (itemdat (assoc/default 'itemdat cmdinfo)) + (env-ovrd (assoc/default 'env-ovrd cmdinfo)) + (runname (assoc/default 'runname cmdinfo)) + (megatest (assoc/default 'megatest cmdinfo)) + (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) + (fullrunscript (conc testpath "/" runscript)) + (db #f)) + (debug:print 2 "Exectuing " test-name " on " (get-host-name)) + (change-directory testpath) + (setenv "MT_TEST_RUN_DIR" work-area) + (setenv "MT_TEST_NAME" test-name) + (setenv "MT_ITEM_INFO" (conc itemdat)) + (setenv "MT_RUNNAME" runname) + (setenv "MT_MEGATEST" megatest) + (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) + + (if (not (setup-for-run)) + (begin + (debug:print 0 "Failed to setup, exiting") + (exit 1))) + ;; now can find our db + (set! db (open-db)) + (change-directory work-area) + (set-run-config-vars db run-id) + ;; environment overrides are done *before* the remaining critical envars. + (alist->env-vars env-ovrd) + (set-megatest-env-vars db run-id) + (set-item-env-vars itemdat) + (save-environment-as-files "megatest") + (test-set-meta-info db run-id test-name itemdat) + (test-set-status! db run-id test-name "REMOTEHOSTSTART" "n/a" itemdat (args:get-arg "-m") #f) + (if (args:get-arg "-xterm") + (set! fullrunscript "xterm") + (if (not (file-execute-access? fullrunscript)) + (system (conc "chmod ug+x " fullrunscript)))) + ;; We are about to actually kick off the test + ;; so this is a good place to remove the records for + ;; any previous runs + ;; (db:test-remove-steps db run-id testname itemdat) + + ;; from here on out we will open and close the db + ;; on every access to reduce the probablitiy of + ;; contention or stuck access on nfs. + (sqlite3:finalize! db) + + (let* ((m (make-mutex)) + (kill-job? #f) + (exit-info (make-vector 3)) + (job-thread #f) + (runit (lambda () + ;; (let-values + ;; (((pid exit-status exit-code) + ;; (run-n-wait fullrunscript))) + (let ((pid (process-run fullrunscript))) + (let loop ((i 0)) + (let-values + (((pid-val exit-status exit-code) (process-wait pid #t))) + (mutex-lock! m) + (vector-set! exit-info 0 pid) + (vector-set! exit-info 1 exit-status) + (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (loop (+ i 1))) + )))))) + (monitorjob (lambda () + (let* ((start-seconds (current-seconds)) + (calc-minutes (lambda () + (inexact->exact + (round + (- + (current-seconds) + start-seconds))))) + (kill-tries 0)) + (let loop ((minutes (calc-minutes))) + (let* ((db (open-db)) + (cpuload (get-cpu-load)) + (diskfree (get-df (current-directory))) + (tmpfree (get-df "/tmp"))) + (if (not cpuload) (begin (debug:print 0 "WARNING: CPULOAD not found.") (set! cpuload "n/a"))) + (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a"))) + (set! kill-job? (test-get-kill-request db run-id test-name itemdat)) + (test-update-meta-info db run-id test-name itemdat minutes cpuload diskfree tmpfree) + (if kill-job? + (begin + (mutex-lock! m) + (let* ((pid (vector-ref exit-info 0))) + (if (number? pid) + (begin + (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")") + (let ((processes (cmd-run->list (conc "pgrep -l -P " pid)))) + (for-each + (lambda (p) + (let* ((parts (string-split p)) + (p-id (if (> (length parts) 0) + (string->number (car parts)) + #f))) + (if p-id + (begin + (debug:print 0 "Killing " (cadr parts) "; kill -9 " p-id) + (system (conc "kill -9 " p-id)))))) + (car processes)) + (system (conc "kill -9 " pid)))) + (begin + (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process") + (test-set-status! db run-id test-name "KILLED" "FAIL" + itemdat (args:get-arg "-m") #f) + (sqlite3:finalize! db) + (exit 1)))) + (set! kill-tries (+ 1 kill-tries)) + (mutex-unlock! m))) + (sqlite3:finalize! db) + (thread-sleep! (+ 8 (random 4))) ;; add some jitter to the call home time to spread out the db accesses + (loop (calc-minutes))))))) + (th1 (make-thread monitorjob)) + (th2 (make-thread runit))) + (set! job-thread th2) + (thread-start! th1) + (thread-start! th2) + (thread-join! th2) + (mutex-lock! m) + (set! db (open-db)) + (let* ((item-path (item-list->path itemdat)) + (testinfo (db:get-test-info db run-id test-name item-path))) + (if (not (equal? (db:test-get-state testinfo) "COMPLETED")) + (begin + (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result") + (test-set-status! db run-id test-name + (if kill-job? "KILLED" "COMPLETED") + (if (vector-ref exit-info 1) ;; look at the exit-status + (if (and (not kill-job?) + (eq? (vector-ref exit-info 2) 0)) + "PASS" + "FAIL") + "FAIL") itemdat (args:get-arg "-m") #f))) + ;; for automated creation of the rollup html file this is a good place... + (if (not (equal? item-path "")) + (tests:summarize-items db run-id test-name #f)) ;; don't force - just update if no + ) + (mutex-unlock! m) + ;; (exec-results (cmd-run->list fullrunscript)) ;; (list ">" (conc test-name "-run.log")))) + ;; (success exec-results)) ;; (eq? (cadr exec-results) 0))) + (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " + work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n") + (sqlite3:finalize! db) + (if (not (vector-ref exit-info 1)) + (exit 4))))))) (define (setup-for-run) (set! *configinfo* (find-and-read-config (if (args:get-arg "-config")(args:get-arg "-config") "megatest.config"))) (set! *configdat* (if (car *configinfo*)(car *configinfo*) #f)) (set! *toppath* (if (car *configinfo*)(cadr *configinfo*) #f)) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -369,168 +369,12 @@ ;; - passes info to steps via MT_CMDINFO env var (future is to use a dot file) ;; - gathers host info and ;;====================================================================== (if (args:get-arg "-execute") - (let* ((cmdinfo (read (open-input-string (base64:base64-decode (args:get-arg "-execute")))))) - (setenv "MT_CMDINFO" (args:get-arg "-execute")) - (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) - (let* ((testpath (assoc/default 'testpath cmdinfo)) - (work-area (assoc/default 'work-area cmdinfo)) - (test-name (assoc/default 'test-name cmdinfo)) - (runscript (assoc/default 'runscript cmdinfo)) - (db-host (assoc/default 'db-host cmdinfo)) - (run-id (assoc/default 'run-id cmdinfo)) - (itemdat (assoc/default 'itemdat cmdinfo)) - (env-ovrd (assoc/default 'env-ovrd cmdinfo)) - (runname (assoc/default 'runname cmdinfo)) - (megatest (assoc/default 'megatest cmdinfo)) - (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) - (fullrunscript (conc testpath "/" runscript)) - (db #f)) - (debug:print 2 "Exectuing " test-name " on " (get-host-name)) - (change-directory testpath) - (setenv "MT_TEST_RUN_DIR" work-area) - (setenv "MT_TEST_NAME" test-name) - (setenv "MT_ITEM_INFO" (conc itemdat)) - (setenv "MT_RUNNAME" runname) - (setenv "MT_MEGATEST" megatest) - (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) - - (if (not (setup-for-run)) - (begin - (debug:print 0 "Failed to setup, exiting") - (exit 1))) - ;; now can find our db - (set! db (open-db)) - (change-directory work-area) - (set-run-config-vars db run-id) - ;; environment overrides are done *before* the remaining critical envars. - (alist->env-vars env-ovrd) - (set-megatest-env-vars db run-id) - (set-item-env-vars itemdat) - (save-environment-as-files "megatest") - (test-set-meta-info db run-id test-name itemdat) - (test-set-status! db run-id test-name "REMOTEHOSTSTART" "n/a" itemdat (args:get-arg "-m") #f) - (if (args:get-arg "-xterm") - (set! fullrunscript "xterm") - (if (not (file-execute-access? fullrunscript)) - (system (conc "chmod ug+x " fullrunscript)))) - ;; We are about to actually kick off the test - ;; so this is a good place to remove the records for - ;; any previous runs - ;; (db:test-remove-steps db run-id testname itemdat) - - ;; from here on out we will open and close the db - ;; on every access to reduce the probablitiy of - ;; contention or stuck access on nfs. - (sqlite3:finalize! db) - - (let* ((m (make-mutex)) - (kill-job? #f) - (exit-info (make-vector 3)) - (job-thread #f) - (runit (lambda () - ;; (let-values - ;; (((pid exit-status exit-code) - ;; (run-n-wait fullrunscript))) - (let ((pid (process-run fullrunscript))) - (let loop ((i 0)) - (let-values - (((pid-val exit-status exit-code) (process-wait pid #t))) - (mutex-lock! m) - (vector-set! exit-info 0 pid) - (vector-set! exit-info 1 exit-status) - (vector-set! exit-info 2 exit-code) - (mutex-unlock! m) - (if (eq? pid-val 0) - (begin - (thread-sleep! 2) - (loop (+ i 1))) - )))))) - (monitorjob (lambda () - (let* ((start-seconds (current-seconds)) - (calc-minutes (lambda () - (inexact->exact - (round - (- - (current-seconds) - start-seconds))))) - (kill-tries 0)) - (let loop ((minutes (calc-minutes))) - (let* ((db (open-db)) - (cpuload (get-cpu-load)) - (diskfree (get-df (current-directory))) - (tmpfree (get-df "/tmp"))) - (if (not cpuload) (begin (debug:print 0 "WARNING: CPULOAD not found.") (set! cpuload "n/a"))) - (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a"))) - (set! kill-job? (test-get-kill-request db run-id test-name itemdat)) - (test-update-meta-info db run-id test-name itemdat minutes cpuload diskfree tmpfree) - (if kill-job? - (begin - (mutex-lock! m) - (let* ((pid (vector-ref exit-info 0))) - (if (number? pid) - (begin - (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")") - (let ((processes (cmd-run->list (conc "pgrep -l -P " pid)))) - (for-each - (lambda (p) - (let* ((parts (string-split p)) - (p-id (if (> (length parts) 0) - (string->number (car parts)) - #f))) - (if p-id - (begin - (debug:print 0 "Killing " (cadr parts) "; kill -9 " p-id) - (system (conc "kill -9 " p-id)))))) - (car processes)) - (system (conc "kill -9 " pid)))) - (begin - (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process") - (test-set-status! db run-id test-name "KILLED" "FAIL" - itemdat (args:get-arg "-m") #f) - (sqlite3:finalize! db) - (exit 1)))) - (set! kill-tries (+ 1 kill-tries)) - (mutex-unlock! m))) - (sqlite3:finalize! db) - (thread-sleep! (+ 8 (random 4))) ;; add some jitter to the call home time to spread out the db accesses - (loop (calc-minutes))))))) - (th1 (make-thread monitorjob)) - (th2 (make-thread runit))) - (set! job-thread th2) - (thread-start! th1) - (thread-start! th2) - (thread-join! th2) - (mutex-lock! m) - (set! db (open-db)) - (let* ((item-path (item-list->path itemdat)) - (testinfo (db:get-test-info db run-id test-name item-path))) - (if (not (equal? (db:test-get-state testinfo) "COMPLETED")) - (begin - (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result") - (test-set-status! db run-id test-name - (if kill-job? "KILLED" "COMPLETED") - (if (vector-ref exit-info 1) ;; look at the exit-status - (if (and (not kill-job?) - (eq? (vector-ref exit-info 2) 0)) - "PASS" - "FAIL") - "FAIL") itemdat (args:get-arg "-m") #f))) - ;; for automated creation of the rollup html file this is a good place... - (if (not (equal? item-path "")) - (tests:summarize-items db run-id test-name #f)) ;; don't force - just update if no - ) - (mutex-unlock! m) - ;; (exec-results (cmd-run->list fullrunscript)) ;; (list ">" (conc test-name "-run.log")))) - ;; (success exec-results)) ;; (eq? (cadr exec-results) 0))) - (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " - work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n") - (sqlite3:finalize! db) - (if (not (vector-ref exit-info 1)) - (exit 4))))) + (begin + (launch:execute (args:get-arg "-execute")) (set! *didsomething* #t))) (if (args:get-arg "-step") (if (not (getenv "MT_CMDINFO")) (begin