Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -513,11 +513,11 @@ (set! bestsize freespc))))) (map car disks))) (if (and best (> bestsize 0)) best (begin - (if (common:low-noise-print 20 "disks" disk-num) + (if (common:low-noise-print 20 "no valid disks") (debug:print 0 "ERROR: No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists!")) (exit 1))))) ;; Desired directory structure: ;; Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -194,10 +194,11 @@ (debug:print 1 "WARNING: number of jobs " num-running-in-jobgroup " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit)) #t) (else #f)))) (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit))))) + ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; @@ -210,12 +211,25 @@ (test-records (make-hash-table)) ;; need to process runconfigs before generating these lists (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names #f) ;; (hash-table-keys all-tests-registry)) (test-names #f) ;; (tests:filter-test-names all-test-names test-patts)) - (required-tests #f)) ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work + (required-tests #f) ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work + (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) + (tasks-db (tasks:open-db))) + (set-signal-handler! signal/int + (lambda (signum) + (let ((tdb (tasks:open-db))) + (tasks:set-state-given-param-key tdb task-key "killed") + (sqlite3:finalize! tdb)) + (print "Killed by sigint. Exiting") + (exit))) + + ;; register this run in monitor.db + (tasks:add tasks-db "run-tests" user target runname test-patts task-key) ;; params) + (tasks:set-state-given-param-key tasks-db task-key "running") (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process (if (file-exists? runconfigf) (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target) (debug:print 0 "WARNING: You do not have a run config file: " runconfigf)) @@ -354,11 +368,12 @@ (if (not (hash-table-ref/default flags "-rerun" #f)) (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS")) (runs:run-tests target runname test-patts user flags run-count: (- run-count 1))))) (debug:print-info 0 "No tests to run"))) (debug:print-info 4 "All done by here") - )) + (tasks:set-state-given-param-key tasks-db task-key "done") + (sqlite3:finalize! tasks-db))) ;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable. ;; ;; If reg not full and have items in tal then loop with (car tal)(cdr tal) reg reruns @@ -1353,10 +1368,11 @@ ;; NB// should pass in keys? ;; (define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(remove-data-only #f)) (common:clear-caches) ;; clear all caches (let* ((db #f) + (tasks-db (tasks:open-db)) (keys (rmt:get-keys)) (rundat (mt:get-runs-by-patt keys runnamepatt target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1)) (states (if state (string-split state ",") '())) @@ -1379,19 +1395,24 @@ sort-by: (case action ((remove-runs) 'rundir) (else 'event_time)))))) (let* ((run-id (db:get-value-by-header run header "id")) (run-state (db:get-value-by-header run header "state")) + (run-name (db:get-value-by-header run header "runname")) (tests (if (not (equal? run-state "locked")) (proc-get-tests run-id) '())) (lasttpath "/does/not/exist/I/hope")) (debug:print-info 4 "runs:operate-on run=" run ", header=" header) (if (not (null? tests)) (begin (case action ((remove-runs) + ;; seek and kill in flight -runtests with % as testpatt here + (if (equal? testpatt "%") + (tasks:kill-runner tasks-db target run-name) + (debug:print 0 "not attempting to kill any run launcher processes as testpatt is " testpatt)) (debug:print 1 "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((set-state-status) (debug:print 1 "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((print-run) (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) @@ -1498,11 +1519,12 @@ ;; (begin ;; (debug:print 1 "Removing run dir " runpath) ;; (system (conc "rmdir -p " runpath)))) ))))) )) - runs)) + runs) + (sqlite3:finalize! tasks-db)) #t) (define (runs:remove-test-directory db test remove-data-only) (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree (real-dir (if (file-exists? run-dir) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -389,17 +389,16 @@ res)) ;; register a task (define (tasks:add mdb action owner target runname testpatt params) (sqlite3:execute mdb "INSERT INTO tasks_queue (action,owner,state,target,name,testpatt,params,creation_time,execution_time) - VALUES (?,?,'new',?,?,?,?,?,strftime('%s','now'),0);" + VALUES (?,?,'new',?,?,?,?,strftime('%s','now'),0);" action owner target runname - test - item + testpatt (if params params ""))) (define (keys:key-vals-hash->target keys key-params) (let ((tmp (hash-table-ref/default key-params (vector-ref (car keys) 0) ""))) (if (> (length keys) 1) @@ -577,10 +576,75 @@ (define (tasks:set-state mdb task-id state) (sqlite3:execute mdb "UPDATE tasks_queue SET state=? WHERE id=?;" state task-id)) + +;;====================================================================== +;; Access using task key (stored in params; (hash-table->alist flags) hostname pid +;;====================================================================== + +(define (tasks:param-key->id mdb task-params) + (handle-exceptions + exn + #f + (sqlite3:first-result mdb "SELECT id FROM tasks_queue WHERE params LIKE ?;" task-params))) + +(define (tasks:set-state-given-param-key mdb param-key new-state) + (sqlite3:execute mdb "UPDATE tasks_queue SET state=? WHERE params LIKE ?;" new-state param-key)) + +(define (tasks:get-records-given-param-key mdb param-key state-patt action-patt test-patt) + (handle-exceptions + exn + '() + (sqlite3:first-row mdb "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE + params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" + param-key state-patt action-patt test-patt))) + + +;;====================================================================== +;; Rogue items, no place to put these yet +;;====================================================================== + +(define (tasks:find-task-queue-records mdb target run-name test-patt state-patt action-patt) + ;; (handle-exceptions + ;; exn + ;; '() + ;; (sqlite3:first-row + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . b) + (set! res (cons (cons a b) res))) + mdb "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue + WHERE + target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" + target run-name state-patt action-patt test-patt) + res)) ;; ) + + +(define (tasks:kill-runner mdb target run-name) + (let ((records (tasks:find-task-queue-records mdb target run-name "%" "running" "run-tests")) + (hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string + (if (null? records) + (debug:print 0 "No run launching processes found for " target " / " run-name) + (debug:print 0 "Found " (length records) " run(s) to kill.")) + (for-each + (lambda (record) + (let* ((param-key (list-ref record 8)) + (match-dat (string-search hostpid-rx param-key)) + (hostname (cadr match-dat)) + (pid (caddr match-dat))) + (debug:print 0 "Sending SIGINT to process " pid " on host " hostname) + (if (equal? (get-host-name) hostname) + (process-signal (string->number pid) signal/int) + ;; (call-with-environment-variables + (let ((old-targethost (getenv "TARGETHOST"))) + (set-environment-variable "TARGETHOST" hostname) + (system (conc "nbfake " kill " " pid)) + (if old-targethost (set-environment-variable "TARGETHOST" old-targethost)))))) + records))) + ;;====================================================================== ;; The routines to process tasks ;;====================================================================== Index: tests/fdktestqa/fdk.config ================================================================== --- tests/fdktestqa/fdk.config +++ tests/fdktestqa/fdk.config @@ -19,11 +19,15 @@ launcher nbfake [server] # timeout 0.01 # homehost xena -homehost 143.182.225.38 +# homehost 143.182.225.38 + +# force server +server-query-threshold 0 + [jobtools] -launcher nbq -P ch_vp -C SLES11_EM64T_4G -Q /ciaf/fdk +# launcher nbq -P ch_vp -C SLES11_EM64T_4G -Q /ciaf/fdk # launcher nbfake -maxload 4 +# maxload 4