Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -70,11 +70,10 @@
 ;; Awful. Please FIXME
 (define *env-vars-by-run-id* (make-hash-table))
 (define *current-run-name*   #f)
 
 (define (common:clear-caches)
-  (set! *target*             (make-hash-table))
   (set! *keys*               (make-hash-table))
   (set! *keyvals*            (make-hash-table))
   (set! *toptest-paths*      (make-hash-table))
   (set! *test-paths*         (make-hash-table))
   (set! *test-ids*           (make-hash-table))

Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -725,39 +725,30 @@
     (debug:print-info 11 "db:get-key-val-pairs END keys: " keys " run-id: " run-id)
     (reverse res)))
 
 ;; get key vals for a given run-id
 (define (db:get-key-vals db run-id)
-  (let ((mykeyvals (hash-table-ref/default *keyvals* run-id #f)))
-    (if mykeyvals 
-	mykeyvals
-	(let* ((keys (db:get-keys db))
-	       (res  '()))
-	  (debug:print-info 11 "db:get-key-vals START keys: " keys " run-id: " run-id)
-	  (for-each 
-	   (lambda (key)
-	     (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;")))
-	       ;; (debug:print 0 "qry: " qry)
-	       (sqlite3:for-each-row 
-		(lambda (key-val)
-		  (set! res (cons key-val res)))
-		db qry run-id)))
-	   keys)
-	  (debug:print-info 11 "db:get-key-vals END keys: " keys " run-id: " run-id)
-	  (let ((final-res (reverse res)))
-	    (hash-table-set! *keyvals* run-id final-res)
-	    final-res)))))
+  (let* ((keys (db:get-keys db))
+	 (res  '()))
+    (debug:print-info 11 "db:get-key-vals START keys: " keys " run-id: " run-id)
+    (for-each 
+     (lambda (key)
+       (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;")))
+	 ;; (debug:print 0 "qry: " qry)
+	 (sqlite3:for-each-row 
+	  (lambda (key-val)
+	    (set! res (cons key-val res)))
+	  db qry run-id)))
+     keys)
+    (debug:print-info 11 "db:get-key-vals END keys: " keys " run-id: " run-id)
+    (reverse res)))
 
 ;; The target is keyval1/keyval2..., cached in *target* as it is used often
 (define (db:get-target db run-id)
-  (let ((mytarg (hash-table-ref/default *target* run-id #f)))
-    (if mytarg
-	mytarg
-	(let* ((keyvals (db:get-key-vals db run-id))
-	       (thekey  (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/")))
-	  (hash-table-set! *target* run-id thekey)
-	  thekey))))
+  (let* ((keyvals (db:get-key-vals db run-id))
+	 (thekey  (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/")))
+    thekey))
 
 ;;======================================================================
 ;;  T E S T S
 ;;======================================================================
 

Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -548,12 +548,12 @@
 ;; NB// should pass in keys?
 ;;
 (define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f))
   (common:clear-caches) ;; clear all caches
   (let* ((db           #f)
-	 (keys         (open-run-close db:get-keys db))
-	 (rundat       (open-run-close runs:get-runs-by-patt db keys runnamepatt target))
+	 (keys         (cdb:remote-run db:get-keys db))
+	 (rundat       (cdb:remote-run runs:get-runs-by-patt db keys runnamepatt target))
 	 (header       (vector-ref rundat 0))
 	 (runs         (vector-ref rundat 1))
 	 (states       (if state  (string-split state  ",") '()))
 	 (statuses     (if status (string-split status ",") '()))
 	 (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f))))
@@ -568,11 +568,11 @@
 						(db:get-value-by-header run header k)) keys) "/"))
 	     (dirs-to-remove (make-hash-table)))
 	 (let* ((run-id    (db:get-value-by-header run header "id"))
 		(run-state (db:get-value-by-header run header "state"))
 		(tests     (if (not (equal? run-state "locked"))
-			       (open-run-close db:get-tests-for-run db run-id
+			       (cdb:remote-run db:get-tests-for-run db run-id
 						      testpatt states statuses
 						      not-in:  #f
 						      sort-by: (case action
 								 ((remove-runs) 'rundir)
 								 (else          'event_time)))
@@ -589,77 +589,104 @@
 		   ((print-run)
 		    (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
 		    action)
 		   (else
 		    (debug:print-info 0 "action not recognised " action)))
-		 (for-each
-		  (lambda (test)
-		    (let* ((item-path (db:test-get-item-path test))
-			   (test-name (db:test-get-testname test))
-			   (run-dir   (db:test-get-rundir test))    ;; run dir is from the link tree
-			   (real-dir  (if (file-exists? run-dir)
-					  (resolve-pathname run-dir)
-					  #f))
-			   (test-id   (db:test-get-id test)))
-		      ;;   (tdb       (db:open-test-db run-dir)))
-		      (debug:print-info 4 "test=" test) ;;   " (db:test-get-testname test) " id: " (db:test-get-id test) " " item-path " action: " action)
-		      (case action
-			((remove-runs) ;; the tdb is for future possible. 
-			 (open-run-close db:delete-test-records db #f (db:test-get-id test))
-			 (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
-			 (if (and real-dir 
-				  (> (string-length real-dir) 5)
-				  (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
-			     (begin ;; let* ((realpath (resolve-pathname run-dir)))
-			       (debug:print-info 1 "Recursively removing " real-dir)
-			       (if (file-exists? real-dir)
-				   (if (> (system (conc "rm -rf " real-dir)) 0)
-				       (debug:print 0 "ERROR: There was a problem removing " real-dir " with rm -f"))
-				   (debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable")))
-			     (if real-dir 
-				 (debug:print 0 "WARNING: directory " real-dir " does not exist")
-				 (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
-			 (if (symbolic-link? run-dir)
-			     (begin
-			       (debug:print-info 1 "Removing symlink " run-dir)
-			       (handle-exceptions
-				exn
-				(debug:print 0 "ERROR:  Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
-				(delete-file run-dir)))
-			     (if (directory? run-dir)
-				 (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0)
-				     (debug:print 0 "WARNING: refusing to remove " run-dir " as it is not empty")
-				      (handle-exceptions
-				       exn
-				       (debug:print 0 "ERROR:  Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
-				       (delete-directory run-dir)))
-				 (if run-dir
-				     (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
-				     (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
-				 )))
-			((set-state-status)
-			 (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status))
-			 (open-run-close db:test-set-state-status-by-id db (db:test-get-id test) (car state-status)(cadr state-status) #f)))))
-		  (sort tests (lambda (a b)(let ((dira (db:test-get-rundir a))
-						 (dirb (db:test-get-rundir b)))
-					     (if (and (string? dira)(string? dirb))
-						 (> (string-length dira)(string-length dirb))
-						 #f)))))))
+		 (let ((sorted-tests     (sort tests (lambda (a b)(let ((dira (db:test-get-rundir a))
+									(dirb (db:test-get-rundir b)))
+								    (if (and (string? dira)(string? dirb))
+									(> (string-length dira)(string-length dirb))
+									#f)))))
+		       (test-retry-time  (make-hash-table))
+		       (allow-run-time   25)) ;; seconds to allow for killing tests before just brutally killing 'em
+		   (let loop ((test (car sorted-tests))
+			      (tal  (cdr sorted-tests)))
+		     (let* ((test-id       (db:test-get-id test))
+			    (new-test-dat  (cdb:remote-run db:get-test-info-by-id #f test-id))
+			    (item-path     (db:test-get-item-path new-test-dat))
+			    (test-name     (db:test-get-testname new-test-dat))
+			    (run-dir       (db:test-get-rundir new-test-dat))    ;; run dir is from the link tree
+			    (real-dir      (if (file-exists? run-dir)
+					       (resolve-pathname run-dir)
+					       #f))
+			    (test-state    (db:test-get-state new-test-dat))
+			    (test-fulln    (db:test-get-fullname new-test-dat)))
+			   (case action
+			     ((remove-runs)
+			      (debug:print-info 0 "test-state: " test-state)
+			      (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
+				  (begin
+				    (if (not (hash-table-ref/default test-retry-time test-fulln #f))
+					(hash-table-set! test-retry-time test-fulln (current-seconds)))
+				    (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time)
+				      ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first
+				      ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give
+				      ;; up and blow it away.
+				      (begin
+					(debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing")
+					(cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) "FAILEDKILL" "n/a" #f)
+					(thread-sleep! 1))
+				      (begin
+					(cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) "KILLREQ" "n/a" #f)
+					(thread-sleep! 1)))
+				    ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ...
+				    (if (null? tal)
+					(loop new-test-dat tal)
+					(loop (car tal)(append tal (list new-test-dat)))))
+				  (begin
+				    (cdb:remote-run db:delete-test-records db #f (db:test-get-id test))
+				    (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
+				    (if (and real-dir 
+					     (> (string-length real-dir) 5)
+					     (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
+					(begin ;; let* ((realpath (resolve-pathname run-dir)))
+					  (debug:print-info 1 "Recursively removing " real-dir)
+					  (if (file-exists? real-dir)
+					      (if (> (system (conc "rm -rf " real-dir)) 0)
+						  (debug:print 0 "ERROR: There was a problem removing " real-dir " with rm -f"))
+					      (debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable")))
+					(if real-dir 
+					    (debug:print 0 "WARNING: directory " real-dir " does not exist")
+					    (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
+				    (if (symbolic-link? run-dir)
+					(begin
+					  (debug:print-info 1 "Removing symlink " run-dir)
+					  (handle-exceptions
+					   exn
+					   (debug:print 0 "ERROR:  Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
+					   (delete-file run-dir)))
+					(if (directory? run-dir)
+					    (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0)
+						(debug:print 0 "WARNING: refusing to remove " run-dir " as it is not empty")
+						(handle-exceptions
+						 exn
+						 (debug:print 0 "ERROR:  Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
+						 (delete-directory run-dir)))
+					    (if run-dir
+						(debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
+						(debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
+					    ))
+				    (if (not (null? tal))
+					(loop (car tal)(cdr tal))))))
+			     ((set-state-status)
+			      (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status))
+			      (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) (car state-status)(cadr state-status) #f)))))
+		   )))
 	   ;; remove the run if zero tests remain
 	   (if (eq? action 'remove-runs)
-	       (let ((remtests (open-run-close db:get-tests-for-run db (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t)))
+	       (let ((remtests (cdb:remote-run db:get-tests-for-run db (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t)))
 		 (if (null? remtests) ;; no more tests remaining
 		     (let* ((dparts  (string-split lasttpath "/"))
 			    (runpath (conc "/" (string-intersperse 
 						(take dparts (- (length dparts) 1))
 						"/"))))
 		       (debug:print 1 "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record")
-		       (open-run-close db:delete-run db run-id)
+		       (cdb:remote-run db:delete-run db run-id)
 		       ;; This is a pretty good place to purge old DELETED tests
-		       (open-run-close db:delete-tests-for-run db run-id)
-		       (open-run-close db:delete-old-deleted-test-records db)
-		       (open-run-close db:set-var db "DELETED_TESTS" (current-seconds))
+		       (cdb:remote-run db:delete-tests-for-run db run-id)
+		       (cdb:remote-run db:delete-old-deleted-test-records db)
+		       (cdb:remote-run db:set-var db "DELETED_TESTS" (current-seconds))
 		       ;; need to figure out the path to the run dir and remove it if empty
 		       ;;    (if (null? (glob (conc runpath "/*")))
 		       ;;        (begin
 		       ;; 	 (debug:print 1 "Removing run dir " runpath)
 		       ;; 	 (system (conc "rmdir -p " runpath))))
@@ -695,11 +722,11 @@
 	(if (not (setup-for-run))
 	    (begin 
 	      (debug:print 0 "Failed to setup, exiting")
 	      (exit 1)))
 	;; (if (args:get-arg "-server")
-	;;     (open-run-close server:start db (args:get-arg "-server")))
+	;;     (cdb:remote-run server:start db (args:get-arg "-server")))
 	(set! keys (keys:config-get-fields *configdat*))
 	;; have enough to process -target or -reqtarg here
 	(if (args:get-arg "-reqtarg")
 	    (let* ((runconfigf (conc  *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL 
 		   (runconfig  (read-config runconfigf #f #t environ-patt: #f)))
@@ -727,21 +754,21 @@
 ;; Lock/unlock runs
 ;;======================================================================
 
 (define (runs:handle-locking target keys runname lock unlock user)
   (let* ((db       #f)
-	 (rundat   (open-run-close runs:get-runs-by-patt db keys runname target))
+	 (rundat   (cdb:remote-run runs:get-runs-by-patt db keys runname target))
 	 (header   (vector-ref rundat 0))
 	 (runs     (vector-ref rundat 1)))
     (for-each (lambda (run)
 		(let ((run-id (db:get-value-by-header run header "id")))
 		  (if (or lock
 			  (and unlock
 			       (begin
 				 (print "Do you really wish to unlock run " run-id "?\n   y/n: ")
 				 (equal? "y" (read-line)))))
-		      (open-run-close db:lock/unlock-run db run-id lock unlock user)
+		      (cdb:remote-run db:lock/unlock-run db run-id lock unlock user)
 		      (debug:print-info 0 "Skipping lock/unlock on " run-id))))
 	      runs)))
 ;;======================================================================
 ;; Rollup runs
 ;;======================================================================
@@ -773,23 +800,23 @@
        (let* ((test-path    (conc *toppath* "/tests/" test-name))
 	      (test-configf (conc test-path "/testconfig"))
 	      (testexists   (and (file-exists? test-configf)(file-read-access? test-configf)))
 	      ;; read configs with tricks turned off (i.e. no system)
 	      (test-conf    (if testexists (read-config test-configf #f #f)(make-hash-table))))
-	 ;; use the open-run-close instead of passing in db
+	 ;; use the cdb:remote-run instead of passing in db
 	 (runs:update-test_meta test-name test-conf)))
      test-names)))
 
 ;; This could probably be refactored into one complex query ...
 (define (runs:rollup-run keys runname user keyvals)
   (debug:print 4 "runs:rollup-run, keys: " keys " :runname " runname " user: " user)
   (let* ((db              #f)
 	 (new-run-id      (cdb:remote-run db:register-run #f keyvals runname "new" "n/a" user))
-	 (prev-tests      (open-run-close test:get-matching-previous-test-run-records db new-run-id "%" "%"))
-	 (curr-tests      (open-run-close db:get-tests-for-run db new-run-id "%/%" '() '()))
+	 (prev-tests      (cdb:remote-run test:get-matching-previous-test-run-records db new-run-id "%" "%"))
+	 (curr-tests      (cdb:remote-run db:get-tests-for-run db new-run-id "%/%" '() '()))
 	 (curr-tests-hash (make-hash-table)))
-    (open-run-close db:update-run-event_time db new-run-id)
+    (cdb:remote-run db:update-run-event_time db new-run-id)
     ;; index the already saved tests by testname and itemdat in curr-tests-hash
     (for-each
      (lambda (testdat)
        (let* ((testname  (db:test-get-testname testdat))
 	      (item-path (db:test-get-item-path testdat))
@@ -803,23 +830,23 @@
      (lambda (testdat)
        (let* ((testname  (db:test-get-testname testdat))
 	      (item-path (db:test-get-item-path testdat))
 	      (full-name (conc testname "/" item-path))
 	      (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f))
-	      (test-steps    (open-run-close db:get-steps-for-test db (db:test-get-id testdat)))
+	      (test-steps    (cdb:remote-run db:get-steps-for-test db (db:test-get-id testdat)))
 	      (new-test-record #f))
 	 ;; replace these with insert ... select
 	 (apply sqlite3:execute 
 		db 
 		(conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) "
 		      "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);")
 		new-run-id (cddr (vector->list testdat)))
-	 (set! new-testdat (car (open-run-close db:get-tests-for-run db new-run-id (conc testname "/" item-path) '() '())))
+	 (set! new-testdat (car (cdb:remote-run db:get-tests-for-run db new-run-id (conc testname "/" item-path) '() '())))
 	 (hash-table-set! curr-tests-hash full-name new-testdat) ;; this could be confusing, which record should go into the lookup table?
 	 ;; Now duplicate the test steps
 	 (debug:print 4 "Copying records in test_steps from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat))
-	 (open-run-close 
+	 (cdb:remote-run 
 	  (lambda ()
 	    (sqlite3:execute 
 	     db 
 	     (conc "INSERT OR REPLACE INTO test_steps (test_id,stepname,state,status,event_time,comment) "
 		   "SELECT " (db:test-get-id new-testdat) ",stepname,state,status,event_time,comment FROM test_steps WHERE test_id=?;")

Index: tests.scm
==================================================================
--- tests.scm
+++ tests.scm
@@ -539,13 +539,13 @@
 ;;======================================================================
 
 ;; teststep-set-status! used to be here
 
 (define (test-get-kill-request test-id) ;; run-id test-name itemdat)
-  (let* (;; (item-path (item-list->path itemdat))
-	 (testdat   (cdb:get-test-info-by-id *runremote* test-id))) ;; run-id test-name item-path)))
-    (equal? (test:get-state testdat) "KILLREQ")))
+  (let* ((testdat   (cdb:get-test-info-by-id *runremote* test-id))) ;; run-id test-name item-path)))
+    (and testdat
+	 (equal? (test:get-state testdat) "KILLREQ"))))
 
 (define (test:tdb-get-rundat-count tdb)
   (if tdb
       (let ((res 0))
 	(sqlite3:for-each-row