Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -21,16 +21,19 @@
 (define getenv get-environment-variable)
 
 (define home (getenv "HOME"))
 (define user (getenv "USER"))
 
+;; global gletches
 (define *configinfo* #f)
 (define *configdat*  #f)
 (define *toppath*    #f)
 (define *already-seen-runconfig-info* #f)
 (define *waiting-queue* (make-hash-table))
 (define *globalexitstatus* 0) ;; attempt to work around possible thread issues
+(define *passnum*     0) ;; when running track calls to run-tests or similar
+
 
 (define-inline (get-with-default val default)
   (let ((val (args:get-arg val)))
     (if val val default)))
 

Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -212,10 +212,21 @@
 		   run-id test-name (item-list->path itemdat)))
 ;; 
 (define (db:delete-test-records db test-id)
   (sqlite3:execute db "DELETE FROM test_steps WHERE test_id=?;" test-id)
   (sqlite3:execute db "DELETE FROM tests WHERE id=?;" test-id))
+
+;; set tests with state currstate and status currstatus to newstate and newstatus
+;; use currstate = #f and or currstatus = #f to apply to any state or status respectively
+;; WARNING: SQL injection risk
+(define (db:set-tests-state-status db run-id tests currstate currstatus newstate newstatus)
+  (sqlite3:execute db (conc "UPDATE tests SET state=?,status=? WHERE "
+			    (if currstate  (conc "state='" currstate "' AND ") "")
+			    (if currstatus (conc "status='" currstatus "' AND ") "")
+			    " testname in "
+			    "('" (string-intersperse tests "','") "')")
+		   newstate newstatus))
 
 (define (db:get-count-tests-running db)
   (let ((res 0))
     (sqlite3:for-each-row
      (lambda (count)

Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -45,15 +45,16 @@
   -showkeys               : show the keys used in this megatest setup
 
 Misc 
   -force                  : override some checks
   -xterm                  : start an xterm instead of launching the test
-  -remove-runs            : remove the data for a run, requires fields, :runname 
+  -remove-runs            : remove the data for a run, requires all fields be specified
+                            and :runname ,-testpatt and -itempatt
                             and -testpatt
-  -testpatt patt          : remove tests matching patt (requires -remove-runs)
   -keepgoing              : continue running until no jobs are \"LAUNCHED\" or
                             \"NOT_STARTED\"
+  -rerun FAIL,WARN...     : re-run if called on a test that previously ran
 
 Helpers
   -runstep stepname  ...  : take remaining params as comand and execute as stepname
                             log will be in stepname.log. Best to put command in quotes
   -logpro file            : with -exec apply logpro file to stepname.log, creates
@@ -83,10 +84,11 @@
 			"-setlog"
 			"-set-toplog"
 			"-runstep"
 			"-logpro"
 			"-m"
+			"-rerun"
 			) 
 		 (list  "-h"
 		        "-force"
 		        "-xterm"
 		        "-showkeys"
@@ -117,10 +119,12 @@
 
 ;;======================================================================
 ;; Remove old run(s)
 ;;======================================================================
 
+;; since several actions can be specified on the command line the removal
+;; is done first
 (define (remove-runs)
   (cond
    ((not (args:get-arg ":runname"))
     (print "ERROR: Missing required parameter for -remove-runs, you must specify the run name pattern with :runname patt")
     (exit 2))
@@ -482,11 +486,12 @@
 		    (begin
 		      (print "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result")
 		      (test-set-status! db run-id test-name
 					(if kill-job? "KILLED" "COMPLETED")
 					(if (vector-ref exit-info 1) ;; look at the exit-status
-					    (if (eq? (vector-ref exit-info 2) 0)
+					    (if (and (not kill-job?) 
+						     (eq? (vector-ref exit-info 2) 0))
 						"PASS"
 						"FAIL")
 					    "FAIL") itemdat (args:get-arg "-m")))))
 	      (mutex-unlock! m)
 	      ;; (exec-results (cmd-run->list fullrunscript)) ;;  (list ">" (conc test-name "-run.log"))))

Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -250,16 +250,22 @@
 
 (define (run-tests db test-names)
   (let* ((keys        (db-get-keys db))
 	 (keyvallst   (keys->vallist keys #t))
 	 (run-id      (register-run db keys))) ;;  test-name)))
+    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
+    ;; -keepgoing is specified
+    (if (and (eq? *passnum* 0)
+	     (args:get-arg "-keepgoing"))
+	(db:set-tests-state-status db run-id test-names #f "FAIL" "NOT_STARTED" "FAIL"))
+    (set! *passnum* (+ *passnum* 1))
     (let loop ((numtimes 0))
       (for-each 
        (lambda (test-name)
 	 (let ((num-running (db:get-count-tests-running db))
 	       (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
-	   (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
+	   ;; (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
 	   (if (or (not max-concurrent-jobs)
 		   (and max-concurrent-jobs
 			(string->number max-concurrent-jobs)
 			(not (>= num-running (string->number max-concurrent-jobs)))))
 	       (run-one-test db run-id test-name keyvallst)
@@ -270,14 +276,16 @@
 	  (let ((estrem (db:estimated-tests-remaining db run-id)))
 	    (if (> estrem 0)
 		(begin
 		  (print "Keep going, estimated " estrem " tests remaining to run, will continue in 10 seconds ...")
 		  (sleep 10)
+		  ;; (run-waiting-tests db)
 		  (loop (+ numtimes 1)))))))))
 	   
 ;; VERY INEFFICIENT! Move stuff that should be done once up to calling proc
 (define (run-one-test db run-id test-name keyvallst)
+  (run-waiting-tests db)
   (print "Launching test " test-name)
   ;; All these vars might be referenced by the testconfig file reader
   (setenv "MT_TEST_NAME" test-name) ;; 
   (setenv "MT_RUNNAME"   (args:get-arg ":runname"))
   (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
@@ -308,14 +316,14 @@
 		     (tal     (cdr allitems)))
 	    ;; (lambda (itemdat) ;;; ((ripeness "overripe") (temperature "cool") (season "summer"))
 	    (let* ((item-path     (item-list->path itemdat)) ;; (string-intersperse (map cadr itemdat) "/"))
 		   (new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
 		   (new-test-name (if (equal? item-path "") test-name (conc test-name "/" item-path))) ;; just need it to be unique
-		   (test-status   #f)
+		   (testdat   #f)
 		   (num-running (db:get-count-tests-running db))
 		   (max-concurrent-jobs (config-lookup *configdat* "setup" "max_concurrent_jobs")))
-	      (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
+	      ;; (print "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
 	      (if (not (or (not max-concurrent-jobs)
 			   (and max-concurrent-jobs
 				(string->number max-concurrent-jobs)
 				(not (>= num-running (string->number max-concurrent-jobs))))))
 		  (print "WARNING: Max running jobs exceeded, current number running: " num-running 
@@ -326,64 +334,80 @@
 		      (if (and (not ts)
 			       (< ct 10))
 			  (begin
 			    (register-test db run-id test-name item-path)
 			    (db:test-set-comment db run-id test-name item-path "")
-			    ;; (test-set-status! db run-id test-name "NOT_STARTED" "n/a" itemdat "")
-			    ;; (db:set-comment-for-test db run-id test-name item-path "")
-
-			    ;; Move the next line into the test exectute code
-			    ;; (db:delete-test-step-records db run-id test-name) ;; clean out if this is a re-run
-
 			    (loop2 (db:get-test-info db run-id test-name item-path)
 				   (+ ct 1)))
 			  (if ts
-			      (set! test-status ts)
+			      (set! testdat ts)
 			      (begin
 				(print "WARNING: Couldn't register test " test-name " with item path " item-path ", skipping")
 				(if (not (null? tal))
 				    (loop (car tal)(cdr tal)))))))
 		    (change-directory test-path)
 		    ;; this block is here only to inform the user early on
 		    (if (file-exists? runconfigf)
 			(setup-env-defaults db runconfigf run-id *already-seen-runconfig-info*)
 			(print "WARNING: You do not have a run config file: " runconfigf))
-		    ;; (print "run-id: " run-id " test-name: " test-name " item-path: " item-path " test-status: " (test:get-status test-status) " test-state: " (test:get-state test-status))
+		    ;; (print "run-id: " run-id " test-name: " test-name " item-path: " item-path " testdat: " (test:get-status testdat) " test-state: " (test:get-state testdat))
 		    (case (if (args:get-arg "-force")
 			      'NOT_STARTED
-			      (if test-status
-				  (string->symbol (test:get-state test-status))
+			      (if testdat
+				  (string->symbol (test:get-state testdat))
 				  'failed-to-insert))
 		      ((failed-to-insert)
 		       (print "ERROR: Failed to insert the record into the db"))
-		      ((NOT_STARTED COMPLETED) ;; (cadr status is the row id for the run record)
-		       (if (and (equal? (test:get-state test-status)  "COMPLETED")
-				(or (equal? (test:get-status test-status) "PASS")
-				    (equal? (test:get-status test-status) "WARN")
-				    (equal? (test:get-status test-status) "CHECK"))
-				(not (args:get-arg "-force")))
-			   (print "NOTE: Not starting test " new-test-name " as it is state \"COMPLETED\" and status \"" (test:get-status test-status) "\", use -force to override")
-			   (let* ((get-prereqs-cmd (lambda ()
-						     (db-get-prereqs-not-met db run-id waiton))) ;; check before running ....
-				  (launch-cmd      (lambda ()
-						     (launch-test db run-id test-conf keyvallst test-name test-path itemdat)))
-				  (testrundat      (list get-prereqs-cmd launch-cmd)))
-			     (if (or (args:get-arg "-force")
-				     (null? ((car testrundat)))) ;; are there any tests that must be run before this one...
-				 ((cadr testrundat)) ;; this is the line that launches the test to the remote host
-				 (hash-table-set! *waiting-queue* new-test-name testrundat)))))
+		      ((NOT_STARTED COMPLETED)
+		       (print "Got here, " (test:get-state testdat))
+		       (let ((runflag #f))
+			 (cond
+			  ;; -force, run no matter what
+			  ((args:get-arg "-force")(set! runflag #t))
+			  ;; NOT_STARTED, run no matter what
+			  ((equal? (test:get-state testdat) "NOT_STARTED")(set! runflag #t))
+			  ;; not -rerun and PASS, WARN or CHECK, do no run
+			  ((and (or (not (args:get-arg "-rerun"))
+				    (args:get-arg "-keepgoing"))
+				(member (test:get-status testdat) '("PASS" "WARN" "CHECK")))
+			   (set! runflag #f))
+			  ;; -rerun and status is one of the specifed, run it
+			  ((and (args:get-arg "-rerun")
+				(let ((rerunlst (string-split (args:get-arg "-rerun") ","))) ;; FAIL,
+				  (member (test:get-status testdat) rerunlst)))
+			   (set! runflag #t))
+			  ;; -keepgoing, do not rerun FAIL
+			  ((and (args:get-arg "-keepgoing")
+				(member (test:get-status testdat) '("FAIL")))
+			   (set! runflag #f))
+			  ((and (not (args:get-arg "-rerun"))
+				(member (test:get-status testdat) '("FAIL" "n/a")))
+			   (set! runflag #t))
+			  (else (set! runflag #f)))
+			 ;; (print "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat))
+			 (if (not runflag)
+			     (print "NOTE: Not starting test " new-test-name " as it is state \"COMPLETED\" and status \"" (test:get-status testdat) "\", use -force to override")
+			     (let* ((get-prereqs-cmd (lambda ()
+						       (db-get-prereqs-not-met db run-id waiton))) ;; check before running ....
+				    (launch-cmd      (lambda ()
+						       (launch-test db run-id test-conf keyvallst test-name test-path itemdat)))
+				    (testrundat      (list get-prereqs-cmd launch-cmd)))
+			       (if (or (args:get-arg "-force")
+				       (null? ((car testrundat)))) ;; are there any tests that must be run before this one...
+				   ((cadr testrundat)) ;; this is the line that launches the test to the remote host
+				   (hash-table-set! *waiting-queue* new-test-name testrundat))))))
 		      ((KILLED) 
 		       (print "NOTE: " new-test-name " is already running or was explictly killed, use -force to launch it."))
 		      ((LAUNCHED REMOTEHOSTSTART RUNNING)  
-		       (if (> (- (current-seconds)(+ (db:test-get-event_time test-status)
-						     (db:test-get-run_duration test-status)))
+		       (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
+						     (db:test-get-run_duration testdat)))
 			      100) ;; i.e. no update for more than 100 seconds
 			   (begin
-			     (print "WARNING: Test " test-name " appears to be dead.")
+			     (print "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
 			     (test-set-status! db run-id test-name "INCOMPLETE" "STUCK/DEAD" itemdat "Test is stuck or dead"))
 			   (print "NOTE: " test-name " is already running")))
-		      (else       (print "ERROR: Failed to launch test " new-test-name ". Unrecognised state " (test:get-state test-status))))))
+		      (else       (print "ERROR: Failed to launch test " new-test-name ". Unrecognised state " (test:get-state testdat))))))
 	      (if (not (null? tal))
 		  (loop (car tal)(cdr tal)))))))))
 
 (define (run-waiting-tests db)
   (let ((numtries           0)

Index: utils/nbfake
==================================================================
--- utils/nbfake
+++ utils/nbfake
@@ -7,6 +7,6 @@
 fi
 
 # Can't always trust $PWD
 CURRWD=`pwd`
 
-ssh -n -f $TARGETHOST "sh -c \"cd $CURRWD; export PATH=$PATH; nohup $* > NBFAKE-`date +%GWW%V.%u_%T` 2>&1 &\""
+ssh -n -f $TARGETHOST "sh -c \"cd $CURRWD;export DISPLAY=$DISPLAY; export PATH=$PATH; nohup $* > NBFAKE-`date +%GWW%V.%u_%T` 2>&1 &\""