Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -16,11 +16,11 @@
 #     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 
 # make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)'
 # rm <files>.o ; make install CSCOPTS='-profile' ; ... ;  chicken-profile | less
 
-all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut tcmt unitdeps.pdf
+all : $(PREFIX)/bin/.$(ARCHSTR) mtest dboard mtut 
 
 SHELL=/bin/bash
 PREFIX=$(PWD)
 CSCOPTS=
 INSTALL=install
@@ -51,11 +51,11 @@
 mofiles/dbfile.o     : \
        mofiles/debugprint.o mofiles/commonmod.o
 
 configf.o : commonmod.import.o
 mofiles/dbfile.o : mofiles/debugprint.o
-mofiles/rmtmod.o mofiles/dbmod.o : mofiles/dbfile.o
+mofiles/rmtmod.o mofiles/dbmod.o : mofiles/dbfile.o mofiles/commonmod.o mofiles/debugprint.o
 db.o : mofiles/dbmod.o mofiles/dbfile.o
 mofiles/debugprint.o : mofiles/mtargs.o
 mofiles/tcp-transportmod.o : mofiles/portlogger.o
 
 # ftail.scm rmtmod.scm commonmod.scm removed
@@ -85,10 +85,11 @@
 #	@touch $*.import.scm # ensure it is touched after the .o is made
 
 %.import.scm mofiles/%.o : %.scm
 	@mkdir -p mofiles
 	csc $(CSCOPTS) -J -c $< -o mofiles/$*.o
+	@if [[ -e $*.import.scm ]];then touch $*.import.scm;fi # ensure it is touched after the .o is made
 
 ADTLSCR=mt_laststep mt_runstep mt_ezstep
 HELPERS=$(addprefix $(PREFIX)/bin/,$(ADTLSCR))
 DEPLOYHELPERS=$(addprefix deploytarg/,$(ADTLSCR))
 MTESTHASH=$(shell fossil info|grep checkout:| awk '{print $$2}')
@@ -120,40 +121,40 @@
 mtut: $(OFILES) $(MOFILES) $(MOIMPFILES) megatest-fossil-hash.scm mtut.scm megatest-version.scm 
 	csc $(CSCOPTS) $(OFILES) $(MOFILES)  $(MOIMPFILES) mtut.scm -o mtut
 
 # include makefile.inc
 
-TCMTOBJS = \
-	api.o \
-	archive.o \
-	cgisetup/models/pgdb.o \
-	common.o \
-	configf.o \
-	db.o \
-	env.o \
-	items.o \
-	keys.o \
-	launch.o \
-	margs.o \
-	mt.o \
-	ods.o \
-	process.o \
-	rmt.o \
-	runconfig.o \
-	runs.o \
-	server.o \
-	tasks.o \
-	tdb.o \
-	tests.o \
-	subrun.o \
-        ezsteps.o
-
-#        mofiles/rmtmod.o \
-#        mofiles/commonmod.o \
-
-tcmt : $(TCMTOBJS) tcmt.scm megatest-version.scm $(MOFILES) $(MOIMPFILES)
-	csc $(CSCOPTS) $(TCMTOBJS) $(MOFILES) $(MOIMPFILES) tcmt.scm -o tcmt
+# TCMTOBJS = \
+# 	api.o \
+# 	archive.o \
+# 	cgisetup/models/pgdb.o \
+# 	common.o \
+# 	configf.o \
+# 	db.o \
+# 	env.o \
+# 	items.o \
+# 	keys.o \
+# 	launch.o \
+# 	margs.o \
+# 	mt.o \
+# 	ods.o \
+# 	process.o \
+# 	rmt.o \
+# 	runconfig.o \
+# 	runs.o \
+# 	server.o \
+# 	tasks.o \
+# 	tdb.o \
+# 	tests.o \
+# 	subrun.o \
+#         ezsteps.o
+# 
+# #        mofiles/rmtmod.o \
+# #        mofiles/commonmod.o \
+# 
+# tcmt : $(TCMTOBJS) tcmt.scm megatest-version.scm $(MOFILES) $(MOIMPFILES)
+# 	csc $(CSCOPTS) $(TCMTOBJS) $(MOFILES) $(MOIMPFILES) tcmt.scm -o tcmt
 
 # install documentation to $(PREFIX)/docs
 # DOES NOT REBUILD DOCS
 #
 $(PREFIX)/share/docs/megatest_manual.html : docs/manual/megatest_manual.html
@@ -171,11 +172,11 @@
 	$(INSTALL) mt-pg.sql $(PREFIX)/share/db/mt-pg.sql
 
 # Special dependencies for the includes
 $(MOFILE) $(MOIMPFILES) : megatest-fossil-hash.scm
 
-mofiles/commonmod.o : megatest-fossil-hash.scm
+mofiles/commonmod.o : mofiles/debugprint.o megatest-fossil-hash.scm
 common.o : mofiles/commonmod.o
 
 # mofiles/dbmod.o : mofiles/configfmod.o
 
 # commonmod.o dashboard.o megatest.o tcmt.o apimod.o : megatest-fossil-hash.scm
@@ -276,16 +277,16 @@
 	utils/mk_wrapper $(PREFIX) mtexec $(PREFIX)/bin/mtexec
 	chmod a+x $(PREFIX)/bin/mtexec
 
 # tcmt
 
-$(PREFIX)/bin/.$(ARCHSTR)/tcmt : tcmt
-	$(INSTALL) tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt
-
-$(PREFIX)/bin/tcmt : $(PREFIX)/bin/.$(ARCHSTR)/tcmt utils/mk_wrapper
-	utils/mk_wrapper $(PREFIX) tcmt $(PREFIX)/bin/tcmt
-	chmod a+x $(PREFIX)/bin/tcmt
+# $(PREFIX)/bin/.$(ARCHSTR)/tcmt : tcmt
+# 	$(INSTALL) tcmt $(PREFIX)/bin/.$(ARCHSTR)/tcmt
+# 
+# $(PREFIX)/bin/tcmt : $(PREFIX)/bin/.$(ARCHSTR)/tcmt utils/mk_wrapper
+# 	utils/mk_wrapper $(PREFIX) tcmt $(PREFIX)/bin/tcmt
+# 	chmod a+x $(PREFIX)/bin/tcmt
 
 $(PREFIX)/bin/mt_laststep : utils/mt_laststep
 	$(INSTALL) $< $@
 	chmod a+x $@
 
@@ -379,17 +380,17 @@
           $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
 	  $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
           $(PREFIX)/bin/mt-old-to-new.sh $(PREFIX)/bin/mt-new-to-old.sh \
 	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \
 	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
-	  $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \
+	  $(PREFIX)/share/db/mt-pg.sql \
           $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \
           $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \
           $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \
           $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0
 
-#         $(PREFIX)/bin/.$(ARCHSTR)/ndboard
+#         $(PREFIX)/bin/.$(ARCHSTR)/ndboard $(PREFIX)/bin/tcmt 
 #         $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \
 # $(PREFIX)/bin/newdashboard
 
 $(PREFIX)/bin/.$(ARCHSTR) : 
 	mkdir -p $(PREFIX)/bin/.$(ARCHSTR)

Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -32,10 +32,11 @@
 (import dbfile)
 (import debugprint)
 (import tcp-transportmod)
 
 (use srfi-69
+     srfi-18
      posix
      matchable
      s11n)
 
 ;; allow these queries through without starting a server
@@ -345,11 +346,14 @@
     ((csv->test-data)               (apply db:csv->test-data dbstruct params))
 
     ;; MISC
     ((sync-inmem->db)               (let ((run-id (car params)))
                                       (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t)))
-    ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))
+    ((get-toplevels-and-incompletes) (apply db:get-toplevels-and-incompletes dbstruct params))
+    ((mark-incomplete)              #f);;(thread-start! (make-thread (lambda () ;; no need to block on this one
+					;;			  (apply db:find-and-mark-incomplete dbstruct params)
+					;;			  #t)))) 
     ((create-all-triggers)          (db:create-all-triggers dbstruct))
     ((drop-all-triggers)            (db:drop-all-triggers dbstruct)) 
 
     ;; TESTMETA
     ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))

Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -2046,27 +2046,10 @@
 				  (common:write-cached-info actual-host "num-cpus" result))
 			      result))))
 	  (hash-table-set! *numcpus-cache* actual-host numcpus)
 	  numcpus))))
 
-(define (common:generic-ssh ssh-command proc default #!optional (msg-proc #f))
-  (let ((inp #f))
-    (handle-exceptions
-	exn
-      (begin
-	(close-input-port inp)
-	(if msg-proc
-	    (msg-proc)
-	    (debug:print 0 *default-log-port* "Command: \""ssh-command"\" failed. exn="exn))
-	default)
-      (set! inp (open-input-pipe ssh-command))
-      (with-input-from-port inp
-	(lambda ()
-	  (let ((res (proc)))
-	    (close-input-port inp)
-	    res))))))
-
 ;;======================================================================
 ;; wait for normalized cpu load to drop below maxload
 ;;
 (define (common:wait-for-normalized-load maxnormload msg remote-host #!optional (rem-tries 5))
   (let ((num-cpus (common:get-num-cpus remote-host)))

Index: commonmod.scm
==================================================================
--- commonmod.scm
+++ commonmod.scm
@@ -17,11 +17,11 @@
 ;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 
 ;;======================================================================
 
 (declare (unit commonmod))
-;; (declare (uses debugprint))
+(declare (uses debugprint))
 
 (use srfi-69)
 
 (module commonmod
 	*
@@ -29,10 +29,11 @@
 (import scheme)
 (cond-expand
  (chicken-4
   
   (import chicken
+	  ports
 	  
 	  (prefix sqlite3 sqlite3:)
 	  data-structures
 	  extras
 	  files
@@ -45,11 +46,14 @@
 	  regex
 	  regex-case
 	  srfi-1
 	  srfi-18
 	  srfi-69
-	  typed-records)
+	  typed-records
+
+	  debugprint
+	  )
   (use srfi-69))
  (chicken-5
   (import (prefix sqlite3 sqlite3:)
 	  ;; data-structures
 	  ;; extras
@@ -697,7 +701,46 @@
     (if (null? res)
 	(begin
 	  ;; (debug:print 0 *default-log-port* "Failed to find this executable! Using what can be found on the path")
 	  progname)
 	(car res))))
+
+(define (common:generic-ssh ssh-command proc default #!optional (msg-proc #f))
+  (let ((inp #f))
+    (handle-exceptions
+	exn
+      (begin
+	(close-input-port inp)
+	(if msg-proc
+	    (msg-proc)
+	    (debug:print 0 *default-log-port* "Command: \""ssh-command"\" failed. exn="exn))
+	default)
+      (set! inp (open-input-pipe ssh-command))
+      (with-input-from-port inp
+	(lambda ()
+	  (let ((res (proc)))
+	    (close-input-port inp)
+	    res))))))
+
+;; this is a close duplicate of:
+;;    process:alist-on-host?
+;;    process:alive
+;;
+(define (commonmod:is-test-alive host pid)
+  (let* ((same-host (equal? host (get-host-name)))
+	 (cmd (conc 
+	       (if same-host "" (conc "ssh "host" "))
+	       "pstree -A "pid)))
+    (if (and host pid
+	     (not (equal? host "n/a")))
+	
+	(let* ((output (if same-host
+			   (with-input-from-pipe cmd read-lines)
+			   (common:generic-ssh cmd read-lines '())))) ;; (with-input-from-pipe cmd read-lines)))
+	  (debug:print 2 *default-log-port* "Running " cmd " received " output)
+	  (if (eq? (length output) 0)
+	      #f
+	      #t))
+	#t))) ;; assuming bad query is about a live test is likely not the right thing to do?
+
 
 )

Index: dashboard.scm
==================================================================
--- dashboard.scm
+++ dashboard.scm
@@ -22,10 +22,11 @@
 (declare (uses mtargs))
 (declare (uses mtargs.import))
 (declare (uses keys))
 (declare (uses items))
 (declare (uses debugprint))
+(declare (uses debugprint.import))
 (declare (uses db))
 (declare (uses configf))
 (declare (uses process))
 (declare (uses launch))
 (declare (uses runs))
@@ -72,10 +73,11 @@
 ;; set some parameters here - these need to be put in something that can be loaded from other
 ;; executables such as dashboard and mtutil
 ;;
 (include "dashboard-transport-mode.scm")
 (dbfile:db-init-proc db:initialize-main-db)
+(set! rmtmod:send-receive rmt:send-receive)
 
 (define help (conc 
 	      "Megatest Dashboard, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version 
               " license GPL, Copyright (C) Matt Welland 2012-2017
 

Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -1111,99 +1111,10 @@
 ;;                 (null? oldlaunched)
 ;;                 (null? toplevels))
 ;;            #f
 ;;            #t)))))
 
-(define (db:get-status-from-final-status-file run-dir)
-  (let ((infile (conc run-dir "/.final-status")))
-    ;; first verify we are able to write the output file
-    (if (not (file-read-access? infile))
-        (begin 
-	  (debug:print 2 *default-log-port* "ERROR: cannot read " infile)
-          (debug:print 2 *default-log-port* "ERROR: run-dir is " run-dir)
-          #f
-          )
-        (with-input-from-file infile read-lines)
-	)))
-
-;;  select end_time-now from
-;;      (select testname,item_path,event_time+run_duration as
-;;                          end_time,strftime('%s','now') as now from tests where state in
-;;      ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));
-;;
-;; NOT EASY TO MIGRATE TO db{file,mod}
-;;
-(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime)
-  (let* ((incompleted '())
-	 (oldlaunched '())
-	 (toplevels   '())
-          ;; The default running-deadtime is 720 seconds = 12 minutes.
-          ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30))
-         (deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime")))
-         (server-start-allowance 200)
-         (server-overloaded-budget 200)
-         (launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30))
-         (launch-monitor-on-time-budget 30)
-         (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget))
-         (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30))
-         (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default))
-         (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))
-         (running-deadtime (or deadtime-trim running-deadtime-default))) ;; two minutes (30 seconds between updates, this leaves 3x grace period)
-
-    (debug:print-info 4  *default-log-port* "running-deadtime = " running-deadtime)
-    (debug:print-info 4  *default-log-port* "deadtime-trim = " deadtime-trim)
-
-    (let* ((dat (db:get-toplevels-and-incompletes dbstruct run-id running-deadtime remotehoststart-deadtime)))
-      (set! oldlaunched (list-ref dat 1))
-      (set! toplevels   (list-ref dat 2))
-      (set! incompleted (list-ref dat 0)))
-
-    (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, "
-		      (length toplevels) " old LAUNCHED toplevel tests and "
-		      (length incompleted) " tests marked RUNNING but apparently dead.")
-  
-    ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE.
-    ;;
-    ;; (db:delay-if-busy dbdat)
-    (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all
-	   (all-ids             (append min-incompleted-ids (map car oldlaunched))))
-      (if (> (length all-ids) 0)
-	  (begin
-	    ;; (launch:is-test-alive "localhost" 435)
-	    (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ")
-			 " as DEAD")
-	    (for-each
-             (lambda (test-id)
-               (let* ((tinfo   (db:get-test-info-by-id dbstruct run-id test-id))
-		      (run-dir (db:test-get-rundir     tinfo))
-		      (host    (db:test-get-host       tinfo))
-		      (pid     (db:test-get-process_id tinfo))
-		      (result (db:get-status-from-final-status-file run-dir)))
-		 (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) 
-		     (begin
-		       (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD")
-		       (db:set-state-status-and-roll-up-items
-			dbstruct run-id test-id 'foo "COMPLETED" "PASS"
-			"Test stopped responding but it has PASSED; marking it PASS in the DB."))
-		     (let ((is-alive (and (not (eq? pid 0))  ;; 0 is default in re-used field "attemptnum" where pid stored.
-					  (launch:is-test-alive host pid))))
-		       (if is-alive
-			   (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host
-					" has a process on pid " pid ", NOT setting to DEAD.")
-			   (begin
-			     (debug:print 0 *default-log-port* "INFO: test " test-id
-					  " final state/status is not COMPLETED/PASS. It is " result)
-			     (db:set-state-status-and-roll-up-items
-			      dbstruct run-id test-id 'foo "COMPLETED" "DEAD"
-			      "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")))))))
-	     ;; call end of eud of run detection for posthook - from merge, is it needed?
-	     ;; (launch:end-of-run-check run-id)
-	     all-ids)
-	    ;;call end of eud of run detection for posthook
-	    (launch:end-of-run-check run-id)
-	    )))))
-
 ;; BUG: Probably broken - does not explicitly use run-id in the query
 ;;
 (define (db:top-test-set-per-pf-counts dbstruct run-id test-name)
   (db:general-call dbstruct run-id 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name)))
 

Index: db_records.scm
==================================================================
--- db_records.scm
+++ db_records.scm
@@ -16,20 +16,10 @@
 ;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 
 ;;======================================================================
 ;; dbstruct
 ;;======================================================================
-
-
-;; Returns the database for a particular run-id fron the dbstruct:localdbs
-;;
-(define (dbr:dbstruct-localdb v run-id)
-  (hash-table-ref/default (dbr:dbstruct-locdbs v) run-id #f))
-
-(define (dbr:dbstruct-localdb-set! v run-id db)
-  (hash-table-set! (dbr:dbstruct-locdbs v) run-id db))
-
 
 (define (make-db:test)(make-vector 20))
 (define-inline (db:test-get-id           vec) (vector-ref vec 0))
 (define-inline (db:test-get-run_id       vec) (vector-ref vec 1))
 (define-inline (db:test-get-testname     vec) (vector-ref vec 2))

Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -840,31 +840,10 @@
 	(if (eq? (length output) 0)
 	   #f
 	   #t))
       #t))
 
-;; this is a close duplicate of:
-;;    process:alist-on-host?
-;;    process:alive
-;;
-(define (launch:is-test-alive host pid)
-  (let* ((same-host (equal? host (get-host-name)))
-	 (cmd (conc 
-	       (if same-host "" (conc "ssh "host" "))
-	       "pstree -A "pid)))
-    (if (and host pid
-	     (not (equal? host "n/a")))
-	
-	(let* ((output (if same-host
-			   (with-input-from-pipe cmd read-lines)
-			   (common:generic-ssh cmd read-lines '())))) ;; (with-input-from-pipe cmd read-lines)))
-	  (debug:print 2 *default-log-port* "Running " cmd " received " output)
-	  (if (eq? (length output) 0)
-	      #f
-	      #t))
-	#t))) ;; assuming bad query is about a live test is likely not the right thing to do?
-
 (define (launch:kill-tests-if-dead run-id)
   (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
        (let loop ((running-test (car running-tests))
 			     (tal    (cdr running-tests))
 			     (kill-cnt 0))

Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -25,16 +25,16 @@
 (declare (uses common))
 ;; (declare (uses megatest-version))
 ;; (declare (uses margs))
 ;; (declare (uses mtargs))
 ;; (declare (uses mtargs.import))
+(declare (uses debugprint))
+(declare (uses debugprint.import))
 (declare (uses commonmod))
 (declare (uses commonmod.import))
 (declare (uses mtargs))
 (declare (uses mtargs.import))
-(declare (uses debugprint))
-(declare (uses debugprint.import))
 (declare (uses runs))
 (declare (uses launch))
 (declare (uses server))
 (declare (uses tests))
 (declare (uses genexample))

Index: mtargs.scm
==================================================================
--- mtargs.scm
+++ mtargs.scm
@@ -17,7 +17,9 @@
 ;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 
 ;;======================================================================
 
 (declare (unit mtargs))
+
+(use srfi-69)
 
 (include "mtargs/mtargs.scm")

Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -314,72 +314,10 @@
 
 ;;======================================================================
 ;;  T E S T S
 ;;======================================================================
 
-;; Just some syntatic sugar
-(define (rmt:register-test run-id test-name item-path)
-  (assert (number? run-id) "FATAL: Run id required.")
-  (rmt:general-call 'register-test run-id run-id test-name item-path))
-
-(define (rmt:get-test-id run-id testname item-path)
-  (assert (number? run-id) "FATAL: Run id required.")
-  (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))
-
-(define (rmt:get-test-info-by-id run-id test-id)
-  (if (number? test-id)
-      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
-      (begin
-	(debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
-	(print-call-chain (current-error-port))
-	#f)))
-
-(define (rmt:get-test-state-status-by-id run-id test-id)
-  (rmt:send-receive 'get-test-state-status-by-id run-id (list run-id test-id)))
-
-(define (rmt:test-get-rundir-from-test-id run-id test-id)
-  (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))
-
-(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
-  (assert (number? run-id) "FATAL: Run id required.")
-  (let* ((test-path (if (string? work-area)
-			work-area
-			(rmt:test-get-rundir-from-test-id run-id test-id))))
-    (debug:print 3 *default-log-port* "TEST PATH: " test-path)
-    (open-test-db test-path)))
-
-;; WARNING: This currently bypasses the transaction wrapped writes system
-(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
-  (assert (number? run-id) "FATAL: Run id required.")
-  (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))
-
-(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
-  (assert (number? run-id) "FATAL: Run id required.")
-  (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))
-
-(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
-  (assert (number? run-id) "FATAL: Run id required.")
-  ;; (if (number? run-id)
-  (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
-  ;;    (begin
-  ;;	(debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
-  ;;	(print-call-chain (current-error-port))
-  ;;	'())))
-
-(define (rmt:get-tests-for-run-state-status run-id testpatt last-update)
-  (assert (number? run-id) "FATAL: Run id required.")
-  (rmt:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update)))
-
-;; get stuff via synchash 
-(define (rmt:synchash-get run-id proc synckey keynum params)
-  (assert (number? run-id) "FATAL: Run id required.")
-  (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))
-
-(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
-  (assert (number? run-id) "FATAL: Run id required.")
-  (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
-  
 ;; IDEA: Threadify these - they spend a lot of time waiting ...
 ;;
 (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
   (let ((multi-run-mutex (make-mutex))
 	(run-id-list (if run-ids
@@ -501,16 +439,10 @@
 
 (define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)
   (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup)))
 
-;; state and status are extra hints not usually used in the calculation
-;;
-(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
-  (assert (number? run-id) "FATAL: Run id required.")
-  (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))
-
 (define (rmt:set-state-status-and-roll-up-run run-id state status)
   (assert (number? run-id) "FATAL: Run id required.")
   (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status)))
 
 
@@ -831,5 +763,15 @@
 		 (tt-ro-mode-set! runremote ro-mode)
 		 (tt-ro-mode-checked-set! runremote #t)
 		 ro-mode)
 	       ro-mode))))))
 
+;;======================================================================
+;; Maintenance
+;;======================================================================
+
+(define (rmt:find-and-mark-incomplete run-id ovr-deadtime)
+  (let* ((cfg-deadtime             (configf:lookup-number *configdat* "setup" "deadtime"))
+	 (test-stats-update-period (configf:lookup-number *configdat* "setup" "test-stats-update-period")))
+   (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period)
+   ;;call end of eud of run detection for posthook
+   (launch:end-of-run-check run-id)))

Index: rmtmod.scm
==================================================================
--- rmtmod.scm
+++ rmtmod.scm
@@ -17,13 +17,13 @@
 ;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 
 ;;======================================================================
 
 (declare (unit rmtmod))
+(declare (uses debugprint))
 (declare (uses commonmod))
 (declare (uses dbfile))    ;; needed for records
-(declare (uses debugprint))
 
 ;; (declare (uses apimod))
 ;; (declare (uses apimod.import))
 ;; (declare (uses ulex))
 
@@ -30,15 +30,17 @@
 ;; (include "ulex/ulex.scm")
 
 (module rmtmod
 	*
 	
-(import scheme chicken data-structures extras matchable)
+(import scheme chicken data-structures extras matchable srfi-69)
 (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18)
 (import commonmod dbfile debugprint) ;; (prefix commonmod cmod:))
 ;; (import apimod)
 ;; (import (prefix ulex ulex:))
+
+(include "db_records.scm")
 
 (defstruct alldat
   (areapath #f)
   (ulexdat  #f)
   )
@@ -109,8 +111,175 @@
   (let* ((testname  (alist-ref "testname" test-rec equal?))
 	 (item-path (alist-ref "item_path" test-rec equal?)))
     (debug:print 0 *default-log-port* "   Insert test in run "run-id": "testname"/"item-path)
     (rmtmod:send-receive 'insert-test run-id test-rec)))
 
+;;======================================================================
+;;  T E S T S
+;;======================================================================
+
+;; Just some syntatic sugar
+(define (rmt:register-test run-id test-name item-path)
+  (assert (number? run-id) "FATAL: Run id required.")
+  (rmt:general-call 'register-test run-id run-id test-name item-path))
+
+(define (rmt:get-test-id run-id testname item-path)
+  (assert (number? run-id) "FATAL: Run id required.")
+  (rmtmod:send-receive 'get-test-id run-id (list run-id testname item-path)))
+
+(define (rmt:get-test-info-by-id run-id test-id)
+  (if (number? test-id)
+      (rmtmod:send-receive 'get-test-info-by-id run-id (list run-id test-id))
+      (begin
+	(debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
+	(print-call-chain (current-error-port))
+	#f)))
+
+(define (rmt:get-test-state-status-by-id run-id test-id)
+  (rmtmod:send-receive 'get-test-state-status-by-id run-id (list run-id test-id)))
+
+(define (rmt:test-get-rundir-from-test-id run-id test-id)
+  (rmtmod:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))
+
+;; (define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
+;;   (assert (number? run-id) "FATAL: Run id required.")
+;;   (let* ((test-path (if (string? work-area)
+;; 			work-area
+;; 			(rmt:test-get-rundir-from-test-id run-id test-id))))
+;;     (debug:print 3 *default-log-port* "TEST PATH: " test-path)
+;;     (open-test-db test-path)))
+
+;; WARNING: This currently bypasses the transaction wrapped writes system
+(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
+  (assert (number? run-id) "FATAL: Run id required.")
+  (rmtmod:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))
+
+(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
+  (assert (number? run-id) "FATAL: Run id required.")
+  (rmtmod:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))
+
+(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
+  (assert (number? run-id) "FATAL: Run id required.")
+  ;; (if (number? run-id)
+  (rmtmod:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
+  ;;    (begin
+  ;;	(debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
+  ;;	(print-call-chain (current-error-port))
+  ;;	'())))
+
+(define (rmt:get-tests-for-run-state-status run-id testpatt last-update)
+  (assert (number? run-id) "FATAL: Run id required.")
+  (rmtmod:send-receive 'get-tests-for-run-state-status run-id (list run-id testpatt last-update)))
+
+;; get stuff via synchash 
+(define (rmt:synchash-get run-id proc synckey keynum params)
+  (assert (number? run-id) "FATAL: Run id required.")
+  (rmtmod:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))
+
+(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
+  (assert (number? run-id) "FATAL: Run id required.")
+  (rmtmod:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
+  
+;; state and status are extra hints not usually used in the calculation
+;;
+(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
+  (assert (number? run-id) "FATAL: Run id required.")
+  (rmtmod:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))
+
+
+;;======================================================================
+;; Maintenance
+;;======================================================================
+
+
+(define (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime)
+  (rmtmod:send-receive 'get-toplevels-and-incompletes run-id (list run-id running-deadtime remotehoststart-deadtime)))
+
+(define (rmt:get-status-from-final-status-file run-dir)
+  (let ((infile (conc run-dir "/.final-status")))
+    ;; first verify we are able to write the output file
+    (if (not (file-read-access? infile))
+        (begin 
+	  (debug:print 2 *default-log-port* "ERROR: cannot read " infile)
+          (debug:print 2 *default-log-port* "ERROR: run-dir is " run-dir)
+          #f
+          )
+        (with-input-from-file infile read-lines)
+	)))
+  
+;;  select end_time-now from
+;;      (select testname,item_path,event_time+run_duration as
+;;                          end_time,strftime('%s','now') as now from tests where state in
+;;      ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));
+;;
+;; NOT EASY TO MIGRATE TO db{file,mod}
+;;
+(define (rmt:find-and-mark-incomplete-engine run-id ovr-deadtime cfg-deadtime test-stats-update-period)
+  (let* ((incompleted '())
+	 (oldlaunched '())
+	 (toplevels   '())
+          ;; The default running-deadtime is 720 seconds = 12 minutes.
+          ;; "(running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))" = 200 + (2 * (200 + 30 + 30))
+         (deadtime-trim (or ovr-deadtime cfg-deadtime))
+         (server-start-allowance 200)
+         (server-overloaded-budget 200)
+         (launch-monitor-off-time (or test-stats-update-period 30))
+         (launch-monitor-on-time-budget 30)
+         (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget))
+         (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30))
+         (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default))
+         (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))
+         (running-deadtime (or deadtime-trim running-deadtime-default))) ;; two minutes (30 seconds between updates, this leaves 3x grace period)
+
+    (debug:print-info 4  *default-log-port* "running-deadtime = " running-deadtime)
+    (debug:print-info 4  *default-log-port* "deadtime-trim = " deadtime-trim)
+
+    (let* ((dat (rmt:get-toplevels-and-incompletes run-id running-deadtime remotehoststart-deadtime)))
+      (set! oldlaunched (list-ref dat 1))
+      (set! toplevels   (list-ref dat 2))
+      (set! incompleted (list-ref dat 0)))
+
+    (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, "
+		      (length toplevels) " old LAUNCHED toplevel tests and "
+		      (length incompleted) " tests marked RUNNING but apparently dead.")
+  
+    ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE.
+    ;;
+    ;; (db:delay-if-busy dbdat)
+    (let* ((min-incompleted-ids (map car incompleted)) ;; do 'em all
+	   (all-ids             (append min-incompleted-ids (map car oldlaunched))))
+      (if (> (length all-ids) 0)
+	  (begin
+	    ;; (launch:is-test-alive "localhost" 435)
+	    (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ")
+			 " as DEAD")
+	    (for-each
+             (lambda (test-id)
+               (let* ((tinfo   (rmt:get-test-info-by-id run-id test-id))
+		      (run-dir (db:test-get-rundir     tinfo))
+		      (host    (db:test-get-host       tinfo))
+		      (pid     (db:test-get-process_id tinfo))
+		      (result (rmt:get-status-from-final-status-file run-dir)))
+		 (if (and (list? result) (> (length result) 1) (equal? "PASS" (cadr result)) (equal? "COMPLETED" (car result))) 
+		     (begin
+		       (debug:print 0 *default-log-port* "INFO: test " test-id " actually passed, so marking PASS not DEAD")
+		       (rmt:set-state-status-and-roll-up-items
+			run-id test-id 'foo "COMPLETED" "PASS"
+			"Test stopped responding but it has PASSED; marking it PASS in the DB."))
+		     (let ((is-alive (and (not (eq? pid 0))  ;; 0 is default in re-used field "attemptnum" where pid stored.
+					  (commonmod:is-test-alive host pid))))
+		       (if is-alive
+			   (debug:print 0 *default-log-port* "INFO: test " test-id " on host " host
+					" has a process on pid " pid ", NOT setting to DEAD.")
+			   (begin
+			     (debug:print 0 *default-log-port* "INFO: test " test-id
+					  " final state/status is not COMPLETED/PASS. It is " result)
+			     (rmt:set-state-status-and-roll-up-items
+			      run-id test-id 'foo "COMPLETED" "DEAD"
+			      "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")))))))
+	     ;; call end of eud of run detection for posthook - from merge, is it needed?
+	     ;; (launch:end-of-run-check run-id)
+	     all-ids)
+	    )))))
 
 
 )