Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -311,10 +311,13 @@
 
 $(PREFIX)/bin/mt-new-to-old.sh : utils/mt-new-to-old.sh
 	$(INSTALL) $< $@
 	chmod a+x $@
 
+$(PREFIX)/bin/convert-db.sh : utils/convert-db.sh
+	$(INSTALL) $< $@
+	chmod a+x $@
 
 deploytarg/nbfake : utils/nbfake
 	$(INSTALL) $< $@
 	chmod a+x $@
 
@@ -358,10 +361,11 @@
 install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
           $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
           $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \
 	  $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
           $(PREFIX)/bin/mt-old-to-new.sh $(PREFIX)/bin/mt-new-to-old.sh \
+          $(PREFIX)/bin/convert-db.sh \
 	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \
 	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
 	  $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \
           $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \
           $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \

Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -229,10 +229,13 @@
                      ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
                      ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) 
                      ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
                      ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))
 
+                     ((insert-test)                   (let ((run-id (alist-ref "run_id" params equal? #f)))
+                                                       (db:insert-test dbstruct run-id params)))
+
                      ;; RUNS
                      ((register-run)                 (apply db:register-run dbstruct params))
                      ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
                      ((delete-run)                   (apply db:delete-run dbstruct params))
                      ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
@@ -241,10 +244,12 @@
                      ((set-var)                      (apply db:set-var dbstruct params))
                      ((inc-var)                      (apply db:inc-var dbstruct params))
 		     ((dec-var)                      (apply db:dec-var dbstruct params))
                      ((del-var)                      (apply db:del-var dbstruct params))
 		     ((add-var)                      (apply db:add-var dbstruct params))
+
+                     ((insert-run)                   (apply db:insert-run dbstruct params))
 
                      ;; STEPS
                      ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))
                      ((delete-steps-for-test!)       (apply db:delete-steps-for-test! dbstruct params))
                      

Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -31,28 +31,24 @@
 (declare (uses commonmod))
 (import commonmod)
 
 (include "common_records.scm")
 
-
-;; (require-library margs)
-;; (include "margs.scm")
-
-;; (define old-exit exit)
-;; 
-;; (define (exit . code)
-;;   (if (null? code)
-;;       (old-exit)
-;;       (old-exit code)))
+(define (remove-files filespec)
+  ;; delete every file matched by the glob pattern filespec
+  (for-each delete-file* (glob filespec)))
 
 (define (stop-the-train)
   (thread-start! (make-thread (lambda ()
 				(let loop ()
 				  (if (and *toppath*
 					   (file-exists? (conc *toppath*"/stop-the-train")))
 				      (begin
 					(debug:print 0 *default-log-port* "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")
+                                        (remove-files (conc *toppath* "/logs/server*"))
+                                        (remove-files (conc *toppath* "/.servinfo/*"))
+                                        (remove-files (conc *toppath* "/.mtdb/*lock"))
 					(exit 1)))
 				  (thread-sleep! 5)
 				  (loop))))))
 
 ;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
@@ -133,10 +129,11 @@
 (define *configinfo*   #f)   ;; raw results from setup, includes toppath and table from megatest.config
 (define *runconfigdat* #f)   ;; run configs data
 (define *configdat*    #f)   ;; megatest.config data
 (define *configstatus* #f)   ;; status of data; 'fulldata : all processing done, #f : no data yet, 'partialdata : partial read done
 (define *toppath*      #f)
+(define *dbdir* ".mtdb")
 (define *already-seen-runconfig-info* #f)
 
 (define *test-meta-updated* (make-hash-table))
 (define *globalexitstatus*  0) ;; attempt to work around possible thread issues
 (define *passnum*           0) ;; when running track calls to run-tests or similar
@@ -153,11 +150,11 @@
 (define *db-stats-mutex*      (make-mutex))
 ;; db access
 (define *db-last-access*      (current-seconds)) ;; last db access, used in server
 ;; (define *db-write-access*     #t)
 ;; db sync
-;; (define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
+;; (define *db-last-sync*        0)                 ;; last time the sync to nfs db happened
 (define *db-sync-in-progress* #f)                ;; if there is a sync in progress do not try to start another
 ;; (define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
 ;; task db
 (define *task-db*             #f) ;; (vector db path-to-db)
 (define *db-access-allowed*   #t) ;; flag to allow access
@@ -530,16 +527,20 @@
 	  ;; (print-call-chain (current-error-port)) ;; 
 	  )
 	(let* ((fullname  (conc "logs/" file))
 	       (mod-time  (file-modification-time fullname))
 	       (file-age  (- (current-seconds) mod-time))
-	       (file-old  (> file-age (* 48 60 60)))
+	       (file-old  (> file-age (* 48 60 60))) ;; over 48 hours
 	       (file-big  (> (file-size fullname) 200000)))
 	  (hash-table-set! all-files file mod-time)
-	  (if (or (and (string-match "^.*.log" file)
+	  (if (or 
+                ;; gzip:
+                ;; any old and big log files (server logs, runlogs, update_ext_specs, etc.)
+               (and (string-match "^.*.log" file)
 		       file-old
 		       file-big)
+                  ;; old server log files:
 		  (and (string-match "^server-.*.log" file)
 		       file-old))
 	      (let ((gzfile (conc fullname ".gz")))
 		(if (common:file-exists? gzfile)
 		    (begin
@@ -551,10 +552,11 @@
 		(system (conc "gzip " fullname))
 		(inc-stat "gzipped")
 		(hash-table-set! all-files (conc file ".gz") file-age)  ;; add the .gz file and remove the base file
 		(hash-table-delete! all-files file)
 		)
+              ;; delete other files over expiration date:
 	      (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600))
 		       (file-exists? fullname)) ;; just in case it was gzipped - will get it next time
 		  (handle-exceptions
 		   exn
 		   #f
@@ -580,63 +582,38 @@
 				   (lambda (a b)
 				     (< (hash-table-ref all-files a)(hash-table-ref all-files b))))
 			     (- num-logs max-allowed))))
 	    (for-each
 	     (lambda (file)
-	       (let* ((fullname (conc "logs/" file)))
+	       (let* ((fullname (conc "logs/" file))
+                      (is-alive #f)) ;; #f unless a live owning server is found (0 would count as true in Scheme)
+                 ;; Don't delete it if it's the log file of a running server.
+                 (if (string-match "server-\\d+-[a-zA-Z0-9]+\\.log" file)
+                    (let* ((parts (string-split file "-."))
+                        (pid (cadr parts)) ; Second part is the PID
+                        (server-machine (caddr parts)) ; Third part is the server machine
+                        (local (string=? (get-host-name) server-machine))
+                        (test-cmd (conc "test -d /proc/" pid)))
+                        ;; system returns the command's exit status; zero means /proc/<pid> exists (server alive)
+                        (set! is-alive (zero? (system (if local
+                                                          test-cmd
+                                                          (conc "ssh " server-machine " " test-cmd)))))
+                    )
+                 )
 		 (if (directory? fullname)
 		     (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
 		     (handle-exceptions
 		      exn
 		      (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn)
-		      (delete-file* fullname)))))
+		      (if (not is-alive) 
+                        (delete-file* fullname)
+                        (debug:print-info 0 *default-log-port* "Not deleting log file " file " since its server is still alive")
+                      )
+                      ))))
 	     files)
 	    (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))
 
-;;======================================================================
-;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
-;; Do NOT check if not on homehost!
-;;
-(define (common:exit-on-version-changed)
-  (if (common:on-homehost?)
-      (if (common:api-changed?)
-	  (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
-                 (dbfile  (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
-                 (read-only (not (file-write-access? dbfile)))
-                 (dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;;  #t)))
-	    (debug:print 0 *default-log-port*
-			 "WARNING: Version mismatch!\n"
-			 "   expected: " (common:version-signature) "\n"
-			 "   got:      " (common:get-last-run-version))
-            (cond
-             ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
-             ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
-                   (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db
-              (debug:print 0 *default-log-port* "   I see you are the owner of megatest.config, attempting to cleanup and reset to new version")
-              (handle-exceptions
-               exn
-               (begin
-                 (debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn)
-                 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-                 (print-call-chain (current-error-port))
-                 (exit 1))
-               (common:cleanup-db dbstruct)))
-             ((not (common:file-exists? mtconf))
-              (debug:print 0 *default-log-port* "   megatest.config does not exist in this area.  Cannot proceed with megatest version migration.")
-              (exit 1))
-             ((not (common:file-exists? dbfile))
-              (debug:print 0 *default-log-port* "   megatest.db does not exist in this area.  Cannot proceed with megatest version migration.")
-              (exit 1))
-             ((not (eq? (current-user-id)(file-owner mtconf)))
-              (debug:print 0 *default-log-port* "   You do not own megatest.db in this area.  Cannot proceed with megatest version migration.")
-              (exit 1))
-             (read-only
-              (debug:print 0 *default-log-port* "   You have read-only access to this area.  Cannot proceed with megatest version migration.")
-              (exit 1))
-             (else
-              (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"")
-              (exit 1)))))))
 ;;======================================================================
 ;;      (begin
 ;;	(debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
 ;;	(exit 1))))
 
@@ -706,10 +683,16 @@
 	""))))
 
 (define (common:alist-ref/default key alist default)
   (or (alist-ref key alist) default))
 
+
+;; The `common:low-noise-print` function is a utility that can be used to throttle the
+;; frequency of certain operations. It does this by tracking the last time an operation was
+;; performed and only allowing it again after a specified interval (`waitval`). This can be useful
+;; for reducing noise in logs or limiting the rate of user notifications, among other use cases.
+
 (define (common:low-noise-print waitval . keys)
   (let* ((key      (string-intersperse (map conc keys) "-" ))
 	 (lasttime (hash-table-ref/default *common:denoise* key 0))
 	 (currtime (current-seconds)))
     (if (> (- currtime lasttime) waitval)
@@ -954,16 +937,16 @@
 					  "/megatest_localdb/"
 					  tsname
 					  (string-translate *toppath* "/" "."))
 				    ))))
 		(set! *db-cache-path* dbpath)
-		;; ensure megatest area has .megatest
-		(let ((dbarea (conc *toppath* "/.megatest")))
+		;; ensure megatest area has dbdir
+		(let ((dbarea (conc *toppath* "/" *dbdir*)))
 		  (if (not (file-exists? dbarea))
 		      (create-directory dbarea)))
-		;; ensure tmp area has .megatest
-		(let ((dbarea (conc dbpath "/.megatest")))
+		;; ensure tmp area has dbdir
+		(let ((dbarea (conc dbpath "/" *dbdir*)))
 		  (if (not (file-exists? dbarea))
 		      (create-directory dbarea)))
 		dbpath))
 	  #f)))
 

Index: commonmod.scm
==================================================================
--- commonmod.scm
+++ commonmod.scm
@@ -86,10 +86,17 @@
 	'()))) ;; should it return empty list or #f to indicate not set?
 
 
 (define (get-section cfgdat section)
   (hash-table-ref/default cfgdat section '()))
+
+(define (common:make-tmpdir-name areapath tmpadj)
+  ;; build the per-user tmp db directory name for areapath, create it if missing, and return it
+  (let ((dname (conc "/tmp/"(current-user-name)"/megatest_localdb/" (pathname-file areapath) "/" (string-translate areapath "/" ".") tmpadj "/.mtdb")))
+    (if (not (directory-exists? dname))
+      (create-directory dname #t))
+    dname))
 
 ;; dot-locking egg seems not to work, using this for now
 ;; if lock is older than expire-time then remove it and try again
 ;; to get the lock
 ;;

Index: dashboard.scm
==================================================================
--- dashboard.scm
+++ dashboard.scm
@@ -664,11 +664,11 @@
 	 (last-db-time (if do-not-use-db-file-timestamps
 			   0
 			   (dboard:rundat-last-db-time run-dat)))
 	 (db-path      (or (dboard:rundat-db-path run-dat)
 			   (let* ((db-dir (common:get-db-tmp-area))
-				  (db-pth (conc db-dir "/.megatest/main.db")))
+				  (db-pth (conc db-dir "/" *dbdir* "/main.db")))
 			     (dboard:rundat-db-path-set! run-dat db-pth)
 			     db-pth)))
 	 (db-mod-time  (common:lazy-sqlite-db-modification-time db-path))
 	 (db-modified  (>= db-mod-time last-db-time))
 	 (multi-get    (> (dboard:rundat-run-data-offset run-dat) 0))  ;; multi-get in progress
@@ -3792,11 +3792,11 @@
 (stop-the-train)
 
 (define (main)
   ;; (print "Starting dashboard main")
     
-  (let* ((mtdb-path (conc *toppath* "/.megatest/main.db"))
+  (let* ((mtdb-path (conc *toppath* "/" *dbdir* "/main.db"))
          (target (args:get-arg "-target"))
          (commondat       (dboard:commondat-make)))
     (if target
         (begin
           (args:remove-arg-from-ht "-target")
@@ -3816,15 +3816,10 @@
       (debug:print 0 *default-log-port* "WARNING: You are starting the dashboard on a machine that is not the homehost:" (common:get-homehost))
       (debug:print 0 *default-log-port* "It will be slower.")
       ))
 
 
-    (if (and (common:file-exists? mtdb-path)
-	     (file-write-access? mtdb-path))
-	(if (not (args:get-arg "-skip-version-check"))
-            (common:exit-on-version-changed)))
-
     (let* ()
       ;; Move this stuff to db.scm? I'm not sure that is the right thing to do...
       (cond 
        ((args:get-arg "-test") ;; run-id,test-id
 	(let* ((dat     (let ((d (map string->number (string-split (args:get-arg "-test") ",")))) 
@@ -3889,11 +3884,11 @@
 
 
 ;; Sync to tmp only if in read-only mode.
 
 (define (sync-db-to-tmp tabdat)
-  (let* ((db-file "./.megatest/main.db"))
+  (let* ((db-file (conc "./" *dbdir* "/main.db")))
     (if (and (not (file-write-access? db-file)) ( > (current-seconds) (+ last-copy-time 5)))
       (begin
         (db:multi-db-sync (db:setup #f) 'old2new)
         (set! last-copy-time (current-seconds))
       )

Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -411,20 +411,20 @@
 
 (define (db:all-db-sync dbstruct)
   (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db))
 	 (data-synced       0) ;; count of changed records
     (tmp-area       (common:get-db-tmp-area))
-    (dbfiles        (glob (conc tmp-area"/.megatest/*.db")))
+    (dbfiles        (glob (conc tmp-area"/" *dbdir* "/*.db")))
     (sync-durations (make-hash-table))
     (no-sync-db        (db:open-no-sync-db)))
     (for-each
      (lambda (file) ;; tmp db file
        (debug:print-info 3 *default-log-port* "file: " file)
        (let* ((fname       (conc (pathname-file file) ".db")) ;; fname is tmp db file
               (wal-file (conc file "-wal"))
               (shm-file (conc file "-shm"))
-	      (fulln       (conc *toppath*"/.megatest/"fname)) ;; fulln is nfs db name
+	      (fulln       (conc *toppath*"/" *dbdir* "/"fname)) ;; fulln is nfs db name
               (wal-time     (if (file-exists? wal-file)             
 			       (file-modification-time wal-file)
                                0))
               (shm-time     (if (file-exists? shm-file)             
 			       (file-modification-time shm-file)
@@ -489,11 +489,11 @@
     (dejunk (member 'dejunk options))
     (killservers (member 'killservers options))
     (servers (server:get-list *toppath*))
     (src-area (if old2new *toppath* tmp-area))
     (dest-area (if old2new tmp-area *toppath*))
-    (dbfiles        (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db"))))
+    (dbfiles        (if old2new (glob (conc *toppath* "/" *dbdir* "/*.db")) (glob (conc tmp-area "/" *dbdir* "/*.db"))))
     (keys (db:get-keys dbstruct))
     (sync-durations (make-hash-table)))
 
 
     (if killservers
@@ -516,12 +516,12 @@
      (lambda (srcfile)
        (debug:print-info 3 *default-log-port* "file: " srcfile)
        (let* ((fname (conc (pathname-file srcfile) ".db"))
               (basename (pathname-file srcfile))
               (run-id (if (string= basename "main") #f (string->number basename)))
-	      (destfile (conc dest-area "/.megatest/" fname))
-              (dest-directory  (conc dest-area "/.megatest/"))
+	      (destfile (conc dest-area "/" *dbdir* "/" fname))
+              (dest-directory  (conc dest-area "/" *dbdir* "/"))
               (dummy (debug:print-info 2 *default-log-port* "destfile = " destfile))
               (dummy2 (debug:print-info 2 *default-log-port* "dejunk = " dejunk))
               ;; TODO: time1 and time2 need to take into account -wal and -shm files
 	      (time1 (file-modification-time srcfile))
               (time2 (if (file-exists? destfile)
@@ -1581,10 +1581,68 @@
 	     (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? AND state='deleted';" state status res)
 	     res))) 
 	(begin
 	  (debug:print-error 0 *default-log-port* "Called without all necessary keys")
 	  #f))))
+
+;; Ensure a run record for target/runname exists; if db:simple-get-runs finds none, one is
+;; created with the given run-id (db access uses run-id=#f, i.e. main.db). Returns run-id.
+(define (db:insert-run dbstruct run-id target runname run-meta)
+  (let* ((keys (db:get-keys dbstruct))
+     	 (runs (db:simple-get-runs dbstruct runname #f #f target #f))) ;; runpatt count offset target last-update
+    ;; need to insert run based on target and runname
+    (let* ((targvals (string-split target "/"))
+	   (keystr   (string-intersperse keys ","))
+	   (key?str  (string-intersperse (make-list (length targvals) "?") ","))
+	   (qrystr   (conc "INSERT INTO runs (id,runname,"keystr") VALUES (?,?,"key?str")"))
+	   (get-var  (lambda (db qrystr) ;; currently unused helper: first column of the last row returned
+		       (let* ((res #f))
+			 (sqlite3:for-each-row
+			  (lambda row
+			    (set! res (car row))) ;; was `set`, which is not a bound identifier
+			  db qrystr run-id runname)
+			 res))))
+      (if (null? runs)
+        (begin
+	  (db:create-initial-run-record dbstruct run-id runname target)
+        )
+      )
+      run-id)))
+
+(define (db:create-initial-run-record dbstruct run-id runname target)	  
+  (let* ((keys     (db:get-keys dbstruct))
+     	 (targvals (string-split target "/"))
+	 (keystr   (string-intersperse keys ","))
+	 (key?str  (string-intersperse (make-list (length targvals) "?") ",")) ;; one "?" placeholder per target value, joined with commas (e.g. "?,?,?")
+	 (qrystr   (conc "INSERT INTO runs (id,runname,"keystr") VALUES (?,?,"key?str")")))
+
+    (db:with-db
+     dbstruct #f #t ;; run-id writable
+     (lambda (dbdat db)
+       (apply sqlite3:execute db qrystr run-id runname targvals)))))
+
+(define (db:insert-test dbstruct run-id test-rec)
+  (let* ((testname  (alist-ref "testname" test-rec equal?))
+	 (item-path (alist-ref "item_path" test-rec equal?))
+	 (id        (db:get-test-id dbstruct run-id testname item-path))
+	 (fieldvals (filter (lambda (x)(not (member (car x) '("id" "last_update")))) test-rec))
+	 (setqry    (conc "UPDATE tests SET "(string-intersperse
+					      (map (lambda (dat)
+						     (conc (car dat)"=?"))
+						   fieldvals)
+					      ",")" WHERE id=?;"))
+	 (insqry   (conc "INSERT INTO tests ("(string-intersperse (map (lambda (x) (car x)) fieldvals) ",")
+			 ") VALUES ("(string-intersperse (make-list (length fieldvals) "?") ",")");")))
+    ;; (debug:print 0 *default-log-port* "id: "id"\nset: "setqry"\ninsqry: "insqry)
+    (db:with-db
+     dbstruct
+     run-id #t
+     (lambda (dbdat db)
+      ;; NOTE: the update-if-exists path below is disabled, so id and setqry are currently unused:
+      ;;   (if id (apply sqlite3:execute db setqry (append (map cdr fieldvals) (list id))) ...)
+	   (apply sqlite3:execute db insqry (map cdr fieldvals))
+           ))))
 
 ;; replace header and keystr with a call to runs:get-std-run-fields
 ;;
 ;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") )
 ;; runpatts: patt1,patt2 ...
@@ -1656,20 +1714,21 @@
 				 "")
 			     (if (number? offset)
 				 (conc " OFFSET " offset)
 				 "")))
 	   )
-    (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
+    (debug:print-info 11 *default-log-port* "db:simple-get-runs START qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
     (db:with-db dbstruct #f #f
 		(lambda (dbdat db)		
 		  (sqlite3:for-each-row
 		   (lambda (target id runname state status owner event_time)
-		     (set! res (cons (make-simple-run target id runname state status owner event_time) res)))
+		     (set! res (cons (make-simple-run target id runname state status owner event_time) res))
+                   )
 		   db
 		   qrystr
 		   )))
-    (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
+    (debug:print-info 11 *default-log-port* "db:simple-get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
     res))
 
 ;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using inmem db)
 ;;
 ;; NOTE: This DOESN'T (necessarily) get the real run ids, but the number of the <number>.db!!
@@ -1679,11 +1738,11 @@
 
 
 
  (define (db:get-changed-run-ids since-time)
   (let* ((dbdir      (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir"))
-        (alldbs     (glob (conc dbdir "/.megatest/[0-9]*.db*")))
+        (alldbs     (glob (conc dbdir "/" *dbdir* "/[0-9]*.db*")))
         (changed    (filter (lambda (dbfile)
                               (> (file-modification-time dbfile) since-time))
                             alldbs)))
     (delete-duplicates
      (map (lambda (dbfile)
@@ -4149,11 +4208,11 @@
           (db:with-db dbstruct #f #f 
             (lambda (dbdat db)
               (sqlite3:fold-row backcons '() db "SELECT id FROM runs"))
           )
          )
-         (changed_run_ids (filter (lambda (run) (member (modulo run 100) changed_run_dbs)) all_run_ids))
+         (changed_run_ids (filter (lambda (run) (member (modulo run (num-run-dbs)) changed_run_dbs)) all_run_ids))
          ;; TODO: couldn't we just use changed_run_ids for run_ids?
          (run_ids 
           (db:with-db dbstruct #f #f 
             (lambda (dbdat db)
               (sqlite3:fold-row backcons '() db "SELECT id FROM runs  WHERE last_update>=?" since-time))
@@ -4372,17 +4431,17 @@
 
 ;; sync for filesystem local db writes
 ;;
 (define (db:run-lock-and-sync no-sync-db)
   (let* ((tmp-area       (common:get-db-tmp-area))
-	 (dbfiles        (glob (conc tmp-area"/.megatest/*.db")))
+	 (dbfiles        (glob (conc tmp-area"/" *dbdir* "/*.db")))
 	 (sync-durations (make-hash-table)))
     ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles)
     (for-each
      (lambda (file)
        (let* ((fname (conc (pathname-file file) ".db"))
-	      (fulln (conc *toppath*"/.megatest/"fname))
+	      (fulln (conc *toppath*"/" *dbdir* "/"fname))
 	      (time1 (if (file-exists? file)
 			 (file-modification-time file)
 			 (begin
 			   (debug:print-info 0 *default-log-port* "Sync - I do not see file "file)
 			   1)))

Index: dbfile.scm
==================================================================
--- dbfile.scm
+++ dbfile.scm
@@ -42,13 +42,19 @@
 	commonmod
 	)
 
 ;; (import debugprint)
 
+;; Parameters
+
+(define num-run-dbs           (make-parameter 10))
+
 ;;======================================================================
 ;;  R E C O R D S
 ;;======================================================================
+
+;; (define-record simple-run target id runname state status owner event_time)
 
 ;; a single Megatest area with it's multiple dbs is
 ;; managed in a dbstruct
 ;;
 (defstruct dbr:dbstruct
@@ -60,16 +66,16 @@
   )
 
 ;; NOTE: Need one dbr:subdb per main.db, 1.db ...
 ;;
 (defstruct dbr:subdb
-  (dbname      #f) ;; .megatest/1.db
-  (mtdbfile    #f) ;; mtrah/.megatest/1.db
+  (dbname      #f) ;; <dbdir>/1.db, e.g. .mtdb/1.db
+  (mtdbfile    #f) ;; mtrah/<dbdir>/1.db
   (mtdbdat     #f) ;; only need one of these for syncing
   ;; (dbdats      (make-hash-table))  ;; id => dbdat 
-  (tmpdbfile   #f) ;; /tmp/.../.megatest/1.db
-  ;; (refndbfile  #f) ;; /tmp/.../.megatest/1.db_ref
+  (tmpdbfile   #f) ;; /tmp/.../<dbdir>/1.db
+  ;; (refndbfile  #f) ;; /tmp/.../<dbdir>/1.db_ref
   (dbstack     (make-stack)) ;; stack for tmp dbr:dbdat,
   (homehost    #f) ;; not used yet
   (on-homehost #f) ;; not used yet
   (read-only   #f)
   (last-sync   0)
@@ -93,10 +99,11 @@
 (define *max-api-process-requests* 0)
 (define *api-process-request-count* 0)
 (define *db-write-access*     #t)
 (define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
 (define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
+(define *dbdir* ".mtdb")
 
 (define (db:generic-error-printout exn . message)
   (print-call-chain (current-error-port))
   (apply dbfile:print-err message)
   (dbfile:print-err
@@ -196,12 +203,12 @@
   (conc apath"/"dbname))
 
 ;; POTENTIAL BUG: this implementation could produce a db file if run-id is neither #f or a number
 (define (dbfile:run-id->dbname run-id)
   (cond
-   ((number? run-id) (conc ".megatest/" (modulo run-id 100) ".db"))
-   ((not run-id)     (conc ".megatest/main.db"))
+   ((number? run-id) (conc  *dbdir* "/" (modulo run-id (num-run-dbs)) ".db"))
+   ((not run-id)     (conc  *dbdir* "/main.db"))
    (else             run-id)))
 
 ;; Make the dbstruct, setup up auxillary db's and call for main db at least once
 ;;
 ;; called in http-transport and replicated in rmt.scm for *local* access. 
@@ -368,19 +375,20 @@
 				     (sqlite3:execute db (conc "PRAGMA synchronous = "sync-mode";")))
 				 (if journal-mode
 				     (sqlite3:execute db (conc "PRAGMA journal_mode = "journal-mode";")))
 				 (if (and init-proc (not db-exists))
 				     (init-proc db))
-				 db)))
+				 db))
+			     expire-time: 30)
                             (begin
 			      (if (file-exists? fname )
                                   (let ((db (sqlite3:open-database fname)))
 				    ;; pragmas synchronous not needed because this db is used read-only
 				    ;; (sqlite3:execute db (conc "PRAGMA synchronous = "mode";")
 				    (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) ;; read-only but still need timeout
 				    db )
-                                  (print "file doesn't exist: " fname))))
+                                  (print "cautious-open-database: file doesn't exist: " fname))))
 			(exn (io-error)
 			     (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")
 			     (retry))
 			(exn (corrupt)
 			     (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.")
@@ -805,11 +813,13 @@
 	            (set! fromdat (cons (apply vector a b) fromdat))
 	            (if (> (length fromdat) batch-len)
 		      (begin
 		        (set! fromdats (cons fromdat fromdats))
 		        (set! fromdat  '())
-		        (set! totrecords (+ totrecords 1)))
+		        (set! totrecords (+ totrecords 1))
+                        (thread-sleep! 2)
+                      )
                     )
                  )
 	         (dbr:dbdat-dbh fromdb)
 	         full-sel)
               )
@@ -1152,11 +1162,15 @@
 	      (handle-exceptions exn
                 #f 
                 (with-input-from-file fname
 	  	  (lambda ()
 		    (equal? key-string (read-line)))))
-	      #f)
+              (begin
+                (dbfile:print-err "dbfile:simple-file-lock created " fname " but it was gone 0.25 seconds later")
+	      #f
+              )
+          )
        )
     )
   )
 )
 

Index: http-transport.scm
==================================================================
--- http-transport.scm
+++ http-transport.scm
@@ -77,11 +77,10 @@
 			    (if ipstr ipstr hostn))) ;; hostname))) 
 	 (start-port      (portlogger:open-run-close portlogger:find-port))
 	 (link-tree-path  (common:get-linktree))
 	 (tmp-area        (common:get-db-tmp-area))
 	 (start-file      (conc tmp-area "/.server-start")))
-    (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)
     ;; set some parameters for the server
     (root-path     (if link-tree-path 
 		       link-tree-path
 		       (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
     (handle-directory spiffy-directory-listing)
@@ -464,10 +463,13 @@
     (handle-exceptions
 	exn
       (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn)
       (with-output-to-file started-file (lambda ()(print (current-process-id)))))
 
+    (debug:print 0 *default-log-port* "Creating servinfo file for " (get-host-name) ":" (cadr *server-info*)) 
+    (http:create-server-registration-file *toppath* (get-host-name) (cadr *server-info*))
+
     (let loop ((count         0)
 	       (server-state 'available)
 	       (bad-sync-count 0)
 	       (start-time     (current-milliseconds)))
 
@@ -535,27 +537,16 @@
 		 (set! *server-id* (server:mk-signature)))
              (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))   
 	     (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
 	     (flush-output *default-log-port*)))
       (if (common:low-noise-print 60 "dbstats")
-	  (begin
+	(begin
 	    (debug:print 0 *default-log-port* "Server stats:")
 	    (db:print-current-query-stats)))
-      (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
-	(cond
-	 #;((and *server-run*
-	       (> (- (current-seconds) server-start-time) 420)) ;; let's try server replacement
-	  ;; ((adj-proc-load . 0.056875) (adj-core-load . 0.11375) (1m-load . 0.91) (5m-load . 0.77) (15m-load . 1.0) (proc . 16) (core . 8) (phys . 1))
-	  (let* ((loaddat       (common:get-normalized-cpu-load #f))
-		 (adj-proc-load (alist-ref 'adj-proc-load loaddat))
-		 (adj-core-load (alist-ref 'adj-core-load loaddat))
-		 (adj-load      (max adj-proc-load adj-core-load)))
-	    (if (< adj-load 2) ;; reduce chance of runaway
-		(server:run *toppath*))
-	    (db:all-db-sync *dbstruct-dbs*)
-	    (thread-sleep! 30)
-	    (http-transport:server-shutdown port)))
+
+        (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
+	 (cond
          ((and *server-run*
 	       (> (+ last-access server-timeout)
 		  (current-seconds)))
           (if (common:low-noise-print 120 "server continuing")
               (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
@@ -655,16 +646,35 @@
                                 )) "Server run"))
            (th3 (make-thread (lambda ()
                                (debug:print-info 0 *default-log-port* "Server monitor thread started")
                                (http-transport:keep-running)
                                "Keep running"))))
+
       (thread-start! th2)
-      (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
+      (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. 
       (thread-start! th3)
       (set! *didsomething* #t)
       (thread-join! th2)
       (exit))))
+
+;; Write the servinfo registration file <servinfo-dir>/<host>:<port>-<pid> for
+;; this process and return the server id that was recorded in it.
+;;   areapath  - passed to server:get-servinfo-dir to locate the directory
+;;   host,port - both must be non-#f (asserted before anything is written)
+;; NOTE(review): the id is a fresh (server:mk-signature), not *server-id*, and
+;; nothing here removes the file again - confirm both are intended.
+(define (http:create-server-registration-file areapath host port)
+  (assert (and host port) "FATAL: http:create-server-registration-file called with no conn")
+  (let* ((servdir (server:get-servinfo-dir areapath))
+         (servinf (conc servdir "/" host ":" port "-" (current-process-id)))
+         (serv-id (server:mk-signature)))
+    (with-output-to-file servinf
+      (lambda ()
+        ;; one line: host:port, start time (seconds), server id and pid
+        (print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id))))
+    serv-id))
+
 
 ;; (define (http-transport:server-signal-handler signum)
 ;;   (signal-mask! signum)
 ;;   (handle-exceptions
 ;;    exn

Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -326,10 +326,55 @@
                         (or new-cpu-load cpu-load)
                         (or new-disk-free disk-free)
                         (if do-sync (current-seconds) last-sync)))))))
     (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional
 
+
+;; Write test-dir/.testconfig, then create the files listed in the [scripts]
+;; and [logpro] sections; those names (and .final-status) resolve against cwd.
+;;    - use #f for tconfigreg-in to re-read the testconfigs via (tests:get-all)
+(define (launch:extract-scripts-logpro  test-dir test-name item-path tconfigreg-in)
+  (let* ((tconfigreg      (or tconfigreg-in
+			      (tests:get-all)))
+	 (tconfig-fname   (conc test-dir "/.testconfig"))
+	 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
+	 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
+	 (scripts         (configf:get-section tconfig "scripts"))
+	 (logpros         (configf:get-section tconfig "logpro")))
+    ;; create .testconfig file (written to a .tmp name first, then moved into place)
+    (configf:write-alist tconfig tconfig-tmpfile)
+    (file-move tconfig-tmpfile tconfig-fname #t)
+    (delete-file* ".final-status")
+
+    ;; extract scripts from testconfig and write them to files in test run dir
+    (for-each
+     (lambda (scriptdat)
+       (match scriptdat
+	      ((name content)
+	       (debug:print-info 2 *default-log-port* "Creating script "(current-directory)"/"name)
+	       (with-output-to-file name
+		 (lambda ()
+		   (print content)))
+	       (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu)))
+	      (else
+	       (debug:print-info 0 *default-log-port* "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\""))))
+     scripts)
+
+    ;; extract logpro from testconfig and write them to files in test run dir
+    (for-each
+     (lambda (logprodat)
+       (match logprodat
+	      ((name content)
+	       (debug:print-info 2 *default-log-port* "Creating logpro file "(current-directory)"/"name)
+	       (with-output-to-file name
+		 (lambda ()
+		   (print content)
+		   ;; unlike scripts, logpro files do not get their mode changed
+		   )))
+	      (else
+	       (debug:print-info 0 *default-log-port* "Invalid logpro definiton found in [logpro] section of testconfig. \"" logprodat "\""))))
+     logpros)))
 
 (define (launch:execute encoded-cmd)
   (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
 	 (tconfigreg #f))
     (setenv "MT_CMDINFO" encoded-cmd)
@@ -593,17 +638,19 @@
 	      (list  "MT_TARGET"    target)
 	      (list  "MT_LINKTREE"  (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
 	      (list  "MT_TESTSUITENAME" (common:get-testsuite-name))))
           ;;(bb-check-path msg: "launch:execute post block 3")
 
-	  (if mt-bindir-path (setenv "PATH" (conc "\""(getenv "PATH")":"mt-bindir-path"\"")))
+	  (let ((tmppath (getenv "PATH")))
+	    (if (string-search tmppath " ")
+		(debug:print 0 *default-log-port* "WARNING: spaces in PATH are not supported."))
+	    (if mt-bindir-path (setenv "PATH" (conc tmppath":"mt-bindir-path))))
           ;;(bb-check-path msg: "launch:execute post block 4")
 	  ;; (change-directory top-path)
 	  ;; Can setup as client for server mode now
 	  ;; (client:setup)
-
-	  
+  
 	  ;; environment overrides are done *before* the remaining critical envars.
 	  (alist->env-vars env-ovrd)
           ;;(bb-check-path msg: "launch:execute post block 41")
 	  (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
           ;;(bb-check-path msg: "launch:execute post block 42")
@@ -629,37 +676,39 @@
 	      (set! fullrunscript "xterm")
 	      (if (and fullrunscript 
 		       (common:file-exists? fullrunscript)
 		       (not (file-execute-access? fullrunscript)))
 		  (system (conc "chmod ug+x " fullrunscript))))
-
-	  ;; We are about to actually kick off the test
-	  ;; so this is a good place to remove the records for 
-	  ;; any previous runs
-	  ;; (db:test-remove-steps db run-id testname itemdat)
-	  ;; now is also a good time to write the .testconfig file
-	  (let* ((tconfig-fname   (conc work-area "/.testconfig"))
-		 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
-		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
-		 (scripts (configf:get-section tconfig "scripts")))
-	    ;; create .testconfig file
-	    (configf:write-alist tconfig tconfig-tmpfile)
-	    (file-move tconfig-tmpfile tconfig-fname #t)
-	    (delete-file* ".final-status")
-
-	    ;; extract scripts from testconfig and write them to files in test run dir
-	    (for-each
-	     (lambda (scriptdat)
-	       (match scriptdat
-		      ((name content)
-		       (with-output-to-file name
-			 (lambda ()
-			   (print content)
-			   (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu)))))
-		      (else
-		       (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\""))))
-	     scripts))
+	  (launch:extract-scripts-logpro work-area test-name item-path tconfigreg)
+
+;;;;;	  ;; We are about to actually kick off the test
+;;;;;	  ;; so this is a good place to remove the records for 
+;;;;;	  ;; any previous runs
+;;;;;	  ;; (db:test-remove-steps db run-id testname itemdat)
+;;;;;	  ;; now is also a good time to write the .testconfig file
+;;;;;	  (let* ((tconfig-fname   (conc work-area "/.testconfig"))
+;;;;;		 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
+;;;;;		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
+;;;;;		 (scripts         (configf:get-section tconfig "scripts"))
+;;;;;		 (precmd          (configf:lookup tconfig )
+;;;;;	    ;; create .testconfig file
+;;;;;	    (configf:write-alist tconfig tconfig-tmpfile)
+;;;;;	    (file-move tconfig-tmpfile tconfig-fname #t)
+;;;;;	    (delete-file* ".final-status")
+;;;;;
+;;;;;	    ;; extract scripts from testconfig and write them to files in test run dir
+;;;;;	    (for-each
+;;;;;	     (lambda (scriptdat)
+;;;;;	       (match scriptdat
+;;;;;		      ((name content)
+;;;;;		       (with-output-to-file name
+;;;;;			 (lambda ()
+;;;;;			   (print content)
+;;;;;			   (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu)))))
+;;;;;		      (else
+;;;;;		       (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\""))))
+;;;;;	     scripts))
 	  ;;
 
 	  (let* ((m            (make-mutex))
 		 (kill-job?    #f)
 		 (exit-info    (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status
@@ -675,12 +724,19 @@
 		 (th1          (make-thread monitorjob "monitor job"))
 		 (th2          (make-thread runit "run job"))
                  (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t))
                  (propagate-exit-code (configf:lookup *configdat* "setup" "propagate-exit-code"))
                  (propagate-status-list '("FAIL" "KILLED" "ABORT" "DEAD" "CHECK" "SKIP" "WAIVED"))
-                 (test-status "not set")
-                 )
+                 (test-status     "not set")
+		 (test-state      "not set")
+		 (precmd          (configf:lookup tconfig "setup" "precmd"))
+		 (postcmd         (configf:lookup tconfig "setup" "postcmd")))
+	    ;; first, if set, run the precmd
+	    (if precmd ;; (file-exists? precmd)(file-execute-access? precmd))
+		(begin
+		  ;; (save-environment-as-files "precmd-envt")
+		  (system precmd))) ;; up to test author to put nbfake if desired.
 	    (set! job-thread th2)
 	    (thread-start! th1)
 	    (thread-start! th2)
 	    (thread-join! th2)
 	    (debug:print-info 0 *default-log-port* "Megatest execute of test " test-name ", item path " item-path " complete. Notifying the db ...")
@@ -737,22 +793,30 @@
 	    (mutex-unlock! m)
             (launch:end-of-run-check run-id )
 	    (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " 
 			 work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")
 
-
-            (set! test-status (db:test-get-status (rmt:get-testinfo-state-status run-id test-id)))
+	    (let* ((testrec  (rmt:get-testinfo-state-status run-id test-id)))
+              (set! test-status (db:test-get-status testrec))
+	      (set! test-state  (db:test-get-state  testrec)))
 
             ;; If the propagate-exit-code option has been set in the megatest config, and the test status matches the list, set the exit code to 1.
+
 
             (if (and propagate-exit-code (string=? propagate-exit-code "yes") (member test-status propagate-status-list))
                (begin
                 (debug:print 1 *default-log-port* "Setting exit status to 1 because of test status of " test-status) 
                 (set! *globalexitstatus* 1)
                )
             )
 
+	    (if postcmd
+		(begin
+		  (setenv "MT_TEST_STATE" test-state)
+		  (setenv "MT_TEST_STATUS" test-status)
+		  ;; (save-environment-as-files "postcmd-envt")
+		  (system postcmd)))
 	    (if (not (launch:einf-exit-status exit-info))
 		(exit 4))))
         )))
 
 ;; Spec for End of test
@@ -891,11 +955,11 @@
 		  (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
 			(targfile (conc fulldir "/.megatest.cfg-"  megatest-version "-" megatest-fossil-hash))
 			(rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
 		    (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
 			(begin
-			  (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
+			  (debug:print-info 2 *default-log-port* "Caching megatest.config in " tmpfile)
                           (if (not (common:in-running-test?))
                               (configf:write-alist *configdat* tmpfile))
 			  (system (conc "ln -sf " tmpfile " " targfile))))
 		    )))
 	    (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))

Index: megatest-version.scm
==================================================================
--- megatest-version.scm
+++ megatest-version.scm
@@ -18,6 +18,6 @@
 ;; Always use two or four digit decimal
 ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..
 
 ;; (declare (unit megatest-version))
 
-(define megatest-version 1.7014)
+(define megatest-version 1.7105)

Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -86,11 +86,19 @@
 
 ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
 ;;
 (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
   (if (common:file-exists? debugcontrolf)
-      (load debugcontrolf)))
+    (begin
+      ;; for some reason, debug:print does not work here. Had to use print.
+      (with-output-to-port (current-error-port)
+	(lambda ()
+	  (print (conc "WARNING: loading " debugcontrolf))))
+      (load debugcontrolf)
+    )
+  )
+)
 
 ;; usage logging, careful with this, it is not designed to deal with all real world challenges!
 ;;
 (if (and *usage-log-file*
          (file-write-access? *usage-log-file*))
@@ -193,11 +201,11 @@
   -list-disks             : list the disks available for storing runs
   -list-targets           : list the targets in runconfigs.config
   -list-db-targets        : list the target combinations used in the db
   -show-config            : dump the internal representation of the megatest.config file
   -show-runconfig         : dump the internal representation of the runconfigs.config file
-  -dumpmode MODE          : dump in MODE format instead of sexpr, MODE=json,ini,sexp etc. (add -debug 0,9 to see which file contributes each line)
+  -dumpmode MODE          : dump in MODE format instead of sexpr, MODE=json,ini,sexpr etc. (add -debug 0,9 to see which file contributes each line)
   -show-cmdinfo           : dump the command info for a test (run in test environment)
   -section sectionName
   -var varName            : for config and runconfig lookup value for sectionName varName
   -since N                : get list of runs changed since time N (Unix seconds)
   -fields fieldspec       : fields to include in json dump; runs:id,runame+tests:testname+steps
@@ -231,18 +239,20 @@
   -ping run-id|host:port  : ping server, exit with 0 if found
   -debug N|N,M,O...       : enable debug 0-N or N and M and O ...
   -debug-noprop N|M,M,O...: enable debug but do not propagate to subprocesses via MT_DEBUG
   -config fname           : override the megatest.config file with fname
   -append-config fname    : append fname to the megatest.config file
-
+  -import-sexpr fname     : import a sexpr file (use -list-runs % -dumpmode sexpr to create)
+  -regen-testfiles        : regenerate scripts and logpro files from testconfig, run in test context
+  
 Utilities
   -env2file fname         : write the environment to fname.csh and fname.sh
   -envcap a               : save current variables labeled as context 'a' in file envdat.db
   -envdelta a-b           : output enviroment delta from context a to context b to -o fname
                             set the output mode with -dumpmode csh, bash or ini
                             note: ini format will use calls to use curr and minimize path
-  -refdb2dat refdb        : convert refdb to sexp or to format specified by s-dumpmode
+  -refdb2dat refdb        : convert refdb to sexpr or to format specified by -dumpmode
                             formats: perl, ruby, sqlite3, csv (for csv the -o param
                             will substitute %s for the sheet name in generating 
                             multiple sheets)
   -o                      : output file for refdb2dat (defaults to stdout)
   -archive cmd            : archive runs specified by selectors to one of disks specified
@@ -349,10 +359,11 @@
 			"-env2file"
 			"-envcap"
 			"-envdelta"
 			"-setvars"
 			"-set-state-status"
+			"-import-sexpr"
 
                         ;; move runs stuff here
                         "-remove-keep"           
 			"-set-run-status"
 			"-age"
@@ -434,10 +445,11 @@
 			"-local"         ;; run some commands using local db access
 			"-generate-html"
 			"-generate-html-structure" 
 			"-list-run-time"
                         "-list-test-time"
+			"-regen-testfiles"
 			
 			;; misc queries
 			"-list-disks"
 			"-list-targets"
 			"-list-db-targets"
@@ -947,47 +959,116 @@
 (if (args:get-arg "-adjutant")
     (begin
       (adjutant-run)
       (set! *didsomething* #t)))
 
-(if (or (args:get-arg "-list-servers")
-        (args:get-arg "-kill-servers"))
-    (let ((tl (launch:setup)))
-      (if tl ;; all roads from here exit
-	  (let* ((servers (server:get-list *toppath*))
-		 (fmtstr  "~33a~22a~20a~20a~8a\n"))
-	    (format #t fmtstr "ID" "host:port" "age (hms)" "Last mod" "State")
-	    (format #t fmtstr "==" "=========" "=========" "========" "=====")
-	    (for-each ;;  ( mod-time host port start-time pid )
-	     (lambda (server)
-	       (let* ((mtm (any->number (car server)))
-		      (mod (if mtm (- (current-seconds) mtm) "unk"))
-		      (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds))))
-		      (url (conc (cadr server) ":" (caddr server)))
-		      (pid (list-ref server 4))
-		      (alv (if (number? mod)(< mod 10) #f)))
-		 (format #t
-			 fmtstr
-			 pid
-			 url
-			 (seconds->hr-min-sec age)
-			 (seconds->hr-min-sec mod)
-			 (if alv "alive" "dead"))
-		 (if (and alv
-			  (args:get-arg "-kill-servers"))
-		     (begin
-		       (debug:print-info 0 *default-log-port* "Attempting to kill server with pid " pid)
-		       (server:kill server)))))
-	     (sort servers (lambda (a b)
-			     (let ((ma (or (any->number (car a)) 9e9))
-				   (mb (or (any->number (car b)) 9e9)))
-			       (> ma mb)))))
-	    ;; (debug:print-info 1 *default-log-port* "Done with listservers")
-	    (set! *didsomething* #t)
-	    (exit))
-	  (exit))))
-      ;; must do, would have to add checks to many/all calls below
+(if (args:get-arg "-list-servers") ;; print one table row per server info record, then exit
+  (let* ((tl (launch:setup)) ;; need this to initialize *toppath*
+        (servdir (server:get-servinfo-dir *toppath*)) ;; NOTE(review): servdir/servfiles are not used below
+        (servfiles (glob (conc servdir "/*:*.db")))
+        (fmtstr  "~10a~22a~10a~25a~25a~8a\n") ;; column layout shared by header and rows
+        (dbfiles (append (glob (conc *toppath* "/.mtdb/main.db")) (glob (conc *toppath* "/.mtdb/?.db"))(glob (conc *toppath* "/.mtdb/??.db")))) ;; main.db plus one- and two-character db names
+     )
+     (format #t fmtstr "DB" "host:port" "PID" "age" "last mod" "state")
+     (for-each
+        (lambda (dbfile)
+          (let* (
+            (dbfname (conc (pathname-file dbfile) ".db")) ;; file name without its directory
+            )
+                (let (
+                  (sinfos (server:get-server-info-sorted *toppath* dbfname))
+                  )
+                  (for-each 
+                     (lambda (sinfo) ;; sinfo is indexed positionally: 0 host, 1 port, 2 time, 3 server-id, 4 pid, 5 db
+                       (let* (
+                         (db (list-ref sinfo 5))
+                         (pid (list-ref sinfo 4))
+                         (host (list-ref sinfo 0))
+                         (port (list-ref sinfo 1))
+                         (server-id (list-ref sinfo 3))
+                         (age (seconds->hr-min-sec (- (current-seconds) (list-ref sinfo 2)))) ;; element 2 feeds both age and last-mod
+                         (last-mod (seconds->string (list-ref sinfo 2)))
+                         (status (system (conc "ssh " host " ps " pid " > /dev/null"))) ;; exit status of ps run on host over ssh
+                         (state (if (> status 0) ;; any non-zero status (including an ssh failure) is shown as dead
+                                  "dead"
+                                  (tt:ping host port server-id 0) ;; otherwise the state column shows whatever tt:ping returns
+                                ))
+                         )
+                         (format #t fmtstr db (conc host ":" port) pid age last-mod state)
+                       )
+                     )
+                     sinfos
+                  )
+                ) 
+          )
+       )
+       dbfiles
+     )
+     (set! *didsomething* #t)
+     (exit)  ;; all roads from here exit
+  )
+)
+
+
+(if (args:get-arg "-kill-servers")
+  ;; kill each listed server over ssh, report it, delete its server file, then exit
+  (let* ((tl (launch:setup)) ;; need this to initialize *toppath*
+        (servdir (server:get-servinfo-dir *toppath*))
+        (servfiles (glob (conc servdir "/*:*.db")))
+        (fmtstr  "~10a~22a~10a~25a~25a~8a\n")
+        (dbfiles (append (glob (conc *toppath* "/.mtdb/main.db")) (glob (conc *toppath* "/.mtdb/?.db"))(glob (conc *toppath* "/.mtdb/??.db"))))
+     )
+     (format #t fmtstr "DB" "host:port" "PID" "age" "last mod" "state")
+     (for-each
+        (lambda (dbfile)
+          (let* (
+            (dbfname (conc (pathname-file dbfile) ".db"))
+            (sfiles   (server:find-server *toppath* dbfname))
+            )
+            (for-each 
+              (lambda (sfile) ;; NOTE(review): the full sinfos list is re-processed for every sfile - confirm intended
+                (let (
+                  (sinfos (server:get-server-info-sorted *toppath* dbfname))
+                  )
+                  (for-each 
+                     (lambda (sinfo)
+                       (let* (
+                         (db (list-ref sinfo 5))
+                         (pid (list-ref sinfo 4))
+                         (host (list-ref sinfo 0))
+                         (port (list-ref sinfo 1))
+                         (server-id (list-ref sinfo 3))
+                         (age (seconds->hr-min-sec (- (current-seconds) (list-ref sinfo 2))))
+                         (last-mod (seconds->string (list-ref sinfo 2)))
+                         (killed (system (conc "ssh " host " kill " pid " > /dev/null")))
+                         (dummy2 (sleep 1)) ;; give the process a moment to exit before checking with ps
+                         (state (if (> (system (conc "ssh " host " ps " pid " > /dev/null")) 0) "dead" "alive"))
+                            )
+                         (format #t fmtstr db (conc host ":" port) pid age last-mod state)
+                         (delete-file* sfile)
+                       )
+                     )
+                     sinfos
+                  )
+                ) 
+              )
+              sfiles
+            )
+          )
+       )
+       dbfiles
+     )
+     ;; remove this db, because otherwise metadata contains records for old servers, and this causes a problem with db:no-sync-get-lock-with-id.
+     (if (file-exists? (conc *toppath* "/.mtdb/no-sync.db"))
+       (delete-file (conc *toppath* "/.mtdb/no-sync.db"))
+     )
+     (set! *didsomething* #t)
+     (exit)  
+  )
+)
+
+
 
 ;;======================================================================
 ;; Weird special calls that need to run *after* the server has started?
 ;;======================================================================
 
@@ -1062,11 +1143,11 @@
 			 (configf:lookup data "default" (args:get-arg "-var")))))
 	    (if val (print val))))
 	 ((or (not (args:get-arg "-dumpmode"))
               (string=? (args:get-arg "-dumpmode") "ini"))
 	  (configf:config->ini data))
-	 ((string=? (args:get-arg "-dumpmode") "sexp")
+	 ((string=? (args:get-arg "-dumpmode") "sexpr")
 	  (pp (hash-table->alist data)))
 	 ((string=? (args:get-arg "-dumpmode") "json")
 	  (json-write data))
 	 (else
 	  (debug:print-error 0 *default-log-port* "-dumpmode of " (args:get-arg "-dumpmode") " not recognised")))
@@ -1084,11 +1165,11 @@
 	(let ((val (configf:lookup data (args:get-arg "-section")(args:get-arg "-var"))))
 	  (if val (print val))))
 
        ;; print just a section if only -section
 
-       ((equal? (args:get-arg "-dumpmode") "sexp")
+       ((equal? (args:get-arg "-dumpmode") "sexpr")
 	(pp (hash-table->alist data)))
        ((equal? (args:get-arg "-dumpmode") "json")
 	(json-write data))
        ((or (not (args:get-arg "-dumpmode"))
 	    (string=? (args:get-arg "-dumpmode") "ini"))
@@ -1144,12 +1225,10 @@
 	  (begin
 	    (debug:print-error 0 *default-log-port* "Attempted " action "on test(s) but run area config file not found")
 	    (exit 1))
 	  ;; put test parameters into convenient variables
 	  (begin
-	    ;; check for correct version, exit with message if not correct
-	    (common:exit-on-version-changed)
 	    (runs:operate-on  action
 			      target
 			      runname
 			      testpatt
 			      state:  (common:args-get-state)
@@ -1433,10 +1512,15 @@
 				  db:test-record-fields
 				  t)))
 	       (adj-tests-spec (delete-duplicates (if tests-spec (cons "id" tests-spec) db:test-record-fields))) ;; '("id"))))
 	       (steps-spec  (alist-ref "steps" fields-spec equal?))
 	       (test-field-index (make-hash-table)))
+	  (if (and (args:get-arg "-dumpmode")
+		   (not (member (args:get-arg "-dumpmode") '("sexpr" "json" "ods" "list"))))
+	      (begin
+		(debug:print 0 *default-log-port* "ERROR: dumpmode "(args:get-arg "-dumpmode")" not recognised. Use sexpr, json, ods or list")
+		(exit)))
 	  (if (and tests-spec (not (null? tests-spec))) ;; do some validation and processing of the test-spec
 	      (let ((invalid-tests-spec (filter (lambda (x)(not (member x db:test-record-fields))) tests-spec)))
 		(if (null? invalid-tests-spec)
 		    ;; generate the lookup map test-field-name => index-number
 		    (let loop ((hed (car adj-tests-spec))
@@ -1488,12 +1572,12 @@
 			;; (mutils:hierhash-set! data (conc (db:get-value-by-header run header "id"))  targetstr runname "meta" "id"         )
 			;; (mutils:hierhash-set! data (db:get-value-by-header run header "event_time") targetstr runname "meta" "event_time" )
 			;; (mutils:hierhash-set! data (db:get-value-by-header run header "comment")    targetstr runname "meta" "comment"    )
 			;; ;; add last entry twice - seems to be a bug in hierhash?
 			;; (mutils:hierhash-set! data (db:get-value-by-header run header "comment")    targetstr runname "meta" "comment"    )
-		       (else
-			(if (null? runs-spec)
+                        ((#f list)
+			 (if (null? runs-spec)
 			    (print "Run: " targetstr "/" runname 
 				   " status: " (db:get-value-by-header run header "state")
 				   " run-id: " run-id ", number tests: " (length tests)
 				   " event_time: " (db:get-value-by-header run header "event_time"))
 			    (begin
@@ -1504,11 +1588,14 @@
 			       (lambda (field-name)
 				 (if (equal? field-name "target")
 				     (display (conc "target: " targetstr " "))
 				     (display (conc field-name ": " (db:get-value-by-header run header (conc field-name)) " "))))
 			       runs-spec)
-			      (newline)))))
+			      (newline))))
+		       (else
+			(debug:print 0 *default-log-port* "ERROR: dumpmode "(args:get-arg "-dumpmode")" not recognised. Use sexpr, json, ods or list")
+			))
 		       
 		     (for-each 
 		      (lambda (test)
 		      	(common:debug-handle-exceptions #f
 			 exn
@@ -1996,10 +2083,25 @@
 		  (paths    (tests:test-get-paths-matching keys target (args:get-arg "-test-files"))))
 	     (for-each (lambda (path)
 			 (print path))
 		       paths))))))
 
+;;======================================================================
+;; Utils for test areas
+;;======================================================================
+
+;; -regen-testfiles: rewrite the script/logpro files; requires MT_TEST_RUN_DIR to be set
+(when (args:get-arg "-regen-testfiles")
+  (cond
+   ((getenv "MT_TEST_RUN_DIR")
+    (launch:setup)
+    (change-directory (getenv "MT_TEST_RUN_DIR"))
+    (launch:extract-scripts-logpro (getenv "MT_TEST_RUN_DIR") (getenv "MT_TEST_NAME") (getenv "MT_ITEMPATH") #f)
+    (set! *didsomething* #t))
+   (else
+    (debug:print 0 *default-log-port* "ERROR: Must run -regen-testfiles in a test environment (i.e. test xterm from dashboard)"))))
+		 	  
 ;;======================================================================
 ;; Archive tests
 ;;======================================================================
 ;; Archive tests matching target, runname, and testpatt
 (if (equal? (args:get-arg "-archive") "replicate-db")
@@ -2478,10 +2580,16 @@
        'dejunk
        'adj-testids
        'old2new
        )
       (set! *didsomething* #t)))
+
+;; -import-sexpr fname: hand the named sexpr file to rmt:import-sexpr
+(when (args:get-arg "-import-sexpr")
+  (launch:setup)
+  (rmt:import-sexpr (args:get-arg "-import-sexpr"))
+  (set! *didsomething* #t))
 
 (when (args:get-arg "-sync-brute-force")
   (launch:setup)
   ((server:get-bruteforce-syncer (db:setup #t) persist-until-sync: #t))
   (set! *didsomething* #t))

Index: portlogger.scm
==================================================================
--- portlogger.scm
+++ portlogger.scm
@@ -26,11 +26,12 @@
 (declare (uses db))
 
 ;; lsof -i
 
 (define (portlogger:open-db fname)
-  (let* ((avail    (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away
+  (let* (;; (avail    (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away
+         (avail #t)
 	 (exists   (common:file-exists? fname))
 	 (db       (if avail 
 		       (sqlite3:open-database fname)
 		       (begin
 			 (system (conc "rm -f " fname))
@@ -56,12 +57,12 @@
             fail_count INTEGER DEFAULT 0,
             update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")
     db))
 
 (define (portlogger:open-run-close proc . params)
-  (let* ((fname  (conc "/tmp/." (current-user-name) "-portlogger.db"))
-	 (avail  (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
+  (let* ((fname  (conc "/tmp/." (current-user-name) "-portlogger.db")))
+	 ;; (avail  (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
     (handle-exceptions
      exn
      (begin
        ;; (release-dot-lock fname)
        (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)

Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -335,11 +335,11 @@
     (if success ;; success only tells us that the transport was
 	;; successful, have to examine the data to see if
 	;; there was a detected issue at the other end
 	(extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
 	(begin
-           (debug:print-error 0 *default-log-port* " dat=" dat) 
+           (debug:print-info 0 *default-log-port* "Bad return data from Megatest server: dat=" dat) 
            (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params))
 	)))
 
 (define (rmt:print-db-stats)
   (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
@@ -1095,5 +1095,85 @@
 #;(set-functions rmt:send-receive                       remote-server-url-set!
 	       http-transport:close-connections	      remote-conndat-set!
 	       debug:print                            debug:print-info
 	       remote-ro-mode                         remote-ro-mode-set!
 	       remote-ro-mode-checked-set!            remote-ro-mode-checked)
+
+
+;;======================================================================
+;; import an sexpr file into the db
+;;   each top-level element of the file is handed to rmt:import-target
+;;   returns (#f . message) when sexpr-file does not exist
+;;======================================================================
+
+(define (rmt:import-sexpr sexpr-file)
+  (cond
+   ((file-exists? sexpr-file)
+    (for-each rmt:import-target (with-input-from-file sexpr-file read)))
+   (else
+    (let ((errmsg (conc "ERROR: file "sexpr-file" not found")))
+      (debug:print 0 *default-log-port* errmsg)
+      (cons #f errmsg)))))
+
+;; import every run of one target; targ-dat is (target . list-of-run-dats)
+(define (rmt:import-target targ-dat)
+  (let ((target (car targ-dat)))
+    (for-each
+     (lambda (run-dat) ;; ("runname" ("data" ("testid" ("field" . "value") ...
+       (rmt:import-run target run-dat))
+     (cdr targ-dat))))
+
+;; Import one run and its tests. run-dat is (runname . alist); the alist is
+;; expected to hold "meta" (run fields incl. "id") and "data" (test records,
+;; each (test-id . fields)). A run without a usable id is logged and skipped.
+(define (rmt:import-run target run-dat)
+  (let* ((runname    (car run-dat))
+	 (all-dat    (cdr run-dat))
+	 (tests-data (alist-ref "data" all-dat equal?))
+	 (run-meta   (alist-ref "meta" all-dat equal?))
+	 (id-val     (and (list? run-meta) (alist-ref "id" run-meta equal?)))
+	 (run-id     (if (number? id-val) id-val
+			 (and (string? id-val) (string->number id-val)))))
+    (if (not run-id)
+	(debug:print 0 *default-log-port* "ERROR: rmt:import-run: no usable run id for run " runname ", skipping")
+	(begin
+	  (rmt:insert-run run-id target runname run-meta)
+	  (if (list? tests-data)
+	      (begin
+		(debug:print 0 *default-log-port* "Inserting " (length tests-data) " tests in run " runname)
+		(for-each (lambda (test-dat) (rmt:insert-test run-id (cdr test-dat))) ;; test-dat is (test-id . test-rec)
+			  tests-data))
+	      (debug:print 0 *default-log-port* "rmt:import-run: tests-data is empty"))))))
+
+;; insert run if not there, return id either way
+;; NOTE(review): the insert branch returns whatever the 'insert-run call
+;; returns - confirm that this is the run id
+(define (rmt:insert-run run-id target runname run-meta)
+  ;; ask the server for runs matching runname under target
+  (let ((found (rmt:send-receive 'simple-get-runs #f
+				  ;;    runpatt count offset target last-update)
+				  (list runname #f    #f     target #f))))
+    (cond
+     ((null? found)
+      (debug:print 0 *default-log-port* "inserting run for runname " runname " target " target)
+      (rmt:send-receive 'insert-run #f (list run-id target runname run-meta)))
+     (else
+      (simple-run-id (car found))))))
+
+
+;; insert one test record into run run-id
+;;   test-rec is an alist with string keys; "testname" and "item_path" are
+;;   used to look for an already existing test, in which case the record is
+;;   not sent and only a message is logged
+(define (rmt:insert-test run-id test-rec)
+  (let* ((testname  (alist-ref "testname" test-rec equal?))
+	 (item-path (alist-ref "item_path" test-rec equal?))
+	 (existing  (rmt:get-test-id run-id testname item-path)))
+    (cond
+     (existing
+      (debug:print 0 *default-log-port* "test "testname"/"item-path " already exists in run-id " run-id))
+     (else
+      ;; the complete record is passed on with the 'insert-test command
+      (rmt:send-receive 'insert-test run-id test-rec)))))
+
+
+

Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -799,50 +799,24 @@
 	(debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue"))
     ;; NOTE: these are all parent tests, items are not expanded yet.
     (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
     (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
       (if (> (length (hash-table-keys test-records)) 0)
-	  (let* ((keep-going        #t)
-		 (run-queue-retries 5)
-		;; (th1        (make-thread (lambda ()
-		;; 			    (handle-exceptions
-		;; 				exn
-		;; 				(begin
-		;; 				  (print-call-chain)
-		;; 				  (print " message: " ((condition-property-accessor 'exn 'message) exn)))
-		;; 			      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
-		;; 						    (any->number reglen) all-tests-registry)))
-		;; 			  "runs:run-tests-queue"))
-		 (th2        (make-thread (lambda ()			 ;; BBQ: why are we visiting ALL runs here?	    
-					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
-					    (let ((run-ids (rmt:get-all-run-ids)))
-					      (for-each (lambda (run-id)
-							  (if keep-going
-							      (handle-exceptions
-							       exn
-							       (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
-							       (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
-							run-ids)))
-					  "runs: mark-incompletes")))
-	    ;; (thread-start! th1)
-	    (thread-start! th2)
-	    ;; (thread-join! th1)
+	  (let* ()
 	    ;; just do the main stuff in the main thread
 	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
 								    (any->number reglen) all-tests-registry)
-	    (set! keep-going #f)
-	    (thread-join! th2)
 	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
 	    (if (> run-count 0) ;; handle reruns
 		(begin
 		  (if (not (hash-table-ref/default flags "-preclean" #f))
 		      (hash-table-set! flags "-preclean" #t))
 		  (if (not (hash-table-ref/default flags "-rerun" #f))
 		      (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS"))
 		  ;; recursive call to self
-      (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
-                  (launch:end-of-run-check run-id)))
+                  (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
+                 (launch:end-of-run-check run-id)))
 	  (debug:print-info 0 *default-log-port* "No tests to run")))
     (debug:print-info 4 *default-log-port* "All done by here")
     ;; TODO: try putting post hook call here
       
     ;  (debug:print-info 2 *default-log-port* " run-count " run-count)
@@ -1380,12 +1354,16 @@
 			      #f ;; yes, really
 			      (list (car tal)(cdr tal) reg reruns))
 			  (begin
 			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
 				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry."))
-			    ;; was: (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
-                            (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f)
+			    (let* ((test-id      (rmt:get-test-id run-id testname item-path))
+				   (test-info    (rmt:get-testinfo-state-status run-id test-id)) ;; we need *current* info
+				   (status       (db:test-status test-info)))
+			      (if (equal? status "KEEP_TRYING")
+				  (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f)
+				  (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)))
 			    (hash-table-set! test-registry hed 'removed) ;; was 0
                             (if (not (and (null? reg) (null? tal)))
                                 (runs:loop-values tal reg reglen regfull reruns)
                                 #f))))
 		     (else
@@ -2054,11 +2032,11 @@
     ;;
     ;; There is now a single call to runs:update-all-test_meta and this 
     ;; per-test call is not needed. Given the delicacy of the move to 
     ;; v1.55 this code is being left in place for the time being.
     ;;
-    (if (not (hash-table-ref/default *test-meta-updated* test-name #f))
+    (if (not (hash-table-exists? *test-meta-updated* test-name))
         (begin
           (hash-table-set! *test-meta-updated* test-name #t)
           (runs:update-test_meta test-name test-conf)))
     
     ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer"))
@@ -2380,11 +2358,11 @@
 	 (bup-mutex    (make-mutex))
          (keep-records (args:get-arg "-keep-records")) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".
 	 (test-records '())) ;; for tasks that we wish to operate on all tests in one fell swoop
 
     (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs))
-           (dbfile             (conc  *toppath* "/.megatest/main.db"))
+           (dbfile             (conc  *toppath* "/" *dbdir* "/main.db"))
            (readonly-mode      (not (file-write-access? dbfile))))
       (when (and readonly-mode
                  (member action write-access-actions))
         (debug:print-error 0 *default-log-port* dbfile " is readonly.  Cannot proceed with action ["action"] in which write-access isrequired .")
         (exit 1)))
@@ -2906,11 +2884,11 @@
 	      (fld (car  key))
 	      (val (configf:lookup test-conf "test_meta" fld)))
 	 ;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val)
 	 (if (and val (not (equal? (vector-ref currrecord idx) val)))
 	     (begin
-	       (debug:print 0 *default-log-port* "Updating " test-name " " fld " to " val)
+	       (debug:print 2 *default-log-port* "Updating " test-name " " fld " to " val)
 	       (rmt:testmeta-update-field test-name fld val)))))
      '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10)))))
 
 ;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..."
 ;;

Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -412,15 +412,257 @@
 		      (set! new-server-key (with-input-from-file start-flag (lambda () (read-line))))
 		      (equal? server-key new-server-key)))
 	       #t
                ;; If either of the above conditions is not true, print a "Gating server start" message, wait <idle-time> + 1, then call this function recursively. 
 	       (begin
-		 (debug:print-info 0 *default-log-port* "Gating server start, last start: "
+		 (debug:print-info 2 *default-log-port* "Gating server start, last start: "
 				   (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server"))
 		 
 		 (thread-sleep! ( + 1 idletime))
 		 (server:wait-for-server-start-last-flag areapath)))))))
+
+;; path of the .servinfo dir under areapath; the dir is created if it is absent
+(define (server:get-servinfo-dir areapath)
+  (let ((servinfo-dir (conc areapath"/.servinfo")))
+    (unless (file-exists? servinfo-dir) (create-directory servinfo-dir #t))
+    servinfo-dir))
+
+;; gets server info for every server file found for dbfname under areapath,
+;; sorted by start time, oldest first (ties broken by server-id, descending)
+;;
+;; returns list of (host port startseconds server-id pid dbfname servinfofile)
+;;
+(define (server:get-server-info-sorted areapath dbfname)
+  (let* ((sfiles   (server:find-server areapath dbfname))
+	 (sdats    (filter car (map server:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
+	 (sorted   (sort sdats (lambda (a b)
+				 (let* ((starta (list-ref a 2))
+					(startb (list-ref b 2)))
+				   ;; numeric compare: eq? is not reliable on numbers (start may be a flonum)
+				   (if (= starta startb)
+				       (string>? (list-ref a 3)(list-ref b 3)) ;; if servers started at same time look at server-id
+				       (< starta startb))))))
+	 (count    0))
+    (for-each
+     (lambda (rec)
+       (if (or (> (length sorted) 1)
+	       (common:low-noise-print 120 "server info sorted"))
+	   (debug:print 2 *default-log-port* "SERVER #"count": "(string-intersperse (map conc rec) ", ")))
+       (set! count (+ count 1)))
+     sorted)
+    sorted))
+
+;; remove stale server info files from the servinfo dir of areapath
+(define (server:clean-up-old areapath)
+  ;; any server file that has not been touched in ten minutes is effectively dead
+  (define (mod-time-of sfile) ;; falls back to "now", so a file that cannot be checked is never seen as old
+    (handle-exceptions
+	exn
+      (begin
+	(debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
+	(current-seconds))
+      (file-modification-time sfile)))
+  (for-each
+   (lambda (sfile)
+     (let ((modtime (mod-time-of sfile)))
+       (when (and (number? modtime)
+		  (> (- (current-seconds) modtime) 600))
+	 (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.")
+	 (handle-exceptions
+	     exn
+	   (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
+	   (delete-file sfile)))))
+   (glob (conc (server:get-servinfo-dir areapath)"/*"))))
+
+
+
+(define server-last-start 0) ;; (current-seconds) at the last server:run issued by server:choose-server
+
+;; oldest registered server determines the host; 'best picks a random one
+;; of (up to) the ten youngest servers on that host. If no server is found
+;; server:run is called and #f is returned ((#f . #f) for mode 'homehost).
+;; mode:
+;;   best - random server record from best-ten;  best-ten/all-valid - lists of records
+;;   home - host of the oldest server;  homehost - (host . am-home?);  home? - boolean
+;;   info - print oldest/youngest info;  count - number of servers on the chosen host
+(define (server:choose-server areapath #!optional (mode 'best))
+  ;; steps:
+  ;;   1. require http transport mode, then remove stale server info files
+  ;;   2. if server-last-start is less than 10 seconds in the past,
+  ;;      sleep server-start-delay seconds
+  ;;   3. sort the known servers by start time, newest first (oldest is last)
+  ;;   4. keep only servers on the same host as the oldest one
+  ;;   5. answer according to mode
+  ;; NOTE(review): no liveness ping is done here - confirm that is intended
+  (assert (eq? (rmt:transport-mode) 'http) "FATAL: server:run called with rmt:transport-mode="(rmt:transport-mode))
+  (server:clean-up-old areapath)
+  (let* ((since-last (- (current-seconds) server-last-start))
+        (server-start-delay 10))     
+    (if ( < (- (current-seconds) server-last-start) 10 )
+      (begin
+        (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
+        (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
+        (thread-sleep! server-start-delay)
+      )
+      (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
+    )
+  )
+  (let* ((serversdat  (server:get-servers-info areapath))
+	 (servkeys    (hash-table-keys serversdat))
+	 (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
+			  (sort servkeys ;; list of "host:port"
+				(lambda (a b)
+				  (>= (list-ref (hash-table-ref serversdat a) 2)
+				      (list-ref (hash-table-ref serversdat b) 2))))
+			  '())))
+    (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
+    (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
+    (if (not (null? by-time-asc))
+	(let* ((oldest     (last by-time-asc))
+	       (oldest-dat (hash-table-ref serversdat oldest))
+	       (host       (list-ref oldest-dat 0))
+	       (all-valid  (filter (lambda (x)
+				     (equal? host (list-ref (hash-table-ref serversdat x) 0)))
+				   by-time-asc))
+	       (best-ten  (lambda ()
+			     (if (> (length all-valid) 11)
+				 (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out
+				 (if (> (length all-valid) 8)
+				     (drop-right all-valid 1)
+				     all-valid))))
+	       (names->dats (lambda (names)
+			      (map (lambda (x)
+				     (hash-table-ref serversdat x))
+				   names)))
+	       (am-home?    (lambda ()
+			      (let* ((currhost (get-host-name))
+				     (bestadrs (server:get-best-guess-address currhost)))
+				(or (equal? host currhost)
+				    (equal? host bestadrs))))))
+	  (case mode
+	    ((info)
+	     (debug:print 0 *default-log-port* "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
+	     (debug:print 0 *default-log-port* "youngest: "(hash-table-ref serversdat (car all-valid))))
+	    ((home)     host)
+	    ((homehost) (cons host (am-home?))) ;; shut up old code
+	    ((home?)    (am-home?))
+	    ((best-ten)(names->dats (best-ten)))
+	    ((all-valid)(names->dats all-valid))
+	    ((best)     (let* ((best-ten (best-ten))
+			       (len       (length best-ten)))
+			  (hash-table-ref serversdat (list-ref best-ten (random len)))))
+	    ((count)(length all-valid))
+	    (else
+	     (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
+	     #f)))
+	(begin
+	  (server:run areapath)
+          (set! server-last-start (current-seconds))
+	  ;; (thread-sleep! 3)
+	  (case mode
+	    ((homehost) (cons #f #f))
+	    (else	#f))))))
+
+
+
+
+;; collect the start info of all servers registered in the servinfo dir of areapath
+;;
+;; returns a hash table keyed by the info file name (directory stripped) with
+;; the (host port start server-id pid) list from server:logf-get-start-info as
+;; value; files with missing or malformed info are reported and left out
+;;
+(define (server:get-servers-info areapath)
+  (let ((servinfodir (server:get-servinfo-dir areapath))
+	(res         (make-hash-table)))
+    (if (not (file-exists? servinfodir))
+	(create-directory servinfodir))
+    (for-each
+     (lambda (f)
+       (let ((serverdat (server:logf-get-start-info f)))
+	 (match serverdat
+	   ((host port start server-id pid)
+	    (if (and host port start server-id pid)
+		(hash-table-set! res (pathname-strip-directory f) serverdat)
+		(debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))
+	   (else
+	    (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))))
+     (glob (conc servinfodir"/*")))
+    res))
+
+
+
+;; given a path to a server info file return: host port startseconds server-id pid dbfname logf
+;; the line it is looking for (searched in roughly the first 500 lines) has the form:
+;;     SERVER STARTED: <host>:<port> AT <seconds> server-id: <id> pid: <pid> dbfname: <dbfname>
+;; if the file cannot be read or holds no such line, all fields except logf are #f
+;;
+(define (server:server-get-info logf)
+  (let* ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)"))
+	 (bad-dat   (list #f #f #f #f #f #f logf))
+	 (lines     (handle-exceptions
+			exn
+		      (begin
+			;; WARNING: this is potentially dangerous to blanket ignore the errors
+			(debug:print-info 0 *default-log-port* "Unable to get server info from "logf", exn="(condition->list exn))
+			'()) ;; no idea what went wrong, treat it as a file without lines
+		      (with-input-from-file logf read-lines))))
+    (let scan ((remaining lines)
+	       (lnum      0))
+      (if (null? remaining)
+	  bad-dat ;; ran out of lines (or never had any)
+	  (let ((mlst (string-match server-rx (car remaining))))
+	    (cond
+	     (mlst ;; have a not null list
+	      (match mlst
+		((_ host port start server-id pid dbfname)
+		 (list host
+		       (string->number port)
+		       (string->number start)
+		       server-id
+		       (string->number pid)
+		       dbfname
+		       logf))
+		(else
+		 (debug:print 0 *default-log-port* "ERROR: did not recognise SERVER line info "mlst)
+		 bad-dat)))
+	     ((> lnum 500) ;; give up if more than 500 lines of server log read
+	      bad-dat)
+	     (else
+	      (scan (cdr remaining) (+ lnum 1)))))))))
+
+
+
+;; find valid server
+;; get servers listed, last part of name must match :<dbfname>
+;; a file is kept when "ssh <host> ps <pid>" exits with 0 for the host and pid
+;; recorded in it; every other file is deleted
+;; returns the list of kept files
+;; NOTE: one ssh call is made per server file found
+;; NOTE(review): reads via tt:server-get-info, not server:server-get-info - confirm
+;; future: ping oldest, if alive remove other :<dbfname> files
+;;
+(define (server:find-server areapath dbfname)
+  (let* ((servdir    (server:get-servinfo-dir areapath))
+	 (sfiles     (glob (conc servdir"/*:"dbfname)))
+	 (good-files '()))
+    (for-each
+     (lambda (sfile)
+       (let* ((sinfo  (tt:server-get-info sfile))
+	      (host   (list-ref sinfo 0))
+	      (pid    (list-ref sinfo 4))
+	      ;; a record without host or pid cannot be checked: skip the ssh
+	      ;; call and treat the file as dead
+	      (alive  (and host pid
+			   (= 0 (system (conc "ssh " host " ps " pid " > /dev/null"))))))
+	 (if alive
+	     (set! good-files (cons sfile good-files))
+	     (delete-file* sfile))))
+     sfiles)
+    (debug:print-info 2 *default-log-port* "server:find-server: good-files: " good-files " sfiles: " sfiles)
+    good-files))
+
 
 
         
 ;; kind start up of server, wait before allowing another server for a given
 ;; area to be launched
@@ -437,10 +679,31 @@
 	  (system (conc "touch " start-flag)) ;; lazy but safe
 	  (server:run areapath)
 	  (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".
 	  (common:simple-file-release-lock lock-file)))
       (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.")))
+
+;; write the server info file <servinfo-dir>/<host>:<port>-<pid>
+;; returns the server id (the value of server:mk-signature for areapath)
+;; side-effects:
+;;   the cleanup proc (deletes the info file) and the file name are stored in ttdat
+;; NOTE(review): ttdat is neither a parameter nor a local here - confirm it is a global
+;; NOTE(review): the line written has no "dbfname:" field - confirm readers expect that
+(define (server:create-server-registration-file areapath host port)
+  (let* ((info-dir  (server:get-servinfo-dir areapath))
+	 (info-file (conc info-dir"/"host":"port"-"(current-process-id)))
+	 (sig       (server:mk-signature areapath))
+	 (remover   (lambda () (delete-file* info-file))))
+    (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn")
+    (tt-cleanup-proc-set! ttdat remover)
+    (tt-servinf-file-set! ttdat info-file)
+    (with-output-to-file info-file
+      (lambda ()
+	(print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "sig" pid: "(current-process-id))))
+    sig))
+
+
 
 ;; this one seems to be the general entry point
 ;;
 (define (server:start-and-wait areapath #!key (timeout 60))
   (let ((give-up-time (+ (current-seconds) timeout)))

Index: tests.scm
==================================================================
--- tests.scm
+++ tests.scm
@@ -1639,11 +1639,11 @@
 		(if (and testexists
 			 cache-file
 			 (file-write-access? cache-path)
 			 allow-write-cache)
 		    (let ((tpath (conc cache-path "/.testconfig")))
-		      (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath)
+		      (debug:print-info 2 *default-log-port* "Caching testconfig for " test-name " in " tpath)
                       (if (and tcfg (not (common:in-running-test?)))
                           (configf:write-alist tcfg tpath))))
 		tcfg))))))
   
 ;; sort tests by priority and waiton

ADDED   utils/convert-db.sh
Index: utils/convert-db.sh
==================================================================
--- /dev/null
+++ utils/convert-db.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Convert a megatest 1.65/1.70 area to the 1.71/1.80 db format. Run it in the top directory of the area.
+if [ ! -f "megatest.config" ]; then
+    echo "The file 'megatest.config' does not exist. This must be run in a megatest area."
+    exit 1
+fi
+if [ -d ".mtdb" ]; then
+    echo "The .mtdb directory already exists. Will not do the conversion"
+    exit 1
+fi
+if [ -d ".megatest" ]; then
+    echo "Found a .megatest directory. Will convert from megatest 1.70 to 1.71/1.80 format"
+    /p/foundry/env/pkgs/megatest/1.70/16/bin/megatest -list-runs % -dumpmode sexpr > data.sexpr
+else 
+    if [ -f "megatest.db" ]; then
+        echo "Found megatest.db. Will convert from megatest 1.65 to 1.71/1.80 format"
+        /p/foundry/env/pkgs/megatest/1.65/92/bin/megatest -list-runs % -dumpmode sexpr > data.sexpr
+    else
+        echo "Did not find .megatest or megatest.db. Cannot do the conversion"
+        exit 1
+    fi
+fi
+which megatest
+megatest -import-sexpr data.sexpr
+