Index: Makefile
==================================================================
--- Makefile
+++ Makefile
@@ -28,11 +28,11 @@
            ezsteps.scm lock-queue.scm sdb.scm rmt.scm api.scm		\
            subrun.scm portlogger.scm archive.scm env.scm		\
            diff-report.scm cgisetup/models/pgdb.scm
 
 # module source files
-MSRCFILES = mutils.scm
+MSRCFILES = adjutant.scm mutils.scm mttop.scm
 # ftail.scm rmtmod.scm commonmod.scm removed
 # MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm	\
 #             mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm	\
 #             rmtmod.scm apimod.scm
 
@@ -315,19 +315,40 @@
 # install dashboard as dboard so wrapper script can be called dashboard
 $(PREFIX)/bin/.$(ARCHSTR)/dboard : dboard $(FILES) utils/mk_wrapper
 	utils/mk_wrapper $(PREFIX) dboard $(PREFIX)/bin/dashboard
 	chmod a+x $(PREFIX)/bin/dashboard
 	$(INSTALL) dboard $(PREFIX)/bin/.$(ARCHSTR)/dboard
+
+$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so : lib/libpangox-1.0.so
+	if [[ $(ARCHSTR) == 12.5 ]]; then \
+	mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \
+	$(INSTALL) lib/libpangox-1.0.so $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so; \
+	fi
+
+$(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 : lib/libpangox-1.0.so.0
+	if [[ $(ARCHSTR) == 12.5 ]]; then \
+	mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \
+	$(INSTALL) lib/libpangox-1.0.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0; \
+        fi
+
+$(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0 : lib/libxcb-xlib.so.0
+	if [[ $(ARCHSTR) == 12.5 ]]; then \
+	mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib; \
+	$(INSTALL) lib/libxcb-xlib.so.0 $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0; \
+        fi
 
 install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
           $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
           $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \
 	  $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
 	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \
 	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
 	  $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \
-          $(PREFIX)/share/js/jquery-3.1.0.slim.min.js 
+          $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \
+          $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \
+          $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \
+          $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0
 #         $(PREFIX)/bin/.$(ARCHSTR)/ndboard
 
 # $(PREFIX)/bin/newdashboard
 
 $(PREFIX)/bin/.$(ARCHSTR) : 

Index: adjutant.scm
==================================================================
--- adjutant.scm
+++ adjutant.scm
@@ -22,12 +22,23 @@
 
 (module adjutant *
 
 (import scheme chicken data-structures extras files)
 (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69
-	md5 message-digest
+	md5 message-digest matchable
 	regex srfi-1)
 
-(define (adjutant-run)
-  (print "Running the adjutant!"))
+(define (adjutant-run host-type rmt:no-sync-take-job)
+  (print "Running the adjutant!")
+  (let loop ((wait-count 0))
+    (if (< wait-count 10) ;; 6 x 10 seconds = one minute
+	(let* ((dat (rmt:no-sync-take-job host-type)))
+	  (match dat
+	    ((id ht vars exekey cmdline state event-time last-update)
+	     (system cmdline)
+	     (loop 0))
+	    (else
+	     (thread-sleep! 10)
+	     (loop (+ wait-count 1)))))
+	(print "I'm bored. Exiting."))))
 
 )

Index: api.scm
==================================================================
--- api.scm
+++ api.scm
@@ -253,17 +253,20 @@
 		   ;; NO SYNC DB
 		   ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
 		   ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
 		   ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))
 		   ((no-sync-get-lock)          (apply db:no-sync-get-lock    *no-sync-db* params))
+		   ((no-sync-add-job)           (apply db:no-sync-add-job     *no-sync-db* params))
+		   ((no-sync-take-job)          (apply db:no-sync-take-job    *no-sync-db* params))
+		   ((no-sync-job-records-clean) (apply db:no-sync-job-records-clean *no-sync-db* params))
 		   
                    ;; ARCHIVES
                    ;; ((archive-get-allocations)   
                    ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
                    ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
                    ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))
-
+		   
                    ;;======================================================================
                    ;; READ ONLY QUERIES
                    ;;======================================================================
 
                    ;; KEYS

Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -3210,31 +3210,39 @@
 ;; arm cubie01 cubie02
 ;; x86_64 zeus xena myth01
 ;; allhosts #{g hosts arm} #{g hosts x86_64}
 ;; 
 ;; [host-types]
+;; C/M/A      lets megatest know this launcher provides C cores, M bytes memory for architecture A 
+;; 2/2G/arm   smart -cores 2 -memory 2G -arch arm
 ;; general #MTLOWESTLOAD #{g hosts allhosts}
 ;; arm     #MTLOWESTLOAD #{g hosts arm}
 ;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo
+;;
+;; NOTE: host-rules is ONLY used for MTLOWESTLOAD
 ;;
 ;; [host-rules]
 ;; # maxnload   => max normalized load
 ;; # maxnjobs   => max jobs per cpu
 ;; # maxjobrate => max jobs per second
 ;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 
 ;; 
 ;; [launchers]
 ;; envsetup general
-;; xor/%/n 4C16G
+;; xor/%/n 2/2G/arm
 ;; % nbgeneral
 ;; 
 ;; [jobtools]
 ;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match.
 ;; flexi-launcher yes  
 ;; launcher nbfake
+;; mode adjutant|normal (default is normal)
+;; 
+;;
+;; mode is 'normal (i.e. directly use launcher) or 'adjutant (i.e. use adjutant)
 ;;
-(define (common:get-launcher configdat testname itempath)
+(define (common:get-launcher configdat testname itempath mode)
   (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher")))
     (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher
 	     (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no")))
 	(let* ((launchers         (hash-table-ref/default configdat "launchers" '())))
 	  (if (null? launchers)
@@ -3241,32 +3249,36 @@
 	      fallback-launcher
 	      (let loop ((hed (car launchers))
 			 (tal (cdr launchers)))
 		(let ((patt      (car hed))
 		      (host-type (cadr hed)))
-		  (if (tests:match patt testname itempath)
+		  (if (tests:match patt testname itempath) ;; have a launcher match for this test
 		      (begin
 			(debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type)
-			(let ((launcher (configf:lookup configdat "host-types" host-type)))
+			(let ((launcher (configf:lookup configdat "host-types" host-type))) ;; find the actual launcher from the host-types table
+			  ;; if we are in adjutant mode then we want to return both host-type and launcher
 			  (if launcher
 			      (let* ((launcher-parts (string-split launcher))
 				     (launcher-exe   (car launcher-parts)))
 				(if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline
 				    (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat))
 						    (count     100))
 				      (if targ-host
 					  (conc "remrun " targ-host)
 					  (if (> count 0)
+					      
 					      (begin
 						(debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type)
 						(thread-sleep! (- 101 count))
 						(host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)
 							   (- count 1)))
 					      (begin
 						(debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type)
 						(exit)))))
-				    launcher))
+				    (case mode
+				      ((adjutant) (list host-type launcher))
+				      (else       launcher))))
 			      (begin
 				(debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type)
 				(if (null? tal)
 				    fallback-launcher
 				    (loop (car tal)(cdr tal)))))))

Index: configf.scm
==================================================================
--- configf.scm
+++ configf.scm
@@ -530,12 +530,12 @@
 (define configf:read-file read-config)
 
 ;; safely look up a value that is expected to be a number, return
 ;; a default (#f unless provided)
 ;;
-(define (configf:lookup-number cfdat section varname #!key (default #f))
-  (let* ((val (configf:lookup *configdat* section varname))
+(define (configf:lookup-number cfgdat section varname #!key (default #f))
+  (let* ((val (configf:lookup cfgdat section varname))
          (res (if val
                   (string->number (string-substitute "\\s+" "" val #t))
                   #f)))
     (cond
      (res  res)

Index: dashboard-tests.scm
==================================================================
--- dashboard-tests.scm
+++ dashboard-tests.scm
@@ -324,12 +324,12 @@
 			    (lambda (state color)
 			      (for-each 
 			       (lambda (btn)
 				 (let* ((name     (iup:attribute btn "TITLE"))
 					(newcolor (if (equal? name state) color "192 192 192")))
-				   (if (not (colors-similar? newcolor (iup:attribute btn "BGCOLOR")))
-				       (iup:attribute-set! btn "BGCOLOR" newcolor))))
+				   (if (not (colors-similar? newcolor (iup:attribute btn "FGCOLOR")))
+				       (iup:attribute-set! btn "FGCOLOR" newcolor))))
 			       btns)))
 	       btns))
       (apply iup:hbox
 	     (iup:label "STATUS:" #:size "30x")
 	     (let* ((btns  (map (lambda (status)
@@ -358,12 +358,12 @@
 			    (lambda (status color)
 			      (for-each 
 			       (lambda (btn)
 				 (let* ((name     (iup:attribute btn "TITLE"))
 					(newcolor (if (equal? name status) color "192 192 192")))
-				   (if (not (colors-similar? newcolor (iup:attribute btn "BGCOLOR")))
-				       (iup:attribute-set! btn "BGCOLOR" newcolor))))
+				   (if (not (colors-similar? newcolor (iup:attribute btn "FGCOLOR")))
+				       (iup:attribute-set! btn "FGCOLOR" newcolor))))
 			       btns)))
 	       btns))))))
 
 (define (dashboard-tests:run-a-step info)
   #t)

Index: dashboard.scm
==================================================================
--- dashboard.scm
+++ dashboard.scm
@@ -1169,13 +1169,14 @@
 			   (button     (vector-ref columndat rown))
 			   (color      (car (gutils:get-color-for-state-status teststate teststatus)))
 			   (curr-color (vector-ref buttondat 1)) ;; (iup:attribute button "BGCOLOR"))
 			   (curr-title (vector-ref buttondat 2))) ;; (iup:attribute button "TITLE")))
 		      (if (not (equal? curr-color color))
-			  (iup:attribute-set! button "BGCOLOR" color))
+			  #;(iup:attribute-set! button "BGCOLOR" color)
+			  (iup:attribute-set! button "FGCOLOR" color))
 		      (if (not (equal? curr-title buttontxt))
-			  (iup:attribute-set! button "TITLE"   buttontxt))
+			  (iup:attribute-set! button "TITLE" (conc "<span weight=\"bold\">"  buttontxt "</span>")))
 		      (vector-set! buttondat 0 run-id)
 		      (vector-set! buttondat 1 color)
 		      (vector-set! buttondat 2 buttontxt)
 		      (vector-set! buttondat 3 testdat)
 		      (vector-set! buttondat 4 run-key)))
@@ -2500,22 +2501,22 @@
 	   (set! hide (iup:button "Hide"
 				  #:expand "NO" #:size "40x15" ;; #:expand "HORIZONTAL"
 				  #:action (lambda (obj)
 					     (dboard:tabdat-hide-not-hide-set! tabdat #t) ;; (not (dboard:tabdat-hide-not-hide tabdat)))
 					     ;; (iup:attribute-set! obj "TITLE" (if (dboard:tabdat-hide-not-hide tabdat) "HideTests" "NotHide"))
-					     (iup:attribute-set! hide "BGCOLOR" sel-color)
-					     (iup:attribute-set! show "BGCOLOR" nonsel-color)
+					     (iup:attribute-set! hide "FGCOLOR" sel-color)
+					     (iup:attribute-set! show "FGCOLOR" nonsel-color)
 					     (mark-for-update tabdat))))
 	   (set! show (iup:button "Show"
 				  #:expand "NO" #:size "40x15" ;; #:expand "HORIZONTAL"
 				  #:action (lambda (obj)
 					     (dboard:tabdat-hide-not-hide-set! tabdat #f) ;; (not (dboard:tabdat-hide-not-hide tabdat)))
-					     (iup:attribute-set! show "BGCOLOR" sel-color)
-					     (iup:attribute-set! hide "BGCOLOR" nonsel-color)
+					     (iup:attribute-set! show "FGCOLOR" sel-color)
+					     (iup:attribute-set! hide "FGCOLOR" nonsel-color)
 					     (mark-for-update tabdat))))
-	   (iup:attribute-set! hide "BGCOLOR" sel-color)
-	   (iup:attribute-set! show "BGCOLOR" nonsel-color)
+	   (iup:attribute-set! hide "FGCOLOR" sel-color)
+	   (iup:attribute-set! show "FGCOLOR" nonsel-color)
 	   ;; (dboard:tabdat-hide-not-hide-button-set! tabdat hideit) ;; never used, can eliminate ...
            (iup:vbox
             (iup:hbox hide show)
             sort-lb))) 
 	)
@@ -2884,10 +2885,11 @@
 	(let* ((button-key (mkstr runnum testnum))
 	       (butn       (iup:button
 			    "" ;; button-key 
 			    #:size (conc cell-width btn-height )
 			    #:expand "HORIZONTAL"
+			    #:MARKUP "YES"
 			    #:fontsize btn-fontsz
 			    #:button-cb
 			    (lambda (obj a pressed x y btn . rem)
 			      ;; (print "pressed= " pressed " x= " x " y= " y " rem=" rem " btn=" btn " string? " (string? btn))
 			      (if  (substring-index "3" btn)

Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -2166,14 +2166,66 @@
 	 (db-exists (common:file-exists? dbname))
 	 (db     (sqlite3:open-database dbname)))
     (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
     (if (not db-exists)
 	(begin
-	  (sqlite3:execute db "PRAGMA synchronous = 0;")
 	  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")
 	  (sqlite3:execute db "PRAGMA journal_mode=WAL;")))
+    ;; MOVE THIS TABLE CREATION TO THE (begin above in about six months (it is Sep 2020 right now).
+    (sqlite3:execute db "CREATE TABLE IF NOT EXISTS jobs_queue (id INTEGER PRIMARY KEY, host_type TEXT, cores INTEGER, memory TEXT, vars TEXT, exekey TEXT, cmdline TEXT, state TEXT, event_time INTEGER, last_update INTEGER);")
+    ;; not sure I'll use this next one. I prefer if tests simply append to a file:
+    ;;    last-update-seconds cpuload tmpspace rundirspace
+    (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_extra_data (id INTEGER PRIMARY KEY, run_id INTEGER, test_id INTEGER, last_seen_running INTEGER);") 
+    (sqlite3:execute db "PRAGMA synchronous = 0;")
     db))
+
+(define (db:no-sync-add-job db-in host-type vars-list exekey cmdline)
+  (sqlite3:execute (db:no-sync-db db-in) "INSERT INTO jobs_queue (host_type,vars,exekey,cmdline,state,event_time,last_update) VALUES (?,?,?,?,?,?,?);"
+		   host-type
+		   (with-output-to-string
+		     (lambda ()
+		       (write vars-list)))
+		   exekey cmdline "waiting" (current-seconds)(current-seconds)))
+
+;; find next job (waiting longest) that matches host-type - future, we'll find jobs that fit if no exact match
+(define (db:no-sync-take-job db-in host-type)
+  (let* ((db   (db:no-sync-db db-in))
+	 (stmt1 "SELECT id,host_type,vars,exekey,cmdline,state,event_time,last_update FROM jobs_queue WHERE host_type=? AND state != 'taken' ORDER BY event_time ASC;")
+	 (stmt1h (sqlite3:prepare db stmt1))
+	 (stmt2  "UPDATE jobs_queue SET state='taken',last_update=? WHERE id=?;")
+	 (stmt2h (sqlite3:prepare db stmt2))
+	 (res    (sqlite3:with-transaction
+		  db
+		  (lambda ()
+		    (let* ((matching-jobs (sqlite3:fold-row
+					   (lambda (res . row) ;; id host-type vars exekey state event-time last-update)
+					     (cons row res))
+					   '()
+					   stmt1h
+					   host-type)))
+		      (if (null? matching-jobs)
+			  #f
+			  (let ((choosen-one  (let loop ((tal matching-jobs)
+							 (res #f)) ;; put bestest one in here
+						(if (null? tal)
+						    res
+						    (let ((curr (car tal))
+							  (rem  (cdr tal)))
+						      curr) ;; here we will compare with res, if better candidate the loop with curr else loop with res
+						    ))))
+			    (if choosen-one ;; we need to mark it as taken
+				(sqlite3:execute stmt2h (current-seconds) (car choosen-one)))
+			    choosen-one)))))))
+    (sqlite3:finalize! stmt1h) ;; it'd be nice to cache these and finalize on exit.
+    (sqlite3:finalize! stmt2h)
+    res))
+
+;; clean out old jobs in queue, i.e. taken and event_time > 24 hrs ago
+;;
+(define (db:no-sync-job-records-clean db)
+  (sqlite3:execute (db:no-sync-db db) "DELETE FROM jobs_queue WHERE state='taken' AND event_time < ?;" (- (current-seconds)(* 24 3600))))
+		 
 
 ;; if we are not a server create a db handle. this is not finalized
 ;; so watch for problems. I'm still not clear if it is needed to manually
 ;; finalize sqlite3 dbs with the sqlite3 egg.
 ;;
@@ -2191,12 +2243,13 @@
   (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))
 
 (define (db:no-sync-del! db var)
   (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var))
 
-(define (db:no-sync-get/default db var default)
-  (let ((res default))
+(define (db:no-sync-get/default db-in var default)
+  (let ((db  (db:no-sync-db db-in))
+	(res default))
     (sqlite3:for-each-row
      (lambda (val)
        (set! res val))
      (db:no-sync-db db)
      "SELECT val FROM no_sync_metadat WHERE var=?;"

Index: http-transport.scm
==================================================================
--- http-transport.scm
+++ http-transport.scm
@@ -557,26 +557,31 @@
 	 (server-started      (conc tmp-area "/.server-started"))
 	 (start-time          (common:lazy-modification-time server-start))
 	 (started-time        (common:lazy-modification-time server-started))
 	 (server-starting     (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting
 	 (start-time-old      (> (- (current-seconds) start-time) 5))
-         (cleanup-proc        (lambda (msg)
+         (cleanup-proc        (lambda (msg) ;; would like to use (modulo (current-seconds) 60) instead of process-id to wrap filenames
                                 (let* ((serv-fname      (conc "server-" (current-process-id) "-" (get-host-name) ".log"))
+				       (new-fname       (conc "server-" (modulo (current-seconds) 60) "-" (get-host-name) ".log"))
                                        (full-serv-fname (conc *toppath* "/logs/" serv-fname))
-                                       (new-serv-fname  (conc *toppath* "/logs/" "defunct-" serv-fname)))
+                                       ;; (new-serv-fname  (conc *toppath* "/logs/" "defunct-" serv-fname))
+				       (new-serv-fname  (conc *toppath* "/logs/" new-fname))
+				       )
                                   (debug:print 0 *default-log-port* msg)
                                   (if (common:file-exists? full-serv-fname)
-                                      (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname))
+				      (with-output-to-pipe "at now + 10 minutes" (lambda ()
+										   (print "mv -f " full-serv-fname " " new-serv-fname)))
+                                      ;; (system (conc "sleep 10;mv -f " full-serv-fname " " new-serv-fname))
                                       (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname))
                                   (exit)))))
-    #;(if (and (not start-time-old) ;; last server start try was less than five seconds ago
+    (if (and (not start-time-old) ;; last server start try was less than five seconds ago
 	     (not server-starting))
 	(begin
 	  (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting")
 	  (exit)))
     ;; lets not even bother to start if there are already three or more server files ready to go
-    #;(let* ((num-alive   (server:get-num-alive (server:get-list *toppath*))))
+    (let* ((num-alive   (server:get-num-alive (server:get-list *toppath*))))
       (if (> num-alive 3)
           (begin
             (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up"))
             (exit))))
   (common:save-pkt `((action . start)

Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -812,45 +812,11 @@
 				  (loop (car tal) (cdr tal) (+ kill-cnt flag))
                  (+ kill-cnt flag))))))
 
 ;; DO NOT USE - caching of configs is handled in launch:setup now.
 ;;
-(define (launch:cache-config)
-  ;; if we have a linktree and -runtests and -target and the directory exists dump the config
-  ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
-  (if (and *configdat* 
-	   (or (args:get-arg "-run")
-	       (args:get-arg "-runtests")
-	       (args:get-arg "-execute")))
-      (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE"))
-	     (target   (common:args-get-target exit-if-bad: #t))
-	     (runname  (or (args:get-arg "-runname")
-			   (args:get-arg ":runname")
-			   (getenv "MT_RUNNAME")))
-	     (fulldir  (conc linktree "/"
-			     target "/"
-			     runname)))
-	(if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree
-	    (begin
-	      (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
-	      (if (not (common:file-exists? fulldir))
-		  (create-directory fulldir #t)) ;; need to protect with exception handler 
-	      (if (and target
-		       runname
-		       (common:file-exists? fulldir))
-		  (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
-			(targfile (conc fulldir "/.megatest.cfg-"  megatest-version "-" megatest-fossil-hash))
-			(rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
-		    (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
-			(begin
-			  (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
-                          (if (not (common:in-running-test?))
-                              (configf:write-alist *configdat* tmpfile))
-			  (system (conc "ln -sf " tmpfile " " targfile))))
-		    )))
-	    (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))
-
+;; (launch:cache-config) moved to attic below
 
 ;; gather available information, if legit read configs in this order:
 ;;
 ;;   if have cache;
 ;;      read it a return it
@@ -874,14 +840,30 @@
 	(mutex-unlock! *launch-setup-mutex*)
 	*toppath*)
       (let ((res (launch:setup-body force-reread: force-reread areapath: areapath)))
 	(mutex-unlock! *launch-setup-mutex*)
 	res)))
+
+(define (launch:cache-files-changed? cache-files ref-seconds)
+  (let* ((changed #f))
+    (if (or (not cache-files)
+	    (null? cache-files))
+	(set! changed #t) ;; yep, they've changed
+	(for-each
+	 (lambda (fname)
+	   (if (not fname)
+	       (set! changed #t)
+	       (if (not (file-exists? fname))
+		   (set! changed #t)
+		   (if (> (file-modification-time fname) ref-seconds)
+		       (set! changed #t)))))
+	 cache-files))
+    changed))
 
 ;; return paths depending on what info is available.
 ;;
-(define (launch:get-cache-file-paths areapath toppath target mtconfig)
+(define (launch:get-cache-file-paths areapath toppath target)
   (let* ((use-cache (common:use-cache?))
          (runname  (common:args-get-runname))
          (linktree (common:get-linktree))
          (testname (common:get-full-test-name))
          (rundir   (if (and runname target linktree)
@@ -912,11 +894,11 @@
       (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting.  We do not have *configdat*.  Bootstrapping problem here.
 	     (toppath  (common:get-toppath areapath))
 	     (target   (common:args-get-target))
 	     (sections (if target (list "default" target) #f)) ;; for runconfigs
 	     (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config 
-             (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
+             (cachefiles (launch:get-cache-file-paths areapath toppath target))
 	     ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ...
 	     (mtcachef   (if (null? cachefiles)
 			     #f
 			     (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-"  megatest-version "-" megatest-fossil-hash)))
 	     (rccachef   (if (null? cachefiles)
@@ -993,11 +975,11 @@
 					 (for-each (lambda (kt)
 						     (setenv (car kt) (cadr kt)))
 						   key-vals)
 					 (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
 						      sections: sections)))
-                         (cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
+                         (cachefiles   (launch:get-cache-file-paths areapath toppath target))
                          (mtcachef     (car cachefiles))
                          (rccachef     (cdr cachefiles)))
                     ;;  trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
                     ;; TODO - consider 1) using simple-lock to bracket cache write
                     ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.
@@ -1076,11 +1058,11 @@
 	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
 	      (set! *toppath* #f) ;; force it to be false so we return #f
 	      #f))
 	
         ;; one more attempt to cache the configs for future reading
-        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
+        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target))
                (mtcachef     (car cachefiles))
                (rccachef     (cdr cachefiles)))
 
           ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
           ;; TODO - consider 1) using simple-lock to bracket cache write
@@ -1363,12 +1345,20 @@
       (debug:print 1 *default-log-port* "INFO: search and mark zombie tests")
       (rmt:set-var key (current-seconds))
       (rmt:find-and-mark-incomplete run-id #f))))
 
 
-
+(defstruct launch:ajt
+  (vars '())
+  (exekey #f)
+  (host-type #f)
+  (test-sig  #f)
+  (cmdline   #f))
 
+;; append vars
+(define (launch:ajt-add-vars dat vars)
+  (launch:ajt-vars-set! dat (append (launch:ajt-vars dat) vars)))
 
 ;; 1. look though disks list for disk with most space
 ;; 2. create run dir on disk, path name is meaningful
 ;; 3. create link from run dir to megatest runs area 
 ;; 4. remotely run the test on allocated host
@@ -1375,44 +1365,41 @@
 ;;    - could be ssh to host from hosts table (update regularly with load)
 ;;    - could be netbatch
 ;;      (launch-test db (cadr status) test-conf))
 (define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
   (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
-  (let* ( ;; (lock-key        (conc "test-" test-id))
-	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
-	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
-	;; 		    (if (car lock)
-	;; 			#t
-	;; 			(if (> (current-seconds) expire-time)
-	;; 			    (begin
-	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
-	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
-	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
-	;; 			    (begin
-	;; 			      (thread-sleep! 1)
-	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
+  (let* (;; locking code removed from here commented out and pasted at end of file
 	 (item-path       (item-list->path itemdat))
-	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
+	 (contour         #f)                         ;; NOT READY FOR THIS (args:get-arg "-contour")))
+	 ;; launcher-mode will be 'adjutant or 'normal
+	 (launcher-mode   (string->symbol (or (configf:lookup *configdat* "jobtools" "mode") "normal")))
+	 (ajtdat          (make-launch:ajt)))
     (let loop ((delta        (- (current-seconds) *last-launch*))
 	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 0)))
       (if (> launch-delay delta)
 	  (begin
 	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
 		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
 	    (thread-sleep! (- launch-delay delta))
 	    (loop (- (current-seconds) *last-launch*) launch-delay))))
     (change-directory *toppath*)
-    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
-     (append
-      (list
-       (list "MT_RUN_AREA_HOME" *toppath*)
-       (list "MT_TEST_NAME" test-name)
-       (list "MT_RUNNAME"   runname)
-       (list "MT_ITEMPATH"  item-path)
-       (list "MT_CONTOUR"   contour)
-       )
-      itemdat))
+    (let ((var-list (append
+		     (list
+		      (list "MT_RUN_AREA_HOME" *toppath*)
+		      (list "MT_TEST_NAME" test-name)
+		      (list "MT_RUNNAME"   runname)
+		      (list "MT_ITEMPATH"  item-path)
+		      (list "MT_CONTOUR"   contour)
+		      )
+		     itemdat)))
+       ;; consolidate this code with the code in megatest.scm for
+       ;; "-execute", *maybe* - the longer they are set the longer
+       ;; each launch takes (must be non-overlapping with the vars)
+      (alist->env-vars var-list)
+      ;; the var-list into the ajtdat adjutant record whether it is needed or not.
+      (launch:ajt-add-vars ajtdat var-list))
+    
     (let* ((tregistry       (tests:get-all)) ;; third param (below) is system-allowed
            ;; for tconfig, why do we allow fallback to test-conf?
 	   (tconfig         (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t)
 				(begin
                                   (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.")
@@ -1430,26 +1417,17 @@
 	   ;; (memory          (configf:lookup tconfig   "requirements" "memory"))
 	   ;; (hosts           (configf:lookup *configdat* "jobtools"     "workhosts")) ;; I'm pretty sure this was never completed
 	   (remote-megatest (configf:lookup *configdat* "setup" "executable"))
 	   (run-time-limit  (or (configf:lookup  tconfig   "requirements" "runtimelim")
 				(configf:lookup  *configdat* "setup" "runtimelim")))
-	   ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to 
-	   ;;                allow running from dashboard. Extract the path
-	   ;;                from the called megatest and convert dashboard
-	   ;;             	  or dboard to megatest
 	   (local-megatest  (common:find-local-megatest))
-	   #;(local-megatest  (let* ((lm  (car (argv)))
-				   (dir (pathname-directory lm))
-				   (exe (pathname-strip-directory lm)))
-			      (conc (if dir (conc dir "/") "")
-				    (case (string->symbol exe)
-				      ((dboard)    "../megatest")
-				      ((mtest)     "../megatest")
-				      ((dashboard) "megatest")
-				      (else exe)))))
-	   (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (configf:lookup *configdat* "jobtools"     "launcher"))
-	   (test-sig        (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
+	   (launcher        (let ((l (common:get-launcher *configdat* test-name item-path launcher-mode)))
+			      (if (string? l)
+				  (string-split l)
+				  l))) ;; some nonhomogenuity here. '(cmd param1 param2 ...) OR '(host-type launcher)
+	    ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
+	   (test-sig        (conc (common:get-testsuite-name) ":" test-name ":" item-path))
 	   (work-area       #f)
 	   (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
 	   (diskpath   #f)
 	   (cmdparms   #f)
 	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
@@ -1459,15 +1437,24 @@
 	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
 				(if (args:get-arg "-logging")(list "-logging") '())
 				(if (configf:lookup *configdat* "misc" "profilesw")
 				    (list (configf:lookup *configdat* "misc" "profilesw"))
 				    '()))))
+      ;; save the test-sig in the ajtdat record
+      (launch:ajt-test-sig-set! ajtdat test-sig)
+      ;; go ahead and figure out if we have a host-type from the
+      ;; launcher call above and save it in the ajtdat record
+      (if (and (eq? launcher-mode 'adjutant)
+	       (list? launcher)
+	       (> (length launcher) 1))
+	  (launch:ajt-host-type-set! ajtdat (car launcher)))
+ 
       ;; (if hosts (set! hosts (string-split hosts)))
       ;; set the megatest to be called on the remote host
       (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
       (set! mt-bindir-path (pathname-directory remote-megatest))
-      (if launcher (set! launcher (string-split launcher)))
+      ;; (if launcher (set! launcher (string-split launcher)))           ;; yuk!
       ;; set up the run work area for this test
       (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
 	       (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
 	  (begin
 	    (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
@@ -1520,63 +1507,92 @@
 					(list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
 					(list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
 					(list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
 					(list 'runname   runname)
 					(list 'mt-bindir-path mt-bindir-path))))))))
+      ;; save the cmdparms in the ajtdat
+      (launch:ajt-exekey-set! ajtdat cmdparms)
       
       ;; clean out step records from previous run if they exist
       ;; (rmt:delete-test-step-records run-id test-id)
       ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
       (if (common:file-exists? work-area)
 	  (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
-      (cond
-       ;; ((and launcher hosts) ;; must be using ssh hostname
-       ;;    (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
-       ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
+
+      ;; save the command line for adjutant mode (might never be needed but best to assemble it here)
+      (launch:ajt-cmdline-set! ajtdat (string-intersperse
+				       (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
+      (cond       
        (launcher
 	(set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
-       ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
        (else
 	(if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section"))
 	(set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
-      ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
+      
       (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
       (debug:print 1 *default-log-port* "Launching " work-area)
       ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done
       (debug:print 4 *default-log-port* "fullcmd: " fullcmd)
       (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible.
-      (let* ((commonprevvals (alist->env-vars
-			      (hash-table-ref/default *configdat* "env-override" '())))
-	     (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
-			      (append (list (list "MT_TEST_RUN_DIR" work-area)
-					    (list "MT_TEST_NAME" test-name)
-					    (list "MT_ITEM_INFO" (conc itemdat)) 
-					    (list "MT_RUNNAME"   runname)
-					    (list "MT_TARGET"    mt_target)
-					    (list "MT_ITEMPATH"  item-path))
-				      itemdat)))
-	     (testprevvals   (alist->env-vars
-			      (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))
+      (let* ((env-override-vars  (hash-table-ref/default *configdat* "env-override" '()))
+	     (commonprevvals     (alist->env-vars env-override-vars))
+	     (misc-vars          (append (list (list "MT_TEST_RUN_DIR" work-area)
+					       (list "MT_TEST_NAME" test-name)
+					       (list "MT_ITEM_INFO" (conc itemdat)) 
+					       (list "MT_RUNNAME"   runname)
+					       (list "MT_TARGET"    mt_target)
+					       (list "MT_ITEMPATH"  item-path))
+					 itemdat))
+	     (miscprevvals   (alist->env-vars misc-vars));; consolidate this code with the code in megatest.scm for "-execute"
+	     (test-vars      (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))
+	     (testprevvals   (alist->env-vars test-vars))
+			      
 	     ;; Launchwait defaults to true, must override it to turn off wait
 	     (launchwait     (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
-	     (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed.
-					     process:cmd-run-with-stderr-and-exitcode->list
-					     process-run)
-					 (if useshell
-					     (let ((cmdstr (string-intersperse fullcmd " ")))
-					       (if launchwait
-						   cmdstr
-						   (conc cmdstr " >> mt_launch.log 2>&1 &")))
-					     (car fullcmd))
-					 (if useshell
-					     '()
-					     (cdr fullcmd))))
+	     ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed.
+	     (launch-results-prev (if (eq? launcher-mode 'adjutant)
+				      '(#t 0) ;; just some fake data to fool downstream but non-applicable code
+				      (apply (if launchwait
+						 process:cmd-run-with-stderr-and-exitcode->list
+						 process-run)
+					     (if useshell
+						 (let ((cmdstr (string-intersperse fullcmd " ")))
+						   (if launchwait
+						       cmdstr
+						       (conc cmdstr " >> mt_launch.log 2>&1 &")))
+						 (car fullcmd))
+					     (if useshell
+						 '()
+						 (cdr fullcmd)))))
              (success        (if launchwait (equal? 0 (cadr launch-results-prev)) #t))
              (launch-results (if launchwait (car launch-results-prev) launch-results-prev)))
-        (if (not success)
+
+	(launch:ajt-add-vars ajtdat env-override-vars)
+	(launch:ajt-add-vars ajtdat misc-vars)
+	(launch:ajt-add-vars ajtdat test-vars)
+
+	;; if in adjutant mode we register the job in the jobs_queue
+	;; then fire off an adjutant runner
+	;;
+	(if (eq? launcher-mode 'adjutant)
+	    (let* ((adjutant-runner-cmd (append (cdr launcher)
+						(list remote-megatest "-adjutant"
+						      (launch:ajt-host-type ajtdat)
+						      "-start-dir" *toppath*)))
+		   (adj-cmd     (conc (string-intersperse (map conc adjutant-runner-cmd) " ")
+				      "&")))         
+	      (rmt:no-sync-add-job
+	       (launch:ajt-host-type  ajtdat)
+	       (launch:ajt-vars ajtdat)
+	       (launch:ajt-exekey     ajtdat)
+	       (launch:ajt-cmdline    ajtdat))
+	      (print "adj-cmd: " adj-cmd)
+	      (system adj-cmd)
+	      ))
+	
+	(if (not success)
             (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED"))
-        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
 	;; (rmt:no-sync-del! lock-key)         ;; release the lock for starting this test
 	(if (not launchwait) ;; give the OS a little time to allow the process to start
 	    (thread-sleep! 0.01))
 	(with-output-to-file "mt_launch.log"
 	  (lambda ()
@@ -1598,10 +1614,14 @@
 	      (process-signal (current-process-id) signal/kill)
 	      ))
 	(alist->env-vars miscprevvals)
 	(alist->env-vars testprevvals)
 	(alist->env-vars commonprevvals)
+	;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
+	;; the unlock previously was further up. This seemed wrong as we should not proceed until the
+	;; vars have been reset.
+	(mutex-unlock! *launch-setup-mutex*)
 	launch-results))
     (change-directory *toppath*)
     (thread-sleep! (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.0))))
 
 ;; recover a test where the top controlling mtest may have died
@@ -1627,5 +1647,60 @@
     ;; now wait on that process if all is correct
     ;; periodically update the db with runtime
     ;; when the process exits look at the db, if still RUNNING after 10 seconds set
     ;; state/status appropriately
     (process-wait pid)))
+
+;;======================================================================
+;; Attic
+;;======================================================================
+
+ ;; (lock-key        (conc "test-" test-id))
+	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
+	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
+	;; 		    (if (car lock)
+	;; 			#t
+	;; 			(if (> (current-seconds) expire-time)
+	;; 			    (begin
+	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
+	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
+	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
+	;; 			    (begin
+	;; 			      (thread-sleep! 1)
+	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
+	 
+
+
+#;(define (launch:cache-config)
+  ;; if we have a linktree and -runtests and -target and the directory exists dump the config
+  ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
+  (if (and *configdat* 
+	   (or (args:get-arg "-run")
+	       (args:get-arg "-runtests")
+	       (args:get-arg "-execute")))
+      (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE"))
+	     (target   (common:args-get-target exit-if-bad: #t))
+	     (runname  (or (args:get-arg "-runname")
+			   (args:get-arg ":runname")
+			   (getenv "MT_RUNNAME")))
+	     (fulldir  (conc linktree "/"
+			     target "/"
+			     runname)))
+	(if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree
+	    (begin
+	      (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
+	      (if (not (common:file-exists? fulldir))
+		  (create-directory fulldir #t)) ;; need to protect with exception handler 
+	      (if (and target
+		       runname
+		       (common:file-exists? fulldir))
+		  (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
+			(targfile (conc fulldir "/.megatest.cfg-"  megatest-version "-" megatest-fossil-hash))
+			(rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
+		    (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
+			(begin
+			  (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
+                          (if (not (common:in-running-test?))
+                              (configf:write-alist *configdat* tmpfile))
+			  (system (conc "ln -sf " tmpfile " " targfile))))
+		    )))
+	    (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))

ADDED   lib/libpangox-1.0.so
Index: lib/libpangox-1.0.so
==================================================================
--- /dev/null
+++ lib/libpangox-1.0.so
cannot compute difference between binary files

ADDED   lib/libpangox-1.0.so.0
Index: lib/libpangox-1.0.so.0
==================================================================
--- /dev/null
+++ lib/libpangox-1.0.so.0
cannot compute difference between binary files

ADDED   lib/libxcb-xlib.so.0
Index: lib/libxcb-xlib.so.0
==================================================================
--- /dev/null
+++ lib/libxcb-xlib.so.0
cannot compute difference between binary files

Index: megatest-version.scm
==================================================================
--- megatest-version.scm
+++ megatest-version.scm
@@ -18,6 +18,6 @@
 ;; Always use two or four digit decimal
 ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..
 
 ;; (declare (unit megatest-version))
 
-(define megatest-version 1.6577)
+(define megatest-version 1.6579)

Index: megatest.scm
==================================================================
--- megatest.scm
+++ megatest.scm
@@ -40,10 +40,16 @@
 (declare (uses api))
 (declare (uses tasks)) ;; only used for debugging.
 (declare (uses env))
 (declare (uses diff-report))
 (declare (uses mutils))
+(declare (uses adjutant))
+(import adjutant)
+
+(declare (uses mttop))
+(import mttop)
+
 ;; (declare (uses ftail))
 ;; (import ftail)
 
 (define *db* #f) ;; this is only for the repl, do not use in general!!!!
 
@@ -52,12 +58,12 @@
 (include "db_records.scm")
 (include "run_records.scm")
 (include "megatest-fossil-hash.scm")
 
 (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:)
-     readline apropos json http-client directory-utils typed-records
-     http-client srfi-18 extras format)
+     readline apropos json http-client directory-utils typed-records matchable
+     http-client srfi-18 extras format call-with-environment-variables)
 
 ;; Added for csv stuff - will be removed
 ;;
 (use sparse-vectors)
 
@@ -105,10 +111,11 @@
  
 Usage: megatest [options]
   -h                      : this help
   -manual                 : show the Megatest user manual
   -version                : print megatest version (currently " megatest-version ")
+  help                    : help for the new Megatest interface
 
 Launching and managing runs
   -run                    : run all tests or as specified by -testpatt
   -remove-runs            : remove the data for a run, requires -runname and -testpatt
                             Optionally use :state and :status, use -keep-records to remove only
@@ -202,11 +209,11 @@
   -update-meta            : update the tests metadata for all tests
   -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are
                                  overwritten by values set in config files.
   -server -|hostname      : start the server (reduces contention on megatest.db), use
                             - to automatically figure out hostname
-  -adjutant C,M           : start the server/adjutant with allocated cores C and Mem M (Gig), 
+  -adjutant host-type     : start the server/adjutant with given host-type
                             use 0,0 to auto use full machine
   -transport http|rpc     : use http or rpc for transport (default is http) 
   -log logfile            : send stdout and stderr to logfile
   -list-servers           : list the servers 
   -kill-servers           : kill all servers
@@ -271,10 +278,14 @@
 Version " megatest-version ", built from " megatest-fossil-hash ))
 
 ;;  -gui                    : start a gui interface
 ;;  -config fname           : override the runconfigs file with fname
 
+
+(mttop-run (command-line-arguments)
+	   '("help"))
+
 ;; process args
 (define remargs (args:get-args 
 		 (argv)
 		 (list  "-runtests"  ;; run a specific test
 			"-config"    ;; override the config file name
@@ -915,16 +926,47 @@
     (let ((tl        (launch:setup))
           (transport-type (string->symbol (or (args:get-arg "-transport") "http"))))
       (server:launch 0 transport-type)
       (set! *didsomething* #t)))
 
+
+(define (naylist->alist inlst)
+  (map (lambda (dat)
+	 (cons (car dat)
+	       (or (if (list?   (cdr dat))
+		       (if (null? (cdr dat)) ""
+			   (cadr dat))
+		       (cdr dat))
+		   ""))) ;; we need a string for call-with-environment-variables
+       inlst))
+
 ;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to
 ;; a specific Megatest area. Detail are being hashed out and this may change.
 ;;
 (if (args:get-arg "-adjutant")
-    (begin
-      (adjutant-run)
+    (let* ((host-type (args:get-arg "-adjutant")))
+      (launch:setup) ;; dang it, wish this wasn't needed
+      (print "Running the adjutant!")
+      (let loop ((wait-count 0))
+	(if (< wait-count 10) ;; 6 x 10 seconds = one minute
+	    (let* ((dat (rmt:no-sync-take-job host-type)))
+	      (match dat
+		  ((id ht vars exekey cmdline state event-time last-update)
+		      (let ((vars-alist (with-input-from-string vars read)
+					))
+			(print "Vars:")
+			(pp vars-alist)
+			(call-with-environment-variables
+			 (naylist->alist vars-alist)
+			 (lambda ()
+			   (system cmdline))))
+		      (loop 0))
+		  (else
+		   (thread-sleep! 10)
+		   (loop (+ wait-count 1)))))
+	    (print "I'm bored. Exiting.")))
+      ;; (adjutant-run (args:get-arg "-ajutant") rmt:no-sync-take-job)
       (set! *didsomething* #t)))
 
 (if (or (args:get-arg "-list-servers")
         (args:get-arg "-kill-servers"))
     (let ((tl (launch:setup)))

ADDED   mttop.scm
Index: mttop.scm
==================================================================
--- /dev/null
+++ mttop.scm
@@ -0,0 +1,55 @@
+;; Copyright 2006-2011, Matthew Welland.
+;; 
+;;  This program is made available under the GNU GPL version 2.0 or
+;;  greater. See the accompanying file COPYING for details.
+;; 
+;;  This program is distributed WITHOUT ANY WARRANTY; without even the
+;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+;;  PURPOSE.
+
+;; This is from the perl world, a hash of hashes is a super easy way to keep a handle on
+;; lots of disparate data
+;;
+
+(declare (unit mttop))
+
+(module mttop
+    *
+
+(import chicken scheme
+	;; data-structures posix
+	srfi-1
+	;; srfi-13
+	srfi-69
+	ports
+	extras
+	regex
+	posix
+	data-structures
+	matchable
+	)
+
+(define (str-is-cmd cmd all-cmds)
+  (let* ((rx  (regexp (conc "^" cmd ".*")))
+	 (mx  (filter string? (map (lambda (x)
+				     (let ((res (string-match rx x)))
+				       (if res (car res) #f)))
+				   all-cmds))))
+    (if (eq? (length mx) 1) ;; have a command
+	(car mx)
+	#f)))
+
+(define (mttop-run args all-cmds)
+  ;; any path through this call must end in exit if it is NOT an old Megatest call
+  (if (null? args)
+      #f ;; continue on and do the old Megatest stuff
+      (let ((cmd (str-is-cmd (car args) all-cmds)))
+	(if cmd
+	    (begin
+	      (case (string->symbol cmd)
+		((help)(print "New help"))
+		(else (print "Command " cmd " is not implemented yet.")))
+	      (exit)) ;; always exit here
+	    #f))))    ;; or continue on to Megatest old stuff here
+  
+)

Index: mtut.scm
==================================================================
--- mtut.scm
+++ mtut.scm
@@ -154,10 +154,11 @@
    show [areas|contours... ] : show areas, contours or other section from megatest.config
    gendot                    : generate a graphviz dot file from pkts.
 
 Contour actions:
    process                   : runs import, rungen and dispatch 
+   go                        : runs import, rungen and dispatch every five minutes forever
 			     
 Trigger propagation actions:
    tsend a=b,c=d...          : send trigger info to all recpients in the [listeners] section
    tlisten -port N           : listen for trigger info on port N
 			     
@@ -773,11 +774,11 @@
 				(begin
 				  (print-call-chain)
 				  (print "FAILED TO RUN RUNNAME MAPPER " callname " FOR AREA " area)
 				  (print " message: " ((condition-property-accessor 'exn 'message) exn))
 				  runname)
-			      (print "(mapper " (string-intersperse (list runkey runname area area-path reason contour mode-patt) ", ") ")")
+			      (print "(mapper " (string-intersperse (map conc (list runkey runname area area-path reason contour mode-patt)) ", ") ")")
 			      (mapper runkey runname area area-path reason contour mode-patt))
 			    (case callname
 			      ((auto #f) runname)
 			      (else   runtrans)))))
 	 (new-target     target) ;; I believe we will want target manipulation here .. (map-targets xlatr-key runkey area contour))
@@ -1323,11 +1324,13 @@
 		      (begin
 			(print "ERROR: Unknown key in packet \"" key "\" with value \"" val "\"")
 			res)))))
 	  (conc "megatest " (if (not (member action '("sync")))
 				(conc action " " action-param)
-				""))
+				"") (if (member action '("-run" "-rerun-clean" "-rerun-all" "-kill-rerun"))
+                                                        "-rerun DEAD,ABORT,KILLED"
+                                                        ""))
 	  pkta)))
 
 ;; (use trace)(trace pkt->cmdline)
 
 (define (write-pkt pktsdir uuid pkt)
@@ -1610,23 +1613,70 @@
 	 ;;  (hash-table-keys adjargs))
 	 (let-values (((uuid pkt)
 		       (command-line->pkt *action* adjargs #f area-path: area-path new-ss: new-ss)))
            (print "run log @ " (conc (current-directory) "/" uuid "-" *action* ".log"))
 	   (write-pkt pktsdir uuid pkt))))
-      ((dispatch import rungen process)
+      ((dispatch import rungen process go)
        (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir")))
 	      (mtconf    (car mtconfdat))
-	      (toppath   (configf:lookup mtconf "scratchdat" "toppath")))
+	      (toppath   (configf:lookup mtconf "scratchdat" "toppath"))
+	      (period    (configf:lookup-number mtconf "mtutil" "autorun-period" default: 300))
+	      (rest-time (configf:lookup-number mtconf "mtutil" "autorun-rest"   default: 30)))
+	 (print "Using period="period" and rest time="rest-time)
 	 (case (string->symbol *action*)
 	   ((process)  (begin
 			 (common:load-pkts-to-db mtconf)
 			 (generate-run-pkts mtconf toppath)
 			 (common:load-pkts-to-db mtconf)
 			 (dispatch-commands mtconf toppath)))
 	   ((import)   (common:load-pkts-to-db mtconf)) ;; import pkts
 	   ((rungen)   (generate-run-pkts mtconf toppath))
-	   ((dispatch) (dispatch-commands mtconf toppath)))))
+	   ((dispatch) (dispatch-commands mtconf toppath))
+	   ;; [mtutil]
+	   ;; # approximate interval between run processing in mtutil (seconds)
+	   ;; autorun-period 300
+	   ;; # minimal rest period between processing 
+	   ;; autorun-rest   30
+	   ((go)
+	    ;; determine if I'm the boss
+	    (if (file-exists? "mtutil-go.pid")
+		(begin
+		  (print "ERROR: mtutil go is already running under host and pid " (with-input-from-file "mtutil-go.pid" read-line)
+			 ". Please kill that process and remove the file \"mutil-go.pid\" and try again.")
+		  (exit)))
+	    (with-output-to-file "mtutil-go.pid" (lambda ()(print (get-host-name) " " (current-process-id))))
+	    (print "Starting long running import, rungen, and process loop")
+	    (if (file-exists? "do-not-run-mtutil-go")
+		(begin
+		  (print "NOTE: Removing flag file "(current-directory)"/do-not-run-mtutil-go")
+		  (delete-file* "do-not-run-mtutil-go")))
+	    (let loop ((last-run (- (current-seconds) (+ period 10))) ;; fake out first time in 
+		       (this-run (current-seconds)))
+	      (if (file-exists? "do-not-run-mtutil-go")
+		  (begin
+		    (print "File do-not-run-mtutil-go exists, exiting.")
+		    (delete-file* "mtutil-go.pid")
+		    (exit)))
+	      (let ((delta (- this-run last-run)))
+		(if (>= delta period)
+		    (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir")))
+			   (mtconf    (car mtconfdat)))
+		      (print "Running import at " (current-seconds))
+		      (common:load-pkts-to-db mtconf)
+		      (print "Running generate run pkts at " (current-seconds))
+		      (generate-run-pkts mtconf toppath)
+		      (print "Running run dispatch at " (current-seconds))
+		      (common:load-pkts-to-db mtconf)
+		      (dispatch-commands mtconf toppath)
+		      (print "Done running import, generate, and dispatch done in " (- (current-seconds) this-run))
+		      (print "NOTE: touch " (current-directory) "/do-not-run-mtutil-go to kill this runner.")
+		      (loop this-run (current-seconds)))
+		    (let ((now (current-seconds)))
+		      (print "Sleeping " rest-time " seconds, next run in aproximately " (- period (- now last-run)) " seconds")
+		      (thread-sleep! rest-time)
+		      (loop last-run (current-seconds))))))
+	    (delete-file* "mtutil-go.pid")))))
       ;; misc
       ((show)
        (if (> (length remargs) 0)
 	   (let* ((mtconfdat (simple-setup (args:get-arg "-start-dir")))
 		  (mtconf    (car mtconfdat))
@@ -1805,46 +1855,52 @@
                                   )
 
                                 )))
                            (loop (nn-recv rep))))
 		       (print "ERROR: Port " portnum " already in use. Try another port")))))))
-      
-
-
-
-      ((tlisten)
-       (if (null? remargs)
-           (print "ERROR: useage for tlisten is \"mtutil tlisten portnum\"")
-           (let ((portnum (string->number (car remargs))))
-              
-             (if (not portnum)
-                 (print "ERROR: the portnumber parameter must be a number, you gave: " (car remargs))
-                 (begin
-                   (if (not (is-port-in-use portnum))  
-                       (let* ((rep       (start-nn-server portnum))
-                              (mtconfdat (simple-setup (args:get-arg "-start-dir")))
-                              (mtconf    (car mtconfdat))
-                              (contact   (configf:lookup mtconf "listener" "owner"))
-                              (script    (configf:lookup mtconf "listener" "script")))
-                         (print "Listening on port " portnum " for messages.")
-                         (set-signal-handler! signal/int  (lambda (signum) 
-															(set! *time-to-exit* #t)
-  														(debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting !!")
-  														(let ((email-body (mtut:stml->string (s:body
-																						(s:p (conc "Received signal " signum ". Lister has been terminated on host " (get-environment-variable "HOST") ". "))))))
-             					        (sendmail contact "Listner has been terminated." email-body  use_html: #t))
-                              (exit)))
-															(set-signal-handler! signal/term  (lambda (signum) 
-															(set! *time-to-exit* #t)
-  														(debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting !!")
-  														(let ((email-body (mtut:stml->string (s:body
-																						(s:p (conc "Received signal " signum ". Lister has been terminated on host " (get-environment-variable "HOST") ". "))))))
-             					        (sendmail contact "Listner has been terminated." email-body  use_html: #t))
-                              (exit)))
-
-                         ;(set-signal-handler! signal/term special-signal-handler)
-                         
+
+    ((tlisten)
+     (if (null? remargs)
+         (print "ERROR: useage for tlisten is \"mtutil tlisten portnum\"")
+         (let ((portnum (string->number (car remargs))))
+           
+           (if (not portnum)
+               (print "ERROR: the portnumber parameter must be a number, you gave: " (car remargs))
+               (begin
+                 (if (not (is-port-in-use portnum))  
+                     (let* ((rep       (start-nn-server portnum))
+                            (mtconfdat (simple-setup (args:get-arg "-start-dir")))
+                            (mtconf    (car mtconfdat))
+                            (contact   (configf:lookup mtconf "listener" "owner"))
+                            (script    (configf:lookup mtconf "listener" "script")))
+                       (print "Listening on port " portnum " for messages.")
+                       (set-signal-handler! signal/int
+					    (lambda (signum) 
+					      (set! *time-to-exit* #t)
+  					      (debug:print-error 0 *default-log-port* "Received signal " signum
+								 " sending email befor exiting !!")
+  					      (let ((email-body (mtut:stml->string
+								 (s:body
+								  (s:p (conc "Received signal " signum
+									     ". Lister has been terminated on host "
+									     (get-environment-variable "HOST") ". "))))))
+             					(sendmail contact "Listner has been terminated." email-body  use_html: #t))
+					      (exit)))
+		       (set-signal-handler! signal/term  (lambda (signum) 
+							   (set! *time-to-exit* #t)
+  							   (debug:print-error 0 *default-log-port* "Received signal "
+									      signum " sending email befor exiting !!")
+  							   (let ((email-body (mtut:stml->string
+									      (s:body
+									       (s:p (conc "Received signal " signum
+											  ". Lister has been terminated on host "
+											  (get-environment-variable "HOST") ". "))))))
+             					             (sendmail contact "Listner has been terminated." email-body  use_html: #t))
+							   (exit)))
+
+		       ;; (set-signal-handler! signal/term special-signal-handler)
+                       
                          (let loop ((instr (nn-recv rep)))
                              (nn-send rep "ok")
                              (let ((ctime (date->string (current-date)))) 
                              (if  (equal? instr "time-to-die")
                               (begin 

Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -969,10 +969,19 @@
 (define (rmt:no-sync-del! var)
   (rmt:send-receive 'no-sync-del! #f `(,var)))
 
 (define (rmt:no-sync-get-lock keyname)
   (rmt:send-receive 'no-sync-get-lock #f `(,keyname)))
+
+(define (rmt:no-sync-add-job host-type vars-list exekey cmdline)
+  (rmt:send-receive 'no-sync-add-job #f `(,host-type ,vars-list ,exekey ,cmdline)))
+
+(define (rmt:no-sync-take-job host-type)
+  (rmt:send-receive 'no-sync-take-job #f `(,host-type)))
+
+(define (rmt:no-sync-job-records-clean)
+  (rmt:set-receive 'no-sync-job-records-clean #f '()))
 
 ;;======================================================================
 ;; A R C H I V E S
 ;;======================================================================
 

Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -350,15 +350,14 @@
 
   (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
 
   (let* ((num-running             (rmt:get-count-tests-running run-id)) 
 	 (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
-	 (job-group-limit         (let ((jobg-count (configf:lookup *configdat* "jobgroups" jobgroup)))
-				    (if (string? jobg-count)
-					(string->number jobg-count)
-					jobg-count))))
-    (if (> (+ num-running num-running-in-jobgroup) 0)
+	 (job-group-limit         (configf:lookup-number *configdat* "jobgroups" jobgroup)))
+    (if (and (number? num-running)                   ;; checking for number - had a crash where a non-number was returned. Not sure why.
+	     (number? num-running-in-jobgroup)       ;; probably can remove this when rmt switches away from http.
+	     (> (+ num-running num-running-in-jobgroup) 0))
 	(runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
     (if (not (eq? *last-num-running-tests* num-running))
 	(begin
 	  (debug:print 2 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
 	  (set! *last-num-running-tests* num-running)))
@@ -419,11 +418,13 @@
                    (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file))
                  (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf)
                  (system (conc run-pre-hook " >> " actual-logf " 2>&1"))
                  (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))
               (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run.")))))
-    
+   
+
+
 (define (runs:run-post-hook run-id)
     (let* ((run-post-hook   (configf:lookup *configdat* "runs" "post-hook"))
            (existing-tests (if run-post-hook
                                (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses
                                                       #f #f ;; offset limit
@@ -458,10 +459,65 @@
 		  (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
 		  (debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file))
 	      (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf)
 	      (system (conc run-post-hook " >> " actual-logf " 2>&1"))
 	      (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run."))))))
+
+
+(define (runs:rerun-hook test-id new-test-path testdat rerunlst)
+    (let* ((rerun-hook   (configf:lookup *configdat* "runs" "rerun-hook"))
+           (log-dir         (conc *toppath* "/reruns/logs"))
+           (target (getenv "MT_TARGET"))
+           (runname (common:args-get-runname))
+           (rundir (db:test-get-rundir testdat))
+           (tarfiledir (conc *toppath* "/reruns"))
+           (status (db:test-get-status testdat))
+           (comment (conc "\"" (db:test-get-comment testdat) "\"" ))
+           (testname (db:test-get-testname testdat))
+           (itempath (db:test-get-item-path testdat))
+           (file-body (conc status "-" (string-translate target "/" "-") "-" runname "." testname (if (not (string=? itempath "")) (conc  "." (string-translate itempath "/" "-")) "")))
+           (log-file (conc file-body ".log"))
+           ;; (log-file        (conc status "-" (string-translate target "/" "-") "-" runname "." testname (if (not (string=? itempath "")) (conc  "." (string-translate itempath "/" "-")) "") ".log"))
+           (full-log-fname  (conc log-dir "/" log-file))
+           (tarfilename (conc file-body ".tar"))
+           ;; (tarfilename (conc status "." (string-translate target "/" "-") "." runname "." testname (if (not (string=? itempath "")) (conc  "." (string-translate itempath "/" "-")) "") ".tar"))
+           )
+      (if rerun-hook
+	  (let* ((use-log-dir (if (not (directory-exists? log-dir))
+				  (handle-exceptions
+				      exn
+				      (begin
+					(debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir ", exn=" exn)
+					#f)
+				    (create-directory log-dir #t)
+				    #t)
+				  #t))
+		 (start-time   (current-seconds))
+		 (actual-logf  (if use-log-dir full-log-fname log-file))
+                 (sys-call-text (conc rerun-hook " " tarfilename " " rundir " " actual-logf " " runname " " tarfiledir " " status " " target " " comment " " testname " " itempath " >> " actual-logf " 2>&1"))
+                 )
+	    (debug:print 2 *default-log-port* "Found rerun-hook in config:" rerun-hook)
+	    (handle-exceptions
+		exn
+		(begin
+		  (print-call-chain *default-log-port*)
+		  (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)
+		  (debug:print 0 *default-log-port* "ERROR: failed to run rerun-hook " rerun-hook ", check the log " log-file))
+	      (debug:print-info 0 *default-log-port* "running rerun-hook: \"" rerun-hook "\", log is " actual-logf)
+              ;; call the hook
+              (debug:print-info 0 *default-log-port* "Calling rerun-hook for " test-id new-test-path testdat rerunlst)
+              (debug:print-info 0 *default-log-port* "rerun hook: " rerun-hook)
+              (debug:print-info 0 *default-log-port* "tarfilename: " tarfilename)
+              (debug:print-info 0 *default-log-port* "rundir: " rundir)
+              (debug:print-info 0 *default-log-port* "actual-logf: " actual-logf)
+              (debug:print-info 0 *default-log-port* "runname: " runname)
+              (debug:print-info 0 *default-log-port* "sys-call-text: " sys-call-text)
+	      (system sys-call-text)
+	      (debug:print-info 0 *default-log-port* "rerun-hook \"" rerun-hook "\" took " (- (current-seconds) start-time) " seconds to run."))))))
+
+
+
 
 ;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise.
 (define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon)
   (null? (tests:filter-test-names-not-matched waitors-upon test-patt)))
 
@@ -493,11 +549,13 @@
 	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
 	 ;; (tdbdat             (tasks:open-db))
 	 (config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns")))
 			       (if x (string->number x) #f)))
 	 (allowed-tests      #f)
-	 (runconf            #f))
+	 (runconf            #f)
+	 (cache-files        (launch:get-cache-file-paths #f (common:get-toppath *toppath* ) target))
+	 (runstart-time      (current-seconds)))
 
     ;; check if readonly
     (when readonly-mode
       (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed.")
       (exit 1))
@@ -631,11 +689,11 @@
     (runs:update-all-test_meta #f)
 
     ;; run the run prehook if there are no tests yet run for this run:
     ;;
     (runs:run-pre-hook run-id)
-    ;; mark all test launced flag as false in the meta table 
+    ;; mark all test launched flag as false in the meta table 
     (rmt:set-var (conc "lunch-complete-" run-id) "no")
     (debug:print-info 1 *default-log-port* "Setting end-of-run to no")
     (let* ((config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns")))
 				 (if x (string->number x) #f)))
 	   (config-rerun-cnt (if config-reruns
@@ -780,20 +838,25 @@
 	    (thread-start! th2)
 	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
 								    (any->number reglen) all-tests-registry)
 	    (set! keep-going #f)
 	    (thread-join! th2)
+	    (if (launch:cache-files-changed? cache-files runstart-time)
+		(begin ;; force a start-over
+		  (launch:setup force-reread: #t)
+		  (runs:run-tests target runname test-patts user flags run-count: 0)))
+	    
 	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
 	    (if (> run-count 0) ;; handle reruns
 		(begin
 		  (if (not (hash-table-ref/default flags "-preclean" #f))
 		      (hash-table-set! flags "-preclean" #t))
 		  (if (not (hash-table-ref/default flags "-rerun" #f))
-		      (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS"))
+		      (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS"))
 		  ;; recursive call to self
 		  (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
-		(launch:end-of-run-check run-id)))
+                (launch:end-of-run-check run-id)))
 	  (debug:print-info 0 *default-log-port* "No tests to run")))
     (debug:print-info 4 *default-log-port* "All done by here")
     ;; TODO: try putting post hook call here
       
     ;  (debug:print-info 2 *default-log-port* " run-count " run-count)
@@ -2056,25 +2119,34 @@
 		  (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED"))
 		      (member (test:get-state  testdat) '("COMPLETED")))) 
 	     (debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat))
 	     (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED)
 	     (set! runflag #f))
+
 	    ;; -rerun and status is one of the specifed, run it
 	    ((and rerun
 		  (let* ((rerunlst   (string-split rerun ","))
 			 (must-rerun (member (test:get-status testdat) rerunlst)))
 		    (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun)
 		    must-rerun))
 	     (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path)
-	     (set! runflag #t))
-	    ;; -keepgoing, do not rerun FAIL
+	     (set! runflag #t)
+             (debug:print-info 2 *default-log-port* "Calling rerun hook")
+             (runs:rerun-hook test-id new-test-path testdat rerun)
+             )
+             
+
+	    
+            ;; -keepgoing, do not rerun FAIL
 	    ((and keepgoing
 		  (member (test:get-status testdat) '("FAIL")))
 	     (set! runflag #f))
-	    ((and (not rerun)
+	    
+            ((and (not rerun)
 		  (member (test:get-status testdat) '("FAIL" "n/a")))
 	     (set! runflag #t))
+
 	    (else (set! runflag #f)))
 	   (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat))
 	   (if (not runflag)
 	       (if (not parent-test)
 		   (if (runs:lownoise (conc "not starting test" full-test-name) 60)

Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -322,29 +322,75 @@
         *my-client-signature*)))
 
 ;; wait for server=start-last to be three seconds old
 ;;
 (define (server:wait-for-server-start-last-flag areapath)
+  (let* ((flag-dir   (conc areapath "/logs"))
+	 (start-flag (conc flag-dir "/server-start-last"))
+	 ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
+	 (reftime    (configf:lookup-number *configdat* "server" "idletime" default: 4))
+	 (server-key (conc (get-host-name) "-" (current-process-id)))
+	 (create-key-file (lambda ()
+			    (with-output-to-file start-flag
+			      (lambda ()
+				(print server-key)))))
+	 (check-key-file  (lambda ()
+			    (let ((res (with-input-from-file start-flag
+					 (lambda ()
+					   (read-line)))))
+			      (equal? server-key res))))
+	 (get-file-age    (lambda ()
+			    (let* ((fmodtime (file-modification-time start-flag)))
+			      (- (current-seconds) fmodtime)))))
+    (if (not (directory-exists? flag-dir))
+	(begin
+	  (debug:print-info 0 *default-log-port* "Directory " flag-dir " does not exist! Cannot gate.")
+	  #f)
+	(if (file-exists? start-flag)
+	    (if (check-key-file) ;; is it me?
+		#t               ;; yes, it is me, proceed
+		(let* ((file-age (get-file-age)))
+		  (if (> file-age reftime) ;; let the previous guy have at least 4 seconds to do their thing
+		      (begin                     ;; file is old enough, we can try to take it
+			(create-key-file)        ;; take the file and try again
+			(server:wait-for-server-start-last-flag areapath))
+		      (let* ((remtime (max 1 (min file-age reftime))))
+			(debug:print-info 0 *default-log-port* "Gating server start, waiting remtime="remtime)
+			(thread-sleep! remtime)
+			(server:wait-for-server-start-last-flag areapath)))))
+	    (begin
+	      (create-key-file)
+	      (server:wait-for-server-start-last-flag areapath))))))
+		    
+
+		    
+;; wait for server=start-last to be three seconds old
+;;
+(define (server:wait-for-server-start-last-flag-old areapath)
   (let* ((start-flag (conc areapath "/logs/server-start-last"))
 	 ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
 	 (reftime    (configf:lookup-number *configdat* "server" "idletime" default: 4))
-	 (server-key (conc (get-host-name) "-" (current-process-id))))
+	 (server-key (conc (get-host-name) "-" (current-process-id)))
+	 (create-key-file (lambda ()
+			    (with-output-to-file start-flag
+			      (lambda ()
+				(print server-key)))))
+	 (check-key-file  (lambda ()
+			    (let ((res (with-input-from-file start-flag
+					 (lambda ()
+					   (read-line)))))
+			      (equal? server-key res)))))
     ;; (thread-sleep! (/ (random 500) 1000)) ;; I don't think this made a difference
     (if (file-exists? start-flag)
 	(let* ((fmodtime (file-modification-time start-flag))
 	       (delta    (- (current-seconds) fmodtime))
 	       (all-go   (> delta reftime)))
 	  (if (and all-go
 		   (begin
-		     (with-output-to-file start-flag
-		       (lambda ()
-			 (print server-key)))
+		     (create-key-file)
 		     (thread-sleep! 0.25)
-		     (let ((res (with-input-from-file start-flag
-				  (lambda ()
-				    (read-line)))))
-		       (equal? server-key res))))
+		     (check-key-file)))
 	      #t ;; (system (conc "touch " start-flag)) ;; lazy but safe
 	      (begin
 		(debug:print-info 0 *default-log-port* "Gating server start, last start: "
 				  fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go)
 		(thread-sleep! reftime)

Index: tests/unittests/all-api.scm
==================================================================
--- tests/unittests/all-api.scm
+++ tests/unittests/all-api.scm
@@ -116,11 +116,11 @@
 (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'delete-run (list 2))) 0)) ;; delete a non-existant run
 (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'update-run-stats (list 1 '()))) 0))
 (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'get-main-run-stats (list 1 ))) 0))
 (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'delete-old-deleted-test-records '())) 0))
 (test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'get-runs (list "%" 10 0 keypatts))) 0))
-(test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'simple-get-runs (list "%" 10 0 keypatts))) 0))
+(test #f #t (vector-ref (api:execute-requests my-dbstruct (vector 'simple-get-runs (list "%" 10 0 keypatts 0))) 0))
 (test #f #(#t (1))(api:execute-requests my-dbstruct (vector 'get-all-run-ids '())))
 (test #f #(#t ()) (api:execute-requests my-dbstruct (vector 'get-prev-run-ids '(1))))
 (test #f #(#t "JUSTFINE") (api:execute-requests my-dbstruct (vector 'get-run-status '(1))))
 (test #f #(#t "NEW") (api:execute-requests my-dbstruct (vector 'get-run-state '(1))))
 (test #f #(#t (("Totals" "UNKNOWN" 1) ("bar" "UNKNOWN" 1))) (api:execute-requests my-dbstruct (vector 'get-run-stats '())))

Index: tests/unittests/all-rmt.scm
==================================================================
--- tests/unittests/all-rmt.scm
+++ tests/unittests/all-rmt.scm
@@ -38,12 +38,12 @@
 (test #f #f (server:check-if-running toppath))           ;; these are used by server:start-and-wait
 (test #f #t (list? (server:get-list toppath)))
 (test #f '() (server:get-best '()))
 (test #f #t (common:simple-file-lock-and-wait "test.lock" expire-time: 15))
 (test #f "test.lock" (common:simple-file-release-lock "test.lock"))
-(test #f #t (server:get-best-guess-address (get-host-name)))
-(test #f #t (string? (common:get-homehost)))
+(test #f #t (string? (server:get-best-guess-address (get-host-name))))
+(test #f #t (string? (car (common:get-homehost))))
 
 ;; clean out any old running servers
 ;;
 (let ((servers (server:get-list toppath)))
   (print "Known servers: "  servers)
@@ -68,11 +68,11 @@
 (thread-sleep! 2)
 ;; (test #f #t (string? (server:start-and-wait *toppath*)))
 
 (test "setup for run" #t (begin (launch:setup)
  				(string? (getenv "MT_RUN_AREA_HOME"))))
-(test #f #t (client:setup-http toppath))
+(test #f #t (vector? (client:setup-http toppath)))
 (test #f #t (vector? (client:setup toppath)))
 
 (test #f #t (vector? (rmt:get-connection-info toppath))) ;; TODO: push areapath down.
 (test #f #t (string? (server:check-if-running ".")))
 ;; DEF (test #f #f (rmt:send-receive-no-auto-client-setup *runremote* 'get-keys #f '()))

ADDED   utils/Makefile.whodunit
Index: utils/Makefile.whodunit
==================================================================
--- /dev/null
+++ utils/Makefile.whodunit
@@ -0,0 +1,9 @@
+.DEFAULT : all
+
+all : whodunit
+
+clean : 
+	rm whodunit
+
+whodunit : whodunit.scm
+	csc -static -L -static -L -lm -L -ldl -L -lpthread -L -lssl -L -lcrypto -L -lz whodunit.scm -o whodunit 

Index: utils/mk_wrapper
==================================================================
--- utils/mk_wrapper
+++ utils/mk_wrapper
@@ -19,10 +19,11 @@
 
 prefix=$1
 cmd=$2
 target=$3
 cfgfile="$prefix/bin/.$(lsb_release -sr)/cfg.sh"
+libdir="$prefix/bin/.$(lsb_release -sr)/lib"
 
 # we wish to create a var in cfg.sh for finding sqlite3 executable
 chicken_bin_dir=$(dirname $(which csi))
 if [[ -e $chicken_bin_dir/sqlite3 ]];then
     sqlite3_exe=$chicken_bin_dir/sqlite3
@@ -30,19 +31,20 @@
     sqlite3_exe=$(which sqlite3)
 fi
 
 if [ "$LD_LIBRARY_PATH" != "" ];then
   echo "INFO: Using LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >&2
+  echo "INFO: Writing $cfgfile" >&2
 ( cat << __EOF
 if [ -z \$MT_ORIG_ENV ]; then
     export MT_ORIG_ENV=\$( $prefix/bin/serialize-env )
 fi
 
 if [ "\$LD_LIBRARY_PATH" != "" ];then
-  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:\$LD_LIBRARY_PATH
+  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:\$LD_LIBRARY_PATH:$libdir
 else
-  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH
+  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$libdir
 fi
 
 export MT_SQLITE3_EXE=$sqlite3_exe
 __EOF
 ) > $cfgfile

Index: utils/whodunit.scm
==================================================================
--- utils/whodunit.scm
+++ utils/whodunit.scm
@@ -13,11 +13,25 @@
 ;;     GNU General Public License for more details.
 ;; 
 ;;     You should have received a copy of the GNU General Public License
 ;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 ;;
-(use posix srfi-69)
+(module whodunit
+
+ *
+
+
+(import 
+  (chicken process-context)
+  (chicken process)
+  (chicken string)
+  (chicken base)
+  (chicken sort)
+  (chicken io)
+  srfi-69
+  scheme
+)
 
 (define *numsamples* (or (and (> (length (argv)) 1)
                               (string->number (cadr (argv))))
                          3))
 
@@ -24,11 +38,11 @@
 (define (topdata)
   (with-input-from-pipe
    (conc "top -b -n " *numsamples* " -d 0.1")
    read-lines))
 
-(define (cleanup-data topdat)list
+(define (cleanup-data topdat)
   (let loop ((hed (car topdat))
               (tal (cdr topdat))
               (res '()))
     (let* ((line-list (string-split hed))
            (nums      (map (lambda (indat)(or (string->number indat) indat)) line-list))
@@ -37,29 +51,37 @@
            (new-res   (if not-data res (cons nums res))))
       (if (null? tal)
           new-res
           (loop (car tal)(cdr tal) new-res)))))
 
+
+;; sum up
+(define (sum-up data ht)
+  (for-each
+   (lambda (indat)
+     (let ((pid (car indat))
+           (usr (cadr indat))
+           (cpu (list-ref indat 8)))
+       (hash-table-set! ht usr (+ cpu (hash-table-ref/default ht usr 0)))))
+   data))
+
+(define (print-results userhash)
+  (for-each
+   (lambda (usr)
+     (let* ((usage (inexact->exact (round (/ (hash-table-ref userhash usr) *numsamples*)))))
+       (if (> usage 0)
+	   (print usr (if (< (string-length usr) 8) "\t\t" "\t") usage))))
+   (sort (hash-table-keys userhash)
+	 (lambda (a b)
+           (> (hash-table-ref userhash a)
+	      (hash-table-ref userhash b))))))
+  
+)
+(import whodunit srfi-69 (chicken sort))
+
 (print "Getting " *numsamples* " samples of cpu usage data.")
 (define data (cleanup-data (topdata)))
 (define pidhash  (make-hash-table))
 (define userhash (make-hash-table))
-
-;; sum up and normalize the 
-(for-each
- (lambda (indat)
-   (let ((pid (car indat))
-         (usr (cadr indat))
-         (cpu (list-ref indat 8)))
-     (hash-table-set! userhash usr (+ cpu (hash-table-ref/default userhash usr 0)))))
- data)
-
-(for-each
- (lambda (usr)
-   (print usr
-          (if (< (string-length usr) 8) "\t\t" "\t")
-          (inexact->exact (round (/ (hash-table-ref userhash usr) *numsamples*)))))
- (sort (hash-table-keys userhash)
-       (lambda (a b)
-         (> (hash-table-ref userhash a)
-            (hash-table-ref userhash b)))))
-
+(sum-up data userhash)
+
+(print-results userhash)