Index: mtmod.scm
==================================================================
--- mtmod.scm
+++ mtmod.scm
@@ -56,4242 +56,9 @@
 	(prefix mtargs args:))
 
 (include "run_records.scm")
 (include "db_records.scm")
 (include "test_records.scm")
-
-;; This is the Megatest API. All generally "useful" routines will be wrapped or extended
-;; here.
-
-;;                       0           1              2              3
-(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0))
-
-;;======================================================================
-;;  R U N S
-;;======================================================================
-
-;; runs:get-runs-by-patt
-;; get runs by list of criteria
-;; register a test run with the db
-;;
-;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
-;;  to extract info from the structure returned
-;;
-(define (mt:get-runs-by-patt keys runnamepatt targpatt)
-  (let loop ((runsdat  (rmt:get-runs-by-patt keys runnamepatt targpatt 0 500 #f 0))
-	     (res      '())
-	     (offset   0)
-	     (limit    500))
-    ;; (print "runsdat: " runsdat)
-    (let* ((header    (vector-ref runsdat 0))
-	   (runslst   (vector-ref runsdat 1))
-	   (full-list (append res runslst))
-	   (have-more (eq? (length runslst) limit)))
-      ;; (debug:print 0 *default-log-port* "header: " header " runslst: " runslst " have-more: " have-more)
-      (if have-more 
-	  (let ((new-offset (+ offset limit))
-		(next-batch (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit #f 0)))
-	    (debug:print-info 4 *default-log-port* "More than " limit " runs, have " (length full-list) " runs so far.")
-	    (debug:print-info 0 *default-log-port* "next-batch: " next-batch)
-	    (loop next-batch
-		  full-list
-		  new-offset
-		  limit))
-	 (vector header full-list)))))
-
-;;======================================================================
-;;  T E S T S
-;;======================================================================
-
-(define (mt:get-tests-for-run run-id testpatt states status #!key (not-in #t) (sort-by 'event_time) (sort-order "ASC") (qryvals #f)(last-update #f))
-  (let loop ((testsdat (rmt:get-tests-for-run run-id testpatt states status 0 500 not-in sort-by sort-order qryvals last-update 'normal))
-	     (res      '())
-	     (offset   0)
-	     (limit    500))
-    (let* ((full-list (append res testsdat))
-	   (have-more (eq? (length testsdat) limit)))
-      (if have-more 
-	  (let ((new-offset (+ offset limit)))
-	    (debug:print-info 4 *default-log-port* "More than " limit " tests, have " (length full-list) " tests so far.")
-	    (loop (rmt:get-tests-for-run run-id testpatt states status new-offset limit not-in sort-by sort-order qryvals last-update 'normal)
-		  full-list
-		  new-offset
-		  limit))
-	  full-list))))
-
-(define (mt:lazy-get-prereqs-not-met run-id waitons ref-item-path #!key (mode '(normal))(itemmaps #f) )
-  (let* ((key    (list run-id waitons ref-item-path mode))
-	 (res    (hash-table-ref/default *pre-reqs-met-cache* key #f))
-	 (useres (let ((last-time (if (vector? res) (vector-ref res 0) #f)))
-		   (if last-time
-		       (< (current-seconds)(+ last-time 5))
-		       #f))))
-    (if useres
-	(let ((result (vector-ref res 1)))
-	  (debug:print 4 *default-log-port* "Using lazy value res: " result)
-	  result)
-	(let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmaps: itemmaps)))
-	  (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres))
-	  newres))))
-
-(define (mt:get-run-stats dbstruct run-id)
-;;  Get run stats from local access, move this ... but where?
-  (db:get-run-stats dbstruct run-id))
-
-(define (mt:discard-blocked-tests run-id failed-test tests test-records)
-  (if (null? tests)
-      tests
-      (begin
-	(debug:print-info 1 *default-log-port* "Discarding tests from " tests " that are waiting on " failed-test)
-	(let loop ((testn (car tests))
-		   (remt  (cdr tests))
-		   (res   '()))
-	  (let* ((test-dat (hash-table-ref/default test-records testn (vector #f #f '())))
-		 (waitons  (vector-ref test-dat 2)))
-	    ;; (print "mt:discard-blocked-tests run-id: " run-id " failed-test: " failed-test " testn: " testn " with waitons: " waitons)
-	    (if (null? remt)
-		(let ((new-res (reverse res)))
-		  ;; (print "       new-res: " new-res)
-		  new-res)
-		(loop (car remt)
-		      (cdr remt)
-		      (if (member failed-test waitons)
-			  (begin
-			    (debug:print 0 *default-log-port* "Discarding test " testn "(" test-dat ") due to " failed-test)
-			    res)
-			  (cons testn res)))))))))
-
-;;======================================================================
-;;  S T A T E   A N D   S T A T U S   F O R   T E S T S 
-;;======================================================================
-
-;; speed up for common cases with a little logic
-(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
-  (if (not (and run-id test-id))
-      (begin
-	(debug:print-error 0 *default-log-port* "bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate)
-	(print-call-chain (current-error-port))
-	#f)
-      (begin
-	;; cond
-	;; ((and newstate newstatus newcomment)
-	;;  (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id))
-	;; ((and newstate newstatus)
-	;;  (rmt:general-call 'state-status run-id newstate newstatus test-id))
-	;; (else
-	;;  (if newstate   (rmt:general-call 'set-test-state   run-id newstate   test-id))
-	;;  (if newstatus  (rmt:general-call 'set-test-status  run-id newstatus  test-id))
-	;;  (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id))))
-	(rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment)
-	;; (mt:process-triggers run-id test-id newstate newstatus)
-	#t)))
-
-
-(define (mt:test-set-state-status-by-id-unless-completed run-id test-id newstate newstatus newcomment)
-  (let* ((test-vec   (rmt:get-testinfo-state-status run-id test-id))
-         (state     (vector-ref test-vec 3)))
-    (if (equal? state "COMPLETED")
-        #t
-        (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment))))
-
-  
-(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment)
-  ;(let ((test-id (rmt:get-test-id run-id test-name item-path)))
-  (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status new-comment)
-  ;; (mt:process-triggers run-id test-id new-state new-status)
-  #t);)
-	;;(mt:test-set-state-status-by-id run-id test-id new-state new-status new-comment)))
-
-(define (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path new-state new-status new-comment)
-  (let ((test-id (rmt:get-test-id run-id test-name item-path)))
-    (mt:test-set-state-status-by-id-unless-completed run-id test-id new-state new-status new-comment)))
-    
-;; kill any runner processes (i.e. processes handling -runtests) that match target/runname
-;; 
-;; do a remote call to get the task queue info but do the killing as self here.
-;;
-(define (tasks:kill-runner target run-name testpatt)
-  (let ((records    (rmt:tasks-find-task-queue-records target run-name testpatt "running" "run-tests"))
-	(hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string
-    (if (null? records)
-	(debug:print 0 *default-log-port* "No run launching processes found for " target " / " run-name " with testpatt " (or testpatt "* no testpatt specified! *"))
-	(debug:print 0 *default-log-port* "Found " (length records) " run(s) to kill."))
-    (for-each 
-     (lambda (record)
-       (let* ((param-key (list-ref record 8))
-	      (match-dat (string-search hostpid-rx param-key)))
-	 (if match-dat
-	     (let ((hostname  (cadr match-dat))
-		   (pid       (string->number (caddr match-dat))))
-	       (debug:print 0 *default-log-port* "Sending SIGINT to process " pid " on host " hostname)
-	       (if (equal? (get-host-name) hostname)
-		   (if (process:alive? pid)
-		       (begin
-			 (handle-exceptions
-			  exn
-			  (begin
-			    (debug:print 0 *default-log-port* "Kill of process " pid " on host " hostname " failed.")
-			    (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-			    #t)
-			  (process-signal pid signal/int)
-			  (thread-sleep! 5)
-			  (if (process:alive? pid)
-			      (process-signal pid signal/kill)))))
-		   ;;  (call-with-environment-variables
-		   (let ((old-targethost (getenv "TARGETHOST")))
-		     (setenv "TARGETHOST" hostname)
-		     (setenv "TARGETHOST_LOGF" "server-kills.log")
-		     (system (conc "nbfake kill " pid))
-		     (if old-targethost (setenv "TARGETHOST" old-targethost))
-		     (unsetenv "TARGETHOST")
-		     (unsetenv "TARGETHOST_LOGF"))))
-	     (debug:print-error 0 *default-log-port* "no record or improper record for " target "/" run-name " in tasks_queue in main.db"))))
-     records)))
-
-(define (task:get-run-times)
-   (let* ( 
-           (run-patt (if (args:get-arg "-run-patt")
-                        (args:get-arg "-run-patt")
-                        "%"))
-           (target-patt (if (args:get-arg "-target-patt")
-                        (args:get-arg "-target-patt")
-                        "%"))
- 
-           (run-times  (rmt:get-run-times  run-patt target-patt )))
-   (if (eq? (length run-times) 0)
-     (begin
-       (print "Data not found!!")
-       (exit)))
-   (if (equal? (args:get-arg "-dumpmode") "json")
-       (task:print-runtime-as-json run-times)
-         (if (equal? (args:get-arg "-dumpmode") "csv")
-	     (task:print-runtime run-times ",")
-	     (task:print-runtime run-times "  ")))))
-
- (define (task:get-test-times)
-   (let* ((runname (if (args:get-arg "-runname")
-                        (args:get-arg "-runname")
-                        #f))
-           (target (if (args:get-arg "-target")
-                        (args:get-arg "-target")
-                        #f))
- 
-           (test-times  (rmt:get-test-times  runname target )))
-   (if (not runname)
-      (begin
-      (print "Error: Missing argument -runname")
-      (exit))) 
-    (if (string-contains runname "%")
-      (begin
-      (print "Error: Invalid runname, '%' not allowed  (" runname ") ")
-      (exit)))
-    (if (not target)
-      (begin
-      (print "Error: Missing argument -target")
-      (exit)))
-     (if  (string-contains target "%")
-      (begin
-      (print "Error: Invalid target, '%' not allowed  (" target ") ")
-      (exit)))
- 
-   (if (eq? (length test-times) 0)
-     (begin
-       (print "Data not found!!")
-       (exit)))
-   (if (equal? (args:get-arg "-dumpmode") "json")
-       (task:print-testtime-as-json test-times)
-         (if (equal? (args:get-arg "-dumpmode") "csv")
-	     (task:print-testtime test-times ",")
-	     (task:print-testtime test-times "  ")))))
-
-
-
-;; gets mtpg-run-id and syncs the record if different
-;;
-(define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)
-  (let* ((runs-ht (hash-table-ref cached-info 'runs))
-	 (runinf  (hash-table-ref/default runs-ht run-id #f))
-         (area-id (vector-ref area-info 0)))
-       (if runinf
-	runinf ;; already cached
-	(let* ((run-dat    (rmt:get-run-info run-id))               ;; NOTE: get-run-info returns a vector < row header >
-	       (run-name   (rmt:get-run-name-from-id run-id))
-	       (row        (db:get-rows run-dat))                   ;; yes, this returns a single row
-	       (header     (db:get-header run-dat))
-	       (state      (db:get-value-by-header row header "state"))
-	       (status     (db:get-value-by-header row header "status"))
-	       (owner      (db:get-value-by-header row header "owner"))
-	       (event-time (db:get-value-by-header row header "event_time"))
-	       (comment    (db:get-value-by-header row header "comment"))
-	       (fail-count (db:get-value-by-header row header "fail_count"))
-	       (pass-count (db:get-value-by-header row header "pass_count"))
-         (db-contour (db:get-value-by-header row header "contour"))
-	       (contour    (if (args:get-arg "-prepend-contour") 
-                                 (if (and db-contour (not (equal? db-contour ""))  (string? db-contour )) 
-                                           (begin 
-                                            (debug:print-info 1 *default-log-port*  "db-contour") 
- 						db-contour)
-					    (args:get-arg "-contour"))))
-         (run-tag (if (args:get-arg "-run-tag")
-                            (args:get-arg "-run-tag")
-									""))
-         (last-update (db:get-value-by-header row header "last_update"))
-	       (keytarg    (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target"))
-	       			(conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/")) (string-intersperse (rmt:get-keys) "/"))) ;; e.g. version/iteration/platform
-	       (target     (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) 
-	       			(conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name) "/")) (rmt:get-target run-id)) (rmt:get-target run-id)))                 ;; e.g. v1.63/a3e1/ubuntu
-	       (spec-id    (pgdb:get-ttype dbh keytarg))
-	       (publish-time (if (args:get-arg "-cp-eventtime-to-publishtime")
-                            event-time
-                           (current-seconds))) 
-	       (new-run-id (pgdb:get-run-id dbh spec-id target run-name area-id)))
-         (if new-run-id
-	         (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id))
-		        (hash-table-set! runs-ht run-id new-run-id)
-		;; ensure key fields are up to date
-     ;; if last_update == pgdb_last_update do not update smallest-last-update-time  
-    (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id))
-           (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
-     (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
-        (hash-table-set! smallest-last-update-time "smallest-time" last-update)))
-		(pgdb:refresh-run-info
-		 dbh
-		 new-run-id
-		 state status owner event-time comment fail-count pass-count area-id last-update publish-time)
-     (debug:print-info 0 *default-log-port* "Working on run-id " run-id " pgdb-id "  new-run-id )
-     (if (not (equal? run-tag ""))
-      (task:add-run-tag dbh new-run-id run-tag))
-		new-run-id) 
-      
-	      (if (equal? state "deleted")
-          (begin 
-          (debug:print-info 1 *default-log-port*  "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f)
-          (if (handle-exceptions
-		        exn
-		        (begin (print-call-chain)
-              (print ((condition-property-accessor 'exn 'message) exn))     
-			      #f)
-            
-            (pgdb:insert-run
-		     dbh
-		     spec-id target run-name state status owner event-time comment fail-count pass-count  area-id last-update publish-time))
-		       (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
-             (if (or (not smallest-time) (< last-update smallest-time))
-        				(hash-table-set! smallest-last-update-time "smallest-time" last-update))
-             (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
-		  #f)))))))
-
-(define (task:add-run-tag dbh run-id tag) 
-  (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag)))
-   (if (not tag-info)
-     (begin   
-     (if (handle-exceptions
-	   exn
-	   (begin 
-               (debug:print-info 1 *default-log-port*  ((condition-property-accessor 'exn 'message) exn))     
-	   #f)
-	   (pgdb:insert-tag  dbh   tag))
-                       (set! tag-info (pgdb:get-tag-info-by-name dbh tag))
-		  #f)))
-     ;;add to area_tags
-     (handle-exceptions
-	   exn
-	   (begin 
-               (debug:print-info 1 *default-log-port*  ((condition-property-accessor 'exn 'message) exn))     
-	   #f)
-           (if (not (pgdb:is-run-taged-with-a-tag dbh (vector-ref tag-info 0)  run-id))  
-	   (pgdb:insert-run-tag  dbh   (vector-ref tag-info 0)  run-id)))))
-
-
-(define (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time)
- ; (print "Sync Steps " test-step-ids )
-  (let ((test-ht (hash-table-ref cached-info 'tests))
-        (step-ht (hash-table-ref cached-info 'steps)))
-    (for-each
-     (lambda (test-step-id)
-        (let* ((test-step-info  (rmt:get-steps-info-by-id test-step-id))
-               (step-id (tdb:step-get-id test-step-info))
-               (test-id  (tdb:step-get-test_id    test-step-info))   
-	       (stepname (tdb:step-get-stepname  test-step-info))
-	       (state (tdb:step-get-state test-step-info))	
-	       (status (tdb:step-get-status test-step-info))	
-	       (event_time (tdb:step-get-event_time  test-step-info))	
-	       (comment  (tdb:step-get-comment test-step-info))	
-	       (logfile (tdb:step-get-logfile test-step-info))	
-         (last-update (tdb:step-get-last_update test-step-info))
-	       (pgdb-test-id  (hash-table-ref/default test-ht test-id #f))
-				 (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))
-         (pgdb-step-id (if pgdb-test-id 
-                         (pgdb:get-test-step-id dbh pgdb-test-id stepname state)
-                          #f)))
-    (if step-id
-      (begin  
-        (if pgdb-test-id
-           (begin 
-                (if  pgdb-step-id
-                   (begin
-                    (debug:print-info 1 *default-log-port*  "Updating existing test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id " pgdb step id " pgdb-step-id )
-										(let* ((pgdb-last-update (pgdb:get-test-step-last-update dbh pgdb-step-id)))
-         (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
-        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
-                    (pgdb:update-test-step dbh pgdb-step-id pgdb-test-id stepname state status event_time comment logfile last-update))
-                    (begin
- 		      (debug:print-info 1 *default-log-port*  "Inserting test-step with test-id: " test-id " and step-id " step-id  " pgdb test id: " pgdb-test-id)
-                     (if (or (not smallest-time) (< last-update smallest-time))
-        				      (hash-table-set! smallest-last-update-time "smallest-time" last-update))
-                      (pgdb:insert-test-step dbh pgdb-test-id stepname state status event_time comment logfile last-update )
-                      (set! pgdb-step-id  (pgdb:get-test-step-id dbh pgdb-test-id stepname state))))
-                (hash-table-set! step-ht step-id pgdb-step-id ))
-           (debug:print-info 1 *default-log-port*  "Error: Test not cashed")))
-      (debug:print-info 1 *default-log-port*  "Error: Could not get test step info for step id " test-step-id ))))	;; this is a wierd senario need to debug      	
-   test-step-ids)))
-
-(define (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time)
-  (let ((test-ht (hash-table-ref cached-info 'tests))
-        (data-ht (hash-table-ref cached-info 'data)))
-    (for-each
-     (lambda (test-data-id)
-        (let* ((test-data-info  (rmt:get-data-info-by-id test-data-id))
-               (data-id (db:test-data-get-id  test-data-info))
-               (test-id  (db:test-data-get-test_id   test-data-info))   
-	       (category  (db:test-data-get-category  test-data-info))
-	       (variable  (db:test-data-get-variable test-data-info))	
-	       (value (db:test-data-get-value  test-data-info))	
-               (expected (db:test-data-get-expected  test-data-info))
-               (tol (db:test-data-get-tol  test-data-info))
-               (units (db:test-data-get-units  test-data-info))     
-	       (comment  (db:test-data-get-comment test-data-info))	
-               (status (db:test-data-get-status test-data-info))	
-	       (type (db:test-data-get-type test-data-info))
-				 (last-update (db:test-data-get-last_update test-data-info))
-				 (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))
-   	
-	       (pgdb-test-id  (hash-table-ref/default test-ht test-id #f))
-               (pgdb-data-id (if pgdb-test-id 
-                                 (pgdb:get-test-data-id dbh pgdb-test-id category variable)
-                                  #f)))
-    (if data-id
-      (begin
-        (if pgdb-test-id
-           (begin 
-                (if  pgdb-data-id
-                   (begin
-                    (debug:print-info 1 *default-log-port*  "Updating existing test-data with test-id: " test-id " and  data-id " data-id " pgdb test id: " pgdb-test-id " pgdb data id " pgdb-data-id)
-                    (let* ((pgdb-last-update (pgdb:get-test-data-last-update dbh pgdb-data-id)))
-         (if (and  (>  last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
-        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
-                    (pgdb:update-test-data dbh pgdb-data-id pgdb-test-id  category variable value expected tol units comment status type last-update))
-                    (begin
- 		      (debug:print-info 1 *default-log-port*  "Inserting test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id)
-                       (if (handle-exceptions
-		      exn
-		      (begin (print-call-chain)
-                              (print ((condition-property-accessor 'exn 'message) exn))     
-			#f)
-                     
-                    (pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type last-update))
-		       ;(tasks:run-id->mtpg-run-id dbh cached-info run-id area-info)
-                      (begin
-                      ;(pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type )
-											(if (or (not smallest-time) (< last-update smallest-time))
-        								(hash-table-set! smallest-last-update-time "smallest-time" last-update))
-                      (set! pgdb-data-id  (pgdb:get-test-data-id dbh pgdb-test-id  category variable)))
-		   #f)))
-                (hash-table-set! data-ht data-id pgdb-data-id ))
-             (begin
-                 (debug:print-info 1 *default-log-port*  "Error: Test not in pgdb"))))
-
-      (debug:print-info 1 *default-log-port*  "Error: Could not get test data info for data id " test-data-id ))))	;; this is a wierd senario need to debug      	
-   test-data-ids)))
-
-
-
-(define (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time)
-  (let ((test-ht (hash-table-ref cached-info 'tests)))
-    (for-each
-     (lambda (test-id)
-      ; (print test-id)
-       (let* ((test-info    (rmt:get-test-info-by-id #f test-id))
-	      (run-id       (db:test-get-run_id    test-info)) ;; look these up in db_records.scm
-	      (test-id      (db:test-get-id        test-info))
-	      (test-name    (db:test-get-testname  test-info))
-	      (item-path    (db:test-get-item-path test-info))
-	      (state        (db:test-get-state     test-info))
-	      (status       (db:test-get-status    test-info))
-	      (host         (db:test-get-host      test-info))
-        (pid          (db:test-get-process_id test-info)) 
-	      (cpuload      (db:test-get-cpuload   test-info))
-	      (diskfree     (db:test-get-diskfree  test-info))
-	      (uname        (db:test-get-uname     test-info))
-	      (run-dir      (db:test-get-rundir    test-info))
-	      (log-file     (db:test-get-final_logf test-info))
-	      (run-duration (db:test-get-run_duration test-info))
-	      (comment      (db:test-get-comment   test-info))
-	      (event-time   (db:test-get-event_time test-info))
-	      (archived     (db:test-get-archived  test-info))
-        (last-update  (db:test-get-last_update  test-info))
-	      (pgdb-run-id  (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
-        (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))       
-	      (pgdb-test-id (if pgdb-run-id 
-				(begin
-                                  ;(print pgdb-run-id)    
-                                 (pgdb:get-test-id dbh pgdb-run-id test-name item-path))
-                                 #f)))
-	 ;; "id"           "run_id"        "testname"  "state"      "status"      "event_time"
-	 ;; "host"         "cpuload"       "diskfree"  "uname"      "rundir"      "item_path"
-	 ;; "run_duration" "final_logf"    "comment"   "shortdir"   "attemptnum"  "archived"
-         (if pgdb-run-id
-           (begin
-	   (if pgdb-test-id ;; have a record
-	     (begin ;; let ((key-name (conc run-id "/" test-name "/" item-path)))
-	       (debug:print-info 0 *default-log-port*  "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id "  pgdb-test-id "  pgdb-test-id)
-         (let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id)))
-         (if (and  (>  last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time.
-        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
-	       (pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid))
-	     (begin 
-           (debug:print-info 0 *default-log-port*  "Inserting test with run-id: " run-id " and test-id: " test-id  " pgdb run id: " pgdb-run-id)
-           (pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)
-            (if (or (not smallest-time) (< last-update smallest-time))
-        				(hash-table-set! smallest-last-update-time "smallest-time" last-update))
-           (set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path))))
-           (hash-table-set! test-ht test-id pgdb-test-id))
-           (debug:print-info 1 *default-log-port*  "WARNING: Skipping run with run-id:" run-id ". This run was created after privious sync and removed before this sync."))))
-     test-ids)))
-
-(define (task:add-area-tag dbh area-info tag) 
-  (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag)))
-   (if (not tag-info)
-     (begin   
-     (if (handle-exceptions
-	   exn
-	   (begin 
-               (debug:print-info 1 *default-log-port*  ((condition-property-accessor 'exn 'message) exn))     
-	   #f)
-	   (pgdb:insert-tag  dbh   tag))
-                       (set! tag-info (pgdb:get-tag-info-by-name dbh tag))
-		  #f)))
-     ;;add to area_tags
-     (handle-exceptions
-	   exn
-	   (begin 
-               (debug:print-info 1 *default-log-port*  ((condition-property-accessor 'exn 'message) exn))     
-	   #f)
-           (if (not (pgdb:is-area-taged-with-a-tag dbh (vector-ref tag-info 0)  (vector-ref area-info 0)))  
-	   (pgdb:insert-area-tag  dbh   (vector-ref tag-info 0)  (vector-ref area-info 0))))))
-
-(define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) 
-  (for-each
-     (lambda (run-id)
-      (debug:print-info 1 *default-log-port*   "Check if run with " run-id " needs to be synced" )
-       (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
-run-ids))
-
-
-;; get runs changed since last sync
-;; (define (tasks:sync-test-data dbh cached-info area-info)
-;;   (let* ((
-
-(define (tasks:sync-to-postgres configdat dest)
-  (print "In sync")
-  (let* ((dbh         (pgdb:open configdat dbname: dest))
-	 (area-info   (pgdb:get-area-by-path dbh *toppath*))
-	 (cached-info (make-hash-table))
-	 (start       (current-seconds))
-   (test-patt   (if (args:get-arg "-testpatt")
-											(args:get-arg "-testpatt")
-                      "%"))
-   (target         (if (args:get-arg "-target")
-														 (args:get-arg "-target")
-													#f))
-    (run-name         (if (args:get-arg "-runname")
-														 (args:get-arg "-runname")
-													#f)))
-     (if (and target  (not run-name))
-       (begin
-					(print "Error: Provide runname")
-          (exit 1)))
-     (if (and (not target)  run-name)
-       (begin
-					(print "Error: Provide target")
-          (exit 1)))
-    ;(print "123")
-    ;(exit 1) 
-    (for-each (lambda (dtype)
-		(hash-table-set! cached-info dtype (make-hash-table)))
-	      '(runs targets tests steps data))
-    (hash-table-set! cached-info 'start start) ;; when done we'll set sync times to this
-    (if area-info
-	(let* ((last-sync-time (vector-ref area-info 3))
-	       (smallest-last-update-time  (make-hash-table))
-         (changed      (if (and target run-name)
-                            (rmt:get-run-record-ids target run-name (rmt:get-keys) test-patt)
-                            (rmt:get-changed-record-ids last-sync-time)))
-	       (run-ids        (alist-ref 'runs       changed))
-	       (test-ids       (alist-ref 'tests      changed))
-	       (test-step-ids  (alist-ref 'test_steps changed))
-	       (test-data-ids  (alist-ref 'test_data  changed))
-	       (run-stat-ids   (alist-ref 'run_stats  changed))
-         (area-tag    (if (args:get-arg "-area-tag") 
-                                 (args:get-arg "-area-tag")
-                                 (if (args:get-arg "-area") 
-                                   (args:get-arg "-area") 
-                                   ""))))
-           (if (and (equal? area-tag "") (not (pgdb:is-area-taged dbh (vector-ref area-info 0))))
-            (set! area-tag *default-area-tag*)) 
-           (if (not (equal? area-tag "")) 
-             (task:add-area-tag dbh area-info area-tag)) 
-	  (if (or (not (null? test-ids)) (not (null? run-ids)))
-	      (begin
-                (debug:print-info 0 *default-log-port*  "syncing runs")   
-	              (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) 
-                (debug:print-info 0 *default-log-port*  "syncing tests")
-		            (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time)
-                (debug:print-info 0 *default-log-port*  "syncing test steps")
-                (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time)
-								(debug:print-info 0 *default-log-port*  "syncing test data")
-                (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time)
-                (print "----------done---------------")))
-     (let*  ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
-     (debug:print-info 0 "smallest-time :" smallest-time  " last-sync-time " last-sync-time)
-    (if (not (and target run-name)) 
-	  (if (or (and smallest-time (> smallest-time last-sync-time)) (and smallest-time (eq? last-sync-time 0)))
-				(pgdb:write-sync-time dbh area-info smallest-time))))) ;;this needs to be changed
-	(if (tasks:set-area dbh configdat)
-	    (tasks:sync-to-postgres configdat dest)
-	    (begin
-	      (debug:print 0 *default-log-port* "ERROR: unable to create an area record")
-	      #f)))))
-
-;;======================================================================
-;; L O C K I N G   M E C H A N I S M S 
-;;======================================================================
-
-;; faux-lock is deprecated. Please use simple-lock below
-;;
-;; Try to take the lock stored under keyname in the no-sync store.
-;;
-;;   keyname          - key used for the lock record
-;;   wait-time        - number of one second retries before giving up
-;;   allow-lock-steal - when true (the default) the lock is forcibly released
-;;                      on the last retry so that this process can take it
-;;
-;; Returns #t when the value read back for keyname is this process id, #f when
-;; the lock was still held after wait-time retries.
-;;
-(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t))
-  (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count
-      (if (> wait-time 0)
-	  (begin
-	    (thread-sleep! 1)
-	    (if (and allow-lock-steal   ;; callers may opt out of stealing
-		     (<= wait-time 1))  ;; only one second left, steal the lock
-		(begin
-		  (debug:print-info 0 *default-log-port* "stealing lock for " keyname)
-		  (common:faux-unlock keyname force: #t)))
-	    ;; pass allow-lock-steal along, otherwise the retry reverts to the default
-	    (common:faux-lock keyname
-			      wait-time: (- wait-time 1)
-			      allow-lock-steal: allow-lock-steal))
-	  #f)
-      (let ((my-pid (conc (current-process-id))))
-        (rmt:no-sync-set keyname my-pid)
-        ;; read the value back; another process may have written at the same time
-        (equal? my-pid (conc (rmt:no-sync-get/default keyname #f))))))
-
-;; Release the faux lock stored under keyname.
-;;
-;; The lock is released when force is true or when the stored value matches
-;; this process id. Returns #t in that case (even if there was no record left
-;; to delete) and #f when the lock belongs to somebody else.
-;;
-(define (common:faux-unlock keyname #!key (force #f))
-  (let ((held-by-me? (lambda ()
-                       (equal? (conc (current-process-id))
-                               (conc (rmt:no-sync-get/default keyname #f))))))
-    (cond
-     ((or force (held-by-me?))
-      (if (rmt:no-sync-get/default keyname #f)
-          (rmt:no-sync-del! keyname))
-      #t)
-     (else #f))))
-
-;; simple lock. improve and converge on this one.
-;;
-;; Thin wrapper: hands keyname to rmt:no-sync-get-lock and returns whatever
-;; that call returns.
-;;
-(define (common:simple-lock keyname)
-  (rmt:no-sync-get-lock keyname))
-
-;; Release the simple lock by deleting the keyname record with rmt:no-sync-del!.
-;; NOTE(review): the force keyword is accepted but never consulted; the record
-;; is deleted unconditionally.
-;;
-(define (common:simple-unlock keyname #!key (force #f))
-  (rmt:no-sync-del! keyname))
-
-;;======================================================================
-;; db based host calls
-;;======================================================================
-
-;;======================================================================
-;;  T E S T   L A U N C H I N G   P E R   I T E M   W I T H   H O S T   T Y P E S
-;;======================================================================
-;; 
-;; [hosts]
-;; arm cubie01 cubie02
-;; x86_64 zeus xena myth01
-;; allhosts #{g hosts arm} #{g hosts x86_64}
-;; 
-;; [host-types]
-;; general #MTLOWESTLOAD #{g hosts allhosts}
-;; arm     #MTLOWESTLOAD #{g hosts arm}
-;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo
-;;
-;; [host-rules]
-;; # maxnload   => max normalized load
-;; # maxnjobs   => max jobs per cpu
-;; # maxjobrate => max jobs per second
-;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 
-;; 
-;; [launchers]
-;; envsetup general
-;; xor/%/n 4C16G
-;; % nbgeneral
-;; 
-;; [jobtools]
-;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match.
-;; flexi-launcher yes  
-;; launcher nbfake
-;;
-;; Pick the launcher command for testname/itempath.
-;;
-;; The fallback is the [jobtools] launcher setting. When [jobtools]
-;; flexi-launcher is set to anything but "no" the entries of the "launchers"
-;; section are walked in order; each entry is (pattern host-type). For the
-;; first pattern accepted by tests:match the host-type is looked up in
-;; [host-types]:
-;;   - no such host-type: a warning is printed and the walk continues with
-;;     the next entry (fallback launcher if none are left)
-;;   - first word is "#MTLOWESTLOAD": common:get-least-loaded-host is polled
-;;     for a host, sleeping 1, 2, 3 ... seconds between attempts for up to
-;;     100 retries. Returns "remrun <host>" on success and calls (exit) when
-;;     no host was found.
-;;   - anything else: the host-type string is returned as the launcher.
-;; If nothing matches, the fallback launcher is returned.
-;;
-(define (common:get-launcher configdat testname itempath)
-  (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher")))
-    (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher
-	     (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no")))
-	(let* ((launchers         (hash-table-ref/default configdat "launchers" '())))
-	  (if (null? launchers)
-	      fallback-launcher
-	      (let loop ((hed (car launchers))
-			 (tal (cdr launchers)))
-		(let ((patt      (car hed))
-		      (host-type (cadr hed)))
-		  (if (tests:match patt testname itempath)
-		      (begin
-			(debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type)
-			(let ((launcher (configf:lookup configdat "host-types" host-type)))
-			  (if launcher
-			      (let* ((launcher-parts (string-split launcher))
-				     (launcher-exe   (car launcher-parts)))
-				(if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline
-				    (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat))
-						    (count     100))
-				      (if targ-host
-					  (conc "remrun " targ-host)
-					  (if (> count 0)
-					      (begin
-						(debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type)
-						(thread-sleep! (- 101 count))
-						(host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)
-							   (- count 1)))
-					      (begin
-						(debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type)
-						(exit)))))
-				    launcher))
-			      (begin
-				(debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type)
-				(if (null? tal)
-				    fallback-launcher
-				    (loop (car tal)(cdr tal)))))))
-		      ;; no match, try again
-		      (if (null? tal)
-			  fallback-launcher
-			  (loop (car tal)(cdr tal))))))))
-	fallback-launcher)))
-
-;; ideally put all this info into the db, no need to preserve it across moving homehost
-;;
-;; Returns a three element list:
-;;  ( reachable? update-time cpuload )
-;;
-;; Sources are tried in this order:
-;;  1. the latest load sample from rmt:get-latest-host-load, if under 6 seconds old
-;;  2. the record in *host-loads*, if updated within the last 4 seconds
-;;  3. a ping of the host followed by a normalized cpu load query
-;;  4. otherwise the host is reported as unreachable: (#f 0 -1)
-;;
-(define (common:get-host-info hostname)
-  (let* ((latest         (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data
-         (sampled-load   (car latest))
-         (sampled-at     (cdr latest))
-         (sample-age     (- (current-seconds) sampled-at))
-         (max-sample-age 6) ;; this was 20 seconds, seems way too lax. Switch to 6 seconds
-         (max-rec-age    4)
-         (cached-rec     (hash-table-ref/default *host-loads* hostname #f)))
-    (cond
-     ;; fresh enough sample recorded by a recent test
-     ((< sample-age max-sample-age)
-      (list #t sampled-at sampled-load))
-     ;; fresh enough entry in the in-memory table
-     ((and cached-rec
-           (< (current-seconds) (+ (host-last-update cached-rec) max-rec-age)))
-      (list #t
-            (host-last-update cached-rec)
-            (host-last-cpuload cached-rec)))
-     ;; nothing recent, ask the host directly
-     ((common:unix-ping hostname)
-      (list #t
-            (current-seconds)
-            (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds
-     ;; bad host, don't use!
-     (else
-      (list #f 0 -1)))))
-
-;; see defstruct host at top of file.
-;;    host: reachable last-update last-used last-cpuload
-;;
-;; Refresh the *host-loads* record of every entry in hosts-raw that consists
-;; only of non-whitespace characters. Records are created on demand and filled
-;; with the reachable flag, update time and cpu load from common:get-host-info.
-;;
-(define (common:update-host-loads-table hosts-raw)
-  (let ((usable-name? (lambda (name)
-                        (string-match (regexp "^\\S+$") name)))
-        (fetch-rec    (lambda (hostname)
-                        (or (hash-table-ref/default *host-loads* hostname #f)
-                            (let ((fresh (make-host)))
-                              (hash-table-set! *host-loads* hostname fresh)
-                              fresh)))))
-    (for-each
-     (lambda (hostname)
-       (let* ((rec  (fetch-rec hostname))
-              (info (common:get-host-info hostname))) ;; (reachable? update-time cpuload)
-         (host-reachable-set!    rec (car   info))
-         (host-last-update-set!  rec (cadr  info))
-         (host-last-cpuload-set! rec (caddr info))))
-     (filter usable-name? hosts-raw))))
-
-;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteria from the
-;; [host-rules] section.
-;;
-;;   hosts-raw - list of candidate host names; entries containing whitespace
-;;               (or empty) are dropped
-;;   host-type - key used to look up the rules in [host-rules]
-;;   configdat - config hash table
-;;
-;; Returns the chosen hostname (and stamps its record with the current time as
-;; last-used) or #f when no host qualifies.
-;;
-;; A host qualifies when it is reachable, its last cpu load is below maxnload
-;; and its job rate (1 / seconds since last used, 999 if used this second) is
-;; below maxjobrate.
-;;
-;; NOTE(review): maxnjobs is parsed but not referenced, best-load is set but
-;; never read, and the hosts are sorted before the load table is refreshed.
-;;
-(define (common:get-least-loaded-host hosts-raw host-type configdat)
-  (let* ((rdat       (configf:lookup configdat "host-rules" host-type))
-	 (rules      (common:val->alist (or rdat "") convert: #t))   ;; maxnload, maxnjobs, maxjobrate
-	 (maxnload   (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load
-	 (maxnjobs   (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs
-	 (maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second
-	 (hosts      (filter (lambda (x)
-			       (string-match (regexp "^\\S+$") x))
-			     hosts-raw))
-         ;; (best-host #f)
-	 (get-rec    (lambda (hostname)
-		       ;; (print "get-rec hostname=" hostname)
-		       (let ((h (hash-table-ref/default *host-loads* hostname #f)))
-			 (if h
-			     h
-			     (let ((h (make-host)))
-			       (hash-table-set! *host-loads* hostname h)
-			       h)))))
-         (best-load 99999)
-         (curr-time (current-seconds))
-	 (get-hosts-sorted (lambda (hosts)
-			     (sort hosts (lambda (a b)
-					   (let ((a-rec (get-rec a))
-						 (b-rec (get-rec b)))
-					     ;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec))
-					     ;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec))
-					     (< (host-last-used a-rec)
-						(host-last-used b-rec))))))))
-    (debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts))
-    (if (null? hosts)
-	#f ;; no hosts to select from. All done and giving up now.
-	(let ((hosts-sorted (get-hosts-sorted hosts)))
-	  (common:update-host-loads-table hosts)
-	  (let loop ((hostname  (car hosts-sorted))
-		     (tal       (cdr hosts-sorted))
-		     (best-host #f))
-	    (let* ((rec       (get-rec hostname))
-		   (reachable (host-reachable     rec))
-		   (load      (host-last-cpuload  rec))
-		   (last-used (host-last-used     rec))
-		   (delta     (- curr-time last-used))
-		   (job-rate  (if (> delta 0)
-				  (/ 1 delta)
-				  999)) ;; jobs per second
-		   (new-best  
-		    (cond
-		     ((not reachable)
-		      (debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.")
-		      best-host)
-		     ((and (< load maxnload)        ;; load is acceptable
-			   (< job-rate maxjobrate)) ;; job rate is acceptable
-		      (set! best-load load)
-		      hostname)
-		     (else best-host))))
-	      (debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." )
-	      (if new-best
-		  (begin ;; found a host, return it
-		    (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate)
-		    (host-last-used-set! rec curr-time)
-		    new-best)
-		  (if (null? tal) #f (loop (car tal)(cdr tal) best-host)))))))))
-
-;; Block (by sleeping and retrying) while the cpu load on remote-host, or the
-;; local host when remote-host is #f, is too high.
-;;
-;;   maxload-in    - allowed load per cpu; raised to at least 0.5 unless
-;;                   force-maxload is true
-;;   numcpus-in    - number of cpus; values of 1 or less are treated as
-;;                   unknown and looked up with common:get-num-cpus
-;;   waitdelay     - base value used when computing the sleep time
-;;   count         - remaining retries, no more waiting once it reaches 0
-;;   msg           - optional text appended to the log messages
-;;   remote-host   - host to check, #f for the local host
-;;   force-maxload - use maxload-in as given, without the 0.5 floor
-;;
-;; Waits when the first load average exceeds maxload * numcpus, or when the
-;; difference between the first and second load averages exceeds numcpus.
-;; The return value is unspecified.
-;;
-(define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f))
-  (let* ((loadavg (common:get-cpu-load remote-host))
-	 (numcpus (if (<= numcpus-in 1) ;; not possible to have zero.  If we get 1, it's possible that we got the previous default, and we should check again
-		      (common:get-num-cpus remote-host)
-		      numcpus-in))
-	 (maxload (if force-maxload
-		      maxload-in
-		      (max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
-	 (first   (car loadavg))
-	 (next    (cadr loadavg))
-	 (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1
-	 (loadjmp (- first next))
-         (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)(/ (- 1000 count) 10) waitdelay) (- first adjload) ))  ))) ;; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously
-    (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
-		      ", load: " first ", adjload: " adjload ", loadjmp: " loadjmp)
-    (cond
-     ((and (> first adjload)
-	   (> count 0))
-      (debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg ""))
-      (thread-sleep! adjwait)
-      ;; force-maxload must be passed on, otherwise the 0.5 floor comes back on the retry
-      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host force-maxload: force-maxload))
-     ((and (> loadjmp numcpus)
-	   (> count 0))
-      (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
-      (thread-sleep! adjwait)
-      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host force-maxload: force-maxload)))))
-
-;; Wait until the normalized load on the homehost drops below maxload.
-;;
-;; When already running on the homehost #f is passed as the host so that the
-;; load checks are done locally; otherwise the first element of
-;; (common:get-homehost) is used as the host. msg is passed through to the
-;; log messages of the wait routine.
-;;
-(define (common:wait-for-homehost-load maxload msg)
-  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
-                     #f
-                     (common:get-homehost)))
-         (hh     (and hh-dat (car hh-dat))))
-    ;; the cpu count is looked up by common:wait-for-normalized-load itself
-    (common:wait-for-normalized-load maxload msg hh)))
-
-;; Return the number of cpus of remote-host, or of the local host when
-;; remote-host is #f.
-;;
-;; A value cached under the host name (max age 86400 seconds) is used when
-;; available. Otherwise the "processor : N" lines of /proc/cpuinfo are counted,
-;; read over ssh for a remote host, and the result is cached under the host
-;; name that was actually queried.
-;;
-(define (common:get-num-cpus remote-host)
-  (let* ((actual-host (or remote-host (get-host-name))))
-    (or (common:get-cached-info actual-host "num-cpus" age: 86400) ;; hosts had better not be changing the number of cpus too often!
-	(let* ((proc   (lambda ()
-			 ;; count the processor entries on current-input-port
-			 (let loop ((numcpu 0)
-				    (inl    (read-line)))
-			   (if (eof-object? inl)
-			       numcpu
-			       (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
-					 (+ numcpu 1)
-					 numcpu)
-				     (read-line))))))
-	       (result (if remote-host
-			   (with-input-from-pipe 
-			       (conc "ssh " remote-host " cat /proc/cpuinfo")
-			     proc)
-			   (with-input-from-file "/proc/cpuinfo" proc))))
-	  ;; single cache write, always keyed by the real host name (never #f)
-	  (common:write-cached-info actual-host "num-cpus" result)
-	  result))))
-
-;; wait for normalized cpu load to drop below maxload
-;;
-;; Looks up the cpu count of remote-host (#f means the local host) and calls
-;; common:wait-for-cpuload with a waitdelay of 15. msg is passed through for
-;; the log messages.
-;;
-(define (common:wait-for-normalized-load maxload msg remote-host)
-  (let ((num-cpus (common:get-num-cpus remote-host)))
-    (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host)))
-
-;;======================================================================
-;; D E B U G G I N G   S T U F F 
-;;======================================================================
-
-;; (define *verbosity*         1)
-;; (define *logging*           #f)
-
-;; Store the current version signature in the MEGATEST_VERSION variable via rmt:set-var.
-;;
-(define (common:set-last-run-version)
-  (rmt:set-var "MEGATEST_VERSION" (common:version-signature)))
-
-;; positive number if megatest version > db version
-;; negative number if megatest version < db version
-;; (zero when both are equal)
-(define (common:version-db-delta)
-         (- megatest-version (common:get-last-run-version-number)))
-
-;; #t when the stored MEGATEST_VERSION value differs from the current version
-;; signature, #f when they are equal.
-;;
-(define (common:version-changed?)
-  (not (equal? (common:get-last-run-version)
-               (common:version-signature))))
-
-;; from metadat lookup MEGATEST_VERSION
-;;
-;; Returns whatever rmt:get-var yields for "MEGATEST_VERSION".
-;;
-(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB
-  (rmt:get-var "MEGATEST_VERSION"))
-
-;; Convert the first six characters of the stored version string to a number.
-;; NOTE(review): substring raises an error if the stored value is not a string
-;; of at least six characters - confirm the stored value is always long enough.
-;;
-(define (common:get-last-run-version-number)
-  (string->number 
-   (substring (common:get-last-run-version) 0 6)))
-
-;; #t when the first four characters of megatest-version (as a string) differ
-;; from the first four characters of the stored MEGATEST_VERSION value.
-;; NOTE(review): both substring calls assume at least four characters.
-;;
-(define (common:api-changed?)
-  (not (equal? (substring (->string megatest-version) 0 4)
-               (substring (conc (common:get-last-run-version)) 0 4))))
-
-;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))'
-;;
-;; Return the "disks" entry of the config hash table. The table is configf when
-;; given, otherwise megatest.config is read. If there is no "disks" entry the
-;; default '("none" "") is returned.
-;;
-(define (common:get-disks #!key (configf #f))
-  (hash-table-ref/default 
-   (or configf (configf:read-config "megatest.config" #f #t))
-   "disks" '("none" "")))
-
-;;======================================================================
-;; watchdog and exit procedures
-;;======================================================================
-
-;;======================================================================
-;; E X I T   H A N D L I N G
-;;======================================================================
-
-;;   (let ((ohh (common:on-homehost?))
-;; 	(srv (args:get-arg "-server")))
-;;     (and ohh srv)))
-    ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)
-
-;; Thread object (created here, not started here) that runs common:watchdog.
-;; Any exception raised by the watchdog is caught; the call chain and the
-;; exception message are printed and the thread then ends.
-;;
-(define *watchdog* (make-thread
-		    (lambda ()
-		      (handle-exceptions
-			  exn
-			  (begin
-			    (print-call-chain)
-			    (print " message: " ((condition-property-accessor 'exn 'message) exn)))
-			(common:watchdog)))
-		    "Watchdog thread"))
-
-;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
-;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
-;;
-;; Watchdog used when the area database is read-only.
-;;
-;; Loops until *time-to-exit* is set. Each pass waits until at least
-;; sync-cool-off-duration (3) seconds have gone by since the previous pass,
-;; then compares the modification times of the golden megatest.db and the tmp
-;; copy. When the golden file is newer than the tmp copy and has not been
-;; modified in the last three seconds, db:multi-db-sync is called with
-;; 'old2new.
-;;
-(define (common:readonly-watchdog dbstruct)
-  (thread-sleep! 0.05) ;; delay for startup
-  (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.")
-  ;; sync megatest.db to /tmp/.../megatest.db
-  (let* ((sync-cool-off-duration   3)
-        (golden-mtdb     (dbr:dbstruct-mtdb dbstruct))
-        (golden-mtpath   (db:dbdat-get-path golden-mtdb))
-        (tmp-mtdb        (dbr:dbstruct-tmpdb dbstruct))
-        (tmp-mtpath      (db:dbdat-get-path tmp-mtdb)))
-    (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.")
-    (let loop ((last-sync-time 0))
-      (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath)
-      (let* ((duration-since-last-sync (- (current-seconds) last-sync-time)))
-        (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync)
-        ;; sleep off the remainder of the cool-off period
-        (if (and (not *time-to-exit*)
-                 (< duration-since-last-sync sync-cool-off-duration))
-            (thread-sleep! (- sync-cool-off-duration duration-since-last-sync)))
-        (if (not *time-to-exit*)
-            (let ((golden-mtdb-mtime (file-modification-time golden-mtpath))
-                  (tmp-mtdb-mtime    (file-modification-time tmp-mtpath)))
-	      (if (> golden-mtdb-mtime tmp-mtdb-mtime)
-		  (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back
-		      (let ((res (db:multi-db-sync dbstruct 'old2new)))
-			(debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))))
-              (loop (current-seconds)))
-            #t)))
-    (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath)))
-
-
-;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
-;;
-;; Entry point of the watchdog thread. Does nothing unless launch:setup
-;; succeeds and this process is on the homehost. It then opens the db with
-;; (db:setup #t) and runs:
-;;   - common:readonly-watchdog when the dbstruct is flagged read-only
-;;   - otherwise the writable watchdog selected by the [server] sync-method
-;;     setting: "brute-force-sync" (the default) or "delta-sync". Any other
-;;     value is reported as an error and the process exits with status 1.
-;;
-(define (common:watchdog)
-  (debug:print-info 13 *default-log-port* "common:watchdog entered.")
-  (if (launch:setup)
-      (if (common:on-homehost?)
-	  (let ((dbstruct (db:setup #t)))
-	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
-	    (cond
-	     ((dbr:dbstruct-read-only dbstruct)
-	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
-	      (common:readonly-watchdog dbstruct))
-	     (else
-	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
-              (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync")))
-                (cond
-                 ((equal? syncer "brute-force-sync")
-                  (server:writable-watchdog-bruteforce dbstruct))
-                 ((equal? syncer "delta-sync")
-                  (server:writable-watchdog-deltasync dbstruct))
-                 (else
-                  (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.")
-                  (exit 1)))
-                ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
-                )))
-	    (debug:print-info 13 *default-log-port* "watchdog done."))
-	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))
-
-
-;; Standard clean-up run when the process exits. Always returns 0.
-;;
-;; Steps:
-;;   - installs an on-exit procedure that just returns 0
-;;   - sets *time-to-exit*; no-hurry is #t only if it was not set already
-;;   - prints db stats when no-hurry and debug mode 18 is active
-;;   - th1 closes *dbstruct-db*, interrupts and finalizes the sqlite handle in
-;;     *task-db*, and switches *default-log-port* back to (current-error-port),
-;;     closing the old port first if it was a different one
-;;   - th2 only logs and sleeps (5 seconds when no-hurry, otherwise 2)
-;; Both threads are started but only th1 is joined, so the sleep in th2 does
-;; not delay the return of this procedure.
-;;
-(define (std-exit-procedure)
-  ;;(common:telemetry-log-close)
-  (on-exit (lambda () 0))
-  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
-  (let ((no-hurry  (if *time-to-exit* ;; hurry up
-		       #f
-		       (begin
-			 (set! *time-to-exit* #t)
-			 #t))))
-    (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.")
-    (if (and no-hurry (debug:debug-mode 18))
-	(rmt:print-db-stats))
-    (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds
-                              (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
-			      (if *task-db*    
-				  (let ((db (cdr *task-db*)))
-				    (if (sqlite3:database? db)
-					(begin
-					  (sqlite3:interrupt! db)
-					  (sqlite3:finalize! db #t)
-					  ;; (vector-set! *task-db* 0 #f)
-					  (set! *task-db* #f)))))
-                              #;(http-client#close-all-connections!)
-                              ;; (if (and *runremote*
-                              ;;          (remote-conndat *runremote*))
-                              ;;     (begin
-                              ;;       (http-client#close-all-connections!))) ;; for http-client
-                              (if (not (eq? *default-log-port* (current-error-port)))
-                                  (close-output-port *default-log-port*))
-			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
-	  (th2 (make-thread (lambda ()
-			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
-			      (if no-hurry
-                                  (begin
-                                    (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff
-                                  (begin
-      				  (thread-sleep! 2)))
-      			      (debug:print 4 *default-log-port* " ... done")
-      			      )
-			    "clean exit")))
-      (thread-start! th1)
-      (thread-start! th2)
-      (thread-join! th1)
-      )
-    )
-
-  0)
-
-;; Signal handler: flag that it is time to exit, log the signal number and
-;; call (exit).
-;;
-(define (std-signal-handler signum)
-  ;; (signal-mask! signum)
-  (set! *time-to-exit* #t) 
-  ;;(debug:print-info 13 *default-log-port* "got signal "signum)
-  (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly")
-  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway
-  (exit))
-
-;; Signal handler variant intended to notify an admin before exiting. The
-;; notification is not implemented yet (see TODO), so for now this only flags
-;; that it is time to exit, logs the signal number and calls (exit).
-;;
-(define (special-signal-handler signum)
-  ;; (signal-mask! signum)
-  (set! *time-to-exit* #t)
-  ;;(debug:print-info 13 *default-log-port* "got signal "signum)
-  (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting!!")
-  ;;TODO send email to notify the admin contact listed in the config that the listener got killed
-  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway
-  (exit))
-
-
-;; install std-signal-handler for the interrupt and terminate signals
-(set-signal-handler! signal/int  std-signal-handler)  ;; ^C
-(set-signal-handler! signal/term std-signal-handler)  ;; SIGTERM
-
-;; (set-signal-handler! signal/stop std-signal-handler)  ;; ^Z NO, do NOT handle ^Z!
-
-;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
-;; Do NOT check if not on homehost!
-;;
-;; Nothing happens unless this process is on the homehost and
-;; common:api-changed? is true. In that case a version mismatch warning is
-;; printed and then:
-;;   - MT_SKIP_DB_MIGRATE set in the environment: return #t, no migration
-;;   - megatest.config and megatest.db both exist, the db is writable and the
-;;     current user owns megatest.config: run common:cleanup-db; exit 1 if it
-;;     raises an exception
-;;   - otherwise: print the reason migration is not possible and exit 1
-;;
-(define (common:exit-on-version-changed)
-  (if (common:on-homehost?)
-      (if (common:api-changed?)
-	  (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
-                (dbfile  (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
-                (read-only (not (file-write-access? dbfile)))
-                (dbstruct (db:setup #t)))
-	    (debug:print 0 *default-log-port*
-			 "WARNING: Version mismatch!\n"
-			 "   expected: " (common:version-signature) "\n"
-			 "   got:      " (common:get-last-run-version))
-            (cond
-             ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
-             ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
-                   (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db
-              (debug:print 0 *default-log-port* "   I see you are the owner of megatest.config, attempting to cleanup and reset to new version")
-              (handle-exceptions
-               exn
-               (begin
-                 (debug:print 0 *default-log-port* "Failed to switch versions.")
-                 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-                 (print-call-chain (current-error-port))
-                 (exit 1))
-               (common:cleanup-db dbstruct)))
-             ((not (common:file-exists? mtconf))
-              (debug:print 0 *default-log-port* "   megatest.config does not exist in this area.  Cannot proceed with megatest version migration.")
-              (exit 1))
-             ((not (common:file-exists? dbfile))
-              (debug:print 0 *default-log-port* "   megatest.db does not exist in this area.  Cannot proceed with megatest version migration.")
-              (exit 1))
-             ;; the ownership test is made on megatest.config, so name that file in the message
-             ((not (eq? (current-user-id)(file-owner mtconf)))
-              (debug:print 0 *default-log-port* "   You do not own megatest.config in this area.  Cannot proceed with megatest version migration.")
-              (exit 1))
-             (read-only
-              (debug:print 0 *default-log-port* "   You have read-only access to this area.  Cannot proceed with megatest version migration.")
-              (exit 1))
-             (else
-              (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"")
-              (exit 1)))))))
-;;      (begin
-;;	(debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
-;;	(exit 1))))
-
-;; Move me elsewhere ...
-;; RADT => Why do we need the version check here? This is called only if there is a version mismatch.
-;;
-;; Runs db:multi-db-sync on dbstruct with the options
-;;   'schema 'killservers 'adj-target 'new2old 'dejunk
-;; and afterwards records the current version if common:api-changed? is true.
-;; NOTE(review): the full keyword is accepted but not used; 'dejunk is always
-;; passed.
-;;
-(define (common:cleanup-db dbstruct #!key (full #f))
-  (db:multi-db-sync dbstruct 'schema 'killservers 'adj-target 'new2old 'dejunk)
-  (if (common:api-changed?)
-      (common:set-last-run-version)))
-
-;; called in megatest.scm, host-port is string hostname:port
-;;
-;; NOTE: This is NOT called directly from clients as not all transports support a client running
-;;       in the same process as the server.
-;;
-;; DISABLED: the whole definition is commented out with the #; datum comment.
-;; NOTE(review): the server-dat binding inside is itself commented out with #;
-;; while login-res still refers to server-dat, so this cannot simply be
-;; re-enabled as is.
-;;
-#;(define (server:ping host-port-in #!key (do-exit #f))
-  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
-		       #f ;; (server:check-if-running *toppath*)
-		;; (if (number? host-port-in) ;; we were handed a server-id
-		;; 	   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
-		;; 	     ;; (print "srec: " srec " host-port-in: " host-port-in)
-		;; 	     (if srec
-		;; 		 (conc (vector-ref srec 3) ":" (vector-ref srec 4))
-		;; 		 (conc "no such server-id " host-port-in)))
-		       host-port-in))) ;; )
-    (let* ((host-port (if host:port
-			  (let ((slst (string-split   host:port ":")))
-			    (if (eq? (length slst) 2)
-				(list (car slst)(string->number (cadr slst)))
-				#f))
-			  #f)))
-;;	   (toppath       (launch:setup)))
-      ;; (print "host-port=" host-port)
-      (if (not host-port)
-	  (begin
-	    (if host-port-in
-		(debug:print 0 *default-log-port*  "ERROR: bad host:port"))
-	    (if do-exit (exit 1))
-	    #f)
-	  (let* ((iface      (car host-port))
-		 (port       (cadr host-port))
-		 #;(server-dat (http-transport:client-connect iface port))
-		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
-	    (if (and (list? login-res)
-		     (car login-res))
-		(begin
-		  ;; (print "LOGIN_OK")
-		  (if do-exit (exit 0))
-		  #t)
-		(begin
-		  ;; (print "LOGIN_FAILED")
-		  (if do-exit (exit 1))
-		  #f)))))))
-
-;; run ping in separate process, safest way in some cases
-;;
-;; Runs "<megatest exe> -ping <ifaceport>" and reads its output to the end.
-;; Returns #t only when the last line printed is exactly LOGIN_OK; any other
-;; last line, or no output at all, gives #f.
-;;
-(define (server:ping-server ifaceport)
-  (let ((ping-cmd (conc (common:get-megatest-exe) " -ping " ifaceport)))
-    (with-input-from-pipe
-     ping-cmd
-     (lambda ()
-       ;; remember only the most recent line; "NOREPLY" stands for no output
-       (let scan ((line      (read-line))
-                  (last-line "NOREPLY"))
-         (if (eof-object? line)
-             (equal? last-line "LOGIN_OK")
-             (scan (read-line) line)))))))
-
-;; ping the given server
-;;
-;; DISABLED: the whole definition is commented out with the #; datum comment.
-;; As written it would return the url of server-record when the ping for the
-;; http transport succeeds and #f otherwise.
-;;
-#;(define (server:check-server server-record)
-  (let* ((server-url (server:record->url server-record))
-         (res        (case *transport-type*
-                       ((http)(server:ping server-url))
-                       ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
-                       )))
-    (if res
-        server-url
-	#f)))
-
-;; no longer care if multiple servers are started by accident. older servers will drop off in time.
-;;
-;; DISABLED: the whole definition is commented out with the #; datum comment.
-;; As written it would return #f when there are no servers, or fewer than a
-;; random number below (server:get-num-servers); otherwise the result of
-;; server:check-server for the first server that answers, or #f if none do.
-;;
-#;(define (server:check-if-running areapath) ;;  #!key (numservers "2"))
-  (let* ((ns            (server:get-num-servers))
-	 (servers       (server:get-best (server:get-list areapath))))
-    ;; (print "servers: " servers " ns: " ns)
-    (if (or (and servers
-		 (null? servers))
-	    (not servers)
-	    (and (list? servers)
-		 (< (length servers) (random ns)))) ;; somewhere between 0 and numservers
-        #f
-        (let loop ((hed (car servers))
-                   (tal (cdr servers)))
-          (let ((res (server:check-server hed)))
-            (if res
-                res
-                (if (null? tal)
-                    #f
-                    (loop (car tal)(cdr tal)))))))))
-
-
-;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
-;;
-;; NOTE(review): calling this does not perform a login check. It returns a one
-;; argument procedure; that procedure updates *db-last-access* and returns #t
-;; when its own argument equals *toppath*, #f otherwise. The outer toppath
-;; argument is shadowed and never used - confirm this is intended.
-;;
-(define (server:login toppath)
-  (lambda (toppath)
-    (set! *db-last-access* (current-seconds)) ;; might not be needed.
-    (equal? *toppath* toppath)))
-;; Given a run id start a server process    ### NOTE ### > file 2>&1 
-;; if the run-id is zero and the target-host is set 
-;; try running on that host
-;;   incidental: rotate logs in logs/ dir.
-;;
-;; DISABLED: the whole definition is commented out with the #; datum comment.
-;; As written it would build a "megatest -server ..." command line, wait for
-;; the load on the target host to drop below [jobtools] max-server-start-load
-;; (default 3.0) and launch the command through nbfake from within areapath,
-;; rotating the logs in a separate thread meanwhile.
-;;
-#;(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
-  (let* ((curr-host   (get-host-name))
-         ;; (attempt-in-progress (server:start-attempted? areapath))
-         ;; (dot-server-url (server:check-if-running areapath))
-	 (curr-ip     (server:get-best-guess-address curr-host))
-	 (curr-pid    (current-process-id))
-	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
-	 (target-host (car homehost))
-	 (testsuite   (common:get-testsuite-name))
-	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
-	 (cmdln (conc (common:get-megatest-exe)
-		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
-							   " -daemonize "
-							   "")
-		      ;; " -log " logfile
-		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
-	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread"))
-         (load-limit  (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
-    ;; we want the remote server to start in *toppath* so push there
-    (push-directory areapath)
-    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
-    (thread-start! log-rotate)
-    
-    ;; host.domain.tld match host?
-    (if (and target-host 
-	     ;; look at target host, is it host.domain.tld or ip address and does it 
-	     ;; match current ip or hostname
-	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
-	     (not (equal? curr-ip target-host)))
-	(begin
-	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
-	  (setenv "TARGETHOST" target-host)))
-      
-    (setenv "TARGETHOST_LOGF" logfile)
-    (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time
-    (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever
-    (system (conc "nbfake " cmdln))
-    (unsetenv "TARGETHOST_LOGF")
-    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
-    (thread-join! log-rotate)
-    (pop-directory)))
-
-;; kind start up of servers, wait 40 seconds before allowing another server for a given
-;; run-id to be launched
-#;(define (server:kind-run areapath)
-  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
-      (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
-	     (call-num     (car last-run-dat))
-	     (when-run     (cadr last-run-dat))
-	     (run-delay    (+ (case call-num
-				((0)    0)
-				((1)   20)
-				((2)  300)
-				(else 600))
-			      (random 5)))   ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously
-	     (lock-file    (conc areapath "/logs/server-start.lock")))
-	(if	(> (- (current-seconds) when-run) run-delay)
-		(begin
-		  (common:simple-file-lock-and-wait lock-file expire-time: 15)
-		  (server:run areapath)
-		  (thread-sleep! 2) ;; don't release the lock for at least a few seconds
-		  (common:simple-file-release-lock lock-file)))
-	(hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))
-
-#;(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.
-
-#;(define (server:start-and-wait areapath #!key (timeout 60))
-  (let ((give-up-time (+ (current-seconds) timeout)))
-    (let loop ((server-url (server:check-if-running areapath))
-	       (try-num    0))
-      (if (or server-url
-	      (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
-	  server-url
-	  (let ((num-ok (length (server:get-best (server:get-list areapath)))))
-	    (if (and (> try-num 0)  ;; first time through simply wait a little while then try again
-		     (< num-ok 1))  ;; if there are no decent candidates for servers then try starting a new one
-		(server:kind-run areapath))
-	    (thread-sleep! 5)
-	    (loop (server:check-if-running areapath)
-		  (+ try-num 1)))))))
-
-;;======================================================================
-;; make html output
-;;======================================================================
-
-(define (tests:test-set-toplog! run-id test-name logf) 
-  (rmt:general-call 'tests:test-set-toplog run-id logf run-id test-name))
-
-(define (tests:summarize-items run-id test-id test-name force)
-  ;; if not force then only update the record if one of these is true:
-  ;;   1. logf is "log/final.log
-  ;;   2. logf is same as outputfilename
-  (let* ((outputfilename (conc "megatest-rollup-" test-name ".html"))
-	 (orig-dir       (current-directory))
-	 (logf-info      (rmt:test-get-logfile-info run-id test-name))
-	 (logf           (if logf-info (cadr logf-info) #f))
-	 (path           (if logf-info (car  logf-info) #f)))
-    ;; This query finds the path and changes the directory to it for the test
-    (if (and (string? path)
-	     (directory? path)) ;; can get #f here under some wierd conditions. why, unknown ...
-	(begin
-	  (debug:print 4 *default-log-port* "Found path: " path)
-	  (change-directory path))
-	;; (set! outputfilename (conc path "/" outputfilename)))
-	(debug:print-error 0 *default-log-port* "summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path))
-    (debug:print 4 *default-log-port* "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force)
-    (if (or (equal? logf "logs/final.log")
-	    (equal? logf outputfilename)
-	    force)
-	(let ((my-start-time (current-seconds))
-	      (lockf         (conc outputfilename ".lock")))
-	  (let loop ((have-lock  (common:simple-file-lock lockf)))
-	    (if have-lock
-		(let ((script (configf:lookup *configdat* "testrollup" test-name)))
-		  (print "Obtained lock for " outputfilename)
-		  (rmt:set-state-status-and-roll-up-items run-id test-name "" #f #f #f)
-		  (if script
-		      (system (conc script " > " outputfilename " & "))
-		      (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename))
-		  (common:simple-file-release-lock lockf)
-		  (change-directory orig-dir)
-		  ;; NB// tests:test-set-toplog! is remote internal...
-		  (tests:test-set-toplog! run-id test-name outputfilename))
-		;; didn't get the lock, check to see if current update started later than this 
-		;; update, if so we can exit without doing any work
-		(if (> my-start-time (handle-exceptions
-					 exn
-					 0
-				       (file-modification-time lockf)))
-		    ;; we started since current re-gen in flight, delay a little and try again
-		    (begin
-		      (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it")
-		      (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds
-		      (loop (common:simple-file-lock lockf))))))))))
-
-(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)
-  (let ((counts              (make-hash-table))
-	(statecounts         (make-hash-table))
-	(outtxt              "")
-	(tot                 0)
-	(testdat             (rmt:test-get-records-for-index-file run-id test-name)))
-    (with-output-to-file outputfilename
-      (lambda ()
-	(set! outtxt (conc outtxt "<html><title>Summary: " test-name 
-			   "</title><body><h2>Summary for " test-name "</h2>"))
-	(for-each
-	 (lambda (testrecord)
-	   (let ((id             (vector-ref testrecord 0))
-		 (itempath       (vector-ref testrecord 1))
-		 (state          (vector-ref testrecord 2))
-		 (status         (vector-ref testrecord 3))
-		 (run_duration   (vector-ref testrecord 4))
-		 (logf           (vector-ref testrecord 5))
-		 (comment        (vector-ref testrecord 6)))
-	     (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0)))
-	     (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0)))
-	     (set! outtxt (conc outtxt "<tr>"
-				;; "<td><a href=\"" itempath "/" logf "\"> " itempath "</a></td>" 
-				"<td><a href=\"" itempath "/test-summary.html\"> " itempath "</a></td>" 
-				"<td>" state    "</td>" 
-				"<td><font color=" (common:get-color-from-status status)
-				">"   status   "</font></td>"
-				"<td>" (if (equal? comment "")
-					   "&nbsp;"
-					   comment) "</td>"
-					   "</tr>"))))
-	 (if (list? testdat)
-	     testdat
-	     (begin
-	       (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name)
-	       '())))
-	
-	(print "<table><tr><td valign=\"top\">")
-	;; Print out stats for status
-	(set! tot 0)
-	(print "<table cellspacing=\"0\" border=\"1\"><tr><td colspan=\"2\"><h2>State stats</h2></td></tr>")
-	(for-each (lambda (state)
-		    (set! tot (+ tot (hash-table-ref statecounts state)))
-		    (print "<tr><td>" state "</td><td>" (hash-table-ref statecounts state) "</td></tr>"))
-		  (hash-table-keys statecounts))
-	(print "<tr><td>Total</td><td>" tot "</td></tr></table>")
-	(print "</td><td valign=\"top\">")
-	;; Print out stats for state
-	(set! tot 0)
-	(print "<table cellspacing=\"0\" border=\"1\"><tr><td colspan=\"2\"><h2>Status stats</h2></td></tr>")
-	(for-each (lambda (status)
-		    (set! tot (+ tot (hash-table-ref counts status)))
-		    (print "<tr><td><font color=\"" (common:get-color-from-status status) "\">" status
-			   "</font></td><td>" (hash-table-ref counts status) "</td></tr>"))
-		  (hash-table-keys counts))
-	(print "<tr><td>Total</td><td>" tot "</td></tr></table>")
-	(print "</td></td></tr></table>")
-	
-	(print "<table cellspacing=\"0\" border=\"1\">" 
-	       "<tr><td>Item</td><td>State</td><td>Status</td><td>Comment</td>"
-	       outtxt "</table></body></html>")
-	;; (release-dot-lock outputfilename)
-	;;(rmt:update-run-stats 
-	;; run-id
-	;; (hash-table-map
-	;;  state-status-counts
-	;;  (lambda (key val)
-	;;	(append key (list val)))))
-	))))
-
-(define tests:css-jscript-block
-#<<EOF
-<style type="text/css">
-ul.LinkedList { display: block; }
-/* ul.LinkedList ul { display: none; } */
-.HandCursorStyle { cursor: pointer; cursor: hand; }  /* For IE */
-th {background-color: #8c8c8c;}
-td.test {background-color: #d9dbdd;}
-td.PASS {background-color: #347533;}
-td.FAIL {background-color: #cc2812;}
-td.SKIP{background-color: #FFD733;}
-td.WARN {background-color: #EA8724;}
-td.WAIVED {background-color: #838A12;}
-td.ABORT{background-color: #EA24B7;}
-.PASS .link, .SKIP .link, .WARN .link,.WAIVED .link,.ABORT .link, .FAIL .link{color: #FFFFFF;}
-
-
-</style>
-
-
-  <script type="text/JavaScript">
-
-    function filtersome() {
-  $("tr").show();
-  $(".test").filter(
-    function() {
-      var names = $('#testname').val().split(',');
-      var good=1;
-      for (var i=0, len=names.length; i<len; i++) {
-        var uname=names[i];
-        console.log("Trying to check for " + uname); 
-        if($(this).text().indexOf(uname) != -1) {
-          good= 0;
-          console.log("Found "+uname);
-        }
-      }
-      return good; 
-    }
-  ).parent().hide();
-//  $(".sum").show();
-}
-  
-    // Add this to the onload event of the BODY element
-    function addEvents() {
-      activateTree(document.getElementById("LinkedList1"));
-    }
-
-    // This function traverses the list and add links 
-    // to nested list items
-    function activateTree(oList) {
-      // Collapse the tree
-      for (var i=0; i < oList.getElementsByTagName("ul").length; i++) {
-        oList.getElementsByTagName("ul")[i].style.display="none";            
-      }                                                                  
-      // Add the click-event handler to the list items
-      if (oList.addEventListener) {
-        oList.addEventListener("click", toggleBranch, false);
-      } else if (oList.attachEvent) { // For IE
-        oList.attachEvent("onclick", toggleBranch);
-      }
-      // Make the nested items look like links
-      addLinksToBranches(oList);
-    }
-
-    // This is the click-event handler
-    function toggleBranch(event) {
-      var oBranch, cSubBranches;
-      if (event.target) {
-        oBranch = event.target;
-      } else if (event.srcElement) { // For IE
-        oBranch = event.srcElement;
-      }
-      cSubBranches = oBranch.getElementsByTagName("ul");
-      if (cSubBranches.length > 0) {
-        if (cSubBranches[0].style.display == "block") {
-          cSubBranches[0].style.display = "none";
-        } else {
-          cSubBranches[0].style.display = "block";
-        }
-      }
-    }
-
-    // This function makes nested list items look like links
-    function addLinksToBranches(oList) {
-      var cBranches = oList.getElementsByTagName("li");
-      var i, n, cSubBranches;
-      if (cBranches.length > 0) {
-        for (i=0, n = cBranches.length; i < n; i++) {
-          cSubBranches = cBranches[i].getElementsByTagName("ul");
-          if (cSubBranches.length > 0) {
-            addLinksToBranches(cSubBranches[0]);
-            cBranches[i].className = "HandCursorStyle";
-            cBranches[i].style.color = "blue";
-            cSubBranches[0].style.color = "black";
-            cSubBranches[0].style.cursor = "auto";
-          }
-        }
-      }
-    }
-  </script>
-EOF
-)
-
-(define tests:css-jscript-block-dynamic 
-#<<EOF
-           <script src= ./jquery3.1.0.js></script> 
-EOF
-)
-
-(define  (test:js-block javascript-lib)
-   (conc  "<script src=" javascript-lib "></script>" ))
-
-
-(define tests:css-jscript-block-static (test:js-block *java-script-lib*))
-
-(define (tests:css-jscript-block-cond dynamic) 
-      (if (equal? dynamic  #t)
-       tests:css-jscript-block-dynamic
-       tests:css-jscript-block-static))
-
-       
-(define (tests:run-record->test-path run numkeys)
-   (append (take (vector->list run) numkeys)
-	   (list (vector-ref run (+ 1 numkeys)))))
-
-
-(define (tests:get-rest-data runs header numkeys)
-   (let ((resh (make-hash-table)))
-   (for-each
-     (lambda (run)
-        (let* ((run-id (db:get-value-by-header run header "id"))
-               (run-dir      (tests:run-record->test-path run numkeys))
-	       (test-data    (rmt:get-tests-for-run
-				   run-id
-                                   "%"       ;; testnamepatt
-				   '()        ;; states
-				   '()        ;; statuses
-				   #f         ;; offset
-				   #f         ;; num-to-get
-				   #f         ;; hide/not-hide
-				   #f         ;; sort-by
-				   #f         ;; sort-order
-				   #f         ;; 'shortlist                           ;; qrytype
-                                   0         ;; last update
-				   #f)))
-            
-            (map (lambda (test)
-                 (let* ((test-name (vector-ref test 2))
-                        (test-html-path (conc (vector-ref test 10) "/" (vector-ref test 13)))
-                        (test-item (conc test-name ":" (vector-ref test 11)))
-                        (test-status (vector-ref test 4)))
-                         
-                (if (not (hash-table-ref/default resh test-name  #f))
-                      (hash-table-set! resh test-name   (make-hash-table)))
-                (if (not (hash-table-ref/default (hash-table-ref/default resh test-name  #f)  test-item  #f))
-                       (hash-table-set! (hash-table-ref/default resh test-name  #f) test-item   (make-hash-table))) 
-               (hash-table-set!  (hash-table-ref/default (hash-table-ref/default resh test-name  #f) test-item #f) run-id (list test-status test-html-path)))) 
-        test-data)))
-      runs)
-   resh))
-
-
-;; hash-table tree to html list tree
-;;
-;;   tipfunc takes two parameters: y the tip value and path the path to that point
-;;
-(define (common:htree->html ht path tipfunc)
-  (let ((datlist 	(sort (hash-table->alist ht)
-                              (lambda (a b)
-                                (string< (car a)(car b))))))
-    (if (null? datlist)
-    	(tipfunc #f path) ;; really shouldn't get here
-	(s:ul
-	 (map (lambda (x)
-		(let* ((levelname (car x))
-		       (y         (cdr x))
-		       (newpath   (append path (list levelname)))
-		       (leaf      (or (not (hash-table? y))
-				      (null? (hash-table-keys y)))))
-		  (if leaf
-		      (s:li (tipfunc y newpath))
-		      (s:li
-		       (list 
-			levelname
-			(common:htree->html y newpath tipfunc))))))
-	      datlist)))))
-
-
-;; tests:genrate dashboard body 
-;;
-
-(define (tests:dashboard-body page pg-size keys numkeys  total-runs linktree area-name get-prev-links get-next-links flag run-patt target-patt)
-  (let* ((start (* page pg-size)) 
-	       ;(runsdat   (rmt:get-runs "%" pg-size start (map (lambda (x)(list x "%")) keys)))
-         (runsdat   (rmt:get-runs-by-patt  keys run-patt target-patt start pg-size #f 0 sort-order: "desc"))
-                    ; db:get-runs-by-patt   keys runnamepatt targpatt offset limit fields last-update   
-	       (header    (vector-ref runsdat 0))
-	       (runs      (vector-ref runsdat 1))
-         (ctr 0)
-         (test-runs-hash (tests:get-rest-data runs header numkeys))
-         (test-list (hash-table-keys test-runs-hash))) 
-  
-  (s:html tests:css-jscript-block (tests:css-jscript-block-cond flag)
-		   (s:title "Summary for " area-name)
-		   (s:body 'onload "addEvents();"
-                          (get-prev-links page linktree)
-                          (get-next-links page linktree total-runs)
-                           
-			   (s:h1 "Summary for " area-name)
-                           (s:h3 "Filter" )
-                           (s:input 'type "text"  'name "testname" 'id "testname" 'length "30" 'onkeyup "filtersome()")
-			   ;; top list
-         
-			   (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0
-                            (map (lambda (key)
-				 (let* ((res (s:tr 'class "something" 
-				  (s:th key )
-                                   (map (lambda (run)
-                                   (s:th  (vector-ref run ctr)))
-                                  runs))))
-                             (set! ctr (+ ctr 1))
-                               res))
-                               keys)
-                               (s:tr
-				 (s:th "Run Name")
-                                  (map (lambda (run)
-                                   (s:th (db:get-value-by-header run header "runname")))
-                                  runs))
-                              
-                               (map (lambda (test-name)
-                                 (let* ((item-hash (hash-table-ref/default test-runs-hash test-name  #f))
-                                         (item-keys (sort (hash-table-keys item-hash) string<=?))) 
-                                          (map (lambda (item-name)  
-  		                             (let* ((res (s:tr  'class item-name
-				                         (s:td  item-name 'class "test" )
-                                                           (map (lambda (run)
-                                                               (let* ((run-test (hash-table-ref/default item-hash item-name  #f))
-                                                                      (run-id (db:get-value-by-header run header "id"))
-                                                                      (result (hash-table-ref/default run-test run-id "n/a"))
-                                                                      ;(relative-path (get-relative-path)) 
-                                                                      (status (if (string? result)
-									                                                            	result
-										                                                            (car result)))
-                                                                        (link (if (string? result)
-										                                                            result
-                                                                                (if (equal? flag #t) 
-                                                                                (s:a (car result) 'href (conc "./test_log?runid=" run-id "&testname="  item-name ))
-  																																						  (s:a (car result) 'href (string-substitute  (conc linktree "/")  "" (cadr result)  "-"))))))
-                                                                       (s:td  link 'class status)))
-                                                                runs))))
-                                                        res))
-                                                   item-keys)))
-                               test-list)))))) 
-
-;; (tests:create-html-tree "test-index.html")
-;;
-(define (tests:create-html-tree outf)
-   (let* ((lockfile  (conc outf ".lock"))
-	 			 (runs-to-process '())
-         (linktree  (common:get-linktree))
-         (area-name (common:get-testsuite-name))
-	  		 (keys      (rmt:get-keys))
-	  		 (numkeys   (length keys))
-         (run-patt (or (args:get-arg "-run-patt")
-                        (args:get-arg "-runname")
-                        "%"))
-         (target (or  (args:get-arg "-target-patt") 
-											(args:get-arg "-target")
-                      "%"))
-         (targlist (string-split target "/"))
-         (numtarg  (length targlist))  
-         (targtweaked (if (> numkeys numtarg)
-			   								(append targlist (make-list (- numkeys numtarg) "%"))
-			  								targlist))
-         (target-patt (string-join targtweaked "/"))
-         ;(total-runs  (rmt:get-num-runs "%")) ;;this needs to be changed to filter by target
-          (total-runs (rmt:get-runs-cnt-by-patt run-patt target-patt keys )) 
-         (pg-size 10))
-    (if (common:simple-file-lock lockfile)
-        (begin
-         ;(print total-runs)    
-        (let loop ((page 0))
-	(let* ((oup       (open-output-file (or outf (conc linktree "/page" page ".html"))))
-               (get-prev-links (lambda (page linktree )   
-                            (let* ((link  (if (not (eq? page 0))
-                                   (s:a "&lt;&lt;prev" 'href (conc  "page" (- page 1) ".html"))
-                                   (s:a "" 'href (conc   "page"  page ".html")))))
-                               link)))
-               (get-next-links (lambda (page linktree total-runs)   
-                            (let* ((link  (if (> total-runs (+ 10 (* page pg-size)))
-                                   (s:a "next&gt;&gt;" 'href (conc  "page"  (+ page 1) ".html"))
-                                   (s:a "" 'href (conc   "page" page  ".html")))))
-                               link))) )
-          (print "total runs: " total-runs) 
-          (s:output-new
-	   			 oup
-	   					(tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #f run-patt target-patt)) ;; update this function
-          (close-output-port oup)
-         ; (set! page (+ 1 page))
-          (if (> total-runs (* (+ 1 page) pg-size))
-           (loop (+ 1  page)))))
-	  (common:simple-file-release-lock lockfile))
-	            
-	#f)))
-
-
-(define (tests:readlines filename)
-  (call-with-input-file filename
-    (lambda (p)
-      (let loop ((line (read-line p))
-                 (result '()))
-        (if (eof-object? line)
-            (reverse result)
-            (loop (read-line p) (cons line result)))))))
-
-(define (tests:get-test-log run-id test-name item-name)
-  (let* ((test-data    (rmt:get-tests-for-run
-				   (string->number run-id)
-                                    test-name      ;; testnamepatt
-				   '()        ;; states
-				   '()        ;; statuses
-				   #f         ;; offset
-				   #f         ;; num-to-get
-				   #f         ;; hide/not-hide
-				   #f         ;; sort-by
-				   #f         ;; sort-order
-				   #f         ;; 'shortlist                           ;; qrytype
-                                   0         ;; last update
-				   #f))
-         (path "")
-         (found 0))
-    (debug:print-info 0 *default-log-port* "found: " found )
-
-   (let loop ((hed (car test-data))
-		 (tal (cdr test-data)))
-          (debug:print-info 0 *default-log-port* "item: " (vector-ref hed 11) (vector-ref hed 10) "/" (vector-ref hed 13))
-
-	(if (equal? (vector-ref hed 11) item-name)
-            (begin
-              (set! found 1) 
-	      (set! path (conc (vector-ref hed 10) "/" (vector-ref hed 13)))))
-	    (if (and (not (null? tal)) (equal? found 0))
-		(loop (car tal)(cdr tal))))
-   (if (equal? path "")
-     "<H2>Data not found</H2>"
-     (string-join (tests:readlines path) "\n"))))
-
-
-(define (tests:dynamic-dboard page)
-;(define (tests:create-html-tree o)
- (let* (
-;(page "1")
-          (linktree  (common:get-linktree))
-         (area-name (common:get-testsuite-name))
-	       (keys      (rmt:get-keys))
-	       (numkeys   (length keys))
-         (targtweaked (make-list numkeys "%"))
-         (target-patt (string-join targtweaked "/"))
-         (total-runs  (rmt:get-num-runs "%"))
-         (pg-size 10)
-         (pg (if (equal? page #f)
-                 0
-                 (- (string->number page) 1)))
-          (get-prev-links  (lambda (pg linktree)
-                           (debug:print-info 0 *default-log-port* "val: " (- 1 pg))
-                          (let* ((link  (if (not (eq? pg 0))
-                               (s:a  "&lt;&lt;prev " 'href (conc  "dashboard?page="  pg  ))
-                               (s:a "" 'href (conc  "dashboard?page=" pg)))))
-                               link)))
-          (get-next-links   (lambda (pg linktree total-runs)  
-                            (debug:print-info 0 *default-log-port* "val: " pg)
-                             (debug:print-info 0 *default-log-port* "val: " total-runs " size" pg-size)
- 
-                            (let* ((link  (if (> total-runs (+ 10 (* pg pg-size)))
-                              (s:a  "next&gt;&gt; "  'href (conc  "dashboard?page="  (+ pg 2)  ))
-                             (s:a "" 'href (conc  "dashboard?page=" pg  )))))
-                             link)))
-         (html-body (tests:dashboard-body pg pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #t "%" target-patt))) ;; update tis function
-        html-body))
-
-(define (tests:create-html-summary outf)
- (let* ((lockfile  (conc outf ".lock"))
-        (linktree  (common:get-linktree))
-				(keys      (rmt:get-keys))
-        (area-name (common:get-testsuite-name))
-        (run-patt (or (args:get-arg "-run-patt")
-                        (args:get-arg "-runname")
-                        "%"))
-        (target (or (args:get-arg "-target-patt")
-                        (args:get-arg "-target")
-                        "%"))
-         (targlist (string-split target "/"))
-         (numkeys  (length keys))
-	       (numtarg  (length targlist))  
-         (targtweaked (if (> numkeys numtarg)
-			   								(append targlist (make-list (- numkeys numtarg) "%"))
-			  								targlist))
-        (target-patt (string-join targtweaked "/")))
-    (if (common:simple-file-lock lockfile)
-        (begin
-          (let* (;(runsdat1   (rmt:get-runs run-patt #f #f (map (lambda (x)(list x "%")) keys)))
-                 (runsdat   (rmt:get-runs-by-patt  keys run-patt target-patt #f #f #f 0))
-					       (runs      (vector-ref runsdat 1))
-                 (header      (vector-ref runsdat 0))
-        	       (oup       (open-output-file (or outf (conc linktree "/targets.html"))))
-                 (target-hash (test:create-target-hash runs header (length keys))))
-           (test:create-target-html target-hash oup area-name linktree)
-          (test:create-run-html  runs area-name linktree (length keys) header))
-	  (common:simple-file-release-lock lockfile))
-	#f)))
-
-(define (test:get-test-hash test-data)
-	(let ((resh (make-hash-table)))
-    	(map (lambda (test)
-        (let* ((test-name (vector-ref test 2))
-               (test-html-path (if (file-exists? (conc (vector-ref test 10) "/test-summary.html"))
-																 (conc (vector-ref test 10) "/test-summary.html" )
-							 									 (conc (vector-ref test 10) "/" (vector-ref test 13))))
-               (test-item  (vector-ref test 11))
-               (test-status (vector-ref test 4)))
-               (if (not (hash-table-ref/default resh test-item  #f))
-                   (hash-table-set! resh test-item   (make-hash-table)))
-               (hash-table-set! (hash-table-ref/default resh test-item  #f) test-name (list test-status test-html-path)))) 
-        test-data)
-resh))
-
-(define (test:get-data->b-keys ordered-data a-keys)
-  (delete-duplicates
-   (sort (apply
-	  append
-	  (map (lambda (sub-key)
-		 (let ((subdat (hash-table-ref ordered-data sub-key)))
-		   (hash-table-keys subdat)))
-	       a-keys))
-	 string>=?)))
-
-
-(define (test:create-run-html runs area-name linktree numkeys header)
-  (map (lambda (run)
-		 (let* ((target (string-join (take (vector->list run) numkeys) "/"))
-						(run-name (db:get-value-by-header run header "runname"))
-            (run-time (seconds->work-week/day-time (db:get-value-by-header run header "event_time")))
-						(oup (if (file-exists? (conc linktree "/" target "/" run-name))
-                        (open-output-file (conc linktree "/" target "/" run-name "/run.html"))
-                         #f))
-            (run-id (db:get-value-by-header run header "id"))
-            (test-data    (rmt:get-tests-for-run
-				  								 run-id
-                           "%"       ;; testnamepatt
-				  								 '()        ;; states
-				   								 '()        ;; statuses
-				  								 	#f         ;; offset
-				  						 			#f         ;; num-to-get
-				   									#f         ;; hide/not-hide
-				  								  #f         ;; sort-by
-				   									#f         ;; sort-order
-				   									#f         ;; 'shortlist                           ;; qrytype
-                            0         ;; last update
-				  									#f))
-            (item-test-hash (test:get-test-hash test-data))
-            (items  (hash-table-keys item-test-hash))
- 						(test-names (test:get-data->b-keys item-test-hash items)))
-    (if oup
-      (begin 
-     (s:output-new
-	   oup
-	   (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f)
-		   (s:title "Runs View " run-name)
-		   (s:body
-		     (s:h1 "Runs View " )
-         (s:h3 "Target" target)
-				 (s:p 
-					(s:b "Run name" ) run-name)
-         (s:p 
-					(s:b "Run Date" ) run-time)
-         (s:table 'border 1 'cellspacing 0
-           (s:tr
-           (s:th "Items")
-           (map (lambda (test)
-            (s:th test))
-           test-names))  
-           (map (lambda (item) 
-					  (let* ((test-hash (hash-table-ref/default item-test-hash item  #f)))
-								 (if test-hash
-                  (begin
-									(s:tr
-					  			(s:td 'class "test" item)
-            			(map (lambda (test)
-						  		(let* ((test-details (hash-table-ref/default test-hash test  #f))
-												(status (if test-details
-																(car test-details)))
-                        (link (if test-details 
-														(string-substitute  (conc linktree "/" target "/" run-name "/")  "" (cadr test-details) "-"))))
-                   (if test-details
-											(s:td 'class status
-												(s:a 'class "link" 'href link status ))
-                      (s:td "")))) 			
-									test-names))))))
-				  (sort items string<=?))))))
-		(close-output-port oup))
-    (debug:print-info 0 "Skip: Dirctory structure " linktree "/" target "/" run-name " does not exist. Megatest will not create run.html"))))
-runs))
-
-(define (test:create-target-hash runs header numkeys)
-  (let ((resh (make-hash-table)))
-   (for-each
-     (lambda (run)
-        (let* ((run-name (db:get-value-by-header run header "runname"))
-               (target   (string-join (take (vector->list run) numkeys) "/"))
-               (run-list (hash-table-ref/default resh target  #f)))
-               
-               (if (not run-list)
-                   (hash-table-set! resh target   (list run-name))
-                   (hash-table-set! resh target   (cons run-name run-list)))))
-      runs)
-   resh))
-
-(define (test:get-max-run-cnt target-hash targets)
-   (let* ((cnt 0 ))
-   (map (lambda (target)
-        (let* ((runs  (hash-table-ref/default target-hash target  #f))
-               (run-length (if runs
-																(length runs)
-                                 0)))
-  
-              (if (< cnt run-length)
-               (set! cnt  run-length)))) 
-		targets) 
-cnt))
- 
-(define (test:pad-runs target-hash targets max-row-length)
- (map (lambda (target)
-        (let loop ((run-list  (hash-table-ref/default target-hash target  #f)))
-               (if (< (length run-list) max-row-length)
-                 (begin  
-               		 (hash-table-set! target-hash target   (cons "" run-list))
-               		 (loop (hash-table-ref/default target-hash target  #f) ))))) 
-		targets)
-   target-hash)
-
-;; Write the "Target View" page to the port oup and close the port.
-;;
-;;   target-hash - hash table mapping a target to its list of run names; it is
-;;                 padded in place (test:pad-runs) so every row has the same
-;;                 number of cells
-;;   oup         - output port the HTML is written to; closed before returning
-;;   area-name   - text appended to the page title and heading
-;;   linktree    - directory under which <target>/<run> is checked for existence
-;;
-;; Each row lists a target followed by its runs (in reverse of stored order).
-;; A run whose directory exists under linktree links to <target>/<run>/run.html.
-;; A run whose directory is missing now produces an empty cell; previously the
-;; one-armed `if` put an unspecified value into the row.
-;;
-(define (test:create-target-html target-hash oup area-name linktree)
-  (let* ((targets        (hash-table-keys target-hash))
-         (max-row-length (test:get-max-run-cnt target-hash targets)))
-    ;; called for its side effect on target-hash only
-    (test:pad-runs target-hash targets max-row-length)
-    (s:output-new
-     oup
-     (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f)
-             (s:title "Target View " area-name)
-             (s:body
-              (s:h1 "Target View " area-name)
-              (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0
-                       (s:tr 'class "something"
-                             (s:th "Target")
-                             (s:th 'colspan max-row-length "Runs"))
-                       (map (lambda (target)
-                              (s:tr
-                               (s:td 'class "test" target)
-                               (map (lambda (run)
-                                      (cond
-                                       ;; padding cell added by test:pad-runs
-                                       ((equal? run "")
-                                        (s:td run))
-                                       ((file-exists? (conc linktree "/" target "/" run))
-                                        (s:td
-                                         (s:a 'href (conc target "/" run "/run.html") run)))
-                                       ;; run directory is gone: keep the column count intact
-                                       (else
-                                        (s:td ""))))
-                                    (reverse (hash-table-ref/default target-hash target '())))))
-                            targets)))))
-    (close-output-port oup)))
-
-
-;; Write an HTML index of all runs to outf (or to <linktree>/runs-index.html
-;; when outf is #f), then write a run-summary.html into each run directory
-;; that exists, is a directory and is writable.
-;;
-;; Returns #t when the lock file (outf with ".lock" appended) was obtained
-;; through common:simple-file-lock, #f when it was not (nothing is written).
-;;
-;; NOTE(review): lockfile is built from outf before the (or outf ...) fallback
-;; is applied -- confirm callers always pass a real path for outf.
-;;
-(define (tests:create-html-tree-old outf)
-   (let* ((lockfile  (conc outf ".lock"))
-	 (runs-to-process '()))
-    (if (common:simple-file-lock lockfile)
-	(let* ((linktree  (common:get-linktree))
-	       (oup       (open-output-file (or outf (conc linktree "/runs-index.html"))))
-	       (area-name (common:get-testsuite-name))
-	       (keys      (rmt:get-keys))
-	       (numkeys   (length keys))
-	       (runsdat   (rmt:get-runs "%" #f #f (map (lambda (x)(list x "%")) keys)))
-	       (header    (vector-ref runsdat 0))
-	       (runs      (vector-ref runsdat 1))
-	       (runtreedat (map (lambda (x)
-				  (tests:run-record->test-path x numkeys))
-				runs))
-	       (runs-htree (common:list->htree runtreedat)))
-	  ;; NOTE(review): runs-to-process is assigned here but never read in this procedure
-	  (set! runs-to-process runs)
-	  (s:output-new
-	   oup
-	   (s:html tests:css-jscript-block
-		   (s:title "Summary for " area-name)
-		   (s:body 'onload "addEvents();"
-			   (s:h1 "Summary for " area-name)
-			   ;; top list
-			   (s:ul 'id "LinkedList1" 'class "LinkedList"
-				 (s:li
-				  "Runs"
-				  (common:htree->html runs-htree
-						      '()
-						      (lambda (x p)
-							(let* ((targ-path (string-intersperse p "/"))
-                                                               (full-path (conc linktree "/" targ-path))
-                                                               (run-name  (car (reverse p))))
-                                                          ;; link only to run directories we will be able to write a summary into
-                                                          (if (and (common:file-exists? full-path)
-                                                                   (directory?   full-path)
-                                                                   (file-write-access? full-path))
-                                                              (s:a run-name 'href (conc targ-path "/run-summary.html"))
-                                                              (begin
-                                                                (debug:print 0 *default-log-port* "INFO: Can't create " targ-path "/run-summary.html")
-                                                                (conc run-name " (Not able to create summary at " targ-path ")")))))))))))
-          (close-output-port oup)
-	  ;; the lock only covers the top-level index; the per-run pages below are written after it is released
-	  (common:simple-file-release-lock lockfile)
-               
-	  (for-each
-	   (lambda (run)
-	     ;; NOTE(review): test-subpath is computed the same way as run-dir and is not used below
-	     (let* ((test-subpath (tests:run-record->test-path run numkeys))
-		    (run-id       (db:get-value-by-header run header "id"))
-                    (run-dir      (tests:run-record->test-path run numkeys))
-		    (test-dats    (rmt:get-tests-for-run
-				   run-id
-                                   "%/"       ;; testnamepatt
-				   '()        ;; states
-				   '()        ;; statuses
-				   #f         ;; offset
-				   #f         ;; num-to-get
-				   #f         ;; hide/not-hide
-				   #f         ;; sort-by
-				   #f         ;; sort-order
-				   #f         ;; 'shortlist                           ;; qrytype
-                                   0         ;; last update
-				   #f))
-                    (tests-tree-dat (map (lambda (test-dat)
-                                         ;; (tests:run-record->test-path x numkeys))
-                                         ;; NOTE(review): string-split is called without a delimiter argument; if
-                                         ;; full-name is "/"-separated this presumably does not split it -- confirm
-                                         (let* ((test-name  (db:test-get-testname test-dat))
-                                                (item-path  (db:test-get-item-path test-dat))
-                                                (full-name  (db:test-make-full-name test-name item-path))
-                                                (path-parts (string-split full-name)))
-                                           path-parts))
-                                       test-dats))
-                    (tests-htree (common:list->htree tests-tree-dat))
-                    (html-dir    (conc linktree "/" (string-intersperse run-dir "/")))
-                    (html-path   (conc html-dir "/run-summary.html"))
-                    ;; oup is #f when the run directory is missing or not writable; the run is then skipped
-                    (oup         (if (and (common:file-exists? html-dir)
-                                          (directory?   html-dir)
-                                          (file-write-access? html-dir))
-                                     (open-output-file  html-path)
-                                     #f)))
-               ;; (print "run-dir: " run-dir ", tests-tree-dat: " tests-tree-dat)
-               (if oup
-                   (begin
-                     (s:output-new
-                      oup
-                      (s:html tests:css-jscript-block
-                              (s:title "Summary for " area-name)
-                              (s:body 'onload "addEvents();"
-                                      (s:h1 "Summary for " (string-intersperse run-dir "/"))
-                                      ;; top list
-                                      (s:ul 'id "LinkedList1" 'class "LinkedList"
-                                            (s:li
-                                             "Tests"
-                                             (common:htree->html tests-htree
-                                                                 '()
-                                                                 (lambda (x p)
-                                                                   (let* ((targ-path (string-intersperse p "/"))
-                                                                          (test-name (car p))
-                                                                          (item-path ;; (if (> (length p) 2) ;; test-name + run-name
-                                                                           (string-intersperse p "/"))
-                                                                          (full-targ (conc html-dir "/" targ-path))
-                                                                          (std-file  (conc full-targ "/test-summary.html"))
-                                                                          (alt-file  (conc full-targ "/megatest-rollup-" test-name ".html"))
-                                                                          ;; prefer the rollup page when it exists
-                                                                          (html-file (if (common:file-exists? alt-file)
-                                                                                         alt-file
-                                                                                         std-file))
-                                                                          (run-name  (car (reverse p))))
-                                                                     ;; NOTE(review): this requires full-targ to not exist and also to be a
-                                                                     ;; directory; it looks like it can never be true, so tests:summarize-test
-                                                                     ;; is presumably never reached from here -- confirm intent
-                                                                     (if (and (not (common:file-exists? full-targ))
-                                                                              (directory? full-targ)
-                                                                              (file-write-access? full-targ))
-                                                                         (tests:summarize-test 
-                                                                          run-id 
-                                                                          (rmt:get-test-id run-id test-name item-path)))
-                                                                     (if (common:file-exists? full-targ)
-                                                                         (s:a run-name 'href html-file)
-                                                                         (begin
-                                                                           (debug:print 0 *default-log-port* "ERROR: can't access " full-targ)
-                                                                           (conc "No summary for " run-name)))))
-                                                                 ))))))
-                     (close-output-port oup)))))
-           runs)
-          #t)
-	#f)))
-
-
-
-
-;; Send a test's resource/host metadata to the central db via rmt:general-call.
-;;
-;; A test_rundat row is always recorded, with -1 standing in for any of
-;; cpuload, diskfree or minutes that is #f. The remaining updates are only
-;; issued when their inputs are available:
-;;   cpuload and diskfree -> 'update-cpuload-diskfree
-;;   minutes              -> 'update-run-duration
-;;   uname and hostname   -> 'update-uname-host
-;;
-(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)
-  (let ((load-or-unset (or cpuload  -1))
-        (disk-or-unset (or diskfree -1))
-        (mins-or-unset (or minutes  -1)))
-    (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) load-or-unset disk-or-unset -1 mins-or-unset))
-  (when (and cpuload diskfree)
-    (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id))
-  (when minutes
-    (rmt:general-call 'update-run-duration run-id minutes test-id))
-  (when (and uname hostname)
-    (rmt:general-call 'update-uname-host run-id uname hostname test-id)))
-  
-;; This one is for running with no db access (i.e. via rmt: internally)
-;; Sample the local host (cpu load, free disk of the current directory, uname
-;; and host name) and forward the values, together with minutes, to
-;; tests:update-central-meta-info.
-;;
-;; db, work-area and remtries are accepted but not used by this procedure.
-;;
-(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries)
-  (let* ((load-now  (get-cpu-load))
-         (disk-now  (get-df (current-directory)))
-         (os-string (get-uname "-srvpio"))
-         (host      (get-host-name)))
-    (tests:update-central-meta-info run-id test-id load-now disk-now minutes os-string host)))
-    
-;; 
-;;
-;; Return the steps of a test as a list of 8-element vectors, one per step,
-;; with the start and end records already collapsed by tests:process-steps-table:
-;;
-;;      0        1     2    3       4        5       6      7
-;;   #<stepname start end status duration logfile comment id>
-;;
-;; Numeric start/end values are converted with seconds->time-string; other
-;; values are passed through unchanged.
-;;
-;; The list is ordered by start time, ties broken by id. When either start
-;; value is not a number the two are compared as strings instead.
-;;
-(define (tests:get-compressed-steps run-id test-id)
-  (let* ((steps-data (rmt:get-steps-for-test run-id test-id))
-         (comprsteps (tests:process-steps-table steps-data))
-         (fmt-time   (lambda (s)
-                       (if (number? s) (seconds->time-string s) s)))
-         (earlier?   (lambda (a b)
-                       (let ((time-a (vector-ref a 1))
-                             (time-b (vector-ref b 1)))
-                         (if (and (number? time-a) (number? time-b))
-                             ;; = rather than eq?: eq? is not a reliable numeric
-                             ;; comparison (e.g. for flonums), which would make
-                             ;; the id tie-break unreachable
-                             (or (< time-a time-b)
-                                 (and (= time-a time-b)
-                                      (< (vector-ref a 7) (vector-ref b 7))))
-                             (string<? (conc time-a) (conc time-b)))))))
-    (map (lambda (x)
-           (vector
-            (vector-ref x 0)            ;; stepname  0
-            (fmt-time (vector-ref x 1)) ;; starttime 1
-            (fmt-time (vector-ref x 2)) ;; endtime   2
-            (vector-ref x 3)            ;; status    3
-            (vector-ref x 4)            ;; duration  4
-            (vector-ref x 5)            ;; logfile   5
-            (vector-ref x 6)            ;; comment   6
-            (vector-ref x 7)))          ;; id        7
-         (sort (hash-table-values comprsteps) earlier?))))
-
-
-;; Summarize a test into the file test-summary.html in the test's run directory
-;;
-(define (tests:summarize-test run-id test-id)
-  ;; The test record is fetched with rmt:get-test-info-by-id; the page is written
-  ;; to <rundir>/test-summary.html where rundir comes from db:test-get-rundir.
-  ;; The page holds a table of ids/state/status/date/duration, a link to the
-  ;; final log and a table with one row per step.
-  (let* ((test-dat  (rmt:get-test-info-by-id run-id test-id))
-	 (out-dir   (db:test-get-rundir test-dat))
-	 (out-file  (conc out-dir "/test-summary.html")))
-    ;; first verify we are able to write the output file
-    ;; (when we cannot, an error is logged and nothing is written)
-    (if (not (file-write-access? out-dir))
-	(debug:print 0 *default-log-port* "ERROR: cannot write test-summary.html to " out-dir)
-	(let* (;; (steps-dat (rmt:get-steps-for-test run-id test-id))
-	       (test-name (db:test-get-testname test-dat))
-	       (item-path (db:test-get-item-path test-dat))
-	       (full-name (db:test-make-full-name test-name item-path))
-	       (oup       (open-output-file out-file))
-	       (status    (db:test-get-status   test-dat))
-	       (color     (common:get-color-from-status status))
-	       (logf      (db:test-get-final_logf test-dat))
-	       (steps-dat (tests:get-compressed-steps run-id test-id)))
-	  ;; (dcommon:get-compressed-steps #f 1 30045)
-	  ;; (#("wasting_time" "23:36:13" "23:36:21" "0" "8.0s" "wasting_time.log"))
-	  
-	  ;; NOTE(review): oup is only closed on the normal path; an error raised while
-	  ;; building or writing the page leaves the port open
-	  (s:output-new
-	   oup
-	   (s:html
-	    (s:title "Summary for " full-name)
-	    (s:body 
-	     (s:h2 "Summary for " full-name)
-	     ;; test identity and overall result; the status cell links to the final log
-	     (s:table 'cellspacing "0" 'border "1"
-		      (s:tr (s:td "run id")   (s:td (db:test-get-run_id   test-dat))
-			    (s:td "test id")  (s:td (db:test-get-id       test-dat)))
-		      (s:tr (s:td "testname") (s:td test-name)
-			    (s:td "itempath") (s:td item-path))
-		      (s:tr (s:td "state")    (s:td (db:test-get-state    test-dat))
-			    (s:td "status")   (s:td (s:a 'href logf (s:font 'color color status))))
-		      (s:tr (s:td "TestDate") (s:td (seconds->work-week/day-time 
-						     (db:test-get-event_time test-dat)))
-			    (s:td "Duration") (s:td (seconds->hr-min-sec (db:test-get-run_duration test-dat)))))
-	     (s:h3 "Log files")
-	     (s:table 
-	      'cellspacing "0" 'border "1"
-	      (s:tr (s:td "Final log")(s:td (s:a 'href logf logf))))
-	     ;; one row per step from tests:get-compressed-steps, each linking to its log file
-	     (s:table
-	      'cellspacing "0" 'border "1"
-	      (s:tr (s:td "Step Name")(s:td "Start")(s:td "End")(s:td "Status")(s:td "Duration")(s:td "Log File"))
-	      (map (lambda (step-dat)
-		     (s:tr (s:td (tdb:steps-table-get-stepname step-dat))
-			   (s:td (tdb:steps-table-get-start    step-dat))
-			   (s:td (tdb:steps-table-get-end      step-dat))
-			   (s:td (tdb:steps-table-get-status   step-dat))
-			   (s:td (tdb:steps-table-get-runtime  step-dat))
-			   (s:td (let ((step-log (tdb:steps-table-get-log-file step-dat)))
-				   (s:a 'href step-log step-log)))))
-		   steps-dat))
-	     )))
-	  (close-output-port oup)))))
-	  
-	  
-;; MUST BE CALLED local!
-;;
-;; Return test paths for target that match the -testpatt/-state/-status/-runname
-;; command line arguments (each defaults to "%").
-;;
-;;   keynames  - passed through to rmt:test-get-paths-matching-keynames-target-new
-;;   target    - passed through to the same query
-;;   fnamepatt - #f to return the paths from the db as they are; otherwise a
-;;               glob pattern applied inside every path that is an existing
-;;               directory, and the matching file names are returned instead
-;;   res       - keyword argument handed to the query (default '())
-;;
-(define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '()))
-  ;; BUG: Move the values derived from args to parameters and push to megatest.scm
-  ;; NOTE(review): the original looked up "-testpatt" twice in the `or`; the
-  ;; redundant lookup is removed. The sibling lookups below suggest ":testpatt"
-  ;; may have been the intended fallback -- confirm before adding it.
-  (let* ((testpatt   (or (args:get-arg "-testpatt") "%"))
-         (statepatt  (or (args:get-arg "-state")   (args:get-arg ":state")    "%"))
-         (statuspatt (or (args:get-arg "-status")  (args:get-arg ":status")   "%"))
-         (runname    (or (args:get-arg "-runname") (args:get-arg ":runname")  "%"))
-         (paths-from-db (rmt:test-get-paths-matching-keynames-target-new keynames target res
-                                                                         testpatt
-                                                                         statepatt
-                                                                         statuspatt
-                                                                         runname)))
-    (if fnamepatt
-        (apply append
-               (map (lambda (p)
-                      (if (directory-exists? p)
-                          (let ((glob-query (conc p "/" fnamepatt)))
-                            ;; Try glob first. If it raises (likely because someone
-                            ;; tried */*/*.log or similar) fall back to letting the
-                            ;; shell expand the pattern; we don't try any harder.
-                            ;; NOTE(review): glob-query goes to the shell unquoted.
-                            (handle-exceptions
-                                exn
-                                (with-input-from-pipe
-                                    (conc "echo " glob-query)
-                                  read-lines)
-                              (glob glob-query)))
-                          '()))
-                    paths-from-db))
-        paths-from-db)))
-
-			      
-;; for each test:
-;;   
-;; Return the members of testkeynames that are still worth running in run-id.
-;;
-;;   testkeynames    - keys into testrecordshash
-;;   testrecordshash - hash table of test queue records
-;;
-;; A test is dropped when its own record shows it COMPLETED with status
-;; PASS/WARN/WAIVED/CHECK/SKIP, or shows state INCOMPLETE or KILLED. Otherwise
-;; it is dropped when any test it waits on is COMPLETED with FAIL/ABORT, has
-;; status KILLED, or has state INCOMPLETE. A test with no record is kept.
-;; The result is in reverse order of testkeynames.
-;;
-;; Fixes: the waiton check now reads the record of the waited-on test (it used
-;; to re-read the record of the test itself), compares against "INCOMPLETE"
-;; (was misspelled "INCOMPETE"), and skips a waited-on test that has no record.
-;;
-(define (tests:filter-non-runnable run-id testkeynames testrecordshash)
-  (let ((runnables '()))
-    (for-each
-     (lambda (testkeyname)
-       (let* ((test-record (hash-table-ref testrecordshash testkeyname))
-              (test-name   (tests:testqueue-get-testname  test-record))
-              (item-path   (tests:testqueue-get-item_path test-record))
-              (waitons     (tests:testqueue-get-waitons   test-record))
-              (keep-test   #t)
-              (test-id     (rmt:get-test-id run-id test-name item-path))
-              (tdat        (rmt:get-testinfo-state-status run-id test-id)))
-         (if tdat
-             (begin
-               ;; Look at the test state and status
-               (if (or (and (member (db:test-get-status tdat)
-                                    '("PASS" "WARN" "WAIVED" "CHECK" "SKIP"))
-                            (equal? (db:test-get-state tdat) "COMPLETED"))
-                       (member (db:test-get-state tdat)
-                               '("INCOMPLETE" "KILLED")))
-                   (set! keep-test #f))
-
-               ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then
-               ;; eliminate this test from the runnable list
-               (if keep-test
-                   (for-each
-                    (lambda (waiton)
-                      ;; for now we are waiting only on the parent test
-                      (let* ((parent-test-id (rmt:get-test-id run-id waiton ""))
-                             (wtdat          (and parent-test-id
-                                                  (rmt:get-testinfo-state-status run-id parent-test-id))))
-                        (if (and wtdat
-                                 (or (and (equal? (db:test-get-state wtdat) "COMPLETED")
-                                          (member (db:test-get-status wtdat) '("FAIL" "ABORT")))
-                                     (member (db:test-get-status wtdat) '("KILLED"))
-                                     (member (db:test-get-state wtdat)  '("INCOMPLETE"))))
-                            (set! keep-test #f)))) ;; no point in running this one again
-                    waitons))))
-         (if keep-test (set! runnables (cons testkeyname runnables)))))
-     testkeynames)
-    runnables))
-
-;;======================================================================
-;; test steps
-;;======================================================================
-
-;; teststep-set-status! used to be here
-
-;; #t when the test's record exists and its state is "KILLREQ", #f otherwise
-;; (including when rmt:get-test-info-by-id returns #f).
-;;
-(define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat)
-  (let ((info (rmt:get-test-info-by-id run-id test-id)))
-    (if info
-        (equal? (test:get-state info) "KILLREQ")
-        #f)))
-
-;; Return the number of rows in the test_rundat table of the sqlite3 handle
-;; tdb, or 0 when tdb is #f.
-;;
-;; Fix: the trailing 0 used to sit after the `if` as a second body form, so
-;; the procedure returned 0 no matter what the query found. It is now the
-;; else branch.
-;;
-(define (test:tdb-get-rundat-count tdb)
-  (if tdb
-      (let ((res 0))
-        (sqlite3:for-each-row
-         (lambda (count)
-           (set! res count))
-         tdb
-         "SELECT count(id) FROM test_rundat;")
-        res)
-      0))
-
-;; (define (tests:set-partial-meta-info test-id run-id minutes work-area)
-;; DISABLED: the #; prefix makes the reader skip the whole form below, so
-;; tests:set-partial-meta-info is not defined by this file.
-;;
-;; As written it samples cpu load and free disk, calls
-;; tests:update-testdat-meta-info, and on an exception logs, sleeps 10 seconds
-;; and falls back to tests:set-full-meta-info while remtries is above zero.
-;;
-;; NOTE(review): before re-enabling, note that the body refers to `db`, which is
-;; neither a parameter nor bound in the let*, and that the let* rebinds remtries
-;; to 10, shadowing the parameter of the same name.
-#;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries)
-  (let* ((cpuload  (get-cpu-load))
-	 (diskfree (get-df (current-directory)))
-	 (remtries 10))
-    (handle-exceptions
-     exn
-     (if (> remtries 0)
-	 (begin
-	   (print-call-chain (current-error-port))
-	   (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times")
-	   (set! remtries (- remtries 1))
-	   (thread-sleep! 10)
-	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
-	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
-	   (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up")
-	   (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
-	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-	   (debug:print 5 *default-log-port* "exn=" (condition->list exn))
-	   (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
-	   (print-call-chain (current-error-port))))
-     (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
-  )))
-	 
-
-;;======================================================================
-;; launch a task - this runs on the originating host, tests themselves
-;;
-;;======================================================================
-
-
-;;======================================================================
-;; ezsteps
-;;======================================================================
-
-;; ezsteps were going to be coded as
-;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute
-;;   BUT
-;; now are
-;; stepname {VAR=first,second,third ...} command ...
-;; where the {VAR=first,second,third ...} is optional.
-
-;; given an exit code and whether or not logpro was used calculate OK/BAD
-;; return #t if we are ok, #f otherwise
-;; Decide whether a step's exit code counts as OK.
-;;   exit code 0            -> #t
-;;   exit code 2 with logpro -> #t
-;;   anything else          -> #f
-(define (steprun-good? logpro exitcode)
-  (cond
-   ((eq? exitcode 0) #t)
-   (logpro           (eq? exitcode 2))
-   (else             #f)))
-
-;; if handed a string, process it, else look for MT_CMDINFO
-;; Decode command info with common:read-encoded-string. The encoded text is
-;; the encoded-cmd keyword argument when given, else the MT_CMDINFO
-;; environment variable. Returns '() when neither is available.
-(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f))
-  (let ((enccmd (or encoded-cmd (getenv "MT_CMDINFO"))))
-    (cond
-     (enccmd (common:read-encoded-string enccmd))
-     (else   '()))))
-
-;; return (conc status ": " comment) from the final section so that
-;;   the comment can be set in the step record in launch.scm
-;;
-;; Read <stepname>.dat, push its contents to the db as csv via
-;; rmt:csv->test-data, and return a comment for the step record:
-;;   "PASS"                  when [final] exit-status is "PASS"
-;;   "<exit-status>: <msg>"  for any other exit-status ("no message" when
-;;                           [final] message is absent)
-;;   #f                      when there is no exit-status or no .dat file
-;;
-(define (launch:load-logpro-dat run-id test-id stepname)
-  (let ((cname (conc stepname ".dat")))
-    (and (common:file-exists? cname)
-         (let* ((dat    (configf:read-config cname #f #f))
-                (csvr   (db:logpro-dat->csv dat stepname))
-                (csvt   (let-values (((fmt-cell fmt-record fmt-csv) (make-format ",")))
-                          (fmt-csv (map list->csv-record csvr))))
-                (status (configf:lookup dat "final" "exit-status"))
-                (msg    (configf:lookup dat "final" "message")))
-           ;; this check blocked a stack dump caused by a 0-byte .dat file from
-           ;; logpro; fixed by upgrading logpro
-           (if (not csvt)
-               (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer")
-               (rmt:csv->test-data run-id test-id csvt))
-           (if (equal? status "PASS")
-               "PASS" ;; skip the message part if status is pass
-               (if status
-                   (conc (configf:lookup dat "final" "exit-status") ": " (or msg "no message"))
-                   #f))))))
-
-;; Run one ezstep of a test and fold its result into the test's status.
-;;
-;;   ezstep     - list; (car ezstep) is the step name and (cadr ezstep) the step
-;;                spec: an optional {name=value;...} prefix followed by the command
-;;   run-id, test-id - identify the test whose step/status records are updated
-;;   exit-info  - launch:einf record; pid, exit-status and exit-code are written
-;;                while holding m, rollup-status is read and updated
-;;   m          - mutex guarding exit-info
-;;   tal        - remaining steps; when null the next test state is "COMPLETED",
-;;                otherwise "RUNNING"
-;;   testconfig - consulted for a [logpro] entry named after the step
-;;
-;; The command runs under /bin/bash with "." appended to PATH and its output
-;; redirected to <stepname>.log; a matching rule is appended to Makefile.ezsteps.
-;; With a "subrun" parameter the command runs without MT_.* variables.
-;; When <stepname>.logpro exists (or is created from testconfig), logpro is then
-;; run over the log and its exit code replaces the command's in exit-info.
-;;
-;; Returns the logpro-used flag.
-;;
-;; Fix: the waived branch compared this-step-status with 'check (copy/paste),
-;; so the "Logpro waived found" comment was never recorded.
-;;
-(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig) ;;; TODO: deprecate me in favor of ezsteps.scm
-  (let* ((stepname       (car ezstep))  ;; do stuff to run the step
-         (stepinfo       (cadr ezstep))
-         (stepparts      (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo))
-         (stepparams     (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each
-         (paramparts     (if (string? stepparams)
-                             (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams))
-                             '()))
-         (subrun         (alist-ref "subrun" paramparts equal?))
-         (stepcmd        (list-ref stepparts 3))
-         (script         "") ;; no longer built up; only reported in the debug print below
-         (logpro-file    (conc stepname ".logpro"))
-         (tconfig-logpro (configf:lookup testconfig "logpro" stepname))
-         (logpro-used    (common:file-exists? logpro-file)))
-
-    (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams
-                 ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd)
-
-    ;; no logpro file found but have a defn in the testconfig: write it out
-    (if (and tconfig-logpro
-             (not logpro-used))
-        (begin
-          (with-output-to-file logpro-file
-            (lambda ()
-              (print ";; logpro file extracted from testconfig\n"
-                     ";;")
-              (print tconfig-logpro)))
-          (set! logpro-used #t)))
-
-    ;; NB// can safely assume we are in test-area directory
-    (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts
-                 " stepparams: " stepparams " stepcmd: " stepcmd)
-
-    (debug:print 4 *default-log-port* "script: " script)
-    (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
-
-    ;; now launch the actual process
-    (call-with-environment-variables
-     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
-     (lambda ()
-       (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1
-              (pid #f))
-         (let ((proc (lambda ()
-                       (set! pid (process-run "/bin/bash" (list "-c" cmd))))))
-           (if subrun
-               (begin
-                 (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.")
-                 (common:without-vars proc "^MT_.*"))
-               (proc)))
-
-         ;; record how the step was run so it can be replayed with make
-         (with-output-to-file "Makefile.ezsteps"
-           (lambda ()
-             (print stepname ".log :")
-             (print "\t" cmd)
-             (if (common:file-exists? (conc stepname ".logpro"))
-                 (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log"))
-             (print)
-             (print stepname " : " stepname ".log")
-             (print))
-           #:append)
-
-         (rmt:test-set-top-process-pid run-id test-id pid)
-         ;; poll (non-blocking wait) every 2 seconds until the process is gone;
-         ;; exit-info is refreshed on every poll
-         (let processloop ((i 0))
-           (let-values (((pid-val exit-status exit-code) (process-wait pid #t)))
-             (mutex-lock! m)
-             (launch:einf-pid-set!         exit-info pid)
-             (launch:einf-exit-status-set! exit-info exit-status)
-             (launch:einf-exit-code-set!   exit-info exit-code)
-             (mutex-unlock! m)
-             (if (eq? pid-val 0)
-                 (begin
-                   (thread-sleep! 2)
-                   (processloop (+ i 1)))))))))
-    (debug:print-info 0 *default-log-port* "step " stepname " completed with exit code " (launch:einf-exit-code exit-info))
-
-    ;; now run logpro if needed
-    (if logpro-used
-        (let* ((logpro-exe (or (getenv "LOGPRO_EXE") "logpro"))
-               (pid        (process-run (conc "/bin/sh -c '" logpro-exe " " logpro-file " " (conc stepname ".html") " < " stepname ".log > /dev/null'"))))
-          (let processloop ((i 0))
-            (let-values (((pid-val exit-status exit-code) (process-wait pid #t)))
-              (mutex-lock! m)
-              (launch:einf-pid-set!         exit-info pid)
-              (launch:einf-exit-status-set! exit-info exit-status)
-              (launch:einf-exit-code-set!   exit-info exit-code)
-              (mutex-unlock! m)
-              (if (eq? pid-val 0)
-                  (begin
-                    (thread-sleep! 2)
-                    (processloop (+ i 1)))))
-            (debug:print-info 0 *default-log-port* "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info)))))
-
-    ;; close out the step record
-    (let ((exinfo  (launch:einf-exit-code exit-info))
-          (logfna  (if logpro-used (conc stepname ".html") ""))
-          (comment #f))
-      (if logpro-used
-          (let ((datfile (conc stepname ".dat")))
-            ;; load the .dat file into the test_data table if it exists
-            (if (common:file-exists? datfile)
-                (set! comment (launch:load-logpro-dat run-id test-id stepname)))
-            (rmt:test-set-log! run-id test-id (conc stepname ".html"))))
-      (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna))
-
-    ;; set the test final status
-    (let* ((process-exit-status (launch:einf-exit-code exit-info))
-           (this-step-status (cond
-                              ((and (eq? process-exit-status 2) logpro-used) 'warn)   ;; logpro 2 = warnings
-                              ((and (eq? process-exit-status 3) logpro-used) 'check)  ;; logpro 3 = check
-                              ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived
-                              ((and (eq? process-exit-status 5) logpro-used) 'abort)  ;; logpro 5 = abort
-                              ((and (eq? process-exit-status 6) logpro-used) 'skip)   ;; logpro 6 = skip
-                              ((eq? process-exit-status 0)                   'pass)   ;; logpro 0 = pass
-                              (else 'fail)))
-           (overall-status   (cond
-                              ((eq? (launch:einf-rollup-status exit-info) 2) 'warn)
-                              ((eq? (launch:einf-rollup-status exit-info) 0) 'pass)
-                              (else 'fail)))
-           ;; NOTE(review): overall-status is only ever warn, pass or fail, so the
-           ;; 'abort clause below is not reachable as written
-           (next-status      (cond
-                              ((eq? overall-status 'pass) this-step-status)
-                              ((eq? overall-status 'warn)
-                               (if (eq? this-step-status 'fail) 'fail 'warn))
-                              ((eq? overall-status 'abort) 'abort)
-                              (else 'fail)))
-           (next-state       (cond
-                              ((null? tal) ;; more to run?
-                               "COMPLETED")
-                              (else "RUNNING"))))
-      (debug:print 4 *default-log-port* "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used
-                   " this-step-status: " this-step-status " overall-status: " overall-status
-                   " next-status: " next-status " rollup-status: "  (launch:einf-rollup-status exit-info))
-      ;; NB// tests:test-set-status! does rdb calls under the hood
-      (case next-status
-        ((warn)
-         (launch:einf-rollup-status-set! exit-info 2)
-         (tests:test-set-status! run-id test-id next-state "WARN"
-                                 (if (eq? this-step-status 'warn) "Logpro warning found" #f)
-                                 #f))
-        ((check)
-         (launch:einf-rollup-status-set! exit-info 3)
-         (tests:test-set-status! run-id test-id next-state "CHECK"
-                                 (if (eq? this-step-status 'check) "Logpro check found" #f)
-                                 #f))
-        ((waived)
-         (launch:einf-rollup-status-set! exit-info 4)
-         (tests:test-set-status! run-id test-id next-state "WAIVED"
-                                 (if (eq? this-step-status 'waived) "Logpro waived found" #f)
-                                 #f))
-        ((abort)
-         (launch:einf-rollup-status-set! exit-info 5)
-         (tests:test-set-status! run-id test-id next-state "ABORT"
-                                 (if (eq? this-step-status 'abort) "Logpro abort found" #f)
-                                 #f))
-        ((skip)
-         (launch:einf-rollup-status-set! exit-info 6)
-         (tests:test-set-status! run-id test-id next-state "SKIP"
-                                 (if (eq? this-step-status 'skip) "Logpro skip found" #f)
-                                 #f))
-        ((pass)
-         (tests:test-set-status! run-id test-id next-state "PASS" #f #f))
-        (else ;; 'fail
-         ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED"
-         (launch:einf-rollup-status-set! exit-info 1)
-         (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f))))
-    logpro-used))
-
-;; launch:manage-steps
-;;
-;; Body of the "run job" thread: marks the test RUNNING, runs the optional
-;; runscript to completion, then runs the ezsteps (and the optional subrun
-;; step) one after another.
-;;
-;;   run-id, test-id  - identify the test record being updated
-;;   item-path        - item path handed to the state roll-up and to the testconfig lookup
-;;   fullrunscript    - command passed to process-run, or #f when there is no runscript
-;;   ezsteps          - truthy when ezsteps are to be processed (also forced to #t for a subrun)
-;;   subrun           - truthy when a subrun was requested by the caller
-;;   test-name        - test name, used for the testconfig lookup and cache key
-;;   tconfigreg       - passed through to tests:get-testconfig
-;;   exit-info        - launch:einf record shared with the monitor thread; updated in place
-;;   m                - mutex guarding the exit-info updates made in the runscript poll loop
-;;
-;; Side effects: may call (exit 1) when no testconfig can be found, creates the
-;; ".ezsteps" directory in the current directory, and caches the testconfig in
-;; *testconfigs*.
-;;
-(define (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m)
-  ;; (let-values
-  ;;  (((pid exit-status exit-code)
-  ;;    (run-n-wait fullrunscript)))
-  ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f)
-  ;; Since we should have a clean slate at this time there is no need to do 
-  ;; any of the other stuff that tests:test-set-status! does. Let's just 
-  ;; force RUNNING/n/a
-
-  ;; (thread-sleep! 0.3)
-  ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a")
-  (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) 
-  ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here
-
-  ;; if there is a runscript do it first
-  (if fullrunscript
-      (let ((pid (process-run fullrunscript)))
-	;; record the pid in the db so the monitor thread can find it when asked to kill the test
-	(rmt:test-set-top-process-pid run-id test-id pid)
-	;; poll the child: process-wait is called with #t as its second argument and the
-	;; loop repeats (after a 2 second sleep) for as long as it reports a pid of 0
-	(let loop ((i 0))
-	  (let-values
-	   (((pid-val exit-status exit-code) (process-wait pid #t)))
-	   ;; exit-info is rewritten on every poll, including polls where the child has
-	   ;; not finished yet; the values from the final poll are the ones that stick
-	   (mutex-lock! m)
-	   (launch:einf-pid-set!           exit-info  pid)         ;; (vector-set! exit-info 0 pid)
-	   (launch:einf-exit-status-set!   exit-info  exit-status) ;; (vector-set! exit-info 1 exit-status)
-	   (launch:einf-exit-code-set!     exit-info  exit-code)   ;; (vector-set! exit-info 2 exit-code)
-	   (launch:einf-rollup-status-set! exit-info  exit-code)   ;; (vector-set! exit-info 3 exit-code)  ;; rollup status
-	   (mutex-unlock! m)
-	   (if (eq? pid-val 0)
-	       (begin
-		 (thread-sleep! 2)
-		 (loop (+ i 1)))
-	       )))))
-  ;; then, if runscript ran ok (or did not get called)
-  ;; do all the ezsteps (if any)
-  (if (or ezsteps subrun)
-      (let* ((test-run-dir (tests:get-test-path-from-environment))
-             (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here?
-	      ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic
-	      ;;       ezstep names need a full re-eval here.
-	      (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
-	     ;; list of steps from the "ezsteps" section; #f when testconfig is not a hash table
-	     (ezstepslst (if (hash-table? testconfig)
-			     (hash-table-ref/default testconfig "ezsteps" '())
-			     #f)))
-	(if testconfig
-	    (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ...
-	    (begin
-	      (launch:setup)
-	      (debug:print 0 *default-log-port* "WARNING: no testconfig found for " test-name " in search path:\n  "
-			   (string-intersperse (tests:get-tests-search-path *configdat*) "\n  "))))
-	;; after all that, still no testconfig? Time to abort
-	;; NOTE: testconfig is not re-read after the launch:setup call above, so a missing
-	;;       testconfig always ends in the exit below
-	(if (not testconfig)
-	    (begin
-	      (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now")
-	      (exit 1)))
-
-	;; create a proc for the subrun if requested, save that proc in the ezsteps table as the last entry
-	;; 1. get section [runarun]
-	;; 2. unset MT_* vars
-	;; 3. fix target
-	;; 4. fix runname
-	;; 5. fix testpatt or calculate it from contour
-	;; 6. launch the run
-	;; 7. roll up the run result and or roll up the logpro processed result
-	(when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested
-            (subrun:initialize-toprun-test testconfig test-run-dir)
-	    (let* ((mt-cmd (subrun:launch-cmd test-run-dir)))
-              (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"")
-              (set! ezsteps #t) ;; set the needed flag
-	      ;; the subrun is appended as a final step named "subrun"
-	      (set! ezstepslst
-                    (append (or ezstepslst '())
-                            (list (list "subrun" (conc "{subrun=true} " mt-cmd)))))))
-
-	;; process the ezsteps
-	(if ezsteps
-	    (begin
-	      (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps"))
-	      ;; if ezsteps was defined then we are sure to have at least one step but check anyway
-	      ;; NOTE(review): ezstepslst can still be #f here if testconfig is truthy but not a
-	      ;;               hash table; length would then raise - confirm that cannot happen
-	      (if (not (> (length ezstepslst) 0))
-		  (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length")
-		  ;; walk the steps in order; prevstep carries the previous step name but is
-		  ;; not read anywhere in this loop
-		  (let loop ((ezstep (car ezstepslst))
-			     (tal    (cdr ezstepslst))
-			     (prevstep #f))
-                    (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"")
-		    ;; check exit-info (vector-ref exit-info 1)
-		    ;; a false exit-status left behind by an earlier step (or the runscript) stops the chain
-		    (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
-			(let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig))
-			      (stepname    (car ezstep)))
-			  ;; if logpro-used read in the stepname.dat file
-			  (if (and logpro-used (common:file-exists? (conc stepname ".dat")))
-			      (launch:load-logpro-dat run-id test-id stepname))
-			  ;; only move on to the next step when this one is judged good
-			  (if (steprun-good? logpro-used (launch:einf-exit-code exit-info))
-			      (if (not (null? tal))
-				  (loop (car tal) (cdr tal) stepname))
-			      (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping")))
-			(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))))))))
-
-;; launch:monitor-job
-;;
-;; Watchdog loop for a running test. It is started by launch:execute in its own
-;; thread ("monitor job") next to the thread that runs the steps.
-;;
-;; Each pass of the loop:
-;;   - samples cpu load and (at most once per update period) free disk space
-;;   - pushes the new numbers to the db when they moved by more than 10% / 0.1
-;;     or when the update period has elapsed
-;;   - decides whether the test must be killed (kill request in the db, or
-;;     runtlim exceeded); a test marked DEAD is set back to RUNNING instead
-;;   - on a kill: sends SIGTERM to every sub process of the known pids, waits
-;;     5 seconds, sends SIGKILL, records KILLED in the db and exits the process
-;;   - otherwise sleeps 3 seconds and loops while misc-flags 'keep-going is true
-;;
-;; Parameters:
-;;   run-id, test-id - identify the test record
-;;   exit-info       - launch:einf record shared with the job thread (pid is read from it)
-;;   m               - mutex held while the kill sequence inspects exit-info
-;;   work-area       - passed to tests:set-full-meta-info
-;;   runtlim         - run time limit in seconds, or #f for no limit
-;;   misc-flags      - hash table; the loop stops when 'keep-going becomes #f
-;;   item-path, fullrunscript, ezsteps, test-name, tconfigreg
-;;                   - accepted for interface compatibility, not used in the body
-;;
-;; NOTE: calc-minutes returns the elapsed time in *seconds* (there is no division
-;;       by 60) despite its name. NOTE(review): confirm the receivers of this
-;;       value expect seconds before "fixing" it.
-;;
-(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
-  (let* (;; fall back to 30 when the configured value is missing *or* not a number,
-         ;; otherwise the (+ last-sync update-period) below would raise
-         (update-period (or (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30"))
-                            30))
-         (start-seconds (current-seconds))
-         (calc-minutes  (lambda ()
-                          (inexact->exact
-                           (round
-                            (- (current-seconds) start-seconds))))))
-    ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
-    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
-    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
-
-    (let loop ((minutes   (calc-minutes))
-               (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
-               (disk-free (get-df (current-directory)))
-               (last-sync (current-seconds)))
-      #;(common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
-      (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
-             ;; new-cpu-load / new-disk-free are #f when the change is too small to report
-             (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
-                                   (delta (abs (- load cpu-load))))
-                              (if (> delta 0.1) ;; don't bother updating with small changes
-                                  load
-                                  #f)))
-             (new-disk-free (let* ((df    (if over-time ;; only get df every 30 seconds
-                                              (get-df (current-directory))
-                                              disk-free))
-                                   (delta (abs (- df disk-free))))
-                              (if (and (> df 0)
-                                       (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg
-                                  df
-                                  #f)))
-             (do-sync       (or new-cpu-load new-disk-free over-time))
-
-             (test-info   (rmt:get-test-info-by-id run-id test-id))
-             (state       (db:test-get-state test-info))
-             (status      (db:test-get-status test-info))
-             (kill-reason  "no kill reason specified")
-             (kill-job?    #f))
-        #;(common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
-        (cond
-         ((test-get-kill-request run-id test-id)
-          (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
-          (set! kill-job? #t))
-         ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
-          (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
-          (set! kill-job? #t))
-         ((equal? status "DEAD")
-          ;; we are obviously still alive, so undo the DEAD marking rather than killing the test
-          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
-          ;; NOTE(review): test-id and 'foo are passed where the other call site in this file
-          ;;               passes test-name and item-path - presumably the callee accepts a
-          ;;               numeric id; confirm
-          (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
-          ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
-          (set! kill-job? #f)))
-
-        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
-        (launch:handle-zombie-tests run-id)
-        (when do-sync
-          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
-          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
-          #;(common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
-          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
-          #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
-
-        (if kill-job?
-            (begin
-              (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
-              (mutex-lock! m)
-              ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
-              ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
-              ;;       between tries?
-              (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0))
-                     (pid2 (rmt:test-get-top-process-pid run-id test-id))
-                     (pids (delete-duplicates (filter number? (list pid1 pid2)))))
-                (if (not (null? pids))
-                    (begin
-                      (for-each
-                       (lambda (pid)
-                         ;; the outer handler still catches failures of process:get-sub-pids etc.
-                         (handle-exceptions
-                          exn
-                          (begin
-                            (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.")
-                            (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
-                          (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid)
-                          (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask))
-                          ;; Polite pass: SIGTERM every sub process. Each signal is guarded on its
-                          ;; own so that one already-exited sub process cannot abort the sweep and
-                          ;; cause the grace period and the SIGKILL pass below to be skipped.
-                          (for-each
-                           (lambda (pid-num)
-                             (handle-exceptions
-                              exn
-                              (debug:print-info 0 *default-log-port* "Unable to send TERM to pid " pid-num ", possibly already exited.")
-                              (process-signal pid-num signal/term)))
-                           (process:get-sub-pids pid))
-                          ;; grace period for the processes to shut down cleanly
-                          (thread-sleep! 5)
-                          ;; Forceful pass: SIGKILL whatever is still around.
-                          (for-each
-                           (lambda (pid-num)
-                             (handle-exceptions
-                              exn
-                              #f
-                              (process-signal pid-num signal/kill)))
-                           (process:get-sub-pids pid))))
-                       pids)
-                      ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel?  If not, should it?
-                      (tests:test-set-status! run-id test-id "KILLED"  "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt
-                    (begin
-                      (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2)
-                      (tests:test-set-status! run-id test-id "KILLED"  "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt
-                      )))
-              (mutex-unlock! m)
-              ;; no point in sticking around. Exit now. But run end of run before exiting?
-              (launch:end-of-run-check run-id)
-              (exit)))
-        (if (hash-table-ref/default misc-flags 'keep-going #f)
-            (begin
-              (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
-              ;; NOTE: Checking twice for keep-going is intentional - the flag may have been
-              ;;       cleared by the launching thread while we were asleep
-              (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
-                  (loop (calc-minutes)
-                        (or new-cpu-load cpu-load)
-                        (or new-disk-free disk-free)
-                        (if do-sync (current-seconds) last-sync)))))))
-    ;; final update once the loop has been told to stop
-    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)))
-
-;; runs:set-megatest-env-vars
-;;
-;; Export the MT_* environment variables for run `run-id` into the current process.
-;; Optional keyword arguments (inkeys, inrunname, inkeyvals, intarget) supply values
-;; that would otherwise be looked up; testname and itempath, when given, additionally
-;; set MT_TEST_NAME, MT_ITEMPATH and (if a linktree is known) MT_TEST_RUN_DIR.
-;;
-;; The key/value pairs for a run are cached in *env-vars-by-run-id* on first use.
-;;
-(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f))
-  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
-  (let* ((target    (or intarget
-                        (common:args-get-target)
-                        (get-environment-variable "MT_TARGET")))
-         (keys      (or inkeys    (rmt:get-keys)))
-         (keyvals   (or inkeyvals (keys:target->keyval keys target)))
-         (vals      (hash-table-ref/default *env-vars-by-run-id* run-id #f))
-         (link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))
-    (when testname (setenv "MT_TEST_NAME" testname))
-    (when itempath (setenv "MT_ITEMPATH"  itempath))
-
-    (if link-tree
-        (setenv "MT_LINKTREE" link-tree)
-        (debug:print-error 0 *default-log-port* "linktree not set, should be set in megatest.config in [setup] section."))
-
-    ;; first request for this run-id: build the cache entry from the key/value pairs
-    (unless vals
-      (let ((fresh (make-hash-table)))
-        (hash-table-set! *env-vars-by-run-id* run-id fresh)
-        (set! vals fresh)
-        (for-each
-         (lambda (kv)
-           (hash-table-set! fresh (car kv) (cadr kv)))
-         keyvals)))
-
-    ;; export everything held in the cache entry
-    (hash-table-for-each
-     vals
-     (lambda (name value)
-       (debug:print 2 *default-log-port* "setenv " name " " value)
-       (safe-setenv name value)))
-    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1")
-    ;;(BB> "*env-vars-by-run-id*/runid("run-id" vals="(hash-table->alist vals))
-
-    ;; an MT_TARGET already present in the environment wins over the computed one
-    (unless (get-environment-variable "MT_TARGET")
-      (setenv "MT_TARGET" target))
-
-    ;; Apply the [env-override] section. There was a case where the lookup raised
-    ;; because *configdat* was #f, so the whole thing is retried (with a forced
-    ;; re-read of the setup) up to five times before giving up.
-    ;; NOTE: the environment is tainted in this block.
-    (let retry ((attempt 0))
-      (handle-exceptions
-          exn
-          (let ((call-chain (get-call-chain))
-                (msg        ((condition-property-accessor 'exn 'message) exn)))
-            (cond
-             ((< attempt 5) ;; this call is colliding, do some crude stuff to fix it.
-              (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" attempt)
-              (launch:setup force-reread: #t)
-              (retry (+ attempt 1)))
-             (else
-              (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " attempt " times. Message: " msg)
-              (debug:print 0 *default-log-port* "Call chain:")
-              (with-output-to-port *default-log-port*
-                (lambda ()
-                  (print "*configdat* is >>"*configdat*"<<")
-                  (pp *configdat*)
-                  (pp call-chain)))
-              (exit 1))))
-          ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5")
-          ;; hash-table? is false for #f as well, so one test covers both cases
-          (when (not (hash-table? *configdat*))
-            (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen.  Brute force reread.")
-            ;;(BB> "ERROR: *configdat* was inaccessible! This should never happen.  Brute force reread.")
-            (thread-sleep! 2) ;; assuming nfs lag.
-            (launch:setup force-reread: #t))
-          (alist->env-vars (hash-table-ref/default *configdat* "env-override" '()))))
-    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 2")
-
-    ;; take the opportunity to put MT_RUNNAME in the environment as well
-    (let ((runname (or inrunname (rmt:get-run-name-from-id run-id))))
-      (if runname
-          (setenv "MT_RUNNAME" runname)
-          (debug:print-error 0 *default-log-port* "no value for runname for id " run-id)))
-    (setenv "MT_RUN_AREA_HOME" *toppath*)
-
-    ;; set these again now that the env-override section has been applied
-    (when testname (setenv "MT_TEST_NAME" testname))
-    (when itempath (setenv "MT_ITEMPATH"  itempath))
-    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 3")
-
-    ;; MT_TEST_RUN_DIR = linktree/target/runname/testname[/itempath]
-    (when (and testname link-tree)
-      (let ((item-suffix (if (and itempath
-                                  (not (equal? itempath "")))
-                             (conc "/" itempath)
-                             "")))
-        (setenv "MT_TEST_RUN_DIR"
-                (conc (getenv "MT_LINKTREE")  "/"
-                      (getenv "MT_TARGET")    "/"
-                      (getenv "MT_RUNNAME")   "/"
-                      (getenv "MT_TEST_NAME")
-                      item-suffix))))))
-
-(define (launch:execute encoded-cmd)
-  (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
-	 (tconfigreg #f))
-    (setenv "MT_CMDINFO" encoded-cmd)
-    ;;(bb-check-path msg: "launch:execute incoming")
-    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
-	;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
-	(let* ((testpath  (assoc/default 'testpath  cmdinfo))  ;; testpath is the test spec area
-	       (top-path  (assoc/default 'toppath   cmdinfo))
-	       (work-area (assoc/default 'work-area cmdinfo))  ;; work-area is the test run area
-	       (test-name (assoc/default 'test-name cmdinfo))
-	       (runscript (assoc/default 'runscript cmdinfo))
-	       (ezsteps   (assoc/default 'ezsteps   cmdinfo))
-	       (subrun    (assoc/default 'subrun    cmdinfo))
-	       ;; (runremote (assoc/default 'runremote cmdinfo))
-	       ;; (transport (assoc/default 'transport cmdinfo))  ;; not used
-	       ;; (serverinf (assoc/default 'serverinf cmdinfo))
-	       ;; (port      (assoc/default 'port      cmdinfo))
-	       (serverurl (assoc/default 'serverurl cmdinfo))
-	       (homehost  (assoc/default 'homehost  cmdinfo))
-	       (run-id    (assoc/default 'run-id    cmdinfo))
-	       (test-id   (assoc/default 'test-id   cmdinfo))
-	       (target    (assoc/default 'target    cmdinfo))
-	       (areaname  (assoc/default 'areaname  cmdinfo))
-	       (itemdat   (assoc/default 'itemdat   cmdinfo))
-	       (env-ovrd  (assoc/default 'env-ovrd  cmdinfo))
-	       (set-vars  (assoc/default 'set-vars  cmdinfo)) ;; pre-overrides from -setvar
-	       (runname   (assoc/default 'runname   cmdinfo))
-	       (megatest  (assoc/default 'megatest  cmdinfo))
-	       (runtlim   (assoc/default 'runtlim   cmdinfo))
-	       (contour   (assoc/default 'contour   cmdinfo))
-	       (item-path (item-list->path itemdat))
-	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
-	       (keys      #f)
-	       (keyvals   #f)
-	       (fullrunscript (if (not runscript)
-                                  #f
-                                  (if (substring-index "/" runscript)
-                                      runscript ;; use unadultered if contains slashes
-                                      (let ((fulln (conc work-area "/" runscript)))
-	                                  (if (and (common:file-exists? fulln)
-                                                   (file-execute-access? fulln))
-                                              fulln
-                                              runscript))))) ;; assume it is on the path
-               (check-work-area           (lambda ()
-                                            ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
-                                            (let loop ((count 0))
-                                              (if (or (common:directory-exists? work-area)
-                                                      (> count 10))
-                                                  (change-directory work-area)
-                                                  (begin
-                                                    (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
-                                                    (thread-sleep! 10)
-                                                    (loop (+ count 1)))))
-
-                                            (if (not (string=?  (common:real-path work-area)(common:real-path (current-directory))))
-                                                (begin
-                                                  (debug:print 0 *default-log-port*
-                                                               "INFO: we are expecting to be in directory " work-area "\n"
-                                                               "     but we are actually in the directory " (current-directory) "\n"
-                                                               "     doing another change dir.")
-                                                  (change-directory work-area)))
-                                            
-                                            ;; spot check that the files in testpath are available. Too often NFS delays cause problems here.
-                                            (let ((files      (glob (conc testpath "/*")))
-                                                  (bad-files '()))
-                                              (for-each
-                                               (lambda (fullname)
-                                                 (let* ((fname (pathname-strip-directory fullname))
-                                                        (targn (conc work-area "/" fname)))
-                                                   (if (not (file-exists? targn))
-                                                       (set! bad-files (cons fname bad-files)))))
-                                               files)
-                                              (if (not (null? bad-files))
-                                                  (begin
-                                                    (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. Re-running the copy command.")
-                                                    (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", "))
-                                                    (launch:test-copy testpath work-area))))
-                                            ;; one more time, change to the work-area directory
-                                            (change-directory work-area)))
-	       ) ;; let*
-
-	  (if contour (setenv "MT_CONTOUR" contour))
-	  
-	  ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ...
-	  ;;
-	  (setenv "MT_TESTSUITENAME" areaname)
-	  (setenv "MT_RUN_AREA_HOME" top-path)
-	  (set! *toppath* top-path)
-          (change-directory *toppath*) ;; temporarily switch to the run area home
-	  (setenv "MT_TEST_RUN_DIR"  work-area)
-
-	  (launch:setup) ;; should be properly in the run area home now
-
-	  (if contour (setenv "MT_CONTOUR" contour))
-	  
-	  ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ...
-	  ;;
-	  (setenv "MT_TESTSUITENAME" areaname)
-	  (setenv "MT_RUN_AREA_HOME" top-path)
-	  (set! *toppath* top-path)
-          (change-directory *toppath*) ;; temporarily switch to the run area home
-	  (setenv "MT_TEST_RUN_DIR"  work-area)
-
-	  (launch:setup) ;; should be properly in the run area home now
-          
-	  (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path
-	  (let ((sighand (lambda (signum)
-			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
-			   (if (eq? signum signal/stop)
-			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
-			   (set! *time-to-exit* #t)
-			   (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...")
-			   (let ((th1 (make-thread (lambda ()
-                                                     (print "set test to COMPLETED/ABORT begin.")
-						     (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal")
-                                                     (print "set test to COMPLETED/ABORT complete.")
-						     (print "Killed by signal " signum ". Exiting")
-						     (exit 1))))
-				 (th2 (make-thread (lambda ()
-						     (thread-sleep! 20)
-						     (debug:print 0 *default-log-port* "Done")
-						     (exit 4)))))
-			     (thread-start! th2)
-			     (thread-start! th1)
-			     (thread-join! th2)))))
-	    (set-signal-handler! signal/int sighand)
-	    (set-signal-handler! signal/term sighand)
-	    ) ;; (set-signal-handler! signal/stop sighand)
-	  
-	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
-	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
-	  ;;
-	  (let* ((test-info (rmt:get-test-info-by-id run-id test-id))
-		 (test-host (if test-info
-				(db:test-get-host        test-info)
-				(begin
-				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
-				  (exit))))
-		 (test-pid  (db:test-get-process_id  test-info)))
-	    (cond
-             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
-	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
-	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
-	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
-
-              (rmt:general-call 'set-test-start-time #f test-id)
-              (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
-	      ) ;; prime it for running
-	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
-	      (if (process:alive-on-host? test-host test-pid)
-		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
-		  (exit)))
-	     ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
-	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
-              (rmt:general-call 'set-test-start-time #f test-id)
-	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
-	      )
-	     (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
-	      (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
-	      (exit))))
-
-          ;; cleanup prior execution's steps
-          (rmt:delete-steps-for-test! run-id test-id)
-          
-	  (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name))
-	  (set! keys       (rmt:get-keys))
-	  ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process
-	  ;; one of these is defunct/redundant ...
-	  (if (not (launch:setup force-reread: #t))
-	      (begin
-		(debug:print 0 *default-log-port* "Failed to setup, exiting") 
-		;; (sqlite3:finalize! db)
-		;; (sqlite3:finalize! tdb)
-		(exit 1)))
-          ;; validate that the test run area is available
-          (check-work-area)
-          
-          ;; still need to go back to run area home for next couple steps
-	  (change-directory *toppath*) 
-
-	  ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This 
-	  ;;       seems non-ideal but could well break stuff
-	  ;;    BUG? BUG? BUG?
-	  
-	  (let ((rconfig (full-runconfigs-read)) ;; (read-config (conc  *toppath* "/runconfigs.config") #f #t sections: (list "default" target))))
-		(wconfig (configf:read-config "waivers.config" #f #t sections: `( "default" ,target )))) ;; read the waivers config if it exists
-	    ;; (setup-env-defaults (conc *toppath* "/runconfigs.config") run-id (make-hash-table) keyvals target)
-	    ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id))
-	    ;; Now have runconfigs data loaded, set environment vars
-	    (for-each
-	     (lambda (section)
-	       (for-each
-		(lambda (varval)
-		  (let ((var (car varval))
-			(val (cadr varval)))
-		    (if (and (string? var)(string? val))
-			(begin
-			  (safe-setenv var (configf:eval-string-in-environment val))) ;; val)
-			(debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val))))
-		(configf:get-section rconfig section)))
-	     (list "default" target)))
-          ;;(bb-check-path msg: "launch:execute post block 1")
-
-	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
-	  (let loop ((count 0))
-	    (if (or (common:file-exists? work-area)
-		    (> count 10))
-		(change-directory work-area)
-		(begin
-		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
-		  (thread-sleep! 10)
-		  (loop (+ count 1)))))
-
-          ;; now we can switch to the work-area?
-          (change-directory work-area)
-          ;;(bb-check-path msg: "launch:execute post block 1.5")
-	  ;; (change-directory work-area) 
-	  (set! keyvals    (keys:target->keyval keys target))
-	  ;; apply pre-overrides before other variables. The pre-override vars must not
-	  ;; clobbers things from the official sources such as megatest.config and runconfigs.config
-	  (if (string? set-vars)
-	      (let ((varpairs (string-split set-vars ",")))
-		(debug:print 4 *default-log-port* "varpairs: " varpairs)
-		(map (lambda (varpair)
-		       (let ((varval (string-split varpair "=")))
-			 (if (eq? (length varval) 2)
-			     (let ((var (car varval))
-				   (val (cadr varval)))
-			       (debug:print 1 *default-log-port* "Adding pre-var/val " var " = " val " to the environment")
-			       (setenv var val)))))
-		     varpairs)))
-          ;;(bb-check-path msg: "launch:execute post block 2")
-	  (for-each
-	   (lambda (varval)
-	     (let ((var (car varval))
-		   (val (cadr varval)))
-	       (if val
-		   (setenv var val)
-		   (begin
-		     (debug:print-error 0 *default-log-port* "required variable " var " does not have a valid value. Exiting")
-		     (exit)))))
-	     (list 
-	      (list  "MT_TEST_RUN_DIR" work-area)
-	      (list  "MT_TEST_NAME" test-name)
-	      (list  "MT_ITEM_INFO" (conc itemdat))
-	      (list  "MT_ITEMPATH"  item-path)
-	      (list  "MT_RUNNAME"   runname)
-	      (list  "MT_MEGATEST"  megatest)
-	      (list  "MT_TARGET"    target)
-	      (list  "MT_LINKTREE"  (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
-	      (list  "MT_TESTSUITENAME" (common:get-testsuite-name))))
-          ;;(bb-check-path msg: "launch:execute post block 3")
-
-	  (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
-          ;;(bb-check-path msg: "launch:execute post block 4")
-	  ;; (change-directory top-path)
-	  ;; Can setup as client for server mode now
-	  ;; (client:setup)
-
-	  
-	  ;; environment overrides are done *before* the remaining critical envars.
-	  (alist->env-vars env-ovrd)
-          ;;(bb-check-path msg: "launch:execute post block 41")
-	  (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
-          ;;(bb-check-path msg: "launch:execute post block 42")
-	  (set-item-env-vars itemdat)
-          ;;(bb-check-path msg: "launch:execute post block 43")
-          (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars")))
-            (if blacklist
-		(let ((vars (string-split blacklist)))
-		  (save-environment-as-files "megatest" ignorevars: vars)
-		  (for-each (lambda (var)
-			      (unsetenv var))
-			    vars))
-                (save-environment-as-files "megatest")))
-          ;;(bb-check-path msg: "launch:execute post block 44")
-	  ;; open-run-close not needed for test-set-meta-info
-	  ;; (tests:set-full-meta-info #f test-id run-id 0 work-area)
-	  ;; (tests:set-full-meta-info test-id run-id 0 work-area)
-	  (tests:set-full-meta-info #f test-id run-id 0 work-area 10)
-
-	  ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here
-
-	  (if (args:get-arg "-xterm")
-	      (set! fullrunscript "xterm")
-	      (if (and fullrunscript 
-		       (common:file-exists? fullrunscript)
-		       (not (file-execute-access? fullrunscript)))
-		  (system (conc "chmod ug+x " fullrunscript))))
-
-	  ;; We are about to actually kick off the test
-	  ;; so this is a good place to remove the records for 
-	  ;; any previous runs
-	  ;; (db:test-remove-steps db run-id testname itemdat)
-	  ;; now is also a good time to write the .testconfig file
-	  (let* ((tconfig-fname   (conc work-area "/.testconfig"))
-		 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
-		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs)))
-	    (configf:write-alist tconfig tconfig-tmpfile)
-	    (file-move tconfig-tmpfile tconfig-fname #t))
-	  ;; 
-	  (let* ((m            (make-mutex))
-		 (kill-job?    #f)
-		 (exit-info    (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status
-		 (job-thread   #f)
-		 ;; (keep-going   #t)
-		 (misc-flags   (let ((ht (make-hash-table)))
-				 (hash-table-set! ht 'keep-going #t)
-				 ht))
-		 (runit        (lambda ()
-				 (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m)))
-		 (monitorjob   (lambda ()
-				 (launch:monitor-job  run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)))
-		 (th1          (make-thread monitorjob "monitor job"))
-		 (th2          (make-thread runit "run job")))
-	    (set! job-thread th2)
-	    (thread-start! th1)
-	    (thread-start! th2)
-	    (thread-join! th2)
-	    (debug:print-info 0 *default-log-port* "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...")
-	    (hash-table-set! misc-flags 'keep-going #f)
-	    (thread-join! th1)
-	    (thread-sleep! 1)       ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec.
-	    (mutex-lock! m)
-	    (let* ((item-path (item-list->path itemdat))
-		   ;; only state and status needed - use lazy routine
-		   (testinfo  (rmt:get-testinfo-state-status run-id test-id)))
-	      ;; Am I completed?
-	      (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED"))
-		  (let ((new-state  (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status
-				                                        ;; "COMPLETED"							                ;; (db:test-get-state testinfo)))   ;; else preseve the state as set within the test
-				    )
-			(new-status (cond
-				     ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1)
-				     ((eq? (launch:einf-rollup-status exit-info) 0)     ;; (vector-ref exit-info 3)
-				      ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO)
-				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
-				     ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL")  ;; (vector-ref exit-info 3)
-				     ((eq? (launch:einf-rollup-status exit-info) 2)	     ;;	(vector-ref exit-info 3)
-				      ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN)
-				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
-				     ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK")
-				     ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED")
-				     ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT")
-				     ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP")
-				     (else "FAIL")))) ;; (db:test-get-status testinfo)))
-		    (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info))
-		    (tests:test-set-status! run-id 
-					    test-id 
-					    new-state
-					    new-status
-					    (args:get-arg "-m") #f)
-		    ;; need to update the top test record if PASS or FAIL and this is a subtest
-		    ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status!
-		    ))
-	      ;; for automated creation of the rollup html file this is a good place...
-	      (if (not (equal? item-path ""))
-		  (tests:summarize-items run-id test-id test-name #f))
-	      (tests:summarize-test run-id test-id)  ;; don't force - just update if no
-	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
-	    (mutex-unlock! m)
-            (launch:end-of-run-check run-id )
-	    (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " 
-			 work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")
-	    (if (not (launch:einf-exit-status exit-info))
-		(exit 4))))
-        )))
-
-;; Report whether process `pid` still appears to exist on `host`.
-;;
-;; Runs "pstree -A <pid>" on the host through ssh and treats any line of
-;; output as "alive". When host or pid is #f, or host is "n/a", no remote
-;; check is made and #t is returned.
-;;
-;; NOTE(review): an empty result is reported as "not alive", so an ssh
-;; command that yields no stdout for any reason looks the same as a dead
-;; process - confirm that is acceptable to callers.
-;;
-(define (launch:is-test-alive host pid)
-  (cond
-   ;; nothing we can check remotely - assume the test is alive
-   ((not (and host pid (not (equal? host "n/a"))))
-    #t)
-   (else
-    (let* ((remote-cmd (conc "ssh " host " pstree -A " pid))
-           (lines      (with-input-from-pipe remote-cmd read-lines)))
-      (print "cmd: " remote-cmd "\n op: " lines)
-      (not (eq? (length lines) 0))))))
- 
-;; Scan the RUNNING, LAUNCHED and REMOTEHOSTSTART tests of run-id and request
-;; a kill (state "KILLREQ", status "n/a") for every test that looks dead.
-;;
-;; A test is considered dead when event-time + duration + 600 seconds is
-;; already in the past (no duration update for about ten minutes) AND
-;; launch:is-test-alive reports that its top process is gone.
-;;
-;; Returns the number of tests for which a kill was requested, 0 when the
-;; run has no tests in those states.
-;;
-(define (launch:kill-tests-if-dead run-id)
-  (let ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
-    (if (not (pair? running-tests))
-        0 ;; nothing to scan; car/cdr below require a non-empty list
-        (let loop ((running-test (car running-tests))
-                   (tal          (cdr running-tests))
-                   (kill-cnt     0))
-          (let* ((test-name  (vector-ref running-test 2))
-                 (item-path  (vector-ref running-test 11))
-                 (test-id    (vector-ref running-test 0))
-                 (host       (vector-ref running-test 6))
-                 (pid        (rmt:test-get-top-process-pid run-id test-id))
-                 (event-time (vector-ref running-test 5))
-                 (duration   (vector-ref running-test 12))
-                 (curr-time  (current-seconds))
-                 ;; the cheap time check runs first so the ssh probe is only
-                 ;; made for tests that have been silent for 10 minutes
-                 (dead?      (and (< (+ event-time duration 600) curr-time)
-                                  (not (launch:is-test-alive host pid))))
-                 (new-cnt    (if dead? (+ kill-cnt 1) kill-cnt)))
-            (if dead?
-                (begin
-                  (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed")
-                  (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f)))
-            (if (null? tal)
-                new-cnt
-                (loop (car tal) (cdr tal) new-cnt)))))))
-
-;; DO NOT USE - caching of configs is handled in launch:setup now.
-;;
-;; Legacy helper. When *configdat* is loaded and one of -run, -runtests or
-;; -execute was given, writes *configdat* to
-;;   <linktree>/<target>/<runname>/.megatest.cfg.<current-seconds>
-;; and points the symlink .megatest.cfg-<version>-<fossil-hash> in the same
-;; directory at it. Nothing is cached until the .runconfig.<version>-<hash>
-;; file exists in that directory. The write (but not the symlink) is skipped
-;; when running inside a test.
-;;
-(define (launch:cache-config)
-  (when (and *configdat*
-             (or (args:get-arg "-run")
-                 (args:get-arg "-runtests")
-                 (args:get-arg "-execute")))
-    (let* ((linktree (common:get-linktree))
-           (target   (common:args-get-target exit-if-bad: #t))
-           (runname  (or (args:get-arg "-runname")
-                         (args:get-arg ":runname")
-                         (getenv "MT_RUNNAME")))
-           (fulldir  (conc linktree "/" target "/" runname)))
-      (cond
-       ;; can't proceed without linktree
-       ((not (and linktree (common:file-exists? linktree)))
-        (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs."))
-       (else
-        (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
-        ;; need to protect with exception handler
-        (unless (common:file-exists? fulldir)
-          (create-directory fulldir #t))
-        (when (and target
-                   runname
-                   (common:file-exists? fulldir))
-          (let* ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
-                 (targfile (conc fulldir "/.megatest.cfg-"  megatest-version "-" megatest-fossil-hash))
-                 (rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
-            ;; only cache megatest.config AFTER runconfigs has been cached
-            (when (common:file-exists? rconfig)
-              (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
-              (unless (common:in-running-test?)
-                (configf:write-alist *configdat* tmpfile))
-              (system (conc "ln -sf " tmpfile " " targfile))))))))))
-
-
-;; gather available information, if legit read configs in this order:
-;;
-;;   if have cache;
-;;      read it and return it
-;;   else
-;;     megatest.config     (do not cache)
-;;     runconfigs.config   (cache if all vars avail)
-;;     megatest.config     (cache if all vars avail)
-;;   returns:
-;;     *toppath*
-;;   side effects:
-;;     sets; *configdat*    (megatest.config info)
-;;           *runconfigdat* (runconfigs.config info)
-;;           *configstatus* (status of the read data)
-;;
-;; Keyword args force-reread and areapath are handed unchanged to
-;; launch:setup-body, which does the actual work. The call is skipped when
-;; *toppath* is set, *configstatus* is 'fulldata and force-reread is #f.
-;;
-;; The whole operation is serialized on *launch-setup-mutex*. dynamic-wind
-;; guarantees the mutex is released even when launch:setup-body raises, so a
-;; failed setup cannot leave the mutex locked for later callers.
-;;
-(define (launch:setup #!key (force-reread #f) (areapath #f))
-  (dynamic-wind
-      (lambda ()
-        (mutex-lock! *launch-setup-mutex*))
-      (lambda ()
-        (if (and *toppath*
-                 (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all
-            (begin
-              (debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
-              *toppath*)
-            (launch:setup-body force-reread: force-reread areapath: areapath)))
-      (lambda ()
-        (mutex-unlock! *launch-setup-mutex*))))
-
-;; Work out where the cached megatest.config / runconfigs.config files for
-;; the current run or test would live.
-;;
-;; The cache directory is the test directory <rundir>/<testname> when
-;; common:directory-writable? accepts it, otherwise the run directory
-;; <linktree>/<target>/<runname> under the same condition, otherwise none.
-;;
-;; Returns a pair (mtcachef . rccachef); both are #f when no cache
-;; directory is available.
-;;
-;; NOTE: areapath, toppath and mtconfig are accepted but not referenced in
-;; this body, and the use-cache value is fetched but not consulted.
-;;
-(define (launch:get-cache-file-paths areapath toppath target mtconfig)
-  (let* ((use-cache (common:use-cache?))
-         (runname   (common:args-get-runname))
-         (linktree  (common:get-linktree))
-         (testname  (common:get-full-test-name))
-         ;; presumably common:directory-writable? returns the path or #f - verify
-         (rundir    (and runname target linktree
-                         (common:directory-writable? (conc linktree "/" target "/" runname))))
-         (testdir   (and rundir testname
-                         (common:directory-writable? (conc rundir "/" testname))))
-         (cachedir  (or testdir rundir))
-         ;; <cachedir>/<prefix><version>-<fossil-hash>, or #f without a cachedir
-         (cache-path (lambda (prefix)
-                       (and cachedir
-                            (conc cachedir "/" prefix megatest-version "-" megatest-fossil-hash))))
-         (mtcachef  (cache-path ".megatest.cfg-"))
-         (rccachef  (cache-path ".runconfigs.cfg-")))
-    (debug:print-info 6 *default-log-port*
-                      "runname=" runname
-                      "\n  linktree=" linktree
-                      "\n  testname=" testname
-                      "\n  rundir=" rundir
-                      "\n  testdir=" testdir
-                      "\n  cachedir=" cachedir
-                      "\n  mtcachef=" mtcachef
-                      "\n  rccachef=" rccachef)
-    (cons mtcachef rccachef)))
-
-;; Read megatest.config and runconfigs.config (or their cached alist copies)
-;; and publish the result in the globals *configdat*, *runconfigdat*,
-;; *configinfo*, *configstatus* and *toppath*.
-;;
-;; Keyword args:
-;;   force-reread - when true, do not short-circuit on 'fulldata and do not
-;;                  read from the cache files
-;;   areapath     - candidate top path, used when *toppath* is not yet set
-;;
-;; Flow:
-;;   0. already have 'fulldata and *toppath* and no force-reread: return *toppath*
-;;   1. both cache files exist, use-cache is true and MT_RUN_AREA_HOME is
-;;      set: read the two cached alists, status becomes 'fulldata
-;;   2. both cache file paths are known (files need not exist): read
-;;      megatest.config, runconfigs.config, megatest.config again, then try
-;;      to write both cache files
-;;   3. otherwise: read what is available, status becomes 'partial
-;;   then: make sure the linktree dir and the <toppath>/lt link exist, export
-;;   MT_RUN_AREA_HOME and MT_TESTSUITENAME, try once more to write missing
-;;   cache files, and merge in any -append-config file.
-;;
-;; Returns *toppath*, which is forced to #f when it is not an existing
-;; directory. Calls (exit 1) when no area is found in branch 2 or the
-;; linktree dir cannot be created, and (exit 2) when branch 3 finds no config.
-;;
-(define (launch:setup-body #!key (force-reread #f) (areapath #f))
-  (if (and (eq? *configstatus* 'fulldata)
-	   *toppath*
-	   (not force-reread)) ;; no need to reprocess
-      *toppath*   ;; return toppath
-      (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting.  We do not have *configdat*.  Bootstrapping problem here.
-	     (toppath  (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath
-	     (target   (common:args-get-target))
-	     (sections (if target (list "default" target) #f)) ;; for runconfigs
-	     (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config 
-             (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
-	     ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ...
-	     (mtcachef   (if (null? cachefiles)
-			     #f
-			     (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-"  megatest-version "-" megatest-fossil-hash)))
-	     (rccachef   (if (null? cachefiles)
-			     #f
-			     (cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-"  megatest-version "-" megatest-fossil-hash)))
-	      ;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?)))))
-	(set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource
-        ;;(BB> "launch:setup-body -- cachefiles="cachefiles)
-	(cond
-	 ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME
-	 ((and (not force-reread)
-	       mtcachef  rccachef
-	       use-cache
-	       (get-environment-variable "MT_RUN_AREA_HOME")
-	       (common:file-exists? mtcachef)
-	       (common:file-exists? rccachef))
-          ;;(BB> "launch:setup-body -- cond branch 1 - use-cache")
-          (set! *configdat*    (configf:read-alist mtcachef))
-          ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*)
-	  (set! *runconfigdat* (configf:read-alist rccachef))
-	  (set! *configinfo*   (list *configdat*  (get-environment-variable "MT_RUN_AREA_HOME")))
-	  (set! *configstatus* 'fulldata)
-	  (set! *toppath*      (get-environment-variable "MT_RUN_AREA_HOME"))
-	  *toppath*)
-	 ;; there are no existing cached configs, do full reads of the configs and cache them
-	 ;; we have all the info needed to fully process runconfigs and megatest.config
-	 ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it?
-	       mtcachef
-	       rccachef) ;; BB- why are we doing this without asking if caching is desired?
-          ;;(BB> "launch:setup-body -- cond branch 2")
-	  (let* ((first-pass    (configf:find-and-read-config        ;; NB// sets MT_RUN_AREA_HOME as side effect
-				 mtconfig
-				 environ-patt: "env-override"
-				 given-toppath: toppath
-				 pathenvvar: "MT_RUN_AREA_HOME"))
-		 ;; NOTE(review): (car first-pass) below is evaluated before the
-		 ;; (if first-pass ...) check further down, so a #f first-pass with
-		 ;; no toppath would fail here. Also car is what later becomes
-		 ;; *configdat* while cadr is what is used as the top path; the
-		 ;; (string? toppath) test appears to be what rescues that case - confirm.
-		 (first-rundat  (let ((toppath (if toppath 
-						   toppath
-						   (car first-pass))))
-				  (configf:read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now.
-				   (conc (if (string? toppath)
-					     toppath
-					     (get-environment-variable "MT_RUN_AREA_HOME"))
-					 "/runconfigs.config")
-				   *runconfigdat* #t 
-				   sections: sections))))
-	    (set! *runconfigdat* first-rundat)
-	    (if first-pass  ;; 
-		(begin
-                  ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass")
-		  (set! *configdat*  (car first-pass))
-                  ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*)
-		  (set! *configinfo* first-pass)
-		  (set! *toppath*    (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
-		  (set! toppath      *toppath*)
-		  (if (not *toppath*)
-		      (begin
-			(debug:print-error 0 *default-log-port* "you are not in a megatest area!")
-			(exit 1)))
-		  (setenv "MT_RUN_AREA_HOME" *toppath*)
-		  ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
-		  (let* ((keys         (rmt:get-keys))
-			 (key-vals     (keys:target->keyval keys target))
-			 (linktree     (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f)))
-					;     (if *configdat*
-					; 	   (configf:lookup *configdat* "setup" "linktree")
-					; 	   (conc *toppath* "/lt"))))
-			 (second-pass  (configf:find-and-read-config
-					mtconfig
-					environ-patt: "env-override"
-					given-toppath: toppath
-					pathenvvar: "MT_RUN_AREA_HOME"))
-			 (runconfigdat (begin     ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config
-					 (for-each (lambda (kt)
-						     (setenv (car kt) (cadr kt)))
-						   key-vals)
-					 (configf:read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
-						      sections: sections)))
-                         (cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
-                         (mtcachef     (car cachefiles))
-                         (rccachef     (cdr cachefiles)))
-                    ;;  trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
-                    ;; TODO - consider 1) using simple-lock to bracket cache write
-                    ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.
-
-		    (if rccachef
-                        (common:fail-safe
-                         (lambda ()
-                           (configf:write-alist runconfigdat rccachef))
-                         (conc "Could not write cache file - "rccachef)))
-                    (if mtcachef
-                        (common:fail-safe
-                         (lambda ()
-                           (configf:write-alist *configdat* mtcachef))
-                         (conc "Could not write cache file - "mtcachef)))
-		    (set! *runconfigdat* runconfigdat)
-		    (if (and rccachef mtcachef) (set! *configstatus* 'fulldata))))
-		;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table
-		(set! *configdat* (make-hash-table))
-		)))
-
-	 ;; else read what you can and set the flag accordingly
-	 ;; here at least one of mtcachef / rccachef is missing
-	 (else
-          ;;(BB> "launch:setup-body -- cond branch 3 - else")
-	  (let* ((cfgdat   (configf:find-and-read-config 
-			    (or (args:get-arg "-config") "megatest.config")
-			    environ-patt: "env-override"
-			    given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
-			    pathenvvar: "MT_RUN_AREA_HOME")))
-
-            (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat)))
-		(let* ((toppath  (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat)))
-		       (rdat     (configf:read-config (conc toppath  ;; convert this to use runconfig:read!
-						    "/runconfigs.config") *runconfigdat* #t sections: sections)))
-		  (set! *configinfo*   cfgdat)
-		  (set! *configdat*    (car cfgdat))
-		  (set! *runconfigdat* rdat)
-		  (set! *toppath*      toppath)
-		  (set! *configstatus* 'partial))
-		(begin
-		  (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
-		  (exit 2))))))
-	;; COND ends here.
-	
-	;; additional house keeping
-	;; NOTE(review): the (conc *toppath* "/lt") fallback presumably always
-	;; yields a string, which would make the "linktree not defined" branch
-	;; below unreachable - confirm.
-	(let* ((linktree (or (common:get-linktree)
-			     (conc *toppath* "/lt"))))
-	  (if linktree
-	      (begin
-		(if (not (common:file-exists? linktree))
-		    (begin
-		      (handle-exceptions
-			  exn
-			  (begin
-			    (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree)
-			    (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
-			    (exit 1))
-			(create-directory linktree #t))))
-		(handle-exceptions
-		    exn
-		    (begin
-		      (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*)
-		      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
-		  (let ((tlink (conc *toppath* "/lt")))
-		    (if (not (common:file-exists? tlink))
-			(create-symbolic-link linktree tlink)))))
-	      (begin
-		(debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config")
-		)))
-	;; the value of this `if` is discarded; the function result is the
-	;; final *toppath* expression at the end of the let* body
-	(if (and *toppath*
-		 (directory-exists? *toppath*))
-	    (begin
-	      (setenv "MT_RUN_AREA_HOME" *toppath*)
-	      (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
-	    (begin
-	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
-	      (set! *toppath* #f) ;; force it to be false so we return #f
-	      #f))
-	
-        ;; one more attempt to cache the configs for future reading
-        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
-               (mtcachef     (car cachefiles))
-               (rccachef     (cdr cachefiles)))
-
-          ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
-          ;; TODO - consider 1) using simple-lock to bracket cache write
-          ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.
-          (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef)))
-              (common:fail-safe
-               (lambda ()
-                 (configf:write-alist *runconfigdat* rccachef))
-               (conc "Could not write cache file - "rccachef))
-              )
-          (if (and mtcachef *configdat*    (not (common:file-exists? mtcachef)))
-              (common:fail-safe
-               (lambda ()
-                 (configf:write-alist *configdat* mtcachef))
-               (conc "Could not write cache file - "mtcachef))
-              )
-          (if (and rccachef mtcachef *runconfigdat* *configdat*)
-              (set! *configstatus* 'fulldata)))
-
-	;; if have -append-config then read and append here
-	(let ((cfname (args:get-arg "-append-config")))
-	  (if (and cfname
-		   (file-read-access? cfname))
-	      (configf:read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
-	*toppath*)))
-
-;; Pick the disk area in which a test run directory should be created.
-;;
-;; The disk list comes from the "disks" entry of testconfig when present,
-;; otherwise from the "disks" entry of confdat. The minimum free space is
-;; [setup] minspace from confdat, defaulting to 10000.
-;;
-;; Result:
-;;   - cdr of what common:get-disk-with-most-free-space returns, when it
-;;     finds a disk
-;;   - otherwise the value of the first successful (create-directory path #t)
-;;     over the configured paths, longest path first
-;;   - otherwise (cons 1 "<*toppath*>/runs")
-;;   - unspecified when no "disks" entry exists at all
-;;
-;; NOTE(review): these results do not obviously share one shape (the last
-;; fallback is a pair, the others look like they should be path strings) -
-;; verify against the callers.
-;;
-(define (get-best-disk confdat testconfig)
-  (let* ((disks        (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
-                           (hash-table-ref/default confdat "disks" #f)))
-         (minspace     (let ((m (configf:lookup confdat "setup" "minspace")))
-                         (string->number (or m "10000"))))
-         ;; last resort when no configured disk is usable
-         (default-area (lambda () (cons 1 (conc *toppath* "/runs")))))
-    (if disks
-        (let ((best (common:get-disk-with-most-free-space disks minspace)))
-          (cond
-           (best (cdr best))
-           ((null? disks) (default-area))
-           (else
-            ;; no disk has enough room: try to create each configured
-            ;; directory in turn, longest path first
-            (let try-next ((candidates (sort disks
-                                             (lambda (x y)
-                                               (> (string-length (cadr x))
-                                                  (string-length (cadr y)))))))
-              (or (handle-exceptions exn #f (create-directory (cadr (car candidates)) #t))
-                  (if (null? (cdr candidates))
-                      (default-area)
-                      (try-next (cdr candidates)))))))))))
-
-
-;; Copy the test source area test-src-path into the run area test-path.
-;;
-;; When [setup] testcopycmd is configured it is used as the shell command,
-;; after TEST_SRC_PATH and TEST_TARG_PATH are replaced by the two paths.
-;; Otherwise rsync -av (with q added unless debug mode 1 is on) is run,
-;; with stdout and stderr appended to <test-path>/mt_launch.log.
-;;
-;; A non-zero exit status is only logged (debug level 2); no error is raised.
-;;
-(define (launch:test-copy test-src-path test-path)
-  (let* ((template (configf:lookup *configdat* "setup" "testcopycmd"))
-         (cmd      (if template
-                       ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH
-                       (string-substitute "TEST_TARG_PATH" test-path
-                                          (string-substitute "TEST_SRC_PATH" test-src-path template #t) #t)
-                       (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"
-                             " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log")))
-         (status   (system cmd)))
-    (unless (eq? status 0)
-      (debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\""))))
-
-
-;; Desired directory structure:
-;;
-;;  <linkdir> - <target> - <testname> -.
-;;                                     |
-;;                                     v
-;;  <rundir>  -  <target>  -    <testname> -|- <itempath(s)>
-;;
-;;  dir stored in test is:
-;; 
-;;  <linkdir> - <target> - <testname> [ - <itempath> ]
-;; 
-;; All log file links should be stored relative to the top of link path
-;;  
-;; <target> - <testname> [ - <itempath> ] 
-;;
-;; Create the physical run directory for a test under disk-path, the
-;; matching links in the link tree, record both in the test record and copy
-;; the test source area in.
-;;
-;;   run-info      - run name string, or a run record from which "runname" is read
-;;   keyvals       - list of (key value) lists; the values joined with "/" form the target
-;;   test-src-path - source area to copy from; when #f nothing is copied
-;;   disk-path     - base directory for the physical run area
-;;   itemdat       - item path string, or item data converted with item-list->path
-;;   remtries      - remaining whole-function retries if test-path does not exist afterwards
-;;
-;; Returns (list lnkpathf lnkpath) on success, (list #f #f) when
-;; test-src-path is #f or the work area could not be created after all
-;; retries. Exits with code 1 when a required directory or link cannot be
-;; created.
-;;
-(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2))
-  (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it
-	 (runname   (if (string? run-info) ;; if we pass in a string as run-info use it as run-name.
-			run-info
-			(db:get-value-by-header (db:get-rows run-info)
-						(db:get-header run-info)
-						"runname")))
-	 (contour   #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
-	 ;; convert back to db: from rdb: - this is always run at server end
-	 (target   (string-intersperse (map cadr keyvals) "/"))
-
-	 (not-iterated  (equal? "" item-path))
-
-	 ;; all tests are found at <rundir>/test-base or <linkdir>/test-base
-	 (testtop-base (conc target "/" runname "/" testname))
-	 (test-base    (conc testtop-base (if not-iterated "" "/") item-path))
-
-	 ;; nb// if itempath is not "" then it is prefixed with "/"
-	 (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base))
-	 (test-path    (conc disk-path (if contour (conc "/" contour) "") "/" test-base))
-
-	 ;; ensure this exists first as links to subtests must be created there
-	 (linktree  (common:get-linktree))
-	 ;; WAS: (let ((rd (configf:lookup *configdat* "setup" "linktree")))
-	 ;;         (if rd rd (conc *toppath* "/runs"))))
-	 ;; which seems wrong ...
-
-	 (lnkbase   (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname))
-	 (lnkpath   (conc lnkbase "/" testname))
-	 (lnkpathf  (conc lnkpath (if not-iterated "" "/") item-path))
-	 (lnktarget (conc lnkpath "/" item-path)))
-
-    ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical
-    ;;                                                 rundir   shortdir
-    (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id)
-
-    (debug:print 2 *default-log-port* "INFO:\n       lnkbase=" lnkbase "\n       lnkpath=" lnkpath "\n  toptest-path=" toptest-path "\n     test-path=" test-path)
-    (if (not (common:file-exists? linktree))
-	(begin
-	  (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree)
-	  (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree))))
-    ;; create the directory for the tests dir links, this is needed no matter what... try up to three times
-    ;; success is #t when lnkbase already exists or was just created and #f
-    ;; when create-directory raised, so only a real failure triggers a retry.
-    (let loop ((tries-left 3))
-      (let ((success (or (common:directory-exists? lnkbase)
-			 (common:file-exists? lnkbase)
-			 (handle-exceptions
-			  exn
-			  (begin
-			    (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase)
-			    (print-error-message exn (current-error-port))
-			    #f) ;; creation failed - eligible for another attempt
-			  (create-directory lnkbase #t)
-			  #t))))
-	(if (and (not success) (> tries-left 1))
-	    (loop (- tries-left 1)))))
-    
-    ;; update the toptest record with its location rundir, cache the path
-    ;; This was highly inefficient, one db write for every subtest, potentially
-    ;; thousands of unnecessary updates, cache the fact it was set and don't set it 
-    ;; again. 
-
-    ;; Now create the link from the test path to the link tree, however
-    ;; if the test is iterated it is necessary to create the parent path
-    ;; to the iteration. use pathname-directory to trim the path by one
-    ;; level
-    (if (not not-iterated) ;; i.e. iterated
-	(let ((iterated-parent  (pathname-directory (conc lnkpath "/" item-path))))
-	  (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent)
-	  (handle-exceptions
-	   exn
-	   (begin
-	     (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting")
-	     (exit 1))
-	   (create-directory iterated-parent #t))))
-
-    (if (symbolic-link? lnkpath) 
-	(handle-exceptions
-	 exn
-	 (begin
-	   (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
-	   (exit 1))
-	 (delete-file lnkpath)))
-
-    (if (not (or (common:file-exists? lnkpath)
-		 (symbolic-link? lnkpath)))
-	(handle-exceptions
-	 exn
-	 (begin
-	   (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
-	   (exit 1))
-	 (create-symbolic-link toptest-path lnkpath)))
-    
-    ;; NB - This was not working right - some top tests are not getting the path set!!!
-    ;;
-    ;; Do the setting of this record after the paths are created so that the shortdir can 
-    ;; be set to the real directory location. This is safer for future clean up if the link
-    ;; tree is damaged or lost.
-    ;; 
-    (if (not (hash-table-ref/default *toptest-paths* testname #f))
-	(let* ((testinfo       (rmt:get-test-info-by-id run-id test-id)) ;;  run-id testname item-path))
-	       (curr-test-path (if testinfo ;; (filedb:get-path *fdb*
-							     ;; (db:get-path dbstruct
-				   ;; (rmt:sdb-qry 'getstr 
-				   (db:test-get-rundir testinfo) ;; ) ;; )
-				   #f)))
-	  (hash-table-set! *toptest-paths* testname curr-test-path)
-	  ;; NB// Was this for the test or for the parent in an iterated test?
-	  (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath 
-			    (if (common:file-exists? lnkpath)
-				;; (resolve-pathname lnkpath)
-				(common:nice-path lnkpath)
-				lnkpath)
-			    testname "" run-id)
-	  ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path)
-	  (if (or (not curr-test-path)
-		  (not (directory-exists? toptest-path)))
-	      (begin
-		(debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath)
-		(handle-exceptions
-		 exn
-		 #f ;; don't care to catch and deal with errors here for now.
-		 (create-directory toptest-path #t))
-		(hash-table-set! *toptest-paths* testname toptest-path)))))
-
-    ;; The toptest path has been created, the link to the test in the linktree has
-    ;; been created. Now, if this is an iterated test the real test dir must be created
-    (if (not not-iterated) ;; this is an iterated test
-	(begin ;; (let ((lnktarget (conc lnkpath "/" item-path)))
-	  (debug:print 2 *default-log-port* "Setting up sub test run area")
-	  (debug:print 2 *default-log-port* " - creating run area in " test-path)
-	  (handle-exceptions
-	   exn
-	   (begin
-	     (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting")
-	     (exit 1))
-	   (create-directory test-path #t))
-	  (debug:print 2 *default-log-port* 
-		       " - creating link from: " test-path "\n"
-		       "                   to: " lnktarget)
-
-	  ;; If there is already a symlink delete it and recreate it.
-	  (handle-exceptions
-	   exn
-	   (begin
-	     (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting")
-	     (exit 1)) ;; non-zero like every other failure exit in this function
-	   (if (symbolic-link? lnktarget)     (delete-file lnktarget))
-	   (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))
-
-    (if (not (directory? test-path))
-	(create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes
-
-    (if (and test-src-path (directory? test-path))
-	(begin
-	  (launch:test-copy test-src-path test-path)
-	  (list lnkpathf lnkpath ))
-	(if (and test-src-path (> remtries 0))
-	    (begin
-	      (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
-	      ;; start over from the top with one fewer retry left
-	      (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1)))
-	    (list #f #f)))))
-
-
-(define (launch:handle-zombie-tests run-id)
-  (let* ((key (conc "zombiescan-runid-"run-id))
-         (now (current-seconds))
-         (threshold (- (current-seconds)  (* 2 (or (configf:lookup-number *configdat* "setup" "deadtime") 120))))
-         (val (rmt:get-var key))
-         (do-scan?
-          (cond
-           ((not val)
-            #t)
-           ((< val threshold)
-            #t)
-           (else #f))))
-    (when do-scan?
-      (debug:print 1 *default-log-port* "INFO: search and mark zombie tests")
-      (rmt:set-var key (current-seconds))
-      (rmt:find-and-mark-incomplete run-id #f))))
-
-
-
-
-
-;; 1. look though disks list for disk with most space
-;; 2. create run dir on disk, path name is meaningful
-;; 3. create link from run dir to megatest runs area 
-;; 4. remotely run the test on allocated host
-;;    - could be ssh to host from hosts table (update regularly with load)
-;;    - could be netbatch
-;;      (launch-test db (cadr status) test-conf))
-(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
-  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
-  (let* ( ;; (lock-key        (conc "test-" test-id))
-	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
-	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
-	;; 		    (if (car lock)
-	;; 			#t
-	;; 			(if (> (current-seconds) expire-time)
-	;; 			    (begin
-	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
-	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
-	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
-	;; 			    (begin
-	;; 			      (thread-sleep! 1)
-	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
-	 (item-path       (item-list->path itemdat))
-	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
-    (let loop ((delta        (- (current-seconds) *last-launch*))
-	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
-      (if (> launch-delay delta)
-	  (begin
-	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
-		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
-	    (thread-sleep! (- launch-delay delta))
-	    (loop (- (current-seconds) *last-launch*) launch-delay))))
-    (change-directory *toppath*)
-    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
-     (append
-      (list
-       (list "MT_RUN_AREA_HOME" *toppath*)
-       (list "MT_TEST_NAME" test-name)
-       (list "MT_RUNNAME"   runname)
-       (list "MT_ITEMPATH"  item-path)
-       (list "MT_CONTOUR"   contour)
-       )
-      itemdat))
-    (let* ((tregistry       (tests:get-all)) ;; third param (below) is system-allowed
-           ;; for tconfig, why do we allow fallback to test-conf?
-	   (tconfig         (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t)
-				(begin
-                                  (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.")
-                                  test-conf))) ;; force re-read now that all vars are set
-	   (useshell        (let ((ush (configf:lookup *configdat* "jobtools"     "useshell")))
-			      (if ush 
-				  (if (equal? ush "no") ;; must use "no" to NOT use shell
-				      #f
-				      ush)
-				  #t)))     ;; default is yes
-	   (runscript       (configf:lookup tconfig   "setup"        "runscript"))
-	   (ezsteps         (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big, just send a flag
-	   (subrun          (> (length (hash-table-ref/default tconfig "subrun"  '())) 0)) ;; send a flag to process a subrun
-	   ;; (diskspace       (configf:lookup tconfig   "requirements" "diskspace"))
-	   ;; (memory          (configf:lookup tconfig   "requirements" "memory"))
-	   ;; (hosts           (configf:lookup *configdat* "jobtools"     "workhosts")) ;; I'm pretty sure this was never completed
-	   (remote-megatest (configf:lookup *configdat* "setup" "executable"))
-	   (run-time-limit  (or (configf:lookup  tconfig   "requirements" "runtimelim")
-				(configf:lookup  *configdat* "setup" "runtimelim")))
-	   ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to 
-	   ;;                allow running from dashboard. Extract the path
-	   ;;                from the called megatest and convert dashboard
-	   ;;             	  or dboard to megatest
-	   (local-megatest  (let* ((lm  (car (argv)))
-				   (dir (pathname-directory lm))
-				   (exe (pathname-strip-directory lm)))
-			      (conc (if dir (conc dir "/") "")
-				    (case (string->symbol exe)
-				      ((dboard)    "../megatest")
-				      ((mtest)     "../megatest")
-				      ((dashboard) "megatest")
-				      (else exe)))))
-	   (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (configf:lookup *configdat* "jobtools"     "launcher"))
-	   (test-sig        (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
-	   (work-area       #f)
-	   (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
-	   (diskpath   #f)
-	   (cmdparms   #f)
-	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
-	   (mt-bindir-path #f)
-	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
-	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
-	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
-				(if (args:get-arg "-logging")(list "-logging") '()))))
-      ;; (if hosts (set! hosts (string-split hosts)))
-      ;; set the megatest to be called on the remote host
-      (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
-      (set! mt-bindir-path (pathname-directory remote-megatest))
-      (if launcher (set! launcher (string-split launcher)))
-      ;; set up the run work area for this test
-      (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
-	       (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
-	  (begin
-	    (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
-	    (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record
-      
-      ;; prevent overlapping actions - set to LAUNCHED as early as possible
-      ;;
-      ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
-      (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
-      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f)
-      ;; (pp (hash-table->alist tconfig))
-      (set! diskpath (get-best-disk *configdat* tconfig))
-      (if diskpath
-	  (let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
-	    (set! work-area (car dat))
-	    (set! toptest-work-area (cadr dat))
-	    (debug:print-info 2 *default-log-port* "Using work area " work-area))
-	  (begin
-	    (set! work-area (conc test-path "/tmp_run"))
-	    (create-directory work-area #t)
-	    (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run")))
-      (set! cmdparms (base64:base64-encode 
-		      (z3:encode-buffer 
-		       (with-output-to-string
-			 (lambda () ;; (list 'hosts     hosts)
-			   (write (list (list 'testpath  test-path)
-					;; (list 'transport (conc *transport-type*))
-					;; (list 'serverinf *server-info*)
-					(list 'homehost  (let* ((hhdat (common:get-homehost)))
-							   (if hhdat
-							       (car hhdat)
-							       #f)))
-					(list 'serverurl (if *runremote*
-							     (remote-server-url *runremote*)
-							     #f)) ;;
-					(list 'areaname  (common:get-testsuite-name))
-					(list 'toppath   *toppath*)
-					(list 'work-area work-area)
-					(list 'test-name test-name) 
-					(list 'runscript runscript) 
-					(list 'run-id    run-id   )
-					(list 'test-id   test-id  )
-					;; (list 'item-path item-path )
-					(list 'itemdat   itemdat  )
-					(list 'megatest  remote-megatest)
-					(list 'ezsteps   ezsteps)
-					(list 'subrun    subrun)
-					(list 'target    mt_target)
-					(list 'contour   contour)
-					(list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
-					(list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
-					(list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
-					(list 'runname   runname)
-					(list 'mt-bindir-path mt-bindir-path))))))))
-      
-      ;; clean out step records from previous run if they exist
-      ;; (rmt:delete-test-step-records run-id test-id)
-      ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
-      (if (common:file-exists? work-area)
-	  (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
-      (cond
-       ;; ((and launcher hosts) ;; must be using ssh hostname
-       ;;    (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
-       ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
-       (launcher
-	(set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
-       ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
-       (else
-	(if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section"))
-	(set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
-      ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
-      (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
-      (debug:print 1 *default-log-port* "Launching " work-area)
-      ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done
-      (debug:print 4 *default-log-port* "fullcmd: " fullcmd)
-      (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible.
-      (let* ((commonprevvals (alist->env-vars
-			      (hash-table-ref/default *configdat* "env-override" '())))
-	     (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
-			      (append (list (list "MT_TEST_RUN_DIR" work-area)
-					    (list "MT_TEST_NAME" test-name)
-					    (list "MT_ITEM_INFO" (conc itemdat)) 
-					    (list "MT_RUNNAME"   runname)
-					    (list "MT_TARGET"    mt_target)
-					    (list "MT_ITEMPATH"  item-path)
-					    )
-				      itemdat)))
-	     (testprevvals   (alist->env-vars
-			      (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))
-	     ;; Launchwait defaults to true, must override it to turn off wait
-	     (launchwait     (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
-	     (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed.
-					process:cmd-run-with-stderr-and-exitcode->list
-					process-run)
-				    (if useshell
-					(let ((cmdstr (string-intersperse fullcmd " ")))
-					  (if launchwait
-					      cmdstr
-					      (conc cmdstr " >> mt_launch.log 2>&1 &")))
-					(car fullcmd))
-				    (if useshell
-					'()
-					(cdr fullcmd))))
-             (success        (if launchwait (equal? 0 (cadr launch-results-prev)) #t))
-             (launch-results (if launchwait (car launch-results-prev) launch-results-prev)))
-        (if (not success)
-            (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED"))
-        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
-	;; (rmt:no-sync-del! lock-key)         ;; release the lock for starting this test
-	(if (not launchwait) ;; give the OS a little time to allow the process to start
-	    (thread-sleep! 0.01))
-	(with-output-to-file "mt_launch.log"
-	  (lambda ()
-	    (print "LAUNCHCMD: " (string-intersperse fullcmd " "))
-	    (if (list? launch-results)
-		(apply print launch-results)
-		(print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
-	    #:append))
-	(debug:print 2 *default-log-port* "Launching completed, updating db")
-	(debug:print 2 *default-log-port* "Launch results: " launch-results)
-	(if (not launch-results)
-	    (begin
-	      (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
-	      ;; (sqlite3:finalize! db)
-	      ;; good ole "exit" seems not to work
-	      ;; (_exit 9)
-	      ;; but this hack will work! Thanks go to Alan Post of the Chicken email list
-	      ;; NB// Is this still needed? Should be safe to go back to "exit" now?
-	      (process-signal (current-process-id) signal/kill)
-	      ))
-	(alist->env-vars miscprevvals)
-	(alist->env-vars testprevvals)
-	(alist->env-vars commonprevvals)
-	launch-results))
-    (change-directory *toppath*)))
-
-;; recover a test where the top controlling mtest may have died
-;;
-(define (launch:recover-test run-id test-id)
-  ;; this function is called on the test run host via ssh
-  ;;
-  ;; 1. look at the process from pid
-  ;;    - is it owned by calling user
-  ;;    - it it's run directory correct for the test
-  ;;    - is there a controlling mtest (maybe stuck)
-  ;; 2. if recovery is needed watch pid
-  ;;    - when it exits take the exit code and do the needful
-  ;;
-  (let* ((pid (rmt:test-get-top-process-pid run-id test-id))
-	 (psres (with-input-from-pipe
-		 (conc "ps -F -u " (current-user-name) " | grep -E '" pid " ' | grep -v 'grep -E " pid "'")
-		 (lambda ()
-		   (read-line))))
-	 (rundir (if (string? psres) ;; real process owned by user
-		     (read-symbolic-link (conc "/proc/" pid "/cwd"))
-		     #f)))
-    ;; now wait on that process if all is correct
-    ;; periodically update the db with runtime
-    ;; when the process exits look at the db, if still RUNNING after 10 seconds set
-    ;; state/status appropriately
-    (process-wait pid)))
-
-
-;; Do not rpc this one, do the underlying calls!!!
-(define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f))
-  (let* ((real-status status)
-	 (otherdat    (if dat dat (make-hash-table)))
-	 (testdat     (rmt:get-test-info-by-id run-id test-id))
-	 (test-name   (db:test-get-testname  testdat))
-	 (item-path   (db:test-get-item-path testdat))
-	 ;; before proceeding we must find out if the previous test (where all keys matched except runname)
-	 ;; was WAIVED if this test is FAIL
-
-	 ;; NOTES:
-	 ;;  1. Is the call to test:get-previous-run-record remotified?
-	 ;;  2. Add test for testconfig waiver propagation control here
-	 ;;
-	 (prev-test   (if (equal? status "FAIL")
-			  (rmt:get-previous-test-run-record run-id test-name item-path)
-			  #f))
-	 (waived   (if prev-test
-		       (if prev-test ;; true if we found a previous test in this run series
-			   (let ((prev-status  (db:test-get-status  prev-test))
-				 (prev-state   (db:test-get-state   prev-test))
-				 (prev-comment (db:test-get-comment prev-test)))
-			     (debug:print 4 *default-log-port* "prev-status " prev-status ", prev-state " prev-state ", prev-comment " prev-comment)
-			     (if (and (equal? prev-state  "COMPLETED")
-				      (equal? prev-status "WAIVED"))
-				 (if comment
-				     comment
-				     prev-comment) ;; waived is either the comment or #f
-				 #f))
-			   #f)
-		       #f)))
-    (if (and waived 
-	     (tests:check-waiver-eligibility testdat prev-test))
-	(set! real-status "WAIVED"))
-
-    (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status)
-
-    ;; update the primary record IF state AND status are defined
-    (if (and state status)
-	(begin
-	  (rmt:set-state-status-and-roll-up-items run-id test-id item-path state real-status (if waived waived comment))
-	  ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-state-status
-	  ))
-    
-    ;; if status is "AUTO" then call rollup (note, this one modifies data in test
-    ;; run area, it does remote calls under the hood.
-    ;; (if (and test-id state status (equal? status "AUTO")) 
-    ;; 	(rmt:test-data-rollup run-id test-id status))
-
-    ;; add metadata (need to do this way to avoid SQL injection issues)
-
-    ;; :first_err
-    ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f)))
-    ;;   (if val
-    ;;       (sqlite3:execute db "UPDATE tests SET first_err=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))
-    ;; 
-    ;; ;; :first_warn
-    ;; (let ((val (hash-table-ref/default otherdat ":first_warn" #f)))
-    ;;   (if val
-    ;;       (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))
-
-    (let ((category (hash-table-ref/default otherdat ":category" ""))
-	  (variable (hash-table-ref/default otherdat ":variable" ""))
-	  (value    (hash-table-ref/default otherdat ":value"    #f))
-	  (expected (hash-table-ref/default otherdat ":expected" "n/a"))
-	  (tol      (hash-table-ref/default otherdat ":tol"      "n/a"))
-	  (units    (hash-table-ref/default otherdat ":units"    ""))
-	  (type     (hash-table-ref/default otherdat ":type"     ""))
-	  (dcomment (hash-table-ref/default otherdat ":comment"  "")))
-      (debug:print 4 *default-log-port* 
-		   "category: " category ", variable: " variable ", value: " value
-		   ", expected: " expected ", tol: " tol ", units: " units)
-      (if (and value) ;; require only value; BB was- all three required
-	  (let ((dat (conc category ","
-			   variable ","
-			   value    ","
-			   expected ","
-			   tol      ","
-			   units    ","
-			   dcomment ",," ;; extra comma for status
-			   type     )))
-	    ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment.
-	    (rmt:csv->test-data run-id test-id
-				dat)
-	    ;; This was added in check-in a5adfa3f9a. Message was: "...added delay in set-values to allow for delayed write on server start"
-	    ;; I'm inserting an arbitrary rmt: call to force/ensure that the server is available to (hopefully) prevent a communication issue.
-	    (rmt:get-var "MEGATEST_VERSION") ;; this does NOTHING but ensure the server is reachable. This is almost certainly NOT needed :)
-            ;; BB - commentiong out arbitrary 10 second wait (thread-sleep! 10) ;; add 10 second delay before quit incase rmt needs time to start a server.
-            )))
-      
-    ;; need to update the top test record if PASS or FAIL and this is a subtest
-    ;;;;;; (if (not (equal? item-path ""))
-    ;;;;;;     (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;)
-
-    (if (or (and (string? comment)
-		 (string-match (regexp "\\S+") comment))
-	    waived)
-	(let ((cmt  (if waived waived comment)))
-	  (rmt:general-call 'set-test-comment run-id cmt test-id)))))
-
 
 
 )
 	

Index: runsmod.scm
==================================================================
--- runsmod.scm
+++ runsmod.scm
@@ -2648,7 +2648,4240 @@
                  (item-path (vector-ref running-test 11)))
 			       	(debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed")
               (if (not (null? tal))
 				  (loop (car tal) (cdr tal)))))))))))        
  
+;; This is the Megatest API. All generally "useful" routines will be wrapped or extended
+;; here.
+
+;; field order ->        0           1              2              3
+(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0))
+
+;;======================================================================
+;;  R U N S
+;;======================================================================
+
+;; mt:get-runs-by-patt
+;; Get all runs matching runnamepatt and targpatt, fetched from the server in
+;; batches of `limit` rows. Returns (vector header rows).
+;;
+;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
+;;  to extract info from the structure returned
+;;
+(define (mt:get-runs-by-patt keys runnamepatt targpatt)
+  (let loop ((runsdat  (rmt:get-runs-by-patt keys runnamepatt targpatt 0 500 #f 0))
+	     (res      '())
+	     (offset   0)
+	     (limit    500))
+    ;; offset is the starting row of the batch currently held in runsdat
+    (let* ((header    (vector-ref runsdat 0))
+	   (runslst   (vector-ref runsdat 1))
+	   (full-list (append res runslst))
+	   (have-more (= (length runslst) limit))) ;; a full batch may be followed by more rows
+      ;; (debug:print 0 *default-log-port* "header: " header " runslst: " runslst " have-more: " have-more)
+      (if have-more 
+	  ;; let* is required: next-batch must be fetched at new-offset. Fetching at
+	  ;; the old offset re-read the batch just processed and duplicated its rows.
+	  (let* ((new-offset (+ offset limit))
+		 (next-batch (rmt:get-runs-by-patt keys runnamepatt targpatt new-offset limit #f 0)))
+	    (debug:print-info 4 *default-log-port* "More than " limit " runs, have " (length full-list) " runs so far.")
+	    (debug:print-info 4 *default-log-port* "next-batch: " next-batch) ;; was level 0: dumped every batch
+	    (loop next-batch full-list new-offset limit))
+	  ;; short (or empty) batch: that was the last page
+	 (vector header full-list)))))
+
+;;======================================================================
+;;  T E S T S
+;;======================================================================
+
+(define (mt:get-tests-for-run run-id testpatt states status #!key (not-in #t) (sort-by 'event_time) (sort-order "ASC") (qryvals #f)(last-update #f))
+  ;; Return, as one flat list, every test of run-id selected by testpatt, states
+  ;; and status. The server is queried page by page (batch-size rows at a time)
+  ;; until a page comes back with a row count other than batch-size.
+  (define batch-size 500)
+  (define (fetch-batch start)
+    (rmt:get-tests-for-run run-id testpatt states status start batch-size not-in sort-by sort-order qryvals last-update 'normal))
+  (let next-page ((batch (fetch-batch 0)) (sofar '()) (start 0))
+    (let ((collected (append sofar batch)))
+      (cond
+       ((not (eq? (length batch) batch-size)) collected) ;; short page: nothing left to fetch
+       (else
+	(let ((next-start (+ start batch-size)))
+	  (debug:print-info 4 *default-log-port* "More than " batch-size " tests, have " (length collected) " tests so far.")
+	  (next-page (fetch-batch next-start) collected next-start)))))))
+
+(define (mt:lazy-get-prereqs-not-met run-id waitons ref-item-path #!key (mode '(normal))(itemmaps #f) )
+  ;; Cached front end for rmt:get-prereqs-not-met; entries in *pre-reqs-met-cache*
+  ;; are reused while younger than 5 seconds. NOTE(review): itemmaps is not in the key.
+  (let* ((cache-key (list run-id waitons ref-item-path mode))
+	 (cached    (hash-table-ref/default *pre-reqs-met-cache* cache-key #f))
+	 (stamp     (and (vector? cached) (vector-ref cached 0)))
+	 (fresh?    (and stamp (< (current-seconds) (+ stamp 5)))))
+    (if (not fresh?)
+	(let ((fetched (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmaps: itemmaps)))
+	  (hash-table-set! *pre-reqs-met-cache* cache-key (vector (current-seconds) fetched))
+	  fetched)
+	(let ((hit (vector-ref cached 1)))
+	  (debug:print 4 *default-log-port* "Using lazy value res: " hit)
+	  hit))))
+
+(define (mt:get-run-stats dbstruct run-id)
+  ;; Thin pass-through to db:get-run-stats on the given dbstruct (direct db: call, no rmt: call). TODO: find a better home for this.
+  (db:get-run-stats dbstruct run-id))
+
+(define (mt:discard-blocked-tests run-id failed-test tests test-records)
+  ;; Return `tests` (order preserved) minus every test whose waitons (slot 2
+  ;; of its test-records entry) include failed-test; tests with no entry are
+  ;; kept. run-id is unused.
+  (if (null? tests)
+      tests
+      (begin
+	(debug:print-info 1 *default-log-port* "Discarding tests from " tests " that are waiting on " failed-test)
+	(let loop ((remt tests)
+		   (res  '()))
+	  ;; Terminate only once the list is exhausted; the previous version
+	  ;; stopped when one test was left and so always dropped the last test.
+	  (if (null? remt)
+	      (reverse res)
+	      (let* ((testn    (car remt))
+		     (test-dat (hash-table-ref/default test-records testn (vector #f #f '())))
+		     (waitons  (vector-ref test-dat 2)))
+		(if (member failed-test waitons)
+		    (begin
+		      (debug:print 0 *default-log-port* "Discarding test " testn "(" test-dat ") due to " failed-test)
+		      (loop (cdr remt) res))
+		    (loop (cdr remt) (cons testn res)))))))))
+
+;;======================================================================
+;;  S T A T E   A N D   S T A T U S   F O R   T E S T S 
+;;======================================================================
+
+;; speed up for common cases with a little logic
+(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
+  ;; Update state, status and comment of the test test-id in run run-id with a
+  ;; single roll-up call. Returns #t after issuing the update, or #f (after
+  ;; logging the bad arguments and the call chain) if run-id or test-id is false.
+  (cond
+   ((and run-id test-id)
+    ;; the test is addressed by id here, so #f is passed in the slot after test-id
+    ;; Superseded per-field variant, kept for reference:
+    ;;   (and newstate newstatus newcomment) => (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id)
+    ;;   (and newstate newstatus)            => (rmt:general-call 'state-status run-id newstate newstatus test-id)
+    ;;   otherwise one rmt:general-call each for 'set-test-state, 'set-test-status
+    ;;   and 'set-test-comment, skipping whichever value is #f
+    (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment)
+    ;; (mt:process-triggers run-id test-id newstate newstatus)
+    #t)
+   (else
+    (debug:print-error 0 *default-log-port* "bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate)
+    (print-call-chain (current-error-port))
+    #f)))
+
+
+(define (mt:test-set-state-status-by-id-unless-completed run-id test-id newstate newstatus newcomment)
+  ;; Roll-up update by id, except that a test whose current state (slot 3 of
+  ;; rmt:get-testinfo-state-status) is "COMPLETED" is left untouched and #t is returned.
+  (let ((current-state (vector-ref (rmt:get-testinfo-state-status run-id test-id) 3)))
+    (or (equal? current-state "COMPLETED")
+        (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment))))
+
+  
+(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment)
+  ;; Set state/status/comment for the test named by test-name + item-path; always returns #t.
+  (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status new-comment)
+  ;; (mt:process-triggers run-id test-id new-state new-status)
+  #t)
+;; Earlier form: look up test-id with rmt:get-test-id, then call mt:test-set-state-status-by-id.
+
+(define (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path new-state new-status new-comment)
+  ;; Resolve the test id from test-name + item-path, then defer to the by-id variant.
+  (mt:test-set-state-status-by-id-unless-completed run-id (rmt:get-test-id run-id test-name item-path) new-state new-status new-comment))
+    
+;; Kill any runner processes (i.e. processes handling -runtests) that match target/run-name.
+;; The task queue records are fetched with a remote call; the kill itself is done from this
+;; process: SIGINT (then SIGKILL if still alive after 5 seconds) when the runner is on this
+;; host, otherwise "nbfake kill <pid>" is run with TARGETHOST set to the runner's host.
+(define (tasks:kill-runner target run-name testpatt)
+  (let ((records    (rmt:tasks-find-task-queue-records target run-name testpatt "running" "run-tests"))
+	(hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string
+    (if (null? records)
+	(debug:print 0 *default-log-port* "No run launching processes found for " target " / " run-name " with testpatt " (or testpatt "* no testpatt specified! *"))
+	(debug:print 0 *default-log-port* "Found " (length records) " run(s) to kill."))
+    (for-each 
+     (lambda (record)
+       (let* ((param-key (list-ref record 8))
+	      (match-dat (string-search hostpid-rx param-key)))
+	 (if match-dat
+	     (let ((hostname  (cadr match-dat))
+		   (pid       (string->number (caddr match-dat))))
+	       (debug:print 0 *default-log-port* "Sending SIGINT to process " pid " on host " hostname)
+	       (if (equal? (get-host-name) hostname)
+		   (if (process:alive? pid)
+		       (handle-exceptions
+			exn
+			(begin
+			  (debug:print 0 *default-log-port* "Kill of process " pid " on host " hostname " failed.")
+			  (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+			  #t)
+			(process-signal pid signal/int)
+			(thread-sleep! 5)
+			(if (process:alive? pid)
+			    (process-signal pid signal/kill))))
+		   ;; runner is on another host; presumably nbfake takes its destination from TARGETHOST
+		   (let ((old-targethost (getenv "TARGETHOST")))
+		     (setenv "TARGETHOST" hostname)
+		     (setenv "TARGETHOST_LOGF" "server-kills.log")
+		     (system (conc "nbfake kill " pid))
+		     ;; restore the caller's TARGETHOST; it used to be restored and then unset again
+		     (if old-targethost (setenv "TARGETHOST" old-targethost)
+			 (unsetenv "TARGETHOST"))
+		     (unsetenv "TARGETHOST_LOGF"))))
+	     (debug:print-error 0 *default-log-port* "no record or improper record for " target "/" run-name " in tasks_queue in main.db"))))
+     records)))
+
+(define (task:get-run-times)
+  ;; Print the run times selected by -run-patt / -target-patt (each defaults
+  ;; to "%") in the format chosen by -dumpmode: "json", "csv" (comma separated)
+  ;; or, for anything else, two-space separated columns.
+  ;; Prints "Data not found!!" and exits when nothing matches.
+  (let* ((run-patt    (or (args:get-arg "-run-patt") "%"))
+	 (target-patt (or (args:get-arg "-target-patt") "%"))
+	 (run-times   (rmt:get-run-times  run-patt target-patt )))
+    (when (eq? (length run-times) 0)
+      (print "Data not found!!")
+      (exit))
+    (let ((dumpmode (args:get-arg "-dumpmode")))
+      (cond
+       ((equal? dumpmode "json")
+	(task:print-runtime-as-json run-times))
+       ((equal? dumpmode "csv")
+	(task:print-runtime run-times ","))
+       (else
+	(task:print-runtime run-times "  "))))))
+
+ (define (task:get-test-times)
+   ;; Print the test times for one run, selected by -runname and -target, in the
+   ;; format chosen by -dumpmode: "json", "csv" (comma separated) or, for anything
+   ;; else, two-space separated columns.
+   ;; -runname and -target are required and must not contain '%'; on a violation, or on no data, print a message and exit.
+   (let ((runname (args:get-arg "-runname"))
+         (target  (args:get-arg "-target")))
+     ;; Validate the arguments before talking to the server. The query used to be
+     ;; issued first, so it ran even with a missing or wildcarded runname/target.
+     (if (not runname)
+         (begin
+           (print "Error: Missing argument -runname")
+           (exit)))
+     (if (string-contains runname "%")
+         (begin
+           (print "Error: Invalid runname, '%' not allowed  (" runname ") ")
+           (exit)))
+     (if (not target)
+         (begin
+           (print "Error: Missing argument -target")
+           (exit)))
+     (if (string-contains target "%")
+         (begin
+           (print "Error: Invalid target, '%' not allowed  (" target ") ")
+           (exit)))
+     (let ((test-times (rmt:get-test-times runname target)))
+       (if (eq? (length test-times) 0)
+           (begin
+             (print "Data not found!!")
+             (exit)))
+       (if (equal? (args:get-arg "-dumpmode") "json")
+           (task:print-testtime-as-json test-times)
+           (if (equal? (args:get-arg "-dumpmode") "csv")
+               (task:print-testtime test-times ",")
+               (task:print-testtime test-times "  "))))))
+
+;; Map a megatest run-id to its run id in the postgres db (pgdb), creating or
+;; refreshing the pgdb record as needed. Results are cached in cached-info under
+;; 'runs. Returns the pgdb run id, or #f when the run is "deleted" or the insert
+;; fails. May lower "smallest-time" in the smallest-last-update-time hash table.
+(define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)
+  (let* ((runs-ht (hash-table-ref cached-info 'runs))
+	 (runinf  (hash-table-ref/default runs-ht run-id #f))
+         (area-id (vector-ref area-info 0))) ;; first field of area-info is used as the area id
+       (if runinf
+	runinf ;; already cached
+	(let* ((run-dat    (rmt:get-run-info run-id))               ;; NOTE: get-run-info returns a vector < row header >
+	       (run-name   (rmt:get-run-name-from-id run-id))
+	       (row        (db:get-rows run-dat))                   ;; yes, this returns a single row
+	       (header     (db:get-header run-dat))
+	       (state      (db:get-value-by-header row header "state"))
+	       (status     (db:get-value-by-header row header "status"))
+	       (owner      (db:get-value-by-header row header "owner"))
+	       (event-time (db:get-value-by-header row header "event_time"))
+	       (comment    (db:get-value-by-header row header "comment"))
+	       (fail-count (db:get-value-by-header row header "fail_count"))
+	       (pass-count (db:get-value-by-header row header "pass_count"))
+         (db-contour (db:get-value-by-header row header "contour"))
+	       (contour    (if (args:get-arg "-prepend-contour") 
+                                 (if (and db-contour (not (equal? db-contour ""))  (string? db-contour )) 
+                                           (begin 
+                                            (debug:print-info 1 *default-log-port*  "db-contour") 
+ 						db-contour)
+					    (args:get-arg "-contour")))) ;; NOTE(review): no else branch - contour is unspecified without -prepend-contour
+         (run-tag (if (args:get-arg "-run-tag")
+                            (args:get-arg "-run-tag")
+									""))
+         (last-update (db:get-value-by-header row header "last_update"))
+	       (keytarg    (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target"))
+	       			(conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/")) (string-intersperse (rmt:get-keys) "/"))) ;; e.g. version/iteration/platform
+	       (target     (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) 
+	       			(conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name) "/")) (rmt:get-target run-id)) (rmt:get-target run-id)))                 ;; e.g. v1.63/a3e1/ubuntu
+	       (spec-id    (pgdb:get-ttype dbh keytarg))
+	       (publish-time (if (args:get-arg "-cp-eventtime-to-publishtime")
+                            event-time
+                           (current-seconds))) 
+	       (new-run-id (pgdb:get-run-id dbh spec-id target run-name area-id))) ;; #f when pgdb has no such run yet
+         (if new-run-id
+	         (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id))
+		        (hash-table-set! runs-ht run-id new-run-id) ;; cache the mapping for later lookups
+		;; ensure key fields are up to date
+     ;; if last_update == pgdb_last_update do not update smallest-last-update-time  
+    (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id))
+           (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
+     (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
+        (hash-table-set! smallest-last-update-time "smallest-time" last-update)))
+		(pgdb:refresh-run-info
+		 dbh
+		 new-run-id
+		 state status owner event-time comment fail-count pass-count area-id last-update publish-time)
+     (debug:print-info 0 *default-log-port* "Working on run-id " run-id " pgdb-id "  new-run-id )
+     (if (not (equal? run-tag ""))
+      (task:add-run-tag dbh new-run-id run-tag))
+		new-run-id) 
+      
+	      (if (equal? state "deleted") ;; no pgdb record and locally deleted: nothing to sync
+          (begin 
+          (debug:print-info 1 *default-log-port*  "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f)
+          (if (handle-exceptions ;; an exception during insert is printed and treated as "insert failed"
+		        exn
+		        (begin (print-call-chain)
+              (print ((condition-property-accessor 'exn 'message) exn))     
+			      #f)
+            
+            (pgdb:insert-run
+		     dbh
+		     spec-id target run-name state status owner event-time comment fail-count pass-count  area-id last-update publish-time))
+		       (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
+             (if (or (not smallest-time) (< last-update smallest-time))
+        				(hash-table-set! smallest-last-update-time "smallest-time" last-update))
+             (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) ;; recurse; presumably the new row is now found via pgdb:get-run-id
+		  #f)))))))
+;; Tag the pgdb run run-id with tag, creating the tag record when it is missing.
+;; Exceptions raised by the pgdb calls are logged at debug level 1 and swallowed.
+(define (task:add-run-tag dbh run-id tag)
+  (let ((tag-info (pgdb:get-tag-info-by-name dbh tag))
+        ;; shared exception handler: log the message, yield #f
+        (log-exn  (lambda (exn)
+                    (debug:print-info 1 *default-log-port*  ((condition-property-accessor 'exn 'message) exn))
+                    #f)))
+    ;; Unknown tag: insert it and, if that worked, read its record back.
+    (if (and (not tag-info)
+             (handle-exceptions exn (log-exn exn) (pgdb:insert-tag dbh tag)))
+        (set! tag-info (pgdb:get-tag-info-by-name dbh tag)))
+    ;; Link run and tag unless that link is already present.
+    ;; A still-missing tag-info makes vector-ref raise here, which the handler
+    ;; logs as well; the result is then #f.
+    (handle-exceptions
+     exn
+     (log-exn exn)
+     (if (not (pgdb:is-run-taged-with-a-tag dbh (vector-ref tag-info 0) run-id))
+         ;; first field of tag-info is passed as the tag id
+         (pgdb:insert-run-tag dbh (vector-ref tag-info 0) run-id)))))
+;; Push the test steps named by test-step-ids to pgdb: update the pgdb step when one exists for
+;; (pgdb test id, stepname, state), insert it otherwise; record local->pgdb step ids in cached-info 'steps.
+(define (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time)
+ ; (print "Sync Steps " test-step-ids )
+  (let ((test-ht (hash-table-ref cached-info 'tests))
+        (step-ht (hash-table-ref cached-info 'steps)))
+    (for-each
+     (lambda (test-step-id)
+        (let* ((test-step-info  (rmt:get-steps-info-by-id test-step-id))
+               (step-id (tdb:step-get-id test-step-info))
+               (test-id  (tdb:step-get-test_id    test-step-info))   
+	       (stepname (tdb:step-get-stepname  test-step-info))
+	       (state (tdb:step-get-state test-step-info))	
+	       (status (tdb:step-get-status test-step-info))	
+	       (event_time (tdb:step-get-event_time  test-step-info))	
+	       (comment  (tdb:step-get-comment test-step-info))	
+	       (logfile (tdb:step-get-logfile test-step-info))	
+         (last-update (tdb:step-get-last_update test-step-info))
+	       (pgdb-test-id  (hash-table-ref/default test-ht test-id #f)) ;; #f unless the owning test is in the 'tests cache
+				 (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))
+         (pgdb-step-id (if pgdb-test-id 
+                         (pgdb:get-test-step-id dbh pgdb-test-id stepname state)
+                          #f)))
+    (if step-id
+      (begin  
+        (if pgdb-test-id
+           (begin 
+                (if  pgdb-step-id
+                   (begin
+                    (debug:print-info 1 *default-log-port*  "Updating existing test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id " pgdb step id " pgdb-step-id )
+										(let* ((pgdb-last-update (pgdb:get-test-step-last-update dbh pgdb-step-id)))
+         (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;; local copy newer than pgdb and older than the current minimum
+        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
+                    (pgdb:update-test-step dbh pgdb-step-id pgdb-test-id stepname state status event_time comment logfile last-update))
+                    (begin
+ 		      (debug:print-info 1 *default-log-port*  "Inserting test-step with test-id: " test-id " and step-id " step-id  " pgdb test id: " pgdb-test-id)
+                     (if (or (not smallest-time) (< last-update smallest-time))
+        				      (hash-table-set! smallest-last-update-time "smallest-time" last-update))
+                      (pgdb:insert-test-step dbh pgdb-test-id stepname state status event_time comment logfile last-update )
+                      (set! pgdb-step-id  (pgdb:get-test-step-id dbh pgdb-test-id stepname state)))) ;; read back the id of the row just inserted
+                (hash-table-set! step-ht step-id pgdb-step-id ))
+           (debug:print-info 1 *default-log-port*  "Error: Test not cashed")))
+      (debug:print-info 1 *default-log-port*  "Error: Could not get test step info for step id " test-step-id ))))	;; this is a weird scenario, needs debugging
+   test-step-ids)))
+;; Push the test_data rows named by test-data-ids to pgdb (update or insert) and cache local->pgdb ids under 'data.
+(define (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time)
+  (let ((test-ht (hash-table-ref cached-info 'tests))
+        (data-ht (hash-table-ref cached-info 'data)))
+    (for-each
+     (lambda (test-data-id)
+        (let* ((test-data-info  (rmt:get-data-info-by-id test-data-id))
+               (data-id (db:test-data-get-id  test-data-info))
+               (test-id  (db:test-data-get-test_id   test-data-info))   
+	       (category  (db:test-data-get-category  test-data-info))
+	       (variable  (db:test-data-get-variable test-data-info))	
+	       (value (db:test-data-get-value  test-data-info))	
+               (expected (db:test-data-get-expected  test-data-info))
+               (tol (db:test-data-get-tol  test-data-info))
+               (units (db:test-data-get-units  test-data-info))     
+	       (comment  (db:test-data-get-comment test-data-info))	
+               (status (db:test-data-get-status test-data-info))	
+	       (type (db:test-data-get-type test-data-info))
+				 (last-update (db:test-data-get-last_update test-data-info))
+				 (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))
+   	
+	       (pgdb-test-id  (hash-table-ref/default test-ht test-id #f)) ;; #f unless the owning test is in the 'tests cache
+               (pgdb-data-id (if pgdb-test-id 
+                                 (pgdb:get-test-data-id dbh pgdb-test-id category variable)
+                                  #f)))
+    (if data-id
+      (begin
+        (if pgdb-test-id
+           (begin 
+                (if  pgdb-data-id
+                   (begin
+                    (debug:print-info 1 *default-log-port*  "Updating existing test-data with test-id: " test-id " and  data-id " data-id " pgdb test id: " pgdb-test-id " pgdb data id " pgdb-data-id)
+                    (let* ((pgdb-last-update (pgdb:get-test-data-last-update dbh pgdb-data-id)))
+         (if (and  (>  last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;; local copy newer than pgdb and older than the current minimum
+        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
+                    (pgdb:update-test-data dbh pgdb-data-id pgdb-test-id  category variable value expected tol units comment status type last-update))
+                    (begin
+ 		      (debug:print-info 1 *default-log-port*  "Inserting test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id)
+                       (if (handle-exceptions ;; an exception during insert is printed and treated as "insert failed"
+		      exn
+		      (begin (print-call-chain)
+                              (print ((condition-property-accessor 'exn 'message) exn))     
+			#f)
+                     
+                    (pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type last-update))
+		       ;(tasks:run-id->mtpg-run-id dbh cached-info run-id area-info)
+                      (begin
+                      ;(pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type )
+											(if (or (not smallest-time) (< last-update smallest-time))
+        								(hash-table-set! smallest-last-update-time "smallest-time" last-update))
+                      (set! pgdb-data-id  (pgdb:get-test-data-id dbh pgdb-test-id  category variable))) ;; read back the id of the row just inserted
+		   #f)))
+                (hash-table-set! data-ht data-id pgdb-data-id )) ;; NOTE: stores #f as the mapping when the insert above failed
+             (begin
+                 (debug:print-info 1 *default-log-port*  "Error: Test not in pgdb"))))
+
+      (debug:print-info 1 *default-log-port*  "Error: Could not get test data info for data id " test-data-id ))))	;; this is a weird scenario, needs debugging
+   test-data-ids)))
+
+;; Push the tests named by test-ids to pgdb. The owning run is resolved (and synced) first; the test is
+;; then updated when pgdb already has (run, testname, item-path), else inserted. Ids are cached under 'tests.
+(define (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time)
+  (let ((test-ht (hash-table-ref cached-info 'tests)))
+    (for-each
+     (lambda (test-id)
+      ; (print test-id)
+       (let* ((test-info    (rmt:get-test-info-by-id #f test-id))
+	      (run-id       (db:test-get-run_id    test-info)) ;; look these up in db_records.scm
+	      (test-id      (db:test-get-id        test-info))
+	      (test-name    (db:test-get-testname  test-info))
+	      (item-path    (db:test-get-item-path test-info))
+	      (state        (db:test-get-state     test-info))
+	      (status       (db:test-get-status    test-info))
+	      (host         (db:test-get-host      test-info))
+        (pid          (db:test-get-process_id test-info)) 
+	      (cpuload      (db:test-get-cpuload   test-info))
+	      (diskfree     (db:test-get-diskfree  test-info))
+	      (uname        (db:test-get-uname     test-info))
+	      (run-dir      (db:test-get-rundir    test-info))
+	      (log-file     (db:test-get-final_logf test-info))
+	      (run-duration (db:test-get-run_duration test-info))
+	      (comment      (db:test-get-comment   test-info))
+	      (event-time   (db:test-get-event_time test-info))
+	      (archived     (db:test-get-archived  test-info))
+        (last-update  (db:test-get-last_update  test-info))
+	      (pgdb-run-id  (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)) ;; #f => run could not be mapped, test is skipped
+        (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))       
+	      (pgdb-test-id (if pgdb-run-id 
+				(begin
+                                  ;(print pgdb-run-id)    
+                                 (pgdb:get-test-id dbh pgdb-run-id test-name item-path))
+                                 #f)))
+	 ;; "id"           "run_id"        "testname"  "state"      "status"      "event_time"
+	 ;; "host"         "cpuload"       "diskfree"  "uname"      "rundir"      "item_path"
+	 ;; "run_duration" "final_logf"    "comment"   "shortdir"   "attemptnum"  "archived"
+         (if pgdb-run-id
+           (begin
+	   (if pgdb-test-id ;; have a record
+	     (begin ;; let ((key-name (conc run-id "/" test-name "/" item-path)))
+	       (debug:print-info 0 *default-log-port*  "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id "  pgdb-test-id "  pgdb-test-id)
+         (let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id)))
+         (if (and  (>  last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;; if last-update is the same as pgdb-last-update it is safe to assume the records are identical and a larger last update time can be used.
+        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
+	       (pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid))
+	     (begin 
+           (debug:print-info 0 *default-log-port*  "Inserting test with run-id: " run-id " and test-id: " test-id  " pgdb run id: " pgdb-run-id)
+           (pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)
+            (if (or (not smallest-time) (< last-update smallest-time))
+        				(hash-table-set! smallest-last-update-time "smallest-time" last-update))
+           (set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path)))) ;; read back the id of the row just inserted
+           (hash-table-set! test-ht test-id pgdb-test-id)) ;; cache local test id -> pgdb test id for the step/data syncs
+           (debug:print-info 1 *default-log-port*  "WARNING: Skipping run with run-id:" run-id ". This run was created after privious sync and removed before this sync."))))
+     test-ids)))
+;; Tag the area described by area-info (its first field is passed as the area id)
+;; with tag, creating the tag record when it is missing. Exceptions raised by the
+;; pgdb calls are logged at debug level 1 and swallowed.
+(define (task:add-area-tag dbh area-info tag)
+  (let ((tag-info (pgdb:get-tag-info-by-name dbh tag))
+        ;; shared exception handler: log the message, yield #f
+        (log-exn  (lambda (exn)
+                    (debug:print-info 1 *default-log-port*  ((condition-property-accessor 'exn 'message) exn))
+                    #f)))
+    ;; Unknown tag: insert it and, if that worked, read its record back.
+    (if (and (not tag-info)
+             (handle-exceptions exn (log-exn exn) (pgdb:insert-tag dbh tag)))
+        (set! tag-info (pgdb:get-tag-info-by-name dbh tag)))
+    ;;add to area_tags
+    ;; (skipped when the area already carries the tag; a still-missing tag-info
+    ;; makes vector-ref raise here, which the handler logs too)
+    (handle-exceptions
+     exn
+     (log-exn exn)
+     (if (not (pgdb:is-area-taged-with-a-tag dbh (vector-ref tag-info 0) (vector-ref area-info 0)))
+         (pgdb:insert-area-tag dbh (vector-ref tag-info 0) (vector-ref area-info 0))))))
+;; Visit every run id in run-ids so that its pgdb record exists and is refreshed
+;; (the work itself is done by tasks:run-id->mtpg-run-id; results are discarded).
+(define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time)
+  (let ((sync-one (lambda (run-id)
+                    (debug:print-info 1 *default-log-port*   "Check if run with " run-id " needs to be synced" )
+                    (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))))
+    (for-each sync-one run-ids)))
+
+
+;; get runs changed since last sync
+;; (define (tasks:sync-test-data dbh cached-info area-info)
+;;   (let* ((
+;; Sync runs, tests, test steps and test data of the area at *toppath* into the postgres db opened via configdat/dest.
+(define (tasks:sync-to-postgres configdat dest)
+  (print "In sync")
+  (let* ((dbh         (pgdb:open configdat dbname: dest))
+	 (area-info   (pgdb:get-area-by-path dbh *toppath*)) ;; #f => area not registered yet, handled at the bottom
+	 (cached-info (make-hash-table))
+	 (start       (current-seconds))
+   (test-patt   (if (args:get-arg "-testpatt")
+											(args:get-arg "-testpatt")
+                      "%"))
+   (target         (if (args:get-arg "-target")
+														 (args:get-arg "-target")
+													#f))
+    (run-name         (if (args:get-arg "-runname")
+														 (args:get-arg "-runname")
+													#f)))
+     ;; -target and -runname must be given together (or not at all)
+       (if (and target  (not run-name))
+       (begin
+					(print "Error: Provide runname")
+          (exit 1)))
+     (if (and (not target)  run-name)
+       (begin
+					(print "Error: Provide target")
+          (exit 1)))
+    ;; one id-mapping hash table per record type, filled in by the tasks:sync-* helpers
+    (for-each (lambda (dtype)
+		(hash-table-set! cached-info dtype (make-hash-table)))
+	      '(runs targets tests steps data))
+    (hash-table-set! cached-info 'start start) ;; when done we'll set sync times to this
+    (if area-info
+	(let* ((last-sync-time (vector-ref area-info 3))
+	       (smallest-last-update-time  (make-hash-table)) ;; helpers record the minimum synced last_update under "smallest-time"
+         (changed      (if (and target run-name)
+                            (rmt:get-run-record-ids target run-name (rmt:get-keys) test-patt) ;; explicit single-run sync
+                            (rmt:get-changed-record-ids last-sync-time)))                     ;; incremental sync
+	       (run-ids        (alist-ref 'runs       changed))
+	       (test-ids       (alist-ref 'tests      changed))
+	       (test-step-ids  (alist-ref 'test_steps changed))
+	       (test-data-ids  (alist-ref 'test_data  changed))
+	       (run-stat-ids   (alist-ref 'run_stats  changed))
+         (area-tag    (if (args:get-arg "-area-tag") 
+                                 (args:get-arg "-area-tag")
+                                 (if (args:get-arg "-area") 
+                                   (args:get-arg "-area") 
+                                   ""))))
+           (if (and (equal? area-tag "") (not (pgdb:is-area-taged dbh (vector-ref area-info 0))))
+            (set! area-tag *default-area-tag*)) 
+           (if (not (equal? area-tag "")) 
+             (task:add-area-tag dbh area-info area-tag)) 
+	  (if (or (not (null? test-ids)) (not (null? run-ids)))
+	      (begin
+                (debug:print-info 0 *default-log-port*  "syncing runs")   
+	              (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) 
+                (debug:print-info 0 *default-log-port*  "syncing tests")
+		            (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time)
+                (debug:print-info 0 *default-log-port*  "syncing test steps")
+                (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time)
+								(debug:print-info 0 *default-log-port*  "syncing test data")
+                (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time)
+                (print "----------done---------------")))
+     (let*  ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
+     (debug:print-info 0 *default-log-port* "smallest-time :" smallest-time  " last-sync-time " last-sync-time) ;; fixed: the port argument was missing
+    (if (not (and target run-name)) ;; the stored sync time is only advanced by incremental syncs
+	  (if (or (and smallest-time (> smallest-time last-sync-time)) (and smallest-time (eq? last-sync-time 0)))
+				(pgdb:write-sync-time dbh area-info smallest-time))))) ;;this needs to be changed
+	(if (tasks:set-area dbh configdat) ;; no area record: create one, then start over
+	    (tasks:sync-to-postgres configdat dest)
+	    (begin
+	      (debug:print 0 *default-log-port* "ERROR: unable to create an area record")
+	      #f)))))
+
+;;======================================================================
+;; L O C K I N G   M E C H A N I S M S 
+;;======================================================================
+
+;; faux-lock is deprecated. Please use simple-lock below. Retries once per second while wait-time > 0 and
+;; steals the lock in the last second only when allow-lock-steal is true. #f on timeout, else the pid check result.
+(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t))
+  (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count
+      (if (> wait-time 0)
+	  (begin
+	    (thread-sleep! 1)
+	    (if (and allow-lock-steal (eq? wait-time 1)) ;; only one second left, steal the lock (previously done even with allow-lock-steal: #f)
+		(begin
+		  (debug:print-info 0 *default-log-port* "stealing lock for " keyname)
+		  (common:faux-unlock keyname force: #t)))
+	    (common:faux-lock keyname wait-time: (- wait-time 1) allow-lock-steal: allow-lock-steal)) ;; keep the caller's steal policy across retries
+	  #f)
+      (begin
+        (rmt:no-sync-set keyname (conc (current-process-id)))
+        (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))))) ;; read back to confirm this pid holds the lock
+;; Drop the faux lock keyname when this process owns it, or unconditionally with force:. #t if allowed, else #f.
+(define (common:faux-unlock keyname #!key (force #f))
+  (let ((my-pid (conc (current-process-id))))
+    (and (or force (equal? my-pid (conc (rmt:no-sync-get/default keyname #f))))
+         (begin ;; delete only when an entry is actually present
+           (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname))
+           #t))))
+
+;; simple lock. improve and converge on this one.
+;; Thin wrapper: returns whatever rmt:no-sync-get-lock returns for keyname.
+(define (common:simple-lock keyname)
+  (rmt:no-sync-get-lock keyname))
+;; Release keyname via rmt:no-sync-del!. NOTE: the force key is accepted but not used here.
+(define (common:simple-unlock keyname #!key (force #f))
+  (rmt:no-sync-del! keyname))
+
+;;======================================================================
+;; db based host calls
+;;======================================================================
+
+;;======================================================================
+;;  T E S T   L A U N C H I N G   P E R   I T E M   W I T H   H O S T   T Y P E S
+;;======================================================================
+;; 
+;; [hosts]
+;; arm cubie01 cubie02
+;; x86_64 zeus xena myth01
+;; allhosts #{g hosts arm} #{g hosts x86_64}
+;; 
+;; [host-types]
+;; general #MTLOWESTLOAD #{g hosts allhosts}
+;; arm     #MTLOWESTLOAD #{g hosts arm}
+;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo
+;;
+;; [host-rules]
+;; # maxnload   => max normalized load
+;; # maxnjobs   => max jobs per cpu
+;; # maxjobrate => max jobs per second
+;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 
+;; 
+;; [launchers]
+;; envsetup general
+;; xor/%/n 4C16G
+;; % nbgeneral
+;; 
+;; [jobtools]
+;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match.
+;; flexi-launcher yes  
+;; launcher nbfake
+;;
+(define (common:get-launcher configdat testname itempath) ;; => launcher command for this test item, per the config sections described above
+  (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher"))) ;; used whenever flexi-launcher is off or nothing matches
+    (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher
+	     (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no")))
+	(let* ((launchers         (hash-table-ref/default configdat "launchers" '())))
+	  (if (null? launchers)
+	      fallback-launcher
+	      (let loop ((hed (car launchers)) ;; walk the [launchers] entries in order, first match wins
+			 (tal (cdr launchers)))
+		(let ((patt      (car hed))
+		      (host-type (cadr hed)))
+		  (if (tests:match patt testname itempath)
+		      (begin
+			(debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type)
+			(let ((launcher (configf:lookup configdat "host-types" host-type)))
+			  (if launcher
+			      (let* ((launcher-parts (string-split launcher))
+				     (launcher-exe   (car launcher-parts)))
+				(if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline
+				    (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat))
+						    (count     100)) ;; up to 100 further attempts after the first
+				      (if targ-host
+					  (conc "remrun " targ-host)
+					  (if (> count 0)
+					      (begin
+						(debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type)
+						(thread-sleep! (- 101 count)) ;; back-off: 1s on the first retry, one second longer each time
+						(host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)
+							   (- count 1)))
+					      (begin
+						(debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type)
+						(exit))))) ;; NOTE(review): exits without an explicit status code
+				    launcher)) ;; any other host-types value is returned as the launcher string unchanged
+			      (begin
+				(debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type)
+				(if (null? tal)
+				    fallback-launcher
+				    (loop (car tal)(cdr tal)))))))
+		      ;; no match, try again
+		      (if (null? tal)
+			  fallback-launcher
+			  (loop (car tal)(cdr tal))))))))
+	fallback-launcher)))
+
+;; ideally put all this info into the db, no need to preserve it across moving homehost
+;;
+;; return list of
+;;  ( reachable? update-time cpuload )   ;; in this order; an unreachable host yields ( #f 0 -1 )
+(define (common:get-host-info hostname)
+  (let* ((loadinfo                         (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data
+         (load                             (car loadinfo))
+         (load-sample-time                 (cdr loadinfo))
+         (load-sample-age                  (- (current-seconds) load-sample-time))
+         (loadinfo-timeout-seconds         6) ;; this was 20 seconds, seems way too lax. Switch to 6 seconds
+         (host-last-update-timeout-seconds 4)
+         (host-rec (hash-table-ref/default *host-loads* hostname #f))
+         )
+    (cond
+     ((< load-sample-age loadinfo-timeout-seconds) ;; 1) load sample from the db is fresh enough
+      (list #t
+            load-sample-time
+            load))
+     ((and host-rec                                ;; 2) in-memory *host-loads* record is fresh enough
+           (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds)))
+      (list #t
+            (host-last-update host-rec)
+            (host-last-cpuload host-rec )))
+     ((common:unix-ping hostname)                  ;; 3) host answers a ping: measure the load now
+      (list #t
+            (current-seconds)
+            (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds
+     (else
+      (list #f 0 -1) ;; bad host, don't use!
+      ))))
+
+;; see defstruct host at top of file.
+;;    host: reachable last-update last-used last-cpuload
+;;
+(define (common:update-host-loads-table hosts-raw)
+  ;; For each non-empty, whitespace-free name in hosts-raw, store the result of
+  ;; common:get-host-info in that host's *host-loads* record (created on demand).
+  (let ((usable-name? (lambda (x) (string-match (regexp "^\\S+$") x)))
+        (record-for
+         (lambda (hostname)
+           (or (hash-table-ref/default *host-loads* hostname #f)
+               (let ((fresh (make-host)))
+                 (hash-table-set! *host-loads* hostname fresh)
+                 fresh)))))
+    (for-each
+     (lambda (hostname)
+       (let* ((rec  (record-for hostname))
+              (info (common:get-host-info hostname)))
+         ;; info is read positionally: first reachable flag, second
+         ;; update time, third cpu load
+         (host-reachable-set!    rec (car   info))
+         (host-last-update-set!  rec (cadr  info))
+         (host-last-cpuload-set! rec (caddr info))))
+     (filter usable-name? hosts-raw))))
+;; Pick a host for running a test. Returns a hostname from hosts-raw, or #f when none qualifies.
+;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteria from the
+;; [host-rules] section.
+;;
+(define (common:get-least-loaded-host hosts-raw host-type configdat)
+  (let* ((rdat       (configf:lookup configdat "host-rules" host-type))
+	 (rules      (common:val->alist (or rdat "") convert: #t))   ;; maxnload, maxnjobs, maxjobrate
+	 (maxnload   (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load
+	 (maxnjobs   (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs
+	 (maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second
+	 (hosts      (filter (lambda (x)
+			       (string-match (regexp "^\\S+$") x))
+			     hosts-raw))
+         ;; (best-host #f)
+	 (get-rec    (lambda (hostname)
+		       ;; (print "get-rec hostname=" hostname)
+		       (let ((h (hash-table-ref/default *host-loads* hostname #f)))
+			 (if h
+			     h
+			     (let ((h (make-host)))
+			       (hash-table-set! *host-loads* hostname h)
+			       h)))))
+         (best-load 99999) ;; NOTE(review): assigned below but never read in this function
+         (curr-time (current-seconds))
+	 (get-hosts-sorted (lambda (hosts)
+			     (sort hosts (lambda (a b)
+					   (let ((a-rec (get-rec a))
+						 (b-rec (get-rec b)))
+					     ;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec))
+					     ;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec))
+					     (< (host-last-used a-rec)
+						(host-last-used b-rec))))))))
+    (debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts)) ;; note: the sort is run here and again below
+    (if (null? hosts)
+	#f ;; no hosts to select from. All done and giving up now.
+	(let ((hosts-sorted (get-hosts-sorted hosts)))
+	  (common:update-host-loads-table hosts) ;; refresh reachable / last-update / last-cpuload for every candidate
+	  (let loop ((hostname  (car hosts-sorted))
+		     (tal       (cdr hosts-sorted))
+		     (best-host #f)) ;; stays #f: the loop returns as soon as a host qualifies
+	    (let* ((rec       (get-rec hostname))
+		   (reachable (host-reachable     rec))
+		   (load      (host-last-cpuload  rec))
+		   (last-used (host-last-used     rec))
+		   (delta     (- curr-time last-used))
+		   (job-rate  (if (> delta 0)
+				  (/ 1 delta)
+				  999)) ;; jobs per second
+		   (new-best  
+		    (cond
+		     ((not reachable)
+		      (debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.")
+		      best-host)
+		     ((and (< load maxnload)        ;; load is acceptable
+			   (< job-rate maxjobrate)) ;; job rate is acceptable
+		      (set! best-load load)
+		      hostname)
+		     (else best-host))))
+	      (debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." )
+	      (if new-best
+		  (begin ;; found a host, return it
+		    (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate)
+		    (host-last-used-set! rec curr-time) ;; mark as just used so it sorts last next time
+		    new-best)
+		  (if (null? tal) #f (loop (car tal)(cdr tal) best-host)))))))))
+;; Sleep and re-check, at most count times, while the cpu load on remote-host (localhost when #f) is above the limit or jumping.
+(define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f))
+  (let* ((loadavg (common:get-cpu-load remote-host))
+	 (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero.  If we get 1, it's possible that we got the previous default, and we should check again
+		      (common:get-num-cpus remote-host) ;; NOTE(review): as written this re-queries for every numcpus-in >= 1 - confirm intent
+		      numcpus-in))
+	 (maxload (if force-maxload
+		      maxload-in
+		      (max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
+	 (first   (car loadavg))
+	 (next    (cadr loadavg))
+	 (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1
+	 (loadjmp (- first next))
+         (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)(/ (- 1000 count) 10) waitdelay) (- first adjload) ))  ))) ;; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously
+    (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
+		      ", load: " first ", adjload: " adjload ", loadjmp: " loadjmp)
+    (cond
+     ((and (> first adjload) ;; load above the cpu-scaled limit
+	   (> count 0))
+      (debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg ""))
+      (thread-sleep! adjwait)
+      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host force-maxload: force-maxload)) ;; fixed: keep force-maxload, else a forced limit < 0.5 was clamped on retry
+     ((and (> loadjmp numcpus) ;; load rose by more than numcpus between the first two loadavg values
+	   (> count 0))
+      (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
+      (thread-sleep! adjwait)
+      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host force-maxload: force-maxload)))))
+
+;; Wait until the normalized cpu load on the homehost is below maxload.
+;;
+;;   maxload  per-cpu load limit handed to common:wait-for-normalized-load
+;;   msg      text appended to the delay log messages
+;;
+;; When already running on the homehost the host is passed as #f so that the
+;; load and cpu-count lookups are done locally. The cpu count itself is looked
+;; up by common:wait-for-normalized-load, so it is not fetched here.
+;;
+(define (common:wait-for-homehost-load maxload msg)
+  (let* ((hh-dat (if (common:on-homehost?)
+                     #f
+                     (common:get-homehost)))
+         (hh     (if hh-dat (car hh-dat) #f)))
+    (common:wait-for-normalized-load maxload msg hh)))
+
+;; Return the number of "processor" entries found in /proc/cpuinfo of
+;; remote-host (read through ssh), or of the local machine when remote-host
+;; is #f. The answer is cached per host name for 86400 seconds.
+;;
+(define (common:get-num-cpus remote-host)
+  (let* ((actual-host (or remote-host (get-host-name))))
+    (or (common:get-cached-info actual-host "num-cpus" age: 86400) ;; hosts had better not be changing the number of cpus too often!
+	(let* ((proc   (lambda ()
+			 ;; count the lines of the form "processor : N" on current input
+			 (let loop ((numcpu 0)
+				    (inl    (read-line)))
+			   (if (eof-object? inl)
+			       numcpu
+			       (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
+					 (+ numcpu 1)
+					 numcpu)
+				     (read-line))))))
+	       (result (if remote-host
+			   (with-input-from-pipe 
+			       (conc "ssh " remote-host " cat /proc/cpuinfo")
+			     proc)
+			   (with-input-from-file "/proc/cpuinfo" proc))))
+	  ;; cache exactly once, under the resolved host name; remote-host is
+	  ;; #f for the local machine and must not be used as the cache key
+	  (common:write-cached-info actual-host "num-cpus" result)
+	  result))))
+
+;; wait for normalized cpu load to drop below maxload
+;;
+(define (common:wait-for-normalized-load maxload msg remote-host)
+  ;; look up the cpu count for the host and delegate to the generic wait,
+  ;; using a base wait delay of 15
+  (common:wait-for-cpuload maxload
+			   (common:get-num-cpus remote-host)
+			   15
+			   msg: msg
+			   remote-host: remote-host))
+
+;;======================================================================
+;; D E B U G G I N G   S T U F F 
+;;======================================================================
+
+;; (define *verbosity*         1)
+;; (define *logging*           #f)
+
+;; Store the current version signature (common:version-signature) under the
+;; variable name "MEGATEST_VERSION" via rmt:set-var.
+(define (common:set-last-run-version)
+  (rmt:set-var "MEGATEST_VERSION" (common:version-signature)))
+
+;; positive number if megatest version > db version
+;; negative number if megatest version < db version
+(define (common:version-db-delta)
+  ;; difference between the running megatest version and the numeric version
+  ;; last recorded for this area
+  (let ((recorded (common:get-last-run-version-number)))
+    (- megatest-version recorded)))
+
+;; #t when the recorded MEGATEST_VERSION differs from the current version signature
+(define (common:version-changed?)
+  (let ((recorded (common:get-last-run-version))
+	(current  (common:version-signature)))
+    (not (equal? recorded current))))
+
+;; from metadat lookup MEGATEST_VERSION
+;;
+;; Return whatever rmt:get-var yields for the variable "MEGATEST_VERSION".
+(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB
+  (rmt:get-var "MEGATEST_VERSION"))
+
+;; Numeric form of the recorded version: the first six characters of the
+;; stored MEGATEST_VERSION value converted with string->number.
+(define (common:get-last-run-version-number)
+  (let* ((recorded (common:get-last-run-version))
+	 (leading  (substring recorded 0 6)))
+    (string->number leading)))
+
+;; #t when the first four characters of the running megatest version differ
+;; from the first four characters of the recorded MEGATEST_VERSION value.
+;;
+;; Both values are rendered as strings before comparison. A value that renders
+;; to fewer than four characters (for example a false value renders as "#f")
+;; is compared as-is instead of raising a substring range error.
+;;
+(define (common:api-changed?)
+  (let* ((api-prefix (lambda (val)
+		       (let ((str (->string val)))
+			 (substring str 0 (min 4 (string-length str))))))
+	 (current    (api-prefix megatest-version))
+	 (recorded   (api-prefix (common:get-last-run-version))))
+    (not (equal? current recorded))))
+
+;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))'
+(define (common:get-disks #!key (configf #f))
+  ;; use the supplied config table when given, otherwise read megatest.config;
+  ;; fall back to '("none" "") when there is no "disks" entry
+  (let ((cfg (or configf
+		 (configf:read-config "megatest.config" #f #t))))
+    (hash-table-ref/default cfg "disks" '("none" ""))))
+
+;;======================================================================
+;; watchdog and exit procedures
+;;======================================================================
+
+;;======================================================================
+;; E X I T   H A N D L I N G
+;;======================================================================
+
+;;   (let ((ohh (common:on-homehost?))
+;; 	(srv (args:get-arg "-server")))
+;;     (and ohh srv)))
+    ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)
+
+;; Thread object (named "Watchdog thread") whose body runs common:watchdog.
+;; The thread is only created here; nothing in this definition starts it.
+;; Any exception raised by common:watchdog is caught, and the call chain plus
+;; the exception message are printed instead of propagating.
+(define *watchdog* (make-thread
+		    (lambda ()
+		      (handle-exceptions
+			  exn
+			  (begin
+			    (print-call-chain)
+			    (print " message: " ((condition-property-accessor 'exn 'message) exn)))
+			(common:watchdog)))
+		    "Watchdog thread"))
+
+;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
+;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
+;;
+;; Periodic sync loop used when the db is read-only.
+;;
+;;   dbstruct - supplies the two db handles (dbr:dbstruct-mtdb and dbr:dbstruct-tmpdb)
+;;
+;; Loops until *time-to-exit* is set. Each pass waits out the remainder of a
+;; three second cool-off, then compares the modification times of the two db
+;; files and calls db:multi-db-sync with 'old2new when the golden file is newer
+;; than the tmp copy and has not been modified in the last three seconds.
+(define (common:readonly-watchdog dbstruct)
+  (thread-sleep! 0.05) ;; delay for startup
+  (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.")
+  ;; sync megatest.db to /tmp/.../megatest.db
+  (let* ((sync-cool-off-duration   3)
+        (golden-mtdb     (dbr:dbstruct-mtdb dbstruct))
+        (golden-mtpath   (db:dbdat-get-path golden-mtdb))
+        (tmp-mtdb        (dbr:dbstruct-tmpdb dbstruct))
+        (tmp-mtpath      (db:dbdat-get-path tmp-mtdb)))
+    (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.")
+    ;; last-sync-time starts at 0 so the first pass never waits for the cool-off
+    (let loop ((last-sync-time 0))
+      (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath)
+      (let* ((duration-since-last-sync (- (current-seconds) last-sync-time)))
+        (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync)
+        ;; sleep for whatever is left of the cool-off period
+        (if (and (not *time-to-exit*)
+                 (< duration-since-last-sync sync-cool-off-duration))
+            (thread-sleep! (- sync-cool-off-duration duration-since-last-sync)))
+        ;; *time-to-exit* is re-checked because it may have been set during the sleep
+        (if (not *time-to-exit*)
+            (let ((golden-mtdb-mtime (file-modification-time golden-mtpath))
+                  (tmp-mtdb-mtime    (file-modification-time tmp-mtpath)))
+	      (if (> golden-mtdb-mtime tmp-mtdb-mtime)
+		  (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back
+		      (let ((res (db:multi-db-sync dbstruct 'old2new)))
+			(debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))))
+              ;; the loop restarts with the current time whether or not a sync happened
+              (loop (current-seconds)))
+            #t)))
+    (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath)))
+
+
+;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
+;; Entry point of the watchdog thread.
+;;
+;; Does nothing unless launch:setup succeeds and this process is on the
+;; homehost. On the homehost it opens the db with (db:setup #t) and then:
+;;   - read-only dbstruct: runs common:readonly-watchdog
+;;   - otherwise: picks the syncer named by [server] sync-method in the config
+;;     (default "brute-force-sync"); an unknown name logs an error and calls (exit 1)
+(define (common:watchdog)
+  (debug:print-info 13 *default-log-port* "common:watchdog entered.")
+  (if (launch:setup)
+      (if (common:on-homehost?)
+	  (let ((dbstruct (db:setup #t)))
+	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
+	    (cond
+	     ((dbr:dbstruct-read-only dbstruct)
+	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
+	      (common:readonly-watchdog dbstruct))
+	     (else
+	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
+              ;; sync method comes from the config, falling back to brute-force-sync
+              (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync")))
+                (cond
+                 ((equal? syncer "brute-force-sync")
+                  (server:writable-watchdog-bruteforce dbstruct))
+                 ((equal? syncer "delta-sync")
+                  (server:writable-watchdog-deltasync dbstruct))
+                 (else
+                  (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.")
+                  (exit 1)))
+                ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
+                )))
+	    (debug:print-info 13 *default-log-port* "watchdog done."))
+	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))
+
+
+;; Shutdown routine: closes databases and the log port, then returns 0.
+;;
+;; no-hurry is #t only when *time-to-exit* was not already set on entry (it is
+;; set here in that case). Two threads are created and started:
+;;   th1 - closes *dbstruct-db*, interrupts and finalizes the sqlite3 handle in
+;;         *task-db*, closes *default-log-port* (unless it is the current error
+;;         port) and points *default-log-port* at the current error port
+;;   th2 - logs a message and sleeps 5 seconds (no-hurry) or 2 seconds
+;; Only th1 is joined.
+;; NOTE(review): th2 is never joined, so as written it does not bound how long
+;; the cleanup in th1 may take - confirm whether joining th2 was intended.
+(define (std-exit-procedure)
+  ;;(common:telemetry-log-close)
+  (on-exit (lambda () 0))
+  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
+  (let ((no-hurry  (if *time-to-exit* ;; hurry up
+		       #f
+		       (begin
+			 (set! *time-to-exit* #t)
+			 #t))))
+    (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.")
+    ;; db statistics are only printed on an unhurried exit at debug level 18
+    (if (and no-hurry (debug:debug-mode 18))
+	(rmt:print-db-stats))
+    (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds
+                              (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
+			      (if *task-db*    
+				  (let ((db (cdr *task-db*)))
+				    (if (sqlite3:database? db)
+					(begin
+					  (sqlite3:interrupt! db)
+					  (sqlite3:finalize! db #t)
+					  ;; (vector-set! *task-db* 0 #f)
+					  (set! *task-db* #f)))))
+                              #;(http-client#close-all-connections!)
+                              ;; (if (and *runremote*
+                              ;;          (remote-conndat *runremote*))
+                              ;;     (begin
+                              ;;       (http-client#close-all-connections!))) ;; for http-client
+                              ;; never close the error port itself, only a separate log port
+                              (if (not (eq? *default-log-port* (current-error-port)))
+                                  (close-output-port *default-log-port*))
+			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
+	  (th2 (make-thread (lambda ()
+			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
+			      (if no-hurry
+                                  (begin
+                                    (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff
+                                  (begin
+      				  (thread-sleep! 2)))
+      			      (debug:print 4 *default-log-port* " ... done")
+      			      )
+			    "clean exit")))
+      (thread-start! th1)
+      (thread-start! th2)
+      (thread-join! th1)
+      )
+    )
+
+  0)
+
+;; Signal handler: sets *time-to-exit*, logs the signal number and calls (exit).
+;;   signum - the signal number passed to the handler
+(define (std-signal-handler signum)
+  ;; (signal-mask! signum)
+  (set! *time-to-exit* #t) 
+  ;;(debug:print-info 13 *default-log-port* "got signal "signum)
+  (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly")
+  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway
+  (exit))
+
+;; Signal handler variant: sets *time-to-exit*, logs the signal number and
+;; calls (exit). Despite the log text no email is sent here; that is still a TODO.
+;;   signum - the signal number passed to the handler
+(define (special-signal-handler signum)
+  ;; (signal-mask! signum)
+  (set! *time-to-exit* #t)
+  ;;(debug:print-info 13 *default-log-port* "got signal "signum)
+  (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting!!")
+  ;; TODO: send email to notify the admin contact listed in the config that the listener got killed
+  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway
+  (exit))
+
+
+;; Route signal/int and signal/term to std-signal-handler.
+(set-signal-handler! signal/int  std-signal-handler)  ;; ^C
+(set-signal-handler! signal/term std-signal-handler)
+
+;; (set-signal-handler! signal/stop std-signal-handler)  ;; ^Z NO, do NOT handle ^Z!
+
+;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
+;; Do NOT check if not on homehost!
+;;
+;; On the homehost, when common:api-changed? reports a changed api version,
+;; either migrate the area with common:cleanup-db or exit with status 1.
+;;
+;; Decision order once a mismatch is detected:
+;;   1. MT_SKIP_DB_MIGRATE set in the environment        -> do nothing
+;;   2. megatest.config and megatest.db exist, the db is
+;;      writable and the user owns megatest.config       -> run common:cleanup-db
+;;      (any exception during cleanup is logged and ends in (exit 1))
+;;   3. otherwise log the specific reason and (exit 1)
+;;
+(define (common:exit-on-version-changed)
+  (if (common:on-homehost?)
+      (if (common:api-changed?)
+          (let* ((mtconf    (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
+                 (dbfile    (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
+                 (read-only (not (file-write-access? dbfile)))
+                 (dbstruct  (db:setup #t)))
+            (debug:print 0 *default-log-port*
+                         "WARNING: Version mismatch!\n"
+                         "   expected: " (common:version-signature) "\n"
+                         "   got:      " (common:get-last-run-version))
+            (cond
+             ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
+             ;; the ownership test comes last in the `and` so that file-owner
+             ;; is only called once megatest.config is known to exist
+             ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
+                   (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db
+              (debug:print 0 *default-log-port* "   I see you are the owner of megatest.config, attempting to cleanup and reset to new version")
+              (handle-exceptions
+               exn
+               (begin
+                 (debug:print 0 *default-log-port* "Failed to switch versions.")
+                 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+                 (print-call-chain (current-error-port))
+                 (exit 1))
+               (common:cleanup-db dbstruct)))
+             ((not (common:file-exists? mtconf))
+              (debug:print 0 *default-log-port* "   megatest.config does not exist in this area.  Cannot proceed with megatest version migration.")
+              (exit 1))
+             ((not (common:file-exists? dbfile))
+              (debug:print 0 *default-log-port* "   megatest.db does not exist in this area.  Cannot proceed with megatest version migration.")
+              (exit 1))
+             ;; the ownership check is made against megatest.config, so the
+             ;; message names that file
+             ((not (eq? (current-user-id)(file-owner mtconf)))
+              (debug:print 0 *default-log-port* "   You do not own megatest.config in this area.  Cannot proceed with megatest version migration.")
+              (exit 1))
+             (read-only
+              (debug:print 0 *default-log-port* "   You have read-only access to this area.  Cannot proceed with megatest version migration.")
+              (exit 1))
+             (else
+              (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"")
+              (exit 1)))))))
+;;      (begin
+;;	(debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
+;;	(exit 1))))
+
+;; Move me elsewhere ...
+;; RADT => Why do we need the version check here? This is called only if there is a version mismatch.
+;;
+(define (common:cleanup-db dbstruct #!key (full #f))
+  ;; run the sync with the fixed option set schema, killservers, adj-target,
+  ;; new2old and dejunk; the `full` keyword is accepted but not consulted
+  (db:multi-db-sync dbstruct 'schema 'killservers 'adj-target 'new2old 'dejunk)
+  ;; record the new version only when the api version actually changed
+  (if (common:api-changed?)
+      (common:set-last-run-version)))
+
+;; called in megatest.scm, host-port is string hostname:port
+;;
+;; NOTE: This is NOT called directly from clients as not all transports support a client running
+;;       in the same process as the server.
+;;
+#;(define (server:ping host-port-in #!key (do-exit #f))
+  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
+		       #f ;; (server:check-if-running *toppath*)
+		;; (if (number? host-port-in) ;; we were handed a server-id
+		;; 	   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
+		;; 	     ;; (print "srec: " srec " host-port-in: " host-port-in)
+		;; 	     (if srec
+		;; 		 (conc (vector-ref srec 3) ":" (vector-ref srec 4))
+		;; 		 (conc "no such server-id " host-port-in)))
+		       host-port-in))) ;; )
+    (let* ((host-port (if host:port
+			  (let ((slst (string-split   host:port ":")))
+			    (if (eq? (length slst) 2)
+				(list (car slst)(string->number (cadr slst)))
+				#f))
+			  #f)))
+;;	   (toppath       (launch:setup)))
+      ;; (print "host-port=" host-port)
+      (if (not host-port)
+	  (begin
+	    (if host-port-in
+		(debug:print 0 *default-log-port*  "ERROR: bad host:port"))
+	    (if do-exit (exit 1))
+	    #f)
+	  (let* ((iface      (car host-port))
+		 (port       (cadr host-port))
+		 #;(server-dat (http-transport:client-connect iface port))
+		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
+	    (if (and (list? login-res)
+		     (car login-res))
+		(begin
+		  ;; (print "LOGIN_OK")
+		  (if do-exit (exit 0))
+		  #t)
+		(begin
+		  ;; (print "LOGIN_FAILED")
+		  (if do-exit (exit 1))
+		  #f)))))))
+
+;; run ping in separate process, safest way in some cases
+;;
+(define (server:ping-server ifaceport)
+  ;; Run "<megatest exe> -ping <ifaceport>" as a child process and read all of
+  ;; its output. The result is #t only when the last line read is LOGIN_OK;
+  ;; no output at all, or any other last line, gives #f.
+  (let ((ping-cmd (conc (common:get-megatest-exe) " -ping " ifaceport)))
+    (with-input-from-pipe
+     ping-cmd
+     (lambda ()
+       (let loop ((line (read-line))
+                  (prev "NOREPLY"))
+         (if (eof-object? line)
+             (eq? (string->symbol prev) 'LOGIN_OK)
+             (loop (read-line) line)))))))
+
+;; ping the given server
+;;
+#;(define (server:check-server server-record)
+  (let* ((server-url (server:record->url server-record))
+         (res        (case *transport-type*
+                       ((http)(server:ping server-url))
+                       ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
+                       )))
+    (if res
+        server-url
+	#f)))
+
+;; no longer care if multiple servers are started by accident. older servers will drop off in time.
+;;
+#;(define (server:check-if-running areapath) ;;  #!key (numservers "2"))
+  (let* ((ns            (server:get-num-servers))
+	 (servers       (server:get-best (server:get-list areapath))))
+    ;; (print "servers: " servers " ns: " ns)
+    (if (or (and servers
+		 (null? servers))
+	    (not servers)
+	    (and (list? servers)
+		 (< (length servers) (random ns)))) ;; somewhere between 0 and numservers
+        #f
+        (let loop ((hed (car servers))
+                   (tal (cdr servers)))
+          (let ((res (server:check-server hed)))
+            (if res
+                res
+                (if (null? tal)
+                    #f
+                    (loop (car tal)(cdr tal)))))))))
+
+
+;; NOT USED (well, ok, reference in rpc-transport but otherwise not used).
+;;
+(define (server:login toppath)
+  ;; NOTE(review): the call returns a one-argument procedure, not a boolean.
+  ;; The outer toppath is shadowed by the inner parameter and never read.
+  ;; Confirm this is what the remaining rpc-transport reference expects.
+  (lambda (toppath)
+    ;; record the access time, then report whether the given path is this area
+    (set! *db-last-access* (current-seconds)) ;; might not be needed.
+    (equal? *toppath* toppath)))
+;; Given a run id start a server process    ### NOTE ### > file 2>&1 
+;; if the run-id is zero and the target-host is set 
+;; try running on that host
+;;   incidental: rotate logs in logs/ dir.
+;;
+#;(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
+  (let* ((curr-host   (get-host-name))
+         ;; (attempt-in-progress (server:start-attempted? areapath))
+         ;; (dot-server-url (server:check-if-running areapath))
+	 (curr-ip     (server:get-best-guess-address curr-host))
+	 (curr-pid    (current-process-id))
+	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
+	 (target-host (car homehost))
+	 (testsuite   (common:get-testsuite-name))
+	 (logfile     (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
+	 (cmdln (conc (common:get-megatest-exe)
+		      " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
+							   " -daemonize "
+							   "")
+		      ;; " -log " logfile
+		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
+	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread"))
+         (load-limit  (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0)))
+    ;; we want the remote server to start in *toppath* so push there
+    (push-directory areapath)
+    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
+    (thread-start! log-rotate)
+    
+    ;; host.domain.tld match host?
+    (if (and target-host 
+	     ;; look at target host, is it host.domain.tld or ip address and does it 
+	     ;; match current ip or hostname
+	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
+	     (not (equal? curr-ip target-host)))
+	(begin
+	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
+	  (setenv "TARGETHOST" target-host)))
+      
+    (setenv "TARGETHOST_LOGF" logfile)
+    (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time
+    (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever
+    (system (conc "nbfake " cmdln))
+    (unsetenv "TARGETHOST_LOGF")
+    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
+    (thread-join! log-rotate)
+    (pop-directory)))
+
+;; kind start up of servers, wait 40 seconds before allowing another server for a given
+;; run-id to be launched
+#;(define (server:kind-run areapath)
+  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
+      (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
+	     (call-num     (car last-run-dat))
+	     (when-run     (cadr last-run-dat))
+	     (run-delay    (+ (case call-num
+				((0)    0)
+				((1)   20)
+				((2)  300)
+				(else 600))
+			      (random 5)))   ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously
+	     (lock-file    (conc areapath "/logs/server-start.lock")))
+	(if	(> (- (current-seconds) when-run) run-delay)
+		(begin
+		  (common:simple-file-lock-and-wait lock-file expire-time: 15)
+		  (server:run areapath)
+		  (thread-sleep! 2) ;; don't release the lock for at least a few seconds
+		  (common:simple-file-release-lock lock-file)))
+	(hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))
+
+#;(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.
+
+#;(define (server:start-and-wait areapath #!key (timeout 60))
+  (let ((give-up-time (+ (current-seconds) timeout)))
+    (let loop ((server-url (server:check-if-running areapath))
+	       (try-num    0))
+      (if (or server-url
+	      (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
+	  server-url
+	  (let ((num-ok (length (server:get-best (server:get-list areapath)))))
+	    (if (and (> try-num 0)  ;; first time through simply wait a little while then try again
+		     (< num-ok 1))  ;; if there are no decent candidates for servers then try starting a new one
+		(server:kind-run areapath))
+	    (thread-sleep! 5)
+	    (loop (server:check-if-running areapath)
+		  (+ try-num 1)))))))
+
+;;======================================================================
+;; make html output
+;;======================================================================
+
+;; Record logf as the top log of test-name in run run-id by issuing the
+;; 'tests:test-set-toplog general call. run-id is passed twice: first as the
+;; leading argument of rmt:general-call, then again after logf.
+(define (tests:test-set-toplog! run-id test-name logf) 
+  (rmt:general-call 'tests:test-set-toplog run-id logf run-id test-name))
+
+;; Regenerate the rollup html page for an iterated test and register it as the
+;; top log of the test.
+;;
+;;   run-id, test-id, test-name  identify the test
+;;   force                       regenerate regardless of the current logf
+;;
+;; Unless force is true the page is only regenerated when one of these holds:
+;;   1. logf is "logs/final.log"
+;;   2. logf is the same as the output file name
+;;
+;; The work is done from inside the test directory and guarded by a simple
+;; lock file next to the output file. The original working directory is
+;; restored on every exit path, not only when the lock was obtained.
+;;
+(define (tests:summarize-items run-id test-id test-name force)
+  (let* ((outputfilename (conc "megatest-rollup-" test-name ".html"))
+	 (orig-dir       (current-directory))
+	 (logf-info      (rmt:test-get-logfile-info run-id test-name))
+	 (logf           (if logf-info (cadr logf-info) #f))
+	 (path           (if logf-info (car  logf-info) #f)))
+    ;; move into the test directory when it is known and exists
+    (if (and (string? path)
+	     (directory? path)) ;; can get #f here under some weird conditions. why, unknown ...
+	(begin
+	  (debug:print 4 *default-log-port* "Found path: " path)
+	  (change-directory path))
+	(debug:print-error 0 *default-log-port* "summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path))
+    (debug:print 4 *default-log-port* "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force)
+    (let ((result
+	   (if (or (equal? logf "logs/final.log")
+		   (equal? logf outputfilename)
+		   force)
+	       (let ((my-start-time (current-seconds))
+		     (lockf         (conc outputfilename ".lock")))
+		 (let loop ((have-lock  (common:simple-file-lock lockf)))
+		   (if have-lock
+		       (let ((script (configf:lookup *configdat* "testrollup" test-name)))
+			 (print "Obtained lock for " outputfilename)
+			 (rmt:set-state-status-and-roll-up-items run-id test-name "" #f #f #f)
+			 ;; a [testrollup] script, when configured, replaces the built-in generator
+			 (if script
+			     (system (conc script " > " outputfilename " & "))
+			     (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename))
+			 (common:simple-file-release-lock lockf)
+			 (change-directory orig-dir)
+			 ;; NB// tests:test-set-toplog! is remote internal...
+			 (tests:test-set-toplog! run-id test-name outputfilename))
+		       ;; didn't get the lock, check to see if current update started later than this 
+		       ;; update, if so we can exit without doing any work
+		       (if (> my-start-time (handle-exceptions
+						exn
+						0 ;; lock file vanished or is unreadable: treat as very old
+					      (file-modification-time lockf)))
+			   ;; we started since current re-gen in flight, delay a little and try again
+			   (begin
+			     (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it")
+			     (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds
+			     (loop (common:simple-file-lock lockf))))))))))
+      ;; paths that skipped the regeneration or gave up on the lock would
+      ;; otherwise leave the process sitting in the test directory
+      (change-directory orig-dir)
+      result)))
+
+;; Write an html summary page for the items of an iterated test.
+;;
+;;   run-id, test-name  select the records (rmt:test-get-records-for-index-file)
+;;   test-id            accepted for interface compatibility, not used here
+;;   outputfilename     file to (over)write; everything printed below goes into it
+;;
+;; Page layout: document header, a table of per-state counts next to a table
+;; of per-status counts, then one table row per item linking to the item's
+;; test-summary.html.
+;;
+(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)
+  (let ((counts      (make-hash-table)) ;; status -> number of items
+        (statecounts (make-hash-table)) ;; state  -> number of items
+        (rows        "")                ;; accumulated <tr> markup, one per item
+        (tot         0)
+        (testdat     (rmt:test-get-records-for-index-file run-id test-name)))
+    (with-output-to-file outputfilename
+      (lambda ()
+        ;; the document header is printed first so that it precedes the tables
+        (print "<html><title>Summary: " test-name
+               "</title><body><h2>Summary for " test-name "</h2>")
+        ;; one pass over the records: tally state/status and build the item rows
+        (for-each
+         (lambda (testrecord)
+           (let ((itempath (vector-ref testrecord 1))
+                 (state    (vector-ref testrecord 2))
+                 (status   (vector-ref testrecord 3))
+                 (comment  (vector-ref testrecord 6)))
+             (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0)))
+             (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0)))
+             (set! rows (conc rows "<tr>"
+                              "<td><a href=\"" itempath "/test-summary.html\"> " itempath "</a></td>"
+                              "<td>" state    "</td>"
+                              "<td><font color=" (common:get-color-from-status status)
+                              ">"   status   "</font></td>"
+                              "<td>" (if (equal? comment "")
+                                         "&nbsp;"
+                                         comment) "</td>"
+                              "</tr>"))))
+         (if (list? testdat)
+             testdat
+             (begin
+               ;; NOTE(review): printed while output is redirected, so this text lands in the html file
+               (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name)
+               '())))
+
+        (print "<table><tr><td valign=\"top\">")
+        ;; Print out stats for state
+        (set! tot 0)
+        (print "<table cellspacing=\"0\" border=\"1\"><tr><td colspan=\"2\"><h2>State stats</h2></td></tr>")
+        (for-each (lambda (state)
+                    (set! tot (+ tot (hash-table-ref statecounts state)))
+                    (print "<tr><td>" state "</td><td>" (hash-table-ref statecounts state) "</td></tr>"))
+                  (hash-table-keys statecounts))
+        (print "<tr><td>Total</td><td>" tot "</td></tr></table>")
+        (print "</td><td valign=\"top\">")
+        ;; Print out stats for status
+        (set! tot 0)
+        (print "<table cellspacing=\"0\" border=\"1\"><tr><td colspan=\"2\"><h2>Status stats</h2></td></tr>")
+        (for-each (lambda (status)
+                    (set! tot (+ tot (hash-table-ref counts status)))
+                    (print "<tr><td><font color=\"" (common:get-color-from-status status) "\">" status
+                           "</font></td><td>" (hash-table-ref counts status) "</td></tr>"))
+                  (hash-table-keys counts))
+        (print "<tr><td>Total</td><td>" tot "</td></tr></table>")
+        (print "</td></tr></table>")
+
+        ;; item table: header row, then the rows collected above
+        (print "<table cellspacing=\"0\" border=\"1\">"
+               "<tr><td>Item</td><td>State</td><td>Status</td><td>Comment</td></tr>"
+               rows "</table></body></html>")))))
+
+;; Literal block of html: a <style> section (colors for th and for td cells
+;; with class test/PASS/FAIL/SKIP/WARN/WAIVED/ABORT) followed by a <script>
+;; section defining filtersome, addEvents, activateTree, toggleBranch and
+;; addLinksToBranches. The text between the heredoc markers is data and is
+;; used verbatim.
+(define tests:css-jscript-block
+#<<EOF
+<style type="text/css">
+ul.LinkedList { display: block; }
+/* ul.LinkedList ul { display: none; } */
+.HandCursorStyle { cursor: pointer; cursor: hand; }  /* For IE */
+th {background-color: #8c8c8c;}
+td.test {background-color: #d9dbdd;}
+td.PASS {background-color: #347533;}
+td.FAIL {background-color: #cc2812;}
+td.SKIP{background-color: #FFD733;}
+td.WARN {background-color: #EA8724;}
+td.WAIVED {background-color: #838A12;}
+td.ABORT{background-color: #EA24B7;}
+.PASS .link, .SKIP .link, .WARN .link,.WAIVED .link,.ABORT .link, .FAIL .link{color: #FFFFFF;}
+
+
+</style>
+
+
+  <script type="text/JavaScript">
+
+    function filtersome() {
+  $("tr").show();
+  $(".test").filter(
+    function() {
+      var names = $('#testname').val().split(',');
+      var good=1;
+      for (var i=0, len=names.length; i<len; i++) {
+        var uname=names[i];
+        console.log("Trying to check for " + uname); 
+        if($(this).text().indexOf(uname) != -1) {
+          good= 0;
+          console.log("Found "+uname);
+        }
+      }
+      return good; 
+    }
+  ).parent().hide();
+//  $(".sum").show();
+}
+  
+    // Add this to the onload event of the BODY element
+    function addEvents() {
+      activateTree(document.getElementById("LinkedList1"));
+    }
+
+    // This function traverses the list and add links 
+    // to nested list items
+    function activateTree(oList) {
+      // Collapse the tree
+      for (var i=0; i < oList.getElementsByTagName("ul").length; i++) {
+        oList.getElementsByTagName("ul")[i].style.display="none";            
+      }                                                                  
+      // Add the click-event handler to the list items
+      if (oList.addEventListener) {
+        oList.addEventListener("click", toggleBranch, false);
+      } else if (oList.attachEvent) { // For IE
+        oList.attachEvent("onclick", toggleBranch);
+      }
+      // Make the nested items look like links
+      addLinksToBranches(oList);
+    }
+
+    // This is the click-event handler
+    function toggleBranch(event) {
+      var oBranch, cSubBranches;
+      if (event.target) {
+        oBranch = event.target;
+      } else if (event.srcElement) { // For IE
+        oBranch = event.srcElement;
+      }
+      cSubBranches = oBranch.getElementsByTagName("ul");
+      if (cSubBranches.length > 0) {
+        if (cSubBranches[0].style.display == "block") {
+          cSubBranches[0].style.display = "none";
+        } else {
+          cSubBranches[0].style.display = "block";
+        }
+      }
+    }
+
+    // This function makes nested list items look like links
+    function addLinksToBranches(oList) {
+      var cBranches = oList.getElementsByTagName("li");
+      var i, n, cSubBranches;
+      if (cBranches.length > 0) {
+        for (i=0, n = cBranches.length; i < n; i++) {
+          cSubBranches = cBranches[i].getElementsByTagName("ul");
+          if (cSubBranches.length > 0) {
+            addLinksToBranches(cSubBranches[0]);
+            cBranches[i].className = "HandCursorStyle";
+            cBranches[i].style.color = "blue";
+            cSubBranches[0].style.color = "black";
+            cSubBranches[0].style.cursor = "auto";
+          }
+        }
+      }
+    }
+  </script>
+EOF
+)
+
+(define tests:css-jscript-block-dynamic ;; script element for ./jquery3.1.0.js; returned by tests:css-jscript-block-cond when its argument is #t
+#<<EOF
+           <script src= ./jquery3.1.0.js></script> 
+EOF
+)
+
+(define (test:js-block javascript-lib) ;; => script element text that loads javascript-lib
+  (let ((tag-open "<script src=") (tag-close "></script>"))
+    (conc tag-open javascript-lib tag-close)))
+
+(define tests:css-jscript-block-static (test:js-block *java-script-lib*)) ;; script element for *java-script-lib*, computed once when this definition is evaluated
+
+;; Pick the script block: the dynamic one only when dynamic is exactly #t, else the static one.
+(define (tests:css-jscript-block-cond dynamic)
+  (cond ((equal? dynamic #t) tests:css-jscript-block-dynamic)
+        (else                tests:css-jscript-block-static)))
+
+
+(define (tests:run-record->test-path run numkeys) ;; => first numkeys fields of run + field numkeys+1
+  (let ((key-vals (take (vector->list run) numkeys))
+        (trailer  (vector-ref run (+ 1 numkeys))))
+    (append key-vals (list trailer))))
+
+;; Collect the test results of a list of runs into a three level hash table:
+;;
+;;    test-name -> "<test-name>:<field 11>" -> run-id -> (status html-path)
+;;
+;; Each run's id is read with db:get-value-by-header from the run record and
+;; header. status is field 4 of a test record and html-path is field 10 joined
+;; with field 13 by "/". numkeys is kept for interface compatibility; it is
+;; not needed to build the table.
+;;
+(define (tests:get-rest-data runs header numkeys)
+  (let* ((resh      (make-hash-table))
+         ;; fetch the sub table stored under key in ht, creating it on first use
+         (sub-table (lambda (ht key)
+                      (or (hash-table-ref/default ht key #f)
+                          (let ((new-ht (make-hash-table)))
+                            (hash-table-set! ht key new-ht)
+                            new-ht)))))
+    (for-each
+     (lambda (run)
+       (let* ((run-id    (db:get-value-by-header run header "id"))
+              ;; all tests of the run: pattern "%", no state/status filters,
+              ;; no paging, sorting or query type, last-update 0
+              (test-data (rmt:get-tests-for-run run-id "%" '() '() #f #f #f #f #f #f 0 #f)))
+         (for-each   ;; called for effect only, hence for-each rather than map
+          (lambda (test)
+            (let* ((test-name (vector-ref test 2))
+                   (test-item (conc test-name ":" (vector-ref test 11)))
+                   (html-path (conc (vector-ref test 10) "/" (vector-ref test 13)))
+                   (status    (vector-ref test 4))
+                   (run-table (sub-table (sub-table resh test-name) test-item)))
+              (hash-table-set! run-table run-id (list status html-path))))
+          test-data)))
+     runs)
+    resh))
+
+
+;; Render a tree of nested hash tables as nested html lists.
+;;
+;;   ht      : hash table whose keys are ordered with string<
+;;   path    : list of the keys followed to reach ht
+;;   tipfunc : called as (tipfunc tip path-to-tip); its result becomes the
+;;             content of the list item for that tip
+;;
+(define (common:htree->html ht path tipfunc)
+  (let ((entries (sort (hash-table->alist ht)
+                       (lambda (a b) (string< (car a) (car b))))))
+    (cond
+     ((null? entries)
+      (tipfunc #f path)) ;; an empty table is not expected here
+     (else
+      (s:ul
+       (map (lambda (entry)
+              (let* ((name     (car entry))
+                     (node     (cdr entry))
+                     (subpath  (append path (list name)))
+                     (subtree? (and (hash-table? node)
+                                    (not (null? (hash-table-keys node))))))
+                (if subtree?
+                    (s:li (list name (common:htree->html node subpath tipfunc)))
+                    (s:li (tipfunc node subpath)))))
+            entries))))))
+
+;; tests:dashboard-body: generate the html for one page of the dashboard, a table of
+;; test items (rows) against runs (columns). With flag #t each cell links to
+;; ./test_log?runid=..&testname=.., otherwise to the html path with "<linktree>/" substituted away.
+(define (tests:dashboard-body page pg-size keys numkeys  total-runs linktree area-name get-prev-links get-next-links flag run-patt target-patt)
+  (let* ((start (* page pg-size)) ;; passed to rmt:get-runs-by-patt as the offset of this page
+	       ;(runsdat   (rmt:get-runs "%" pg-size start (map (lambda (x)(list x "%")) keys)))
+         (runsdat   (rmt:get-runs-by-patt  keys run-patt target-patt start pg-size #f 0 sort-order: "desc"))
+                    ; db:get-runs-by-patt   keys runnamepatt targpatt offset limit fields last-update   
+	       (header    (vector-ref runsdat 0))
+	       (runs      (vector-ref runsdat 1))
+         (ctr 0) ;; index of the run field shown in the current key header row; incremented once per key
+         (test-runs-hash (tests:get-rest-data runs header numkeys))
+         (test-list (hash-table-keys test-runs-hash))) 
+  
+  (s:html tests:css-jscript-block (tests:css-jscript-block-cond flag)
+		   (s:title "Summary for " area-name)
+		   (s:body 'onload "addEvents();"
+                          (get-prev-links page linktree)
+                          (get-next-links page linktree total-runs)
+                           
+			   (s:h1 "Summary for " area-name)
+                           (s:h3 "Filter" )
+                           (s:input 'type "text"  'name "testname" 'id "testname" 'length "30" 'onkeyup "filtersome()")
+			   ;; top list
+         
+			   (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0
+                            (map (lambda (key)
+				 (let* ((res (s:tr 'class "something" 
+				  (s:th key )
+                                   (map (lambda (run)
+                                   (s:th  (vector-ref run ctr)))
+                                  runs))))
+                             (set! ctr (+ ctr 1))
+                               res))
+                               keys)
+                               (s:tr
+				 (s:th "Run Name")
+                                  (map (lambda (run)
+                                   (s:th (db:get-value-by-header run header "runname")))
+                                  runs))
+                              
+                               (map (lambda (test-name)
+                                 (let* ((item-hash (hash-table-ref/default test-runs-hash test-name  #f))
+                                         (item-keys (sort (hash-table-keys item-hash) string<=?))) 
+                                          (map (lambda (item-name)  
+  		                             (let* ((res (s:tr  'class item-name
+				                         (s:td  item-name 'class "test" )
+                                                           (map (lambda (run)
+                                                               (let* ((run-test (hash-table-ref/default item-hash item-name  #f))
+                                                                      (run-id (db:get-value-by-header run header "id"))
+                                                                      (result (hash-table-ref/default run-test run-id "n/a")) ;; (status html-path), or "n/a" when the run has no entry for this item
+                                                                      ;(relative-path (get-relative-path)) 
+                                                                      (status (if (string? result)
+									                                                            	result
+										                                                            (car result)))
+                                                                        (link (if (string? result)
+										                                                            result
+                                                                                (if (equal? flag #t) 
+                                                                                (s:a (car result) 'href (conc "./test_log?runid=" run-id "&testname="  item-name ))
+  																																						  (s:a (car result) 'href (string-substitute  (conc linktree "/")  "" (cadr result)  "-"))))))
+                                                                       (s:td  link 'class status)))
+                                                                runs))))
+                                                        res))
+                                                   item-keys)))
+                               test-list)))))) 
+
+;; (tests:create-html-tree "test-index.html")
+;;
+;; Write the paged dashboard html, pg-size runs per page, while holding the lock
+;; file "<outf>.lock". Runs are selected with -run-patt (or -runname) and
+;; -target-patt (or -target); a target with fewer parts than there are keys is
+;; padded with "%". Returns #f when the lock is not obtained.
+;; NOTE(review): when outf is given every page is written to that one file, so
+;; only the last page remains - confirm that this is intended.
+(define (tests:create-html-tree outf)
+  (let* ((lockfile    (conc outf ".lock"))
+         (linktree    (common:get-linktree))
+         (area-name   (common:get-testsuite-name))
+         (keys        (rmt:get-keys))
+         (numkeys     (length keys))
+         (run-patt    (or (args:get-arg "-run-patt") (args:get-arg "-runname") "%"))
+         (target      (or (args:get-arg "-target-patt") (args:get-arg "-target") "%"))
+         (targlist    (string-split target "/"))
+         (numtarg     (length targlist))
+         (targtweaked (if (> numkeys numtarg)
+                          (append targlist (make-list (- numkeys numtarg) "%"))
+                          targlist))
+         (target-patt (string-join targtweaked "/"))
+         (total-runs  (rmt:get-runs-cnt-by-patt run-patt target-patt keys))
+         (pg-size     10)
+         ;; the link makers keep the argument lists tests:dashboard-body calls them with
+         (get-prev-links
+          (lambda (page linktree)
+            (if (eq? page 0)
+                (s:a "" 'href (conc "page" page ".html"))
+                (s:a "&lt;&lt;prev" 'href (conc "page" (- page 1) ".html")))))
+         (get-next-links
+          (lambda (page linktree total-runs)
+            ;; the page size comes from pg-size rather than a repeated literal 10
+            (if (> total-runs (* (+ 1 page) pg-size))
+                (s:a "next&gt;&gt;" 'href (conc "page" (+ page 1) ".html"))
+                (s:a "" 'href (conc "page" page ".html"))))))
+    (if (common:simple-file-lock lockfile)
+        (begin
+          (let loop ((page 0))
+            (let ((oup (open-output-file (or outf (conc linktree "/page" page ".html")))))
+              (print "total runs: " total-runs)
+              (s:output-new
+               oup
+               (tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name
+                                     get-prev-links get-next-links #f run-patt target-patt))
+              (close-output-port oup)
+              (if (> total-runs (* (+ 1 page) pg-size))
+                  (loop (+ 1 page)))))
+          (common:simple-file-release-lock lockfile))
+        #f)))
+
+
+;; Return the lines of the file filename as a list of strings, first line first.
+(define (tests:readlines filename)
+  (define (collect port acc)
+    (let ((next (read-line port)))
+      (if (eof-object? next)
+          (reverse acc)
+          (collect port (cons next acc)))))
+  (call-with-input-file filename (lambda (port) (collect port '()))))
+
+;; Return the log file of one test item as a single string, its lines joined
+;; with newlines, or "<H2>Data not found</H2>" when the run has no matching
+;; test record or the log file does not exist.
+;;
+;;   run-id    : run id as a string; converted with string->number
+;;   test-name : handed to rmt:get-tests-for-run as the test name pattern
+;;   item-name : compared with field 11 of each returned test record
+;;
+;; The log path is field 10 of the first matching record, "/" and field 13.
+;;
+(define (tests:get-test-log run-id test-name item-name)
+  (let* ((test-data (rmt:get-tests-for-run
+                     (string->number run-id)
+                     test-name ;; testnamepatt
+                     '() '()   ;; states, statuses
+                     #f #f #f  ;; offset, num-to-get, hide/not-hide
+                     #f #f #f  ;; sort-by, sort-order, qrytype
+                     0         ;; last update
+                     #f))
+         ;; first record whose item matches; an empty result gives #f here
+         ;; instead of an error from taking the car of the empty list
+         (path (let loop ((remaining test-data))
+                 (if (null? remaining)
+                     #f
+                     (let ((rec (car remaining)))
+                       (debug:print-info 0 *default-log-port* "item: " (vector-ref rec 11) (vector-ref rec 10) "/" (vector-ref rec 13))
+                       (if (equal? (vector-ref rec 11) item-name)
+                           (conc (vector-ref rec 10) "/" (vector-ref rec 13))
+                           (loop (cdr remaining))))))))
+    (if (and path (file-exists? path))
+        (string-join (tests:readlines path) "\n")
+        "<H2>Data not found</H2>")))
+
+;; Build the html of one dashboard page with flag #t (links of the form
+;; dashboard?page=N and ./test_log?...). page is the 1 based page number as a
+;; string or a number; #f or anything that is not an exact integer >= 1
+;; selects the first page instead of raising an error.
+(define (tests:dynamic-dboard page)
+  (let* ((linktree    (common:get-linktree))
+         (area-name   (common:get-testsuite-name))
+         (keys        (rmt:get-keys))
+         (numkeys     (length keys))
+         (target-patt (string-join (make-list numkeys "%") "/"))
+         (total-runs  (rmt:get-num-runs "%"))
+         (pg-size     10)
+         (page-num    (if (string? page) (string->number page) page))
+         (pg          (if (and (integer? page-num) (exact? page-num) (>= page-num 1))
+                          (- page-num 1) ;; zero based page index
+                          0))
+         (get-prev-links
+          (lambda (pg linktree)
+            (debug:print-info 0 *default-log-port* "val: " (- 1 pg))
+            (if (eq? pg 0)
+                (s:a "" 'href (conc "dashboard?page=" pg))
+                (s:a "&lt;&lt;prev " 'href (conc "dashboard?page=" pg))))) ;; page numbers in links are 1 based
+         (get-next-links
+          (lambda (pg linktree total-runs)
+            (debug:print-info 0 *default-log-port* "val: " pg)
+            (debug:print-info 0 *default-log-port* "val: " total-runs " size" pg-size)
+            (if (> total-runs (* (+ 1 pg) pg-size))
+                (s:a "next&gt;&gt; " 'href (conc "dashboard?page=" (+ pg 2)))
+                (s:a "" 'href (conc "dashboard?page=" pg))))))
+    (tests:dashboard-body pg pg-size keys numkeys total-runs linktree area-name
+                          get-prev-links get-next-links #t "%" target-patt)))
+
+;; Write the target summary page (outf, or <linktree>/targets.html when outf is
+;; #f) and, through test:create-run-html, the per run pages, holding the lock
+;; file "<outf>.lock" meanwhile. Runs are selected with -run-patt/-runname and
+;; -target-patt/-target; a target with fewer parts than keys is padded with
+;; "%". Returns #f if the lock is not obtained.
+(define (tests:create-html-summary outf)
+  (let* ((lock-path   (conc outf ".lock"))
+         (linktree    (common:get-linktree))
+         (keys        (rmt:get-keys))
+         (area-name   (common:get-testsuite-name))
+         (run-patt    (or (args:get-arg "-run-patt") (args:get-arg "-runname") "%"))
+         (target      (or (args:get-arg "-target-patt") (args:get-arg "-target") "%"))
+         (parts       (string-split target "/"))
+         (numkeys     (length keys))
+         (missing     (- numkeys (length parts)))
+         (target-patt (string-join (if (> missing 0)
+                                       (append parts (make-list missing "%"))
+                                       parts)
+                                   "/")))
+    (cond
+     ((common:simple-file-lock lock-path)
+      (let* ((runsdat     (rmt:get-runs-by-patt keys run-patt target-patt #f #f #f 0))
+             (header      (vector-ref runsdat 0))
+             (runs        (vector-ref runsdat 1))
+             (oup         (open-output-file (or outf (conc linktree "/targets.html"))))
+             (target-hash (test:create-target-hash runs header numkeys)))
+        (test:create-target-html target-hash oup area-name linktree)
+        (test:create-run-html runs area-name linktree numkeys header))
+      (common:simple-file-release-lock lock-path))
+     (else #f))))
+
+;; Index test records as field 11 -> test-name -> (status html-path), where html-path
+;; is <field 10>/test-summary.html if that file exists, else <field 10>/<field 13>.
+(define (test:get-test-hash test-data)
+  (let ((resh (make-hash-table)))
+    (for-each
+     (lambda (rec)
+       (let* ((rundir    (vector-ref rec 10))
+              (summary   (conc rundir "/test-summary.html"))
+              (html-path (if (file-exists? summary) summary (conc rundir "/" (vector-ref rec 13))))
+              (item      (vector-ref rec 11)))
+         (if (not (hash-table-ref/default resh item #f)) (hash-table-set! resh item (make-hash-table)))
+         (hash-table-set! (hash-table-ref/default resh item #f) (vector-ref rec 2) (list (vector-ref rec 4) html-path))))
+     test-data)
+    resh))
+
+;; Union of the keys of the sub tables that ordered-data holds under each of
+;; a-keys, sorted with string>=? and with duplicates removed.
+(define (test:get-data->b-keys ordered-data a-keys)
+  (let* ((key-lists (map (lambda (a-key)
+                           (hash-table-keys (hash-table-ref ordered-data a-key)))
+                         a-keys))
+         (all-keys  (apply append key-lists))
+         (sorted    (sort all-keys string>=?)))
+    (delete-duplicates sorted)))
+
+
+;; Write <linktree>/<target>/<run-name>/run.html for every run in runs: a table
+;; of items (rows) against test names (columns) in which each cell shows the
+;; test status and links to the test's html page. A run whose directory does
+;; not exist is skipped with a log message.
+;;
+;;   runs      : list of run records (vectors)
+;;   area-name : not used
+;;   linktree  : directory under which <target>/<run-name> is looked up
+;;   numkeys   : number of leading run record fields joined by "/" into target
+;;   header    : given to db:get-value-by-header to read "runname",
+;;               "event_time" and "id" from a run record
+;;
+;; Returns the list that results from mapping over runs.
+;;
+(define (test:create-run-html runs area-name linktree numkeys header)
+  (map
+   (lambda (run)
+     (let* ((target   (string-join (take (vector->list run) numkeys) "/"))
+            (run-name (db:get-value-by-header run header "runname"))
+            (run-dir  (conc linktree "/" target "/" run-name)))
+       (if (not (file-exists? run-dir))
+           ;; the other debug:print-info calls in this file pass the log port second
+           (debug:print-info 0 *default-log-port* "Skip: Dirctory structure " linktree "/" target "/" run-name " does not exist. Megatest will not create run.html")
+           ;; the directory exists: fetch the tests first and open the file
+           ;; last so that a failing query does not leave an open port behind
+           (let* ((run-time       (seconds->work-week/day-time (db:get-value-by-header run header "event_time")))
+                  (run-id         (db:get-value-by-header run header "id"))
+                  ;; all tests of the run: pattern "%", no state/status filters,
+                  ;; no paging, sorting or query type, last-update 0
+                  (test-data      (rmt:get-tests-for-run run-id "%" '() '() #f #f #f #f #f #f 0 #f))
+                  (item-test-hash (test:get-test-hash test-data))
+                  (items          (hash-table-keys item-test-hash))
+                  (test-names     (test:get-data->b-keys item-test-hash items))
+                  (link-prefix    (conc run-dir "/")) ;; substituted away in each html path to form the link
+                  (oup            (open-output-file (conc run-dir "/run.html"))))
+             (s:output-new
+              oup
+              (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f)
+                      (s:title "Runs View " run-name)
+                      (s:body
+                       (s:h1 "Runs View " )
+                       (s:h3 "Target" target)
+                       (s:p (s:b "Run name" ) run-name)
+                       (s:p (s:b "Run Date" ) run-time)
+                       (s:table 'border 1 'cellspacing 0
+                                (s:tr
+                                 (s:th "Items")
+                                 (map (lambda (test) (s:th test)) test-names))
+                                (map (lambda (item)
+                                       (let ((test-hash (hash-table-ref/default item-test-hash item #f)))
+                                         (if test-hash
+                                             (s:tr
+                                              (s:td 'class "test" item)
+                                              (map (lambda (test)
+                                                     (let ((test-details (hash-table-ref/default test-hash test #f)))
+                                                       (if test-details
+                                                           (let ((status (car test-details))
+                                                                 (link   (string-substitute link-prefix "" (cadr test-details) "-")))
+                                                             (s:td 'class status
+                                                                   (s:a 'class "link" 'href link status)))
+                                                           (s:td ""))))
+                                                   test-names)))))
+                                     (sort items string<=?))))))
+             (close-output-port oup)))))
+   runs))
+
+;; Group run names by target: returns a hash table target -> list of run names,
+;; where target is the first numkeys fields of the run record joined by "/".
+;; Each new run name is put at the front of its target's list.
+(define (test:create-target-hash runs header numkeys)
+  (let ((resh (make-hash-table)))
+    (for-each
+     (lambda (run)
+       (let* ((name    (db:get-value-by-header run header "runname"))
+              (target  (string-join (take (vector->list run) numkeys) "/"))
+              (earlier (hash-table-ref/default resh target #f)))
+         (hash-table-set! resh target (if earlier (cons name earlier) (list name)))))
+     runs)
+    resh))
+
+;; Length of the longest run list stored in target-hash under any of targets;
+;; a target with no entry counts as 0 and the result is 0 for no targets.
+(define (test:get-max-run-cnt target-hash targets)
+  (let loop ((remaining targets)
+             (longest   0))
+    (if (null? remaining)
+        longest
+        (let* ((runs  (hash-table-ref/default target-hash (car remaining) #f))
+               (count (if runs (length runs) 0)))
+          (loop (cdr remaining)
+                (if (< longest count) count longest))))))
+
+
+;; Prepend "" to each target's run list in target-hash until it holds max-row-length entries; returns target-hash.
+(define (test:pad-runs target-hash targets max-row-length)
+  (for-each
+   (lambda (target)
+     (do ((run-list (hash-table-ref/default target-hash target #f) (cons "" run-list)))
+         ((>= (length run-list) max-row-length))
+       (hash-table-set! target-hash target (cons "" run-list))))
+   targets)
+  target-hash)
+
+(define (test:create-target-html target-hash oup area-name linktree) ;; write the target / runs table to oup, then close oup
+  (let* ((targets (hash-table-keys target-hash))
+         (max-row-length (test:get-max-run-cnt target-hash targets)) ;; the most runs held under any one target
+         (pad-runs-hash (test:pad-runs target-hash targets max-row-length))) ;; called for its effect on target-hash; the binding is not read below
+   (s:output-new
+	   oup
+	   (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f)
+
+		   (s:title "Target View " area-name)
+		   (s:body
+		   (s:h1 "Target View " area-name)
+					(s:table 'id "LinkedList1" 'border "1" 'cellspacing 0
+             (s:tr 'class "something" 
+               (s:th "Target")
+								(s:th 'colspan max-row-length "Runs"))                                              
+                (let* ((tbl (map (lambda (target)
+                      (s:tr
+                      (s:td 'class "test" target)
+										  (let* ((runs  (hash-table-ref/default target-hash target  #f))
+														 (rest-row (map (lambda (run)
+																				(if (equal? run "") ;; "" entries are the padding added by test:pad-runs
+																						(s:td run)
+                                            (if (file-exists?(conc linktree "/" target "/" run )) ;; NOTE(review): one-armed if - the result is unspecified when the run directory is missing
+																						(begin 
+																							(s:td 
+																							(s:a 'href (conc  target "/" run "/run.html") run))))))
+																				(reverse runs))))
+                              rest-row)))
+                                   targets)))
+                           tbl)))))
+          (close-output-port oup)))
+
+
+(define (tests:create-html-tree-old outf) ;; write a runs index (outf or <linktree>/runs-index.html) plus a run-summary.html per run; #t if the lock was obtained, else #f
+   (let* ((lockfile  (conc outf ".lock"))
+	 (runs-to-process '())) ;; set below but not read afterwards
+    (if (common:simple-file-lock lockfile)
+	(let* ((linktree  (common:get-linktree))
+	       (oup       (open-output-file (or outf (conc linktree "/runs-index.html"))))
+	       (area-name (common:get-testsuite-name))
+	       (keys      (rmt:get-keys))
+	       (numkeys   (length keys))
+	       (runsdat   (rmt:get-runs "%" #f #f (map (lambda (x)(list x "%")) keys)))
+	       (header    (vector-ref runsdat 0))
+	       (runs      (vector-ref runsdat 1))
+	       (runtreedat (map (lambda (x)
+				  (tests:run-record->test-path x numkeys))
+				runs))
+	       (runs-htree (common:list->htree runtreedat)))
+	  (set! runs-to-process runs)
+	  (s:output-new
+	   oup
+	   (s:html tests:css-jscript-block
+		   (s:title "Summary for " area-name)
+		   (s:body 'onload "addEvents();"
+			   (s:h1 "Summary for " area-name)
+			   ;; top list
+			   (s:ul 'id "LinkedList1" 'class "LinkedList"
+				 (s:li
+				  "Runs"
+				  (common:htree->html runs-htree
+						      '()
+						      (lambda (x p) ;; tip renderer: p is the list of path parts, its last element the run name
+							(let* ((targ-path (string-intersperse p "/"))
+                                                               (full-path (conc linktree "/" targ-path))
+                                                               (run-name  (car (reverse p))))
+                                                          (if (and (common:file-exists? full-path)
+                                                                   (directory?   full-path)
+                                                                   (file-write-access? full-path))
+                                                              (s:a run-name 'href (conc targ-path "/run-summary.html"))
+                                                              (begin
+                                                                (debug:print 0 *default-log-port* "INFO: Can't create " targ-path "/run-summary.html")
+                                                                (conc run-name " (Not able to create summary at " targ-path ")")))))))))))
+          (close-output-port oup)
+	  (common:simple-file-release-lock lockfile) ;; the lock is released here, before the per run pages below are written
+               
+	  (for-each
+	   (lambda (run)
+	     (let* ((test-subpath (tests:run-record->test-path run numkeys)) ;; not referenced below
+		    (run-id       (db:get-value-by-header run header "id"))
+                    (run-dir      (tests:run-record->test-path run numkeys))
+		    (test-dats    (rmt:get-tests-for-run
+				   run-id
+                                   "%/"       ;; testnamepatt
+				   '()        ;; states
+				   '()        ;; statuses
+				   #f         ;; offset
+				   #f         ;; num-to-get
+				   #f         ;; hide/not-hide
+				   #f         ;; sort-by
+				   #f         ;; sort-order
+				   #f         ;; 'shortlist                           ;; qrytype
+                                   0         ;; last update
+				   #f))
+                    (tests-tree-dat (map (lambda (test-dat)
+                                         ;; (tests:run-record->test-path x numkeys))
+                                         (let* ((test-name  (db:test-get-testname test-dat))
+                                                (item-path  (db:test-get-item-path test-dat))
+                                                (full-name  (db:test-make-full-name test-name item-path))
+                                                (path-parts (string-split full-name))) ;; NOTE(review): no delimiter is given to string-split - confirm "/" was not intended
+                                           path-parts))
+                                       test-dats))
+                    (tests-htree (common:list->htree tests-tree-dat))
+                    (html-dir    (conc linktree "/" (string-intersperse run-dir "/")))
+                    (html-path   (conc html-dir "/run-summary.html"))
+                    (oup         (if (and (common:file-exists? html-dir)
+                                          (directory?   html-dir)
+                                          (file-write-access? html-dir))
+                                     (open-output-file  html-path)
+                                     #f)))
+               ;; (print "run-dir: " run-dir ", tests-tree-dat: " tests-tree-dat)
+               (if oup
+                   (begin
+                     (s:output-new
+                      oup
+                      (s:html tests:css-jscript-block
+                              (s:title "Summary for " area-name)
+                              (s:body 'onload "addEvents();"
+                                      (s:h1 "Summary for " (string-intersperse run-dir "/"))
+                                      ;; top list
+                                      (s:ul 'id "LinkedList1" 'class "LinkedList"
+                                            (s:li
+                                             "Tests"
+                                             (common:htree->html tests-htree
+                                                                 '()
+                                                                 (lambda (x p)
+                                                                   (let* ((targ-path (string-intersperse p "/"))
+                                                                          (test-name (car p))
+                                                                          (item-path ;; (if (> (length p) 2) ;; test-name + run-name
+                                                                           (string-intersperse p "/"))
+                                                                          (full-targ (conc html-dir "/" targ-path))
+                                                                          (std-file  (conc full-targ "/test-summary.html"))
+                                                                          (alt-file  (conc full-targ "/megatest-rollup-" test-name ".html"))
+                                                                          (html-file (if (common:file-exists? alt-file)
+                                                                                         alt-file
+                                                                                         std-file))
+                                                                          (run-name  (car (reverse p))))
+                                                                     (if (and (not (common:file-exists? full-targ)) ;; NOTE(review): requires full-targ to be absent yet a writable directory - looks unsatisfiable, confirm intent
+                                                                              (directory? full-targ)
+                                                                              (file-write-access? full-targ))
+                                                                         (tests:summarize-test 
+                                                                          run-id 
+                                                                          (rmt:get-test-id run-id test-name item-path)))
+                                                                     (if (common:file-exists? full-targ)
+                                                                         (s:a run-name 'href html-file)
+                                                                         (begin
+                                                                           (debug:print 0 *default-log-port* "ERROR: can't access " full-targ)
+                                                                           (conc "No summary for " run-name)))))
+                                                                 ))))))
+                     (close-output-port oup)))))
+           runs)
+          #t) ;; the lock was obtained
+	#f))) ;; the lock was not obtained
+
+
+;; Send a test's meta info to the server with rmt:general-call. The
+;; 'update-test-rundat call is always made, -1 standing in for a missing value;
+;; each of the other updates is sent only when its values are present.
+(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)
+  (let ((or-minus-one (lambda (val) (or val -1))))
+    (rmt:general-call 'update-test-rundat run-id test-id (current-seconds)
+                      (or-minus-one cpuload) (or-minus-one diskfree) -1 (or-minus-one minutes)))
+  (when (and cpuload diskfree) (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id))
+  (when minutes (rmt:general-call 'update-run-duration run-id minutes test-id))
+  (when (and uname hostname) (rmt:general-call 'update-uname-host run-id uname hostname test-id)))
+
+;; For running with no db access (i.e. via rmt: internally). Collects
+;; (get-cpu-load), (get-df (current-directory)), (get-uname "-srvpio") and
+;; (get-host-name) and passes them, with minutes, to
+;; tests:update-central-meta-info. db, work-area and remtries are not used.
+(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries)
+  (let* ((load    (get-cpu-load))
+         (free    (get-df (current-directory)))
+         (os-info (get-uname "-srvpio"))
+         (host    (get-host-name)))
+    (tests:update-central-meta-info run-id test-id load free minutes os-info host)))
+;; Return the steps of a test as a list of 8 element vectors
+;;
+;;      0        1     2    3       4       5       6      7
+;;   #<stepname start end status Duration Logfile Comment id>
+;;
+;; built from (tests:process-steps-table (rmt:get-steps-for-test run-id test-id)).
+;; Numeric start and end values are converted with seconds->time-string.
+;; The list is ordered by start; equal numeric starts are ordered by id, and
+;; starts that are not both numbers are compared as strings.
+(define (tests:get-compressed-steps run-id test-id)
+  (let* ((steps-data (rmt:get-steps-for-test run-id test-id))
+         (comprsteps (tests:process-steps-table steps-data))
+         (time->text (lambda (s) (if (number? s) (seconds->time-string s) s)))
+         (earlier?   (lambda (a b)
+                       (let ((time-a (vector-ref a 1))
+                             (time-b (vector-ref b 1)))
+                         (cond
+                          ((not (and (number? time-a) (number? time-b)))
+                           (string<? (conc time-a) (conc time-b)))
+                          ((< time-a time-b) #t)
+                          ;; = rather than eq?: eq? is not a reliable test of numeric equality
+                          ((= time-a time-b) (< (vector-ref a 7) (vector-ref b 7)))
+                          (else #f))))))
+    (map (lambda (x)
+           (vector (vector-ref x 0)              ;; stepname
+                   (time->text (vector-ref x 1)) ;; start
+                   (time->text (vector-ref x 2)) ;; end
+                   (vector-ref x 3)              ;; status
+                   (vector-ref x 4)              ;; duration
+                   (vector-ref x 5)              ;; logfile
+                   (vector-ref x 6)              ;; comment
+                   (vector-ref x 7)))            ;; id
+         (sort (hash-table-values comprsteps) earlier?))))
+
+
+;; summarize test in to a file test-summary.html in the test directory
+;;
+(define (tests:summarize-test run-id test-id)
+  ;; Render an HTML summary (test identity, state/status, final log link and
+  ;; the per-step table) to test-summary.html in the test's run directory.
+  ;; When that directory is not writable an error is logged and nothing is
+  ;; written.
+  (let* ((tinfo     (rmt:get-test-info-by-id run-id test-id))
+         (rundir    (db:test-get-rundir tinfo))
+         (html-path (conc rundir "/test-summary.html")))
+    (cond
+     ;; first verify we are able to write the output file
+     ((not (file-write-access? rundir))
+      (debug:print 0 *default-log-port* "ERROR: cannot write test-summary.html to " rundir))
+     (else
+      (let* ((tname     (db:test-get-testname tinfo))
+             (ipath     (db:test-get-item-path tinfo))
+             (fullname  (db:test-make-full-name tname ipath))
+             (port      (open-output-file html-path))
+             (status    (db:test-get-status   tinfo))
+             (color     (common:get-color-from-status status))
+             (final-log (db:test-get-final_logf tinfo))
+             (steps     (tests:get-compressed-steps run-id test-id))
+             ;; a table row made of two label/value pairs
+             (pair-row  (lambda (label-a val-a label-b val-b)
+                          (s:tr (s:td label-a) (s:td val-a)
+                                (s:td label-b) (s:td val-b))))
+             ;; a table row describing one step record
+             (step-row  (lambda (step)
+                          (let ((step-log (tdb:steps-table-get-log-file step)))
+                            (s:tr (s:td (tdb:steps-table-get-stepname step))
+                                  (s:td (tdb:steps-table-get-start    step))
+                                  (s:td (tdb:steps-table-get-end      step))
+                                  (s:td (tdb:steps-table-get-status   step))
+                                  (s:td (tdb:steps-table-get-runtime  step))
+                                  (s:td (s:a 'href step-log step-log)))))))
+        (s:output-new
+         port
+         (s:html
+          (s:title "Summary for " fullname)
+          (s:body
+           (s:h2 "Summary for " fullname)
+           (s:table 'cellspacing "0" 'border "1"
+                    (pair-row "run id"   (db:test-get-run_id tinfo)
+                              "test id"  (db:test-get-id     tinfo))
+                    (pair-row "testname" tname
+                              "itempath" ipath)
+                    (pair-row "state"    (db:test-get-state tinfo)
+                              "status"   (s:a 'href final-log (s:font 'color color status)))
+                    (pair-row "TestDate" (seconds->work-week/day-time
+                                          (db:test-get-event_time tinfo))
+                              "Duration" (seconds->hr-min-sec
+                                          (db:test-get-run_duration tinfo))))
+           (s:h3 "Log files")
+           (s:table
+            'cellspacing "0" 'border "1"
+            (s:tr (s:td "Final log") (s:td (s:a 'href final-log final-log))))
+           (s:table
+            'cellspacing "0" 'border "1"
+            (s:tr (s:td "Step Name") (s:td "Start") (s:td "End")
+                  (s:td "Status") (s:td "Duration") (s:td "Log File"))
+            (map step-row steps)))))
+        (close-output-port port))))))
+	  
+	  
+;; MUST BE CALLED local!
+;;
+(define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '()))
+  ;; Look up the test paths for keynames/target that match the
+  ;; -testpatt/-state/-status/-runname command line arguments (each defaults
+  ;; to "%"). When fnamepatt is given, each existing directory is expanded to
+  ;; the files matching <dir>/<fnamepatt>; otherwise the paths are returned
+  ;; as received from rmt:test-get-paths-matching-keynames-target-new.
+  ;;
+  ;; BUG: Move the values derived from args to parameters and push to megatest.scm
+  (define (arg-or-any primary alternate)
+    (or (args:get-arg primary) (args:get-arg alternate) "%"))
+  (define (expand-dir dir)
+    (if (not (directory-exists? dir))
+        '()
+        (let ((glob-query (conc dir "/" fnamepatt)))
+          ;; we aren't going to try too hard. If glob breaks it is likely
+          ;; because someone tried to do */*/*.log or similar; in that case
+          ;; let the shell expand the pattern instead
+          (handle-exceptions
+              exn
+              (with-input-from-pipe
+                  (conc "echo " glob-query)
+                read-lines)
+            (glob glob-query)))))
+  (let* ((testpatt   (arg-or-any "-testpatt" "-testpatt"))
+         (statepatt  (arg-or-any "-state"    ":state"))
+         (statuspatt (arg-or-any "-status"   ":status"))
+         (runname    (arg-or-any "-runname"  ":runname"))
+         (db-paths   (rmt:test-get-paths-matching-keynames-target-new
+                      keynames target res
+                      testpatt
+                      statepatt
+                      statuspatt
+                      runname)))
+    (if (not fnamepatt)
+        db-paths
+        (apply append (map expand-dir db-paths)))))
+
+			      
+;; for each test:
+;;   
+;; Return the subset of testkeynames that is still worth running.
+;;
+;;   run-id          : run the tests belong to
+;;   testkeynames    : list of keys into testrecordshash
+;;   testrecordshash : hash table of testqueue records
+;;
+;; A test is dropped when it has a db record and either
+;;   - it is COMPLETED with status PASS, WARN, WAIVED, CHECK or SKIP, or
+;;   - its state is INCOMPLETE or KILLED, or
+;;   - one of its waitons (looked up as the parent test, item path "") is
+;;     COMPLETED with FAIL/ABORT, has status KILLED, or is INCOMPLETE.
+;; Tests without a db record are always kept; waitons without a db record
+;; are ignored. The result is in reverse order of testkeynames.
+;;
+(define (tests:filter-non-runnable run-id testkeynames testrecordshash)
+  (let ((runnables '()))
+    (for-each
+     (lambda (testkeyname)
+       (let* ((test-record (hash-table-ref testrecordshash testkeyname))
+              (test-name   (tests:testqueue-get-testname  test-record))
+              (item-path   (tests:testqueue-get-item_path test-record))
+              (waitons     (tests:testqueue-get-waitons   test-record))
+              (keep-test   #t)
+              (test-id     (rmt:get-test-id run-id test-name item-path))
+              (tdat        (rmt:get-testinfo-state-status run-id test-id)))
+         (if tdat
+             (begin
+               ;; Look at the test state and status
+               (if (or (and (member (db:test-get-status tdat)
+                                    '("PASS" "WARN" "WAIVED" "CHECK" "SKIP"))
+                            (equal? (db:test-get-state tdat) "COMPLETED"))
+                       (member (db:test-get-state tdat)
+                               '("INCOMPLETE" "KILLED")))
+                   (set! keep-test #f))
+
+               ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then
+               ;; eliminate this test from the runnable list
+               (if keep-test
+                   (for-each
+                    (lambda (waiton)
+                      ;; for now we are waiting only on the parent test
+                      (let* ((parent-test-id (rmt:get-test-id run-id waiton ""))
+                             ;; query the waiton's own record, not the record
+                             ;; of the test being filtered
+                             (wtdat          (and parent-test-id
+                                                  (rmt:get-testinfo-state-status run-id parent-test-id))))
+                        (if (and wtdat
+                                 (or (and (equal? (db:test-get-state wtdat) "COMPLETED")
+                                          (member (db:test-get-status wtdat) '("FAIL" "ABORT")))
+                                     (member (db:test-get-status wtdat) '("KILLED"))
+                                     (member (db:test-get-state wtdat)  '("INCOMPLETE"))))
+                            (set! keep-test #f)))) ;; no point in running this one again
+                    waitons))))
+         (if keep-test (set! runnables (cons testkeyname runnables)))))
+     testkeynames)
+    runnables))
+
+;;======================================================================
+;; test steps
+;;======================================================================
+
+;; teststep-set-status! used to be here
+
+(define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat)
+  ;; #t when the test record exists and its state is "KILLREQ", else #f
+  (let ((tinfo (rmt:get-test-info-by-id run-id test-id)))
+    (if tinfo
+        (equal? (test:get-state tinfo) "KILLREQ")
+        #f)))
+
+;; Return the number of rows in the test_rundat table of the test db.
+;;
+;;   tdb : open sqlite3 handle of the test db, or #f
+;;
+;; Returns 0 when tdb is #f. The 0 is the else branch of the if, so the
+;; queried count is what gets returned when a handle is supplied.
+;;
+(define (test:tdb-get-rundat-count tdb)
+  (if tdb
+      (let ((res 0))
+        (sqlite3:for-each-row
+         (lambda (count)
+           (set! res count))
+         tdb
+         "SELECT count(id) FROM test_rundat;")
+        res)
+      0))
+
+;; (define (tests:set-partial-meta-info test-id run-id minutes work-area)
+;; DISABLED: the #; datum comment below makes the reader discard this whole
+;; definition, so tests:set-partial-meta-info is not defined by this form.
+;;
+;; As written the form would sample cpu load and free disk space and pass
+;; them to tests:update-testdat-meta-info, falling into the handler below on
+;; any exception.
+;;
+;; NOTE(review): problems to resolve before re-enabling:
+;;   - the remtries parameter is shadowed by the (remtries 10) binding
+;;   - db is referenced but is neither a parameter nor bound in this form
+;;   - the retry path calls tests:set-full-meta-info, not this procedure
+;;   - remtries is decremented by set! and again in the retry call
+;;   - err-status is bound but never used
+#;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries)
+  (let* ((cpuload  (get-cpu-load))
+	 (diskfree (get-df (current-directory)))
+	 (remtries 10))
+    (handle-exceptions
+     exn
+     ;; handler: retry after a 10 second sleep while tries remain, else log and give up
+     (if (> remtries 0)
+	 (begin
+	   (print-call-chain (current-error-port))
+	   (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times")
+	   (set! remtries (- remtries 1))
+	   (thread-sleep! 10)
+	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
+	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
+	   (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up")
+	   (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
+	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+	   (debug:print 5 *default-log-port* "exn=" (condition->list exn))
+	   (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
+	   (print-call-chain (current-error-port))))
+     ;; protected body: the actual meta info update
+     (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
+  )))
+	 
+
+;;======================================================================
+;; launch a task - this runs on the originating host, tests themselves
+;;
+;;======================================================================
+
+
+;;======================================================================
+;; ezsteps
+;;======================================================================
+
+;; ezsteps were going to be coded as
+;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute
+;;   BUT
+;; now are
+;; stepname {VAR=first,second,third ...} command ...
+;; where the {VAR=first,second,third ...} is optional.
+
+;; given an exit code and whether or not logpro was used calculate OK/BAD
+;; return #t if we are ok, #f otherwise
+(define (steprun-good? logpro exitcode)
+  ;; exit code 0 is always good; exit code 2 is good only when logpro was used
+  (cond
+   ((eq? exitcode 0) #t)
+   ((not logpro)     #f)
+   (else             (eq? exitcode 2))))
+
+;; if handed a string, process it, else look for MT_CMDINFO
+(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f))
+  ;; Decode encoded-cmd when given, otherwise the MT_CMDINFO environment
+  ;; variable; with neither available the result is the empty list.
+  (let ((payload (or encoded-cmd (getenv "MT_CMDINFO"))))
+    (cond
+     (payload (common:read-encoded-string payload))
+     (else    '()))))
+
+;; return (conc status ": " comment) from the final section so that
+;;   the comment can be set in the step record in launch.scm
+;;
+(define (launch:load-logpro-dat run-id test-id stepname)
+  ;; Read <stepname>.dat, push its csv form to the db with
+  ;; rmt:csv->test-data, and return a status string for the step comment:
+  ;;   "PASS"                 when the final exit-status is "PASS"
+  ;;   "<status>: <message>"  for any other exit-status
+  ;;   #f                     when there is no .dat file or no exit-status
+  (let ((datname (conc stepname ".dat")))
+    (and (common:file-exists? datname)
+         (let* ((dat    (configf:read-config datname #f #f))
+                (rows   (db:logpro-dat->csv dat stepname))
+                (csvtxt (call-with-values
+                            (lambda () (make-format ","))
+                          (lambda (fmt-cell fmt-record fmt-csv)
+                            (fmt-csv (map list->csv-record rows)))))
+                (status (configf:lookup dat "final" "exit-status"))
+                (msg    (configf:lookup dat "final" "message")))
+           ;; this guard blocked a stack dump caused by the .dat file from
+           ;; logpro being 0-byte; fixed by upgrading logpro
+           (if csvtxt
+               (rmt:csv->test-data run-id test-id csvtxt)
+               (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer"))
+           (if (equal? status "PASS")
+               "PASS" ;; skip the message part if status is pass
+               (and status
+                    (conc (configf:lookup dat "final" "exit-status")
+                          ": "
+                          (or msg "no message"))))))))
+
+;; Run one ezstep: launch its command with /bin/bash, wait for it, run logpro
+;; on the step log when a logpro file exists (or can be generated from the
+;; testconfig), record the step start/end in the db and roll the step result
+;; into the test state/status.
+;;
+;;   ezstep     : list (stepname stepinfo) where stepinfo is
+;;                "[{VAR=val;...}] command ..."
+;;   run-id, test-id : the test being run
+;;   exit-info  : launch:einf record; pid, exit-status and exit-code are
+;;                updated while holding mutex m, rollup-status is updated
+;;                when the test status is set
+;;   m          : mutex guarding exit-info
+;;   tal        : remaining ezsteps; when null the test state becomes
+;;                "COMPLETED", otherwise "RUNNING"
+;;   testconfig : config searched for a "logpro" section entry named after
+;;                the step
+;;
+;; Returns logpro-used (true when logpro was applied to this step).
+;;
+(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig) ;;; TODO: deprecate me in favor of ezsteps.scm
+  (let* ((stepname       (car ezstep))  ;; do stuff to run the step
+         (stepinfo       (cadr ezstep))
+         ;; (let ((info (cadr ezstep)))
+         ;;                (if (proc? info) "" info)))
+         ;; (stepproc       (let ((info (cadr ezstep)))
+         ;;                (if (proc? info) info #f)))
+         ;; match groups: 2 = text inside the optional {...} prefix, 3 = the command
+         (stepparts      (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo))
+         (stepparams     (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each
+         (paramparts     (if (string? stepparams)
+                             (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams))
+                             '()))
+         (subrun         (alist-ref "subrun" paramparts equal?))
+         (stepcmd        (list-ref stepparts 3))
+         (script         "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!
+         (logpro-file    (conc stepname ".logpro"))
+         (html-file      (conc stepname ".html"))
+         (dat-file       (conc stepname ".dat"))
+         (tconfig-logpro (configf:lookup testconfig "logpro" stepname))
+         (logpro-used    (common:file-exists? logpro-file)))
+
+    (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams
+                 ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd)
+
+    (if (and tconfig-logpro
+             (not logpro-used)) ;; no logpro file found but have a defn in the testconfig
+        (begin
+          (with-output-to-file logpro-file
+            (lambda ()
+              (print ";; logpro file extracted from testconfig\n"
+                     ";;")
+              (print tconfig-logpro)))
+          (set! logpro-used #t)))
+
+    ;; NB// can safely assume we are in test-area directory
+    (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts
+                 " stepparams: " stepparams " stepcmd: " stepcmd)
+
+    ;; ;; first source the previous environment
+    ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh")
+    ;;                                                        (get-environment-variable "SHELL")) ".csh" ".sh"))))
+    ;;   (if (and prevstep (common:file-exists? prev-env))
+    ;;       (set! script (conc script "source " prev-env))))
+
+    ;; call the command using mt_ezstep
+    ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd))
+
+    (debug:print 4 *default-log-port* "script: " script)
+    (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
+    ;; now launch the actual process, with "." appended to PATH
+    (call-with-environment-variables
+     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
+     (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1")
+       (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1
+              (pid #f))
+         (let ((proc (lambda ()
+                       (set! pid (process-run "/bin/bash" (list "-c" cmd))))))
+           (if subrun
+               (begin
+                 (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.")
+                 (common:without-vars proc "^MT_.*"))
+               (proc)))
+
+         ;; append a make rule for this step to Makefile.ezsteps
+         (with-output-to-file "Makefile.ezsteps"
+           (lambda ()
+             (print stepname ".log :")
+             (print "\t" cmd)
+             (if (common:file-exists? logpro-file)
+                 (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log"))
+             (print)
+             (print stepname " : " stepname ".log")
+             (print))
+           #:append)
+
+         (rmt:test-set-top-process-pid run-id test-id pid)
+         ;; poll the child every 2 seconds; exit-info is refreshed on every poll
+         (let processloop ((i 0))
+           (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
+             (mutex-lock! m)
+             (launch:einf-pid-set!         exit-info pid)
+             (launch:einf-exit-status-set! exit-info exit-status)
+             (launch:einf-exit-code-set!   exit-info exit-code)
+             (mutex-unlock! m)
+             (if (eq? pid-val 0)
+                 (begin
+                   (thread-sleep! 2)
+                   (processloop (+ i 1)))))))))
+    (debug:print-info 0 *default-log-port* "step " stepname " completed with exit code " (launch:einf-exit-code exit-info))
+    ;; now run logpro if needed; its exit code replaces the command's in exit-info
+    (if logpro-used
+        (let* ((logpro-exe (or (getenv "LOGPRO_EXE") "logpro"))
+               (pid        (process-run (conc "/bin/sh -c '"logpro-exe" "logpro-file " " html-file " < " stepname ".log > /dev/null'"))))
+          (let processloop ((i 0))
+            (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
+              (mutex-lock! m)
+              (launch:einf-pid-set!         exit-info pid)
+              (launch:einf-exit-status-set! exit-info exit-status)
+              (launch:einf-exit-code-set!   exit-info exit-code)
+              (mutex-unlock! m)
+              (if (eq? pid-val 0)
+                  (begin
+                    (thread-sleep! 2)
+                    (processloop (+ i 1))))))
+          ;; report once, after the polling loop has finished
+          (debug:print-info 0 *default-log-port* "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info))))
+
+    ;; record the end of the step, with the logpro comment and html log if any
+    (let ((exinfo  (launch:einf-exit-code exit-info))
+          (logfna  (if logpro-used html-file ""))
+          (comment #f))
+      (if logpro-used
+          (begin
+            ;; load the .dat file into the test_data table if it exists
+            (if (common:file-exists? dat-file)
+                (set! comment (launch:load-logpro-dat run-id test-id stepname)))
+            (rmt:test-set-log! run-id test-id html-file)))
+      (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna))
+    ;; set the test final status
+    (let* ((process-exit-status (launch:einf-exit-code exit-info))
+           (this-step-status (cond
+                              ((and (eq? process-exit-status 2) logpro-used) 'warn)   ;; logpro 2 = warnings
+                              ((and (eq? process-exit-status 3) logpro-used) 'check)  ;; logpro 3 = check
+                              ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived
+                              ((and (eq? process-exit-status 5) logpro-used) 'abort)  ;; logpro 5 = abort
+                              ((and (eq? process-exit-status 6) logpro-used) 'skip)   ;; logpro 6 = skip
+                              ((eq? process-exit-status 0)                   'pass)   ;; logpro 0 = pass
+                              (else 'fail)))
+           (overall-status   (cond
+                              ((eq? (launch:einf-rollup-status exit-info) 2) 'warn)
+                              ((eq? (launch:einf-rollup-status exit-info) 0) 'pass)
+                              (else 'fail)))
+           (next-status      (cond
+                              ((eq? overall-status 'pass) this-step-status)
+                              ((eq? overall-status 'warn)
+                               (if (eq? this-step-status 'fail) 'fail 'warn))
+                              ;; NOTE(review): overall-status is only ever warn, pass or
+                              ;; fail (see above), so this clause cannot match
+                              ((eq? overall-status 'abort) 'abort)
+                              (else 'fail)))
+           (next-state       ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ??
+            (cond
+             ((null? tal) ;; more to run?
+              "COMPLETED")
+             (else "RUNNING"))))
+      (debug:print 4 *default-log-port* "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used
+                   " this-step-status: " this-step-status " overall-status: " overall-status
+                   " next-status: " next-status " rollup-status: "  (launch:einf-rollup-status exit-info))
+      ;; NB// tests:test-set-status! does rdb calls under the hood
+      (case next-status
+        ((warn)
+         (launch:einf-rollup-status-set! exit-info 2)
+         (tests:test-set-status! run-id test-id next-state "WARN"
+                                 (if (eq? this-step-status 'warn) "Logpro warning found" #f)
+                                 #f))
+        ((check)
+         (launch:einf-rollup-status-set! exit-info 3)
+         (tests:test-set-status! run-id test-id next-state "CHECK"
+                                 (if (eq? this-step-status 'check) "Logpro check found" #f)
+                                 #f))
+        ((waived)
+         (launch:einf-rollup-status-set! exit-info 4)
+         (tests:test-set-status! run-id test-id next-state "WAIVED"
+                                 ;; compare against 'waived so the comment is actually recorded
+                                 (if (eq? this-step-status 'waived) "Logpro waived found" #f)
+                                 #f))
+        ((abort)
+         (launch:einf-rollup-status-set! exit-info 5)
+         (tests:test-set-status! run-id test-id next-state "ABORT"
+                                 (if (eq? this-step-status 'abort) "Logpro abort found" #f)
+                                 #f))
+        ((skip)
+         (launch:einf-rollup-status-set! exit-info 6)
+         (tests:test-set-status! run-id test-id next-state "SKIP"
+                                 (if (eq? this-step-status 'skip) "Logpro skip found" #f)
+                                 #f))
+        ((pass)
+         (tests:test-set-status! run-id test-id next-state "PASS" #f #f))
+        (else ;; 'fail
+         ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED"
+         (launch:einf-rollup-status-set! exit-info 1)
+         (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f)
+         )))
+    logpro-used))
+
+;; Drive the execution of one test: mark it RUNNING, run the optional
+;; runscript, then run the ezsteps (plus a generated "subrun" step when the
+;; testconfig requests one).
+;;
+;;   run-id, test-id, test-name, item-path : identify the test
+;;   fullrunscript : command handed to process-run first, or #f
+;;   ezsteps       : true when ezsteps should be processed
+;;   subrun        : true when a subrun is expected
+;;   tconfigreg    : passed through to tests:get-testconfig
+;;   exit-info     : launch:einf record, updated while holding mutex m
+;;   m             : mutex guarding exit-info
+;;
+;; Calls (exit 1) when no testconfig can be loaded.
+;;
+(define (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m)
+  ;; (let-values
+  ;;  (((pid exit-status exit-code)
+  ;;    (run-n-wait fullrunscript)))
+  ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f)
+  ;; Since we should have a clean slate at this time there is no need to do 
+  ;; any of the other stuff that tests:test-set-status! does. Let's just 
+  ;; force RUNNING/n/a
+
+  ;; (thread-sleep! 0.3)
+  ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a")
+  (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) 
+  ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here
+
+  ;; if there is a runscript do it first
+  (if fullrunscript
+      (let ((pid (process-run fullrunscript)))
+	(rmt:test-set-top-process-pid run-id test-id pid)
+	;; poll every 2 seconds until process-wait reports a non-zero pid;
+	;; exit-info (including rollup-status) is overwritten on every poll
+	(let loop ((i 0))
+	  (let-values
+	   (((pid-val exit-status exit-code) (process-wait pid #t)))
+	   (mutex-lock! m)
+	   (launch:einf-pid-set!           exit-info  pid)         ;; (vector-set! exit-info 0 pid)
+	   (launch:einf-exit-status-set!   exit-info  exit-status) ;; (vector-set! exit-info 1 exit-status)
+	   (launch:einf-exit-code-set!     exit-info  exit-code)   ;; (vector-set! exit-info 2 exit-code)
+	   (launch:einf-rollup-status-set! exit-info  exit-code)   ;; (vector-set! exit-info 3 exit-code)  ;; rollup status
+	   (mutex-unlock! m)
+	   (if (eq? pid-val 0)
+	       (begin
+		 (thread-sleep! 2)
+		 (loop (+ i 1)))
+	       )))))
+  ;; then, if runscript ran ok (or did not get called)
+  ;; do all the ezsteps (if any)
+  ;; NOTE(review): no check of the runscript result is made at this point; the
+  ;; exit-status test happens per step inside the loop below
+  (if (or ezsteps subrun)
+      (let* ((test-run-dir (tests:get-test-path-from-environment))
+             (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here?
+	      ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic
+	      ;;       ezstep names need a full re-eval here.
+	      (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
+	     (ezstepslst (if (hash-table? testconfig)
+			     (hash-table-ref/default testconfig "ezsteps" '())
+			     #f)))
+	(if testconfig
+	    (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ...
+	    (begin
+	      (launch:setup)
+	      (debug:print 0 *default-log-port* "WARNING: no testconfig found for " test-name " in search path:\n  "
+			   (string-intersperse (tests:get-tests-search-path *configdat*) "\n  "))))
+	;; after all that, still no testconfig? Time to abort
+	(if (not testconfig)
+	    (begin
+	      (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now")
+	      (exit 1)))
+
+	;; create a proc for the subrun if requested, save that proc in the ezsteps table as the last entry
+	;; 1. get section [runarun]
+	;; 2. unset MT_* vars
+	;; 3. fix target
+	;; 4. fix runname
+	;; 5. fix testpatt or calculate it from contour
+	;; 6. launch the run
+	;; 7. roll up the run result and or roll up the logpro processed result
+	(when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested
+            (subrun:initialize-toprun-test testconfig test-run-dir)
+	    (let* ((mt-cmd (subrun:launch-cmd test-run-dir)))
+              (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"")
+              (set! ezsteps #t) ;; set the needed flag
+	      ;; the subrun is appended as a final step tagged {subrun=true}
+	      (set! ezstepslst
+                    (append (or ezstepslst '())
+                            (list (list "subrun" (conc "{subrun=true} " mt-cmd)))))))
+
+	;; process the ezsteps
+	(if ezsteps
+	    (begin
+	      (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps"))
+	      ;; if ezsteps was defined then we are sure to have at least one step but check anyway
+	      (if (not (> (length ezstepslst) 0))
+		  (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length")
+		  ;; run the steps in order; prevstep is carried along but not read here
+		  (let loop ((ezstep (car ezstepslst))
+			     (tal    (cdr ezstepslst))
+			     (prevstep #f))
+                    (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"")
+		    ;; check exit-info (vector-ref exit-info 1)
+		    (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
+			(let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig))
+			      (stepname    (car ezstep)))
+			  ;; if logpro-used read in the stepname.dat file
+			  (if (and logpro-used (common:file-exists? (conc stepname ".dat")))
+			      (launch:load-logpro-dat run-id test-id stepname))
+			  ;; continue with the next step only when this one was good
+			  (if (steprun-good? logpro-used (launch:einf-exit-code exit-info))
+			      (if (not (null? tal))
+				  (loop (car tal) (cdr tal) stepname))
+			      (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping")))
+			(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))))))))
+
+;; Monitor a running test: periodically push cpu load / disk free / elapsed
+;; time to the db and kill the job when a kill request is seen or the run
+;; time limit is exceeded.
+;;
+;;   run-id, test-id : identify the test
+;;   exit-info       : launch:einf record read (under mutex m) for the pid to kill
+;;   m               : mutex guarding exit-info
+;;   work-area       : passed to tests:set-full-meta-info
+;;   runtlim         : run time limit in seconds, or #f for no limit
+;;   misc-flags      : hash table; the loop continues while 'keep-going is true
+;;   item-path, fullrunscript, ezsteps, test-name, tconfigreg : not referenced
+;;                     in this body
+;;
+;; After killing a job this procedure calls launch:end-of-run-check and (exit).
+;;
+(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
+  (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
+         (start-seconds (current-seconds))
+	 ;; NOTE(review): despite the name this returns elapsed SECONDS since
+	 ;; start-seconds; there is no division by 60
+	 (calc-minutes  (lambda ()
+			  (inexact->exact 
+			   (round 
+			    (- 
+			     (current-seconds) 
+			     start-seconds)))))
+	 ;; kill-tries is bound but not used below
+	 (kill-tries 0))
+    ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
+    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
+    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
+
+    ;; loop state: last reported cpu load and disk free, and time of last db sync
+    (let loop ((minutes   (calc-minutes))
+	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
+	       (disk-free (get-df (current-directory)))
+               (last-sync (current-seconds)))
+      #;(common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
+      ;; new-cpu-load / new-disk-free are #f unless the value moved enough to be worth reporting
+      (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
+             (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
+                                   (delta (abs (- load cpu-load))))
+                              (if (> delta 0.1) ;; don't bother updating with small changes
+                                  load
+                                  #f)))
+             (new-disk-free (let* ((df    (if over-time ;; only get df every 30 seconds
+                                              (get-df (current-directory))
+                                              disk-free))
+                                   (delta (abs (- df disk-free))))
+                              (if (and (> df 0)
+                                       (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg
+                                  df
+                                  #f)))
+             (do-sync       (or new-cpu-load new-disk-free over-time))
+
+             (test-info   (rmt:get-test-info-by-id run-id test-id))
+             (state       (db:test-get-state test-info))
+             (status      (db:test-get-status test-info))
+             (kill-reason  "no kill reason specified")
+             (kill-job?    #f))
+        #;(common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
+        ;; decide whether to kill: explicit KILLREQ, run time limit, or (no kill) revive a test marked DEAD
+        (cond
+         ((test-get-kill-request run-id test-id)
+          (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
+          (set! kill-job? #t))
+         ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
+          (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
+          (set! kill-job? #t))
+         ((equal? status "DEAD")
+          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
+          ;; NOTE(review): test-id and 'foo are passed where other calls in this file pass test-name and item-path -- confirm the callee accepts this
+          (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
+          ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
+          (set! kill-job? #f)))
+
+        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
+        (launch:handle-zombie-tests run-id)
+        (when do-sync
+          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
+          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
+          #;(common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
+          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
+          #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
+        
+	(if kill-job? 
+	    (begin
+              (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
+	      ;; the mutex is held for the whole kill sequence, including the 5 second sleep
+	      (mutex-lock! m)
+	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
+	      ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
+	      ;;       between tries?
+	      (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0))
+		     (pid2 (rmt:test-get-top-process-pid run-id test-id))
+		     (pids (delete-duplicates (filter number? (list pid1 pid2)))))
+		(if (not (null? pids))
+		    (begin
+		      (for-each
+		       (lambda (pid)
+			 (handle-exceptions
+			  exn
+			  (begin
+			    (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.")
+			    (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
+			  (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid) ;;  " (attempt # " kill-tries ")")
+			  (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask))
+			  ;; (if (process:alive? pid)
+			  ;;     (begin
+			  ;; first ask politely with signal/term ...
+			  (map (lambda (pid-num)
+				 (process-signal pid-num signal/term))
+			       (process:get-sub-pids pid))
+			  (thread-sleep! 5)
+			  ;; (if (process:process-alive? pid)
+			  ;; ... then signal/kill whatever sub pids are listed after the wait, ignoring errors
+			  (map (lambda (pid-num)
+				 (handle-exceptions
+				  exn
+				  #f
+				  (process-signal pid-num signal/kill)))
+			       (process:get-sub-pids pid))))
+		       ;;    (debug:print-info 0 *default-log-port* "not killing process " pid " as it is not alive"))))
+		       pids)
+                      ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel?  If not, should it?
+		      (tests:test-set-status! run-id test-id "KILLED"  "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt
+		    (begin
+		      (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2)
+		      (tests:test-set-status! run-id test-id "KILLED"  "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt
+		      )))
+	      (mutex-unlock! m)
+	      ;; no point in sticking around. Exit now. But run end of run before exiting?
+        (launch:end-of-run-check run-id)
+	      (exit)))
+	;; not killed: sleep 3 seconds and iterate while the 'keep-going flag is still set
+	(if (hash-table-ref/default misc-flags 'keep-going #f)
+	    (begin
+	      (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
+	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
+		  (loop (calc-minutes)
+                        (or new-cpu-load cpu-load)
+                        (or new-disk-free disk-free)
+                        (if do-sync (current-seconds) last-sync)))))))
+    ;; final meta info update once the loop has ended
+    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional
+
+;; set up needed environment variables given a run-id and optionally a target, itempath etc.
+;;
+(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f))
+  ;; Export the MT_* variables describing run `run-id` (and optionally one test
+  ;; item) into the environment of this process.
+  ;;   inkeys / inkeyvals : used as given instead of being looked up again
+  ;;   inrunname          : run name; otherwise rmt:get-run-name-from-id is asked
+  ;;   intarget           : target; otherwise the command line, then $MT_TARGET
+  ;;   testname, itempath : when given, MT_TEST_NAME / MT_ITEMPATH are set as well
+  (let* ((run-target (or intarget
+                         (common:args-get-target)
+                         (get-environment-variable "MT_TARGET")))
+         (run-keys   (or inkeys (rmt:get-keys)))
+         (run-kvs    (or inkeyvals (keys:target->keyval run-keys run-target)))
+         (cached     (hash-table-ref/default *env-vars-by-run-id* run-id #f))
+         (ltree      (common:get-linktree)))
+    (when testname (setenv "MT_TEST_NAME" testname))
+    (when itempath (setenv "MT_ITEMPATH"  itempath))
+
+    (cond
+     (ltree (setenv "MT_LINKTREE" ltree))
+     (else  (debug:print-error 0 *default-log-port* "linktree not set, should be set in megatest.config in [setup] section.")))
+
+    ;; first call for this run-id: seed the per-run cache from the key/value pairs
+    (unless cached
+      (set! cached (make-hash-table))
+      (hash-table-set! *env-vars-by-run-id* run-id cached)
+      (for-each (lambda (kv)
+                  (hash-table-set! cached (car kv) (cadr kv)))
+                run-kvs))
+
+    ;; push every cached pair into the environment
+    (hash-table-for-each
+     cached
+     (lambda (name value)
+       (debug:print 2 *default-log-port* "setenv " name " " value)
+       (safe-setenv name value)))
+
+    (unless (get-environment-variable "MT_TARGET")
+      (setenv "MT_TARGET" run-target))
+
+    ;; Apply the "env-override" entries of *configdat*. This lookup has raised before
+    ;; because *configdat* was #f, so an exception forces a config reread and a retry;
+    ;; after five retries the state is dumped to the log and the process exits with 1.
+    ;; NOTE: the environment is tainted by the overrides applied in this block.
+    (let retry ((attempt 0))
+      (handle-exceptions
+          exn
+          (let ((chain  (get-call-chain))
+                (errmsg ((condition-property-accessor 'exn 'message) exn)))
+            (cond
+             ((< attempt 5) ;; this call is colliding, do some crude stuff to fix it.
+              (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" attempt)
+              (launch:setup force-reread: #t)
+              (retry (+ attempt 1)))
+             (else
+              (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " attempt " times. Message: " errmsg)
+              (debug:print 0 *default-log-port* "Call chain:")
+              (with-output-to-port *default-log-port*
+                (lambda ()
+                  (print "*configdat* is >>"*configdat*"<<")
+                  (pp *configdat*)
+                  (pp chain)))
+              (exit 1))))
+        (if (not (and *configdat* (hash-table? *configdat*)))
+            (begin
+              (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen.  Brute force reread.")
+              (thread-sleep! 2) ;; assuming nfs lag.
+              (launch:setup force-reread: #t)))
+        (alist->env-vars (hash-table-ref/default *configdat* "env-override" '()))))
+
+    ;; use this as an opportunity to put MT_RUNNAME in the environment
+    (let ((rname (or inrunname (rmt:get-run-name-from-id run-id))))
+      (cond
+       (rname (setenv "MT_RUNNAME" rname))
+       (else  (debug:print-error 0 *default-log-port* "no value for runname for id " run-id))))
+    (setenv "MT_RUN_AREA_HOME" *toppath*)
+
+    ;; if a testname and itempath are available (re)set the remaining variables
+    (when testname (setenv "MT_TEST_NAME" testname))
+    (when itempath (setenv "MT_ITEMPATH"  itempath))
+    (when (and testname ltree)
+      (let ((item-suffix (if (and itempath (not (equal? itempath "")))
+                             (conc "/" itempath)
+                             "")))
+        (setenv "MT_TEST_RUN_DIR" (conc (getenv "MT_LINKTREE") "/" (getenv "MT_TARGET") "/"
+                                        (getenv "MT_RUNNAME") "/" (getenv "MT_TEST_NAME") item-suffix))))))
+
+;; launch:execute -- run one test (or test item) on the current host.
+;;
+;; encoded-cmd : string decoded with common:read-encoded-string. Apart from
+;;   exporting it as MT_CMDINFO nothing happens unless it decodes to a list;
+;;   the list is queried by key (testpath, toppath, work-area, run-id, ...).
+;;
+;; Flow: load configs, move the test to REMOTEHOSTSTART, export the MT_*
+;; environment, write .testconfig into the work area, then run the steps in one
+;; thread while a second thread monitors the job. Once the run thread is done
+;; the final state/status is stored and summaries and run stats are updated.
+;; Several failure paths call (exit); exit code 4 means exit-status was #f.
+(define (launch:execute encoded-cmd)
+  (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
+	 (tconfigreg #f))
+    (setenv "MT_CMDINFO" encoded-cmd)
+    ;;(bb-check-path msg: "launch:execute incoming")
+    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
+	;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
+	(let* ((testpath  (assoc/default 'testpath  cmdinfo))  ;; testpath is the test spec area
+	       (top-path  (assoc/default 'toppath   cmdinfo))
+	       (work-area (assoc/default 'work-area cmdinfo))  ;; work-area is the test run area
+	       (test-name (assoc/default 'test-name cmdinfo))
+	       (runscript (assoc/default 'runscript cmdinfo))
+	       (ezsteps   (assoc/default 'ezsteps   cmdinfo))
+	       (subrun    (assoc/default 'subrun    cmdinfo))
+	       ;; (runremote (assoc/default 'runremote cmdinfo))
+	       ;; (transport (assoc/default 'transport cmdinfo))  ;; not used
+	       ;; (serverinf (assoc/default 'serverinf cmdinfo))
+	       ;; (port      (assoc/default 'port      cmdinfo))
+	       (serverurl (assoc/default 'serverurl cmdinfo))
+	       (homehost  (assoc/default 'homehost  cmdinfo))
+	       (run-id    (assoc/default 'run-id    cmdinfo))
+	       (test-id   (assoc/default 'test-id   cmdinfo))
+	       (target    (assoc/default 'target    cmdinfo))
+	       (areaname  (assoc/default 'areaname  cmdinfo))
+	       (itemdat   (assoc/default 'itemdat   cmdinfo))
+	       (env-ovrd  (assoc/default 'env-ovrd  cmdinfo))
+	       (set-vars  (assoc/default 'set-vars  cmdinfo)) ;; pre-overrides from -setvar
+	       (runname   (assoc/default 'runname   cmdinfo))
+	       (megatest  (assoc/default 'megatest  cmdinfo))
+	       (runtlim   (assoc/default 'runtlim   cmdinfo))
+	       (contour   (assoc/default 'contour   cmdinfo))
+	       (item-path (item-list->path itemdat))
+	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
+	       (keys      #f) ;; set below from rmt:get-keys
+	       (keyvals   #f) ;; set below from keys:target->keyval
+	       (fullrunscript (if (not runscript)
+                                  #f
+                                  (if (substring-index "/" runscript)
+                                      runscript ;; use unadultered if contains slashes
+                                      (let ((fulln (conc work-area "/" runscript)))
+	                                  (if (and (common:file-exists? fulln)
+                                                   (file-execute-access? fulln))
+                                              fulln
+                                              runscript))))) ;; assume it is on the path
+               (check-work-area           (lambda ()
+                                            ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
+                                            (let loop ((count 0))
+                                              (if (or (common:directory-exists? work-area)
+                                                      (> count 10)) ;; once count exceeds 10 stop waiting; the chdir below is attempted regardless
+                                                  (change-directory work-area)
+                                                  (begin
+                                                    (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
+                                                    (thread-sleep! 10)
+                                                    (loop (+ count 1)))))
+                                            (if (not (string=?  (common:real-path work-area)(common:real-path (current-directory))))
+                                                (begin
+                                                  (debug:print 0 *default-log-port*
+                                                               "INFO: we are expecting to be in directory " work-area "\n"
+                                                               "     but we are actually in the directory " (current-directory) "\n"
+                                                               "     doing another change dir.")
+                                                  (change-directory work-area)))
+                                            ;; spot check that the files in testpath are available. Too often NFS delays cause problems here.
+                                            (let ((files      (glob (conc testpath "/*")))
+                                                  (bad-files '()))
+                                              (for-each
+                                               (lambda (fullname)
+                                                 (let* ((fname (pathname-strip-directory fullname))
+                                                        (targn (conc work-area "/" fname)))
+                                                   (if (not (file-exists? targn))
+                                                       (set! bad-files (cons fname bad-files)))))
+                                               files)
+                                              (if (not (null? bad-files))
+                                                  (begin
+                                                    (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. Re-running the copy command.")
+                                                    (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", "))
+                                                    (launch:test-copy testpath work-area))))
+                                            ;; one more time, change to the work-area directory
+                                            (change-directory work-area)))
+	       ) ;; let*
+	  (if contour (setenv "MT_CONTOUR" contour))
+	  ;; immediately set some key variables from CMDINFO data, yes, these will be set again below ...
+	  (setenv "MT_TESTSUITENAME" areaname)
+	  (setenv "MT_RUN_AREA_HOME" top-path)
+	  (set! *toppath* top-path)
+          (change-directory *toppath*) ;; temporarily switch to the run area home
+	  (setenv "MT_TEST_RUN_DIR"  work-area)
+	  (launch:setup) ;; should be properly in the run area home now
+
+	  ;; NOTE(review): the statements from here through the next (launch:setup) repeat
+	  ;; the preceding block verbatim -- possibly a merge artifact; confirm before removing.
+	  (if contour (setenv "MT_CONTOUR" contour))
+	  ;; immediately set some key variables from CMDINFO data, yes, these will be set again below ...
+	  (setenv "MT_TESTSUITENAME" areaname)
+	  (setenv "MT_RUN_AREA_HOME" top-path)
+	  (set! *toppath* top-path)
+          (change-directory *toppath*) ;; temporarily switch to the run area home
+	  (setenv "MT_TEST_RUN_DIR"  work-area)
+	  (launch:setup) ;; should be properly in the run area home now
+	  (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path
+	  ;; Handler for INT/TERM: th1 records COMPLETED/ABORT and exits 1; th2 is a
+	  ;; fallback that exits 4 after 20 seconds if th1 has not finished by then.
+	  (let ((sighand (lambda (signum)
+			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
+			   (if (eq? signum signal/stop)
+			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
+			   (set! *time-to-exit* #t)
+			   (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...")
+			   (let ((th1 (make-thread (lambda ()
+                                                     (print "set test to COMPLETED/ABORT begin.")
+						     (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal")
+                                                     (print "set test to COMPLETED/ABORT complete.")
+						     (print "Killed by signal " signum ". Exiting")
+						     (exit 1))))
+				 (th2 (make-thread (lambda ()
+						     (thread-sleep! 20)
+						     (debug:print 0 *default-log-port* "Done")
+						     (exit 4)))))
+			     (thread-start! th2)
+			     (thread-start! th1)
+			     (thread-join! th2)))))
+	    (set-signal-handler! signal/int sighand)
+	    (set-signal-handler! signal/term sighand)
+	    ) ;; (set-signal-handler! signal/stop sighand)
+	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
+	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
+	  (let* ((test-info (rmt:get-test-info-by-id run-id test-id))
+		 (test-host (if test-info
+				(db:test-get-host        test-info)
+				(begin
+				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
+				  (exit))))
+		 (test-pid  (db:test-get-process_id  test-info)))
+	    (cond
+             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
+	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
+	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
+	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
+              (rmt:general-call 'set-test-start-time #f test-id)
+              (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
+	      ) ;; prime it for running
+	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
+	      ;; NOTE(review): as written, a live process only logs the error (execution then
+	      ;; continues past this cond, assuming debug:print-error returns), while a dead
+	      ;; process makes this job exit. Looks inverted or incomplete -- confirm intent.
+	      (if (process:alive-on-host? test-host test-pid)
+		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
+		  (exit)))
+	     ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
+	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
+              (rmt:general-call 'set-test-start-time #f test-id)
+	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
+	      )
+	     (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
+	      (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
+	      (exit))))
+
+          ;; cleanup prior execution's steps
+          (rmt:delete-steps-for-test! run-id test-id)
+	  (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name))
+	  (set! keys       (rmt:get-keys))
+	  ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process
+	  ;; one of these is defunct/redundant ...
+	  (if (not (launch:setup force-reread: #t))
+	      (begin
+		(debug:print 0 *default-log-port* "Failed to setup, exiting") 
+		;; (sqlite3:finalize! db)
+		;; (sqlite3:finalize! tdb)
+		(exit 1)))
+          ;; validate that the test run area is available
+          (check-work-area)
+          ;; still need to go back to run area home for next couple steps
+	  (change-directory *toppath*) 
+
+	  ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This 
+	  ;;       seems non-ideal but could well break stuff
+	  ;;    BUG? BUG? BUG?
+	  (let ((rconfig (full-runconfigs-read)) ;; (read-config (conc  *toppath* "/runconfigs.config") #f #t sections: (list "default" target))))
+		(wconfig (configf:read-config "waivers.config" #f #t sections: `( "default" ,target )))) ;; read the waivers config if it exists. NOTE(review): wconfig is not referenced again in this function
+	    ;; (setup-env-defaults (conc *toppath* "/runconfigs.config") run-id (make-hash-table) keyvals target)
+	    ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id))
+	    ;; Now have runconfigs data loaded, set environment vars
+	    (for-each
+	     (lambda (section)
+	       (for-each
+		(lambda (varval)
+		  (let ((var (car varval))
+			(val (cadr varval)))
+		    (if (and (string? var)(string? val))
+			(begin
+			  (safe-setenv var (configf:eval-string-in-environment val))) ;; val)
+			(debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val))))
+		(configf:get-section rconfig section)))
+	     (list "default" target)))
+          ;;(bb-check-path msg: "launch:execute post block 1")
+
+	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
+	  ;; NOTE(review): repeats the wait in check-work-area, but tests common:file-exists? instead of common:directory-exists?
+	  (let loop ((count 0))
+	    (if (or (common:file-exists? work-area)
+		    (> count 10))
+		(change-directory work-area)
+		(begin
+		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
+		  (thread-sleep! 10)
+		  (loop (+ count 1)))))
+
+          ;; now we can switch to the work-area?
+          (change-directory work-area)
+          ;;(bb-check-path msg: "launch:execute post block 1.5")
+	  ;; (change-directory work-area) 
+	  (set! keyvals    (keys:target->keyval keys target))
+	  ;; apply pre-overrides before other variables. The pre-override vars must not
+	  ;; clobbers things from the official sources such as megatest.config and runconfigs.config
+	  (if (string? set-vars)
+	      (let ((varpairs (string-split set-vars ",")))
+		(debug:print 4 *default-log-port* "varpairs: " varpairs)
+		(map (lambda (varpair)
+		       (let ((varval (string-split varpair "=")))
+			 (if (eq? (length varval) 2)
+			     (let ((var (car varval))
+				   (val (cadr varval)))
+			       (debug:print 1 *default-log-port* "Adding pre-var/val " var " = " val " to the environment")
+			       (setenv var val)))))
+		     varpairs)))
+          ;;(bb-check-path msg: "launch:execute post block 2")
+	  (for-each
+	   (lambda (varval)
+	     (let ((var (car varval))
+		   (val (cadr varval)))
+	       (if val
+		   (setenv var val)
+		   (begin
+		     (debug:print-error 0 *default-log-port* "required variable " var " does not have a valid value. Exiting")
+		     (exit)))))
+	     (list 
+	      (list  "MT_TEST_RUN_DIR" work-area)
+	      (list  "MT_TEST_NAME" test-name)
+	      (list  "MT_ITEM_INFO" (conc itemdat))
+	      (list  "MT_ITEMPATH"  item-path)
+	      (list  "MT_RUNNAME"   runname)
+	      (list  "MT_MEGATEST"  megatest)
+	      (list  "MT_TARGET"    target)
+	      (list  "MT_LINKTREE"  (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
+	      (list  "MT_TESTSUITENAME" (common:get-testsuite-name))))
+          ;;(bb-check-path msg: "launch:execute post block 3")
+
+	  (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
+          ;;(bb-check-path msg: "launch:execute post block 4")
+	  ;; (change-directory top-path)
+	  ;; Can setup as client for server mode now
+	  ;; (client:setup)
+	  ;; environment overrides are done *before* the remaining critical envars.
+	  (alist->env-vars env-ovrd)
+          ;;(bb-check-path msg: "launch:execute post block 41")
+	  (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
+          ;;(bb-check-path msg: "launch:execute post block 42")
+	  (set-item-env-vars itemdat)
+          ;;(bb-check-path msg: "launch:execute post block 43")
+          (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars")))
+            (if blacklist
+		(let ((vars (string-split blacklist)))
+		  (save-environment-as-files "megatest" ignorevars: vars)
+		  (for-each (lambda (var)
+			      (unsetenv var))
+			    vars))
+                (save-environment-as-files "megatest")))
+          ;;(bb-check-path msg: "launch:execute post block 44")
+	  ;; open-run-close not needed for test-set-meta-info
+	  ;; (tests:set-full-meta-info #f test-id run-id 0 work-area)
+	  ;; (tests:set-full-meta-info test-id run-id 0 work-area)
+	  (tests:set-full-meta-info #f test-id run-id 0 work-area 10)
+
+	  ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here
+
+	  (if (args:get-arg "-xterm")
+	      (set! fullrunscript "xterm")
+	      (if (and fullrunscript 
+		       (common:file-exists? fullrunscript)
+		       (not (file-execute-access? fullrunscript)))
+		  (system (conc "chmod ug+x " fullrunscript))))
+
+	  ;; We are about to actually kick off the test
+	  ;; so this is a good place to remove the records for 
+	  ;; any previous runs
+	  ;; (db:test-remove-steps db run-id testname itemdat)
+	  ;; now is also a good time to write the .testconfig file
+	  (let* ((tconfig-fname   (conc work-area "/.testconfig"))
+		 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
+		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs)))
+	    (configf:write-alist tconfig tconfig-tmpfile)
+	    (file-move tconfig-tmpfile tconfig-fname #t))
+	  (let* ((m            (make-mutex))
+		 (kill-job?    #f) ;; NOTE(review): never set! in this function, so new-state below is always "COMPLETED"
+		 (exit-info    (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status
+		 (job-thread   #f) ;; assigned th2 below but not read in this function
+		 ;; (keep-going   #t)
+		 (misc-flags   (let ((ht (make-hash-table)))
+				 (hash-table-set! ht 'keep-going #t)
+				 ht))
+		 (runit        (lambda ()
+				 (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m)))
+		 (monitorjob   (lambda ()
+				 (launch:monitor-job  run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)))
+		 (th1          (make-thread monitorjob "monitor job"))
+		 (th2          (make-thread runit "run job")))
+	    (set! job-thread th2)
+	    (thread-start! th1)
+	    (thread-start! th2)
+	    (thread-join! th2)
+	    (debug:print-info 0 *default-log-port* "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...")
+	    (hash-table-set! misc-flags 'keep-going #f)
+	    (thread-join! th1)
+	    (thread-sleep! 1)       ;; give thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec.
+	    (mutex-lock! m)
+	    (let* ((item-path (item-list->path itemdat))
+		   ;; only state and status needed - use lazy routine
+		   (testinfo  (rmt:get-testinfo-state-status run-id test-id)))
+	      ;; Am I completed?
+	      (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED"))
+		  (let ((new-state  (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status
+				                                        ;; "COMPLETED"							                ;; (db:test-get-state testinfo)))   ;; else preseve the state as set within the test
+				    )
+			(new-status (cond
+				     ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1)
+				     ((eq? (launch:einf-rollup-status exit-info) 0)     ;; (vector-ref exit-info 3)
+				      ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO)
+				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
+				     ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL")  ;; (vector-ref exit-info 3)
+				     ((eq? (launch:einf-rollup-status exit-info) 2)	     ;;	(vector-ref exit-info 3)
+				      ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN)
+				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
+				     ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK")
+				     ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED")
+				     ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT")
+				     ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP")
+				     (else "FAIL")))) ;; (db:test-get-status testinfo)))
+		    (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info))
+		    (tests:test-set-status! run-id 
+					    test-id 
+					    new-state
+					    new-status
+					    (args:get-arg "-m") #f)
+		    ;; need to update the top test record if PASS or FAIL and this is a subtest
+		    ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status!
+		    ))
+	      ;; for automated creation of the rollup html file this is a good place...
+	      (if (not (equal? item-path ""))
+		  (tests:summarize-items run-id test-id test-name #f))
+	      (tests:summarize-test run-id test-id)  ;; don't force - just update if no
+	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
+	    (mutex-unlock! m)
+            (launch:end-of-run-check run-id )
+	    (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " 
+			 work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")
+	    (if (not (launch:einf-exit-status exit-info))
+		(exit 4))))
+        )))
+
+;; launch:is-test-alive - is the process tree rooted at pid still present on host?
+;; Returns #t when `ssh <host> pstree -A <pid>` prints at least one line, and also
+;; when host or pid is #f or host is "n/a" (no check is made); #f otherwise.
+(define (launch:is-test-alive host pid)
+  (or (not (and host pid (not (equal? host "n/a"))))
+      (let* ((cmd    (conc "ssh " host " pstree -A " pid))
+             (output (with-input-from-pipe cmd read-lines)))
+        (print "cmd: " cmd "\n op: " output )
+        (not (null? output)))))
+ 
+;; launch:kill-tests-if-dead - request a kill for tests of run-id that look dead.
+;; A RUNNING/LAUNCHED/REMOTEHOSTSTART test is moved to KILLREQ when event-time +
+;; duration lies more than 600 s in the past and launch:is-test-alive returns #f.
+;; Returns the number of tests flagged (0 when no test is in those states).
+(define (launch:kill-tests-if-dead run-id)
+  (let loop ((remaining (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f))
+             (kill-cnt  0))
+    (if (null? remaining) ;; also covers an empty result, on which the former (car running-tests) raised
+        kill-cnt
+        (let* ((running-test (car remaining))
+               (test-id      (vector-ref running-test 0))
+               (test-name    (vector-ref running-test 2))
+               (event-time   (vector-ref running-test 5))
+               (host         (vector-ref running-test 6))
+               (item-path    (vector-ref running-test 11))
+               (duration     (vector-ref running-test 12))
+               (dead?        (and (< (+ event-time duration 600) (current-seconds)) ;; no update for 10 min; only then look up the pid and probe the host
+                                  (not (launch:is-test-alive host (rmt:test-get-top-process-pid run-id test-id))))))
+          (when dead?
+            (debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed")
+            (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f))
+          (loop (cdr remaining) (if dead? (+ kill-cnt 1) kill-cnt))))))
+
+;; DO NOT USE - caching of configs is handled in launch:setup now.
+;;
+(define (launch:cache-config)
+  ;; Deprecated: caching of configs is handled in launch:setup now.
+  ;; When *configdat* is loaded and one of -run / -runtests / -execute was given, make
+  ;; sure <linktree>/<target>/<runname> exists, dump *configdat* there as
+  ;; .megatest.cfg.<seconds> and symlink the versioned .megatest.cfg-... name to it.
+  (when (and *configdat*
+             (or (args:get-arg "-run")
+                 (args:get-arg "-runtests")
+                 (args:get-arg "-execute")))
+    (let* ((linktree (common:get-linktree))
+           (target   (common:args-get-target exit-if-bad: #t))
+           (runname  (or (args:get-arg "-runname")
+                         (args:get-arg ":runname")
+                         (getenv "MT_RUNNAME")))
+           (fulldir  (conc linktree "/" target "/" runname)))
+      (cond
+       ((not (and linktree (common:file-exists? linktree))) ;; can't proceed without linktree
+        (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs."))
+       (else
+        (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
+        (unless (common:file-exists? fulldir)
+          (create-directory fulldir #t)) ;; need to protect with exception handler
+        (when (and target runname (common:file-exists? fulldir))
+          (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
+                (targfile (conc fulldir "/.megatest.cfg-"  megatest-version "-" megatest-fossil-hash))
+                (rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
+            ;; only cache megatest.config AFTER runconfigs has been cached
+            (when (common:file-exists? rconfig)
+              (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
+              ;; NOTE(review): the dump is skipped inside a running test, yet the link
+              ;; below is still created and then points at a file this call did not write.
+              (unless (common:in-running-test?)
+                (configf:write-alist *configdat* tmpfile))
+              (system (conc "ln -sf " tmpfile " " targfile))))))))))
+
+
+;; gather available information, if legit read configs in this order:
+;;
+;;   if have cache;
+;;      read it and return it
+;;   else
+;;     megatest.config     (do not cache)
+;;     runconfigs.config   (cache if all vars avail)
+;;     megatest.config     (cache if all vars avail)
+;;   returns:
+;;     *toppath*
+;;   side effects:
+;;     sets; *configdat*    (megatest.config info)
+;;           *runconfigdat* (runconfigs.config info)
+;;           *configstatus* (status of the read data)
+;;
+(define (launch:setup #!key (force-reread #f) (areapath #f))
+  ;; Mutex-serialized entry to launch:setup-body; returns *toppath* or the body's result.
+  ;; dynamic-wind unlocks even when the body raises; before, the mutex then stayed locked.
+  (dynamic-wind
+      (lambda () (mutex-lock! *launch-setup-mutex*))
+      (lambda ()
+        (if (and *toppath* (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all
+            (begin (debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
+                   *toppath*)
+            (launch:setup-body force-reread: force-reread areapath: areapath)))
+      (lambda () (mutex-unlock! *launch-setup-mutex*))))
+
+;; return paths depending on what info is available.
+;;
+(define (launch:get-cache-file-paths areapath toppath target mtconfig)
+  ;; Returns the pair (mtcachef . rccachef): cache file paths for megatest.config and
+  ;; runconfigs.config. Both are #f when neither the test dir nor the run dir passes
+  ;; common:directory-writable? (or runname/target/linktree is missing); the test dir
+  ;; is preferred over the run dir. areapath, toppath and mtconfig are not consulted.
+  (let* ((use-cache (common:use-cache?)) ;; still called as before; its value is unused here
+         (runname   (common:args-get-runname))
+         (linktree  (common:get-linktree))
+         (testname  (common:get-full-test-name))
+         (rundir    (and runname target linktree
+                         (common:directory-writable? (conc linktree "/" target "/" runname))))
+         (testdir   (and rundir testname
+                         (common:directory-writable? (conc rundir "/" testname))))
+         (cachedir  (or testdir rundir))
+         ;; <cachedir>/<prefix><megatest-version>-<megatest-fossil-hash>, or #f without a cachedir
+         (versioned (lambda (prefix)
+                      (and cachedir (conc cachedir "/" prefix megatest-version "-" megatest-fossil-hash))))
+         (mtcachef  (versioned ".megatest.cfg-"))
+         (rccachef  (versioned ".runconfigs.cfg-")))
+    (debug:print-info 6 *default-log-port*
+                      "runname=" runname "\n  linktree=" linktree "\n  testname=" testname
+                      "\n  rundir=" rundir "\n  testdir=" testdir "\n  cachedir=" cachedir
+                      "\n  mtcachef=" mtcachef "\n  rccachef=" rccachef)
+    (cons mtcachef rccachef)))
+
+;; Worker for launch:setup; does no locking of its own.
+;;
+;; Populates the globals *toppath*, *configdat*, *runconfigdat*, *configinfo*
+;; and *configstatus* using one of three strategies:
+;;   1. both cache files exist and caching is enabled -> read the cached alists
+;;   2. cache file names are known but (1) did not apply -> full read of
+;;      megatest.config and runconfigs.config, then write the caches
+;;   3. no cache file names -> read what is available, status is 'partial
+;; Afterwards it makes sure the linktree directory and the <toppath>/lt link
+;; exist, exports MT_RUN_AREA_HOME / MT_TESTSUITENAME, retries the cache write
+;; and finally merges any -append-config file into *configdat*.
+;;
+;;   force-reread: when true, skip the early return and the cached read
+;;   areapath:     candidate area path, used when *toppath* is not yet set
+;;
+;; Returns *toppath* (which is forced to #f when the directory does not exist).
+;; Calls (exit 1) or (exit 2) when no megatest area / config can be found.
+;;
+(define (launch:setup-body #!key (force-reread #f) (areapath #f))
+  (if (and (eq? *configstatus* 'fulldata)
+           *toppath*
+           (not force-reread)) ;; no need to reprocess
+      *toppath*   ;; return toppath
+      (let* ((use-cache  (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting.  We do not have *configdat*.  Bootstrapping problem here.
+             (toppath    (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath
+             (target     (common:args-get-target))
+             (sections   (if target (list "default" target) #f)) ;; for runconfigs
+             (mtconfig   (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config
+             (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
+             ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ...
+             (mtcachef   (if (null? cachefiles)
+                             #f
+                             (car cachefiles)))
+             (rccachef   (if (null? cachefiles)
+                             #f
+                             (cdr cachefiles))))
+        (set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource
+        (cond
+         ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME
+         ((and (not force-reread)
+               mtcachef  rccachef
+               use-cache
+               (get-environment-variable "MT_RUN_AREA_HOME")
+               (common:file-exists? mtcachef)
+               (common:file-exists? rccachef))
+          (set! *configdat*    (configf:read-alist mtcachef))
+          (set! *runconfigdat* (configf:read-alist rccachef))
+          (set! *configinfo*   (list *configdat*  (get-environment-variable "MT_RUN_AREA_HOME")))
+          (set! *configstatus* 'fulldata)
+          (set! *toppath*      (get-environment-variable "MT_RUN_AREA_HOME"))
+          *toppath*)
+         ;; there are no existing cached configs, do full reads of the configs and cache them
+         ;; we have all the info needed to fully process runconfigs and megatest.config
+         ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it?
+               mtcachef
+               rccachef) ;; BB- why are we doing this without asking if caching is desired?
+          (let* ((first-pass   (configf:find-and-read-config        ;; NB// sets MT_RUN_AREA_HOME as side effect
+                                mtconfig
+                                environ-patt: "env-override"
+                                given-toppath: toppath
+                                pathenvvar: "MT_RUN_AREA_HOME"))
+                 ;; first-pass is used below as (config-hash toppath); take the
+                 ;; path from its second element, and only if the read
+                 ;; succeeded, so a failed read reaches the recovery branch
+                 ;; below instead of raising here.
+                 (first-rundat (let* ((found-toppath (and (pair? first-pass)
+                                                          (pair? (cdr first-pass))
+                                                          (cadr first-pass)))
+                                      (rc-toppath    (cond
+                                                      ((string? toppath)       toppath)
+                                                      ((string? found-toppath) found-toppath)
+                                                      (else (get-environment-variable "MT_RUN_AREA_HOME")))))
+                                 (configf:read-config ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now.
+                                  (conc rc-toppath "/runconfigs.config")
+                                  *runconfigdat* #t
+                                  sections: sections))))
+            (set! *runconfigdat* first-rundat)
+            (if first-pass
+                (begin
+                  (set! *configdat*  (car first-pass))
+                  (set! *configinfo* first-pass)
+                  (set! *toppath*    (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
+                  (set! toppath      *toppath*)
+                  (if (not *toppath*)
+                      (begin
+                        (debug:print-error 0 *default-log-port* "you are not in a megatest area!")
+                        (exit 1)))
+                  (setenv "MT_RUN_AREA_HOME" *toppath*)
+                  ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
+                  (let* ((keys         (rmt:get-keys))
+                         (key-vals     (keys:target->keyval keys target))
+                         (linktree     (common:get-linktree))
+                         ;; NOTE(review): the value of second-pass is never used below;
+                         ;; the call is kept as-is - confirm whether its side effects are relied on.
+                         (second-pass  (configf:find-and-read-config
+                                        mtconfig
+                                        environ-patt: "env-override"
+                                        given-toppath: toppath
+                                        pathenvvar: "MT_RUN_AREA_HOME"))
+                         (runconfigdat (begin     ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config
+                                         (for-each (lambda (kt)
+                                                     (setenv (car kt) (cadr kt)))
+                                                   key-vals)
+                                         (configf:read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
+                                                              sections: sections)))
+                         ;; recompute the cache names now that the configs have been read
+                         (cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
+                         (mtcachef     (car cachefiles))
+                         (rccachef     (cdr cachefiles)))
+                    ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle - ticket 220546342
+                    ;; TODO - consider 1) using simple-lock to bracket cache write
+                    ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.
+                    (if rccachef
+                        (common:fail-safe
+                         (lambda ()
+                           (configf:write-alist runconfigdat rccachef))
+                         (conc "Could not write cache file - "rccachef)))
+                    (if mtcachef
+                        (common:fail-safe
+                         (lambda ()
+                           (configf:write-alist *configdat* mtcachef))
+                         (conc "Could not write cache file - "mtcachef)))
+                    (set! *runconfigdat* runconfigdat)
+                    (if (and rccachef mtcachef) (set! *configstatus* 'fulldata))))
+                ;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table
+                (set! *configdat* (make-hash-table)))))
+
+         ;; else read what you can and set the flag accordingly
+         ;; here we don't have either mtcachef or rccachef
+         (else
+          (let* ((cfgdat   (configf:find-and-read-config
+                            (or (args:get-arg "-config") "megatest.config")
+                            environ-patt: "env-override"
+                            given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
+                            pathenvvar: "MT_RUN_AREA_HOME")))
+            (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat)))
+                (let* ((toppath  (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat)))
+                       (rdat     (configf:read-config (conc toppath  ;; convert this to use runconfig:read!
+                                                            "/runconfigs.config") *runconfigdat* #t sections: sections)))
+                  (set! *configinfo*   cfgdat)
+                  (set! *configdat*    (car cfgdat))
+                  (set! *runconfigdat* rdat)
+                  (set! *toppath*      toppath)
+                  (set! *configstatus* 'partial))
+                (begin
+                  (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
+                  (exit 2))))))
+        ;; COND ends here.
+
+        ;; additional house keeping
+        (let* ((linktree (or (common:get-linktree)
+                             (conc *toppath* "/lt"))))
+          (if linktree
+              (begin
+                (if (not (common:file-exists? linktree))
+                    (begin
+                      (handle-exceptions
+                          exn
+                          (begin
+                            (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree)
+                            (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
+                            (exit 1))
+                        (create-directory linktree #t))))
+                ;; failure to create the convenience link is logged but not fatal
+                (handle-exceptions
+                    exn
+                    (begin
+                      (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*)
+                      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
+                  (let ((tlink (conc *toppath* "/lt")))
+                    (if (not (common:file-exists? tlink))
+                        (create-symbolic-link linktree tlink)))))
+              (begin
+                (debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config")
+                )))
+        (if (and *toppath*
+                 (directory-exists? *toppath*))
+            (begin
+              (setenv "MT_RUN_AREA_HOME" *toppath*)
+              (setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
+            (begin
+              (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
+              (set! *toppath* #f) ;; force it to be false so we return #f
+              #f))
+
+        ;; one more attempt to cache the configs for future reading
+        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
+               (mtcachef     (car cachefiles))
+               (rccachef     (cdr cachefiles)))
+
+          ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle - ticket 220546342
+          ;; TODO - consider 1) using simple-lock to bracket cache write
+          ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.
+          (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef)))
+              (common:fail-safe
+               (lambda ()
+                 (configf:write-alist *runconfigdat* rccachef))
+               (conc "Could not write cache file - "rccachef)))
+          (if (and mtcachef *configdat*    (not (common:file-exists? mtcachef)))
+              (common:fail-safe
+               (lambda ()
+                 (configf:write-alist *configdat* mtcachef))
+               (conc "Could not write cache file - "mtcachef)))
+          (if (and rccachef mtcachef *runconfigdat* *configdat*)
+              (set! *configstatus* 'fulldata)))
+
+        ;; if have -append-config then read and append here
+        (let ((cfname (args:get-arg "-append-config")))
+          (if (and cfname
+                   (file-read-access? cfname))
+              (configf:read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
+        *toppath*)))
+
+;; Choose the directory under which a test's run area will be created.
+;;
+;;   confdat:    megatest config hash table ("disks" section, [setup] minspace)
+;;   testconfig: test config hash table or #f; its "disks" section, when
+;;               present, takes precedence over the one in confdat
+;;
+;; Returns a path, or #f when no "disks" section is configured at all, so the
+;; caller can fall back to running inside the test directory.
+;; Selection order:
+;;   1. the disk picked by common:get-disk-with-most-free-space (cdr of its result)
+;;   2. the first configured disk path that create-directory can create,
+;;      trying longer path names first
+;;   3. <*toppath*>/runs
+;; NOTE(review): assumes common:get-disk-with-most-free-space returns a pair
+;; whose cdr is the path - confirm against its definition.
+;;
+(define (get-best-disk confdat testconfig)
+  (let* ((disks    (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
+                       (hash-table-ref/default confdat "disks" #f)))
+         (minspace (let ((m (configf:lookup confdat "setup" "minspace")))
+                     (string->number (or m "10000"))))
+         ;; last resort; a plain path like every other value returned here
+         (fallback (lambda () (conc *toppath* "/runs"))))
+    (if disks
+        (let ((res (common:get-disk-with-most-free-space disks minspace)))
+          (cond
+           (res (cdr res))
+           ((null? disks) (fallback))
+           (else
+            ;; no disk met the minspace requirement: try to create the
+            ;; configured directories one by one and use the first that works
+            (let ((paths (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y)))))))
+              (let loop ((head (car paths)) (tail (cdr paths)))
+                (let ((result (handle-exceptions exn #f (create-directory (cadr head) #t))))
+                  (cond
+                   (result       result)
+                   ((null? tail) (fallback))
+                   (else         (loop (car tail) (cdr tail))))))))))
+        ;; explicit #f: a one-armed `if` yields the (truthy) unspecified value
+        #f)))
+
+
+;; Copy the test source tree at test-src-path into test-path.
+;; Uses the [setup] testcopycmd template from *configdat* when one is set
+;; (with TEST_SRC_PATH and TEST_TARG_PATH substituted), otherwise an rsync
+;; command that appends its output to <test-path>/mt_launch.log.
+;; A non-zero exit status is only logged; nothing is raised.
+(define (launch:test-copy test-src-path test-path)
+  (let* ((template (configf:lookup *configdat* "setup" "testcopycmd"))
+         (cmd      (if template
+                       ;; fill in the source first, then the target
+                       (let ((with-src (string-substitute "TEST_SRC_PATH" test-src-path template #t)))
+                         (string-substitute "TEST_TARG_PATH" test-path with-src #t))
+                       (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"
+                             " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log")))
+         (status   (system cmd)))
+    (unless (eq? status 0)
+      (debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\""))))
+
+
+;; Desired directory structure:
+;;
+;;  <linkdir> - <target> - <testname> -.
+;;                                     |
+;;                                     v
+;;  <rundir>  -  <target>  -    <testname> -|- <itempath(s)>
+;;
+;;  dir stored in test is:
+;; 
+;;  <linkdir> - <target> - <testname> [ - <itempath> ]
+;; 
+;; All log file links should be stored relative to the top of link path
+;;  
+;; <target> - <testname> [ - <itempath> ] 
+;;
+;; Create the physical run directory for a test plus its links in the linktree,
+;; record the locations through rmt:general-call, and copy the test source in.
+;;
+;;   run-id, test-id: ids handed to the rmt: calls
+;;   run-info:        run name string, or a run record the "runname" field is read from
+;;   keyvals:         list of (key value) lists; the values joined with "/" form the target
+;;   test-src-path:   directory to copy from; when #f nothing is copied
+;;   disk-path:       base directory for the physical run area
+;;   testname:        test name
+;;   itemdat:         item path string, or item data converted with item-list->path
+;;   remtries:        remaining whole-procedure retries when test-path is missing
+;;
+;; Returns (list lnkpathf lnkpath) after a copy into an existing test-path,
+;; otherwise (list #f #f) once retries are exhausted or test-src-path is #f.
+;; Several failure paths call (exit 1).
+;;
+(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2))
+  (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it
+         (runname   (if (string? run-info) ;; if we pass in a string as run-info use it as run-name.
+                        run-info
+                        (db:get-value-by-header (db:get-rows run-info)
+                                                (db:get-header run-info)
+                                                "runname")))
+         (contour   #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
+         ;; convert back to db: from rdb: - this is always run at server end
+         (target    (string-intersperse (map cadr keyvals) "/"))
+
+         (not-iterated  (equal? "" item-path))
+
+         ;; all tests are found at <rundir>/test-base or <linkdir>/test-base
+         (testtop-base (conc target "/" runname "/" testname))
+         (test-base    (conc testtop-base (if not-iterated "" "/") item-path))
+
+         ;; nb// if itempath is not "" then it is prefixed with "/"
+         (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base))
+         (test-path    (conc disk-path (if contour (conc "/" contour) "") "/" test-base))
+
+         ;; ensure this exists first as links to subtests must be created there
+         (linktree  (common:get-linktree))
+
+         (lnkbase   (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname))
+         (lnkpath   (conc lnkbase "/" testname))
+         (lnkpathf  (conc lnkpath (if not-iterated "" "/") item-path))
+         (lnktarget (conc lnkpath "/" item-path)))
+
+    ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical
+    ;;                                                 rundir   shortdir
+    (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id)
+
+    (debug:print 2 *default-log-port* "INFO:\n       lnkbase=" lnkbase "\n       lnkpath=" lnkpath "\n  toptest-path=" toptest-path "\n     test-path=" test-path)
+    (if (not (common:file-exists? linktree))
+        (begin
+          (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree)
+          (create-directory linktree #t)))
+    ;; create the directory for the tests dir links, this is needed no matter what... try up to three times
+    (let loop ((tries-left 3))
+      (let ((success (if (and (not (common:directory-exists? lnkbase))
+                              (not (common:file-exists? lnkbase)))
+                         (handle-exceptions
+                          exn
+                          (begin
+                            (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase)
+                            (print-error-message exn (current-error-port))
+                            #f)  ;; creation failed -> eligible for another attempt
+                          (create-directory lnkbase #t)
+                          #t)
+                         #t)))   ;; already present, nothing to do
+        (if (and (not success) (> tries-left 1))
+            (loop (- tries-left 1)))))
+
+    ;; update the toptest record with its location rundir, cache the path
+    ;; This was highly inefficient, one db write for every subtest, potentially
+    ;; thousands of unnecessary updates, cache the fact it was set and don't set it
+    ;; again.
+
+    ;; Now create the link from the test path to the link tree, however
+    ;; if the test is iterated it is necessary to create the parent path
+    ;; to the iteration. use pathname-directory to trim the path by one
+    ;; level
+    (if (not not-iterated) ;; i.e. iterated
+        (let ((iterated-parent  (pathname-directory (conc lnkpath "/" item-path))))
+          (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent)
+          (handle-exceptions
+           exn
+           (begin
+             (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting")
+             (exit 1))
+           (create-directory iterated-parent #t))))
+
+    ;; a stale symlink at lnkpath is removed so it can be recreated below
+    (if (symbolic-link? lnkpath)
+        (handle-exceptions
+         exn
+         (begin
+           (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
+           (exit 1))
+         (delete-file lnkpath)))
+
+    (if (not (or (common:file-exists? lnkpath)
+                 (symbolic-link? lnkpath)))
+        (handle-exceptions
+         exn
+         (begin
+           (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
+           (exit 1))
+         (create-symbolic-link toptest-path lnkpath)))
+
+    ;; NB - This was not working right - some top tests are not getting the path set!!!
+    ;;
+    ;; Do the setting of this record after the paths are created so that the shortdir can
+    ;; be set to the real directory location. This is safer for future clean up if the link
+    ;; tree is damaged or lost.
+    ;;
+    (if (not (hash-table-ref/default *toptest-paths* testname #f))
+        (let* ((testinfo       (rmt:get-test-info-by-id run-id test-id))
+               (curr-test-path (if testinfo
+                                   (db:test-get-rundir testinfo)
+                                   #f)))
+          (hash-table-set! *toptest-paths* testname curr-test-path)
+          ;; NB// Was this for the test or for the parent in an iterated test?
+          (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath
+                            (if (common:file-exists? lnkpath)
+                                (common:nice-path lnkpath)
+                                lnkpath)
+                            testname "" run-id)
+          (if (or (not curr-test-path)
+                  (not (directory-exists? toptest-path)))
+              (begin
+                (debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath)
+                (handle-exceptions
+                 exn
+                 #f ;; don't care to catch and deal with errors here for now.
+                 (create-directory toptest-path #t))
+                (hash-table-set! *toptest-paths* testname toptest-path)))))
+
+    ;; The toptest path has been created, the link to the test in the linktree has
+    ;; been created. Now, if this is an iterated test the real test dir must be created
+    (if (not not-iterated) ;; this is an iterated test
+        (begin
+          (debug:print 2 *default-log-port* "Setting up sub test run area")
+          (debug:print 2 *default-log-port* " - creating run area in " test-path)
+          (handle-exceptions
+           exn
+           (begin
+             (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting")
+             (exit 1))
+           (create-directory test-path #t))
+          (debug:print 2 *default-log-port*
+                       " - creating link from: " test-path "\n"
+                       "                   to: " lnktarget)
+
+          ;; If there is already a symlink delete it and recreate it.
+          (handle-exceptions
+           exn
+           (begin
+             (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting")
+             (exit 1)) ;; non-zero, like the other fatal paths in this procedure
+           (if (symbolic-link? lnktarget)     (delete-file lnktarget))
+           (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))
+
+    (if (not (directory? test-path))
+        (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes
+
+    (if (and test-src-path (directory? test-path))
+        (begin
+          (launch:test-copy test-src-path test-path)
+          (list lnkpathf lnkpath ))
+        (if (and test-src-path (> remtries 0))
+            (begin
+              (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
+              ;; start over from the top with one fewer attempt left
+              (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1)))
+            (list #f #f)))))
+
+
+;; Rate-limited zombie scan for one run.
+;; The time of the last scan is kept in the var "zombiescan-runid-<run-id>"
+;; (read with rmt:get-var, written with rmt:set-var). A new scan runs when no
+;; time is recorded yet, or when it is older than twice the [setup] deadtime
+;; setting (120 is used when deadtime is not set).
+(define (launch:handle-zombie-tests run-id)
+  (let* ((key       (conc "zombiescan-runid-"run-id))
+         (deadtime  (or (configf:lookup-number *configdat* "setup" "deadtime") 120))
+         (threshold (- (current-seconds) (* 2 deadtime)))
+         (last-scan (rmt:get-var key)))
+    (if (or (not last-scan)
+            (< last-scan threshold))
+        (begin
+          (debug:print 1 *default-log-port* "INFO: search and mark zombie tests")
+          (rmt:set-var key (current-seconds))
+          (rmt:find-and-mark-incomplete run-id #f)))))
+
+
+
+
+
+;; 1. look through disks list for disk with most space
+;; 2. create run dir on disk, path name is meaningful
+;; 3. create link from run dir to megatest runs area 
+;; 4. remotely run the test on allocated host
+;;    - could be ssh to host from hosts table (update regularly with load)
+;;    - could be netbatch
+;;      (launch-test db (cadr status) test-conf))
+(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
+  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
+  (let* ( ;; (lock-key        (conc "test-" test-id))
+	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
+	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
+	;; 		    (if (car lock)
+	;; 			#t
+	;; 			(if (> (current-seconds) expire-time)
+	;; 			    (begin
+	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
+	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
+	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
+	;; 			    (begin
+	;; 			      (thread-sleep! 1)
+	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
+	 (item-path       (item-list->path itemdat))
+	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
+    (let loop ((delta        (- (current-seconds) *last-launch*))
+	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
+      (if (> launch-delay delta)
+	  (begin
+	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
+		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
+	    (thread-sleep! (- launch-delay delta))
+	    (loop (- (current-seconds) *last-launch*) launch-delay))))
+    (change-directory *toppath*)
+    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
+     (append
+      (list
+       (list "MT_RUN_AREA_HOME" *toppath*)
+       (list "MT_TEST_NAME" test-name)
+       (list "MT_RUNNAME"   runname)
+       (list "MT_ITEMPATH"  item-path)
+       (list "MT_CONTOUR"   contour)
+       )
+      itemdat))
+    (let* ((tregistry       (tests:get-all)) ;; third param (below) is system-allowed
+           ;; for tconfig, why do we allow fallback to test-conf?
+	   (tconfig         (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t)
+				(begin
+                                  (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.")
+                                  test-conf))) ;; force re-read now that all vars are set
+	   (useshell        (let ((ush (configf:lookup *configdat* "jobtools"     "useshell")))
+			      (if ush 
+				  (if (equal? ush "no") ;; must use "no" to NOT use shell
+				      #f
+				      ush)
+				  #t)))     ;; default is yes
+	   (runscript       (configf:lookup tconfig   "setup"        "runscript"))
+	   (ezsteps         (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big, just send a flag
+	   (subrun          (> (length (hash-table-ref/default tconfig "subrun"  '())) 0)) ;; send a flag to process a subrun
+	   ;; (diskspace       (configf:lookup tconfig   "requirements" "diskspace"))
+	   ;; (memory          (configf:lookup tconfig   "requirements" "memory"))
+	   ;; (hosts           (configf:lookup *configdat* "jobtools"     "workhosts")) ;; I'm pretty sure this was never completed
+	   (remote-megatest (configf:lookup *configdat* "setup" "executable"))
+	   (run-time-limit  (or (configf:lookup  tconfig   "requirements" "runtimelim")
+				(configf:lookup  *configdat* "setup" "runtimelim")))
+	   ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to 
+	   ;;                allow running from dashboard. Extract the path
+	   ;;                from the called megatest and convert dashboard
+	   ;;             	  or dboard to megatest
+	   (local-megatest  (let* ((lm  (car (argv)))
+				   (dir (pathname-directory lm))
+				   (exe (pathname-strip-directory lm)))
+			      (conc (if dir (conc dir "/") "")
+				    (case (string->symbol exe)
+				      ((dboard)    "../megatest")
+				      ((mtest)     "../megatest")
+				      ((dashboard) "megatest")
+				      (else exe)))))
+	   (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (configf:lookup *configdat* "jobtools"     "launcher"))
+	   (test-sig        (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
+	   (work-area       #f)
+	   (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
+	   (diskpath   #f)
+	   (cmdparms   #f)
+	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
+	   (mt-bindir-path #f)
+	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
+	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
+	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
+				(if (args:get-arg "-logging")(list "-logging") '()))))
+      ;; (if hosts (set! hosts (string-split hosts)))
+      ;; set the megatest to be called on the remote host
+      (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
+      (set! mt-bindir-path (pathname-directory remote-megatest))
+      (if launcher (set! launcher (string-split launcher)))
+      ;; set up the run work area for this test
+      (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
+	       (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
+	  (begin
+	    (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
+	    (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record
+      
+      ;; prevent overlapping actions - set to LAUNCHED as early as possible
+      ;;
+      ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
+      (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
+      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f)
+      ;; (pp (hash-table->alist tconfig))
+      (set! diskpath (get-best-disk *configdat* tconfig))
+      (if diskpath
+	  (let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
+	    (set! work-area (car dat))
+	    (set! toptest-work-area (cadr dat))
+	    (debug:print-info 2 *default-log-port* "Using work area " work-area))
+	  (begin
+	    (set! work-area (conc test-path "/tmp_run"))
+	    (create-directory work-area #t)
+	    (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run")))
+      (set! cmdparms (base64:base64-encode 
+		      (z3:encode-buffer 
+		       (with-output-to-string
+			 (lambda () ;; (list 'hosts     hosts)
+			   (write (list (list 'testpath  test-path)
+					;; (list 'transport (conc *transport-type*))
+					;; (list 'serverinf *server-info*)
+					(list 'homehost  (let* ((hhdat (common:get-homehost)))
+							   (if hhdat
+							       (car hhdat)
+							       #f)))
+					(list 'serverurl (if *runremote*
+							     (remote-server-url *runremote*)
+							     #f)) ;;
+					(list 'areaname  (common:get-testsuite-name))
+					(list 'toppath   *toppath*)
+					(list 'work-area work-area)
+					(list 'test-name test-name) 
+					(list 'runscript runscript) 
+					(list 'run-id    run-id   )
+					(list 'test-id   test-id  )
+					;; (list 'item-path item-path )
+					(list 'itemdat   itemdat  )
+					(list 'megatest  remote-megatest)
+					(list 'ezsteps   ezsteps)
+					(list 'subrun    subrun)
+					(list 'target    mt_target)
+					(list 'contour   contour)
+					(list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
+					(list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
+					(list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
+					(list 'runname   runname)
+					(list 'mt-bindir-path mt-bindir-path))))))))
+      
+      ;; clean out step records from previous run if they exist
+      ;; (rmt:delete-test-step-records run-id test-id)
+      ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
+      (if (common:file-exists? work-area)
+	  (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
+      (cond
+       ;; ((and launcher hosts) ;; must be using ssh hostname
+       ;;    (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
+       ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
+       (launcher
+	(set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
+       ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
+       (else
+	(if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section"))
+	(set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
+      ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
+      (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
+      (debug:print 1 *default-log-port* "Launching " work-area)
+      ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the environment back when done
+      (debug:print 4 *default-log-port* "fullcmd: " fullcmd)
+      (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible.
+      (let* ((commonprevvals (alist->env-vars
+			      (hash-table-ref/default *configdat* "env-override" '())))
+	     (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
+			      (append (list (list "MT_TEST_RUN_DIR" work-area)
+					    (list "MT_TEST_NAME" test-name)
+					    (list "MT_ITEM_INFO" (conc itemdat)) 
+					    (list "MT_RUNNAME"   runname)
+					    (list "MT_TARGET"    mt_target)
+					    (list "MT_ITEMPATH"  item-path)
+					    )
+				      itemdat)))
+	     (testprevvals   (alist->env-vars
+			      (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))
+	     ;; Launchwait defaults to true, must override it to turn off wait
+	     (launchwait     (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
+	     (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed.
+					process:cmd-run-with-stderr-and-exitcode->list
+					process-run)
+				    (if useshell
+					(let ((cmdstr (string-intersperse fullcmd " ")))
+					  (if launchwait
+					      cmdstr
+					      (conc cmdstr " >> mt_launch.log 2>&1 &")))
+					(car fullcmd))
+				    (if useshell
+					'()
+					(cdr fullcmd))))
+             (success        (if launchwait (equal? 0 (cadr launch-results-prev)) #t))
+             (launch-results (if launchwait (car launch-results-prev) launch-results-prev)))
+        (if (not success)
+            (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED"))
+        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
+	;; (rmt:no-sync-del! lock-key)         ;; release the lock for starting this test
+	(if (not launchwait) ;; give the OS a little time to allow the process to start
+	    (thread-sleep! 0.01))
+	(with-output-to-file "mt_launch.log"
+	  (lambda ()
+	    (print "LAUNCHCMD: " (string-intersperse fullcmd " "))
+	    (if (list? launch-results)
+		(apply print launch-results)
+		(print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
+	    #:append))
+	(debug:print 2 *default-log-port* "Launching completed, updating db")
+	(debug:print 2 *default-log-port* "Launch results: " launch-results)
+	(if (not launch-results)
+	    (begin
+	      (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
+	      ;; (sqlite3:finalize! db)
+	      ;; good ole "exit" seems not to work
+	      ;; (_exit 9)
+	      ;; but this hack will work! Thanks go to Alan Post of the Chicken email list
+	      ;; NB// Is this still needed? Should be safe to go back to "exit" now?
+	      (process-signal (current-process-id) signal/kill)
+	      ))
+	(alist->env-vars miscprevvals)
+	(alist->env-vars testprevvals)
+	(alist->env-vars commonprevvals)
+	launch-results))
+    (change-directory *toppath*)))
+
+;; recover a test where the top controlling mtest may have died
+;;
+(define (launch:recover-test run-id test-id)
+  ;; Intended to run on the test's execution host (invoked there via ssh).
+  ;;
+  ;; What the code does today:
+  ;;   1. fetch the test's top process pid through rmt:test-get-top-process-pid
+  ;;   2. list the calling user's processes with ps and read the first line that
+  ;;      mentions that pid (the grep helper's own entry is filtered out)
+  ;;   3. if a line was read, resolve /proc/<pid>/cwd (value not used yet)
+  ;;   4. process-wait on the pid
+  ;;
+  ;; Still to do: check the run directory is correct for the test, detect a stuck
+  ;; controlling mtest, periodically update the db with runtime, and when the process
+  ;; exits set state/status appropriately if the db still says RUNNING after 10 seconds.
+  ;;
+  (let* ((top-pid  (rmt:test-get-top-process-pid run-id test-id))
+	 (ps-cmd   (conc "ps -F -u " (current-user-name)
+			 " | grep -E '" top-pid " ' | grep -v 'grep -E " top-pid "'"))
+	 ;; a string only when ps reported a matching process owned by this user
+	 (ps-line  (with-input-from-pipe ps-cmd read-line))
+	 (proc-cwd (and (string? ps-line)
+			(read-symbolic-link (conc "/proc/" top-pid "/cwd")))))
+    ;; block until that process exits
+    (process-wait top-pid)))
+
+
+;; Do not rpc this one, do the underlying calls!!!
+(define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f))
+  ;; Record state/status, an optional comment and an optional test-data row for a test.
+  ;;   run-id, test-id : identify the test; its record is read via rmt:get-test-info-by-id
+  ;;   state, status   : the primary record is updated only when both are true values
+  ;;   comment         : string or #f
+  ;;   dat             : #f, or a hash table optionally holding ":category" ":variable"
+  ;;                     ":value" ":expected" ":tol" ":units" ":type" ":comment"
+  ;;   work-area       : accepted but not used in this procedure
+  ;; Returns the value of the final comment update (unspecified when none is made).
+  ;;
+  ;; NOTES:
+  ;;  1. Is the call to test:get-previous-run-record remotified?
+  ;;  2. Add test for testconfig waiver propagation control here
+  ;;
+  (let* ((otherdat    (or dat (make-hash-table)))
+	 (testdat     (rmt:get-test-info-by-id run-id test-id))
+	 (test-name   (db:test-get-testname  testdat))
+	 (item-path   (db:test-get-item-path testdat))
+	 ;; before proceeding we must find out if the previous test (where all keys matched except runname)
+	 ;; was WAIVED if this test is FAIL
+	 (prev-test   (and (equal? status "FAIL")
+			   (rmt:get-previous-test-run-record run-id test-name item-path)))
+	 ;; waived: the comment to store (ours if given, else the previous record's) when the
+	 ;; previous record is COMPLETED/WAIVED, otherwise #f
+	 (waived      (and prev-test ;; true if we found a previous test in this run series
+			   (let ((prev-status  (db:test-get-status  prev-test))
+				 (prev-state   (db:test-get-state   prev-test))
+				 (prev-comment (db:test-get-comment prev-test)))
+			     (debug:print 4 *default-log-port* "prev-status " prev-status ", prev-state " prev-state ", prev-comment " prev-comment)
+			     (and (equal? prev-state  "COMPLETED")
+				  (equal? prev-status "WAIVED")
+				  (or comment prev-comment)))))
+	 (real-status (if (and waived (tests:check-waiver-eligibility testdat prev-test)) "WAIVED" status))
+	 (final-cmt   (or waived comment)))
+
+    (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status)
+
+    ;; update the primary record IF state AND status are defined
+    (if (and state status)
+	(begin
+	  (rmt:set-state-status-and-roll-up-items run-id test-id item-path state real-status final-cmt)
+	  ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-state-status
+	  ))
+
+    ;; if status is "AUTO" then call rollup (note, this one modifies data in test
+    ;; run area, it does remote calls under the hood.
+    ;; (if (and test-id state status (equal? status "AUTO"))
+    ;; 	(rmt:test-data-rollup run-id test-id status))
+
+    ;; add metadata (need to do this way to avoid SQL injection issues)
+
+    ;; :first_err
+    ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f)))
+    ;;   (if val
+    ;;       (sqlite3:execute db "UPDATE tests SET first_err=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))
+    ;;
+    ;; ;; :first_warn
+    ;; (let ((val (hash-table-ref/default otherdat ":first_warn" #f)))
+    ;;   (if val
+    ;;       (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))
+
+    (let ((category (hash-table-ref/default otherdat ":category" ""))
+	  (variable (hash-table-ref/default otherdat ":variable" ""))
+	  (value    (hash-table-ref/default otherdat ":value"    #f))
+	  (expected (hash-table-ref/default otherdat ":expected" "n/a"))
+	  (tol      (hash-table-ref/default otherdat ":tol"      "n/a"))
+	  (units    (hash-table-ref/default otherdat ":units"    ""))
+	  (type     (hash-table-ref/default otherdat ":type"     ""))
+	  (dcomment (hash-table-ref/default otherdat ":comment"  "")))
+      (debug:print 4 *default-log-port*
+		   "category: " category ", variable: " variable ", value: " value
+		   ", expected: " expected ", tol: " tol ", units: " units)
+      (if value ;; require only value; BB was- all three required
+	  (begin
+	    ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment.
+	    (rmt:csv->test-data run-id test-id
+				(conc category ","
+				      variable ","
+				      value    ","
+				      expected ","
+				      tol      ","
+				      units    ","
+				      dcomment ",," ;; extra comma for status
+				      type))
+	    ;; This was added in check-in a5adfa3f9a. Message was: "...added delay in set-values to allow for delayed write on server start"
+	    ;; I'm inserting an arbitrary rmt: call to force/ensure that the server is available to (hopefully) prevent a communication issue.
+	    (rmt:get-var "MEGATEST_VERSION") ;; this does NOTHING but ensure the server is reachable. This is almost certainly NOT needed :)
+	    ;; BB - commenting out arbitrary 10 second wait (thread-sleep! 10) ;; add 10 second delay before quit incase rmt needs time to start a server.
+	    )))
+
+    ;; need to update the top test record if PASS or FAIL and this is a subtest
+    ;;;;;; (if (not (equal? item-path ""))
+    ;;;;;;     (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;)
+
+    ;; Store the comment when it has at least one non-whitespace character, or when a waiver comment
+    ;; applies. string-search is required here: string-match only succeeds on a whole-string match.
+    (if (or (and (string? comment) (string-search (regexp "\\S+") comment)) waived)
+	(rmt:general-call 'set-test-comment run-id final-cmt test-id))))
+
+
 
 )