Index: TODO
==================================================================
--- TODO
+++ TODO
@@ -16,10 +16,17 @@
 #     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
 
 TODO
 ====
 
+WW38
+. Add test_rundat to no-sync ==> correction, put in <testdir>/.meta/test-run.dat
+. Add STATE/STATUS transitions to .meta/test-run.dat or similar
+. Swizzle update-test-rundat to operate on no-sync
+. Swizzle update-run-duration, -uname-host and cpuload-diskfree to no-sync
+. On state/status change update tests table with duration
+
 WW15
 . fill newview matrix with data, filter pipeline gui elements
 . improve [script], especially indent handling
 
 WW16

Index: common.scm
==================================================================
--- common.scm
+++ common.scm
@@ -501,11 +501,11 @@
 (define (common:rotate-logs)
   (let* ((all-files (make-hash-table))
 	 (stats     (make-hash-table))
 	 (inc-stat  (lambda (key)
 		      (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1))))
-	(max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "300")))) ;; name -> age
+	(max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "600")))) ;; name -> age
     (if (not (directory-exists? "logs"))(create-directory "logs"))
     (directory-fold 
      (lambda (file rem)
        (handle-exceptions
 	exn

Index: dashboard.scm
==================================================================
--- dashboard.scm
+++ dashboard.scm
@@ -212,10 +212,12 @@
   (hash-table-set!
    (dboard:commondat-tabdats commondat)
    tabnum
    tabdat))
 
+(define *updater-running* #f) ;; move this into one of the stucts
+
 ;; gets and calls updater list based on curr-tab-num
 ;;
 (define (dboard:common-run-curr-updaters commondat #!key (tab-num #f))
   (if (dboard:common-get-tabdat commondat tab-num: tab-num) ;; only update if there is a tabdat
       (let* ((tnum     (or tab-num (dboard:commondat-curr-tab-num commondat)))
@@ -240,11 +242,13 @@
 		     tnum
 		     (cons updater curr-updaters))))
 
 ;; data for each specific tab goes here
 ;;
-(defstruct dboard:tabdat 
+(defstruct dboard:tabdat
+  ;; flags
+  ((already-running #f)                  : boolean)     ;; the updater is already running. skip
   ;; runs
   ((allruns         '())                 : list)        ;; list of dboard:rundat records
   ((allruns-by-id    (make-hash-table))  : hash-table)  ;; hash of run-id -> dboard:rundat records
   ((done-runs       '())                 : list)        ;; list of runs already drawn
   ((not-done-runs   '())                 : list)        ;; list of runs not yet drawn
@@ -645,11 +649,11 @@
 ;;
 (define (dboard:get-tests-for-run-duplicate tabdat run-id run testnamepatt key-vals)
   (let* ((start-time   (current-seconds))
 	 (access-mode  (dboard:tabdat-access-mode tabdat))
          (num-to-get   (string->number (or (configf:lookup *configdat* "setup" "num-tests-to-get")
-                                           "200")))
+                                           "50"))) ;; was 200, which is fine in a normal run area.
 	 (states       (hash-table-keys (dboard:tabdat-state-ignore-hash tabdat)))
 	 (statuses     (hash-table-keys (dboard:tabdat-status-ignore-hash tabdat)))
          (do-not-use-db-file-timestamps #t) ;; (configf:lookup *configdat* "setup" "do-not-use-db-file-timestamps")) ;; this still hosts runs-summary-tab
          (do-not-use-query-timestamps   #t) ;; (configf:lookup *configdat* "setup" "do-not-use-query-timestamps")) ;; this no longer troubles runs-summary-tab
 	 (sort-info    (get-curr-sort))
@@ -901,27 +905,26 @@
 		  (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id)
 		  (hash-table-set!    (dboard:tabdat-allruns-by-id tabdat) run-id run-struct))
 	      (if (or (null? tal)
 		      (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update
 		  (begin
-		    (when (> elapsed-time 2)   
+		    #;(when (> elapsed-time 2)   
                       (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")
                       (let* ((old-val (iup:attribute *tim* "TIME"))
                              (new-val (number->string (inexact->exact (floor (* 2  (string->number old-val)))))))
                         (if (< (string->number new-val) 5000)
-                            ((debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val)
-                            (iup:attribute-set! *tim* "TIME" new-val))))
-
-
-                      )
+                            (begin
+			      (debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val)
+			      (iup:attribute-set! *tim* "TIME" new-val)))))
 		    (dboard:tabdat-allruns-set! tabdat new-res)
 		    maxtests)
 		  (if (> (dboard:rundat-run-data-offset run-struct) 0)
 		      (loop run tal new-res newmaxtests) ;; not done getting data for this run
 		      (loop (car tal)(cdr tal) new-res newmaxtests)))))))
     (dboard:tabdat-filters-changed-set! tabdat #f)
-    (dboard:update-tree tabdat runs-hash header tb)))
+    (dboard:update-tree tabdat runs-hash header tb)
+    (dboard:tabdat-already-running-set! tabdat #f)))
 
 (define *collapsed* (make-hash-table))
 
 (define (toggle-hide lnum uidat) ; fulltestname)
   (let* ((btn (vector-ref (dboard:uidat-get-lftcol uidat) lnum))
@@ -2505,14 +2508,10 @@
 
         ;; insert extra widget here
         (if extra-widget
             extra-widget
             (iup:hbox)) ;; empty widget
-
-        
-
-        
         )))
 
      (let* ((status-toggles (map (lambda (status)
 				   (iup:toggle (conc status)
 					       #:fontsize 8 ;; btn-fontsz ;; "10"
@@ -3733,20 +3732,21 @@
    (dboard:tabdat-numruns tabdat)
    (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%")
    ;; generate key patterns from the target stored in tabdat
    (let* ((dbkeys (dboard:tabdat-dbkeys tabdat)))
      (let ((fres   (if (dboard:tabdat-target tabdat)
-                       (let ((ptparts (append (dboard:tabdat-target tabdat)(make-list (length dbkeys) "%"))))
-                         (map (lambda (k v)(list k v)) dbkeys ptparts))
-                       (let ((res '()))
-                         (for-each (lambda (key)
-                                     (if (not (equal? key "runname"))
-                                         (let ((val (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) key #f)))
-                                           (if val (set! res (cons (list key val) res))))))
-                                   dbkeys)
-                         res))))
-       fres))))
+		       (let ((ptparts (append (dboard:tabdat-target tabdat)(make-list (length dbkeys) "%"))))
+			 (map (lambda (k v)(list k v)) dbkeys ptparts))
+		       (let ((res '()))
+			 (for-each (lambda (key)
+				     (if (not (equal? key "runname"))
+					 (let ((val (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) key #f)))
+					   (if val (set! res (cons (list key val) res))))))
+				   dbkeys)
+			 res))))
+       fres)))
+  #;(set! *updater-running* #f))
 
 (define (dashboard:runs-tab-updater commondat tab-num)
   (debug:catch-and-dump 
    (lambda ()
      (let* ((tabdat (dboard:common-get-tabdat commondat tab-num: tab-num))
@@ -3801,22 +3801,25 @@
 	   (dashboard:runs-tab-updater commondat 1))
 	 tab-num: 2)
 	(iup:callback-set! *tim*
 			   "ACTION_CB"
 			   (lambda (time-obj)
-			     (let ((update-is-running #f))
-			     (mutex-lock! (dboard:commondat-update-mutex commondat))
-			     (set! update-is-running (dboard:commondat-updating commondat))
-			     (if (not update-is-running)
-			     (dboard:commondat-updating-set! commondat #t))
-			     (mutex-unlock! (dboard:commondat-update-mutex commondat))
-			     (if (not update-is-running) ;; we know that the update was not running and we now have a lock on doing an update
-			     (begin
-			     (dboard:common-run-curr-updaters commondat) ;; (dashboard:run-update commondat)
-			     (mutex-lock! (dboard:commondat-update-mutex commondat))
-			     (dboard:commondat-updating-set! commondat #f)
-			     (mutex-unlock! (dboard:commondat-update-mutex commondat)))
+			     (if (not *updater-running*)
+				 (begin
+				   ;; (mutex-lock! (dboard:commondat-update-mutex commondat))
+				   ;; (set! update-is-running (dboard:commondat-updating commondat))
+				   ;;(if (not update-is-running)
+				   ;;	   (dboard:commondat-updating-set! commondat #t))
+				   ;;(mutex-unlock! (dboard:commondat-update-mutex commondat))
+				   ;;(if (not update-is-running) ;; we know that the update was not running and we now have a lock on doing an update
+				   ;;	   (begin
+				   (set! *updater-running* #t)
+				   (dboard:common-run-curr-updaters commondat) ;; (dashboard:run-update commondat)
+				   (set! *updater-running* #f)
+				   ;; (mutex-lock! (dboard:commondat-update-mutex commondat))
+				   ;;  (dboard:commondat-updating-set! commondat #f)
+				   ;;   (mutex-unlock! (dboard:commondat-update-mutex commondat)))
 				   ))
 			     1))))
       
       (let ((th1 (make-thread (lambda ()
 				(thread-sleep! 1)

Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -1776,15 +1776,33 @@
           #f
           )
         (with-input-from-file infile read-lines)
 	)))
 
-;;  select end_time-now from
-;;      (select testname,item_path,event_time+run_duration as
-;;                          end_time,strftime('%s','now') as now from tests where state in
-;;      ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));
-
+;; check duration against test-run.dat file if it exists and update the value in
+;; the db if necessary
+;;
+(define (db:adjust-run-duration dbstruct test-id run-dir event-time run-duration)
+  (let* ((datf             (conc run-dir ".mt_data/test-run.dat"))
+	 (modt             (if (and (file-exists? datf)
+				    (file-read-access? datf))
+			       (file-modification-time datf)
+			       #f)) ;; (+ event-time run-duration))))
+	 (alt-run-duration (if modt
+			       (- modt event-time)
+			       #f)))
+    (if (and alt-run-duration
+	     (> alt-run-duration run-duration))
+	(begin
+	  (debug:print 0 *default-log-port* "Test " test-id " run duration mismatch. Setting to " alt-run-duration)
+	  (db:with-db
+	   dbstruct #f #f
+	   (lambda (db)
+	     (sqlite3:execute db "UPDATE tests SET run_duration=? WHERE id=?;" alt-run-duration test-id)
+	     #t)))
+	#f))) ;; #f = we did NOT adjust the time
+	      
 (define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime)
   (let* ((incompleted '())
 	 (oldlaunched '())
 	 (toplevels   '())
           ;; The default running-deadtime is 720 seconds = 12 minutes.
@@ -1828,37 +1846,39 @@
 	 ;;                     (db:test-get-run_duration testdat)))
 	 ;;                    600) 
 	 ;; (db:delay-if-busy dbdat)
 	 (sqlite3:for-each-row 
 	  (lambda (test-id run-dir uname testname item-path event-time run-duration)
-	    (if (and (equal? uname "n/a")
-		     (equal? item-path "")) ;; this is a toplevel test
-		;; what to do with toplevel? call rollup?
-		(begin
-		  (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
-		  (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
-		(begin
-		  (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))
-		  (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id="
-				    test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)
-				    " event-time="event-time" run-duration="run-duration))))
+	    (if (not (db:adjust-run-duration dbstruct test-id run-dir event-time run-duration))
+		(if (and (equal? uname "n/a")
+			 (equal? item-path "")) ;; this is a toplevel test
+		    ;; what to do with toplevel? call rollup?
+		    (begin
+		      (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
+		      (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
+		    (begin
+		      (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))
+		      (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id="
+					test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)
+					" event-time="event-time" run-duration="run-duration)))))
 	  stmth1
 	  run-id running-deadtime) ;; default time 720 seconds
-       
+	    
 	 (sqlite3:for-each-row 
 	  (lambda (test-id run-dir uname testname item-path event-time run-duration)
-	    (if (and (equal? uname "n/a")
-		     (equal? item-path "")) ;; this is a toplevel test
-		;; what to do with toplevel? call rollup?
-		(begin
-		  (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
-		  (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
-		(begin
-		  (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id
-				    " exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time
-				    " run-duration="run-duration)
-		  (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))))
+	    (if (not (db:adjust-run-duration dbstruct test-id run-dir event-time run-duration))
+		(if (and (equal? uname "n/a")
+			 (equal? item-path "")) ;; this is a toplevel test
+		    ;; what to do with toplevel? call rollup?
+		    (begin
+		      (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
+		      (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
+		    (begin
+		      (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id
+					" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time
+					" run-duration="run-duration)
+		      (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))))
 	  stmth2
 	  run-id remotehoststart-deadtime) ;; default time 230 seconds
 	 
 	 ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
 	 ;;
@@ -3467,11 +3487,11 @@
      (lambda (run-id)
        (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id)))
 	 (db:prep-megatest.db-adj-test-ids (db:dbdat-get-db mtdb) run-id testrecs)))
      run-ids)))
 
-;; Get test data using test_id, run-id is not used
+;; Get test data using test_id, run-id is not used - but it will be!
 ;; 
 (define (db:get-test-info-by-id dbstruct run-id test-id)
   (db:with-db
    dbstruct
    #f ;; run-id

Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -205,17 +205,19 @@
 			     (current-seconds) 
 			     start-seconds)))))
 	 (kill-tries 0))
     ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
     ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
-    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
+    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10 update-db: #t)
 
     (let loop ((minutes   (calc-minutes))
 	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
 	       (disk-free (get-df (current-directory)))
                (last-sync (current-seconds)))
-      (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
+      ;; (common:telemetry-log "zombie" (conc "launch:monitor-job -
+      ;; top of loop encountered at "(current-seconds)" with
+      ;; last-sync="last-sync))
       (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
              (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
                                    (delta (abs (- load cpu-load))))
                               (if (> delta 0.1) ;; don't bother updating with small changes
                                   load
@@ -233,33 +235,28 @@
              (test-info   (rmt:get-test-info-by-id run-id test-id))
              (state       (db:test-get-state test-info))
              (status      (db:test-get-status test-info))
              (kill-reason  "no kill reason specified")
              (kill-job?    #f))
-        (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
+        #;(common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
         (cond
          ((test-get-kill-request run-id test-id)
           (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
           (set! kill-job? #t))
          ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
           (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
           (set! kill-job? #t))
          ((equal? status "DEAD")
-          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
+          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f update-db: #t)
           (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
           ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
           (set! kill-job? #f)))
 
         (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
         (launch:handle-zombie-tests run-id)
-        (when do-sync
-          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
-          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
-          (common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
-          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
-          (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
-        
+        (if do-sync ;; save meta data about the running of this test
+	    (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f))
 	(if kill-job? 
 	    (begin
               (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
 	      (mutex-lock! m)
 	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
@@ -312,11 +309,11 @@
 	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
 		  (loop (calc-minutes)
                         (or new-cpu-load cpu-load)
                         (or new-disk-free disk-free)
                         (if do-sync (current-seconds) last-sync)))))))
-    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional
+    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f update-db: #t))) ;; NOTE: Checking twice for keep-going is intentional
 
 
 (define (launch:execute encoded-cmd)
   (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
 	 (tconfigreg #f))
@@ -465,10 +462,13 @@
 				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
 				  (exit))))
 		 (test-pid  (db:test-get-process_id  test-info)))
 	    (cond
              ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
+	     ;; ((or (member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
+	     ;;  (and (equal? (db:test-get-state test-info) "COMPLETED")                           ;; completed/abort => rerun if asked
+	     ;;	       (member (db:test-get-status test-info) '("ABORT"))))
 	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
 	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
 	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
 
               (rmt:general-call 'set-test-start-time #f test-id)
@@ -771,11 +771,13 @@
               (if (not (null? tal))
 				  (loop (car tal) (cdr tal)))))))))))        
  
 (define (launch:is-test-alive host pid)
   (if (and host pid (not (equal? host "n/a")))
-      (let* ((cmd (conc "ssh " host " pstree -A " pid))
+      (let* ((is-local (equal? host (get-host-name)))
+	     (ssh-cmd   (if is-local " " (conc "ssh " host " ")))
+	     (cmd (conc ssh-cmd "pstree -A " pid))
 	     (output (with-input-from-pipe cmd read-lines)))
 	(debug:print 2 *default-log-port* "Running " cmd " received " output)
 	(if (eq? (length output) 0)
 	   #f
 	   #t))

Index: rmt.scm
==================================================================
--- rmt.scm
+++ rmt.scm
@@ -22,10 +22,12 @@
 
 (declare (unit rmt))
 (declare (uses api))
 (declare (uses http-transport))
 (include "common_records.scm")
+(include "db_records.scm")
+
 ;; (declare (uses rmtmod))
 
 ;; (import rmtmod)
 
 ;;
@@ -525,15 +527,24 @@
   (rmt:general-call 'register-test run-id run-id test-name item-path))
 
 (define (rmt:get-test-id run-id testname item-path)
   (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))
 
-;; run-id is NOT used
+;; run-id is NOT used - but it will be! 
 ;;
 (define (rmt:get-test-info-by-id run-id test-id)
   (if (number? test-id)
-      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
+      (let* ((testdat  (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id)))
+	     (trundatf (conc (db:test-get-rundir testdat) "/.mt_data/test-run.dat")))
+	;; now we can update a couple fields from the filesystem
+	(if (and (db:test-get-rundir testdat)
+		 (file-exists? trundatf))
+	    (let* ((duration   (db:test-get-run_duration testdat))
+		   (event-time (db:test-get-event_time   testdat))
+		   (last-touch (file-modification-time trundatf)))
+	      (db:test-set-run_duration! testdat (max duration (- last-touch event-time)))))
+	testdat)
       (begin
 	(debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
 	(print-call-chain (current-error-port))
 	#f)))
 

Index: runs.scm
==================================================================
--- runs.scm
+++ runs.scm
@@ -1789,13 +1789,11 @@
 	    (if (not can-run-more) #;(and (list? can-run-more)
 		(car can-run-more))
 		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here
 		  (if loop-list
 		      (apply loop loop-list)
-                      (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed)
-                      )
-                  )
+                      (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed)))
 		;; if can't run more just loop with next possible test
 		(loop (car newtal)(cdr newtal) reg reruns))))
          
 	 ;; this case should not happen, added to help catch any bugs
 	 ((and (list? items) itemdat)

Index: server.scm
==================================================================
--- server.scm
+++ server.scm
@@ -346,10 +346,28 @@
 	      (begin
 		(debug:print-info 0 *default-log-port* "Gating server start, last start: "
 				  fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go)
 		(thread-sleep! reftime)
 		(server:wait-for-server-start-last-flag areapath)))))))
+
+;; wait for server=start-last to be three seconds old - this might be what was causing the obfusc-skel fails
+;;
+#;(define (server:wait-for-server-start-last-flag areapath)
+  (let* ((start-flag (conc areapath "/logs/server-start-last")))
+    (if (file-exists? start-flag)
+	(let* ((fmodtime (file-modification-time start-flag))
+	       (reftime  (+ 2 (random 3)))
+	       (delta    (- (current-seconds) fmodtime))
+	       (all-go   (> delta reftime)))
+	  (if all-go
+	      #t ;; (system (conc "touch " start-flag)) ;; lazy but safe
+	      (begin
+		(debug:print-info 0 *default-log-port* "Gating server start, last start: "
+				  fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go)
+		(thread-sleep! 5)
+		(server:wait-for-server-start-last-flag areapath))))
+	#;(system (conc "touch " start-flag)))))
 
 ;; kind start up of servers, wait 40 seconds before allowing another server for a given
 ;; run-id to be launched
 ;;
 (define (server:kind-run areapath)

Index: tests.scm
==================================================================
--- tests.scm
+++ tests.scm
@@ -1944,28 +1944,52 @@
 	 tdb
 	 "SELECT count(id) FROM test_rundat;")
 	res))
   0)
 
-(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)
-  (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1))
-  (if (and cpuload diskfree)
-      (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id))
-  (if minutes 
-      (rmt:general-call 'update-run-duration run-id minutes test-id))
-  (if (and uname hostname)
-      (rmt:general-call 'update-uname-host run-id uname hostname test-id)))
+;; 
+(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname #!key (update-db #f)(tmpfree #f))
+  (if (get-environment-variable "MT_TEST_RUN_DIR")
+      (let* ((dest-dir (conc (get-environment-variable "MT_TEST_RUN_DIR") "/.mt_data"))
+	     (or-dash  (lambda (instr)
+			 (cond
+			  ((not instr) "") ;; #f -> blank, indicates value unchanged since last measurement taken
+			  ((string? instr)(if (string-search " " instr) (conc "\"" instr "\"") instr))
+			  (else instr))))
+	     (file-new (not (directory-exists? dest-dir))))
+	(if file-new (create-directory dest-dir #t))
+	(let* ((outp (open-output-file (conc dest-dir "/test-run.dat") #:append)))
+	  (with-output-to-port outp
+	    (lambda ()
+	      (if file-new
+		  (print "epoch_time,run_id,test_id,cpuload,diskfree,tmpfree,run_minutes,hostname,uname"))
+	      (print (current-seconds) "," (or-dash run-id)   "," (or-dash test-id)  ","
+		     (or-dash cpuload) "," (or-dash diskfree) "," (or-dash tmpfree)  ","
+		     (or-dash minutes) "," (or-dash hostname) ","
+		     (or-dash uname)))) ;; put uname last as it has spaces in it
+	  (close-output-port outp)))
+      (begin
+	(rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1))))
+  (if update-db
+      (begin
+	(if (and cpuload diskfree)
+	    (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id))
+	(if minutes 
+	    (rmt:general-call 'update-run-duration run-id minutes test-id))
+	(if (and uname hostname)
+	    (rmt:general-call 'update-uname-host run-id uname hostname test-id)))))
   
 ;; This one is for running with no db access (i.e. via rmt: internally)
-(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries)
+(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries #!key (update-db #f))
 ;; (define (tests:set-full-meta-info test-id run-id minutes work-area)
 ;;  (let ((remtries 10))
   (let* ((cpuload  (get-cpu-load))
 	 (diskfree (get-df (current-directory)))
+	 (tmpfree  (get-df "/tmp"))
 	 (uname    (get-uname "-srvpio"))
 	 (hostname (get-host-name)))
-    (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)))
+    (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname update-db: update-db tmpfree: tmpfree)))
     
 ;; (define (tests:set-partial-meta-info test-id run-id minutes work-area)
 #;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries)
   (let* ((cpuload  (get-cpu-load))
 	 (diskfree (get-df (current-directory)))
@@ -1984,12 +2008,11 @@
 	   (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
 	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
 	   (debug:print 5 *default-log-port* "exn=" (condition->list exn))
 	   (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
 	   (print-call-chain (current-error-port))))
-     (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
-  )))
+     (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes))))
 	 
 ;;======================================================================
 ;; A R C H I V I N G
 ;;======================================================================