Index: db.scm
==================================================================
--- db.scm
+++ db.scm
@@ -1676,11 +1676,11 @@
               (begin
                 (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
                 (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
               (begin
                 (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))
-                (debug:print-info 0 *default-log-port* "BB> Found old test in RUNNING state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration))))
+                (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration))))
         db
         "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING');"
         run-id running-deadtime)
 
        
@@ -1691,11 +1691,11 @@
               ;; what to do with toplevel? call rollup?
               (begin
                 (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
                 (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
               (begin
-                (debug:print-info 0 *default-log-port* "BB> Found old test in REMOTEHOSTSTART state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration)
+                (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration)
                 (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))))
         db
         "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('REMOTEHOSTSTART');"
         run-id remotehoststart-deadtime)
 
@@ -1707,11 +1707,11 @@
           (if (and (equal? uname "n/a")
                    (equal? item-path "")) ;; this is a toplevel test
               ;; what to do with toplevel? call rollup?
               (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
               (begin
-                (debug:print-info 0 *default-log-port* "BB> Found old test in REMOTEHOSTSTART state, test-id=" test-id" exceeded remotehoststart-deadtime "remotehoststart-deadtime)
+                (debug:print-info 0 *default-log-port* "Found old test in LAUNCHED state, test-id=" test-id" 1 day since event_time marked")
                 (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))))
         db
         "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');"
         run-id)
        

Index: docs/manual/megatest_manual.html
==================================================================
--- docs/manual/megatest_manual.html
+++ docs/manual/megatest_manual.html
@@ -900,69 +900,10 @@
 sqlite3 database. Megatest has been used with the Intel Netbatch and
 lsf (also known as openlava) batch systems and it should be
 straightforward to use it with other similar systems.</p></div>
 </div>
 </div>
-</div>
-<div class="sect1">
-<h2 id="_overview">Overview</h2>
-<div class="sectionbody">
-<div class="sect2">
-<h3 id="_stand_alone_megatest_area">Stand-alone Megatest Area</h3>
-<div class="paragraph"><p>A single, stand-alone, Megatest based testsuite or "area" is
-sufficient for most validation, automation and build problems.</p></div>
-<div class="imageblock">
-<div class="content">
-<img src="megatest-stand-alone-area.png" alt="Static">
-</div>
-</div>
-<div class="paragraph"><p>Megatest is designed as a distributed or decoupled system. This means
-you can run the areas stand-alone with no additional
-infrastructure. I.e. there are no databases, web servers or other
-centralized resources needed. However as your needs grow you can
-integrate multiple areas into a bigger system.</p></div>
-<div class="sect3">
-<h4 id="_component_descriptions">Component Descriptions</h4>
-<div class="olist arabic"><ol class="arabic">
-<li>
-<p>
-Multi-area dashboard and xterm. A gui (the dashboard) is usually the
-  best option for controlling and launching runs but all operations
-  can also be done from the commandline. Note: The not yet released
-  multi-area dashboard replaces the old dashboard for browsing and
-  controlling runs but for managing a single area the old dashboard
-  works very well.
-</p>
-</li>
-<li>
-<p>
-Area/testsuite. This is your testsuite or automation definition and
-  consists of the information in megatest.config, runconfigs.config
-  and your testconfigs along with any custom scripting that can&#8217;t be
-  done with the native Megatest features.
-</p>
-</li>
-<li>
-<p>
-If your testsuite or build automation is too large to run on a
-  single instance you can distribute your jobs into a compute server
-  pool. The only current requirements are password-less ssh access and
-  a network filesystem.
-</p>
-</li>
-</ol></div>
-</div>
-</div>
-<div class="sect2">
-<h3 id="_full_system_architecture">Full System Architecture</h3>
-<div class="imageblock">
-<div class="content">
-<img src="megatest-system-architecture.png" alt="Static">
-</div>
-</div>
-</div>
-</div>
 </div>
 <div class="sect1">
 <h2 id="_installation">Installation</h2>
 <div class="sectionbody">
 <div class="sect2">
@@ -3033,10 +2974,10 @@
 </div>
 <div id="footnotes"><hr></div>
 <div id="footer">
 <div id="footer-text">
 Version 1.0<br>
-Last updated 2018-11-29 09:32:52 PST
+Last updated 2019-02-19 11:13:54 PST
 </div>
 </div>
 </body>
 </html>

Index: launch.scm
==================================================================
--- launch.scm
+++ launch.scm
@@ -382,11 +382,11 @@
 
     (let loop ((minutes   (calc-minutes))
 	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
 	       (disk-free (get-df (current-directory)))
                (last-sync (current-seconds)))
-      (BB> "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync)
+      (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
       (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
              (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
                                    (delta (abs (- load cpu-load))))
                               (if (> delta 0.1) ;; don't bother updating with small changes
                                   load
@@ -404,11 +404,11 @@
              (test-info   (rmt:get-test-info-by-id run-id test-id))
              (state       (db:test-get-state test-info))
              (status      (db:test-get-status test-info))
              (kill-reason  "no kill reason specified")
              (kill-job?    #f))
-        (BB> "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period)
+        (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
         (cond
          ((test-get-kill-request run-id test-id)
           (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
           (set! kill-job? #t))
          ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
@@ -421,13 +421,13 @@
         (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
         (launch:handle-zombie-tests run-id)
         (when do-sync
           ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
           ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
-          (BB> "launch:monitor-job - dosync started at "(current-seconds))
+          (common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
           (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
-          (BB> "launch:monitor-job - dosync finished at "(current-seconds)))
+          (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
         
 	(if kill-job? 
 	    (begin
               (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
 	      (mutex-lock! m)