Megatest: Artifact [2c9348436b]

Artifact 2c9348436b4c82dc2bfd17f83bea37c48fb395a1:

File f3.scm — part of check-in [932f8e97c0] at 2019-11-02 23:25:32 on branch v1.65-wip-alt — wip (user: matt, size: 743507) [annotate] [blame] [check-ins using]

;; (define (rmt:update-db-stats run-id rawcmd params duration)
;;   (mutex-lock! *db-stats-mutex*)
;;   (handle-exceptions
;;    exn
;;    (begin
;;      (debug:print 0 *default-log-port* "WARNING: stats collection failed in update-db-stats")
;;      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
;;      (print "exn=" (condition->list exn))
;;      #f) ;; if this fails we don't care, it is just stats
;;    (let* ((cmd      (conc "run-id=" run-id " " (if (eq? rawcmd 'general-call) (car params) rawcmd)))
;; 	  (stat-vec (hash-table-ref/default *db-stats* cmd #f)))
;;      (if (not (vector? stat-vec))
;; 	 (let ((newvec (vector 0 0)))
;; 	   (hash-table-set! *db-stats* cmd newvec)
;; 	   (set! stat-vec newvec)))
;;      (vector-set! stat-vec 0 (+ (vector-ref stat-vec 0) 1))
;;      (vector-set! stat-vec 1 (+ (vector-ref stat-vec 1) duration))))
;;   (mutex-unlock! *db-stats-mutex*))

(define (rmt:print-db-stats)
  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
    (debug:print 18 *default-log-port* "DB Stats\n========")
    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
    (for-each (lambda (cmd)
		(let ((cmd-dat (hash-table-ref *db-stats* cmd)))
		  (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
	      (sort (hash-table-keys *db-stats*)
		    (lambda (a b)
		      (> (vector-ref (hash-table-ref *db-stats* a) 0)
			 (vector-ref (hash-table-ref *db-stats* b) 0)))))))

(define (rmt:get-max-query-average run-id)
  (mutex-lock! *db-stats-mutex*)
  (let* ((runkey (conc "run-id=" run-id " "))
	 (cmds   (filter (lambda (x)
			   (substring-index runkey x))
			 (hash-table-keys *db-stats*)))
	 (res    (if (null? cmds)
		     (cons 'none 0)
		     (let loop ((cmd (car cmds))
				(tal (cdr cmds))
				(max-cmd (car cmds))
				(res 0))
		       (let* ((cmd-dat (hash-table-ref *db-stats* cmd))
			      (tot     (vector-ref cmd-dat 0))
			      (curravg (/ (vector-ref cmd-dat 1) (vector-ref cmd-dat 0))) ;; count is never zero by construction
			      (currmax (max res curravg))
			      (newmax-cmd (if (> curravg res) cmd max-cmd)))
			 (if (null? tal)
			     (if (> tot 10)
				 (cons newmax-cmd currmax)
				 (cons 'none 0))
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 (res  	   (handle-exceptions
		    exn
		    #f
		    (http-transport:client-api-send-receive run-id connection-info cmd params))))
    (if (and res (vector-ref res 0))
	(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
	#f)))

;; ;; Wrap json library for strings (why the ports crap in the first place?)
;; (define (rmt:dat->json-str dat)
;;   (with-output-to-string 
;;     (lambda ()
;;       (json-write dat))))
;; 
;; (define (rmt:json-str->dat json-str)
;;   (with-input-from-string json-str
;;     (lambda ()
;;       (json-read))))

;;======================================================================
;;
;; A C T U A L   A P I   C A L L S  
;;
;;======================================================================

;;======================================================================
;;  S E R V E R
;;======================================================================

(define (rmt:kill-server run-id)
  (rmt:send-receive 'kill-server run-id (list run-id)))

(define (rmt:start-server run-id)
  (rmt:send-receive 'start-server 0 (list run-id)))

;;======================================================================
;;  M I S C
;;======================================================================

(define (rmt:login run-id)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version *my-client-signature*)))

;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
(define (rmt:login-no-auto-client-setup connection-info)
  (case *transport-type* ;; run-id of 0 is just a placeholder
    ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login 0 (list *toppath* megatest-version *my-client-signature*)))
    ;;((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*)))
    ))

;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible 
;;
(define (rmt:general-call stmtname run-id . params)
  (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))


;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host
(define (rmt:get-latest-host-load hostname)
  (rmt:send-receive 'get-latest-host-load 0 (list hostname)))

;; (define (rmt:sync-inmem->db run-id)
;;   (rmt:send-receive 'sync-inmem->db run-id '()))

(define (rmt:sdb-qry qry val run-id)
  ;; add caching if qry is 'getid or 'getstr
  (rmt:send-receive 'sdb-qry run-id (list qry val)))

;; NOT COMPLETED
(define (rmt:runtests user run-id testpatt params)
  (rmt:send-receive 'runtests run-id testpatt))

(define (rmt:get-run-record-ids  target run keynames test-patt)
  (rmt:send-receive 'get-run-record-ids #f (list target run keynames test-patt)))

(define (rmt:get-changed-record-ids since-time)
  (rmt:send-receive 'get-changed-record-ids #f (list since-time)) )

;;======================================================================
;;  T E S T   M E T A 
;;======================================================================

(define (rmt:get-tests-tags)
  (rmt:send-receive 'get-tests-tags #f '()))

;;======================================================================
;;  K E Y S 
;;======================================================================

;; These require run-id because the values come from the run!
;;
(define (rmt:get-key-val-pairs run-id)
  (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))

(define (rmt:get-keys)
  (if *db-keys* *db-keys* 
     (let ((res (rmt:send-receive 'get-keys #f '())))
       (set! *db-keys* res)
       res)))

(define (rmt:get-keys-write) ;; dummy query to force server start
  (let ((res (rmt:send-receive 'get-keys-write #f '())))
    (set! *db-keys* res)
    res))

;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe
;; to cache the resuls in a hash
;;
(define (rmt:get-key-vals run-id)
  (or (hash-table-ref/default *keyvals* run-id #f)
      (let ((res (rmt:send-receive 'get-key-vals #f (list run-id))))
        (hash-table-set! *keyvals* run-id res)
        res)))

(define (rmt:get-targets)
  (rmt:send-receive 'get-targets #f '()))

(define (rmt:get-target run-id)
  (rmt:send-receive 'get-target run-id (list run-id)))

(define (rmt:get-run-times runpatt targetpatt)
  (rmt:send-receive 'get-run-times #f (list runpatt targetpatt ))) 


;;======================================================================
;;  T E S T S
;;======================================================================

;; Just some syntatic sugar
(define (rmt:register-test run-id test-name item-path)
  (rmt:general-call 'register-test run-id run-id test-name item-path))

(define (rmt:get-test-id run-id testname item-path)
  (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))

;; run-id is NOT used
;;
(define (rmt:get-test-info-by-id run-id test-id)
  (if (number? test-id)
      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
      (begin
	(debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
	(print-call-chain (current-error-port))
	#f)))

(define (rmt:test-get-rundir-from-test-id run-id test-id)
  (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))

(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
  (let* ((test-path (if (string? work-area)
			work-area
			(rmt:test-get-rundir-from-test-id run-id test-id))))
    (debug:print 3 *default-log-port* "TEST PATH: " test-path)
    (open-test-db test-path)))

;; WARNING: This currently bypasses the transaction wrapped writes system
(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
  (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))

(define (rmt:set-tests-state-status run-id                      testnames currstate currstatus newstate newstatus)
  (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))

(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
  ;; (if (number? run-id)
  (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)))
  ;;    (begin
  ;;	(debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)
  ;;	(print-call-chain (current-error-port))
  ;;	'())))

;; get stuff via synchash 
(define (rmt:synchash-get run-id proc synckey keynum params)
  (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))

(define (rmt:get-tests-for-run-mindata run-id testpatt states status not-in)
  (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-id testpatt states status not-in)))
  
;; IDEA: Threadify these - they spend a lot of time waiting ...
;;
(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
  (let ((multi-run-mutex (make-mutex))
	(run-id-list (if run-ids
			 run-ids
			 (rmt:get-all-run-ids)))
	(result      '()))
    (if (null? run-id-list)
	'()
	(let loop ((hed     (car run-id-list))
		   (tal     (cdr run-id-list))
		   (threads '()))
	  (if (> (length threads) 5)
	      (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads))
	      (let* ((newthread (make-thread
				 (lambda ()
				   (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in))))
				     (if (list? res)
					 (begin
					   (mutex-lock! multi-run-mutex)
					   (set! result (append result res))
					   (mutex-unlock! multi-run-mutex))
					 (debug:print-error 0 *default-log-port* "get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in))))
				 (conc "multi-run-thread for run-id " hed)))
		     (newthreads (cons newthread threads)))
		(thread-start! newthread)
		(thread-sleep! 0.05) ;; give that thread some time to start
		(if (null? tal)
		    newthreads
		    (loop (car tal)(cdr tal) newthreads))))))
    result))

;; ;; IDEA: Threadify these - they spend a lot of time waiting ...
;; ;;
;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
;;   (let ((run-id-list (if run-ids
;; 			 run-ids
;; 			 (rmt:get-all-run-ids))))
;;     (apply append (map (lambda (run-id)
;; 			 (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in)))
;; 		       run-id-list))))

(define (rmt:delete-test-records run-id test-id)
  (rmt:send-receive 'delete-test-records run-id (list run-id test-id)))

;; This is not needed as test steps are deleted on test delete call
;;
;; (define (rmt:delete-test-step-records run-id test-id)
;;   (rmt:send-receive 'delete-test-step-records run-id (list run-id test-id)))

(define (rmt:test-set-state-status run-id test-id state status msg)
  (rmt:send-receive 'test-set-state-status run-id (list run-id test-id state status msg)))

(define (rmt:test-toplevel-num-items run-id test-name)
  (rmt:send-receive 'test-toplevel-num-items run-id (list run-id test-name)))

;; (define (rmt:get-previous-test-run-record run-id test-name item-path)
;;   (rmt:send-receive 'get-previous-test-run-record run-id (list run-id test-name item-path)))

(define (rmt:get-matching-previous-test-run-records run-id test-name item-path)
  (rmt:send-receive 'get-matching-previous-test-run-records run-id (list run-id test-name item-path)))

(define (rmt:test-get-logfile-info run-id test-name)
  (rmt:send-receive 'test-get-logfile-info run-id (list run-id test-name)))

(define (rmt:test-get-records-for-index-file run-id test-name)
  (rmt:send-receive 'test-get-records-for-index-file run-id (list run-id test-name)))

(define (rmt:get-testinfo-state-status run-id test-id)
  (rmt:send-receive 'get-testinfo-state-status run-id (list run-id test-id)))

(define (rmt:test-set-log! run-id test-id logf)
  (if (string? logf)(rmt:general-call 'test-set-log run-id logf test-id)))

(define (rmt:test-set-top-process-pid run-id test-id pid)
  (rmt:send-receive 'test-set-top-process-pid run-id (list run-id test-id pid)))

(define (rmt:test-get-top-process-pid run-id test-id)
  (rmt:send-receive 'test-get-top-process-pid run-id (list run-id test-id)))

(define (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)
  (rmt:send-receive 'get-run-ids-matching-target #f (list keynames target res runname testpatt statepatt statuspatt)))

;; NOTE: This will open and access ALL run databases. 
;;
(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
  (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
    (apply append 
	   (map (lambda (run-id)
		  (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
	   run-ids))))

;; (define (rmt:get-run-ids-matching keynames target res)
;;   (rmt:send-receive #f 'get-run-ids-matching (list keynames target res)))

(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
  (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))

(define (rmt:get-count-tests-running-for-run-id run-id)
  (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id)))

(define (rmt:get-not-completed-cnt run-id)
  (rmt:send-receive 'get-not-completed-cnt run-id (list run-id)))


;; Statistical queries

(define (rmt:get-count-tests-running run-id)
  (rmt:send-receive 'get-count-tests-running run-id (list run-id)))

(define (rmt:get-count-tests-running-for-testname run-id testname)
  (rmt:send-receive 'get-count-tests-running-for-testname run-id (list run-id testname)))

(define (rmt:get-count-tests-running-in-jobgroup run-id jobgroup)
  (rmt:send-receive 'get-count-tests-running-in-jobgroup run-id (list run-id jobgroup)))

;; state and status are extra hints not usually used in the calculation
;;
(define (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status comment)
  (rmt:send-receive 'set-state-status-and-roll-up-items run-id (list run-id test-name item-path state status comment)))

(define (rmt:set-state-status-and-roll-up-run run-id state status)
  (rmt:send-receive 'set-state-status-and-roll-up-run run-id (list run-id state status)))


(define (rmt:update-pass-fail-counts run-id test-name)
  (rmt:general-call 'update-pass-fail-counts run-id test-name test-name test-name))

(define (rmt:top-test-set-per-pf-counts run-id test-name)
  (rmt:send-receive 'top-test-set-per-pf-counts run-id (list run-id test-name)))

(define (rmt:get-raw-run-stats run-id)
  (rmt:send-receive 'get-raw-run-stats run-id (list run-id)))

(define (rmt:get-test-times runname target)
  (rmt:send-receive 'get-test-times #f (list runname target ))) 

;;======================================================================
;;  R U N S
;;======================================================================

(define (rmt:get-run-info run-id)
  (rmt:send-receive 'get-run-info run-id (list run-id)))

(define (rmt:get-num-runs runpatt)
  (rmt:send-receive 'get-num-runs #f (list runpatt)))

(define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys)
  (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt  targetpatt keys)))

;; Use the special run-id == #f scenario here since there is no run yet
(define (rmt:register-run keyvals runname state status user contour)
  (rmt:send-receive 'register-run #f (list keyvals runname state status user contour)))
    
(define (rmt:get-run-name-from-id run-id)
  (rmt:send-receive 'get-run-name-from-id run-id (list run-id)))

(define (rmt:delete-run run-id)
  (rmt:send-receive 'delete-run run-id (list run-id)))

(define (rmt:update-run-stats run-id stats)
  (rmt:send-receive 'update-run-stats #f (list run-id stats)))

(define (rmt:delete-old-deleted-test-records)
  (rmt:send-receive 'delete-old-deleted-test-records #f '()))

(define (rmt:get-runs runpatt count offset keypatts)
  (rmt:send-receive 'get-runs #f (list runpatt count offset keypatts)))

(define (rmt:simple-get-runs runpatt count offset target)
  (rmt:send-receive 'simple-get-runs #f (list runpatt count offset target)))

(define (rmt:get-all-run-ids)
  (rmt:send-receive 'get-all-run-ids #f '()))

(define (rmt:get-prev-run-ids run-id)
  (rmt:send-receive 'get-prev-run-ids #f (list run-id)))

(define (rmt:lock/unlock-run run-id lock unlock user)
  (rmt:send-receive 'lock/unlock-run #f (list run-id lock unlock user)))

;; set/get status
(define (rmt:get-run-status run-id)
  (rmt:send-receive 'get-run-status #f (list run-id)))

(define (rmt:get-run-state run-id)
  (rmt:send-receive 'get-run-state #f (list run-id)))


(define (rmt:set-run-status run-id run-status #!key (msg #f))
  (rmt:send-receive 'set-run-status #f (list run-id run-status msg)))

(define (rmt:set-run-state-status run-id state status )
  (rmt:send-receive 'set-run-state-status #f (list run-id state status)))


(define (rmt:update-run-event_time run-id)
  (rmt:send-receive 'update-run-event_time #f (list run-id)))

(define (rmt:get-runs-by-patt  keys runnamepatt targpatt offset limit fields last-runs-update  #!key  (sort-order "asc")) ;; fields of #f uses default
  (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update sort-order)))

(define (rmt:find-and-mark-incomplete run-id ovr-deadtime)
  ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime))
  (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; )

(define (rmt:get-main-run-stats run-id)
  (rmt:send-receive 'get-main-run-stats #f (list run-id)))

(define (rmt:get-var varname)
  (rmt:send-receive 'get-var #f (list varname)))

(define (rmt:del-var varname)
  (rmt:send-receive 'del-var #f (list varname)))

(define (rmt:set-var varname value)
  (rmt:send-receive 'set-var #f (list varname value)))

;;======================================================================
;; M U L T I R U N   Q U E R I E S
;;======================================================================

;; Need to move this to multi-run section and make associated changes
(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f))
  (let ((run-ids (rmt:get-all-run-ids)))
    (for-each (lambda (run-id)
	       (rmt:find-and-mark-incomplete run-id ovr-deadtime))
	     run-ids)))

;; get the previous record for when this test was run where all keys match but runname
;; returns #f if no such test found, returns a single test record if found
;; 
;; Run this at the client end since we have to connect to multiple run-id dbs
;;
(define (rmt:get-previous-test-run-record run-id test-name item-path)
  (let* ((keyvals (rmt:get-key-val-pairs run-id))
	 (keys    (rmt:get-keys))
	 (selstr  (string-intersperse  keys ","))
	 (qrystr  (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")))
    (if (not keyvals)
	#f
	(let ((prev-run-ids (rmt:get-prev-run-ids run-id)))
	  ;; for each run starting with the most recent look to see if there is a matching test
	  ;; if found then return that matching test record
	  (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals ", previous run ids found: " prev-run-ids)
	  (if (null? prev-run-ids) #f
	      (let loop ((hed (car prev-run-ids))
			 (tal (cdr prev-run-ids)))
		(let ((results (rmt:get-tests-for-run hed (conc test-name "/" item-path) '() '() ;; run-id testpatt states statuses
						      #f #f #f               ;; offset limit not-in hide/not-hide
						      #f #f #f #f 'normal))) ;; sort-by sort-order qryvals last-update mode
		  (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name ", item-path " item-path ": " results)
		  (if (and (null? results)
			   (not (null? tal)))
		      (loop (car tal)(cdr tal))
		      (if (null? results) #f
			  (car results))))))))))

(define (rmt:get-run-stats)
  (rmt:send-receive 'get-run-stats #f '()))

;;======================================================================
;;  S T E P S
;;======================================================================

;; Getting steps is more complicated.
;;
;; If given work area 
;;  1. Find the testdat.db file
;;  2. Open the testdat.db file and do the query
;; If not given the work area
;;  1. Do a remote call to get the test path
;;  2. Continue as above
;; 
;;(define (rmt:get-steps-for-test run-id test-id)
;;  (rmt:send-receive 'get-steps-data run-id (list test-id)))

(define (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile)
  (let* ((state     (items:check-valid-items "state" state-in))
	 (status    (items:check-valid-items "status" status-in)))
    (if (or (not state)(not status))
	(debug:print 3 *default-log-port* "WARNING: Invalid " (if status "status" "state")
		     " value \"" (if status state-in status-in) "\", update your validvalues section in megatest.config"))
    (rmt:send-receive 'teststep-set-status! run-id (list run-id test-id teststep-name state-in status-in comment logfile))))


(define (rmt:delete-steps-for-test! run-id test-id)
  (rmt:send-receive 'delete-steps-for-test! run-id (list run-id test-id)))

(define (rmt:get-steps-for-test run-id test-id)
  (rmt:send-receive 'get-steps-for-test run-id (list run-id test-id)))

(define (rmt:get-steps-info-by-id test-step-id)
  (rmt:send-receive 'get-steps-info-by-id #f (list test-step-id)))

;;======================================================================
;;  T E S T   D A T A 
;;======================================================================

(define (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) 
  (rmt:send-receive 'read-test-data run-id (list run-id test-id categorypatt)))
(define (rmt:read-test-data* run-id test-id categorypatt varpatt #!key (work-area #f)) 
  (rmt:send-receive 'read-test-data* run-id (list run-id test-id categorypatt varpatt)))

(define (rmt:get-data-info-by-id test-data-id)
   (rmt:send-receive 'get-data-info-by-id #f (list test-data-id)))

(define (rmt:testmeta-add-record testname)
  (rmt:send-receive 'testmeta-add-record #f (list testname)))

(define (rmt:testmeta-get-record testname)
  (rmt:send-receive 'testmeta-get-record #f (list testname)))

(define (rmt:testmeta-update-field test-name fld val)
  (rmt:send-receive 'testmeta-update-field #f (list test-name fld val)))

(define (rmt:test-data-rollup run-id test-id status)
  (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status)))

(define (rmt:csv->test-data run-id test-id csvdata)
  (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata)))

;;======================================================================
;;  T A S K S
;;======================================================================

(define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt)
  (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt)))

(define (rmt:tasks-add action owner target runname testpatt params)
  (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params)))

(define (rmt:tasks-set-state-given-param-key param-key new-state)
  (rmt:send-receive 'tasks-set-state-given-param-key #f (list  param-key new-state)))

(define (rmt:tasks-get-last target runname)
  (rmt:send-receive 'tasks-get-last #f (list target runname)))

;;======================================================================
;; N O   S Y N C   D B 
;;======================================================================

(define (rmt:no-sync-set var val)
  (rmt:send-receive 'no-sync-set #f `(,var ,val)))

(define (rmt:no-sync-get/default var default)
  (rmt:send-receive 'no-sync-get/default #f `(,var ,default)))

(define (rmt:no-sync-del! var)
  (rmt:send-receive 'no-sync-del! #f `(,var)))

(define (rmt:no-sync-get-lock keyname)
  (rmt:send-receive 'no-sync-get-lock #f `(,keyname)))

;;======================================================================
;; A R C H I V E S
;;======================================================================

(define (rmt:archive-get-allocations  testname itempath dneeded)
  (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded)))

(define (rmt:archive-register-block-name bdisk-id archive-path)
  (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path)))

#;(define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey)
  (rmt:send-receive 'archive-allocate-test-to-block #f (list  block-id testsuite-name areakey)))

(define (rmt:archive-register-disk bdisk-name bdisk-path df)
  (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df)))

(define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
  (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))

(define (rmt:test-get-archive-block-info archive-block-id)
  (rmt:send-receive 'test-get-archive-block-info #f (list archive-block-id)))

#;(set-functions http-transport:client-api-send-receive  ;; a
	       http-transport:close-connections	       ;; b
	       api:execute-requests                    ;; c
	       #f
	       client:setup                            ;; e
	       server:kind-run                         ;; f
	       server:start-and-wait                   ;; g
	       server:check-if-running                 ;; h
	       server:ping                             ;; i
	       common:force-server?                    ;; j
	       )

(define *api-process-request-count* 0)
(define *max-api-process-requests* 0)
(define *db-api-call-time*    (make-hash-table)) 

;; NOTE: Can remove the regex and base64 encoding for zmq
(define (api:obj->string obj #!key (transport 'http))
  (case transport
    ;; ((fs) obj)
    ((http fs)
     (string-substitute
      (regexp "=") "_"
      (base64:base64-encode 
       (z3:encode-buffer
	(with-output-to-string
	  (lambda ()(serialize obj))))) ;; BB: serialize - this is what causes problems between different builds of megatest communicating.  serialize is sensitive to binary image of mtest.
      #t))
    ((zmq nmsg)(with-output-to-string (lambda ()(serialize obj))))
    (else obj))) ;; rpc

(define (api:string->obj msg #!key (transport 'http))
  (case transport
    ;; ((fs) msg)
    ((http fs)
     (if (string? msg)
	 (with-input-from-string 
	     (z3:decode-buffer
	      (base64:base64-decode
	       (string-substitute 
		(regexp "_") "=" msg #t)))
	   (lambda ()(deserialize)))
	 (begin
	   (debug:print-error 0 *default-log-port* "reception failed. Received \"" msg "\" but cannot translate it.")
           (print-call-chain (current-error-port))
	   msg))) ;; crude reply for when things go awry
    ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize))))
    (else msg))) ;; rpc



;; These are called by the server on recipt of /api calls
;;    - keep it simple, only return the actual result of the call, i.e. no meta info here
;;
;;    - returns #( flag result )
;;
(define (api:execute-requests dbstruct dat)
  (handle-exceptions
   exn
   (let ((call-chain (get-call-chain)))
     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat)
     (print-call-chain (current-error-port))
     (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
     (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
   (cond
    ((not (vector? dat))                    ;; it is an error to not receive a vector
     (vector #f (vector #f "remote must be called with a vector")))
    ((> *api-process-request-count* 20) ;; 20)
     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.")
     (set! *server-overloaded* #t)
     (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
    (else  
     (let* ((cmd-in            (vector-ref dat 0))
            (cmd               (if (symbol? cmd-in)
				   cmd-in
				   (string->symbol cmd-in)))
            (params            (vector-ref dat 1))
            (start-t           (current-milliseconds))
            (readonly-mode     (dbr:dbstruct-read-only dbstruct))
            (readonly-command  (member cmd api:read-only-queries))
            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
            #;(foo               (begin
                                 (common:telemetry-log (conc "api-in:"(->string cmd))
                                                       payload: `((params . ,params)))
                                 
                                 #t))
            (res    
             (if writecmd-in-readonly-mode
                 (conc "attempt to run write command "cmd" on a read-only database")
                 (case cmd
                   ;;===============================================
                   ;; READ/WRITE QUERIES
                   ;;===============================================

                   ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
                   
                   ;; SERVERS
                   ((start-server)                    (apply server:kind-run params))
                   ((kill-server)                     (set! *server-run* #f))

                   ;; TESTS

                   ;;((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
                   ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items.
                   ((test-set-state-status-by-id)

                    ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
                    (db:set-state-status-and-roll-up-items
                     dbstruct
                     (list-ref params 0) ; run-id
                     (list-ref params 1) ; test-name
                     #f                  ; item-path
                     (list-ref params 2) ; state
                     (list-ref params 3) ; status
                     (list-ref params 4) ; comment
                     ))
                   
                   ((delete-test-records)             (apply db:delete-test-records dbstruct params))
                   ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
                   ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
                   ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
                   ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
                   ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) 
                   ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
                   ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))

                   ;; RUNS
                   ((register-run)                 (apply db:register-run dbstruct params))
                   ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
                   ((delete-run)                   (apply db:delete-run dbstruct params))
                   ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
                   ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
                   ((update-run-stats)             (apply db:update-run-stats dbstruct params))
                   ((set-var)                      (apply db:set-var dbstruct params))
                   ((del-var)                      (apply db:del-var dbstruct params))

                   ;; STEPS
                   ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))
                   ((delete-steps-for-test!)        (apply db:delete-steps-for-test! dbstruct params))
                   
                   ;; TEST DATA
                   ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
                   ((csv->test-data)               (apply db:csv->test-data dbstruct params))

                   ;; MISC
                   ((sync-inmem->db)               (let ((run-id (car params)))
                                                     (db:sync-touched dbstruct run-id force-sync: #t)))
                   ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))

                   ;; TESTMETA
                   ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
                   ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
                   ((get-tests-tags)            (db:get-tests-tags dbstruct))

                   ;; TASKS
                   ((tasks-add)                 (apply tasks:add dbstruct params))   
                   ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
                   ((tasks-get-last)            (apply tasks:get-last dbstruct params))

		   ;; NO SYNC DB
		   ((no-sync-set)               (apply db:no-sync-set         *no-sync-db* params))
		   ((no-sync-get/default)       (apply db:no-sync-get/default *no-sync-db* params))
		   ((no-sync-del!)              (apply db:no-sync-del!        *no-sync-db* params))
		   ((no-sync-get-lock)          (apply db:no-sync-get-lock    *no-sync-db* params))
		 
                   ;; ARCHIVES
                   ;; ((archive-get-allocations)   
                   ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
                   ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
                   ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))

                   ;;======================================================================
                   ;; READ ONLY QUERIES
                   ;;======================================================================

                   ;; KEYS
                   ((get-key-val-pairs)               (apply db:get-key-val-pairs dbstruct params))
                   ((get-keys)                        (db:get-keys dbstruct))
                   ((get-key-vals)                    (apply db:get-key-vals dbstruct params))
                   ((get-target)                      (apply db:get-target dbstruct params))
                   ((get-targets)                     (db:get-targets dbstruct))

                   ;; ARCHIVES
                   ((test-get-archive-block-info)     (apply db:test-get-archive-block-info dbstruct params))
                   
                   ;; TESTS
                   ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
                   ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
                   ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
                   ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params))
                   ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
                   ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
                   ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
                   ;; ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
                   ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
                   ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
                   ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
                   ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
                   ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
                   ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
                   ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
                   ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
                   ((get-not-completed-cnt)           (apply db:get-not-completed-cnt  dbstruct params)) 
                   ;; ((synchash-get)                    (apply synchash:server-get dbstruct params))
                   ((get-raw-run-stats)               (apply db:get-raw-run-stats dbstruct params))
		   ((get-test-times)                  (apply db:get-test-times dbstruct params))

                   ;; RUNS
                   ((get-run-info)                 (apply db:get-run-info dbstruct params))
                   ((get-run-status)               (apply db:get-run-status dbstruct params))
                   ((get-run-state)                (apply db:get-run-state dbstruct params))
                   ((set-run-status)               (apply db:set-run-status dbstruct params))
                   ((set-run-state-status)  			 (apply db:set-run-state-status dbstruct params))
                   ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
                   ((get-test-id)                  (apply db:get-test-id dbstruct params))
                   ((get-tests-for-run-mindata)    (apply db:get-tests-for-run-mindata dbstruct params))
                   ;; ((get-tests-for-runs-mindata)   (apply db:get-tests-for-runs-mindata dbstruct params))
                   ((get-runs)                     (apply db:get-runs dbstruct params))
                   ((simple-get-runs)              (apply db:simple-get-runs dbstruct params))
                   ((get-num-runs)                 (apply db:get-num-runs dbstruct params))
                   ((get-runs-cnt-by-patt)         (apply db:get-runs-cnt-by-patt dbstruct params))
                   ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
                   ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
                   ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
                   ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
                   ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
                   ((get-main-run-stats)           (apply db:get-main-run-stats dbstruct params))
                   ((get-var)                      (apply db:get-var dbstruct params))
                   ((get-run-stats)                (apply db:get-run-stats dbstruct params))
                   ((get-run-times)                (apply db:get-run-times dbstruct params)) 

                   ;; STEPS
                   ((get-steps-data)               (apply db:get-steps-data dbstruct params))
                   ((get-steps-for-test)           (apply db:get-steps-for-test dbstruct params))
		   ((get-steps-info-by-id)         (apply db:get-steps-info-by-id dbstruct params))

                   ;; TEST DATA
                   ((read-test-data)               (apply db:read-test-data dbstruct params))
                   ((read-test-data*)              (apply db:read-test-data* dbstruct params))
                   ((get-data-info-by-id)          (apply db:get-data-info-by-id dbstruct params)) 

                   ;; MISC
                   ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
                   ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
                   ((login)                        (apply db:login dbstruct params))
                   ((general-call)                 (let ((stmtname   (car params))
                                                         (run-id     (cadr params))
                                                         (realparams (cddr params)))
                                                     (db:general-call dbstruct stmtname realparams)))
                   ((sdb-qry)                      (apply sdb:qry params))
                   ((ping)                         (current-process-id))
		   ((get-changed-record-ids)       (apply db:get-changed-record-ids dbstruct params))
		   ((get-run-record-ids) 	   (apply db:get-run-record-ids dbstruct params))	
                   ;; TESTMETA
                   ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))

                   ;; TASKS 
                   ((find-task-queue-records)   (apply tasks:find-task-queue-records dbstruct params))
		   (else
		    (debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
		    (conc "ERROR: BAD api call " cmd))))))

       
       ;; save all stats
       (let ((delta-t (- (current-milliseconds)
			 start-t)))
	 (hash-table-set! *db-api-call-time* cmd
			  (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
       (if writecmd-in-readonly-mode
           (begin
             #;(common:telemetry-log (conc "api-out:"(->string cmd))
                                   payload: `((params . ,params)
                                              (ok-res . #t)))
	     (vector #f res))
           (begin
             #;(common:telemetry-log (conc "api-out:"(->string cmd))
                                   payload: `((params . ,params)
                                              (ok-res . #f)))
             (vector #t res))))))))

;; http-server  send-response
;;                 api:process-request
;;                    db:*
;;
;; NB// Runs on the server as part of the server loop
;;
(define (api:process-request dbstruct $) ;; the $ is the request vars proc
  (set! *api-process-request-count* (+ *api-process-request-count* 1))
  (let* ((cmd     ($ 'cmd))
	 (paramsj ($ 'params))
	 (params  (api:string->obj paramsj transport: 'http)) ;; incoming data from the POST (or is it a GET?)
	 (resdat  (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
	 (success (vector-ref resdat 0))
	 (res     (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?)
    (if (not success)
	(debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params))
    (if (> *api-process-request-count* *max-api-process-requests*)
	(set! *max-api-process-requests* *api-process-request-count*))
    (set! *api-process-request-count* (- *api-process-request-count* 1))
    ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
    ;; (rmt:dat->json-str
    ;;  (if (or (string? res)
    ;;          (list?   res)
    ;;          (number? res)
    ;;          (boolean? res))
    ;;      res 
    ;;      (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
    (api:obj->string res transport: 'http)))


;;======================================================================
;;
;; T E S T   D A T A B A S E S
;;
;;======================================================================

;;======================================================================
;; T E S T   S P E C I F I C   D B 
;;======================================================================

;; Create the sqlite db for the individual test(s)
;;
;; Moved these tables into <runid>.db
;; THIS CODE TO BE REMOVED
;;
(define (open-test-db work-area) 
  (debug:print-info 11 *default-log-port* "open-test-db " work-area)
  (if (and work-area 
	   (directory? work-area)
	   (file-read-access? work-area))
      (let* ((dbpath              (conc work-area "/testdat.db"))
	     (dbexists            (common:file-exists? dbpath))
	     (work-area-writeable (file-write-access? work-area))
	     (db                  (handle-exceptions  ;; open the db if area writeable or db pre-existing. open in-mem otherwise. if exception, open in-mem
				   exn
				   (begin
				     (print-call-chain (current-error-port))
				     (debug:print 2 *default-log-port* "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test"
						  ((condition-property-accessor 'exn 'message) exn))
				     (set! dbexists #f) ;; must force re-creation of tables, more tom-foolery
				     (sqlite3:open-database ":memory:")) ;; open an in-memory db to allow readonly access 
				   (if (or work-area-writeable
					   dbexists)
				       (sqlite3:open-database dbpath)
				       (sqlite3:open-database ":memory:"))))
	     (tdb-writeable       (and (file-write-access? work-area)
				       (file-write-access? dbpath)))
	     (handler   (make-busy-timeout (if (args:get-arg "-override-timeout")
					       (string->number (args:get-arg "-override-timeout"))
					       136000))))
	
	(if (and tdb-writeable
		 *db-write-access*)
	    (sqlite3:set-busy-handler! db handler))
	(if (not dbexists)
	    (begin
	      (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = FULL;")
	      (debug:print-info 11 *default-log-port* "Initialized test database " dbpath)
	      (tdb:testdb-initialize db)))
	;; (sqlite3:execute db "PRAGMA synchronous = 0;")
	(debug:print-info 11 *default-log-port* "open-test-db END (sucessful)" work-area)
	;; now let's test that everything is correct
	(handle-exceptions
	 exn
	 (begin
	   (print-call-chain (current-error-port))
	   (debug:print-error 0 *default-log-port* "problem accessing test db " work-area ", you probably should clean and re-run this test or remove the file " 
			dbpath ".\n  "
			((condition-property-accessor 'exn 'message) exn))
	   #f)
	 ;; Is there a cheaper single line operation that will check for existance of a table
	 ;; and raise an exception ?
	 (sqlite3:execute db "SELECT id FROM test_data LIMIT 1;"))
	db)
      ;; no work-area or not readable - create a placeholder to fake rest of world out
      (let ((baddb (sqlite3:open-database ":memory:")))
 	(debug:print-info 11 *default-log-port* "open-test-db END (unsucessful)" work-area)
 	;; provide an in-mem db (this is dangerous!)
 	(tdb:testdb-initialize baddb)
 	baddb)))

;; find and open the testdat.db file for an existing test
(define (tdb:open-test-db-by-test-id test-id #!key (work-area #f))
  (let* ((test-path (if work-area
			work-area
			(rmt:test-get-rundir-from-test-id test-id))))
    (debug:print 3 *default-log-port* "TEST PATH: " test-path)
    (open-test-db test-path)))

;; find and open the testdat.db file for an existing test
(define (tdb:open-test-db-by-test-id-local dbstruct run-id test-id #!key (work-area #f))
  (let* ((test-path (if work-area
			work-area
			(db:test-get-rundir-from-test-id dbstruct run-id test-id))))
    (debug:print 3 *default-log-port* "TEST PATH: " test-path)
    (open-test-db test-path)))

;; find and open the testdat.db file for an existing test
(define (tdb:open-run-close-db-by-test-id-local dbstruct run-id test-id work-area proc . params)
  (let* ((test-path (if work-area
			work-area
			(db:test-get-rundir-from-test-id dbstruct run-id test-id)))
	 (tdb        (open-test-db test-path)))
    (apply proc tdb params)))

(define (tdb:testdb-initialize db)
  (debug:print 11 *default-log-port* "db:testdb-initialize START")
  (sqlite3:with-transaction
   db
   (lambda ()
     (for-each
      (lambda (sqlcmd)
	(sqlite3:execute db sqlcmd))
      (list "CREATE TABLE IF NOT EXISTS test_rundat (
              id INTEGER PRIMARY KEY,
              update_time TIMESTAMP,
              cpuload INTEGER DEFAULT -1,
              diskfree INTEGER DEFAULT -1,
              diskusage INTGER DEFAULT -1,
              run_duration INTEGER DEFAULT 0);"
	    "CREATE TABLE IF NOT EXISTS test_data (
              id INTEGER PRIMARY KEY,
              test_id INTEGER,
              category TEXT DEFAULT '',
              variable TEXT,
	      value REAL,
	      expected REAL,
	      tol REAL,
              units TEXT,
              comment TEXT DEFAULT '',
              status TEXT DEFAULT 'n/a',
              type TEXT DEFAULT '',
              CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));"
	    "CREATE TABLE IF NOT EXISTS test_steps (
              id INTEGER PRIMARY KEY,
              test_id INTEGER, 
              stepname TEXT, 
              state TEXT DEFAULT 'NOT_STARTED', 
              status TEXT DEFAULT 'n/a',
              event_time TIMESTAMP,
              comment TEXT DEFAULT '',
              logfile TEXT DEFAULT '',
              CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));"
	    ;; test_meta can be used for handing commands to the test
	    ;; e.g. KILLREQ
	    ;;      the ackstate is set to 1 once the command has been completed
	    "CREATE TABLE IF NOT EXISTS test_meta (
              id INTEGER PRIMARY KEY,
              var TEXT,
              val TEXT,
              ackstate INTEGER DEFAULT 0,
              CONSTRAINT metadat_constraint UNIQUE (var));"))))
  (debug:print 11 *default-log-port* "db:testdb-initialize END"))

;; This routine moved to db:read-test-data
;;
(define (tdb:read-test-data tdb test-id categorypatt)
  (let ((res '()))
    (sqlite3:for-each-row 
     (lambda (id test_id category variable value expected tol units comment status type)
       (set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
     tdb
     "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt)
    (sqlite3:finalize! tdb)
    (reverse res)))

;;======================================================================
;; T E S T   D A T A 
;;======================================================================

;; ;; get a list of test_data records matching categorypatt
;; (define (tdb:read-test-data test-id categorypatt #!key (work-area #f))
;;   (let ((tdb  (tdb:open-test-db-by-test-id test-id work-area: work-area)))
;;     (if (sqlite3:database? tdb)
;; 	(let ((res '()))
;; 	  (sqlite3:for-each-row 
;; 	   (lambda (id test_id category variable value expected tol units comment status type)
;; 	     (set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
;; 	   tdb
;; 	   "SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt)
;; 	  (sqlite3:finalize! tdb)
;; 	  (reverse res))
;; 	'())))

;; NOTE: Run this local with #f for db !!!
(define (tdb:load-test-data run-id test-id)
  (let loop ((lin (read-line)))
    (if (not (eof-object? lin))
	(begin
	  (debug:print 4 *default-log-port* lin)
          ;;(when lin ;; this when blocked stack dump caused by .dat file from logpro being 0-byte.  fixed by upgrading logpro
          (rmt:csv->test-data run-id test-id lin)
          ;;)
	  (loop (read-line)))))
  ;; roll up the current results.
  ;; FIXME: Add the status too 
  (rmt:test-data-rollup run-id test-id #f))

;; NOTE: Run this local with #f for db !!!
(define (tdb:load-logpro-data run-id test-id)
  (let loop ((lin (read-line)))
    (if (not (eof-object? lin))
	(begin
	  (debug:print 4 *default-log-port* lin)
          ;;(when lin  ;; this when blocked stack dump caused by .dat file from logpro being 0-byte.  fixed by upgrading logpro
          (rmt:csv->test-data run-id test-id lin)
          ;;)
	  (loop (read-line)))))
  ;; roll up the current results.
  ;; FIXME: Add the status too 
  (rmt:test-data-rollup run-id test-id #f))

(define (tdb:get-prev-tol-for-test tdb test-id category variable)
  ;; Finish me?
  (values #f #f #f))

;;======================================================================
;; S T E P S 
;;======================================================================

(define (tdb:step-get-time-as-string vec)
  (seconds->time-string (tdb:step-get-event_time vec)))

;; get a pretty table to summarize steps
;; 
;; NOT USED, WILL BE REMOVED
;;
(define (tdb:get-steps-table steps);; organise the steps for better readability
  (let ((res (make-hash-table)))
    (for-each 
     (lambda (step)
       (debug:print 6 *default-log-port* "step=" step)
       (let ((record (hash-table-ref/default 
		      res 
		      (tdb:step-get-stepname step) 
		      ;;        stepname                start end status Duration  Logfile 
		      (vector (tdb:step-get-stepname step) ""   "" ""     ""        ""))))
	 (debug:print 6 *default-log-port* "record(before) = " record 
		      "\nid:       " (tdb:step-get-id step)
		      "\nstepname: " (tdb:step-get-stepname step)
		      "\nstate:    " (tdb:step-get-state step)
		      "\nstatus:   " (tdb:step-get-status step)
		      "\ntime:     " (tdb:step-get-event_time step))
	 (case (string->symbol (tdb:step-get-state step))
	   ((start)(vector-set! record 1 (tdb:step-get-event_time step))
	    (vector-set! record 3 (if (equal? (vector-ref record 3) "")
				      (tdb:step-get-status step)))
	    (if (> (string-length (tdb:step-get-logfile step))
		   0)
		(vector-set! record 5 (tdb:step-get-logfile step))))
	   ((end)  
	    (vector-set! record 2 (any->number (tdb:step-get-event_time step)))
	    (vector-set! record 3 (tdb:step-get-status step))
	    (vector-set! record 4 (let ((startt (any->number (vector-ref record 1)))
					(endt   (any->number (vector-ref record 2))))
				    (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) 
						 ", startt=" startt ", endt=" endt
						 ", get-status: " (tdb:step-get-status step))
				    (if (and (number? startt)(number? endt))
					(seconds->hr-min-sec (- endt startt)) "-1")))
	    (if (> (string-length (tdb:step-get-logfile step))
		   0)
		(vector-set! record 5 (tdb:step-get-logfile step))))
	   (else
	    (vector-set! record 2 (tdb:step-get-state step))
	    (vector-set! record 3 (tdb:step-get-status step))
	    (vector-set! record 4 (tdb:step-get-event_time step))))
	 (hash-table-set! res (tdb:step-get-stepname step) record)
	 (debug:print 6 *default-log-port* "record(after)  = " record 
		      "\nid:       " (tdb:step-get-id step)
		      "\nstepname: " (tdb:step-get-stepname step)
		      "\nstate:    " (tdb:step-get-state step)
		      "\nstatus:   " (tdb:step-get-status step)
		      "\ntime:     " (tdb:step-get-event_time step))))
     ;; (else   (vector-set! record 1 (tdb:step-get-event_time step)))
     (sort steps (lambda (a b)
		   (cond
		    ((<   (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t)
		    ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) 
		     (<   (tdb:step-get-id a)        (tdb:step-get-id b)))
		    (else #f)))))
    res))

;; Move this to steps.scm
;;
;; get a pretty table to summarize steps
;;
(define (tdb:get-steps-table-list steps)
  ;; organise the steps for better readability
  (let ((res (make-hash-table)))
    (for-each 
     (lambda (step)
       (debug:print 6 *default-log-port* "step=" step)
       (let ((record (hash-table-ref/default 
		      res 
		      (tdb:step-get-stepname step) 
		      ;;        stepname                start end status    
		      (vector (tdb:step-get-stepname step) ""   "" ""     "" ""))))
	 (debug:print 6 *default-log-port* "record(before) = " record 
		      "\nid:       " (tdb:step-get-id step)
		      "\nstepname: " (tdb:step-get-stepname step)
		      "\nstate:    " (tdb:step-get-state step)
		      "\nstatus:   " (tdb:step-get-status step)
		      "\ntime:     " (tdb:step-get-event_time step))
	 (case (string->symbol (tdb:step-get-state step))
	   ((start)(vector-set! record 1 (tdb:step-get-event_time step))
	    (vector-set! record 3 (if (equal? (vector-ref record 3) "")
				      (tdb:step-get-status step)))
	    (if (> (string-length (tdb:step-get-logfile step))
		   0)
		(vector-set! record 5 (tdb:step-get-logfile step))))
	   ((end)  
	    (vector-set! record 2 (any->number (tdb:step-get-event_time step)))
	    (vector-set! record 3 (tdb:step-get-status step))
	    (vector-set! record 4 (let ((startt (any->number (vector-ref record 1)))
					(endt   (any->number (vector-ref record 2))))
				    (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) 
						 ", startt=" startt ", endt=" endt
						 ", get-status: " (tdb:step-get-status step))
				    (if (and (number? startt)(number? endt))
					(seconds->hr-min-sec (- endt startt)) "-1")))
	    (if (> (string-length (tdb:step-get-logfile step))
		   0)
		(vector-set! record 5 (tdb:step-get-logfile step))))
	   (else
	    (vector-set! record 2 (tdb:step-get-state step))
	    (vector-set! record 3 (tdb:step-get-status step))
	    (vector-set! record 4 (tdb:step-get-event_time step))))
	 (hash-table-set! res (tdb:step-get-stepname step) record)
	 (debug:print 6 *default-log-port* "record(after)  = " record 
		      "\nid:       " (tdb:step-get-id step)
		      "\nstepname: " (tdb:step-get-stepname step)
		      "\nstate:    " (tdb:step-get-state step)
		      "\nstatus:   " (tdb:step-get-status step)
		      "\ntime:     " (tdb:step-get-event_time step))))
     ;; (else   (vector-set! record 1 (tdb:step-get-event_time step)))
     (sort steps (lambda (a b)
		   (cond
		    ((<   (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t)
		    ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) 
		     (<   (tdb:step-get-id a)        (tdb:step-get-id b)))
		    (else #f)))))
    res))

;;
;; Move to steps.scm
;;
(define (tdb:get-compressed-steps comprsteps) ;; from tdb:get-steps-table
  (map (lambda (x)
	 ;; take advantage of the \n on time->string
	 (vector
	  (vector-ref x 0)
	  (let ((s (vector-ref x 1)))
	    (if (number? s)(seconds->time-string s) s))
	  (let ((s (vector-ref x 2)))
	    (if (number? s)(seconds->time-string s) s))
	  (vector-ref x 3)    ;; status
	  (vector-ref x 4)
	  (vector-ref x 5)))  ;; time delta
       (sort (hash-table-values comprsteps)
	     (lambda (a b)
	       (let ((time-a (vector-ref a 1))
		     (time-b (vector-ref b 1)))
		 (if (and (number? time-a)(number? time-b))
		     (if (< time-a time-b)
			 #t
			 (if (eq? time-a time-b)
			     (string<? (conc (vector-ref a 2))
				       (conc (vector-ref b 2)))
			     #f))
		     (string<? (conc time-a)(conc time-b))))))))

;; 
(define (tdb:remote-update-testdat-meta-info run-id test-id work-area cpuload diskfree minutes)
  (let ((tdb         (rmt:open-test-db-by-test-id run-id test-id work-area: work-area)))
    (if (sqlite3:database? tdb)
	(begin
	  (sqlite3:execute tdb "INSERT INTO test_rundat (update_time,cpuload,diskfree,run_duration) VALUES (strftime('%s','now'),?,?,?);"
			   cpuload diskfree minutes)
	  (sqlite3:finalize! tdb))
	(debug:print 2 *default-log-port* "Can't update testdat.db for test " test-id " read-only or non-existant"))))
    
;;======================================================================
;; Tasks db
;;======================================================================

;; wait up to aprox n seconds for a journal to go away
;;
(define (tasks:wait-on-journal path n #!key (remove #f)(waiting-msg #f))
  (if (not (string? path))
      (debug:print-error 0 *default-log-port* "Called tasks:wait-on-journal with path=" path " (not a string)")
      (let ((fullpath (conc path "-journal")))
	(handle-exceptions
	 exn
	 (begin
	   (print-call-chain (current-error-port))
	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	   (debug:print 5 *default-log-port* " exn=" (condition->list exn))
	   (debug:print 0 *default-log-port* "tasks:wait-on-journal failed. Continuing on, you can ignore this call-chain")
	   #t) ;; if stuff goes wrong just allow it to move on
	 (let loop ((journal-exists (file-exists? fullpath))
		    (count          n)) ;; wait ten times ...
	   (if journal-exists
	       (begin
		 (if (and waiting-msg
			  (eq? (modulo n 30) 0))
		     (debug:print 0 *default-log-port* waiting-msg))
		 (if (> count 0)
		     (begin
		       (thread-sleep! 1)
		       (loop (common:file-exists? fullpath)
			     (- count 1)))
		     (begin
		       (debug:print 0 *default-log-port* "ERROR: removing the journal file " fullpath ", this is not good. Look for disk full, write access and other issues.")
		       (if remove (system (conc "rm -rf " fullpath)))
		       #f)))
	       #t))))))

#;(define (tasks:get-task-db-path)
  (let ((dbdir  (or (configf:lookup *configdat* "setup" "monitordir")
		    (configf:lookup *configdat* "setup" "dbdir")
		    (conc (common:get-linktree) "/.db"))))
    (handle-exceptions
     exn
     (begin
       (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir)
       (exit 1))
     (if (not (directory? dbdir))(create-directory dbdir #t)))
    dbdir))

;; If file exists AND
;;    file readable
;;         ==> open it
;; If file exists AND
;;    file NOT readable
;;         ==> open in-mem version
;; If file NOT exists
;;    ==> open in-mem version
;;
#;(define (tasks:open-db alldat #!key (numretries 4))
  
  (if *task-db*
      *task-db*
      (handle-exceptions
       exn
       (if (> numretries 0)
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))
	     (thread-sleep! 1)
	     (tasks:open-db numretries (- numretries 1)))
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 5 *default-log-port* " exn=" (condition->list exn))))
       (let* ((dbpath        (common:get-db-tmp-area *alldat*)) ;; (tasks:get-task-db-path))
	      (dbfile       (conc dbpath "/monitor.db"))
	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
	      (exists       (common:file-exists? dbpath))
	      (write-access (file-write-access? dbpath))
	      (mdb          (cond ;; what the hek is *toppath* doing here?
			     ((and (string? *toppath*)(file-write-access? *toppath*))
			      (sqlite3:open-database dbfile))
			     ((file-read-access? dbpath)    (sqlite3:open-database dbfile))
			     (else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath))
	      (handler      (make-busy-timeout 36000)))
	 (if (and exists
		  (not write-access))
	     (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control
	 (sqlite3:set-busy-handler! mdb handler)
	 (db:set-sync mdb) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;"))
	 ;;  (if (or (and (not exists)
	 ;; 	      (file-write-access? *toppath*))
	 ;; 	 (not (file-read-access? dbpath)))
	 ;;      (begin
	 ;; 
	 ;; TASKS QUEUE MOVED TO main.db
	 ;;
	 ;; (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY,
         ;;                        action TEXT DEFAULT '',
         ;;                        owner TEXT,
         ;;                        state TEXT DEFAULT 'new',
         ;;                        target TEXT DEFAULT '',
         ;;                        name TEXT DEFAULT '',
         ;;                        testpatt TEXT DEFAULT '',
         ;;                        keylock TEXT,
         ;;                        params TEXT,
         ;;                        creation_time TIMESTAMP,
         ;;                        execution_time TIMESTAMP);")
	 (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY,
                                pid INTEGER,
                                start_time TIMESTAMP,
                                last_update TIMESTAMP,
                                hostname TEXT,
                                username TEXT,
                               CONSTRAINT monitors_constraint UNIQUE (pid,hostname));")
	 (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY,
                                  pid INTEGER,
                                  interface TEXT,
                                  hostname TEXT,
                                  port INTEGER,
                                  pubport INTEGER,
                                  start_time TIMESTAMP,
                                  priority INTEGER,
                                  state TEXT,
                                  mt_version TEXT,
                                  heartbeat TIMESTAMP,
                                  transport TEXT,
                                  run_id INTEGER);")
	 ;;                               CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));")
	 (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY,
                                  server_id INTEGER,
                                  pid INTEGER,
                                  hostname TEXT,
                                  cmdline TEXT,
                                  login_time TIMESTAMP,
                                  logout_time TIMESTAMP DEFAULT -1,
                                CONSTRAINT clients_constraint UNIQUE (pid,hostname));")
	       
	       ;))
	 (set! *task-db* (cons mdb dbpath))
	 *task-db*))))

;;======================================================================
;; Server and client management
;;======================================================================

;; make-vector-record tasks hostinfo id interface port pubport transport pid hostname
(define (tasks:hostinfo-get-id          vec)    (vector-ref  vec 0))
(define (tasks:hostinfo-get-interface   vec)    (vector-ref  vec 1))
(define (tasks:hostinfo-get-port        vec)    (vector-ref  vec 2))
(define (tasks:hostinfo-get-pubport     vec)    (vector-ref  vec 3))
(define (tasks:hostinfo-get-transport   vec)    (vector-ref  vec 4))
(define (tasks:hostinfo-get-pid         vec)    (vector-ref  vec 5))
(define (tasks:hostinfo-get-hostname    vec)    (vector-ref  vec 6))

#;(define (tasks:need-server run-id)
  (equal? (configf:lookup *configdat* "server" "required") "yes"))

;; no elegance here ...
;;
#;(define (tasks:kill-server hostname pid #!key (kill-switch ""))
  (debug:print-info 0 *default-log-port* "Attempting to kill server process " pid " on host " hostname)
  (setenv "TARGETHOST" hostname)
  (let* ((logdir (if (directory-exists? "logs")
                    "logs/"
                    ""))
         (logfile (if logdir (conc "logs/server-"pid"-"hostname".log") #f))
         (gzfile  (if logfile (conc logfile ".gz"))))
    (setenv "TARGETHOST_LOGF" (conc logdir "server-kills.log"))

    (system (conc "nbfake kill "kill-switch" "pid))

    (when logfile
      (thread-sleep! 0.5)
      (if (common:file-exists? gzfile) (delete-file gzfile))
      (system (conc "gzip " logfile))
      
      (unsetenv "TARGETHOST_LOGF")
      (unsetenv "TARGETHOST"))))
    
 
;;======================================================================
;; M O N I T O R S
;;======================================================================

#;(define (tasks:remove-monitor-record mdb)
  (sqlite3:execute mdb "DELETE FROM monitors WHERE pid=? AND hostname=?;"
		   (current-process-id)
		   (get-host-name)))

#;(define (tasks:get-monitors mdb)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (a . rem)
       (set! res (cons (apply vector a rem) res)))
     mdb
     "SELECT id,pid,strftime('%m/%d/%Y %H:%M',datetime(start_time,'unixepoch'),'localtime'),strftime('%m/%d/%Y %H:%M:%S',datetime(last_update,'unixepoch'),'localtime'),hostname,username FROM monitors ORDER BY last_update ASC;")
    (reverse res)
    ))

#;(define (tasks:monitors->text-table monitors)
  (let ((fmtstr "~4a~8a~20a~20a~10a~10a"))
    (conc (format #f fmtstr "id" "pid" "start time" "last update" "hostname" "user") "\n"
	  (string-intersperse 
	   (map (lambda (monitor)
		  (format #f fmtstr
			  (tasks:monitor-get-id          monitor)
			  (tasks:monitor-get-pid         monitor)
			  (tasks:monitor-get-start_time  monitor)
			  (tasks:monitor-get-last_update monitor)
			  (tasks:monitor-get-hostname    monitor)
			  (tasks:monitor-get-username    monitor)))
		monitors)
	   "\n"))))
   
;; update the last_update field with the current time and
;; if any monitors appear dead, remove them
#;(define (tasks:monitors-update mdb)
  (sqlite3:execute mdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;"
			  (current-process-id)
			  (get-host-name))
  (let ((deadlist '()))
    (sqlite3:for-each-row
     (lambda (id pid host last-update delta)
       (print "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago")
       (set! deadlist (cons id deadlist)))
     mdb 
     "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;")
    (sqlite3:execute mdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');")))
  )
#;(define (tasks:register-monitor db port)
  (let* ((pid (current-process-id))
	 (hostname (get-host-name))
	 (userinfo (user-information (current-user-id)))
	 (username (car userinfo)))
    (print "Register monitor, pid: " pid ", hostname: " hostname ", port: " port ", username: " username)
    (sqlite3:execute db "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);"
		     pid hostname username)))

#;(define (tasks:get-num-alive-monitors mdb)
  (let ((res 0))
    (sqlite3:for-each-row 
     (lambda (count)
       (set! res count))
     mdb
     "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;"
     (car (user-information (current-user-id))))
    res))

;; 
#;(define (tasks:start-monitor db mdb)
  (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more
      (debug:print-info 1 *default-log-port* "Not starting monitor, already have more than two running")
      (let* ((megatestdb     (conc *toppath* "/megatest.db"))
	     (monitordbf     (conc (common:get-db-tmp-area *alldat*) "/monitor.db"))
	     (last-db-update 0)) ;; (file-modification-time megatestdb)))
	(task:register-monitor mdb)
	(let loop ((count      0)
		   (next-touch 0)) ;; next-touch is the time where we need to update last_update
	  ;; if the db has been modified we'd best look at the task queue
	  (let ((modtime (file-modification-time megatestdbpath )))
	    (if (> modtime last-db-update)
		(tasks:process-queue db)) ;; BROKEN. mdb last-db-update megatestdb next-touch))
	    ;; WARNING: Possible race conditon here!!
	    ;; should this update be immediately after the task-get-action call above?
	    (if (> (current-seconds) next-touch)
		(begin
		  (tasks:monitors-update mdb)
		  (loop (+ count 1)(+ (current-seconds) 240)))
		(loop (+ count 1) next-touch)))))))
      
;;======================================================================
;; T A S K S   Q U E U E
;;
;;   NOTE:: These operate on task_queue which is in main.db
;;
;;======================================================================

;; NOTE: It might be good to add one more layer of checking to ensure
;;       that no task gets run in parallel.

;; id INTEGER PRIMARY KEY,
;; action TEXT DEFAULT '',
;; owner TEXT,
;; state TEXT DEFAULT 'new',
;; target TEXT DEFAULT '',
;; name TEXT DEFAULT '',
;; testpatt TEXT DEFAULT '',
;; keylock TEXT,
;; params TEXT,
;; creation_time TIMESTAMP DEFAULT (strftime('%s','now')),
;; execution_time TIMESTAMP);


;; register a task
(define (tasks:add dbstruct action owner target runname testpatt params)
  (db:with-db 
   dbstruct #f #t
   (lambda (db)
     (sqlite3:execute db "INSERT INTO tasks_queue (action,owner,state,target,name,testpatt,params,creation_time,execution_time)
                             VALUES (?,?,'new',?,?,?,?,strftime('%s','now'),0);" 
		      action
		      owner
		      target
		      runname
		      testpatt
		      (if params params "")))))

(define (keys:key-vals-hash->target keys key-params)
  (let ((tmp (hash-table-ref/default key-params (vector-ref (car keys) 0) "")))
    (if (> (length keys) 1)
	(for-each (lambda (key)
		    (set! tmp (conc tmp "/" (hash-table-ref/default key-params (vector-ref key 0) ""))))
		  (cdr keys)))
    tmp))
								
;; for use from the gui, not ported
;;
;; (define (tasks:add-from-params mdb action keys key-params var-params)
;;   (let ((target    (keys:key-vals-hash->target keys key-params))
;; 	(owner     (car (user-information (current-user-id))))
;; 	(runname   (hash-table-ref/default var-params "runname" #f))
;; 	(testpatts (hash-table-ref/default var-params "testpatts" "%"))
;; 	(params    (hash-table-ref/default var-params "params"    "")))
;;     (tasks:add mdb action owner target runname testpatts params)))

;; return one task from those who are 'new' OR 'waiting' AND more than 10sec old
;;
#;(define (tasks:snag-a-task dbstruct)
  (let ((res    #f)
	(keytxt (conc (current-process-id) "-" (get-host-name) "-" (car (user-information (current-user-id))))))
    (db:with-db
     dbstruct #f #t
     (lambda (db)
       ;; first randomly set a new to pid-hostname-hostname
       (sqlite3:execute
	db 
	"UPDATE tasks_queue SET keylock=? WHERE id IN
           (SELECT id FROM tasks_queue 
              WHERE state='new' OR 
                    (state='waiting' AND (strftime('%s','now')-execution_time) > 10) OR
                    state='reset'
              ORDER BY RANDOM() LIMIT 1);" keytxt)

       (sqlite3:for-each-row
	(lambda (id . rem)
	  (set! res (apply vector id rem)))
	db
	"SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue WHERE keylock=? ORDER BY execution_time ASC LIMIT 1;" keytxt)
       (if res ;; yep, have work to be done
	   (begin
	     (sqlite3:execute db "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;"
			      (tasks:task-get-id res))
	     res)
	   #f)))))

#;(define (tasks:reset-stuck-tasks dbstruct)
  (let ((res '()))
    (db:with-db
     dbstruct #f #t
     (lambda (db)
       (sqlite3:for-each-row
	(lambda (id delta)
	  (set! res (cons id res)))
	db
	"SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;")
       (sqlite3:execute 
	db 
	(conc "UPDATE tasks_queue SET state='reset' WHERE id IN ('" (string-intersperse (map conc res) "','") "');")
	)))))

;; return all tasks in the tasks_queue table
;;
#;(define (tasks:get-tasks dbstruct types states)
  (let ((res '()))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row
	(lambda (id . rem)
	  (set! res (cons (apply vector id rem) res)))
	db
	(conc "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time 
                  FROM tasks_queue "
	      ;; WHERE  
	      ;;   state IN " statesstr " AND 
	      ;;   action IN " actionsstr 
	      " ORDER BY creation_time DESC;"))
       res))))

(define (tasks:get-last dbstruct target runname)
  (let ((res #f))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row
	(lambda (id . rem)
	  (set! res (apply vector id rem)))
	db
	(conc "SELECT id,action,owner,state,target,name,testpatt,keylock,params,creation_time,execution_time 
                  FROM tasks_queue 
 	       WHERE  
	        target = ? AND name =?
	       ORDER BY creation_time DESC LIMIT 1;")
	target runname)
       res))))

;; remove tasks given by a string of numbers comma separated
(define (tasks:remove-queue-entries dbstruct task-ids)
  (db:with-db
   dbstruct #f #t
   (lambda (db)
     (sqlite3:execute db (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");")))))

#;(define (tasks:process-queue dbstruct)
  (let* ((task   (tasks:snag-a-task dbstruct))
	 (action (if task (tasks:task-get-action task) #f)))
    (if action (print "tasks:process-queue task: " task))
    (if action
	(case (string->symbol action)
	  ((run)       (tasks:start-run     dbstruct task))
	  ((remove)    (tasks:remove-runs   dbstruct task))
	  ((lock)      (tasks:lock-runs     dbstruct task))
	  ;; ((monitor)   (tasks:start-monitor db task))
	  ((rollup)    (tasks:rollup-runs   dbstruct task))
	  ;; ((updatemeta)(tasks:update-meta   dbstruct task))
	  ((kill)      (tasks:kill-monitors dbstruct task))))))

(define (tasks:tasks->text tasks)
  (let ((fmtstr "~10a~10a~10a~12a~20a~12a~12a~10a"))
    (conc (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "params") "\n"
	  (string-intersperse 
	   (map (lambda (task)
		  (format #f fmtstr
			  (tasks:task-get-id     task)
			  (tasks:task-get-action task)
			  (tasks:task-get-owner  task)
			  (tasks:task-get-state  task)
			  (tasks:task-get-target task)
			  (tasks:task-get-name   task)
			  ;; (tasks:task-get-test   task)
			  ;; (tasks:task-get-item   task)
			  (tasks:task-get-params task)))
		tasks) "\n"))))
   
(define (tasks:set-state dbstruct task-id state)
  (db:with-db 
   dbstruct #f #t
   (lambda (db)
     (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE id=?;" 
		      state 
		      task-id))))

;;======================================================================
;; Access using task key (stored in params; (hash-table->alist flags) hostname pid
;;======================================================================

(define (tasks:param-key->id dbstruct task-params)
  (db:with-db
   dbstruct #f #f
   (lambda (db)
     (handle-exceptions
      exn
      #f
      (sqlite3:first-result db "SELECT id FROM tasks_queue WHERE params LIKE ?;"
			    task-params)))))

(define (tasks:set-state-given-param-key dbstruct param-key new-state)
  (db:with-db
   dbstruct #f #t
   (lambda (db)
     (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE params LIKE ?;" new-state param-key))))

(define (tasks:get-records-given-param-key dbstruct param-key state-patt action-patt test-patt)
  (db:with-db
   dbstruct #f #f
   (lambda (db)
     (handle-exceptions
      exn
      '()
      (sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE
                               params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
			 param-key state-patt action-patt test-patt)))))

(define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt)
  ;; (handle-exceptions
  ;;  exn
  ;;  '()
  ;;  (sqlite3:first-row
  (let ((db (db:delay-if-busy (db:get-db dbstruct)))
	(res '()))
    (sqlite3:for-each-row 
     (lambda (a . b)
       (set! res (cons (cons a b) res)))
     db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue 
           WHERE
              target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
     target run-name state-patt action-patt test-patt)
    res)) ;; )

;; kill any runner processes (i.e. processes handling -runtests) that match target/runname
;; 
;; do a remote call to get the task queue info but do the killing as self here.
;;
(define (tasks:kill-runner target run-name testpatt)
  (let ((records    (rmt:tasks-find-task-queue-records target run-name testpatt "running" "run-tests"))
	(hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string
    (if (null? records)
	(debug:print 0 *default-log-port* "No run launching processes found for " target " / " run-name " with testpatt " (or testpatt "* no testpatt specified! *"))
	(debug:print 0 *default-log-port* "Found " (length records) " run(s) to kill."))
    (for-each 
     (lambda (record)
       (let* ((param-key (list-ref record 8))
	      (match-dat (string-search hostpid-rx param-key)))
	 (if match-dat
	     (let ((hostname  (cadr match-dat))
		   (pid       (string->number (caddr match-dat))))
	       (debug:print 0 *default-log-port* "Sending SIGINT to process " pid " on host " hostname)
	       (if (equal? (get-host-name) hostname)
		   (if (process:alive? pid)
		       (begin
			 (handle-exceptions
			  exn
			  (begin
			    (debug:print 0 *default-log-port* "Kill of process " pid " on host " hostname " failed.")
			    (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
			    #t)
			  (process-signal pid signal/int)
			  (thread-sleep! 5)
			  (if (process:alive? pid)
			      (process-signal pid signal/kill)))))
		   ;;  (call-with-environment-variables
		   (let ((old-targethost (getenv "TARGETHOST")))
		     (setenv "TARGETHOST" hostname)
		     (setenv "TARGETHOST_LOGF" "server-kills.log")
		     (system (conc "nbfake kill " pid))
		     (if old-targethost (setenv "TARGETHOST" old-targethost))
		     (unsetenv "TARGETHOST")
		     (unsetenv "TARGETHOST_LOGF"))))
	     (debug:print-error 0 *default-log-port* "no record or improper record for " target "/" run-name " in tasks_queue in main.db"))))
     records)))

;; (define (tasks:start-run dbstruct mdb task)
;;   (let ((flags (make-hash-table)))
;;     (hash-table-set! flags "-rerun" "NOT_STARTED")
;;     (if (not (string=? (tasks:task-get-params task) ""))
;; 	(hash-table-set! flags "-setvars" (tasks:task-get-params task)))
;;     (print "Starting run " task)
;;     ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY
;;     (runs:run-tests db
;; 		    (tasks:task-get-target task)
;; 		    (tasks:task-get-name   task)
;; 		    (tasks:task-get-test   task)
;; 		    (tasks:task-get-item   task)
;; 		    (tasks:task-get-owner  task)
;; 		    flags)
;;     (tasks:set-state mdb (tasks:task-get-id task) "waiting")))
;; 
;; (define (tasks:rollup-runs db mdb task)
;;   (let* ((flags (make-hash-table)) 
;; 	 (keys  (db:get-keys db))
;; 	 (keyvals (keys:target-keyval keys (tasks:task-get-target task))))
;;     ;; (hash-table-set! flags "-rerun" "NOT_STARTED")
;;     (print "Starting rollup " task)
;;     ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY
;;     (runs:rollup-run db
;; 		     keys 
;; 		     keyvals
;; 		     (tasks:task-get-name  task)
;; 		     (tasks:task-get-owner  task))
;;     (tasks:set-state mdb (tasks:task-get-id task) "waiting")))

;;======================================================================
;;  S Y N C   T O   P O S T G R E S Q L
;;======================================================================

;; In the spirit of "dump your junk in the tasks module" I'll put the
;; sync to postgres here for now.

;; attempt to automatically set up an area. call only if get area by path
;; returns naught of interest
;;
#;(define (tasks:set-area dbh configdat #!key (toppath #f)) ;; could I safely put *toppath* in for the default for toppath? when would it be evaluated?
  (let loop ((area-name (or (configf:lookup configdat "setup" "area-name")
			    (common:get-area-name *alldat*)))
	     (modifier  'none))
    (let ((success (handle-exceptions
		       exn
		       (begin
			 (debug:print 0 *default-log-port* "ERROR: cannot create area entry, " ((condition-property-accessor 'exn 'message) exn))
			 #f) ;; FIXME: I don't care for now but I should look at *why* there was an exception
		     (pgdb:add-area dbh area-name (or toppath *toppath*)))))
      (or success
	  (case modifier
	    ((none)(loop (conc (current-user-name) "_" area-name) 'user))
	    ((user)(loop (conc (substring (common:get-area-path-signature) 0 4)
			       area-name) 'areasig))
	    (else #f)))))) ;; give up

#;(define (task:print-runtime run-times saperator)
(for-each
    (lambda (run-time-info)
     (let* ((run-name  (vector-ref run-time-info 0))
            (run-time  (vector-ref run-time-info 1))
            (target  (vector-ref run-time-info 2)))
        (print target saperator run-name saperator run-time )))
   run-times))

#;(define (task:print-runtime-as-json run-times)
 (let loop ((run-time-info (car run-times))
            (rema (cdr run-times)) 
            (str ""))
     (let* ((run-name  (vector-ref run-time-info 0))
            (run-time  (vector-ref run-time-info 1))
            (target  (vector-ref run-time-info 2)))
        ;(print (not (equal? str "")))
        (if (not (equal? str "")) 
            (set! str (conc str ",")))
        (if (null? rema)
		(print "[" str "{target:" target ",run-name:" run-name ", run-time:" run-time "}]")
            (loop (car rema) (cdr rema) (conc str "{target:" target ", run-name:" run-name ", run-time:" run-time "}"))))))

#;(define (task:get-run-times)
   (let* ( 
           (run-patt (if (args:get-arg "-run-patt")
                        (args:get-arg "-run-patt")
                        "%"))
           (target-patt (if (args:get-arg "-target-patt")
                        (args:get-arg "-target-patt")
                        "%"))
 
           (run-times  (rmt:get-run-times  run-patt target-patt )))
   (if (eq? (length run-times) 0)
     (begin
       (print "Data not found!!")
       (exit)))
   (if (equal? (args:get-arg "-dumpmode") "json")
       (task:print-runtime-as-json run-times)
         (if (equal? (args:get-arg "-dumpmode") "csv")
	     (task:print-runtime run-times ",")
	     (task:print-runtime run-times "  ")))))


#;(define (task:print-testtime test-times saperator)
(for-each
    (lambda (test-time-info)
     (let* ((test-name  (vector-ref test-time-info 0))
            (test-time  (vector-ref test-time-info 2))
            (test-item  (if (eq? (string-length (vector-ref test-time-info 1)) 0)
                               "N/A"
				(vector-ref test-time-info 1))))
        (print  test-name saperator test-item saperator test-time )))
   test-times))

#;(define (task:print-testtime-as-json test-times)
 (let loop ((test-time-info (car test-times))
            (rema (cdr test-times)) 
            (str ""))
     (let* ((test-name  (vector-ref test-time-info 0))
            (test-time  (vector-ref test-time-info 2))
            (item  (vector-ref test-time-info 1)))
        ;(print (not (equal? str "")))
        (if (not (equal? str "")) 
            (set! str (conc str ",")))
        (if (null? rema)
		(print "[" str "{test-name:" test-name ", item-path:" item ", test-time:" test-time "}]")
            (loop (car rema) (cdr rema) (conc str "{test-name:" test-name ", item-path:" item ", test-time:" test-time "}"))))))


#; (define (task:get-test-times)
   (let* ((runname (if (args:get-arg "-runname")
                        (args:get-arg "-runname")
                        #f))
           (target (if (args:get-arg "-target")
                        (args:get-arg "-target")
                        #f))
 
           (test-times  (rmt:get-test-times  runname target )))
   (if (not runname)
      (begin
      (print "Error: Missing argument -runname")
      (exit))) 
    (if (string-contains runname "%")
      (begin
      (print "Error: Invalid runname, '%' not allowed  (" runname ") ")
      (exit)))
    (if (not target)
      (begin
      (print "Error: Missing argument -target")
      (exit)))
     (if  (string-contains target "%")
      (begin
      (print "Error: Invalid target, '%' not allowed  (" target ") ")
      (exit)))
 
   (if (eq? (length test-times) 0)
     (begin
       (print "Data not found!!")
       (exit)))
   (if (equal? (args:get-arg "-dumpmode") "json")
       (task:print-testtime-as-json test-times)
         (if (equal? (args:get-arg "-dumpmode") "csv")
	     (task:print-testtime test-times ",")
	     (task:print-testtime test-times "  ")))))



;; gets mtpg-run-id and syncs the record if different
;;
#;(define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)
  (let* ((runs-ht (hash-table-ref cached-info 'runs))
	 (runinf  (hash-table-ref/default runs-ht run-id #f))
         (area-id (vector-ref area-info 0)))
       (if runinf
	runinf ;; already cached
	(let* ((run-dat    (rmt:get-run-info run-id))               ;; NOTE: get-run-info returns a vector < row header >
	       (run-name   (rmt:get-run-name-from-id run-id))
	       (row        (db:get-rows run-dat))                   ;; yes, this returns a single row
	       (header     (db:get-header run-dat))
	       (state      (db:get-value-by-header row header "state"))
	       (status     (db:get-value-by-header row header "status"))
	       (owner      (db:get-value-by-header row header "owner"))
	       (event-time (db:get-value-by-header row header "event_time"))
	       (comment    (db:get-value-by-header row header "comment"))
	       (fail-count (db:get-value-by-header row header "fail_count"))
	       (pass-count (db:get-value-by-header row header "pass_count"))
         (db-contour (db:get-value-by-header row header "contour"))
	       (contour    (if (args:get-arg "-prepend-contour") 
                                 (if (and db-contour (not (equal? db-contour ""))  (string? db-contour )) 
                                           (begin 
                                            (debug:print-info 1 *default-log-port*  "db-contour") 
 						db-contour)
					    (args:get-arg "-contour"))))
         (run-tag (if (args:get-arg "-run-tag")
                            (args:get-arg "-run-tag")
									""))
         (last-update (db:get-value-by-header row header "last_update"))
	       (keytarg    (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target"))
	       			(conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/")) (string-intersperse (rmt:get-keys) "/"))) ;; e.g. version/iteration/platform
	       (target     (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) 
	       			(conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name *alldat*) "/")) (rmt:get-target run-id)) (rmt:get-target run-id)))                 ;; e.g. v1.63/a3e1/ubuntu
	       (spec-id    (pgdb:get-ttype dbh keytarg))
	       (publish-time (if (args:get-arg "-cp-eventtime-to-publishtime")
                            event-time
                           (current-seconds))) 
	       (new-run-id (pgdb:get-run-id dbh spec-id target run-name area-id)))
         (if new-run-id
	         (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id))
		        (hash-table-set! runs-ht run-id new-run-id)
		;; ensure key fields are up to date
     ;; if last_update == pgdb_last_update do not update smallest-last-update-time  
    (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id))
           (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
     (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
        (hash-table-set! smallest-last-update-time "smallest-time" last-update)))
		(pgdb:refresh-run-info
		 dbh
		 new-run-id
		 state status owner event-time comment fail-count pass-count area-id last-update publish-time)
     (debug:print-info 0 *default-log-port* "Working on run-id " run-id " pgdb-id "  new-run-id )
     (if (not (equal? run-tag ""))
      (task:add-run-tag dbh new-run-id run-tag))
		new-run-id) 
      
	      (if (equal? state "deleted")
          (begin 
          (debug:print-info 1 *default-log-port*  "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f)
          (if (handle-exceptions
		        exn
		        (begin (print-call-chain)
              (print ((condition-property-accessor 'exn 'message) exn))     
			      #f)
            
            (pgdb:insert-run
		     dbh
		     spec-id target run-name state status owner event-time comment fail-count pass-count  area-id last-update publish-time))
		       (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
             (if (or (not smallest-time) (< last-update smallest-time))
        				(hash-table-set! smallest-last-update-time "smallest-time" last-update))
             (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
		  #f)))))))

#;(define (task:add-run-tag dbh run-id tag) 
  (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag)))
   (if (not tag-info)
     (begin   
     (if (handle-exceptions
	   exn
	   (begin 
               (debug:print-info 1 *default-log-port*  ((condition-property-accessor 'exn 'message) exn))     
	   #f)
	   (pgdb:insert-tag  dbh   tag))
                       (set! tag-info (pgdb:get-tag-info-by-name dbh tag))
		  #f)))
     ;;add to area_tags
     (handle-exceptions
	   exn
	   (begin 
               (debug:print-info 1 *default-log-port*  ((condition-property-accessor 'exn 'message) exn))     
	   #f)
           (if (not (pgdb:is-run-taged-with-a-tag dbh (vector-ref tag-info 0)  run-id))  
	   (pgdb:insert-run-tag  dbh   (vector-ref tag-info 0)  run-id)))))


#;(define (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time)
 ; (print "Sync Steps " test-step-ids )
  (let ((test-ht (hash-table-ref cached-info 'tests))
        (step-ht (hash-table-ref cached-info 'steps)))
    (for-each
     (lambda (test-step-id)
        (let* ((test-step-info  (rmt:get-steps-info-by-id test-step-id))
               (step-id (tdb:step-get-id test-step-info))
               (test-id  (tdb:step-get-test_id    test-step-info))   
	       (stepname (tdb:step-get-stepname  test-step-info))
	       (state (tdb:step-get-state test-step-info))	
	       (status (tdb:step-get-status test-step-info))	
	       (event_time (tdb:step-get-event_time  test-step-info))	
	       (comment  (tdb:step-get-comment test-step-info))	
	       (logfile (tdb:step-get-logfile test-step-info))	
         (last-update (tdb:step-get-last_update test-step-info))
	       (pgdb-test-id  (hash-table-ref/default test-ht test-id #f))
				 (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))
         (pgdb-step-id (if pgdb-test-id 
                         (pgdb:get-test-step-id dbh pgdb-test-id stepname state)
                          #f)))
    (if step-id
      (begin  
        (if pgdb-test-id
           (begin 
                (if  pgdb-step-id
                   (begin
                    (debug:print-info 1 *default-log-port*  "Updating existing test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id " pgdb step id " pgdb-step-id )
										(let* ((pgdb-last-update (pgdb:get-test-step-last-update dbh pgdb-step-id)))
         (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
                    (pgdb:update-test-step dbh pgdb-step-id pgdb-test-id stepname state status event_time comment logfile last-update))
                    (begin
 		      (debug:print-info 1 *default-log-port*  "Inserting test-step with test-id: " test-id " and step-id " step-id  " pgdb test id: " pgdb-test-id)
                     (if (or (not smallest-time) (< last-update smallest-time))
        				      (hash-table-set! smallest-last-update-time "smallest-time" last-update))
                      (pgdb:insert-test-step dbh pgdb-test-id stepname state status event_time comment logfile last-update )
                      (set! pgdb-step-id  (pgdb:get-test-step-id dbh pgdb-test-id stepname state))))
                (hash-table-set! step-ht step-id pgdb-step-id ))
           (debug:print-info 1 *default-log-port*  "Error: Test not cashed")))
      (debug:print-info 1 *default-log-port*  "Error: Could not get test step info for step id " test-step-id ))))	;; this is a wierd senario need to debug      	
   test-step-ids)))

#;(define (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time)
  (let ((test-ht (hash-table-ref cached-info 'tests))
        (data-ht (hash-table-ref cached-info 'data)))
    (for-each
     (lambda (test-data-id)
        (let* ((test-data-info  (rmt:get-data-info-by-id test-data-id))
               (data-id (db:test-data-get-id  test-data-info))
               (test-id  (db:test-data-get-test_id   test-data-info))   
	       (category  (db:test-data-get-category  test-data-info))
	       (variable  (db:test-data-get-variable test-data-info))	
	       (value (db:test-data-get-value  test-data-info))	
               (expected (db:test-data-get-expected  test-data-info))
               (tol (db:test-data-get-tol  test-data-info))
               (units (db:test-data-get-units  test-data-info))     
	       (comment  (db:test-data-get-comment test-data-info))	
               (status (db:test-data-get-status test-data-info))	
	       (type (db:test-data-get-type test-data-info))
				 (last-update (db:test-data-get-last_update test-data-info))
				 (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))
   	
	       (pgdb-test-id  (hash-table-ref/default test-ht test-id #f))
               (pgdb-data-id (if pgdb-test-id 
                                 (pgdb:get-test-data-id dbh pgdb-test-id category variable)
                                  #f)))
    (if data-id
      (begin
        (if pgdb-test-id
           (begin 
                (if  pgdb-data-id
                   (begin
                    (debug:print-info 1 *default-log-port*  "Updating existing test-data with test-id: " test-id " and  data-id " data-id " pgdb test id: " pgdb-test-id " pgdb data id " pgdb-data-id)
                    (let* ((pgdb-last-update (pgdb:get-test-data-last-update dbh pgdb-data-id)))
         (if (and  (>  last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
                    (pgdb:update-test-data dbh pgdb-data-id pgdb-test-id  category variable value expected tol units comment status type last-update))
                    (begin
 		      (debug:print-info 1 *default-log-port*  "Inserting test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id)
                       (if (handle-exceptions
		      exn
		      (begin (print-call-chain)
                              (print ((condition-property-accessor 'exn 'message) exn))     
			#f)
                     
                    (pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type last-update))
		       ;(tasks:run-id->mtpg-run-id dbh cached-info run-id area-info)
                      (begin
                      ;(pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type )
											(if (or (not smallest-time) (< last-update smallest-time))
        								(hash-table-set! smallest-last-update-time "smallest-time" last-update))
                      (set! pgdb-data-id  (pgdb:get-test-data-id dbh pgdb-test-id  category variable)))
		   #f)))
                (hash-table-set! data-ht data-id pgdb-data-id ))
             (begin
                 (debug:print-info 1 *default-log-port*  "Error: Test not in pgdb"))))

      (debug:print-info 1 *default-log-port*  "Error: Could not get test data info for data id " test-data-id ))))	;; this is a wierd senario need to debug      	
   test-data-ids)))



#;(define (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time)
  (let ((test-ht (hash-table-ref cached-info 'tests)))
    (for-each
     (lambda (test-id)
      ; (print test-id)
       (let* ((test-info    (rmt:get-test-info-by-id #f test-id))
	      (run-id       (db:test-get-run_id    test-info)) ;; look these up in db_records.scm
	      (test-id      (db:test-get-id        test-info))
	      (test-name    (db:test-get-testname  test-info))
	      (item-path    (db:test-get-item-path test-info))
	      (state        (db:test-get-state     test-info))
	      (status       (db:test-get-status    test-info))
	      (host         (db:test-get-host      test-info))
        (pid          (db:test-get-process_id test-info)) 
	      (cpuload      (db:test-get-cpuload   test-info))
	      (diskfree     (db:test-get-diskfree  test-info))
	      (uname        (db:test-get-uname     test-info))
	      (run-dir      (db:test-get-rundir    test-info))
	      (log-file     (db:test-get-final_logf test-info))
	      (run-duration (db:test-get-run_duration test-info))
	      (comment      (db:test-get-comment   test-info))
	      (event-time   (db:test-get-event_time test-info))
	      (archived     (db:test-get-archived  test-info))
        (last-update  (db:test-get-last_update  test-info))
	      (pgdb-run-id  (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
        (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))       
	      (pgdb-test-id (if pgdb-run-id 
				(begin
                                  ;(print pgdb-run-id)    
                                 (pgdb:get-test-id dbh pgdb-run-id test-name item-path))
                                 #f)))
	 ;; "id"           "run_id"        "testname"  "state"      "status"      "event_time"
	 ;; "host"         "cpuload"       "diskfree"  "uname"      "rundir"      "item_path"
	 ;; "run_duration" "final_logf"    "comment"   "shortdir"   "attemptnum"  "archived"
         (if pgdb-run-id
           (begin
	   (if pgdb-test-id ;; have a record
	     (begin ;; let ((key-name (conc run-id "/" test-name "/" item-path)))
	       (debug:print-info 0 *default-log-port*  "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id "  pgdb-test-id "  pgdb-test-id)
         (let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id)))
         (if (and  (>  last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time.
        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
	       (pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid))
	     (begin 
           (debug:print-info 0 *default-log-port*  "Inserting test with run-id: " run-id " and test-id: " test-id  " pgdb run id: " pgdb-run-id)
           (pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)
            (if (or (not smallest-time) (< last-update smallest-time))
        				(hash-table-set! smallest-last-update-time "smallest-time" last-update))
           (set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path))))
           (hash-table-set! test-ht test-id pgdb-test-id))
           (debug:print-info 1 *default-log-port*  "WARNING: Skipping run with run-id:" run-id ". This run was created after privious sync and removed before this sync."))))
     test-ids)))

#;(define (task:add-area-tag dbh area-info tag) 
  (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag)))
   (if (not tag-info)
     (begin   
     (if (handle-exceptions
	   exn
	   (begin 
               (debug:print-info 1 *default-log-port*  ((condition-property-accessor 'exn 'message) exn))     
	   #f)
	   (pgdb:insert-tag  dbh   tag))
                       (set! tag-info (pgdb:get-tag-info-by-name dbh tag))
		  #f)))
     ;;add to area_tags
     (handle-exceptions
	   exn
	   (begin 
               (debug:print-info 1 *default-log-port*  ((condition-property-accessor 'exn 'message) exn))     
	   #f)
           (if (not (pgdb:is-area-taged-with-a-tag dbh (vector-ref tag-info 0)  (vector-ref area-info 0)))  
	   (pgdb:insert-area-tag  dbh   (vector-ref tag-info 0)  (vector-ref area-info 0))))))

#;(define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) 
  (for-each
     (lambda (run-id)
      (debug:print-info 1 *default-log-port*   "Check if run with " run-id " needs to be synced" )
       (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
run-ids))


;; get runs changed since last sync
;; (define (tasks:sync-test-data dbh cached-info area-info)
;;   (let* ((

#;(define (tasks:sync-to-postgres configdat dest)
  (print "In sync")
  (let* ((dbh         (pgdb:open configdat dbname: dest))
	 (area-info   (pgdb:get-area-by-path dbh *toppath*))
	 (cached-info (make-hash-table))
	 (start       (current-seconds))
   (test-patt   (if (args:get-arg "-testpatt")
											(args:get-arg "-testpatt")
                      "%"))
   (target         (if (args:get-arg "-target")
														 (args:get-arg "-target")
													#f))
    (run-name         (if (args:get-arg "-runname")
														 (args:get-arg "-runname")
													#f)))
     (if (and target  (not run-name))
       (begin
					(print "Error: Provide runname")
          (exit 1)))
     (if (and (not target)  run-name)
       (begin
					(print "Error: Provide target")
          (exit 1)))
    ;(print "123")
    ;(exit 1) 
    (for-each (lambda (dtype)
		(hash-table-set! cached-info dtype (make-hash-table)))
	      '(runs targets tests steps data))
    (hash-table-set! cached-info 'start start) ;; when done we'll set sync times to this
    (if area-info
	(let* ((last-sync-time (vector-ref area-info 3))
	       (smallest-last-update-time  (make-hash-table))
         (changed      (if (and target run-name)
                            (rmt:get-run-record-ids target run-name (rmt:get-keys) test-patt)
                            (rmt:get-changed-record-ids last-sync-time)))
	       (run-ids        (alist-ref 'runs       changed))
	       (test-ids       (alist-ref 'tests      changed))
	       (test-step-ids  (alist-ref 'test_steps changed))
	       (test-data-ids  (alist-ref 'test_data  changed))
	       (run-stat-ids   (alist-ref 'run_stats  changed))
         (area-tag    (if (args:get-arg "-area-tag") 
                                 (args:get-arg "-area-tag")
                                 (if (args:get-arg "-area") 
                                   (args:get-arg "-area") 
                                   ""))))
           (if (and (equal? area-tag "") (not (pgdb:is-area-taged dbh (vector-ref area-info 0))))
            (set! area-tag *default-area-tag*)) 
           (if (not (equal? area-tag "")) 
             (task:add-area-tag dbh area-info area-tag)) 
	  (if (or (not (null? test-ids)) (not (null? run-ids)))
	      (begin
                (debug:print-info 0 *default-log-port*  "syncing runs")   
	              (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) 
                (debug:print-info 0 *default-log-port*  "syncing tests")
		            (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time)
                (debug:print-info 0 *default-log-port*  "syncing test steps")
                (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time)
								(debug:print-info 0 *default-log-port*  "syncing test data")
                (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time)
                (print "----------done---------------")))
     (let*  ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
     (debug:print-info 0 "smallest-time :" smallest-time  " last-sync-time " last-sync-time)
    (if (not (and target run-name)) 
	  (if (or (and smallest-time (> smallest-time last-sync-time)) (and smallest-time (eq? last-sync-time 0)))
				(pgdb:write-sync-time dbh area-info smallest-time))))) ;;this needs to be changed
	(if (tasks:set-area dbh configdat)
	    (tasks:sync-to-postgres configdat dest)
	    (begin
	      (debug:print 0 *default-log-port* "ERROR: unable to create an area record")
	      #f)))))

;; cache the runconfigs in $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME/.runconfig
;;
(define (full-runconfigs-read)
;; in the envprocessing branch the below code replaces the further below code
;;  (if (eq? *configstatus* 'fulldata)
;;      *runconfigdat*
;;      (begin
;;	(launch:setup)
;;	*runconfigdat*)))

  (let* ((rundir (if (and (getenv "MT_LINKTREE")(getenv "MT_TARGET")(getenv "MT_RUNNAME"))
		     (conc (getenv "MT_LINKTREE") "/" (getenv "MT_TARGET") "/" (getenv "MT_RUNNAME"))
		     #f))
	 (cfgf   (if rundir (conc rundir "/.runconfig." megatest-version "-" megatest-fossil-hash) #f)))
    (if (and cfgf
	     (common:file-exists? cfgf)
	     (file-write-access? cfgf)
	     (common:use-cache?))
	(configf:read-alist cfgf)
	(let* ((keys   (rmt:get-keys))
	       (target (common:args-get-target))
	       (key-vals (if target (keys:target->keyval keys target) #f))
	       (sections (if target (list "default" target) #f))
	       (data     (begin
			   (setenv "MT_RUN_AREA_HOME" *toppath*)
			   (if key-vals
			       (for-each (lambda (kt)
					   (setenv (car kt) (cadr kt)))
					 key-vals))
			   ;; (read-config (conc *toppath* "/runconfigs.config") #f #t sections: sections))))
                           (runconfig:read (conc *toppath* "/runconfigs.config") target #f))))
	  (if (and rundir ;; have all needed variabless
		   (directory-exists? rundir)
		   (file-write-access? rundir))
	      (begin
                (if (not (common:in-running-test?))
                    (configf:write-alist data cfgf))
		;; force re-read of megatest.config - this resolves circular references between megatest.config
		(launch:setup force-reread: #t)
		;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW.
		)) ;; we can safely cache megatest.config since we have a valid runconfig
	  data))))




;;
;; connect
;;
(define (http-transport:client-connect iface port)
  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
	 (api-req      (make-request method: 'POST uri: api-uri))
	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds))))
    server-dat))

;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running) 
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
  (let* ((tmp-area          (common:get-db-tmp-area *alldat*))
	 (started-file      (conc tmp-area "/.server-started"))
	 (server-start-time (current-seconds))
	 (server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (let ((sdat #f))
			  (thread-sleep! 0.01)
			  (debug:print-info 0 *default-log-port* "Waiting for server alive signature")
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *server-info*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if (and sdat
				   (not changed)
				   (> (- (current-seconds) start-time) 2))
			      (begin
				(debug:print-info 0 *default-log-port* "Received server alive signature")
                                (common:save-pkt `((action . alive)
                                                   (T      . server)
                                                   (pid    . ,(current-process-id))
                                                   (ipaddr . ,(car sdat))
                                                   (port   . ,(cadr sdat)))
                                                 *configdat* #t)
				sdat)
                              (begin
				(debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
                                (sleep 4)
				(if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
				    (begin
				      (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
                                      (common:save-pkt `((action . died)
                                                         (T      . server)
                                                         (pid    . ,(current-process-id))
                                                         (ipaddr . ,(car sdat))
                                                         (port   . ,(cadr sdat))
                                                         (msg    . "Transport died?"))
                                                 *configdat* #t)
				      (exit))
				    (loop start-time
					  (equal? sdat last-sdat)
					  sdat)))))))
         (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (server-timeout (server:expiration-timeout))
	 (server-going  #f)
	 (server-log-file (args:get-arg "-log"))) ;; always set when we are a server

    (with-output-to-file started-file (lambda ()(print (current-process-id))))

    (let loop ((count         0)
	       (server-state 'available)
	       (bad-sync-count 0)
	       (start-time     (current-milliseconds)))
      ;; Use this opportunity to sync the tmp db to megatest.db
      (if (not server-going) ;; *dbstruct-db* 
	  (begin
	    (debug:print 0 *default-log-port* "SERVER: dbprep")
	    (set! *dbstruct-db*  (db:setup #t)) ;;  run-id))
	    (set! server-going #t)
	    (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
	    (thread-start! *watchdog*)))
      
      ;; when things go wrong we don't want to be doing the various queries too often
      ;; so we strive to run this stuff only every four seconds or so.
      (let* ((sync-time (- (current-milliseconds) start-time))
	    (rem-time  (quotient (- 4000 sync-time) 1000)))
	(if (and (<= rem-time 4)
		 (>  rem-time 0))
	    (thread-sleep! rem-time)))
      
      (if (< count 1) ;; 3x3 = 9 secs aprox
	  (loop (+ count 1) 'running bad-sync-count (current-milliseconds)))
      
      ;; Check that iface and port have not changed (can happen if server port collides)
      (mutex-lock! *heartbeat-mutex*)
      (set! sdat *server-info*)
      (mutex-unlock! *heartbeat-mutex*)
      
      (if (not (equal? sdat (list iface port)))
	  (let ((new-iface (car sdat))
		(new-port  (cadr sdat)))
	    (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
	    (set! iface new-iface)
	    (set! port  new-port)
	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
	    (flush-output *default-log-port*)))
      
      ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
      (mutex-lock! *heartbeat-mutex*)
      (set! last-access *db-last-access*)
      (mutex-unlock! *heartbeat-mutex*)
      
      (if (common:low-noise-print 120 (conc "server running on " iface ":" port))
	  (begin
	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
	    (flush-output *default-log-port*)))
      (if (common:low-noise-print 60 "dbstats")
	  (begin
	    (debug:print 0 *default-log-port* "Server stats:")
	    (db:print-current-query-stats)))
      (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
	(cond
         ((and *server-run*
	       (> (+ last-access server-timeout)
		  (current-seconds)))
          (if (common:low-noise-print 120 "server continuing")
              (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	      (let ((curr-time (current-seconds)))
		(handle-exceptions
		    exn
		    (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk?")
		  (if (not *server-overloaded*)
		      (change-file-times server-log-file curr-time curr-time)))))
          (loop 0 server-state bad-sync-count (current-milliseconds)))
         (else
          (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
          (http-transport:server-shutdown port)))))))

(define (http-transport:server-shutdown port)
  (begin
    ;;(BB> "http-transport:server-shutdown called")
    (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
    ;;
    ;; start_shutdown
    ;;
    (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
    (portlogger:open-run-close portlogger:set-port port "released")
    (thread-sleep! 1)

    ;; (debug:print-info 0 *default-log-port* "Max cached queries was    " *max-cache-size*)
    ;; (debug:print-info 0 *default-log-port* "Number of cached writes   " *number-of-writes*)
    ;; (debug:print-info 0 *default-log-port* "Average cached write time "
    ;; 		      (if (eq? *number-of-writes* 0)
    ;; 			  "n/a (no writes)"
    ;; 			  (/ *writes-total-delay*
    ;; 			     *number-of-writes*))
    ;; 		      " ms")
    ;; (debug:print-info 0 *default-log-port* "Number non-cached queries "  *number-non-write-queries*)
    ;; (debug:print-info 0 *default-log-port* "Average non-cached time   "
    ;; 		      (if (eq? *number-non-write-queries* 0)
    ;; 			  "n/a (no queries)"
    ;; 			  (/ *total-non-write-delay* 
    ;; 			     *number-non-write-queries*))
    ;; 		      " ms")
    
    (db:print-current-query-stats)
    (common:save-pkt `((action . exit)
                       (T      . server)
                       (pid    . ,(current-process-id)))
                     *configdat* #t)
    (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
    (exit)))

;; all routes though here end in exit ...
;;
;; start_server? 
;;
(define (http-transport:launch)
  ;; check that a server start is in progress, pause or exit if so
  (let* ((tmp-area            (common:get-db-tmp-area *alldat*))
	 (server-start        (conc tmp-area "/.server-start"))
	 (server-started      (conc tmp-area "/.server-started"))
	 (start-time          (common:lazy-modification-time server-start))
	 (started-time        (common:lazy-modification-time server-started))
	 (server-starting     (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting
	 (start-time-old      (> (- (current-seconds) start-time) 5))
         (cleanup-proc        (lambda (msg)
                                (let* ((serv-fname      (conc "server-" (current-process-id) "-" (get-host-name) ".log"))
                                       (full-serv-fname (conc *toppath* "/logs/" serv-fname))
                                       (new-serv-fname  (conc *toppath* "/logs/" "defunct-" serv-fname)))
                                  (debug:print 0 *default-log-port* msg)
                                  (if (common:file-exists? full-serv-fname)
                                      (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname))
                                      (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname))
                                  (exit)))))
    (if (and (not start-time-old) ;; last server start try was less than five seconds ago
	     (not server-starting))
	(begin
	  (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting")
	  (exit)))
    ;; lets not even bother to start if there are already three or more server files ready to go
    (let* ((num-alive   (server:get-num-alive (server:get-list *toppath*))))
      (if (> num-alive 3)
          (begin
            (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up"))
            (exit))))
  (common:save-pkt `((action . start)
		     (T      . server)
		     (pid    . ,(current-process-id)))
		   *configdat* #t)
    (let* ((th2 (make-thread (lambda ()
                               (debug:print-info 0 *default-log-port* "Server run thread started")
                               (http-transport:run 
                                (if (args:get-arg "-server")
                                    (args:get-arg "-server")
                                    "-")
                                )) "Server run"))
           (th3 (make-thread (lambda ()
                               (debug:print-info 0 *default-log-port* "Server monitor thread started")
                               (http-transport:keep-running)
                               "Keep running"))))
      (thread-start! th2)
      (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
      (thread-start! th3)
      (set! *didsomething* #t)
      (thread-join! th2)
      (exit))))

;; (define (http-transport:server-signal-handler signum)
;;   (signal-mask! signum)
;;   (handle-exceptions
;;    exn
;;    (debug:print 0 *default-log-port* " ... exiting ...")
;;    (let ((th1 (make-thread (lambda ()
;; 			     (thread-sleep! 1))
;; 			   "eat response"))
;; 	 (th2 (make-thread (lambda ()
;; 			     (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
;; 			     (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
;; 			     (debug:print 0 *default-log-port* "       Done.")
;; 			     (exit 4))
;; 			   "exit on ^C timer")))
;;      (thread-start! th2)
;;      (thread-start! th1)
;;      (thread-join! th2))))

;;===============================================
;; Java script
;;===============================================
(define (http-transport:show-jquery)
  (let* ((data  (tests:readlines *java-script-lib*)))
(string-join data "\n")))



;;======================================================================
;; web pages
;;======================================================================

(define (http-transport:html-test-log $)
   (let* ((run-id ($ 'runid))
         (test-item ($ 'testname))
         (parts (string-split test-item ":"))
         (test-name (car parts))
             
         (item-name (if (equal? (length parts) 1)
             ""
             (cadr parts))))
  ;(print $) 
(tests:get-test-log run-id test-name item-name)))


(define (http-transport:html-dboard $)
  (let* ((page ($ 'page))
         (oup       (open-output-string)) 
         (bdy "--------------------------")

         (ret  (tests:dynamic-dboard page)))
    (s:output-new  oup  ret)
   (close-output-port oup)

  (set! bdy   (get-output-string oup))
     (conc "<h1>Dashboard</h1>" bdy "<br/> <br/> "  )))

(define (http-transport:main-page)
  (let ((linkpath (root-path)))
    (conc "<head><h1>" (pathname-strip-directory *toppath*) "</h1></head>"
	  "<body>"
	  "Run area: " *toppath*
	  "<h2>Server Stats</h2>"
	  (http-transport:stats-table) 
	  "<hr>"
	  (http-transport:runs linkpath)
	  "<hr>"
	  (http-transport:run-stats)
	  "</body>"
	  )))

(define (http-transport:stats-table)
  (mutex-lock! *heartbeat-mutex*)
  (let ((res 
	 (conc "<table>"
	       ;; "<tr><td>Max cached queries</td>        <td>" *max-cache-size* "</td></tr>"
	       "<tr><td>Number of cached writes</td>   <td>" *number-of-writes* "</td></tr>"
	       "<tr><td>Average cached write time</td> <td>" (if (eq? *number-of-writes* 0)
								 "n/a (no writes)"
								 (/ *writes-total-delay*
								    *number-of-writes*))
	       " ms</td></tr>"
	       "<tr><td>Number non-cached queries</td> <td>"  *number-non-write-queries* "</td></tr>"
	       ;; "<tr><td>Average non-cached time</td>   <td>" (if (eq? *number-non-write-queries* 0)
	       ;; 							 "n/a (no queries)"
	       ;; 							 (/ *total-non-write-delay* 
	       ;; 							    *number-non-write-queries*))
	       " ms</td></tr>"
	       "<tr><td>Last access</td><td>"              (seconds->time-string *db-last-access*) "</td></tr>"
	       "</table>")))
    (mutex-unlock! *heartbeat-mutex*)
    res))

(define (http-transport:runs linkpath)
  (conc "<h3>Runs</h3>"
	(string-intersperse
	 (let ((files (map pathname-strip-directory (glob (conc linkpath "/*")))))
	   (map (lambda (p)
		  (conc "<a href=\"" p "\">" p "</a><br>"))
		files))
	 " ")))

(define (http-transport:run-stats)
  (let ((stats (open-run-close db:get-running-stats #f)))
    (conc "<table>"
	  (string-intersperse
	   (map (lambda (stat)
		  (conc "<tr><td>" (car stat) "</td><td>" (cadr stat) "</td></tr>"))
		stats)
	   " ")
	  "</table>")))

;; Configurations for server
(tcp-buffer-size 2048)
(max-connections 2048) 

;;======================================================================
;;
;;======================================================================


;; Puts out all combinations
(define (process-itemlist hierdepth curritemkey itemlist)
  (let ((res '()))
    (if (not hierdepth)
	(set! hierdepth (length itemlist)))
    (let loop ((hed (car itemlist))
	       (tal (cdr itemlist)))
      (if (null? tal)
	  (for-each (lambda (item)
		      (if (> (length curritemkey) (- hierdepth 2))
			  (set! res (append res (list (append curritemkey (list (list (car hed) item))))))))
		    (cadr hed))
	  (begin
	    (for-each (lambda (item)
			(set! res (append res (process-itemlist hierdepth (append curritemkey (list (list (car hed) item))) tal))))
		      (cadr hed))
	    (loop (car tal)(cdr tal)))))
    res))

;; (item-assoc->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Fall")))
;;   => ((("ANIMAL" "Elephant") ("SEASON" "Spring")) 
;;       (("ANIMAL" "Elephant") ("SEASON" "Fall")) 
;;       (("ANIMAL" "Lion")     ("SEASON" "Spring"))
;;       (("ANIMAL" "Lion")     ("SEASON" "Fall")))
(define (item-assoc->item-list itemsdat)
  (if (and itemsdat (not (null? itemsdat)))
      (let ((itemlst (filter (lambda (x)
			       (list? x))
			     (map (lambda (x)
				    (debug:print 6 *default-log-port* "item-assoc->item-list x: " x)
				    (if (< (length x) 2)
					(begin
					  (debug:print-error 0 *default-log-port* "malformed items spec " (string-intersperse x " "))
					  (list (car x)'()))
					(let* ((name (car x))
					       (items (cadr x))
					       (ilist (list name (if (string? items)
								     (string-split items)
								     '()))))
					  (if (null? ilist)
					      (debug:print-error 0 *default-log-port* "No items specified for " name))
					  ilist)))
				  itemsdat))))
	(let ((debuglevel 5))
	  (debug:print 5 *default-log-port* "item-assoc->item-list: itemsdat => itemlst ")
	  (if (debug:debug-mode 5)
	      (begin
		(pp itemsdat)
		(print " => ")
		(pp itemlst))))
	(if (> (length itemlst) 0)
	    (process-itemlist #f '() itemlst)
	    '()))
      '())) ;; return a list consisting on a single null list for non-item runs
            ;; Nope, not now, return null as of 6/6/2011

;; (item-table->item-list '(("ANIMAL" "Elephant Lion")("SEASON" "Spring Winter")))
;;   => ((("ANIMAL" "Elephant")("SEASON" "Spring"))
;;       (("ANIMAL" "Lion")    ("SEASON" "Winter")))
(define (item-table->item-list itemtable)
  (let ((newlst (map (lambda (x)
		       (if (> (length x) 1)
			   (list (car x)
				 (string-split (cadr x)))
			   (list x '())))
		     itemtable))
	(res     '())) ;; a list of items
    (let loop ((indx    0)
	       (item   '()) ;; an item will be ((KEYNAME1 VAL1)(KEYNAME2 VAL2) ...)
	       (elflag  #f))
      (for-each (lambda (row)
		  (let ((rowname (car row))
			(rowdat  (cadr row)))
		    (set! item (append item 
				       (list 
					(if (< indx (length rowdat))
					    (let ((new (list rowname (list-ref rowdat indx))))
					      ;; (debug:print 0 *default-log-port* "New: " new)
					      (set! elflag #t)
					      new
					      ) ;; i.e. had at least on legit value to use
					    (list rowname "-")))))))
		newlst)
      (if elflag
	  (begin
	    (set! res (append res (list item)))
	    (loop (+ indx 1)
		  '()
		  #f)))
      res)))
            ;; Nope, not now, return null as of 6/6/2011
		
(define (items:check-valid-items class item)
  (let ((valid-values (let ((s (config-lookup *configdat* "validvalues" class)))
			(if s (string-split s) #f))))
    (if valid-values
	(if (member item valid-values)
	    item #f)
	item)))

(define (items:get-items-from-config tconfig)
  (let* ((have-items  (hash-table-ref/default tconfig "items"      #f))
	 (have-itable (hash-table-ref/default tconfig "itemstable" #f))
	 (items       (hash-table-ref/default tconfig "items"      '()))
	 (itemstable  (hash-table-ref/default tconfig "itemstable" '())))
    (debug:print 5 *default-log-port* "items: " items " itemstable: " itemstable)
    (set! items (map (lambda (item)
		       (if (procedure? (cadr item))
			   (list (car item)((cadr item)))  ;; evaluate the proc
			   item))
		     items))
    (set! itemstable (map (lambda (item)
			    (if (procedure? (cadr item))
				(list (car item)((cadr item)))  ;; evaluate the proc
				item))
			  itemstable))
    (if (and have-items  (null? items))     (debug:print 0 *default-log-port* "WARNING:[items] section in testconfig but no entries defined"))
    (if (and have-itable (null? itemstable))(debug:print 0 *default-log-port* "WARNNG:[itemstable] section in testconfig but no entries defined"))
    (if (or (not (null? items))(not (null? itemstable)))
	(append (item-assoc->item-list items)
		(item-table->item-list itemstable))
	'(()))))

;; (pp (item-assoc->item-list itemdat))

;;======================================================================
;; keys
;;======================================================================


(define (args:usage . a) #f)

;;======================================================================
;; key <=> target routines
;;======================================================================

;; This invalidates using "/" in item names. Every key will be
;; available via args:get-arg as :keyfield. Since this only needs to
;; be called once let's use it to set the environment vars
;;
;; The setting of :keyfield in args should be turned off ASAP
;;
(define (keys:target-set-args keys target ht)
  (if target
      (let ((vals (string-split target "/")))
	(if (eq? (length vals)(length keys))
	    (for-each (lambda (key val)
			(setenv key val)
			(if ht (hash-table-set! ht (conc ":" key) val)))
		      keys
		      vals)
	    (debug:print-error 0 *default-log-port* "wrong number of values in " target ", should match " keys))
	vals)
      (debug:print 4 *default-log-port* "ERROR: keys:target-set-args called with no target.")))

;; given the keys (a list of vectors <key field> or a list of keys) and a target return a keyval list
;; keyval list ( (key1 val1) (key2 val2) ...)
(define (keys:target->keyval keys target)
  (let* ((targlist (string-split target "/"))
	 (numkeys  (length keys))
	 (numtarg  (length targlist))
	 (targtweaked (if (> numkeys numtarg)
			  (append targlist (make-list (- numkeys numtarg) ""))
			  targlist)))
    (map (lambda (key targ)
	   (list key targ))
	 keys targtweaked)))

;;======================================================================
;; launch
;;======================================================================

;;======================================================================
;; ezsteps
;;======================================================================

;; ezsteps were going to be coded as
;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute
;;   BUT
;; now are
;; stepname {VAR=first,second,third ...} command ...
;; where the {VAR=first,second,third ...} is optional.

;; given an exit code and whether or not logpro was used calculate OK/BAD
;; return #t if we are ok, #f otherwise
(define (steprun-good? logpro exitcode)
  (or (eq? exitcode 0)
      (and logpro (eq? exitcode 2))))

;; if handed a string, process it, else look for MT_CMDINFO
(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f))
  (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO"))))
    (if enccmd
	(common:read-encoded-string enccmd)
	'())))

;;                       0           1              2              3
(defstruct launch:einf (pid #t)(exit-status #t)(exit-code #t)(rollup-status 0))

;; return (conc status ": " comment) from the final section so that
;;   the comment can be set in the step record in launch.scm
;;
(define (launch:load-logpro-dat run-id test-id stepname)
  (let ((cname (conc stepname ".dat")))
    (if (common:file-exists? cname)
	(let* ((dat  (read-config cname #f #f))
	       (csvr (db:logpro-dat->csv dat stepname))
	       (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ",")))
		       (fmt-csv (map list->csv-record csvr))))
	       (status (configf:lookup dat "final" "exit-status"))
	       (msg     (configf:lookup dat "final" "message")))
          (if csvt  ;; this if blocked stack dump caused by .dat file from logpro being 0-byte.  fixed by upgrading logpro
              (rmt:csv->test-data run-id test-id csvt)
	      (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer"))
	  ;;  (debug:print-info 13 *default-log-port* "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr)
	  ;;  )
	  (cond
	   ((equal? status "PASS") "PASS") ;; skip the message part if status is pass
	   (status (conc (configf:lookup dat "final" "exit-status") ": " (if msg msg "no message")))
	   (else #f)))
	#f)))

(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig) ;;; TODO: deprecate me in favor of ezsteps.scm
  (let* ((stepname       (car ezstep))  ;; do stuff to run the step
	 (stepinfo       (cadr ezstep))
	;; (let ((info (cadr ezstep)))
	;; 		   (if (proc? info) "" info)))
	;; (stepproc       (let ((info (cadr ezstep)))
	;; 		   (if (proc? info) info #f)))
	 (stepparts      (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo))
	 (stepparams     (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each
	 (paramparts     (if (string? stepparams)
			     (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams))
			     '()))
	 (subrun         (alist-ref "subrun" paramparts equal?))
	 (stepcmd        (list-ref stepparts 3))
	 (script         "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\
	 (logpro-file    (conc stepname ".logpro"))
	 (html-file      (conc stepname ".html"))
	 (dat-file       (conc stepname ".dat"))
	 (tconfig-logpro (configf:lookup testconfig "logpro" stepname))
	 (logpro-used    (common:file-exists? logpro-file)))

    (debug:print 0 *default-log-port* "stepparts: " stepparts ", stepparams: " stepparams
                 ", paramparts: " paramparts ", subrun: " subrun ", stepcmd: " stepcmd)
    
    (if (and tconfig-logpro
	     (not logpro-used)) ;; no logpro file found but have a defn in the testconfig
	(begin
	  (with-output-to-file logpro-file
	    (lambda ()
	      (print ";; logpro file extracted from testconfig\n"
		     ";;")
	      (print tconfig-logpro)))
	  (set! logpro-used #t)))
    
    ;; NB// can safely assume we are in test-area directory
    (debug:print 4 *default-log-port* "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts
		 " stepparams: " stepparams " stepcmd: " stepcmd)
    
    ;; ;; first source the previous environment
    ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") 
    ;;      							 (get-environment-variable "SHELL")) ".csh" ".sh"))))
    ;;   (if (and prevstep (common:file-exists? prev-env))
    ;;       (set! script (conc script "source " prev-env))))
    
    ;; call the command using mt_ezstep
    ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd))
    
    (debug:print 4 *default-log-port* "script: " script)
    (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
    ;; now launch the actual process
    (call-with-environment-variables 
     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
     (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1")
       (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 
	      (pid #f))
	 (let ((proc (lambda ()
		       (set! pid (process-run "/bin/bash" (list "-c" cmd))))))
	   (if subrun
               (begin
                 (debug:print-info 0 *default-log-port* "Running without MT_.* environment variables.")
                 (common:without-vars proc "^MT_.*"))
	       (proc)))
	 
         (with-output-to-file "Makefile.ezsteps"
           (lambda ()
             (print stepname ".log :")
             (print "\t" cmd)
             (if (common:file-exists? (conc stepname ".logpro"))
                 (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log"))
             (print)
             (print stepname " : " stepname ".log")
             (print))
           #:append)

	 (rmt:test-set-top-process-pid run-id test-id pid)
	 (let processloop ((i 0))
	   (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
		       (mutex-lock! m)
		       (launch:einf-pid-set!         exit-info pid)         ;; (vector-set! exit-info 0 pid)
		       (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status)
		       (launch:einf-exit-code-set!   exit-info exit-code)   ;; (vector-set! exit-info 2 exit-code)
		       (mutex-unlock! m)
		       (if (eq? pid-val 0)
			   (begin
			     (thread-sleep! 2)
			     (processloop (+ i 1))))
		       )))))
    (debug:print-info 0 *default-log-port* "step " stepname " completed with exit code " (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
    ;; now run logpro if needed
    (if logpro-used
	(let* ((logpro-exe (or (getenv "LOGPRO_EXE") "logpro"))
               (pid        (process-run (conc "/bin/sh -c '"logpro-exe" "logpro-file " " (conc stepname ".html") " < " stepname ".log > /dev/null'"))))
	  (let processloop ((i 0))
	    (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
			(mutex-lock! m)
			;; (make-launch:einf pid: pid exit-status: exit-status exit-code: exit-code)
			(launch:einf-pid-set!         exit-info pid)         ;; (vector-set! exit-info 0 pid)
			(launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status)
			(launch:einf-exit-code-set!   exit-info exit-code)   ;; (vector-set! exit-info 2 exit-code)
			(mutex-unlock! m)
			(if (eq? pid-val 0)
			    (begin
			      (thread-sleep! 2)
			      (processloop (+ i 1)))))
	    (debug:print-info 0 *default-log-port* "logpro for step " stepname " exited with code " (launch:einf-exit-code exit-info))))) ;; (vector-ref exit-info 2)))))
    
    (let ((exinfo (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
	  (logfna (if logpro-used (conc stepname ".html") ""))
	  (comment #f))
      (if logpro-used
	  (let ((datfile (conc stepname ".dat")))
	    ;; load the .dat file into the test_data table if it exists
	    (if (common:file-exists? datfile)
		(set! comment (launch:load-logpro-dat run-id test-id stepname)))
	    (rmt:test-set-log! run-id test-id (conc stepname ".html"))))
      (rmt:teststep-set-status! run-id test-id stepname "end" exinfo comment logfna))
    ;; set the test final status
    (let* ((process-exit-status (launch:einf-exit-code exit-info)) ;; (vector-ref exit-info 2))
	   (this-step-status (cond
			      ((and (eq? process-exit-status 2) logpro-used) 'warn)   ;; logpro 2 = warnings
			      ((and (eq? process-exit-status 3) logpro-used) 'check)  ;; logpro 3 = check
			      ((and (eq? process-exit-status 4) logpro-used) 'waived) ;; logpro 4 = waived
			      ((and (eq? process-exit-status 5) logpro-used) 'abort)  ;; logpro 5 = abort
			      ((and (eq? process-exit-status 6) logpro-used) 'skip)   ;; logpro 6 = skip
			      ((eq? process-exit-status 0)                   'pass)   ;; logpro 0 = pass
			      (else 'fail)))
	   (overall-status   (cond
			      ((eq? (launch:einf-rollup-status exit-info) 2) 'warn) ;; rollup-status (vector-ref exit-info 3)
			      ((eq? (launch:einf-rollup-status exit-info) 0) 'pass) ;; (vector-ref exit-info 3)
			      (else 'fail)))
	   (next-status      (cond 
			      ((eq? overall-status 'pass) this-step-status)
			      ((eq? overall-status 'warn)
			       (if (eq? this-step-status 'fail) 'fail 'warn))
			      ((eq? overall-status 'abort) 'abort)
			      (else 'fail)))
	   (next-state       ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ??
	    (cond
	     ((null? tal) ;; more to run?
	      "COMPLETED")
	     (else "RUNNING"))))
      (debug:print 4 *default-log-port* "Exit value received: " (launch:einf-exit-code exit-info) " logpro-used: " logpro-used 
		   " this-step-status: " this-step-status " overall-status: " overall-status 
		   " next-status: " next-status " rollup-status: "  (launch:einf-rollup-status exit-info)) ;; (vector-ref exit-info 3))
      (case next-status
	((warn)
	 (launch:einf-rollup-status-set! exit-info 2) ;; (vector-set! exit-info 3 2) ;; rollup-status
	 ;; NB// test-set-status! does rdb calls under the hood
	 (tests:test-set-status! run-id test-id next-state "WARN" 
				 (if (eq? this-step-status 'warn) "Logpro warning found" #f)
				 #f))
	((check)
	 (launch:einf-rollup-status-set! exit-info 3) ;; (vector-set! exit-info 3 3) ;; rollup-status
	 ;; NB// test-set-status! does rdb calls under the hood
	 (tests:test-set-status! run-id test-id next-state "CHECK" 
				 (if (eq? this-step-status 'check) "Logpro check found" #f)
				 #f))
	((waived)
	 (launch:einf-rollup-status-set! exit-info 4) ;; (vector-set! exit-info 3 3) ;; rollup-status
	 ;; NB// test-set-status! does rdb calls under the hood
	 (tests:test-set-status! run-id test-id next-state "WAIVED" 
				 (if (eq? this-step-status 'check) "Logpro waived found" #f)
				 #f))
	((abort)
	 (launch:einf-rollup-status-set! exit-info 5) ;; (vector-set! exit-info 3 4) ;; rollup-status
	 ;; NB// test-set-status! does rdb calls under the hood
	 (tests:test-set-status! run-id test-id next-state "ABORT" 
				 (if (eq? this-step-status 'abort) "Logpro abort found" #f)
				 #f))
	((skip)
	 (launch:einf-rollup-status-set! exit-info 6) ;; (vector-set! exit-info 3 4) ;; rollup-status
	 ;; NB// test-set-status! does rdb calls under the hood
	 (tests:test-set-status! run-id test-id next-state "SKIP" 
				 (if (eq? this-step-status 'skip) "Logpro skip found" #f)
				 #f))
	((pass)
	 (tests:test-set-status! run-id test-id next-state "PASS" #f #f))
	(else ;; 'fail
	 (launch:einf-rollup-status-set! exit-info 1) ;; (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" 
	 (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f)
	 )))
    logpro-used))

(define (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m)
  ;; (let-values
  ;;  (((pid exit-status exit-code)
  ;;    (run-n-wait fullrunscript)))
  ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f)
  ;; Since we should have a clean slate at this time there is no need to do 
  ;; any of the other stuff that tests:test-set-status! does. Let's just 
  ;; force RUNNING/n/a

  ;; (thread-sleep! 0.3)
  ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a")
  (rmt:set-state-status-and-roll-up-items run-id test-name item-path "RUNNING" #f #f) 
  ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here

  ;; if there is a runscript do it first
  (if fullrunscript
      (let ((pid (process-run fullrunscript)))
	(rmt:test-set-top-process-pid run-id test-id pid)
	(let loop ((i 0))
	  (let-values
	   (((pid-val exit-status exit-code) (process-wait pid #t)))
	   (mutex-lock! m)
	   (launch:einf-pid-set!           exit-info  pid)         ;; (vector-set! exit-info 0 pid)
	   (launch:einf-exit-status-set!   exit-info  exit-status) ;; (vector-set! exit-info 1 exit-status)
	   (launch:einf-exit-code-set!     exit-info  exit-code)   ;; (vector-set! exit-info 2 exit-code)
	   (launch:einf-rollup-status-set! exit-info  exit-code)   ;; (vector-set! exit-info 3 exit-code)  ;; rollup status
	   (mutex-unlock! m)
	   (if (eq? pid-val 0)
	       (begin
		 (thread-sleep! 2)
		 (loop (+ i 1)))
	       )))))
  ;; then, if runscript ran ok (or did not get called)
  ;; do all the ezsteps (if any)
  (if (or ezsteps subrun)
      (let* ((test-run-dir (tests:get-test-path-from-environment))
             (testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here?
	      ;; NOTE: it is tempting to turn off force-create of testconfig but dynamic
	      ;;       ezstep names need a full re-eval here.
	      (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
	     (ezstepslst (if (hash-table? testconfig)
			     (hash-table-ref/default testconfig "ezsteps" '())
			     #f)))
	(if testconfig
	    (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ...
	    (begin
	      (launch:setup)
	      (debug:print 0 *default-log-port* "WARNING: no testconfig found for " test-name " in search path:\n  "
			   (string-intersperse (tests:get-tests-search-path *configdat*) "\n  "))))
	;; after all that, still no testconfig? Time to abort
	(if (not testconfig)
	    (begin
	      (debug:print-error 0 *default-log-port* "Failed to resolve megatest.config, runconfigs.config and testconfig issues. Giving up now")
	      (exit 1)))

	;; create a proc for the subrun if requested, save that proc in the ezsteps table as the last entry
	;; 1. get section [runarun]
	;; 2. unset MT_* vars
	;; 3. fix target
	;; 4. fix runname
	;; 5. fix testpatt or calculate it from contour
	;; 6. launch the run
	;; 7. roll up the run result and or roll up the logpro processed result
	(when (configf:lookup testconfig "subrun" "runwait") ;; we use runwait as the flag that a subrun is requested
            (subrun:initialize-toprun-test testconfig test-run-dir)
	    (let* ((mt-cmd (subrun:launch-cmd test-run-dir)))
              (debug:print-info 0 *default-log-port* "Subrun command is \"" mt-cmd "\"")
              (set! ezsteps #t) ;; set the needed flag
	      (set! ezstepslst
                    (append (or ezstepslst '())
                            (list (list "subrun" (conc "{subrun=true} " mt-cmd)))))))

	;; process the ezsteps
	(if ezsteps
	    (begin
	      (if (not (common:file-exists? ".ezsteps"))(create-directory ".ezsteps"))
	      ;; if ezsteps was defined then we are sure to have at least one step but check anyway
	      (if (not (> (length ezstepslst) 0))
		  (debug:print-error 0 *default-log-port* "ezsteps defined but ezstepslst is zero length")
		  (let loop ((ezstep (car ezstepslst))
			     (tal    (cdr ezstepslst))
			     (prevstep #f))
                    (debug:print-info 0 *default-log-port* "Processing ezstep \"" (string-intersperse ezstep " ") "\"")
		    ;; check exit-info (vector-ref exit-info 1)
		    (if (launch:einf-exit-status exit-info) ;; (vector-ref exit-info 1)
			(let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal testconfig))
			      (stepname    (car ezstep)))
			  ;; if logpro-used read in the stepname.dat file
			  (if (and logpro-used (common:file-exists? (conc stepname ".dat")))
			      (launch:load-logpro-dat run-id test-id stepname))
			  (if (steprun-good? logpro-used (launch:einf-exit-code exit-info))
			      (if (not (null? tal))
				  (loop (car tal) (cdr tal) stepname))
			      (debug:print 0 *default-log-port* "WARNING: step " (car ezstep) " failed. Stopping")))
			(debug:print 0 *default-log-port* "WARNING: a prior step failed, stopping at " ezstep)))))))))

(define (launch:monitor-job run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)
  (let* ((update-period (string->number (or (configf:lookup *configdat* "setup" "test-stats-update-period") "30")))
         (start-seconds (current-seconds))
	 (calc-minutes  (lambda ()
			  (inexact->exact 
			   (round 
			    (- 
			     (current-seconds) 
			     start-seconds)))))
	 (kill-tries 0))
    ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)

    (let loop ((minutes   (calc-minutes))
	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
	       (disk-free (get-df (current-directory)))
               (last-sync (current-seconds)))
      ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
      (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
             (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
                                   (delta (abs (- load cpu-load))))
                              (if (> delta 0.1) ;; don't bother updating with small changes
                                  load
                                  #f)))
             (new-disk-free (let* ((df    (if over-time ;; only get df every 30 seconds
                                              (get-df (current-directory))
                                              disk-free))
                                   (delta (abs (- df disk-free))))
                              (if (and (> df 0)
                                       (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg
                                  df
                                  #f)))
             (do-sync       (or new-cpu-load new-disk-free over-time))

             (test-info   (rmt:get-test-info-by-id run-id test-id))
             (state       (db:test-get-state test-info))
             (status      (db:test-get-status test-info))
             (kill-reason  "no kill reason specified")
             (kill-job?    #f))
        ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
        (cond
         ((test-get-kill-request run-id test-id)
          (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
          (set! kill-job? #t))
         ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
          (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
          (set! kill-job? #t))
         ((equal? status "DEAD")
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
          ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
          (set! kill-job? #f)))

        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
        (launch:handle-zombie-tests run-id)
        (when do-sync
          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
          ;; (common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds)))
	  )
        
	(if kill-job? 
	    (begin
              (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
	      (mutex-lock! m)
	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
	      ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
	      ;;       between tries?
	      (let* ((pid1 (launch:einf-pid exit-info)) ;; (vector-ref exit-info 0))
		     (pid2 (rmt:test-get-top-process-pid run-id test-id))
		     (pids (delete-duplicates (filter number? (list pid1 pid2)))))
		(if (not (null? pids))
		    (begin
		      (for-each
		       (lambda (pid)
			 (handle-exceptions
			  exn
			  (begin
			    (debug:print-info 0 *default-log-port* "Unable to kill process with pid " pid ", possibly already killed.")
			    (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
			  (debug:print 0 *default-log-port* "WARNING: Request received to kill job " pid) ;;  " (attempt # " kill-tries ")")
			  (debug:print-info 0 *default-log-port* "Signal mask=" (signal-mask))
			  ;; (if (process:alive? pid)
			  ;;     (begin
			  (map (lambda (pid-num)
				 (process-signal pid-num signal/term))
			       (process:get-sub-pids pid))
			  (thread-sleep! 5)
			  ;; (if (process:process-alive? pid)
			  (map (lambda (pid-num)
				 (handle-exceptions
				  exn
				  #f
				  (process-signal pid-num signal/kill)))
			       (process:get-sub-pids pid))))
		       ;;    (debug:print-info 0 *default-log-port* "not killing process " pid " as it is not alive"))))
		       pids)
                      ;; BB: question to Matt -- does the tests:test-state-status! encompass rollup to toplevel?  If not, should it?
		      (tests:test-set-status! run-id test-id "KILLED"  "KILLED" (conc (args:get-arg "-m")" "kill-reason) #f)) ;; BB ADDED kill-reason -- confirm OK with Matt
		    (begin
		      (debug:print-error 0 *default-log-port* "Nothing to kill, pid1=" pid1 ", pid2=" pid2)
		      (tests:test-set-status! run-id test-id "KILLED"  "FAILED TO KILL" (conc (args:get-arg "-m")" "kill-reason) #f) ;; BB ADDED kill-reason -- confirm OK with Matt
		      )))
	      (mutex-unlock! m)
	      ;; no point in sticking around. Exit now. But run end of run before exiting?
        (launch:end-of-run-check run-id)
	      (exit)))
	(if (hash-table-ref/default misc-flags 'keep-going #f)
	    (begin
	      (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses
	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
		  (loop (calc-minutes)
                        (or new-cpu-load cpu-load)
                        (or new-disk-free disk-free)
                        (if do-sync (current-seconds) last-sync)))))))
    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional


(define (launch:execute encoded-cmd)
  (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
	 (tconfigreg #f))
    (setenv "MT_CMDINFO" encoded-cmd)
    ;;(bb-check-path msg: "launch:execute incoming")
    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
	;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
	(let* ((testpath  (assoc/default 'testpath  cmdinfo))  ;; testpath is the test spec area
	       (top-path  (assoc/default 'toppath   cmdinfo))
	       (work-area (assoc/default 'work-area cmdinfo))  ;; work-area is the test run area
	       (test-name (assoc/default 'test-name cmdinfo))
	       (runscript (assoc/default 'runscript cmdinfo))
	       (ezsteps   (assoc/default 'ezsteps   cmdinfo))
	       (subrun    (assoc/default 'subrun    cmdinfo))
	       (serverurl (assoc/default 'serverurl cmdinfo))
	       (homehost  (assoc/default 'homehost  cmdinfo))
	       (run-id    (assoc/default 'run-id    cmdinfo))
	       (test-id   (assoc/default 'test-id   cmdinfo))
	       (target    (assoc/default 'target    cmdinfo))
	       (areaname  (assoc/default 'areaname  cmdinfo))
	       (itemdat   (assoc/default 'itemdat   cmdinfo))
	       (env-ovrd  (assoc/default 'env-ovrd  cmdinfo))
	       (set-vars  (assoc/default 'set-vars  cmdinfo)) ;; pre-overrides from -setvar
	       (runname   (assoc/default 'runname   cmdinfo))
	       (megatest  (assoc/default 'megatest  cmdinfo))
	       (runtlim   (assoc/default 'runtlim   cmdinfo))
	       (contour   (assoc/default 'contour   cmdinfo))
	       (item-path (item-list->path itemdat))
	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
	       (keys      #f)
	       (keyvals   #f)
	       (fullrunscript (if (not runscript)
                                  #f
                                  (if (substring-index "/" runscript)
                                      runscript ;; use unadultered if contains slashes
                                      (let ((fulln (conc work-area "/" runscript)))
	                                  (if (and (common:file-exists? fulln)
                                                   (file-execute-access? fulln))
                                              fulln
                                              runscript))))) ;; assume it is on the path
               (check-work-area           (lambda ()
                                            ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
                                            (let loop ((count 0))
                                              (if (or (common:directory-exists? work-area)
                                                      (> count 10))
                                                  (change-directory work-area)
                                                  (begin
                                                    (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
                                                    (thread-sleep! 10)
                                                    (loop (+ count 1)))))

                                            (if (not (string=?  (common:real-path work-area)(common:real-path (current-directory))))
                                                (begin
                                                  (debug:print 0 *default-log-port*
                                                               "INFO: we are expecting to be in directory " work-area "\n"
                                                               "     but we are actually in the directory " (current-directory) "\n"
                                                               "     doing another change dir.")
                                                  (change-directory work-area)))
                                            
                                            ;; spot check that the files in testpath are available. Too often NFS delays cause problems here.
                                            (let ((files      (glob (conc testpath "/*")))
                                                  (bad-files '()))
                                              (for-each
                                               (lambda (fullname)
                                                 (let* ((fname (pathname-strip-directory fullname))
                                                        (targn (conc work-area "/" fname)))
                                                   (if (not (file-exists? targn))
                                                       (set! bad-files (cons fname bad-files)))))
                                               files)
                                              (if (not (null? bad-files))
                                                  (begin
                                                    (debug:print 0 *default-log-port* "INFO: test data from " testpath " not copied properly or filesystem problems causing data to not be found. Re-running the copy command.")
                                                    (debug:print 0 *default-log-port* "INFO: missing files from " work-area ": " (string-intersperse bad-files ", "))
                                                    (launch:test-copy testpath work-area))))
                                            ;; one more time, change to the work-area directory
                                            (change-directory work-area)))
	       ) ;; let*

	  (if contour (setenv "MT_CONTOUR" contour))
	  
	  ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ...
	  ;;
	  (setenv "MT_TESTSUITENAME" areaname)
	  (setenv "MT_RUN_AREA_HOME" top-path)
	  (set! *toppath* top-path)
          (change-directory *toppath*) ;; temporarily switch to the run area home
	  (setenv "MT_TEST_RUN_DIR"  work-area)

	  (launch:setup) ;; should be properly in the run area home now

	  (if contour (setenv "MT_CONTOUR" contour))
	  
	  ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ...
	  ;;
	  (setenv "MT_TESTSUITENAME" areaname)
	  (setenv "MT_RUN_AREA_HOME" top-path)
	  (set! *toppath* top-path)
          (change-directory *toppath*) ;; temporarily switch to the run area home
	  (setenv "MT_TEST_RUN_DIR"  work-area)

	  (launch:setup) ;; should be properly in the run area home now
          
	  (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path
	  (let ((sighand (lambda (signum)
			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
			   (if (eq? signum signal/stop)
			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
			   (set! *time-to-exit* #t)
			   (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...")
			   (let ((th1 (make-thread (lambda ()
                                                     (print "set test to COMPLETED/ABORT begin.")
						     (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal")
                                                     (print "set test to COMPLETED/ABORT complete.")
						     (print "Killed by signal " signum ". Exiting")
						     (exit 1))))
				 (th2 (make-thread (lambda ()
						     (thread-sleep! 20)
						     (debug:print 0 *default-log-port* "Done")
						     (exit 4)))))
			     (thread-start! th2)
			     (thread-start! th1)
			     (thread-join! th2)))))
	    (set-signal-handler! signal/int sighand)
	    (set-signal-handler! signal/term sighand)
	    ) ;; (set-signal-handler! signal/stop sighand)
	  
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  (let* ((test-info (rmt:get-test-info-by-id run-id test-id))
		 (test-host (if test-info
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")

              (rmt:general-call 'set-test-start-time #f test-id)
              (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process:alive-on-host? test-host test-pid)
		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
		  (exit)))
	     ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
              (rmt:general-call 'set-test-start-time #f test-id)
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      )
	     (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
	      (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
	      (exit))))

          ;; cleanup prior execution's steps
          (rmt:delete-steps-for-test! run-id test-id)
          
	  (debug:print 2 *default-log-port* "Executing " test-name " (id: " test-id ") on " (get-host-name))
	  (set! keys       (rmt:get-keys))
	  ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process
	  ;; one of these is defunct/redundant ...
	  (if (not (launch:setup force-reread: #t))
	      (begin
		(debug:print 0 *default-log-port* "Failed to setup, exiting") 
		;; (sqlite3:finalize! db)
		;; (sqlite3:finalize! tdb)
		(exit 1)))
          ;; validate that the test run area is available
          (check-work-area)
          
          ;; still need to go back to run area home for next couple steps
	  (change-directory *toppath*) 

	  ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This 
	  ;;       seems non-ideal but could well break stuff
	  ;;    BUG? BUG? BUG?
	  
	  (let ((rconfig (full-runconfigs-read)) ;; (read-config (conc  *toppath* "/runconfigs.config") #f #t sections: (list "default" target))))
		(wconfig (read-config "waivers.config" #f #t sections: `( "default" ,target )))) ;; read the waivers config if it exists
	    ;; (setup-env-defaults (conc *toppath* "/runconfigs.config") run-id (make-hash-table) keyvals target)
	    ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id))
	    ;; Now have runconfigs data loaded, set environment vars
	    (for-each
	     (lambda (section)
	       (for-each
		(lambda (varval)
		  (let ((var (car varval))
			(val (cadr varval)))
		    (if (and (string? var)(string? val))
			(begin
			  (safe-setenv var (config:eval-string-in-environment val))) ;; val)
			(debug:print-error 0 *default-log-port* "bad variable spec, " var "=" val))))
		(configf:get-section rconfig section)))
	     (list "default" target)))
          ;;(bb-check-path msg: "launch:execute post block 1")

	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
	  (let loop ((count 0))
	    (if (or (common:file-exists? work-area)
		    (> count 10))
		(change-directory work-area)
		(begin
		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " work-area " not found")
		  (thread-sleep! 10)
		  (loop (+ count 1)))))

          ;; now we can switch to the work-area?
          (change-directory work-area)
          ;;(bb-check-path msg: "launch:execute post block 1.5")
	  ;; (change-directory work-area) 
	  (set! keyvals    (keys:target->keyval keys target))
	  ;; apply pre-overrides before other variables. The pre-override vars must not
	  ;; clobbers things from the official sources such as megatest.config and runconfigs.config
	  (if (string? set-vars)
	      (let ((varpairs (string-split set-vars ",")))
		(debug:print 4 *default-log-port* "varpairs: " varpairs)
		(map (lambda (varpair)
		       (let ((varval (string-split varpair "=")))
			 (if (eq? (length varval) 2)
			     (let ((var (car varval))
				   (val (cadr varval)))
			       (debug:print 1 *default-log-port* "Adding pre-var/val " var " = " val " to the environment")
			       (setenv var val)))))
		     varpairs)))
          ;;(bb-check-path msg: "launch:execute post block 2")
	  (for-each
	   (lambda (varval)
	     (let ((var (car varval))
		   (val (cadr varval)))
	       (if val
		   (setenv var val)
		   (begin
		     (debug:print-error 0 *default-log-port* "required variable " var " does not have a valid value. Exiting")
		     (exit)))))
	     (list 
	      (list  "MT_TEST_RUN_DIR" work-area)
	      (list  "MT_TEST_NAME" test-name)
	      (list  "MT_ITEM_INFO" (conc itemdat))
	      (list  "MT_ITEMPATH"  item-path)
	      (list  "MT_RUNNAME"   runname)
	      (list  "MT_MEGATEST"  megatest)
	      (list  "MT_TARGET"    target)
	      (list  "MT_LINKTREE"  (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
	      (list  "MT_TESTSUITENAME" (common:get-area-name *alldat*))))
          ;;(bb-check-path msg: "launch:execute post block 3")

	  (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
          ;;(bb-check-path msg: "launch:execute post block 4")
	  ;; (change-directory top-path)
	  ;; Can setup as client for server mode now
	  ;; (client:setup)

	  
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
          ;;(bb-check-path msg: "launch:execute post block 41")
	  (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
          ;;(bb-check-path msg: "launch:execute post block 42")
	  (set-item-env-vars itemdat)
          ;;(bb-check-path msg: "launch:execute post block 43")
          (let ((blacklist (configf:lookup *configdat* "setup" "blacklistvars")))
            (if blacklist
                (save-environment-as-files "megatest" ignorevars: (string-split blacklist))
                (save-environment-as-files "megatest")))
          ;;(bb-check-path msg: "launch:execute post block 44")
	  ;; open-run-close not needed for test-set-meta-info
	  ;; (tests:set-full-meta-info #f test-id run-id 0 work-area)
	  ;; (tests:set-full-meta-info test-id run-id 0 work-area)
	  (tests:set-full-meta-info #f test-id run-id 0 work-area 10)

	  ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here

	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript 
		       (common:file-exists? fullrunscript)
		       (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))

	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  ;; now is also a good time to write the .testconfig file
	  (let* ((tconfig-fname   (conc work-area "/.testconfig"))
		 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs)))
	    (configf:write-alist tconfig tconfig-tmpfile)
	    (file-move tconfig-tmpfile tconfig-fname #t))
	  ;; 
	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status
		 (job-thread   #f)
		 ;; (keep-going   #t)
		 (misc-flags   (let ((ht (make-hash-table)))
				 (hash-table-set! ht 'keep-going #t)
				 ht))
		 (runit        (lambda ()
				 (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m)))
		 (monitorjob   (lambda ()
				 (launch:monitor-job  run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)))
		 (th1          (make-thread monitorjob "monitor job"))
		 (th2          (make-thread runit "run job")))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (debug:print-info 0 *default-log-port* "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...")
	    (hash-table-set! misc-flags 'keep-going #f)
	    (thread-join! th1)
	    (thread-sleep! 1)       ;; givbe thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec.
	    (mutex-lock! m)
	    (let* ((item-path (item-list->path itemdat))
		   ;; only state and status needed - use lazy routine
		   (testinfo  (rmt:get-testinfo-state-status run-id test-id)))
	      ;; Am I completed?
	      (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (let ((new-state  (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status
				                                        ;; "COMPLETED"							                ;; (db:test-get-state testinfo)))   ;; else preseve the state as set within the test
				    )
			(new-status (cond
				     ((not (launch:einf-exit-status exit-info)) "FAIL") ;; job failed to run ... (vector-ref exit-info 1)
				     ((eq? (launch:einf-rollup-status exit-info) 0)     ;; (vector-ref exit-info 3)
				      ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO)
				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
				     ((eq? (launch:einf-rollup-status exit-info) 1) "FAIL")  ;; (vector-ref exit-info 3)
				     ((eq? (launch:einf-rollup-status exit-info) 2)	     ;;	(vector-ref exit-info 3)
				      ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN)
				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
				     ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK")
				     ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED")
				     ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT")
				     ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP")
				     (else "FAIL")))) ;; (db:test-get-status testinfo)))
		    (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info))
		    (tests:test-set-status! run-id 
					    test-id 
					    new-state
					    new-status
					    (args:get-arg "-m") #f)
		    ;; need to update the top test record if PASS or FAIL and this is a subtest
		    ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status!
		    ))
	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		  (tests:summarize-items run-id test-id test-name #f))
	      (tests:summarize-test run-id test-id)  ;; don't force - just update if no
	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
	    (mutex-unlock! m)
            (launch:end-of-run-check run-id )
	    (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " 
			 work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")
	    (if (not (launch:einf-exit-status exit-info))
		(exit 4))))
        )))

;; Spec for End of test
;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup
;; At transition to run COMPLETED/X do hooks
;; Definition: test_dead if event_time + duration + 1 minute? < current_time AND
;; we can prove the process is not alive (ssh host pstree -A pid)
;; if dead safe to mark the test as killed in the db
;; State/status table
;; new
;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) ==> COMPLETED / X where X is same as itemized rollup
;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na
;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED
;; 0 RUNNING ==> this is actually the first condition, should not get here

(define (launch:end-of-run-check run-id )
    (let*	((not-completed-cnt (rmt:get-not-completed-cnt run-id))  
           (running-cnt (rmt:get-count-tests-running-for-run-id run-id))
           (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id)))
           (current-state (rmt:get-run-state run-id))
           (current-status (rmt:get-run-status run-id)))
     ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing 
     (debug:print 0 *default-log-port* "rollup run state/status")                      
     (rmt:set-state-status-and-roll-up-run  run-id current-state current-status)
 
     (cond 
       ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" ))
           	(debug:print 0 *default-log-port* "look for  post hook.")
          	(runs:run-post-hook run-id))
        ((> running-cnt 3) 
        	  (debug:print 0 *default-log-port* "There are " running-cnt " tests running." ))
        ((> running-cnt 0)
            (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" )
   				  (let ((kill-cnt (launch:kill-tests-if-dead run-id)))
           			(if (and all-test-launched  (equal? all-test-launched "yes") (eq? kill-cnt running-cnt))
           					(launch:end-of-run-check run-id)))) ;;todo
        (else  (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt)
         (let* ((not-completed-tests (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
       (if (> (length not-completed-tests) 0) 
           (let loop ((running-test (car not-completed-tests))
			     (tal    (cdr not-completed-tests)))
		       (let* ((test-name (vector-ref running-test 2))
                 (item-path (vector-ref running-test 11)))
			       	(debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed")
              (if (not (null? tal))
				  (loop (car tal) (cdr tal)))))))))))        
 
(define (launch:is-test-alive host pid)
(if (and host pid (not (equal? host "n/a")))
(let* ((cmd (conc "ssh " host " pstree -A " pid))
      (output (with-input-from-pipe cmd read-lines)))
  (print "cmd: " cmd "\n op: " output )
  (if(eq? (length output) 0)
     #f
     #t))
#t))
 
(define (launch:kill-tests-if-dead run-id)
  (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
       (let loop ((running-test (car running-tests))
			     (tal    (cdr running-tests))
			     (kill-cnt 0))
		       (let* ((test-name (vector-ref running-test 2))
                 (item-path (vector-ref running-test 11))
								 (test-id (vector-ref running-test 0))
                 (host (vector-ref running-test 6))
                 (pid  (rmt:test-get-top-process-pid run-id test-id))   
                 (event-time (vector-ref running-test 5))
                 (duration (vector-ref running-test 12))
                 (flag 0)   
                 (curr-time (current-seconds)))
       (if (and (< (+ event-time duration 600) curr-time) (not (launch:is-test-alive host pid))) ;;test has not updated duration in last 10 min then likely its not running but confirm before marking it as killed
           (begin    
			       	(debug:print 0 *default-log-port* "test " test-name "/" item-path " needs to be killed")
              (set! flag 1) 
              (rmt:set-state-status-and-roll-up-items run-id test-name item-path "KILLREQ" "n/a" #f)))
               (if (not (null? tal))
				  (loop (car tal) (cdr tal) (+ kill-cnt flag))
                 (+ kill-cnt flag))))))

;; DO NOT USE - caching of configs is handled in launch:setup now.
;;
(define (launch:cache-config)
  ;; if we have a linktree and -runtests and -target and the directory exists dump the config
  ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
  (if (and *configdat* 
	   (or (args:get-arg "-run")
	       (args:get-arg "-runtests")
	       (args:get-arg "-execute")))
      (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE"))
	     (target   (common:args-get-target exit-if-bad: #t))
	     (runname  (or (args:get-arg "-runname")
			   (args:get-arg ":runname")
			   (getenv "MT_RUNNAME")))
	     (fulldir  (conc linktree "/"
			     target "/"
			     runname)))
	(if (and linktree (common:file-exists? linktree)) ;; can't proceed without linktree
	    (begin
	      (debug:print-info 0 *default-log-port* "Have -run with target=" target ", runname=" runname ", fulldir=" fulldir ", testpatt=" (or (args:get-arg "-testpatt") "%"))
	      (if (not (common:file-exists? fulldir))
		  (create-directory fulldir #t)) ;; need to protect with exception handler 
	      (if (and target
		       runname
		       (common:file-exists? fulldir))
		  (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
			(targfile (conc fulldir "/.megatest.cfg-"  megatest-version "-" megatest-fossil-hash))
			(rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
		    (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
			(begin
			  (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
                          (if (not (common:in-running-test?))
                              (configf:write-alist *configdat* tmpfile))
			  (system (conc "ln -sf " tmpfile " " targfile))))
		    )))
	    (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))


;; gather available information, if legit read configs in this order:
;;
;;   if have cache;
;;      read it a return it
;;   else
;;     megatest.config     (do not cache)
;;     runconfigs.config   (cache if all vars avail)
;;     megatest.config     (cache if all vars avail)
;;   returns:
;;     *toppath*
;;   side effects:
;;     sets; *configdat*    (megatest.config info)
;;           *runconfigdat* (runconfigs.config info)
;;           *configstatus* (status of the read data)
;;
(define (launch:setup #!key (force-reread #f) (areapath #f))
  (mutex-lock! *launch-setup-mutex*)
  (if (and *toppath*
	   (eq? *configstatus* 'fulldata) (not force-reread)) ;; got it all
      (begin
	(debug:print 2 *default-log-port* "NOTE: skipping launch:setup-body call since we have fulldata")
	(mutex-unlock! *launch-setup-mutex*)
	*toppath*)
      (let ((res (launch:setup-body force-reread: force-reread areapath: areapath)))
	(mutex-unlock! *launch-setup-mutex*)
	res)))

;; return paths depending on what info is available.
;;
(define (launch:get-cache-file-paths areapath toppath target mtconfig)
  (let* ((use-cache (common:use-cache?))
         (runname  (common:args-get-runname))
         (linktree (common:get-linktree))
         (testname (common:get-full-test-name))
         (rundir   (if (and runname target linktree)
                       (common:directory-writable? (conc linktree "/" target "/" runname))
                       #f))
         (testdir  (if (and rundir testname)
                       (common:directory-writable? (conc rundir "/" testname))
                       #f))
         (cachedir (or testdir rundir))
         (mtcachef (and cachedir (conc cachedir "/" ".megatest.cfg-"  megatest-version "-" megatest-fossil-hash)))
         (rccachef (and cachedir (conc cachedir "/" ".runconfigs.cfg-"  megatest-version "-" megatest-fossil-hash))))
    (debug:print-info 6 *default-log-port* 
                      "runname=" runname 
                      "\n  linktree=" linktree
                      "\n  testname=" testname
                      "\n  rundir=" rundir 
                      "\n  testdir=" testdir 
                      "\n  cachedir=" cachedir
                      "\n  mtcachef=" mtcachef
                      "\n  rccachef=" rccachef)
    (cons mtcachef rccachef)))

(define (launch:setup-body #!key (force-reread #f) (areapath #f))
  (if (and (eq? *configstatus* 'fulldata)
	   *toppath*
	   (not force-reread)) ;; no need to reprocess
      *toppath*   ;; return toppath
      (let* ((use-cache (common:use-cache?))  ;; BB- use-cache checks
					      ;; *configdat* for
					      ;; use-cache setting.
					      ;; We do not have
					      ;; *configdat*.
					      ;; Bootstrapping problem
					      ;; here.
	     (toppath  (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath
	     (target   (common:args-get-target))
	     (sections (if target (list "default" target) #f)) ;; for runconfigs
	     (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config 
             (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
	     ;; checking for null cachefiles should not be necessary,
	     ;; I was seeing error car of '(), might be a chicken bug
	     ;; or a red herring ...
	     (mtcachef   (if (null? cachefiles)
			     #f
			     (car cachefiles))) ;; (and cachedir (conc
						;; cachedir "/"
						;; ".megatest.cfg-"
						;; megatest-version
						;; "-"
						;; megatest-fossil-hash)))
	     (rccachef   (if (null? cachefiles)
			     #f
			     (cdr cachefiles)))) ;; (and cachedir
						 ;; (conc cachedir "/"
						 ;; ".runconfigs.cfg-"
						 ;; megatest-version
						 ;; "-"
						 ;; megatest-fossil-hash)))
						 ;; (cancreate (and
						 ;; cachedir
						 ;; (common:file-exists?
						 ;; cachedir)(file-write-access?
						 ;; cachedir) (not
						 ;; (common:in-running-test?)))))
	(set! *toppath* toppath) ;; This is needed when we are running
				 ;; as a test using CMDINFO as a
				 ;; datasource (BB> "launch:setup-body
				 ;; -- cachefiles="cachefiles)
	(cond
	 ;; if mtcachef exists just read it, however we need to assume
	 ;; toppath is available in $MT_RUN_AREA_HOME
	 ((and (not force-reread)
	       mtcachef  rccachef
	       use-cache
	       (get-environment-variable "MT_RUN_AREA_HOME")
	       (common:file-exists? mtcachef)
	       (common:file-exists? rccachef))
          ;;(BB> "launch:setup-body -- cond branch 1 - use-cache")
          (set! *configdat*    (configf:read-alist mtcachef))
          ;;(BB> "launch:setup-body -- 1 set! *configdat*="*configdat*)
	  (set! *runconfigdat* (configf:read-alist rccachef))
	  (set! *configinfo*   (list *configdat*  (get-environment-variable "MT_RUN_AREA_HOME")))
	  (set! *configstatus* 'fulldata)
	  (set! *toppath*      (get-environment-variable "MT_RUN_AREA_HOME"))
	  *toppath*)
	 ;; there are no existing cached configs, do full reads of the
	 ;; configs and cache them we have all the info needed to
	 ;; fully process runconfigs and megatest.config
	 ((and ;; (not force-reread) ;; force-reread is irrelevant in the AND, could however OR it?
	       mtcachef
	       rccachef) ;; BB- why are we doing this without asking if caching is desired?
          ;;(BB> "launch:setup-body -- cond branch 2")
	  (let* ((first-pass    (find-and-read-config        ;; NB// sets MT_RUN_AREA_HOME as side effect
				 mtconfig
				 environ-patt: "env-override"
				 given-toppath: toppath
				 pathenvvar: "MT_RUN_AREA_HOME"))
		 (first-rundat  (let ((toppath (if toppath 
						   toppath
						   (car first-pass))))
				  (read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now.
				   (conc (if (string? toppath)
					     toppath
					     (get-environment-variable "MT_RUN_AREA_HOME"))
					 "/runconfigs.config")
				   *runconfigdat* #t 
				   sections: sections))))
	    (set! *runconfigdat* first-rundat)
	    (if first-pass  ;; 
		(begin
                  ;;(BB> "launch:setup-body -- \"first-pass\"=first-pass")
		  (set! *configdat*  (car first-pass))
                  ;;(BB> "launch:setup-body -- 2 set! *configdat*="*configdat*)
		  (set! *configinfo* first-pass)
		  (set! *toppath*    (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
		  (set! toppath      *toppath*)
		  (if (not *toppath*)
		      (begin
			(debug:print-error 0 *default-log-port* "you are not in a megatest area!")
			(exit 1)))
		  (setenv "MT_RUN_AREA_HOME" *toppath*)
		  ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
		  (let* ((keys         (rmt:get-keys))
			 (key-vals     (keys:target->keyval keys target))
			 (linktree     (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f)))
					;     (if *configdat*
					; 	   (configf:lookup *configdat* "setup" "linktree")
					; 	   (conc *toppath* "/lt"))))
			 (second-pass  (find-and-read-config
					mtconfig
					environ-patt: "env-override"
					given-toppath: toppath
					pathenvvar: "MT_RUN_AREA_HOME"))
			 (runconfigdat (begin     ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config
					 (for-each (lambda (kt)
						     (setenv (car kt) (cadr kt)))
						   key-vals)
					 (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
						      sections: sections)))
                         (cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
                         (mtcachef     (car cachefiles))
                         (rccachef     (cdr cachefiles)))
                    ;;  trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "/p/fdk/gwa/lefkowit/mtTesting/qa/primbeqa/links/p1222/11/PDK_r1.1.1/prim/clean/pcell_testgen/.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
                    ;; TODO - consider 1) using simple-lock to bracket cache write
                    ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.

		    (if rccachef
                        (common:fail-safe
                         (lambda ()
                           (configf:write-alist runconfigdat rccachef))
                         (conc "Could not write cache file - "rccachef)))
                    (if mtcachef
                        (common:fail-safe
                         (lambda ()
                           (configf:write-alist *configdat* mtcachef))
                         (conc "Could not write cache file - "mtcachef)))
		    (set! *runconfigdat* runconfigdat)
		    (if (and rccachef mtcachef) (set! *configstatus* 'fulldata))))
		;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table
		(set! *configdat* (make-hash-table))
		)))

	 ;; else read what you can and set the flag accordingly
	 ;; here we don't have either mtconfig or rccachef
	 (else
          ;;(BB> "launch:setup-body -- cond branch 3 - else")
	  (let* ((cfgdat   (find-and-read-config 
			    (or (args:get-arg "-config") "megatest.config")
			    environ-patt: "env-override"
			    given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
			    pathenvvar: "MT_RUN_AREA_HOME")))

            (if (and cfgdat (list? cfgdat) (> (length cfgdat) 0) (hash-table? (car cfgdat)))
		(let* ((toppath  (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat)))
		       (rdat     (read-config (conc toppath  ;; convert this to use runconfig:read!
						    "/runconfigs.config") *runconfigdat* #t sections: sections)))
		  (set! *configinfo*   cfgdat)
		  (set! *configdat*    (car cfgdat))
		  (set! *runconfigdat* rdat)
		  (set! *toppath*      toppath)
		  (set! *configstatus* 'partial)
		  ;; set up as many vars in *alldat* as possible here
		  (alldat-areapath-set! *alldat* toppath)
		  (alldat-log-port-set! *alldat* *default-log-port*)
		  (alldat-mtconfig-set! *alldat* *configdat*)
		  
		  )
		(begin
		  (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
		  (exit 2))))))
	;; COND ends here.
	
	;; additional house keeping
	(let* ((linktree (or (common:get-linktree)
			     (conc *toppath* "/lt"))))
	  (if linktree
	      (begin
		(if (not (common:file-exists? linktree))
		    (begin
		      (handle-exceptions
			  exn
			  (begin
			    (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree)
			    (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
			    (exit 1))
			(create-directory linktree #t))))
		(handle-exceptions
		    exn
		    (begin
		      (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*)
		      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
		  (let ((tlink (conc *toppath* "/lt")))
		    (if (not (common:file-exists? tlink))
			(create-symbolic-link linktree tlink)))))
	      (begin
		(debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config")
		)))
	(if (and *toppath*
		 (directory-exists? *toppath*))
	    (begin
	      (setenv "MT_RUN_AREA_HOME" *toppath*)
	      (setenv "MT_TESTSUITENAME" (common:get-area-name *alldat*)))
	    (begin
	      (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
	      (set! *toppath* #f) ;; force it to be false so we return #f
	      #f))
	
        ;; one more attempt to cache the configs for future reading
        (let* ((cachefiles   (launch:get-cache-file-paths areapath toppath target mtconfig))
               (mtcachef     (car cachefiles))
               (rccachef     (cdr cachefiles)))

          ;; trap exception due to stale NFS handle -- Error: (open-output-file) cannot open file - Stale NFS file handle: "...somepath.../.runconfigs.cfg-1.6427-7d1e789cb3f62f9cde719a4865bb51b3c17ea853" - ticket 220546342
          ;; TODO - consider 1) using simple-lock to bracket cache write
          ;;                 2) cache in hash on server, since need to do rmt: anyway to lock.
          (if (and rccachef *runconfigdat* (not (common:file-exists? rccachef)))
              (common:fail-safe
               (lambda ()
                 (configf:write-alist *runconfigdat* rccachef))
               (conc "Could not write cache file - "rccachef))
              )
          (if (and mtcachef *configdat*    (not (common:file-exists? mtcachef)))
              (common:fail-safe
               (lambda ()
                 (configf:write-alist *configdat* mtcachef))
               (conc "Could not write cache file - "mtcachef))
              )
          (if (and rccachef mtcachef *runconfigdat* *configdat*)
              (set! *configstatus* 'fulldata)))

	;; if have -append-config then read and append here
	(let ((cfname (args:get-arg "-append-config")))
	  (if (and cfname
		   (file-read-access? cfname))
	      (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
	*toppath*)))

(define (get-best-disk confdat testconfig)
  (let* ((disks   (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
		      (hash-table-ref/default confdat "disks" #f)))
	 (minspace (let ((m (configf:lookup confdat "setup" "minspace")))
		     (string->number (or m "10000")))))
    (if disks 
	(let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb
	  (if res
	      (cdr res)
	      (begin
;;		(if (common:low-noise-print 20 "No valid disks or no disk with enough space")
;;		    (debug:print-error 0 *default-log-port* "No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists and has enough space!\n    You can change minspace in the [setup] section of megatest.config. Current setting is: " minspace))
		;;(exit 1)
                 (if (null? disks)
                     (cons 1 (conc *toppath* "/runs"))
                     (let ((paths (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y)))))))
                       (let loop ((head (car paths)) (tail (cdr paths)))
                         (let ((result (handle-exceptions exn #f (create-directory (cadr head) #t))))
                           (if result
                               result
                               (if (null? tail)
                                   (cons 1 (conc *toppath* "/runs"))
                                   (loop (car tail) (cdr tail)))))))))))))) ;; the code creates the necessary directories if it does not exist and returns the path.


(define (launch:test-copy test-src-path test-path)
  (let* ((ovrcmd (let ((cmd (config-lookup *configdat* "setup" "testcopycmd")))
		   (if cmd
		       ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH
		       (string-substitute "TEST_TARG_PATH" test-path
					  (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t)
		       #f)))
	 (cmd    (if ovrcmd 
		     ovrcmd
		     (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"
			   " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log")))
	 (status (system cmd)))
    (if (not (eq? status 0))
	(debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\""))))


;; Desired directory structure:
;;
;;  <linkdir> - <target> - <testname> -.
;;                                     |
;;                                     v
;;  <rundir>  -  <target>  -    <testname> -|- <itempath(s)>
;;
;;  dir stored in test is:
;; 
;;  <linkdir> - <target> - <testname> [ - <itempath> ]
;; 
;; All log file links should be stored relative to the top of link path
;;  
;; <target> - <testname> [ - <itempath> ] 
;;
(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2))
  (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it
	 (runname   (if (string? run-info) ;; if we pass in a string as run-info use it as run-name.
			run-info
			(db:get-value-by-header (db:get-rows run-info)
						(db:get-header run-info)
						"runname")))
	 (contour   #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
	 ;; convert back to db: from rdb: - this is always run at server end
	 (target   (string-intersperse (map cadr keyvals) "/"))

	 (not-iterated  (equal? "" item-path))

	 ;; all tests are found at <rundir>/test-base or <linkdir>/test-base
	 (testtop-base (conc target "/" runname "/" testname))
	 (test-base    (conc testtop-base (if not-iterated "" "/") item-path))

	 ;; nb// if itempath is not "" then it is prefixed with "/"
	 (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base))
	 (test-path    (conc disk-path (if contour (conc "/" contour) "") "/" test-base))

	 ;; ensure this exists first as links to subtests must be created there
	 (linktree  (common:get-linktree))
	 ;; WAS: (let ((rd (config-lookup *configdat* "setup" "linktree")))
	 ;;         (if rd rd (conc *toppath* "/runs"))))
	 ;; which seems wrong ...

	 (lnkbase   (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname))
	 (lnkpath   (conc lnkbase "/" testname))
	 (lnkpathf  (conc lnkpath (if not-iterated "" "/") item-path))
	 (lnktarget (conc lnkpath "/" item-path)))

    ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical
    ;;                                                 rundir   shortdir
    (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id)

    (debug:print 2 *default-log-port* "INFO:\n       lnkbase=" lnkbase "\n       lnkpath=" lnkpath "\n  toptest-path=" toptest-path "\n     test-path=" test-path)
    (if (not (common:file-exists? linktree))
	(begin
	  (debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree)
	  (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree))))
    ;; create the directory for the tests dir links, this is needed no matter what... try up to three times
    (let loop ((done 3)) 
      (let ((success (if (and (not (common:directory-exists? lnkbase))
			      (not (common:file-exists? lnkbase)))
			 (handle-exceptions
			  exn
			  (begin
			    (debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase)
			    (print-error-message exn (current-error-port))
			    #t)
			  (create-directory lnkbase #t)
			  #f))))
	(if (and (not success)(> done 0))
	    (loop (- done 1)))))
    
    ;; update the toptest record with its location rundir, cache the path
    ;; This wass highly inefficient, one db write for every subtest, potentially
    ;; thousands of unnecessary updates, cache the fact it was set and don't set it 
    ;; again. 

    ;; Now create the link from the test path to the link tree, however
    ;; if the test is iterated it is necessary to create the parent path
    ;; to the iteration. use pathname-directory to trim the path by one
    ;; level
    (if (not not-iterated) ;; i.e. iterated
	(let ((iterated-parent  (pathname-directory (conc lnkpath "/" item-path))))
	  (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent)
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting")
	     (exit 1))
	   (create-directory iterated-parent #t))))

    (if (symbolic-link? lnkpath) 
	(handle-exceptions
	 exn
	 (begin
	   (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
	   (exit 1))
	 (delete-file lnkpath)))

    (if (not (or (common:file-exists? lnkpath)
		 (symbolic-link? lnkpath)))
	(handle-exceptions
	 exn
	 (begin
	   (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
	   (exit 1))
	 (create-symbolic-link toptest-path lnkpath)))
    
    ;; NB - This was not working right - some top tests are not getting the path set!!!
    ;;
    ;; Do the setting of this record after the paths are created so that the shortdir can 
    ;; be set to the real directory location. This is safer for future clean up if the link
    ;; tree is damaged or lost.
    ;; 
    (if (not (hash-table-ref/default *toptest-paths* testname #f))
	(let* ((testinfo       (rmt:get-test-info-by-id run-id test-id)) ;;  run-id testname item-path))
	       (curr-test-path (if testinfo ;; (filedb:get-path *fdb*
							     ;; (db:get-path dbstruct
				   ;; (rmt:sdb-qry 'getstr 
				   (db:test-get-rundir testinfo) ;; ) ;; )
				   #f)))
	  (hash-table-set! *toptest-paths* testname curr-test-path)
	  ;; NB// Was this for the test or for the parent in an iterated test?
	  (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath 
			    (if (common:file-exists? lnkpath)
				;; (resolve-pathname lnkpath)
				(common:nice-path lnkpath)
				lnkpath)
			    testname "" run-id)
	  ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path)
	  (if (or (not curr-test-path)
		  (not (directory-exists? toptest-path)))
	      (begin
		(debug:print-info 2 *default-log-port* "Creating " toptest-path " and link " lnkpath)
		(handle-exceptions
		 exn
		 #f ;; don't care to catch and deal with errors here for now.
		 (create-directory toptest-path #t))
		(hash-table-set! *toptest-paths* testname toptest-path)))))

    ;; The toptest path has been created, the link to the test in the linktree has
    ;; been created. Now, if this is an iterated test the real test dir must be created
    (if (not not-iterated) ;; this is an iterated test
	(begin ;; (let ((lnktarget (conc lnkpath "/" item-path)))
	  (debug:print 2 *default-log-port* "Setting up sub test run area")
	  (debug:print 2 *default-log-port* " - creating run area in " test-path)
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print-error 0 *default-log-port* " Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting")
	     (exit 1))
	   (create-directory test-path #t))
	  (debug:print 2 *default-log-port* 
		       " - creating link from: " test-path "\n"
		       "                   to: " lnktarget)

	  ;; If there is already a symlink delete it and recreate it.
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print-error 0 *default-log-port* " Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting")
	     (exit))
	   (if (symbolic-link? lnktarget)     (delete-file lnktarget))
	   (if (not (common:file-exists? lnktarget)) (create-symbolic-link test-path lnktarget)))))

    (if (not (directory? test-path))
	(create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes

    (if (and test-src-path (directory? test-path))
	(begin
	  (launch:test-copy test-src-path test-path)
	  (list lnkpathf lnkpath ))
	(if (and test-src-path (> remtries 0))
	    (begin
	      (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries)
	      ;; 
	      (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1)))
	    (list #f #f)))))


(define (launch:handle-zombie-tests run-id)
  (let* ((key (conc "zombiescan-runid-"run-id))
         (now (current-seconds))
         (threshold (- (current-seconds)  (* 2 (or (configf:lookup-number *configdat* "setup" "deadtime") 120))))
         (val (rmt:get-var key))
         (do-scan?
          (cond
           ((not val)
            #t)
           ((< val threshold)
            #t)
           (else #f))))
    (when do-scan?
      (debug:print 1 *default-log-port* "INFO: search and mark zombie tests")
      (rmt:set-var key (current-seconds))
      (rmt:find-and-mark-incomplete run-id #f))))





;; 1. look though disks list for disk with most space
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (let* ( ;; (lock-key        (conc "test-" test-id))
	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin
	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
	;; 			    (begin
	;; 			      (thread-sleep! 1)
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
	 (item-path       (item-list->path itemdat))
	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
    (let loop ((delta        (- (current-seconds) *last-launch*))
	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1)))
      (if (> launch-delay delta)
	  (begin
	    (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
		(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
	    (thread-sleep! (- launch-delay delta))
	    (loop (- (current-seconds) *last-launch*) launch-delay))))
    (change-directory *toppath*)
    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
     (append
      (list
       (list "MT_RUN_AREA_HOME" *toppath*)
       (list "MT_TEST_NAME" test-name)
       (list "MT_RUNNAME"   runname)
       (list "MT_ITEMPATH"  item-path)
       (list "MT_CONTOUR"   contour)
       )
      itemdat))
    (let* ((tregistry       (tests:get-all)) ;; third param (below) is system-allowed
           ;; for tconfig, why do we allow fallback to test-conf?
	   (tconfig         (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t)
				(begin
                                  (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.")
                                  test-conf))) ;; force re-read now that all vars are set
	   (useshell        (let ((ush (config-lookup *configdat* "jobtools"     "useshell")))
			      (if ush 
				  (if (equal? ush "no") ;; must use "no" to NOT use shell
				      #f
				      ush)
				  #t)))     ;; default is yes
	   (runscript       (config-lookup tconfig   "setup"        "runscript"))
	   (ezsteps         (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big, just send a flag
	   (subrun          (> (length (hash-table-ref/default tconfig "subrun"  '())) 0)) ;; send a flag to process a subrun
	   ;; (diskspace       (config-lookup tconfig   "requirements" "diskspace"))
	   ;; (memory          (config-lookup tconfig   "requirements" "memory"))
	   ;; (hosts           (config-lookup *configdat* "jobtools"     "workhosts")) ;; I'm pretty sure this was never completed
	   (remote-megatest (config-lookup *configdat* "setup" "executable"))
	   (run-time-limit  (or (configf:lookup  tconfig   "requirements" "runtimelim")
				(configf:lookup  *configdat* "setup" "runtimelim")))
	   ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to 
	   ;;                allow running from dashboard. Extract the path
	   ;;                from the called megatest and convert dashboard
	   ;;             	  or dboard to megatest
	   (local-megatest  (let* ((lm  (car (argv)))
				   (dir (pathname-directory lm))
				   (exe (pathname-strip-directory lm)))
			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
				      ((dboard)    "../megatest")
				      ((mtest)     "../megatest")
				      ((dashboard) "megatest")
				      (else exe)))))
	   (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools"     "launcher"))
	   (test-sig        (conc (common:get-area-name *alldat*) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
	   (work-area       #f)
	   (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	   (diskpath   #f)
	   (cmdparms   #f)
	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	   (mt-bindir-path #f)
	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
				(if (args:get-arg "-logging")(list "-logging") '()))))
      ;; (if hosts (set! hosts (string-split hosts)))
      ;; set the megatest to be called on the remote host
      (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
      (set! mt-bindir-path (pathname-directory remote-megatest))
      (if launcher (set! launcher (string-split launcher)))
      ;; set up the run work area for this test
      (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
	       (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
	  (begin
	    (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
	    (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record
      
      ;; prevent overlapping actions - set to LAUNCHED as early as possible
      ;;
      ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
      (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f)
      ;; (pp (hash-table->alist tconfig))
      (set! diskpath (get-best-disk *configdat* tconfig))
      (if diskpath
	  (let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
	    (set! work-area (car dat))
	    (set! toptest-work-area (cadr dat))
	    (debug:print-info 2 *default-log-port* "Using work area " work-area))
	  (begin
	    (set! work-area (conc test-path "/tmp_run"))
	    (create-directory work-area #t)
	    (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run")))
      (set! cmdparms (base64:base64-encode 
		      (z3:encode-buffer 
		       (with-output-to-string
			 (lambda () ;; (list 'hosts     hosts)
			   (write (list (list 'testpath  test-path)
					;; (list 'transport (conc *transport-type*))
					;; (list 'serverinf *server-info*)
					(list 'homehost  (let* ((hhdat (common:get-homehost)))
							   (if hhdat
							       (car hhdat)
							       #f)))
					(list 'serverurl (if *alldat* 
							     (alldat-server-url *alldat*)
							     #f)) ;;
					(list 'areaname  (common:get-area-name *alldat*))
					(list 'toppath   *toppath*)
					(list 'work-area work-area)
					(list 'test-name test-name) 
					(list 'runscript runscript) 
					(list 'run-id    run-id   )
					(list 'test-id   test-id  )
					;; (list 'item-path item-path )
					(list 'itemdat   itemdat  )
					(list 'megatest  remote-megatest)
					(list 'ezsteps   ezsteps)
					(list 'subrun    subrun)
					(list 'target    mt_target)
					(list 'contour   contour)
					(list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
					(list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
					(list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
					(list 'runname   runname)
					(list 'mt-bindir-path mt-bindir-path))))))))
      
      ;; clean out step records from previous run if they exist
      ;; (rmt:delete-test-step-records run-id test-id)
      ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
      (if (common:file-exists? work-area)
	  (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
      (cond
       ;; ((and launcher hosts) ;; must be using ssh hostname
       ;;    (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
       ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
       (launcher
	(set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
       ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
       (else
	(if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section"))
	(set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
      ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
      (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
      (debug:print 1 *default-log-port* "Launching " work-area)
      ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done
      (debug:print 4 *default-log-port* "fullcmd: " fullcmd)
      (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible.
      (let* ((commonprevvals (alist->env-vars
			      (hash-table-ref/default *configdat* "env-override" '())))
	     (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
			      (append (list (list "MT_TEST_RUN_DIR" work-area)
					    (list "MT_TEST_NAME" test-name)
					    (list "MT_ITEM_INFO" (conc itemdat)) 
					    (list "MT_RUNNAME"   runname)
					    (list "MT_TARGET"    mt_target)
					    (list "MT_ITEMPATH"  item-path)
					    )
				      itemdat)))
	     (testprevvals   (alist->env-vars
			      (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))
	     ;; Launchwait defaults to true, must override it to turn off wait
	     (launchwait     (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
	     (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed.
					process:cmd-run-with-stderr-and-exitcode->list
					process-run)
				    (if useshell
					(let ((cmdstr (string-intersperse fullcmd " ")))
					  (if launchwait
					      cmdstr
					      (conc cmdstr " >> mt_launch.log 2>&1 &")))
					(car fullcmd))
				    (if useshell
					'()
					(cdr fullcmd))))
             (success        (if launchwait (equal? 0 (cadr launch-results-prev)) #t))
             (launch-results (if launchwait (car launch-results-prev) launch-results-prev)))
        (if (not success)
            (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED"))
        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
	;; (rmt:no-sync-del! lock-key)         ;; release the lock for starting this test
	(if (not launchwait) ;; give the OS a little time to allow the process to start
	    (thread-sleep! 0.01))
	(with-output-to-file "mt_launch.log"
	  (lambda ()
	    (print "LAUNCHCMD: " (string-intersperse fullcmd " "))
	    (if (list? launch-results)
		(apply print launch-results)
		(print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
	    #:append))
	(debug:print 2 *default-log-port* "Launching completed, updating db")
	(debug:print 2 *default-log-port* "Launch results: " launch-results)
	(if (not launch-results)
	    (begin
	      (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
	      ;; (sqlite3:finalize! db)
	      ;; good ole "exit" seems not to work
	      ;; (_exit 9)
	      ;; but this hack will work! Thanks go to Alan Post of the Chicken email list
	      ;; NB// Is this still needed? Should be safe to go back to "exit" now?
	      (process-signal (current-process-id) signal/kill)
	      ))
	(alist->env-vars miscprevvals)
	(alist->env-vars testprevvals)
	(alist->env-vars commonprevvals)
	launch-results))
    (change-directory *toppath*)))

;; recover a test where the top controlling mtest may have died
;;
(define (launch:recover-test run-id test-id)
  ;; this function is called on the test run host via ssh
  ;;
  ;; 1. look at the process from pid
  ;;    - is it owned by calling user
  ;;    - it it's run directory correct for the test
  ;;    - is there a controlling mtest (maybe stuck)
  ;; 2. if recovery is needed watch pid
  ;;    - when it exits take the exit code and do the needful
  ;;
  (let* ((pid (rmt:test-get-top-process-pid run-id test-id))
	 (psres (with-input-from-pipe
		 (conc "ps -F -u " (current-user-name) " | grep -E '" pid " ' | grep -v 'grep -E " pid "'")
		 (lambda ()
		   (read-line))))
	 (rundir (if (string? psres) ;; real process owned by user
		     (read-symbolic-link (conc "/proc/" pid "/cwd"))
		     #f)))
    ;; now wait on that process if all is correct
    ;; periodically update the db with runtime
    ;; when the process exits look at the db, if still RUNNING after 10 seconds set
    ;; state/status appropriately
    (process-wait pid)))

;;======================================================================
;; lock-queue
;;======================================================================


;;======================================================================
;; attempt to prevent overlapping updates of rollup files by queueing
;; update requests in an sqlite db
;;======================================================================

;;======================================================================
;; db record, <vector db path-to-db>
;;======================================================================

(define (make-lock-queue:db-dat)(make-vector 3))
(define-inline (lock-queue:db-dat-get-db        vec)    (vector-ref  vec 0))
(define-inline (lock-queue:db-dat-get-path      vec)    (vector-ref  vec 1))
(define-inline (lock-queue:db-dat-set-db!       vec val)(vector-set! vec 0 val))
(define-inline (lock-queue:db-dat-set-path!     vec val)(vector-set! vec 1 val))

(define (lock-queue:delete-lock-db dbdat)
  (let ((fname (lock-queue:db-dat-get-path dbdat)))
    (system (conc "rm -f " fname "*"))))

(define (lock-queue:open-db fname #!key (count 10))
  (let* ((actualfname (conc fname ".lockdb"))
	 (dbexists (common:file-exists? actualfname))
	 (db       (sqlite3:open-database actualfname))
	 (handler  (make-busy-timeout 136000)))
    (if dbexists
	(vector db actualfname)
	(begin
	  (handle-exceptions
	   exn
	   (begin
	     (thread-sleep! 10)
	     (if (> count 0)
		 (lock-queue:open-db fname count: (- count 1))
		 (vector db actualfname)))
	   (sqlite3:with-transaction
	    db
	    (lambda ()
	      (sqlite3:execute 
	       db
	       "CREATE TABLE IF NOT EXISTS queue (
     	         id         INTEGER PRIMARY KEY,
                 test_id    INTEGER,
                 start_time INTEGER,
                 state      TEXT,
                 CONSTRAINT queue_constraint UNIQUE (test_id));")
	      (sqlite3:execute
	       db
	       "CREATE TABLE IF NOT EXISTS runlocks (
                 id         INTEGER PRIMARY KEY,
                 test_id    INTEGER,
                 run_lock   TEXT,
                 CONSTRAINT runlock_constraint UNIQUE (run_lock));"))))))
    (sqlite3:set-busy-handler! db handler)
    (vector db actualfname)))

(define (lock-queue:set-state dbdat test-id newstate #!key (remtries 10))
  (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200)
  (handle-exceptions
   exn
   (if (> remtries 0)
       (begin
	 (debug:print 0 *default-log-port* "WARNING: exception on lock-queue:set-state. Trying again in 30 seconds.")
	 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	 (thread-sleep! 30)
	 (lock-queue:set-state dbdat test-id newstate remtries: (- remtries 1)))
       (begin
	 (debug:print-error 0 *default-log-port* " Failed to set lock state for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.")
	 #f))
   (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "UPDATE queue SET state=? WHERE test_id=?;"
		    newstate
		    test-id)))

(define (lock-queue:any-younger? dbdat mystart test-id #!key (remtries 10))
  ;; no need to wait on journal on read only queries
  ;; (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200)
  (handle-exceptions
   exn
   (if (> remtries 0)
       (begin
	 (debug:print 0 *default-log-port* "WARNING: exception on lock-queue:any-younger. Removing lockdb and trying again in 5 seconds.")
	 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	 (thread-sleep! 5)
         (lock-queue:delete-lock-db dbdat)
	 (lock-queue:any-younger? dbdat mystart test-id remtries: (- remtries 1)))
       (begin
	 (debug:print-error 0 *default-log-port* " Failed to find younger locks for test with id " test-id ", error: " ((condition-property-accessor 'exn 'message) exn) ", giving up.")
	 #f))
   (let ((res #f))
     (sqlite3:for-each-row
      (lambda (tid)
	;; Actually this should not be needed as mystart cannot be simultaneously less than and test-id same as 
	(if (not (equal? tid test-id)) 
	    (set! res tid)))
      (lock-queue:db-dat-get-db dbdat)
      "SELECT test_id FROM queue WHERE start_time > ?;" mystart)
     res)))

(define (lock-queue:get-lock dbdat test-id #!key (count 10)(waiting-msg #f))
  (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 remove: #t waiting-msg: "lock-queue:get-lock, waiting on journal")
  (let* ((res       #f)
	 (db        (lock-queue:db-dat-get-db dbdat))
	 (lckqry    (sqlite3:prepare db "SELECT test_id,run_lock FROM runlocks WHERE run_lock='locked';"))
	 (mklckqry  (sqlite3:prepare db "INSERT INTO runlocks (test_id,run_lock) VALUES (?,'locked');")))
    (let ((result 
	   (handle-exceptions
	    exn
	    (begin
	      (debug:print 0 *default-log-port* "WARNING: failed to get queue lock. Removing lock db and returning fail") ;; Will try again in a few seconds")
	      (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	      (thread-sleep! 10)
	      ;; (if (> count 0)	
	      ;;  #f ;; (lock-queue:get-lock dbdat test-id count: (- count 1)) - give up on retries 
	      ;; (begin ;; never recovered, remote the lock file and return #f, no lock obtained
	      (lock-queue:delete-lock-db dbdat)
	      #f)
	    (sqlite3:with-transaction
	     db
	     (lambda ()
	       (sqlite3:for-each-row (lambda (tid lockstate)
				       (set! res (list tid lockstate)))
				     lckqry)
	       (if res
		   (if (equal? (car res) test-id)
		       #t ;; already have the lock
		       #f)
		   (begin
		     (sqlite3:execute mklckqry test-id)
		     ;; if no error handled then return #t for got the lock
		     #t)))))))
      (sqlite3:finalize! lckqry)
      (sqlite3:finalize! mklckqry)
      result)))

(define (lock-queue:release-lock fname test-id #!key (count 10))
  (let* ((dbdat (lock-queue:open-db fname)))
    (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 "lock-queue:release-lock; waiting on journal")
    (handle-exceptions
     exn
     (begin
       (debug:print 0 *default-log-port* "WARNING: Failed to release queue lock. Will try again in few seconds")
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (thread-sleep! (/ count 10))
       (if (> count 0)
	   (begin
	     (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat))
	     (lock-queue:release-lock fname test-id count: (- count 1)))
	   (let ((journal (conc fname "-journal")))
	     ;; If we've tried ten times and failed there is a serious problem
	     ;; try to remove the lock db and allow it to be recreated
	     (handle-exceptions
	      exn
	      #f
	      (if (common:file-exists? journal)(delete-file journal))
	      (if (common:file-exists? fname)  (delete-file fname))
	      #f))))
     (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE test_id=?;" test-id)
     (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat)))))

(define (lock-queue:steal-lock dbdat test-id #!key (count 10))
  (debug:print-info 0 *default-log-port* "Attempting to steal lock at " (lock-queue:db-dat-get-path dbdat))
  (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 "lock-queue:steal-lock; waiting on journal")
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "WARNING: Failed to steal queue lock. Will try again in few seconds")
     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
     (thread-sleep! 10)
     (if (> count 0)
	 (lock-queue:steal-lock dbdat test-id count: (- count 1))
	 #f))
   (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE run_lock='locked';"))
  (lock-queue:get-lock dbdat test-id))

;; returns #f if ok to skip the task
;; returns #t if ok to proceed with task
;; otherwise waits
;;
(define (lock-queue:wait-turn fname test-id #!key (count 10)(waiting-msg #f))
  (let* ((dbdat   (lock-queue:open-db fname))
	 (mystart (current-seconds))
	 (db      (lock-queue:db-dat-get-db dbdat)))
    ;; (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 waiting-msg: "lock-queue:wait-turn; waiting on journal file")
    (handle-exceptions
     exn
     (begin
       (debug:print 0 *default-log-port* "WARNING: Failed to find out if it is ok to skip the wait queue. Will try again in few seconds")
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (print-call-chain (current-error-port))
       (thread-sleep! 10)
       (if (> count 0)
	   (begin
	     (sqlite3:finalize! db)
	     (lock-queue:wait-turn fname test-id count: (- count 1)))
	   (begin
	     (debug:print 0 *default-log-port* "Giving up calls to lock-queue:wait-turn for test-id " test-id " at path " fname ", printing call chain")
	     (print-call-chain (current-error-port))
	     #f)))
     ;; wait 10 seconds and then check to see if someone is already updating the html
     (thread-sleep! 10)
     (if (not (lock-queue:any-younger? dbdat mystart test-id)) ;; no processing in flight, must try to start processing
	 (begin
	   (tasks:wait-on-journal (lock-queue:db-dat-get-path dbdat) 1200 waiting-msg: "lock-queue:wait-turn; waiting on journal file")
	   (sqlite3:execute
	    db
	    "INSERT OR REPLACE INTO queue (test_id,start_time,state) VALUES (?,?,'waiting');"
	    test-id mystart)
	   ;; (thread-sleep! 1) ;; give other tests a chance to register
	   (let ((result 
		  (let loop ((younger-waiting (lock-queue:any-younger? dbdat mystart test-id)))
		    (if younger-waiting
			(begin
			  ;; no need for us to wait. mark in the lock queue db as skipping
			  ;; no point in marking anything in the queue - simply never register this
			  ;; test as it is *covered* by a previously started update to the html file
			  ;; (lock-queue:set-state dbdat test-id "skipping")
			  #f) ;; let the calling process know that nothing needs to be done
			(if (lock-queue:get-lock dbdat test-id)
			    #t
			    (if (> (- (current-seconds) mystart) 36000) ;; waited too long, steal the lock
				(lock-queue:steal-lock dbdat test-id)
				(begin
				  (thread-sleep! 1)
				  (loop (lock-queue:any-younger? dbdat mystart test-id)))))))))
	     (sqlite3:finalize! db)
	     result))))))
	  
            
;; (use trace)
;; (trace lock-queue:get-lock lock-queue:release-lock lock-queue:wait-turn lock-queue:any-younger? lock-queue:set-state)

;;======================================================================
;; margs
;;======================================================================
(define args:arg-hash (make-hash-table))

(define (args:get-arg arg . default)
  (if (null? default)
      (hash-table-ref/default args:arg-hash arg #f)
      (hash-table-ref/default args:arg-hash arg (car default))))

(define (args:any? . args)
  (not (null? (filter (lambda (x) x)
		      (map args:get-arg args)))))

(define (args:get-arg-from ht arg . default)
  (if (null? default)
      (hash-table-ref/default ht arg #f)
      (hash-table-ref/default ht arg (car default))))

(define (args:usage . args)
  (if (> (length args) 0)
      (apply print "ERROR: " args))
  (if (string? help)
      (print help)
      (print "Usage: " (car (argv)) " ... "))
  (exit 0))

 ;; one-of args defined
(define (args:any-defined? . param)
  (let ((res #f))
    (for-each 
     (lambda (arg)
       (if (args:get-arg arg)(set! res #t)))
     param)
    res))

;; args: 
(define (args:get-args args params switches arg-hash num-needed)
  (let* ((numargs (length args))
	 (adj-num-needed (if num-needed (+ num-needed 2) #f)))
    (if (< numargs (if adj-num-needed adj-num-needed 2))
	(if (>= num-needed 1)
	    (args:usage "No arguments provided")
	    '())
	(let loop ((arg (cadr args))
		   (tail (cddr args))
		   (remargs '()))
	  (cond 
	   ((member arg params) ;; args with params
	    (if (< (length tail) 1)
		(args:usage "param given without argument " arg)
		(let ((val     (car tail))
		      (newtail (cdr tail)))
		  (hash-table-set! arg-hash arg val)
		  (if (null? newtail) remargs
		      (loop (car newtail)(cdr newtail) remargs)))))
	   ((member arg switches)         ;; args with no params (i.e. switches)
	    (hash-table-set! arg-hash arg #t)
	    (if (null? tail) remargs
		(loop (car tail)(cdr tail) remargs)))
	   (else
	    (if (null? tail)(append remargs (list arg)) ;; return the non-used args
		(loop (car tail)(cdr tail)(append remargs (list arg))))))))
    ))

(define (args:print-args remargs arg-hash)
  (print "ARGS: " remargs)
  (for-each (lambda (arg)
	      (print "   " arg "   " (hash-table-ref/default arg-hash arg #f)))
	    (hash-table-keys arg-hash)))

;;======================================================================
;; mt
;;======================================================================

;; This is the Megatest API. All generally "useful" routines will be wrapped or extended
;; here.

;;======================================================================
;;  R U N S
;;======================================================================

;; runs:get-runs-by-patt
;; get runs by list of criteria
;; register a test run with the db
;;
;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;;  to extract info from the structure returned
;;
(define (mt:get-runs-by-patt keys runnamepatt targpatt)
  (let loop ((runsdat  (rmt:get-runs-by-patt keys runnamepatt targpatt 0 500 #f 0))
	     (res      '())
	     (offset   0)
	     (limit    500))
    ;; (print "runsdat: " runsdat)
    (let* ((header    (vector-ref runsdat 0))
	   (runslst   (vector-ref runsdat 1))
	   (full-list (append res runslst))
	   (have-more (eq? (length runslst) limit)))
      ;; (debug:print 0 *default-log-port* "header: " header " runslst: " runslst " have-more: " have-more)
      (if have-more 
	  (let ((new-offset (+ offset limit))
		(next-batch (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit #f 0)))
	    (debug:print-info 4 *default-log-port* "More than " limit " runs, have " (length full-list) " runs so far.")
	    (debug:print-info 0 *default-log-port* "next-batch: " next-batch)
	    (loop next-batch
		  full-list
		  new-offset
		  limit))
	 (vector header full-list)))))

;;======================================================================
;;  T E S T S
;;======================================================================

(define (mt:get-tests-for-run run-id testpatt states status #!key (not-in #t) (sort-by 'event_time) (sort-order "ASC") (qryvals #f)(last-update #f))
  (let loop ((testsdat (rmt:get-tests-for-run run-id testpatt states status 0 500 not-in sort-by sort-order qryvals last-update 'normal))
	     (res      '())
	     (offset   0)
	     (limit    500))
    (let* ((full-list (append res testsdat))
	   (have-more (eq? (length testsdat) limit)))
      (if have-more 
	  (let ((new-offset (+ offset limit)))
	    (debug:print-info 4 *default-log-port* "More than " limit " tests, have " (length full-list) " tests so far.")
	    (loop (rmt:get-tests-for-run run-id testpatt states status new-offset limit not-in sort-by sort-order qryvals last-update 'normal)
		  full-list
		  new-offset
		  limit))
	  full-list))))

(define (mt:lazy-get-prereqs-not-met run-id waitons ref-item-path #!key (mode '(normal))(itemmaps #f) )
  (let* ((key    (list run-id waitons ref-item-path mode))
	 (res    (hash-table-ref/default *pre-reqs-met-cache* key #f))
	 (useres (let ((last-time (if (vector? res) (vector-ref res 0) #f)))
		   (if last-time
		       (< (current-seconds)(+ last-time 5))
		       #f))))
    (if useres
	(let ((result (vector-ref res 1)))
	  (debug:print 4 *default-log-port* "Using lazy value res: " result)
	  result)
	(let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmaps: itemmaps)))
	  (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres))
	  newres))))

(define (mt:get-run-stats dbstruct run-id)
;;  Get run stats from local access, move this ... but where?
  (db:get-run-stats dbstruct run-id))

(define (mt:discard-blocked-tests run-id failed-test tests test-records)
  (if (null? tests)
      tests
      (begin
	(debug:print-info 1 *default-log-port* "Discarding tests from " tests " that are waiting on " failed-test)
	(let loop ((testn (car tests))
		   (remt  (cdr tests))
		   (res   '()))
	  (let* ((test-dat (hash-table-ref/default test-records testn (vector #f #f '())))
		 (waitons  (vector-ref test-dat 2)))
	    ;; (print "mt:discard-blocked-tests run-id: " run-id " failed-test: " failed-test " testn: " testn " with waitons: " waitons)
	    (if (null? remt)
		(let ((new-res (reverse res)))
		  ;; (print "       new-res: " new-res)
		  new-res)
		(loop (car remt)
		      (cdr remt)
		      (if (member failed-test waitons)
			  (begin
			    (debug:print 0 *default-log-port* "Discarding test " testn "(" test-dat ") due to " failed-test)
			    res)
			  (cons testn res)))))))))

;;======================================================================
;;  T R I G G E R S
;;======================================================================

(define (mt:run-trigger cmd test-id test-rundir trigger logname test-name item-path event-time actual-state actual-status)
  ;; Putting the commandline into ( )'s means no control over the shell. 
  ;; stdout and stderr will be caught in the NBFAKE or mt_launch.log files
  ;; or equivalent. No need to do this. Just run it?
  (let* ((fullcmd (conc "nbfake "
			cmd           " "
			test-id       " "
			test-rundir   " "
			trigger       " "
			test-name     " "
			item-path     " " ;; has / prepended to deal with toplevel tests
			actual-state  " "
			actual-status " "
			event-time
			))
	 (prev-nbfake-log (get-environment-variable "NBFAKE_LOG")))
    (setenv "NBFAKE_LOG" (conc (cond
				((and (directory-exists? test-rundir)
				      (file-write-access? test-rundir))
				 test-rundir)
				((and (directory-exists? *toppath*)
				      (file-write-access? *toppath*))
				 *toppath*)
				(else (conc "/tmp/" (current-user-name))))
			       "/" logname))
    (debug:print-info 0 *default-log-port* "TRIGGERED on " trigger ", running command " fullcmd " output at " (get-environment-variable "NBFAKE_LOG"))
    ;; (call-with-environment-variables
    ;;  `(("NBFAKE_LOG" . ,(conc test-rundir "/" logname)))
    ;;  (lambda ()
    (process-run fullcmd)
    (if prev-nbfake-log
	(setenv "NBFAKE_LOG" prev-nbfake-log)
	(unsetenv "NBFAKE_LOG"))
    )) ;; ))

(define (mt:process-triggers dbstruct run-id test-id newstate newstatus)
  (if test-id 
      (let* ((test-dat      (db:get-test-info-by-id dbstruct run-id test-id)))
	(if test-dat
	    (let* ((test-rundir   (db:test-get-rundir       test-dat)) ;; ) ;; )
		   (test-name     (db:test-get-testname     test-dat))
		   (item-path     (db:test-get-item-path    test-dat))
		   (duration      (db:test-get-run_duration test-dat))
		   (comment       (db:test-get-comment      test-dat))
		   (event-time    (db:test-get-event_time   test-dat))
		   (tconfig       #f)
		   (state         (if newstate  newstate  (db:test-get-state  test-dat)))
		   (status        (if newstatus newstatus (db:test-get-status test-dat))))
	      ;; (mutex-lock! *triggers-mutex*)
              (handle-exceptions
               exn
               (begin
                 (debug:print-error 0 *default-log-port* " Exception in mt:process-triggers for run-id="run-id" test-id="test-id" newstate="newstate" newstatus="newstatus
                                    "\n   error: " ((condition-property-accessor 'exn 'message) exn)
                                    "\n   test-rundir="test-rundir
                                    "\n   test-name="test-name
                                    "\n   item-path="item-path
                                    "\n   state="state
                                    "\n   status="status
                                    "\n")
                 (print-call-chain (current-error-port))
                 #f)
               (if (and test-name
                        test-rundir)   ;; #f means no dir set yet
                   ;; (common:file-exists? test-rundir)
                   ;; (directory? test-rundir))
                   (call-with-environment-variables
                    (list (cons "MT_TEST_NAME"    (or test-name "no such test"))
                          (cons "MT_TEST_RUN_DIR" (or test-rundir "no test directory yet"))
                          (cons "MT_ITEMPATH"     (or item-path "")))
                    (lambda ()
                      (if (directory-exists? test-rundir)
                          (push-directory test-rundir)
                          (push-directory *toppath*))
                      (set! tconfig (mt:lazy-read-test-config test-name))
                      (for-each (lambda (trigger)
                                  (let* ((munged-trigger (string-translate trigger "/ " "--"))
					(logname        (conc "last-trigger-" munged-trigger ".log")))
                                    ;; first any triggers from the testconfig
                                    (let ((cmd  (configf:lookup tconfig "triggers" trigger)))
                                      (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "tconfig-" logname) test-name item-path event-time state status)))
                                    ;; next any triggers from megatest.config
                                    (let ((cmd  (configf:lookup *configdat* "triggers" trigger)))
                                      (if cmd (mt:run-trigger cmd test-id test-rundir trigger (conc "mtconfig-" logname) test-name item-path event-time state status)))))
                                (list
                                 (conc state "/" status)
                                 (conc state "/")
                                 (conc "/" status)))
		     (pop-directory))
                    )))
	      ;; (mutex-unlock! *triggers-mutex*)
	      )))))

;;======================================================================
;;  S T A T E   A N D   S T A T U S   F O R   T E S T S 
;;======================================================================

;; speed up for common cases with a little logic
(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
  (if (not (and run-id test-id))
      (begin
	(debug:print-error 0 *default-log-port* "bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate)
	(print-call-chain (current-error-port))
	#f)
      (begin
	;; cond
	;; ((and newstate newstatus newcomment)
	;;  (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id))
	;; ((and newstate newstatus)
	;;  (rmt:general-call 'state-status run-id newstate newstatus test-id))
	;; (else
	;;  (if newstate   (rmt:general-call 'set-test-state   run-id newstate   test-id))
	;;  (if newstatus  (rmt:general-call 'set-test-status  run-id newstatus  test-id))
	;;  (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id))))
	(rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment)
	;; (mt:process-triggers run-id test-id newstate newstatus)
	#t)))


(define (mt:test-set-state-status-by-id-unless-completed run-id test-id newstate newstatus newcomment)
  (let* ((test-vec   (rmt:get-testinfo-state-status run-id test-id))
         (state     (vector-ref test-vec 3)))
    (if (equal? state "COMPLETED")
        #t
        (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment))))

  
(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment)
  ;(let ((test-id (rmt:get-test-id run-id test-name item-path)))
  (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status new-comment)
  ;; (mt:process-triggers run-id test-id new-state new-status)
  #t);)
	;;(mt:test-set-state-status-by-id run-id test-id new-state new-status new-comment)))

(define (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path new-state new-status new-comment)
  (let ((test-id (rmt:get-test-id run-id test-name item-path)))
    (mt:test-set-state-status-by-id-unless-completed run-id test-id new-state new-status new-comment)))
    
(define (mt:lazy-read-test-config test-name)
  (let ((tconf (hash-table-ref/default *testconfigs* test-name #f)))
    (if tconf
	tconf
	(let ((test-dirs (tests:get-tests-search-path *configdat*)))
	  (let loop ((hed (car test-dirs))
		     (tal (cdr test-dirs)))
	    ;; Setting MT_LINKTREE here is almost certainly unnecessary. 
	    (let ((tconfig-file (conc hed "/" test-name "/testconfig")))
	      (if (and (common:file-exists? tconfig-file)
		       (file-read-access? tconfig-file))
		  (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
			(old-link-tree  (get-environment-variable "MT_LINKTREE")))
		    (if link-tree-path (setenv "MT_LINKTREE" link-tree-path))
		    (let ((newtcfg (read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...]
		      (hash-table-set! *testconfigs* test-name newtcfg)
		      (if old-link-tree 
			  (setenv "MT_LINKTREE" old-link-tree)
			  (unsetenv "MT_LINKTREE"))
		      newtcfg))
		  (if (null? tal)
		      (begin
			(debug:print-error 0 *default-log-port* "No readable testconfig found for " test-name)
			#f)
		      (loop (car tal)(cdr tal))))))))))

;;======================================================================
;; portlogger
;;======================================================================



(define (portlogger:open-db fname)
  (let* ((avail    (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away
	 (exists   (common:file-exists? fname))
	 (db       (if avail 
		       (sqlite3:open-database fname)
		       (begin
			 (system (conc "rm -f " fname))
			 (sqlite3:open-database fname))))
	 (handler  (make-busy-timeout 136000))
	 (canwrite (file-write-access? fname)))
	 ;; (db-init  (lambda ()
	 ;;             (sqlite3:execute 
	 ;;              db
	 ;;              "CREATE TABLE IF NOT EXISTS ports (
         ;;                 port INTEGER PRIMARY KEY,
         ;;                 state TEXT DEFAULT 'not-used',
         ;;                 fail_count INTEGER DEFAULT 0,
         ;;                 update_time TIMESTAMP DEFAULT (strftime('%s','now')) );"))))
    (sqlite3:set-busy-handler! db handler)
    (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;")
    ;; (if (not exists) ;; needed with IF NOT EXISTS?
    (sqlite3:execute 
     db
     "CREATE TABLE IF NOT EXISTS ports (
            port INTEGER PRIMARY KEY,
            state TEXT DEFAULT 'not-used',
            fail_count INTEGER DEFAULT 0,
            update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")
    db))

(define (portlogger:open-run-close proc . params)
  (let* ((fname  (conc "/tmp/." (current-user-name) "-portlogger.db"))
	 (avail  (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
    (handle-exceptions
     exn
     (begin
       ;; (release-dot-lock fname)
       (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 5 *default-log-port* "exn=" (condition->list exn))
       (if (common:file-exists? fname)(delete-file fname)) ;; brutally get rid of it
       (print-call-chain (current-error-port)))
     (let* (;; (lock   (obtain-dot-lock fname 2 9 10))
	    (db     (portlogger:open-db fname))
	    (res    (apply proc db params)))
       (sqlite3:finalize! db)
       ;; (release-dot-lock fname)
       res))))

;; (fold-row PROC INIT DATABASE SQL . PARAMETERS) 
(define (portlogger:take-port db portnum)
  (let* ((qry1 (sqlite3:prepare db "INSERT INTO ports (port,state) VALUES (?,?);"))
	 (qry2 (sqlite3:prepare db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;"))
	 (qry3 (sqlite3:prepare db "SELECT state FROM ports WHERE port=?;"))
	 (res  (sqlite3:with-transaction
		db
		(lambda ()
		  ;; (fold-row (lambda (var curr) (or var curr)) #f db "SELECT var FROM foo WHERE id=100;")
		  (let* ((curr #f)
			 (res  #f))
		    (set! curr (sqlite3:fold-row
				(lambda (var curr)
				  (or curr var curr))
				"not-tried"
				qry3
				portnum))
		    ;; (print "curr=" curr)
		    (set! res (case (string->symbol curr)
				((released)  (sqlite3:execute qry2 "taken" portnum) 'taken)
				((not-tried) (sqlite3:execute qry1 portnum "taken") 'taken)
				((taken)                                            'already-taken)
				((failed)                                           'failed)
				(else                                               'error)))
		    ;; (print "res=" res)
		    res)))))
    (sqlite3:finalize! qry1)
    (sqlite3:finalize! qry2)
    (sqlite3:finalize! qry3)
    res))

(define (portlogger:get-prev-used-port db)
  (handle-exceptions
      exn
      (begin
	(debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
	(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	(debug:print 5 *default-log-port* "exn=" (condition->list exn))
	(print-call-chain (current-error-port))
	(debug:print 0 *default-log-port* "Continuing anyway.")
	#f)
    (sqlite3:fold-row
     (lambda (var curr)
       (or curr var curr))
     #f
     db
     "SELECT (port) FROM ports WHERE state='released' LIMIT 1;")))

(define (portlogger:find-port db)
  (let* ((lowport (let ((val (configf:lookup *configdat* "server" "lowport")))
		    (if (and val 
			     (string->number val))
			(string->number val)
			32768)))
	 (portnum (or (portlogger:get-prev-used-port db)
		      (+ lowport ;; top of registered ports is 49152 but lets use ports in the registered range
			 (random (- 64000 lowport))))))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 5 *default-log-port* "exn=" (condition->list exn))
       (print-call-chain (current-error-port))
       (debug:print 0 *default-log-port* "Continuing anyway."))
     (portlogger:take-port db portnum))
    portnum))

;; set port to "released", "failed" etc.
;; 
(define (portlogger:set-port db portnum value)
  (sqlite3:execute db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;" value portnum))

;; set port to failed (attempted to take but got error)
;;
(define (portlogger:set-failed db portnum)
  (sqlite3:execute db "UPDATE ports SET state='failed',fail_count=fail_count+1,update_time=strftime('%s','now') WHERE port=?;" portnum))

;;======================================================================
;; MAIN
;;======================================================================

(define (portlogger:main . args)
  (let* ((dbfname (conc "/tmp/." (current-user-name) "-portlogger.db"))
	 (db      (portlogger:open-db dbfname))
	 (numargs (length args))
	 (result  
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print 0 *default-log-port* "EXCEPTION: portlogger database at " dbfname " probably overloaded or unreadable. Try removing it.")
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 5 *default-log-port* "exn=" (condition->list exn))
	     (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	     (print-call-chain (current-error-port))
	     #f)
	   (case (string->symbol (car args)) ;; commands with two or more params
	     ((take)(portlogger:take-port db (string->number (cadr args))))
	     ((find)(portlogger:find-port db))
	     ((set) (let ((port  (cadr  args))
			  (state (caddr args)))
		      (portlogger:set-port db 
					   (if (number? port) port (string->number port))
					   state)
		      state))
	     ((failed)(portlogger:set-failed db (string->number (cadr args))) 'failed)))))
    (sqlite3:finalize! db)
    result))
     
;; (print (apply portlogger:main (cdr (argv))))
;;======================================================================
;; runconfig
;;======================================================================

(define (runconfig:read fname target environ-patt)
  (let ((ht (make-hash-table)))
    (if target (hash-table-set! ht target '()))
    (read-config fname ht #t environ-patt: environ-patt sections: (if target (list "default" target) #f))))

;; NB// to process a runconfig ensure to use environ-patt with target!
;;
(define (setup-env-defaults fname run-id already-seen keyvals #!key (environ-patt #f)(change-env #t))
  (let* ((keys    (map car keyvals))
	 (thekey  (if keyvals 
		      (string-intersperse (map (lambda (x)(if x x "-na-")) (map cadr keyvals)) "/")
		      (or (common:args-get-target)
			  (get-environment-variable "MT_TARGET")
			  (begin
			    (debug:print-error 0 *default-log-port* "setup-env-defaults called with no run-id or -target or -reqtarg")
			    "nothing matches this I hope"))))
	 ;; Why was system disallowed in the reading of the runconfigs file?
	 ;; NOTE: Should be setting env vars based on (target|default)
	 (confdat   (runconfig:read fname thekey environ-patt))
	 (whatfound (make-hash-table))
	 (finaldat  (make-hash-table))
	 (sections (list "default" thekey)))
    (if (not *target*)(set! *target* thekey)) ;; may save a db access or two but repeats db:get-target code
    (debug:print 4 *default-log-port* "Using key=\"" thekey "\"")

    (if change-env
	(for-each ;; NB// This can be simplified with new content of keyvals having all that is needed.
	 (lambda (keyval)
	   (safe-setenv (car keyval)(cadr keyval)))
	 keyvals))
	
    (for-each 
     (lambda (section)
       (let ((section-dat (hash-table-ref/default confdat section #f)))
	 (if section-dat
	     (for-each 
	      (lambda (envvar)
		(let ((val (cadr (assoc envvar section-dat))))
		(hash-table-set! whatfound section (+ (hash-table-ref/default whatfound section 0) 1))
		(if (and (string? envvar)
			 (string? val)
			 change-env)
		    (safe-setenv envvar val))
		(hash-table-set! finaldat envvar val)))
	      (map car section-dat)))))
     sections)
    (if already-seen
	(begin
	  (debug:print 2 *default-log-port* "Key settings found in runconfigs.config:")
	  (for-each (lambda (fullkey)
		      (debug:print 2 *default-log-port* (format #f "~20a ~a\n" fullkey (hash-table-ref/default whatfound fullkey 0))))
		    sections)
	  (debug:print 2 *default-log-port* "---")
	  (set! *already-seen-runconfig-info* #t)))
    ;; finaldat ;; was returning this "finaldat" which would be good but conflicts with other uses
    confdat
    ))

(define (set-run-config-vars run-id keyvals targ-from-db)
  (push-directory *toppath*) ;; the push/pop doesn't appear to do anything ...
  (let ((runconfigf (conc  *toppath* "/runconfigs.config"))
	(targ       (or (common:args-get-target)
			targ-from-db
			(get-environment-variable "MT_TARGET"))))
    (pop-directory)
    (if (common:file-exists? runconfigf)
	(setup-env-defaults runconfigf run-id #t keyvals
			    environ-patt: (conc "(default"
						(if targ
						    (conc "|" targ ")")
						    ")")))
	(debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf))))

;; given (a (b c) d) return ((a b d)(a c d))
;; NOTE: this feels like it has been done before - perhaps with items handling?
;;
(define (runconfig:combinations inlst)
  (let loop ((hed (car inlst))
	     (tal (cdr inlst))
	     (res '()))
    ;; (print "res: " res " hed: " hed)
    (if (list? hed)
	(let ((newres (if (null? res) ;; first time through convert incoming items to list of items
			  (map list hed)
			  (apply append
				 (map (lambda (r)  ;; iterate over items in res
					(map (lambda (h) ;; iterate over items in hed
					       (append r (list h)))
					     hed))
				      res)))))
	  ;; (print "newres1: " newres)
	  (if (null? tal)
	      newres
	      (loop (car tal)(cdr tal) newres)))
	(let ((newres (if (null? res)
			  (list (list hed))
			  (map (lambda (r)
				 (append r (list hed)))
			       res))))
	  ;; (print "newres2: " newres)
	  (if (null? tal)
	      newres
	      (loop (car tal)(cdr tal) newres))))))

;; multi-part expand
;; Given a/b,c,d/e,f return a/b/e a/b/f a/c/e a/c/f a/d/e a/d/f
;;
(define (runconfig:expand target)
  (let* ((parts (map (lambda (x)
		       (string-split x ","))
		     (string-split target "/"))))
    (map (lambda (x)
	   (string-intersperse x "/"))
	 (runconfig:combinations parts))))

;; multi-target expansion
;; a/b/c/x,y,z a/b/d/x,y => a/b/c/x a/b/c/y a/b/c/z a/b/d/x a/b/d/y
;; 
(define (runconfig:expand-target target-strs)
  (delete-duplicates
   (apply append (map runconfig:expand (string-split target-strs " ")))))
;;======================================================================
;; runs
;;======================================================================
;; (include "debugger.scm")

;; use this struct to facilitate refactoring
;;

(defstruct runs:dat
  reglen regfull
  runname max-concurrent-jobs run-id
  test-patts required-tests test-registry
  registry-mutex flags keyvals run-info all-tests-registry
  can-run-more-tests
  ((can-run-more-tests-count 0) : fixnum))

(defstruct runs:testdat
  hed tal reg reruns  test-record
  test-name item-path jobgroup
  waitons testmode  newtal itemmaps prereqs-not-met)


(define (runs:get-mt-env-alist run-id runname target testname itempath)
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  `(("MT_TEST_NAME"     . ,testname)
    
    ("MT_ITEMPATH"      . ,itempath)

    ("MT_TARGET"        . ,target)
    
    ("MT_RUNNAME"       . ,runname)
    
    ("MT_RUN_AREA_HOME" . ,*toppath*)
    
    ,@(let* ((link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))
        (if link-tree
            (list (cons "MT_LINKTREE" link-tree)
                  
                  (cons "MT_TEST_RUN_DIR"
                        (conc link-tree  "/" target "/" runname "/" testname
                              (if (and (string? itempath) (not (equal? itempath "")))
                                  (conc "/" itempath)
                                  "")))
                  )
            '()))
    
    ,@(map
       (lambda (key)
         (cons  (car key) (cadr key)))
       (keys:target->keyval (rmt:get-keys) target))
    
    ,@(map (lambda (var)
             (let ((val (configf:lookup *configdat* "env-override" var)))
               (cons var val)))
           (configf:section-vars *configdat* "env-override"))))


    

    
    
;; set up needed environment variables given a run-id and optionally a target, itempath etc.
;;
(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f))
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  (let* ((target    (or intarget 
			(common:args-get-target)
			(get-environment-variable "MT_TARGET")))
	 (keys      (if inkeys    inkeys    (rmt:get-keys)))
	 (keyvals   (if inkeyvals inkeyvals (keys:target->keyval keys target)))
	 (vals      (hash-table-ref/default *env-vars-by-run-id* run-id #f))
	 (link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))
    (if testname (setenv "MT_TEST_NAME" testname))
    (if itempath (setenv "MT_ITEMPATH"  itempath))

    ;; get the info from the db and put it in the cache
    (if link-tree
	(setenv "MT_LINKTREE" link-tree)
	(debug:print-error 0 *default-log-port* "linktree not set, should be set in megatest.config in [setup] section."))
    (if (not vals)
	(let ((ht (make-hash-table)))
	  (hash-table-set! *env-vars-by-run-id* run-id ht)
	  (set! vals ht)
	  (for-each
	   (lambda (key)
	     (hash-table-set! vals (car key) (cadr key)))
	   keyvals)))
    ;; from the cached data set the vars
    
    (hash-table-for-each
     vals
     (lambda (key val)
       (debug:print 2 *default-log-port* "setenv " key " " val)
       (safe-setenv key val)))
    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1")
    ;;(BB> "*env-vars-by-run-id*/runid("run-id" vals="(hash-table->alist vals))

    (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target))
    ;; we had a case where there was an exception generated by the hash-table-ref
    ;; due to *configdat* being #f Adding a handle and exit
    (let fatal-loop ((count 0)) 
      (handle-exceptions
	  exn
	  (let ((call-chain (get-call-chain))
		(msg        ((condition-property-accessor 'exn 'message) exn)))
	    (if (< count 5)
		(begin ;; this call is colliding, do some crude stuff to fix it.
		  (debug:print 0 *default-log-port* "ERROR: *configdat* was inaccessible! This should never happen. Retry #" count)
		  (launch:setup force-reread: #t)
		  (fatal-loop (+ count 1))) 
		(begin
		  (debug:print 0 *default-log-port* "FATAL: *configdat* was inaccessible! This should never happen. Retried " count " times. Message: " msg)
		  (debug:print 0 *default-log-port* "Call chain:")
		  (with-output-to-port *default-log-port*

                    (lambda ()
                      (print "*configdat* is >>"*configdat*"<<")
                      (pp *configdat*)
                      (pp call-chain)))
                  
		  (exit 1))))
          ;;(bb-check-path msg: "runs:set-megatest-env-vars block 1.5")
          (when (or (not *configdat*) (not (hash-table? *configdat*)))
              (debug:print 0 *default-log-port* "WARNING: *configdat* was inaccessible! This should never happen.  Brute force reread.")
              ;;(BB> "ERROR: *configdat* was inaccessible! This should never happen.  Brute force reread.")
              (thread-sleep! 2) ;; assuming nfs lag.
              (launch:setup force-reread: #t))
          (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())))) ;;;; environment is tainted HERE in this let block.
    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 2")
    ;; Lets use this as an opportunity to put MT_RUNNAME in the environment
    (let ((runname  (if inrunname inrunname (rmt:get-run-name-from-id run-id))))
      (if runname
	  (setenv "MT_RUNNAME" runname)
	  (debug:print-error 0 *default-log-port* "no value for runname for id " run-id)))
    (setenv "MT_RUN_AREA_HOME" *toppath*)
    ;; if a testname and itempath are available set the remaining appropriate variables
    (if testname (setenv "MT_TEST_NAME" testname))
    (if itempath (setenv "MT_ITEMPATH"  itempath))
    ;;(bb-check-path msg: "runs:set-megatest-env-vars block 3")
    (if (and testname link-tree)
	(setenv "MT_TEST_RUN_DIR" (conc (getenv "MT_LINKTREE")  "/"
					(getenv "MT_TARGET")    "/"
					(getenv "MT_RUNNAME")   "/"
					(getenv "MT_TEST_NAME")
					(if (and itempath
						 (not (equal? itempath "")))
					    (conc "/" itempath)
					    ""))))))

(define (set-item-env-vars itemdat)
  (for-each (lambda (item)
	      (debug:print 2 *default-log-port* "setenv " (car item) " " (cadr item))
	      (setenv (car item) (cadr item)))
	    itemdat))

;; Every time can-run-more-tests is called increment the delay
;;
;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine
;;
(define *last-num-running-tests* 0)
;; (define *runs:can-run-more-tests-count* 0)
(define (runs:shrink-can-run-more-tests-count runsdat)
  (runs:dat-can-run-more-tests-count-set! runsdat 0))

(define (runs:inc-can-run-more-tests-count runsdat)
  (runs:dat-can-run-more-tests-count-set!
   runsdat
   (+ (runs:dat-can-run-more-tests-count runsdat) 1)))

;;  (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2)))

;; Temporary globals. Move these into the logic or into common
;;
(define *seen-cant-run-tests* (make-hash-table)) ;; use to track tests that we suspect cannot be run
(define (runs:inc-cant-run-tests testname)
  (hash-table-set! *seen-cant-run-tests* testname
		   (+ (hash-table-ref/default *seen-cant-run-tests* testname 0) 1)))

(define (runs:can-keep-running? testname n)
  (< (hash-table-ref/default *seen-cant-run-tests* testname 0) n))

(define *runs:denoise* (make-hash-table)) ;; key => last-time-ran

;; mechanism to limit printing info to the screen that is repetitive.
;;
;; Example: 
;; (if (runs:lownoise "waiting on tasks" 60)
;;     (debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ..."))
;;
(define (runs:lownoise key waitval)
  (let ((lasttime (hash-table-ref/default *runs:denoise* key 0))
	(currtime (current-seconds)))
    (if (> (- currtime lasttime) waitval)
	(begin
	  (hash-table-set! *runs:denoise* key currtime)
	  #t)
	#f)))

(define (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)

  ;; Take advantage of a good place to exit if running the one-pass methodology
  (if (and (> (runs:dat-can-run-more-tests-count runsdat) 20)
	   (args:get-arg "-one-pass"))
      (exit 0))

  (thread-sleep! (cond ;; BB: check with Matt.  Should this sleep move to cond clauses below where we determine we have too many jobs running rather than each time the and condition above is true (which seems like always)?
        	  ((> (runs:dat-can-run-more-tests-count runsdat) 20)
		   (if (runs:lownoise "waiting on tasks" 60)(debug:print-info 2 *default-log-port* "waiting for tasks to complete, sleeping briefly ..."))
                   (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.1) ;; was 2
		   );; obviously haven't had any work to do for a while
        	  (else 0)))
  
  (let* ((num-running             (rmt:get-count-tests-running run-id))
	 (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
	 (job-group-limit         (let ((jobg-count (config-lookup *configdat* "jobgroups" jobgroup)))
				    (if (string? jobg-count)
					(string->number jobg-count)
					jobg-count))))
    (if (> (+ num-running num-running-in-jobgroup) 0)
	(runs:inc-can-run-more-tests-count runsdat)) ;; (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1)))
    (if (not (eq? *last-num-running-tests* num-running))
	(begin
	  (debug:print 2 *default-log-port* "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running)
	  (set! *last-num-running-tests* num-running)))
    (if (not (eq? 0 *globalexitstatus*))
	(list #f num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)
	(let* ((can-not-run-more (cond
				 ;; if max-concurrent-jobs is set and the number running is greater 
				 ;; than it then cannot run more jobs
				 ((and max-concurrent-jobs (>= num-running max-concurrent-jobs))
				  (if (runs:lownoise "mcj msg" 60)
				      (debug:print 0 *default-log-port* "WARNING: Max running jobs exceeded, current number running: " num-running 
						   ", max_concurrent_jobs: " max-concurrent-jobs))
				  #t)
				 ;; if job-group-limit is set and number of jobs in the group is greater
				 ;; than the limit then cannot run more jobs of this kind
				 ((and job-group-limit
				       (>= num-running-in-jobgroup job-group-limit))
				  (if (runs:lownoise (conc "maxjobgroup " jobgroup) 60)
				      (debug:print 1 *default-log-port* "WARNING: number of jobs " num-running-in-jobgroup 
						   " in jobgroup \"" jobgroup "\" exceeds limit of " job-group-limit))
				  #t)
				 (else #f))))
	  (list (not can-not-run-more) num-running num-running-in-jobgroup max-concurrent-jobs job-group-limit)))))

(define (runs:run-pre-hook run-id)
    (let* ((run-pre-hook   (configf:lookup *configdat* "runs" "pre-hook"))
           (existing-tests (if run-pre-hook
                               (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses
                                                      #f #f ;; offset limit
                                                      #f ;; not-in
                                                      #f ;; sort-by
                                                      #f ;; sort-order
                                                      #f ;; get full data (not 'shortlist)
                                                      0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time
                                                      'dashboard)
                               '()))
           (log-dir         (conc *toppath* "/logs"))
           (log-file        (conc "pre-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log"))
           (full-log-fname  (conc log-dir "/" log-file)))
      (if run-pre-hook
          (if (null? existing-tests)
              (let* ((use-log-dir (if (not (directory-exists? log-dir))
                                      (handle-exceptions
                                       exn
                                       (begin
                                         (debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir)
                                         #f)
                                       (create-directory log-dir #t)
                                       #t)
                                      #t))
                     (start-time   (current-seconds))
                     (actual-logf  (if use-log-dir full-log-fname log-file)))
                (handle-exceptions
                 exn
                 (begin
                   (print-call-chain *default-log-port*)
                   (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn))
                   (debug:print 0 *default-log-port* "ERROR: failed to run pre-hook " run-pre-hook ", check the log " log-file))
                 (debug:print-info 0 *default-log-port* "running run-pre-hook: \"" run-pre-hook "\", log is " actual-logf)
                 (system (conc run-pre-hook " >> " actual-logf " 2>&1"))
                 (debug:print-info 0 *default-log-port* "pre-hook \"" run-pre-hook "\" took " (- (current-seconds) start-time) " seconds to run.")))
              (debug:print 0 *default-log-port* "Skipping pre-hook call \"" run-pre-hook "\" as there are existing tests for this run.")))))
    
(define (runs:run-post-hook run-id)
    (let* ((run-post-hook   (configf:lookup *configdat* "runs" "post-hook"))
           (existing-tests (if run-post-hook
                               (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses
                                                      #f #f ;; offset limit
                                                      #f ;; not-in
                                                      #f ;; sort-by
                                                      #f ;; sort-order
                                                      #f ;; get full data (not 'shortlist)
                                                      0 ;; (runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time
                                                      'dashboard)
                               '()))
           (log-dir         (conc *toppath* "/logs"))
           (log-file        (conc "post-hook-" (string-translate (getenv "MT_TARGET") "/" "-") "-" (getenv "MT_RUNNAME") ".log"))
           (full-log-fname  (conc log-dir "/" log-file)))
      (if run-post-hook
          ;; (if (null? existing-tests)
          ;;    (debug:print 0 *default-log-port* "Skipping post-hook call \"" run-post-hook "\" as there are existing tests for this run.")))))
	  (let* ((use-log-dir (if (not (directory-exists? log-dir))
				  (handle-exceptions
				      exn
				      (begin
					(debug:print 0 *default-log-port* "WARNING: Failed to create " log-dir)
					#f)
				    (create-directory log-dir #t)
				    #t)
				  #t))
		 (start-time   (current-seconds))
		 (actual-logf  (if use-log-dir full-log-fname log-file)))
	    (handle-exceptions
		exn
		(begin
		  (print-call-chain *default-log-port*)
		  (debug:print 0 *default-log-port* "Message: " ((condition-property-accessor 'exn 'message) exn))
		  (debug:print 0 *default-log-port* "ERROR: failed to run post-hook " run-post-hook ", check the log " log-file))
	      (debug:print-info 0 *default-log-port* "running run-post-hook: \"" run-post-hook "\", log is " actual-logf)
	      (system (conc run-post-hook " >> " actual-logf " 2>&1"))
	      (debug:print-info 0 *default-log-port* "post-hook \"" run-post-hook "\" took " (- (current-seconds) start-time) " seconds to run."))))))

;; return #t when all items in waitors-upon list are represented in test-patt, #f otherwise.
(define (runs:testpatts-mention-waitors-upon? test-patt waitors-upon)
  (null? (tests:filter-test-names-not-matched waitors-upon test-patt)))

;;======================================================================
;; runs:run-tests is called from megatest.scm and itself
;;======================================================================
;;
;;  test-names: Comma separated patterns same as test-patts but used in selection 
;;              of tests to run. The item portions are not respected.
;;              FIXME: error out if /patt specified
;;            
(define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names
  (let* ((keys               (common:get-fields *configdat*))
	 (keyvals            (keys:target->keyval keys target))
	 (run-id             (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))  ;;  test-name)))
	 ;; (deferred          '()) ;; delay running these since they have a waiton clause
	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
         (dbfile             (conc  *toppath* "/megatest.db"))
         (readonly-mode      (not (file-write-access? dbfile)))
	 (test-records       (make-hash-table))
	 ;; need to process runconfigs before generating these lists
	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
	 (test-names         #f)  ;; Generated by a call to (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;; Put fully qualified test/testpath names in this list to be done
         (waitors-upon       (make-hash-table)) ;; given a test, return list of tests waiting upon this test.
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 ;; (tdbdat             (tasks:open-db))
	 (config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns")))
			       (if x (string->number x) #f)))
	 (allowed-tests      #f)
	 (runconf            #f))

    ;; check if readonly
    (when readonly-mode
      (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed.")
      (exit 1))

    ;; per user request. If less than 100Meg space on dbdir partition, bail out with error
    ;; this will reduce issues in database corruption
    (common:check-db-dir-and-exit-if-insufficient)

    ;; override the number of reruns from the configs
    ;; this needs to be done at the place where is first runs:run-tests called 
    ;(if (and config-reruns
	  ;   (> run-count config-reruns))
 	  ;(set! run-count config-reruns))

    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))

    (let ((sighand (lambda (signum)
		     ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
		     (set! *time-to-exit* #t)
		     (print "Received signal " signum ", cleaning up before exit. Please wait...")
		     (let ((th1 (make-thread (lambda ()
					       ;; (let ((tdbdat (tasks:open-db)))
						 (rmt:tasks-set-state-given-param-key task-key "killed") ;; )
					       (print "Killed by signal " signum ". Exiting")
					       (thread-sleep! 3)
					       (exit))))
			   (th2 (make-thread (lambda ()
					       (thread-sleep! 5)
					       (debug:print 0 *default-log-port* "Done")
					       (exit 4)))))
		       (thread-start! th2)
		       (thread-start! th1)
		       (thread-join! th2)))))
      (set-signal-handler! signal/int sighand)
      (set-signal-handler! signal/term sighand))

    ;; force the starting of a server -- removed BB 17ww28 - no longer needed.
    ;;(debug:print 0 *default-log-port* "waiting on server...")
    ;;(server:start-and-wait *toppath*)
    
    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (set! runconf (if (common:file-exists? runconfigf)
		      (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
		      (begin
			(debug:print 0 *default-log-port* "WARNING: You do not have a run config file: " runconfigf)
			#f)))

    (if (not test-patts) ;; first time in - adjust testpatt
	(set! test-patts (common:args-get-testpatt runconf)))
    ;; if test-patts is #f at this point there is something wrong and we need to bail out
    (if (not test-patts)
	(begin
	  (debug:print 0 *default-log-port* "WARNING: there is no test pattern for this run. Exiting now.")
	  (exit 0)))
    
    (if (args:get-arg "-tagexpr")
	(begin
	  (set! allowed-tests (string-join (runs:get-tests-matching-tags (args:get-arg "-tagexpr")) ","))
	  	  (debug:print-info 0 *default-log-port* "filtering initial test list with tagexpr: " (args:get-arg "-tagexpr") " => " allowed-tests)
		  ));; tests will be ANDed with this list

    ;; register this run in monitor.db
    (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params)
    (rmt:tasks-set-state-given-param-key task-key "running")

    #;(common:telemetry-log "run-tests"
                          payload:
                          `( (target . ,target)
                             (run-name . ,runname)
                             (test-patts . ,test-patts) ) )

    
    ;; Now generate all the tests lists
    (set! all-tests-registry (tests:get-all))   ;; hash of testname => path-to-test
    (set! all-test-names     (hash-table-keys all-tests-registry))
    ;; filter first for allowed-tests (from -tagexpr) then for test-patts.
    (set! test-names         (tests:filter-test-names
			      (if allowed-tests
				  (tests:filter-test-names all-test-names allowed-tests)
				  all-test-names)
			      test-patts))

    ;; I think seeding required-tests with all test-names makes sense but lack analysis to back that up.

    ;; NEW STRATEGY HERE:
    ;; 1. fill required tests with test-patts
    ;; 2. scan testconfigs and if waitons, itemwait, itempatt calc prior test test-patt
    ;; 3. repeat until all deps propagated
    
    ;; any tests with direct mention in test-patts can be added to required
    ;;(set! required-tests     (lset-intersection equal? (string-split test-patts ",") all-test-names))
    (set! required-tests     (tests:filter-test-names all-test-names test-patts))
    ;;
    ;; (set! required-tests     (lset-intersection equal? test-names all-test-names))
    
    ;; look up all tests matching the comma separated list of globs in
    ;; test-patts (using % as wildcard)

    ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts)))
    (debug:print-info 0 *default-log-port* "tests search path: " (string-intersperse (tests:get-tests-search-path *configdat*) " "))
    (debug:print-info 0 *default-log-port* "all tests:         " (string-intersperse (sort all-test-names string<) " "))
    (debug:print-info 0 *default-log-port* "test names:        " (string-intersperse (sort test-names string<) " "))
    (debug:print-info 0 *default-log-port* "required tests:    " (string-intersperse (sort required-tests string<) " "))

    ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if
    ;; -keepgoing is specified
    (if (eq? *passnum* 0)
	(begin
	  ;; Is this still necessary? I think not. Unreachable tests are marked as such and 
	  ;; should not cause problems here.
	  ;;
	  ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to 
	  ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends 
	  ;; on test A but test B reached the point on being registered as NOT_STARTED and test
	  ;; A failed for some reason then on re-run using -keepgoing the run can never complete.
	  ;;
	  ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED")
	  
	  ;; Now convert anything in allow-auto-rerun to NOT_STARTED
	  ;;
	  (for-each
	   (lambda (state-status)
	     (let* ((ss-lst (string-split-fields "/" state-status #:infix))
		    (state  (if (> (length ss-lst) 0)(car  ss-lst) #f))
		    (status (if (> (length ss-lst) 1)(cadr ss-lst) #f)))
	       (rmt:set-tests-state-status run-id test-names state status "NOT_STARTED" status)))
	   ;; list of state/status pairs separated by spaces
	   (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") "")))))

    ;; Ensure all tests are registered in the test_meta table
    (runs:update-all-test_meta #f)

    ;; run the run prehook if there are no tests yet run for this run:
    ;;
    (runs:run-pre-hook run-id)
    ;; mark all test launced flag as false in the meta table 
    (rmt:set-var (conc "lunch-complete-" run-id) "no")
    (rmt:set-run-state-status run-id "new" "n/a")
    ;; now add non-directly referenced dependencies (i.e. waiton)
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data
    ;;
    ;; What happended, this code is now duplicated in tests!?
    ;;
    ;;======================================================================
    
    (if (not (null? test-names)) ;; BEGIN test-names loop
	(let loop ((hed (car test-names))   ;; NOTE: This is the main loop that iterates over the test-names
		   (tal (cdr test-names)))         ;; 'return-procs tells the config reader to prep running system but return a proc
          (debug:print-info 4 *default-log-port* "\n\ntestpatt elaboration loop => hed="hed " tal="tal" test-patts="test-patts" test-names="test-names)
	  (change-directory *toppath*) ;; PLEASE OPTIMIZE ME!!! I think this should be a no-op but there are several places where change-directories could be happening.
	  (setenv "MT_TEST_NAME" hed) ;; 
	  (let*-values (((waitons waitors config) (tests:get-waitons hed all-tests-registry))

			;; NOTE: Have the config - can extract [waitons] section
			
                        ((hed-mode)
                         (let ((m (config-lookup config "requirements" "mode")))
                           (if m (map string->symbol (string-split m)) '(normal))))
                        ((hed-itemized-waiton) ;; are items in hed waiting on items of waiton?
                         (not (null? (lset-intersection eq? hed-mode '(itemmatch itemwait)))))
                        )
	    (debug:print-info 8 *default-log-port* "waitons: " waitons)
	    ;; check for hed in waitons => this would be circular, remove it and issue an
	    ;; error
	    (if (or (member hed waitons)
		    (member hed waitors))
		(begin
		  (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton or waitor, please correct this!")
		  (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons))
		  (set! waitors (filter (lambda (x)(not (equal? x hed))) waitors))))
	    
	    ;; (items   (items:get-items-from-config config)))
	    (if (not (hash-table-ref/default test-records hed #f)) ;; waiton-tconfig below will be #f until that test is visted here at least once
		(hash-table-set! test-records ;; BB: we are doing a manual make-tests:testqueue
				 hed (vector hed     ;; 0 ;; testname
					     config  ;; 1 
					     waitons ;; 2 
					     (config-lookup config "requirements" "priority")     ;; priority 3
					     (tests:get-items config) ;; 4 ;; expand the [items] and or [itemstable] into explict items
					     #f      ;; itemsdat 5
					     #f      ;; spare - used for item-path
					     waitors ;; 
					     )))
            ;; update waitors-upon here
            (for-each
             (lambda (waiton)
               (let* ((current-waitors-upon (hash-table-ref/default waitors-upon waiton '())))
                 (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] is "current-waitors-upon )
                 (when (not (member hed current-waitors-upon))
                   (debug:print-info 8 *default-log-port* " current-waiters-upon["waiton"] << "hed  )
                   (hash-table-set! waitors-upon waiton (cons hed current-waitors-upon)))))
             (if (list? waitons) waitons '()))
            (debug:print-info 8 *default-log-port* " process waitons&waitors of "hed": "(delete-duplicates (append waitons waitors)))
            (for-each 
	     (lambda (waiton)
	       (if (and waiton (not (member waiton test-names)))
		   (let* ((waitors-in-testpatt (runs:testpatts-mention-waitors-upon? test-patts (hash-table-ref/default waitors-upon waiton '())))
                          (waiton-record   (hash-table-ref/default test-records waiton #f))
			  (waiton-tconfig  (if waiton-record (vector-ref waiton-record 1) #f))
			  (waiton-itemized (and waiton-tconfig
						(or (hash-table-ref/default waiton-tconfig "items" #f)
						    (hash-table-ref/default waiton-tconfig "itemstable" #f))))
			  (itemmaps        (tests:get-itemmaps config))  ;; (configf:lookup config "requirements" "itemmap"))
			  (new-test-patts  (tests:extend-test-patts test-patts hed waiton itemmaps hed-itemized-waiton))) 
		     (debug:print-info 0 *default-log-port* "Test " waiton " has " (if waiton-record "a" "no") " waiton-record and" (if waiton-itemized " " " no ") "items")
		     ;; need to account for test-patt here, if I am test "a", selected with a test-patt of "hed/b%"
		     ;; and we are waiting on "waiton" we need to add "waiton/,waiton/b%" to test-patt
		     ;; is this satisfied by merely appending "/" to the waiton name added to the list?
		     ;;
		     ;; This approach causes all of the items in an upstream test to be run 
		     ;; if we have this waiton already processed once we can analzye it for extending
		     ;; tests to be run, since we can't properly process waitons unless they have been
		     ;; initially added we add them again to be processed on second round AND add the hed
		     ;; back in to also be processed on second round
		     (if waiton-tconfig ;; BB: waiter should be in test-patts as well as the waiton have a tconfig.
                         (if waiton-itemized
                             (if waitors-in-testpatt
                                 (begin
                                   (debug:print-info 0 *default-log-port* "New test patts: " new-test-patts ", prev test patts: " test-patts)
                                   (set! test-names (cons waiton test-names)) ;; need to process this one, only add once the waiton tconfig read
                                   (set! required-tests (cons (conc waiton "/") required-tests))
                                   (set! test-patts new-test-patts))
                                 (begin
                                   (debug:print-info 0 *default-log-port* "Waitor(s) not yet on testpatt for " waiton ", setting up to re-process it")
                                   (set! tal (append (cons waiton tal)(list hed)))))
                             (begin
                               (debug:print-info 0 *default-log-port* "Adding non-itemized test " waiton " to required-tests")
                               (set! required-tests (cons waiton required-tests))
                               (set! test-patts new-test-patts)))
			 (begin
			   (debug:print-info 0 *default-log-port* "No testconfig info yet for " waiton ", setting up to re-process it")
			   (set! tal (append (cons waiton tal)(list hed))))) ;; (cons (conc waiton "/") required-tests))
		     ;; NOPE: didn't work. required needs to be plain test names. Try tacking on to test-patts
		     ;;  - doesn't work
		     ;; (set! test-patts (conc test-patts "," waiton "/"))
		     ;; (set! test-names (cons waiton test-names))))) ;; was an append, now a cons
		     )))
	     (delete-duplicates (append waitons waitors)))
	    (let ((remtests (delete-duplicates (append waitons tal))))
              (debug:print-info 8 *default-log-port* " remtests are "remtests)
	      (if (not (null? remtests))
		  (begin
		    ;; (debug:print-info 0 *default-log-port* "Preprocessing continues for " (string-intersperse remtests ", "))
		    (loop (car remtests)(cdr remtests)))))))) ;; END test-names loop

    (if (not (null? required-tests))
	(debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue"))
    ;; NOTE: these are all parent tests, items are not expanded yet.
    (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
    (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
      (if (> (length (hash-table-keys test-records)) 0)
	  (let* ((keep-going        #t)
		 (run-queue-retries 5)
		 #;(th1        (make-thread (lambda ()
					    (handle-exceptions
						exn
						(begin
						  (print-call-chain)
						  (print " message: " ((condition-property-accessor 'exn 'message) exn)))
					      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
								    (any->number reglen) all-tests-registry)))
					  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()			 ;; BBQ: why are we visiting ALL runs here?	    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (handle-exceptions
							       exn
							       (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id)
							       (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
							run-ids)))
					  "runs: mark-incompletes")))
	    ;; (thread-start! th1)
	    (thread-start! th2)
	    ;; (thread-join! th1)
	    ;; just do the main stuff in the main thread
	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
								    (any->number reglen) all-tests-registry)
	    (set! keep-going #f)
	    (thread-join! th2)
	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
	    (if (> run-count 0) ;; handle reruns
		(begin
		  (if (not (hash-table-ref/default flags "-preclean" #f))
		      (hash-table-set! flags "-preclean" #t))
		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS"))
		  ;; recursive call to self
      (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))))
	  (debug:print-info 0 *default-log-port* "No tests to run")))
    (debug:print-info 4 *default-log-port* "All done by here")
    ;; TODO: try putting post hook call here
    ;(if (eq? run-count 0)
    ; (begin  
    ;  (debug:print-info 0 *default-log-port* "Calling Post Hook")  
    ;  (debug:print-info 2 *default-log-port* " run-count " run-count)
    ;  (runs:run-post-hook run-id))
    ;  (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count ))   
    (rmt:tasks-set-state-given-param-key task-key "done")
    ;; (sqlite3:finalize! tasks-db)
    ))


;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable.
;;
;; If reg not full and have items in tal then loop with (car tal)(cdr tal) reg reruns
;; If reg is full (i.e. length >= n
;;   loop with (car reg) tal (cdr reg) reruns
;; If tal is empty
;;   but have items in reg; loop with (car reg)(cdr reg) '() reruns
;;   If reg is empty => all done

(define (runs:queue-next-hed tal reg n regfull)
  (if regfull
      (if (null? reg) #f (car reg))
      (if (null? tal) ;; tal is used up, pop from reg
	  (if (null? reg) #f (car reg))
	  (car tal))))

(define (runs:queue-next-tal tal reg n regfull)
  (if regfull
      tal
      (if (null? tal) ;; must transfer from reg
	  (if (null? reg) '() (cdr reg))
	  (cdr tal))))

(define (runs:queue-next-reg tal reg n regfull)
  (if regfull
      (if (null? reg) '() (cdr reg)) ;; EXPLORE: reorder (cdr reg) such that looping is more efficient
      (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal
	  '()
	  reg)))

;; this is the list of parameters to the named loop "loop" near the top of runs:run-tests-queue, look around line 1216
;;
(define (runs:loop-values tal reg reglen regfull reruns)
  (list (runs:queue-next-hed tal reg reglen regfull)      ;; hed
        (runs:queue-next-tal tal reg reglen regfull)      ;; tal
        (runs:queue-next-reg tal reg reglen regfull)      ;; reg
        reruns))                                          ;; reruns

;; objective - iterate thru tests
;;    => want to prioritize tests we haven't seen before
;;    => sometimes need to squeeze things in (added to reg)
;;    => review of a previously seen test is higher priority of never visited test
;; reg - list of previously visited tests
;; tal - list of never visited tests
;;   prefer next hed to be from reg than tal.

(define runs:nothing-left-in-queue-count 0)

;;======================================================================
;; runs:expand-items is called by runs:run-tests-queue
;;======================================================================
;;
;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature:
;;    (let loop ((hed         (car sorted-test-names))
;;	         (tal         (cdr sorted-test-names))
;;	         (reg         '()) ;; registered, put these at the head of tal 
;;	         (reruns      '()))
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*
					       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
					       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps)
				  '()))))
         (have-itemized (not (null? (lset-intersection eq? testmode '(itemmatch itemwait)))))
	 ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (prereq-fails    (runs:calc-prereq-fail prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met))
	 (runnables       (runs:calc-runnable prereqs-not-met))
         (unexpanded-prereqs
          (filter (lambda (testname)
                    (let* ((test-rec (hash-table-ref test-records testname))
                           (items       (tests:testqueue-get-items  test-rec)))
                      ;;(BB> "HEY " testname "=>"items)
                      (or (procedure? items)(eq? items 'have-procedure))))
                  waitons))


         )
    (debug:print-info 4 *default-log-port* "START OF INNER COND #2 "
		      "\n can-run-more:    " can-run-more
		      "\n testname:        " hed
		      "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)
		      "\n non-completed:   " (runs:pretty-string non-completed) 
		      "\n prereq-fails:    " (runs:pretty-string prereq-fails)
		      "\n fails:           " (runs:pretty-string fails)
		      "\n testmode:        " testmode
		      "\n (member 'toplevel testmode): " (member 'toplevel testmode)
		      "\n (null? non-completed):    " (null? non-completed)
		      "\n reruns:          " reruns
		      "\n items:           " items
		      "\n can-run-more:    " can-run-more)

   (cond
     ;; all prereqs met, fire off the test
     ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch

    ((and (not (member 'toplevel testmode))
	   (member (hash-table-ref/default test-registry (db:test-make-full-name hed item-path) 'n/a)
		   '(DONOTRUN removed CANNOTRUN))) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here
      (debug:print-info 4 *default-log-port* "cond branch - "  "ei-1")
      (debug:print-info 1 *default-log-port* "Test " hed " set to \"" (hash-table-ref test-registry (db:test-make-full-name hed item-path)) "\". Removing it from the queue")
      (if (or (not (null? tal))
	      (not (null? reg)))
          (runs:loop-values tal reg reglen regfull reruns)
	  (begin
	    (debug:print-info 0 *default-log-port* "Nothing left in the queue!")
	    ;; If get here twice then we know we've tried to expand all items
	    ;; since there must be a logic issue with the handling of loops in the 
	    ;; items expand phase we will brute force an exit here.
	    (if (> runs:nothing-left-in-queue-count 2)
		(begin
		  (debug:print 0 *default-log-port* "WARNING: this condition is triggered when there were no items to expand and nothing to run. Please check your run for completeness")
		  (exit 0))
		(set! runs:nothing-left-in-queue-count (+ runs:nothing-left-in-queue-count 1)))
	    #f)))

     ;; desired result of below cond branch:
     ;;   we want to expand items in our test of interest (hed) in the following cases:
     ;;    case 1 - mode is itemmatch or itemwait: 
     ;;       - all prereq tests have been expanded
     ;;       - at least one prereq's items have completed
     ;;    case 2 - mode is toplevel   
     ;;       - prereqs are completed.
     ;;       - or no prereqs can complete 
     ;;    case 3 - mode not specified 
     ;;       - prereqs are completed and passed (we could consider removing "and passed" -- it would change behavior from current)          
    ((or (null? prereqs-not-met)
	  (and (member 'toplevel testmode)
	       (null? non-completed)))
      (debug:print-info 4 *default-log-port* "cond branch - "  "ei-2")
      (debug:print-info 4 *default-log-port* "runs:expand-items: (or (null? prereqs-not-met) (and (member 'toplevel testmode)(null? non-completed)))")
      (let ((test-name (tests:testqueue-get-testname test-record)))
	(setenv "MT_TEST_NAME" test-name) ;; 
	(setenv "MT_RUNNAME"   runname)
	(runs:set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
	(let ((items-list (items:get-items-from-config tconfig)))
	  (if (list? items-list)
	      (begin
		(if (null? items-list)
		    (let ((test-id   (rmt:get-test-id run-id test-name ""))
			  (num-items (rmt:test-toplevel-num-items run-id test-name)))
		      (if (and test-id
			       (not (> num-items 0)))
			  (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "Failed to run due to failed prerequisites"))))
		(tests:testqueue-set-items! test-record items-list)
		(list hed tal reg reruns))
	      (begin
		(debug:print-error 0 *default-log-port* "The proc from reading the items table did not yield a list - please report this")
		(exit 1))))))

    ((and (null? fails)
	   (null? prereq-fails)
	   (not (null? non-completed)))
      (debug:print-info 4 *default-log-port* "cond branch - "  "ei-3")
      (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
        		      (append newtal reruns)))
	     ;; prereqstrs is a list of test names as strings that are prereqs for hed
             (prereqstrs (delete-duplicates (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
						 prereqs-not-met)))
	     ;; a prereq that is not found in allinqueue will be put in the notinqueue list
	     ;; 
             ;; (notinqueue (filter (lambda (x)
             ;;    		   (not (member x allinqueue)))
             ;;    		 prereqstrs))
	     (give-up    #f))

	;; We can get here when a prereq has not been run due to *it* having a prereq that failed.
	;; We need to use this to dequeue this item as CANNOTRUN
	;; 
	(if (member 'toplevel testmode) ;; '(toplevel)) ;; NOTE: this probably should be (member 'toplevel testmode)
	    (for-each (lambda (prereq)
			(if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN)
			    (set! give-up #t)))
		      prereqstrs))

	(if (and give-up
		 (not (and (null? tal)(null? reg))))
	    (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
		  (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
	      (debug:print 1 *default-log-port* "WARNING: test " hed " has discarded prerequisites, removing it from the queue")

	      (let ((test-id (rmt:get-test-id run-id hed "")))
		(if test-id (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to discarded prerequisites")))
	      
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
                  (runs:loop-values trimmed-tal trimmed-reg reglen regfull reruns)
                  ))
	      (list (car newtal)(append (cdr newtal) reg) '() reruns))))

    ((and (null? fails) ;; have not-started tests, but unable to run them.  everything looks completed with no prospect of unsticking something that is stuck.  we should mark hed as moribund and exit or continue if there are more tests to consider
	   (null? prereq-fails)
	   (null? non-completed))
     (debug:print-info 4 *default-log-port* "cond branch - "  "ei-4")
      (if  (runs:can-keep-running? hed 20)
	  (begin
	    (runs:inc-cant-run-tests hed)
	    (debug:print-info 0 *default-log-port* "no fails in prerequisites for " hed " but also none running, keeping " hed " for now. Try count: " (hash-table-ref/default *seen-cant-run-tests* hed 0)) ;; 
	    ;; getting here likely means the system is way overloaded, kill a full minute before continuing
	    (thread-sleep! 60) ;; TODO: gate by normalized server load > 1.0 (maxload config thing)
	    ;; num-retries code was here
	    ;; we use this opportunity to move contents of reg to tal
	    (list (car newtal)(append (cdr newtal) reg) '() reruns)) ;; an issue with prereqs not yet met?
	  (begin
	    (debug:print-info 1 *default-log-port* "no fails in prerequisites for " hed " but nothing seen running in a while, dropping test " hed " from the run queue")
	    (let ((test-id (rmt:get-test-id run-id hed "")))
	      (if test-id (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "TIMED_OUT" "Nothing seen running in a while.")))
            (runs:loop-values tal reg reglen regfull reruns)
            )))

    ((and 
       (or (not (null? fails))
	   (not (null? prereq-fails)))
       (member 'normal testmode))
      (debug:print-info 4 *default-log-port* "cond branch - "  "ei-5")
      (debug:print-info 1 *default-log-port* "test "  hed " (mode=" testmode ") has failed prerequisite(s); "
			(string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ")
			", removing it from to-do list")
      (let ((test-id (rmt:get-test-id run-id hed "")))
	(if test-id
	    (if (not (null? prereq-fails))
		(mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_DISCARDED" "Failed to run due to prior failed prerequisites")
		(mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL"      "Failed to run due to failed prerequisites"))))
      ;; (debug:print 4 *default-log-port*"BB> set PREQ_FAIL on "hed)
      ;; (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "PREQ_FAIL"      "Failed to run due to failed prerequisites")))) ;; BB: this works, btu equivalent for itemwait mode does not work.
      (if (or (not (null? reg))(not (null? tal)))
	  (begin
	    (hash-table-set! test-registry hed 'CANNOTRUN)
            (runs:loop-values tal reg reglen regfull (cons hed reruns))
            )
	  #f)) ;; #f flags do not loop
      
     ((and (not (null? fails))(member 'toplevel testmode))
      (debug:print-info 4 *default-log-port* "cond branch - "  "ei-6")
      (if (or (not (null? reg))(not (null? tal)))
	   (list (car newtal)(append (cdr newtal) reg) '() reruns)
	  #f)) 
     ((null? runnables)
      (debug:print-info 4 *default-log-port* "cond branch - "  "ei-7")
      #f) ;; if we get here and non-completed is null then it is all over.
     (else
      (debug:print-info 4 *default-log-port* "cond branch - "  "ei-8")
      (debug:print 0 *default-log-port* "WARNING: FAILS or incomplete tests maybe preventing completion of this run. Watch for issues with test " hed ", continuing for now")
      (list (car newtal)(cdr newtal) reg reruns)))))

(define (runs:mixed-list-testname-and-testrec->list-of-strings inlst)
  (if (null? inlst)
      '()
      (map (lambda (t)
	     (cond
	      ((vector? t)
	       (let ((test-name (db:test-get-testname t))
		     (item-path (db:test-get-item-path t))
		     (test-state (db:test-get-state t))
		     (test-status (db:test-get-status t)))
		 (conc test-name (if (equal? item-path "") "" "/") item-path ":" test-state "/" test-status)))
	      ((string? t)
	       t)
	      (else 
	       (conc t))))
	   inlst)))


;;  hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry itemmaps)
(define (runs:process-expanded-tests runsdat testdat)
  ;; unroll the contents of runsdat and testdat (due to ongoing refactoring).
  (debug:print 2 *default-log-port* "runs:process-expanded-tests; testdat:" )
  (debug:print 2 *default-log-port* (with-output-to-string
                                            (lambda () (pp (runs:testdat->alist testdat) ))))
  (let* ((hed                    (runs:testdat-hed testdat))
	 (tal                    (runs:testdat-tal testdat))
	 (reg                    (runs:testdat-reg testdat))
	 (reruns                 (runs:testdat-reruns testdat))
	 (test-name              (runs:testdat-test-name testdat))
	 (item-path              (runs:testdat-item-path testdat))
	 (jobgroup               (runs:testdat-jobgroup testdat))
	 (waitons                (runs:testdat-waitons testdat))
	 (item-path              (runs:testdat-item-path testdat))
	 (testmode               (runs:testdat-testmode testdat))
	 (newtal                 (runs:testdat-newtal testdat))
	 (itemmaps               (runs:testdat-itemmaps testdat))
	 (test-record            (runs:testdat-test-record testdat))
	 (prereqs-not-met        (runs:testdat-prereqs-not-met testdat))

	 (reglen                 (runs:dat-reglen runsdat))
	 (regfull                (runs:dat-regfull runsdat))
	 (runname                (runs:dat-runname runsdat))
	 (max-concurrent-jobs    (runs:dat-max-concurrent-jobs runsdat))
	 (run-id                 (runs:dat-run-id runsdat))
	 (test-patts             (runs:dat-test-patts runsdat))
	 (required-tests         (runs:dat-required-tests runsdat))
	 (test-registry          (runs:dat-test-registry runsdat))
	 (registry-mutex         (runs:dat-registry-mutex runsdat))
	 (flags                  (runs:dat-flags runsdat))
	 (keyvals                (runs:dat-keyvals runsdat))
	 (run-info               (runs:dat-run-info runsdat))
	 (all-tests-registry     (runs:dat-all-tests-registry runsdat))
	 (run-limits-info        (runs:dat-can-run-more-tests runsdat))
	 ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources         (car run-limits-info))
	 (num-running            (list-ref run-limits-info 1))
	 (num-running-in-jobgroup(list-ref run-limits-info 2)) 
	 (max-concurrent-jobs    (list-ref run-limits-info 3))
	 (job-group-limit        (list-ref run-limits-info 4))
	 ;; (prereqs-not-met        (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
	 ;; (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails                  (if (list? prereqs-not-met) ;; TODO: rename fails to failed-prereqs
				      (runs:calc-fails prereqs-not-met)
				      (begin
					(debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met)
					'())))
	 (non-completed           (filter (lambda (x)             ;; remove hed from not completed list, duh, of course it is not completed!
					    (not (equal? x hed)))
					  (runs:calc-not-completed prereqs-not-met)))
	 (loop-list               (list hed tal reg reruns))
	 ;; configure the load runner
	 (numcpus                 (common:get-num-cpus #f))
	 (maxload                 (string->number (or (configf:lookup *configdat* "jobtools" "maxload") "3.0")))         ;; use a non-number string to disable
         (maxhomehostload         (string->number (or (configf:lookup *configdat* "jobtools" "maxhomehostload") "2.0"))) ;; use a non-number string to disable
         (waitdelay               (string->number (or (configf:lookup *configdat* "jobtools" "waitdelay") "60"))))
    (debug:print-info 4 *default-log-port* "have-resources: " have-resources " prereqs-not-met: (" 
		      (string-intersperse 
		       (map (lambda (t)
			      (if (vector? t)
				  (conc (db:test-get-state t) "/" (db:test-get-status t))
				  (conc " WARNING: t is not a vector=" t )))
			    prereqs-not-met)
		       ", ") ") fails: " fails
		       "\nregistered? " (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
			    

    
    (if (and (not (null? prereqs-not-met))
	     (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	(debug:print-info 2 *default-log-port* "waiting on tests; " (string-intersperse (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met) ", ")))

    ;; Don't know at this time if the test have been launched at some time in the past
    ;; i.e. is this a re-launch?
    (debug:print-info 4 *default-log-port* "run-limits-info = " run-limits-info)
    
    (cond ; cond 894- 1067
     
     ;; Check item path against item-patts, 
     ;;
     ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run
      ;; else the run is stuck, temporarily or permanently
      ;; but should check if it is due to lack of resources vs. prerequisites
      (debug:print-info 1 *default-log-port* "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts)
      (if (or (not (null? tal))(not (null? reg)))
	  (runs:loop-values tal reg reglen regfull reruns)
	  #f))
     
     ;; Register tests 
     ;;
     ((not (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f))
      (debug:print-info 4 *default-log-port* "Pre-registering test " test-name "/" item-path " to create placeholder" )
      ;; always do firm registration now in v1.60 and greater ;; (eq? *transport-type* 'fs) ;; no point in parallel registration if use fs
      (let register-loop ((numtries 15))
	(rmt:register-test run-id test-name item-path)
	(if (rmt:get-test-id run-id test-name item-path)
	    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'done)
	    (if (> numtries 0)
		(begin
		  (thread-sleep! 0.5)
		  (register-loop (- numtries 1)))
		(debug:print-error 0 *default-log-port* "failed to register test " (db:test-make-full-name test-name item-path)))))
      (if (not (eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f) 'done))
	  (begin
	    (rmt:register-test run-id test-name "")
	    (if (rmt:get-test-id run-id test-name "")
		(hash-table-set! test-registry (db:test-make-full-name test-name "") 'done))))
      (runs:shrink-can-run-more-tests-count runsdat)   ;; DELAY TWEAKER (still needed?)
      (if (and (null? tal)(null? reg))
	  (list hed tal (append reg (list hed)) reruns)
	  (list (runs:queue-next-hed tal reg reglen regfull) ;; cannot replace with a call to runs:loop-values as the logic is different for reg
		(runs:queue-next-tal tal reg reglen regfull)
		;; NB// Here we are building reg as we register tests
		;; if regfull we must pop the front item off reg
		(if regfull
		    (append (cdr reg) (list hed))
		    (append reg (list hed)))
		reruns)))
     
     ;; At this point hed test registration must be completed.
     ;;
     ((eq? (hash-table-ref/default test-registry (db:test-make-full-name test-name item-path) #f)
	   'start)
      (debug:print-info 0 *default-log-port* "Waiting on test registration(s): "
			(string-intersperse 
			 (filter (lambda (x)
				   (eq? (hash-table-ref/default test-registry x #f) 'start))
				 (hash-table-keys test-registry))
			 ", "))
      (thread-sleep! 0.051)
      (list hed tal reg reruns))
     
     ;; If no resources are available just kill time and loop again
     ;;
     ((not have-resources) ;; simply try again after waiting a second
      (if (runs:lownoise "no resources" 60)
	  (debug:print-info 1 *default-log-port* "no resources to run new tests, waiting ..."))
      ;; Have gone back and forth on this but db starvation is an issue.
      ;; wait one second before looking again to run jobs.
      (thread-sleep! 1)
      ;; could have done hed tal here but doing car/cdr of newtal to rotate tests
      (list (car newtal)(cdr newtal) reg reruns))
     
     ;; This is the final stage, everything is in place so launch the test
     ;;
     ((and have-resources
	   (or (null? prereqs-not-met)
	       (and (member 'toplevel testmode) ;;  'toplevel)
		    (null? non-completed)
		    (not (member 'exclusive testmode)))))
      ;; (hash-table-delete! *max-tries-hash* (db:test-make-full-name test-name item-path))
      ;; we are going to reset all the counters for test retries by setting a new hash table
      ;; this means they will increment only when nothing can be run
      (set! *max-tries-hash* (make-hash-table))
      ;; well, first lets see if cpu load throttling is enabled. If so wait around until the
      ;; average cpu load is under the threshold before continuing
      (if maxload ;; only gate if maxload is specified
          (common:wait-for-cpuload maxload numcpus waitdelay))
      (if maxhomehostload
          (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))
      
      (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
      (runs:incremental-print-results run-id)
      (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'running)
      (runs:shrink-can-run-more-tests-count runsdat)  ;; DELAY TWEAKER (still needed?)
      ;; (thread-sleep! *global-delta*)
      (if (or (not (null? tal))(not (null? reg)))
	  (runs:loop-values tal reg reglen regfull reruns)
	  #f))
     
     ;; must be we have unmet prerequisites
     ;;
     (else
      (debug:print 4 *default-log-port* "FAILS: " fails)
      ;; If one or more of the prereqs-not-met are FAIL then we can issue
      ;; a message and drop hed from the items to be processed.
      ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met)
      (if (and (not (null? prereqs-not-met))
	       (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60))
	  (debug:print-info 1 *default-log-port* "waiting on tests; " (string-intersperse 
						    (runs:mixed-list-testname-and-testrec->list-of-strings 
						     prereqs-not-met) ", ")))
      (if (or (null? fails)
	      (member 'toplevel testmode))
	  (begin
	    ;; couldn't run, take a breather
	    (if  (runs:lownoise "Waiting for more work to do..." 60)
		 (debug:print-info 0 *default-log-port* "Waiting for more work to do..."))
	    (thread-sleep! 1)
	    (list (car newtal)(cdr newtal) reg reruns))
	  ;; the waiton is FAIL so no point in trying to run hed ever again
	  (begin
            (let ((my-test-id (rmt:get-test-id run-id test-name item-path)))
              (mt:test-set-state-status-by-id-unless-completed run-id my-test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites2"))


            
            (if (or (not (null? reg))(not (null? tal)))
                (if (vector? hed)
		  (begin
		    (debug:print 1 *default-log-port* "WARNING: Dropping test " test-name "/" item-path
				 " from the launch list as it has prerequistes that are FAIL")
		    (let ((test-id (rmt:get-test-id run-id hed "")))
		      (if test-id (mt:test-set-state-status-by-id-unless-completed run-id test-id "COMPLETED" "PREQ_FAIL" "Failed to run due to failed prerequisites")))
		    (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		    ;; (thread-sleep! *global-delta*)
		    ;; This next is for the items

                    (if (not (null? fails))
                        ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f)
                        (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "PREQ_FAIL" #f) 
                        ;;(mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "BLOCKED" #f)
                        (rmt:set-state-status-and-roll-up-items run-id test-name item-path "NOT_STARTED" "BLOCKED" #f) )
		    (hash-table-set! test-registry (db:test-make-full-name test-name item-path) 'removed)
		    (runs:loop-values tal reg reglen regfull reruns))
		  (let ((nth-try (hash-table-ref/default test-registry hed 0))) ;; hed not a vector...
                    (debug:print 2 *default-log-port* "nth-try("hed")="nth-try)
		    (cond
		     ((member "RUNNING" (map db:test-get-state prereqs-not-met))
		      (if (runs:lownoise (conc "possible RUNNING prerequistes " hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has possible RUNNING prerequisites, don't give up on it yet."))
		      (thread-sleep! 0.1)
		      (runs:loop-values tal reg reglen regfull reruns))
		     ((or (not nth-try) ;; BB: condition on subsequent tries, condition below fires on first try 
			  (and (number? nth-try)
			       (< nth-try 2)))
		      (hash-table-set! test-registry hed (if (number? nth-try)
							     (+ nth-try 1)
							     0))
		      (if (runs:lownoise (conc "not removing test " hed) 60)
			  (debug:print 1 *default-log-port* "WARNING: not removing test " hed " from queue although it may not be runnable due to FAILED prerequisites"))
		      ;; may not have processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
		      (runs:shrink-can-run-more-tests-count runsdat) ;; DELAY TWEAKER (still needed?)
		      (runs:loop-values newtal reg reglen regfull reruns))
		     ((symbol? nth-try) ;; BB: 'done matches here in one case where prereq itemwait failed.  This is first "try"
		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
			  (if (null? tal)
			      #f ;; yes, really
			      (list (car tal)(cdr tal) reg reruns))
			  (begin
			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry."))
			    ;; was: (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
                            (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f)
			    (hash-table-set! test-registry hed 'removed) ;; was 0
                            (if (not (and (null? reg) (null? tal)))
                                (runs:loop-values tal reg reglen regfull reruns)
                                #f))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
		      ;; (debug:print 0 *default-log-port* "         prereqs: " prereqs-not-met)
		      (hash-table-set! test-registry hed 'removed)
		      (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "TEN_STRIKES" #f)
		      ;; I'm unclear on if this roll up is needed - it may be the root cause of the "all set to FAIL" bug.
		      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "FAIL" #f) ;; treat as FAIL
		      (list (if (null? tal)(car newtal)(car tal))
			    tal
			    reg
			    reruns)))))
                ;; ELSE: can't drop this - maybe running? Just keep trying

                ;;(if (not (or (not (null? reg))(not (null? tal)))) ;; old experiment
                (let ((runable-tests (runs:runable-tests prereqs-not-met))) ;; SUSPICIOUS: Should look at more than just prereqs-not-met?
                  (if (null? runable-tests)
                      #f   ;; I think we are truly done here
                      (runs:loop-values newtal reg reglen regfull reruns)))
                ;;) ;;from old experiment
                ) ;; end if (or (not (null? reg))(not (null? tal)))
            ))))))

;; scan a list of tests looking to see if any are potentially runnable
;;
(define (runs:runable-tests tests)
  (filter (lambda (t)
	    (if (not (vector? t))
		t
		(let ((state  (db:test-get-state t))
		      (status (db:test-get-status t)))
		  (case (string->symbol state)
		    ((COMPLETED INCOMPLETE) #f)
		    ((NOT_STARTED)
		     (if (member status '("TEN_STRIKES" "BLOCKED" "PREQ_FAIL" "ZERO_ITEMS" "PREQ_DISCARDED" "TIMED_OUT" ))
			 #f
			 t))
		    ((DELETED) #f)
		    (else t)))))
	  tests))

;; move all the miscellanea into this struct
;;
(defstruct runs:gendat inc-results inc-results-last-update inc-results-fmt run-info runname target)

(define *runs:general-data* 
  (make-runs:gendat
   inc-results: (make-hash-table)
   inc-results-last-update: 0
   inc-results-fmt: "~12a~12a~20a~12a~40a\n" ;; state status time duration test-name item-path
   run-info: #f
   runname: #f
   target: #f
   )
  )

(define (runs:incremental-print-results run-id)
  (let ((curr-sec (current-seconds)))
    (if (> (- curr-sec (runs:gendat-inc-results-last-update *runs:general-data*)) 5) ;; at least five seconds since last update
	(let* ((run-dat  (or (runs:gendat-run-info *runs:general-data*)(rmt:get-run-info run-id)))
	       (runname  (or (runs:gendat-runname *runs:general-data*)
			     (db:get-value-by-header (db:get-rows run-dat)
						     (db:get-header run-dat) "runname")))
	       (target   (or (runs:gendat-target *runs:general-data*)(rmt:get-target run-id)))
	       (testsdat (rmt:get-tests-for-run run-id "%" '() '() ;; run-id testpatt states statuses
						#f #f ;; offset limit
						#f ;; not-in
						#f ;; sort-by
						#f ;; sort-order
						#f ;; get full data (not 'shortlist)
						(runs:gendat-inc-results-last-update *runs:general-data*) ;; last update time
						'dashboard)))
	  (if (not (runs:gendat-run-info *runs:general-data*))
	      (runs:gendat-run-info-set! *runs:general-data* run-dat))
	  (if (not (runs:gendat-runname  *runs:general-data*))
	      (runs:gendat-runname-set! *runs:general-data* runname))
	  (if (not (runs:gendat-target *runs:general-data*))
	      (runs:gendat-target-set! *runs:general-data* target))
	  (for-each
	   (lambda (testdat)
	     (let* ((test-id    (db:test-get-id           testdat))
		    (prevdat    (hash-table-ref/default   (runs:gendat-inc-results *runs:general-data*)
							  (conc run-id "," test-id) #f))
		    (test-name  (db:test-get-testname     testdat))
		    (item-path  (db:test-get-item-path    testdat))
		    (state      (db:test-get-state        testdat))
		    (status     (db:test-get-status       testdat))
		    (event-time (db:test-get-event_time   testdat))
		    (duration   (db:test-get-run_duration testdat)))
	       (if (and (not (member state '("DELETED" "REMOTEHOSTSTART" "RUNNING" "LAUNCHED""NOT_STARTED")))
			(not (and prevdat
				  (equal? state  (db:test-get-state  prevdat))
				  (equal? status (db:test-get-status prevdat)))))
		   (let ((fmt   (runs:gendat-inc-results-fmt *runs:general-data*))
			 (dtime (seconds->year-work-week/day-time event-time))) 
		     (if (runs:lownoise "inc-print" 600)
			 (format #t fmt "State" "Status" "Start Time" "Duration" "Test path"))
		     ;; (debug:print 0 *default-log-port* "fmt: " fmt " state: " state " status: " status " test-name: " test-name " item-path: " item-path " dtime: " dtime)
		     ;; (debug:print 0 #f "event-time: " event-time " duration: " duration)
		     (format #t fmt
			     state
			     status
			     dtime
			     (seconds->hr-min-sec duration)
			     (conc "lt/" target "/" runname "/" test-name (if (string-null? item-path) "" (conc "/" item-path))))
		     (hash-table-set! (runs:gendat-inc-results *runs:general-data*) (conc run-id "," test-id) testdat)))))
	   testsdat)))
    (runs:gendat-inc-results-last-update-set! *runs:general-data* (- curr-sec 10))))

;; every time though the loop increment the test/itempatt val.
;; when the min is > max-allowed and none running then force exit
;;
(define *max-tries-hash* (make-hash-table))

;;======================================================================
;; runs:run-tests-queue is called by runs:run-tests
;;======================================================================
;;
;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry)
  ;; At this point the list of parent tests is expanded 
  ;; NB// Should expand items here and then insert into the run queue.
  (debug:print 5 *default-log-port* "test-records: " test-records ", flags: " (hash-table->alist flags))

  ;; Do mark-and-find clean up of db before starting runing of quue
  ;;
  ;; (rmt:find-and-mark-incomplete)

  (let* ((run-info              (rmt:get-run-info run-id))
         (tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
         (sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
         (test-registry         (make-hash-table))
         (registry-mutex        (make-mutex))
         (num-retries           0)
         (max-retries           (config-lookup *configdat* "setup" "maxretries"))
         (max-concurrent-jobs   (configf:lookup-number *configdat* "setup" "max_concurrent_jobs" default: 50))
         (reglen                (if (number? reglen-in) reglen-in 1))
         (last-time-incomplete  (- (current-seconds) 900)) ;; force at least one clean up cycle
         (last-time-some-running (current-seconds))
         ;; (tdbdat                (tasks:open-db))
         (runsdat (make-runs:dat
                   ;; hed: hed
                   ;; tal: tal
                   ;; reg: reg
                   ;; reruns: reruns
                   reglen: reglen
                   regfull: #f ;; regfull
                   ;; test-record: test-record
                   runname: runname
                   ;; test-name: test-name
                   ;; item-path: item-path
                   ;; jobgroup: jobgroup
                   max-concurrent-jobs: max-concurrent-jobs
                   run-id: run-id
                   ;; waitons: waitons
                   ;; testmode: testmode
                   test-patts: test-patts
                   required-tests: required-tests
                   test-registry: test-registry
                   registry-mutex: registry-mutex
                   flags: flags
                   keyvals: keyvals
                   run-info: run-info
                   ;; newtal: newtal
                   all-tests-registry: all-tests-registry
                   ;; itemmaps: itemmaps
                   ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)
                   ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running
                   )))

    ;; Initialize the test-registery hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))
		      (st (db:test-get-state     trec)))
		  (if (not (equal? st "DELETED"))
		      (hash-table-set! test-registry (db:test-make-full-name tn ip) (string->symbol st)))))
	      tests-info)
    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))

    (let loop ((hed         (car sorted-test-names))
               (tal         (cdr sorted-test-names))
	       (reg         '()) ;; registered, put these at the head of tal 
	       (reruns      '()))

      (runs:incremental-print-results run-id)

      (if (not (null? reruns))(debug:print-info 4 *default-log-port* "reruns=" reruns))

      ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
      ;; moving this to a parallel thread and just run it once.
      ;;
      (if (> (current-seconds)(+ last-time-incomplete 900))
          (begin
            (set! last-time-incomplete (current-seconds))
            ;; (rmt:find-and-mark-incomplete-all-runs)
	    ))

      ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
      (let* ((test-record (hash-table-ref test-records hed))
	     (test-name   (tests:testqueue-get-testname test-record))
	     (tconfig     (tests:testqueue-get-testconfig test-record))
	     (jobgroup    (config-lookup tconfig "test_meta" "jobgroup"))
	     (testmode    (let ((m (config-lookup tconfig "requirements" "mode")))
			    (if m (map string->symbol (string-split m)) '(normal))))
	     (itemmaps    (tests:get-itemmaps tconfig)) ;;  (configf:lookup tconfig "requirements" "itemmap"))
	     (priority    (tests:testqueue-get-priority   test-record))
	     (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
	     (items       (tests:testqueue-get-items      test-record))
	     (item-path   (item-list->path itemdat))
	     (tfullname   (db:test-make-full-name test-name item-path))
	      ;; these are hard coded item-item waits test/item-path => test/item-path2 ...
	     (extra-waits (let* ((section (configf:get-section (tests:testqueue-get-testconfig test-record) "waitons"))
				 (myextra (alist-ref tfullname section equal?)))
			    (if myextra
				(let ((extras (string-split (car myextra))))
				  (if (runs:lownoise (conc tfullname "extra-waitons" tfullname) 60)
				      (debug:print-info 0 *default-log-port* "HAVE EXTRA WAITONS for test " tfullname ": " myextra))
				  (for-each
				   (lambda (extra)
				     ;; (debug:print 0 *default-log-port* "FYI: extra = " extra " reruns = " reruns)
				     (let ((basetestname (car (string-split extra "/"))))
				       #;(if (not (member extra tal))
					   (set! reruns (append tal (list extra))))
				       (if (not (member basetestname tal))
					   (set! reruns (append tal (list basetestname))))
				       ))
				   extras)
				  extras)
				'())))
	     (waitons     (delete-duplicates (append (tests:testqueue-get-waitons test-record) extra-waits) equal?))
	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen))
	     (num-running (rmt:get-count-tests-running-for-run-id run-id))
	     (testdat     (make-runs:testdat
			   hed: hed
			   tal: tal
			   reg: reg
			   reruns: reruns
			   test-record: test-record
			   test-name:   test-name
			   item-path:   item-path
			   jobgroup:    jobgroup
			   waitons:     waitons
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)
    
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))

	;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard
	;; and it is clear they *should* have run but did not.
	(if (not (hash-table-ref/default test-registry (db:test-make-full-name test-name "") #f))
	    (begin
	      (rmt:register-test run-id test-name "")
	      (hash-table-set! test-registry (db:test-make-full-name test-name "") 'done)))
	
	;; Fast skip of tests that are already "COMPLETED" - NO! Cannot do that as the items may not have been expanded yet :(
	;;
	(if (member (hash-table-ref/default test-registry tfullname #f) 
		    '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE))
	    (begin
	      (if (runs:lownoise (conc "been marked do not run " tfullname) 60)
		  (debug:print-info 0 *default-log-port* "Skipping test " tfullname " as it has been marked do not run due to being completed or not runnable"))
	      (if (or (not (null? tal))(not (null? reg)))
		  (loop (runs:queue-next-hed tal reg reglen regfull)
			(runs:queue-next-tal tal reg reglen regfull)
			(runs:queue-next-reg tal reg reglen regfull)
			reruns))))
        ;; (loop (car tal)(cdr tal) reg reruns))))

	(runs:incremental-print-results run-id)
	(debug:print 4 *default-log-port* "TOP OF LOOP => "
		     "test-name: " test-name
		     "\n  hed:         " hed
		     "\n  tal:         " tal
		     "\n  reg:         " reg
                     "\n  test-record  " test-record
                     "\n  itemdat:     " itemdat
		     "\n  items:       " items
		     "\n  item-path:   " item-path
		     "\n  waitons:     " waitons
		     "\n  num-retries: " num-retries
                     "\n  reruns:      " reruns
		     "\n  regfull:     " regfull
		     "\n  reglen:      " reglen
		     "\n  length reg:  " (length reg)
                     )

	;; check for hed in waitons => this would be circular, remove it and issue an
	;; error
	(if (member test-name waitons)
	    (begin
	      (debug:print-error 0 *default-log-port* "test " test-name " has listed itself as a waiton, please correct this!")
	      (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons))))

	(cond 
	 
	 ;; We want to catch tests that have waitons that are NOT in the queue and discard them IFF 
	 ;; they have been through the wringer 10 or more times
	 ((and (list? waitons)
	       (not (null? waitons))
	       (> (hash-table-ref/default *max-tries-hash* tfullname 0) 10)
	       (not (null? (filter
			    number?
			    (map (lambda (waiton)
				   (if (and (not (member waiton tal))            ;; this waiton is not in the list to be tried to run
					    (not (member waiton reruns)))
				       1
				       #f))
				 waitons))))) ;; could do this more elegantly with a marker....
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-1")
	  (debug:print 0 *default-log-port* "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.")
	  (hash-table-set! test-registry tfullname 'removed))

	 ;; items is #f then the test is ok to be handed off to launch (but not before)
	 ;; 
	 ((not items)
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-2")
	  (debug:print-info 4 *default-log-port* "OUTER COND: (not items)")
	  (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests))
		   (not (null? tal)))
	      (loop (car tal)(cdr tal) reg reruns))
	  (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
	  (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	  (let ((loop-list (runs:process-expanded-tests runsdat testdat)))
            (if loop-list (apply loop loop-list))))

	 ;; items processed into a list but not came in as a list been processed
	 ;;
	 ((and (list? items)     ;; thus we know our items are already calculated
	       (not   itemdat))  ;; and not yet expanded into the list of things to be done
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-3")
	  (debug:print-info 4 *default-log-port* "OUTER COND: (and (list? items)(not itemdat))")
	  ;; Must determine if the items list is valid. Discard the test if it is not.
	  (if (and (list? items)
		   (> (length items) 0)
		   (and (list? (car items))
			(> (length (car items)) 0))
		   (debug:debug-mode 1))
	      (debug:print 2 *default-log-port* (map (lambda (row)
                                                       (conc (string-intersperse
                                                              (map (lambda (varval)
                                                                     (string-intersperse varval "="))
                                                                   row)
                                                              " ")
                                                             "\n"))
                                                     items)))

          (let* ((items-in-testpatt
                  (filter
                   (lambda (my-itemdat)
                     (tests:match test-patts hed (item-list->path my-itemdat) ))
                   ;; was: (tests:match test-patts hed (item-list->path my-itemdat) required: required-tests))
                   items) ))
            (if (null? items-in-testpatt)
                (let ((test-id   (rmt:get-test-id run-id test-name "")))
                  (debug:print-info 0 *default-log-port* "Test " (tests:testqueue-get-testname test-record) " is itemized but has no items matching test pattern -- marking status ZERO_ITEMS")
                  (if test-id
                      (mt:test-set-state-status-by-id run-id test-id "NOT_STARTED" "ZERO_ITEMS" "This test has no items which match test pattern.")))
                
                (for-each (lambda (my-itemdat)
                            (let* ((new-test-record (let ((newrec (make-tests:testqueue)))
                                                      (vector-copy! test-record newrec)
                                                      newrec))
                                   (my-item-path (item-list->path my-itemdat))

                                   (newtestname (db:test-make-full-name hed my-item-path)))    ;; test names are unique on testname/item-path
                              (tests:testqueue-set-items!     new-test-record #f)
                              (tests:testqueue-set-itemdat!   new-test-record my-itemdat)
                              (tests:testqueue-set-item_path! new-test-record my-item-path)
                              (hash-table-set! test-records newtestname new-test-record)
                              (set! tal (append tal (list newtestname)))))  ;; since these are itemized create new test names testname/itempath
                          items-in-testpatt)))
          
          

	  ;; At this point we have possibly added items to tal but all must be handed off to 
	  ;; INNER COND logic. I think loop without rotating the queue 
	  ;; (loop hed tal reg reruns))
	  ;; (let ((newtal (append tal (list hed))))  ;; We should discard hed as it has been expanded into it's items? Yes, but only if this *is* an itemized test
	  ;; (loop (car newtal)(cdr newtal) reg reruns)
	  (if (null? tal)
	      #f
	      (loop (car tal)(cdr tal) reg reruns)))
         
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-4")
	  (let ((can-run-more    (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here
		  (if loop-list
		      (apply loop loop-list)
                      (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed)
                      )
                  )
		;; if can't run more just loop with next possible test
		(loop (car newtal)(cdr newtal) reg reruns))))
         
	 ;; this case should not happen, added to help catch any bugs
	 ((and (list? items) itemdat)
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-5")
	  (debug:print-error 0 *default-log-port* "Should not have a list of items in a test and the itemspath set - please report this")
	  (exit 1))
	 ((not (null? reruns))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-6")
	  (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED,
		 (junked (lset-difference equal? tal newlst)))
	    (debug:print-info 4 *default-log-port* "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal)
	    (if (< num-retries max-retries)
		(set! newlst (append reruns newlst)))
	    (set! num-retries (+ num-retries 1))
	    ;; (thread-sleep! (+ 1 *global-delta*))
	    (if (not (null? newlst))
		;; since reruns have been tacked on to newlst create new reruns from junked
		(loop (car newlst)(cdr newlst) reg (delete-duplicates junked)))))
	 ((not (null? tal))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-7")
	  (debug:print-info 4 *default-log-port* "I'm pretty sure I shouldn't get here."))
	 ((not (null? reg)) ;; could we get here with leftovers?
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-8")
	  (debug:print-info 0 *default-log-port* "Have leftovers!")
	  (loop (car reg)(cdr reg) '() reruns))
	 (else
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-9")
	  (debug:print-info 4 *default-log-port* "Exiting loop with...\n  hed=" hed "\n  tal=" tal "\n  reruns=" reruns))
	 ))) ;; end loop on sorted test names
    ;; this is the point where everything is launced and now you can mark the run in metadata table as all launced 
    (rmt:set-var (conc "lunch-complete-" run-id) "yes")
  
    ;; now *if* -run-wait we wait for all tests to be done
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle
    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
		    (prev-num-running 0))
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))
	  (begin
	    ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
	    ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0))
	    (if (> (current-seconds)(+ last-time-incomplete 900))
		(begin
		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id ". Running as pid " (current-process-id) " on " (get-host-name))
		  (set! last-time-incomplete (current-seconds))
		  (rmt:find-and-mark-incomplete run-id #f)))
	    (if (not (eq? num-running prev-num-running))
		(debug:print-info 0 *default-log-port* "run-wait specified, waiting on " num-running " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at " (time->string (seconds->local-time (current-seconds)))))
	    (thread-sleep! 5)
	    ;; (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id) num-running))))
    ;; LET* ((test-record
    ;; we get here on "drop through". All done!
    ;; this is moved to runs:run-testes since this function is getting called twice to ensure everthing is completed. 
    ;; (debug:print-info 0 *default-log-port* "Calling Post Hook")    
    ;; (runs:run-post-hook run-id)
    (debug:print-info 1 *default-log-port* "All tests launched")))

(define (runs:calc-fails prereqs-not-met)
  (filter (lambda (test)
	    (and (vector? test) ;; not (string? test))
		 (member (db:test-get-state test) '("INCOMPLETE" "COMPLETED")) ;; TODO: pull from *common:stuff...*
		 (not (member (db:test-get-status test)
			      '("PASS" "WARN" "CHECK" "WAIVED" "SKIP")))))
	  prereqs-not-met))

(define (runs:calc-prereq-fail prereqs-not-met) ;; REMOVEME since NOT_STARTED/PREQ_FAIL is now COMPLETED/PREQ_FAIL
  (filter (lambda (test)
	    (and (vector? test) ;; not (string? test))
		 (equal? (db:test-get-state test) "NOT_STARTED")
		 (not (member (db:test-get-status test)
			      '("n/a" "KEEP_TRYING")))))
	  prereqs-not-met))

(define (runs:calc-not-completed prereqs-not-met)
  (filter
   (lambda (t)
     (or (not (vector? t))
	 (not (member (db:test-get-state t) '("INCOMPLETE" "COMPLETED")))))
   prereqs-not-met))

;; (define (runs:calc-not-completed prereqs-not-met)
;;   (filter
;;    (lambda (t)
;;      (or (not (vector? t))
;; 	 (not (equal? "COMPLETED" (db:test-get-state t)))))
;;    prereqs-not-met))

(define (runs:calc-runnable prereqs-not-met)
  (filter 
   (lambda (t)
     (or (not (vector? t))
	 (and (equal? "NOT_STARTED" (db:test-get-state t))
	      (member (db:test-get-status t)
		      '("n/a" "KEEP_TRYING")))
	 (and (equal? "RUNNING" (db:test-get-state t))))) ;; account for a test that is running
   prereqs-not-met))

(define (runs:pretty-string lst)
  (map (lambda (t)
	 (if (not (vector? t))
	     (conc t)
	     (conc (db:test-get-testname t) ":" (db:test-get-state t) "/" (db:test-get-status t))))
       lst))

;; parent-test is there as a placeholder for when parent-tests can be run as a setup step
;;
(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry)
  ;; All these vars might be referenced by the testconfig file reader
  (let* ((test-name    (tests:testqueue-get-testname   test-record))
	 (test-waitons (tests:testqueue-get-waitons    test-record))
	 (test-conf    (tests:testqueue-get-testconfig test-record))
	 (itemdat      (tests:testqueue-get-itemdat    test-record))
	 (test-path    (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ...
	 (force        (hash-table-ref/default flags "-force" #f))
	 (rerun        (hash-table-ref/default flags "-rerun" #f))
	 (keepgoing    (hash-table-ref/default flags "-keepgoing" #f))
	 (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x")))
	 (item-path     "")
	 (db           #f)
	 (full-test-name #f))

    ;; setting itemdat to a list if it is #f
    (if (not itemdat)(set! itemdat '()))
    (set! item-path (item-list->path itemdat))
    (set! full-test-name (db:test-make-full-name test-name item-path))
    (debug:print-info 4 *default-log-port*
		      "\nTESTNAME: " full-test-name 
		      "\n   test-config: " (hash-table->alist test-conf)
		      "\n   itemdat: " itemdat
		      )
    (debug:print 2 *default-log-port* "Attempting to launch test " full-test-name)
    ;; (setenv "MT_TEST_NAME" test-name) ;; 
    ;; (setenv "MT_ITEMPATH"  item-path)
    ;; (setenv "MT_RUNNAME"   runname)
    (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) ;; these may be needed by the launching process
    (change-directory *toppath*)

    ;; Here is where the test_meta table is best updated
    ;; Yes, another use of a global for caching. Need a better way?
    ;;
    ;; There is now a single call to runs:update-all-test_meta and this 
    ;; per-test call is not needed. Given the delicacy of the move to 
    ;; v1.55 this code is being left in place for the time being.
    ;;
    (if (not (hash-table-ref/default *test-meta-updated* test-name #f))
        (begin
          (hash-table-set! *test-meta-updated* test-name #t)
          (runs:update-test_meta test-name test-conf)))
    
    ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer"))
    (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
	   (test-id       (rmt:get-test-id run-id test-name item-path))
	   (testdat       (if test-id (rmt:get-test-info-by-id run-id test-id) #f)))
      (if (not testdat)
	  (let loop ()
	    ;; ensure that the path exists before registering the test
	    ;; NOPE: Cannot! Don't know yet which disk area will be assigned....
	    ;; (system (conc "mkdir -p " new-test-path))
	    ;;
	    ;; (open-run-close tests:register-test db run-id test-name item-path)
	    ;;
	    ;; NB// for the above line. I want the test to be registered long before this routine gets called!
	    ;;
	    (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path)))
	    (if (not test-id)
		(begin
		  (debug:print 2 *default-log-port* "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id)
		  (rmt:register-test run-id test-name item-path)
		  (set! test-id (rmt:get-test-id run-id test-name item-path))))
	    (debug:print-info 4 *default-log-port* "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")
	    (set! testdat (rmt:get-test-info-by-id run-id test-id))
	    (if (not testdat)
		(begin
		  (debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in one second")
		  (thread-sleep! 1)
		  (loop)))))
      (if (not testdat) ;; should NOT happen
	  (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id))
      (set! test-id (db:test-get-id testdat))
      (if (common:file-exists? test-path)
	  (change-directory test-path)
	  (begin
	    (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?")
	    (change-directory *toppath*)))
      (case (if force ;; (args:get-arg "-force")
		'NOT_STARTED
		(if testdat
		    (string->symbol (test:get-state testdat))
		    'failed-to-insert))
	((failed-to-insert)
	 (debug:print-error 0 *default-log-port* "Failed to insert the record into the db"))
	((NOT_STARTED COMPLETED DELETED INCOMPLETE)
	 (let ((runflag #f))
	   (cond
	    ;; -force, run no matter what
	    (force (set! runflag #t))
	    ;; NOT_STARTED, run no matter what
	    ((member (test:get-state testdat) '("DELETED" "NOT_STARTED" "INCOMPLETE"))(set! runflag #t))
	    ;; not -rerun and PASS, WARN or CHECK, do no run
	    ((and (or (not rerun)
		      keepgoing)
		  ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK
		  (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED"))
		      (member (test:get-state  testdat) '("COMPLETED")))) 
	     (debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat))
	     (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED)
	     (set! runflag #f))
	    ;; -rerun and status is one of the specifed, run it
	    ((and rerun
		  (let* ((rerunlst   (string-split rerun ","))
			 (must-rerun (member (test:get-status testdat) rerunlst)))
		    (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun)
		    must-rerun))
	     (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path)
	     (set! runflag #t))
	    ;; -keepgoing, do not rerun FAIL
	    ((and keepgoing
		  (member (test:get-status testdat) '("FAIL")))
	     (set! runflag #f))
	    ((and (not rerun)
		  (member (test:get-status testdat) '("FAIL" "n/a")))
	     (set! runflag #t))
	    (else (set! runflag #f)))
	   (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat))
	   (if (not runflag)
	       (if (not parent-test)
		   (if (runs:lownoise (conc "not starting test" full-test-name) 60)
		       (debug:print 1 *default-log-port* "NOTE: Not starting test " full-test-name " as it is state \"" (test:get-state testdat) 
				    "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat)
				    "\" or -force to override")))
	       ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are
	       ;;       already met.
	       ;; This would be a great place to do the process-fork
	       ;; 
	       (let ((skip-test   #f)
		     (skip-check  (configf:get-section test-conf "skip")))
		 (cond 
		  ;; Have to check for skip conditions. This one skips if there are same-named tests
		  ;; currently running
		  ((and skip-check
			(configf:lookup test-conf "skip" "prevrunning"))
		   ;; run-ids = #f means *all* runs
		   (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)))
		     (if (not (null? running-tests)) ;; have to skip 
			 (set! skip-test "Skipping due to previous tests running"))))
		  ((and skip-check
			(configf:lookup test-conf "skip" "fileexists"))
		   (if (common:file-exists? (configf:lookup test-conf "skip" "fileexists"))
		       (set! skip-test (conc "Skipping due to existance of file " (configf:lookup test-conf "skip" "fileexists")))))
		  ((and skip-check
			(configf:lookup test-conf "skip" "rundelay"))
		   ;; run-ids = #f means *all* runs
		   (let* ((numseconds      (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay")))
			  (running-tests   (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))
			  (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED" "INCOMPLETE") '("PASS" "FAIL" "ABORT") #f)) ;; ironically INCOMPLETE is same as COMPLETED in this contex
			  (last-run-times  (map db:mintest-get-event_time completed-tests))
			  (time-since-last (- (current-seconds) (if (null? last-run-times) 0 (common:max last-run-times)))))
		     (if (or (not (null? running-tests)) ;; have to skip if test is running
			     (> numseconds time-since-last))
			 (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago"))))))
		 
		 (if skip-test
		     (begin
		       (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test)
		       (debug:print-info 1 *default-log-port* "SKIPPING Test " full-test-name " due to " skip-test))
		     ;;
		     ;; Here the test is handed off to launch.scm for launch-test to complete the launch process
		     ;;
		     (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags))
			 (begin
			   (print "ERROR: Failed to launch the test. Exiting as soon as possible")
			   (set! *globalexitstatus* 1) ;; 
			   (process-signal (current-process-id) signal/kill))))))))
	((KILLED) 
	 (debug:print 1 *default-log-port* "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")
	 (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) ;; KILLED))
	((LAUNCHED REMOTEHOSTSTART RUNNING)  
	 (debug:print 2 *default-log-port* "NOTE: " test-name " is already running"))
	;; (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
	;; 			       (db:test-get-run_duration testdat)))
	;; 	(or incomplete-timeout
	;; 	    6000)) ;; i.e. no update for more than 6000 seconds
	;;      (begin
	;;        (debug:print 0 *default-log-port* "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	;;        (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
	;;        ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
	;;      (debug:print 2 *default-log-port* "NOTE: " test-name " is already running")))
	(else      
	 (debug:print-error 0 *default-log-port* "Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat))
	 (case (string->symbol (test:get-state testdat)) 
	   ((COMPLETED INCOMPLETE)
	    (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN))
	   (else
	    (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN))))))))

;;======================================================================
;; END OF NEW STUFF
;;======================================================================

(define (get-dir-up-n dir . params) 
  (let ((dparts  (string-split dir "/"))
	(count   (if (null? params) 1 (car params))))
    (conc "/" (string-intersperse 
	       (take dparts (- (length dparts) count))
	       "/"))))

(define (runs:recursive-delete-with-error-msg real-dir)
  (if (> (system (conc "rm -rf " real-dir)) 0)
      (begin
	;; FAILED, possibly due to permissions, do chmod a+rwx then try one more time
	(system (conc "chmod -R a+rwx " real-dir))
	(if (> (system (conc "rm -rf " real-dir)) 0)
	    (debug:print-error 0 *default-log-port* "There was a problem removing " real-dir " with rm -f")))))

(define (runs:safe-delete-test-dir real-dir)
  ;; first delete all sub-directories
  (directory-fold 
   (lambda (f x)
     (let ((fullname (conc real-dir "/" f)))
       (if (directory? fullname)(runs:recursive-delete-with-error-msg fullname)))
     (+ 1 x))
   0 real-dir)
  ;; then files other than *testdat.db*
  (directory-fold 
   (lambda (f x)
     (let ((fullname (conc real-dir "/" f)))
       (if (not (string-search (regexp "testdat.db") f))
	   (runs:recursive-delete-with-error-msg fullname)))
     (+ 1 x))
   0 real-dir)
  ;; then the entire directory
  (runs:recursive-delete-with-error-msg real-dir))

;; cleanup often needs to remove all but the last N runs per target
;;
;; target-patts a1/b1/c1,a2/b2/c2 ...
;;
;; This will fail if called with empty target or a bad target (i.e. missing or extra fields)
;;
(define (runs:get-hash-by-target target-patts runpatt)
  (let* ((targets (string-split target-patts ","))
	 (keys    (rmt:get-keys))
	 (res-ht  (make-hash-table))) ;; target -> ( runrecord1 runrecord2 ... )
    (for-each
     (lambda (target-patt)
       (let ((runs     (rmt:simple-get-runs runpatt #f #f target-patt)))
	 (for-each
	  (lambda (run)
	    (let ((target (simple-run-target run)))
	      (hash-table-set! res-ht target (cons run (hash-table-ref/default res-ht target '())))))
	  runs)))
     targets)
    res-ht))

;; delete runs older than X (weeks, days, months years etc.)
;; delete redundant runs within a target - N is the input
;; delete redundant runs within a target IFF older than given date/time AND keep at least N
;; 
(define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep #!key (actions '(print)))
  (let* ((runs-ht  (runs:get-hash-by-target target-patts runpatt))
	 (age      (if (args:get-arg "-age")(common:hms-string->seconds (args:get-arg "-age")) #f))
	 (age-mark (if age (- (current-seconds) age) (+ (current-seconds) 86400)))
	 (precmd   (or (args:get-arg "-precmd") "")))
    (print "Actions: " actions)
    (for-each
     (lambda (target)
       (let* ((runs      (hash-table-ref runs-ht target))
	      (sorted    (sort runs (lambda (a b)(< (simple-run-event_time a)(simple-run-event_time b)))))
	      (to-remove (let* ((len      (length sorted))
                                (trim-amt (- len num-to-keep)))
                           (if (> trim-amt 0)
                               (take sorted trim-amt)
                               '()))))
	 (hash-table-set! runs-ht target to-remove)
         (print target ":")
         (for-each
          (lambda (run)
            (let ((remove (member run to-remove (lambda (a b)
                                                  (eq? (simple-run-id a)
                                                       (simple-run-id b))))))
	      (if (and age (> (simple-run-event_time run) age-mark))
		  (print "Skipping handling of " target "/" (simple-run-runname run) " as it is younger than " (args:get-arg "-age"))
		  (for-each
		   (lambda (action)
		     (case action
		       ((print)
			(print " " (simple-run-runname run)
			       " " (time->string (seconds->local-time (simple-run-event_time run)) "WW%V.%u %H:%M:%S")
			       " " (if remove "REMOVE" "")))
		       ((remove-runs)
			(if remove (system (conc precmd " megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %"
						 (if (member 'kill-runs actions) ;; if kill-runs is specified then set -kill-wait to 0
						     " -kill-wait 0"
						     "")))))
		       ((archive)
			(if remove (system (conc precmd " megatest -archive save-remove -target " target " -runname " (simple-run-runname run) " -testpatt %"))))
		       ((kill-runs)
			(if remove (system (conc precmd " megatest -kill-runs -target " target " -runname " (simple-run-runname run) " -testpatt %"))))
			))
		   actions))))
          sorted)))
     ;; (print "Sorted: " (map simple-run-event_time sorted))
     ;; (print "Remove: " (map simple-run-event_time to-remove))))
     (hash-table-keys runs-ht))
    runs-ht))

;; (define (runs:remove-all-but-last-n-runs-per-target target-patts runpatt num-to-keep)
;;   (let ((data (runs:get-all-but-most-recent-n-per-target target-patts runpatt num-to-keep)))
;;     (for-each
;;      (lambda (target)
;;        (let ((runs-to-remove (hash-table-ref data target )))
;;          (for-each
;;           (lambda (run)
;;             (print "megatest -remove-runs -target " target " -runname " (simple-run-runname run) " -testpatt %"))
;;           runs-to-remove)))
;;      (hash-table-keys data))))

;; Remove runs
;; fields are passing in through 
;; action:
;;    'remove-runs
;;    'set-state-status
;;
;; NB// should pass in keys?
;;
(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode #f)(options '()))
  (common:clear-caches) ;; clear all caches
  (let* ((db           #f)
	 ;; (tdbdat       (tasks:open-db))
	 (keys         (rmt:get-keys))
	 (rundat       (mt:get-runs-by-patt keys runnamepatt target))
	 (header       (vector-ref rundat 0))
	 (runs         (vector-ref rundat 1))
	 (states       (if state  (string-split state  ",") '()))
	 (statuses     (if status (string-split status ",") '()))
	 (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
	 (rp-mutex     (make-mutex))
	 (bup-mutex    (make-mutex))
         (keep-records (args:get-arg "-keep-records"))) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".

    (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs))
           (dbfile             (conc  *toppath* "/megatest.db"))
           (readonly-mode      (not (file-write-access? dbfile))))
      (when (and readonly-mode
                 (member action write-access-actions))
        (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed with action ["action"] in which write-access isrequired .")
        (exit 1)))
    
    (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
    (if (> 2 (length state-status))
	(begin
	  (debug:print-error 0 *default-log-port* "the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL")
	  (exit)))
    (for-each
     (lambda (run)
       (let ((runkey (string-intersperse (map (lambda (k)
						(db:get-value-by-header run header k)) keys) "/"))
	     (dirs-to-remove (make-hash-table))
	     (proc-get-tests (lambda (run-id)
                               (mt:get-tests-for-run run-id
                                                     testpatt states statuses
                                                     not-in:  #f
                                                     sort-by: (case action
                                                                ((remove-runs) 'rundir)
                                                                (else          'event_time))))))
	 (let* ((run-id    (db:get-value-by-header run header "id"))
		(run-state (db:get-value-by-header run header "state"))
		(run-name  (db:get-value-by-header run header "runname"))
		(tests     (if (not (equal? run-state "locked"))
			       (proc-get-tests run-id)
			       '()))
		(lasttpath "/does/not/exist/I/hope")
		(worker-thread #f))
	   (debug:print-info 4 *default-log-port* "runs:operate-on run=" run ", header=" header)
	   (if (not (null? tests))
	       (begin
		 (case action
                   ((kill-runs)
                    (tasks:kill-runner target run-name "%")
                    (debug:print 1 *default-log-port* "Killing tests for run: " runkey " " (db:get-value-by-header run header "runname"))
                    )
		   ((remove-runs)
		    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
		    ;; seek and kill in flight -runtests with % as testpatt here
		    ;; (if (equal? testpatt "%")
		    (tasks:kill-runner target run-name testpatt)
		    ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt))
		    (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((set-state-status)
		    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
		    (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((print-run)
		    (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
		    action)
		   ((run-wait)
		    (debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete"))
		   ((archive)
		    (debug:print 1 *default-log-port* "Archiving/restoring (" (args:get-arg "-archive") ") data for run: " runkey " " (db:get-value-by-header run header "runname"))
		    (set! worker-thread
			  (make-thread
			   (lambda ()
			     (case (string->symbol (args:get-arg "-archive"))
			       ((save save-remove keep-html)
				(archive:run-bup (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex))
			       ((restore)
				(archive:bup-restore (args:get-arg "-archive") run-id run-name tests rp-mutex bup-mutex))
			       (else 
				(debug:print-error 0 *default-log-port* "unrecognised sub command to -archive. Run \"megatest\" to see help")
				(exit))))
			   "archive-bup-thread"))
		    (thread-start! worker-thread))
		   (else
		    (debug:print-info 0 *default-log-port* "action not recognised " action)))
		 
		 ;; actions that operate on one test at a time can be handled below
		 ;;
		 (let ((sorted-tests     (filter 
					  vector?
					  (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr 
									  (db:test-get-rundir a)) ;; )  ;; (filedb:get-path *fdb* (db:test-get-rundir a)))
									 (dirb ;; (rmt:sdb-qry 'getstr 
									  (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b))))
								     (if (and (string? dira)(string? dirb))
									 (> (string-length dira)(string-length dirb))
									 #f))))))
		       (toplevel-retries (make-hash-table)) ;; try three times to loop through and remove top level tests
		       (test-retry-time  (make-hash-table))
                       (backgrounded-remove-status     (make-hash-table))
                       (backgrounded-remove-last-visit (make-hash-table))
                       (backgrounded-remove-result     (make-hash-table))
		       (allow-run-time   (string->number (or (args:get-arg "-kill-wait") "10")))) ;; seconds to allow for killing tests before just brutally killing 'em
		   (let loop ((test (car sorted-tests))
			      (tal  (cdr sorted-tests)))
		     (let* ((test-id       (db:test-get-id test))
			    (new-test-dat  (rmt:get-test-info-by-id run-id test-id)))
		       (if (not new-test-dat)
			   (begin
			     (debug:print-error 0 *default-log-port* "We have a test-id of " test-id " but no record was found. NOTE: No locking of records is done between processes, do not simultaneously remove the same run from two processes!")
			     (if (not (null? tal))
				 (loop (car tal)(cdr tal))))
			   (let* ((item-path     (db:test-get-item-path new-test-dat))
				  (test-name     (db:test-get-testname new-test-dat))
				  (run-dir       ;;(filedb:get-path *fdb*
				   ;; (rmt:sdb-qry 'getid 
				   (db:test-get-rundir new-test-dat)) ;; )    ;; run dir is from the link tree
                                  (has-subrun    (and (subrun:subrun-test-initialized? run-dir)
                                                      (not (subrun:subrun-removed? run-dir))))
				  (test-state    (db:test-get-state new-test-dat))
                                  (test-status    (db:test-get-status new-test-dat))
				  (test-fulln    (db:test-get-fullname new-test-dat))
				  (uname         (db:test-get-uname    new-test-dat))
				  (toplevel-with-children (and (db:test-get-is-toplevel test)
							       (> (rmt:test-toplevel-num-items run-id test-name) 0))))
                             
			     (case action
			       ((remove-runs)
				;; if the test is a toplevel-with-children issue an error and do not remove
				(cond
                                 (toplevel-with-children
                                  (debug:print 0 *default-log-port* "WARNING: skipping removal of " test-fulln " with run-id " run-id " as it has sub tests")
                                  (hash-table-set! toplevel-retries test-fulln (+ (hash-table-ref/default toplevel-retries test-fulln 0) 1))
                                  (if (> (hash-table-ref toplevel-retries test-fulln) 3)
                                      (if (not (null? tal))
                                          (loop (car tal)(cdr tal))) ;; no else clause - drop it if no more in queue and > 3 tries
                                      (let ((newtal (append tal (list test))))
                                        (loop (car newtal)(cdr newtal))))) ;; loop with test still in queue
                                 (has-subrun
                                  ;; 
                                  (let ((last-visit (hash-table-ref/default backgrounded-remove-last-visit test-fulln 0))
                                        (now        (current-seconds))
                                        (rem-status (hash-table-ref/default backgrounded-remove-status test-fulln 'not-started)))
                                    (case rem-status
                                      ((not-started)
                                       (debug:print 0 *default-log-port* "WARNING: postponing removal of " test-fulln " with run-id " run-id " as it has a subrun")
                                       (hash-table-set! backgrounded-remove-status test-fulln 'started)
                                       (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds))
                                       (common:send-thunk-to-background-thread
                                        (lambda ()
                                          (let* ((subrun-remove-succeeded
                                                  (subrun:remove-subrun run-dir keep-records)))
                                            (hash-table-set! backgrounded-remove-result test-fulln subrun-remove-succeeded)
                                            (hash-table-set! backgrounded-remove-status test-fulln 'done)))
                                        name: (conc "remove-subrun:"test-fulln))
                                       
                                       ;; send to back of line, loop
                                       (let ((newtal (append tal (list test))))
                                        (loop (car newtal)(cdr newtal)))
                                       )
                                      ((started)
                                       ;; if last visit was within last second, sleep 1 second
                                       (if (< (- now last-visit) 1.0)
                                           (thread-sleep! 1.0))
                                       (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds))
                                       ;; send to back of line, loop
                                       (let ((newtal (append tal (list test))))
                                         (loop (car newtal)(cdr newtal)))
                                       )
                                      ((done)
                                       ;; drop this one; if remaining, loop, else finish
                                       (hash-table-set! backgrounded-remove-last-visit test-fulln (current-seconds))
                                       (let ((subrun-remove-succeeded (hash-table-ref/default backgrounded-remove-result test-fulln 'exception)))
                                         (cond
                                          ((eq? subrun-remove-succeeded 'exception)
                                           (let* ((logfile (subrun:get-log-path run-dir "remove")))
                                             (debug:print 0 *default-log-port* "ERROR: removing subrun of of " test-fulln " with run-id " run-id " ; see logfile @ "logfile))
					   (if (not (null? tal))
					       (loop (car tal)(cdr tal))))
					  (subrun-remove-succeeded
					   (debug:print 0 *default-log-port* "Now removing of " test-fulln " with run-id " run-id " since subrun was removed.")
					   ;;(runs:remove-test-directory new-test-dat mode) ;; let normal case handle this. it will go thru loop again as non-subrun
					   (let ((newtal (append tal (list test))))
					     (loop (car newtal)(cdr newtal))))
					  (else
					   (let* ((logfile (subrun:get-log-path run-dir "remove")))
					     (debug:print 0 *default-log-port* "WARNING: removal of subrun failed.  Please check "logfile" for details."))
					   ;; send to back of line, loop (will not match has-subrun next time through)
					    (if (not (null? tal))
						(loop (car tal)(cdr tal))))))
				       )
				      ) ; end case rem-status
                                    ) ; end let
                                  ); end cond has-subrun

                                 (else
                                  ;; BB - TODO - consider backgrounding to threads to delete tests (work below) 
                                  (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state)
                                  (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
                                      (begin
                                        (if (not (hash-table-ref/default test-retry-time test-fulln #f))
                                            (begin
                                              ;; want to set to REMOVING BUT CANNOT do it here?
                                              (hash-table-set! test-retry-time test-fulln (current-seconds))))
                                        (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time)
                                            ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first
                                            ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give
                                            ;; up and blow it away.
                                            (begin
                                              (debug:print 0 *default-log-port* "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing")
                                              (mt:test-set-state-status-by-id run-id (db:test-get-id test) "FAILEDKILL" "n/a" #f)
                                              (thread-sleep! 1))
                                            (begin
                                              (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f)
                                              (thread-sleep! 1)))
                                        ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ...
                                        (if (null? tal)
                                            (loop new-test-dat tal)
                                            (loop (car tal)(append tal (list new-test-dat)))))
                                      (begin
                                        (runs:remove-test-directory new-test-dat mode) ;; 'remove-all)
                                        (if (not (null? tal))
                                            (loop (car tal)(cdr tal)))))))
				(rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
                               ((kill-runs)
                                ;; RUNNING -> KILLREQ
                                ;; LAUNCHED,RUNNING,REMOTEHOSTSTART -> NOT STARTED
                                (cond
                                 ((and has-subrun (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")))
                                  (common:send-thunk-to-background-thread
                                   (lambda ()
                                     (let* ((subrun-remove-succeeded
                                             (subrun:kill-subrun run-dir keep-records)))
                                       #t)))
                                  (if (not (null? tal))
				    (loop (car tal)(cdr tal)))
                                  )
                                 ((member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
                                  (debug:print 1 *default-log-port* "INFO: issuing killreq to test "test-fulln)
                                  (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f)
                                  (if (not (null? tal))
				      (loop (car tal)(cdr tal))))
                                 ((and (member test-status '("PREQ_FAIL" "PREQ_DISCARDED" "BLOCKED" "ZERO_ITEMS" "KEEP_TRYING" "TEN_STRIKES" "TIMED_OUT")))
                                  (rmt:set-state-status-and-roll-up-items run-id (db:test-get-id test) 'foo "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a"))
                                  ;;(mt:test-set-state-status-by-id run-id (db:test-get-id test) "NOT_STARTED" "n/a" (conc "kill-run moved from "test-state":"test-status" to NOT_STARTED:n/a"))
                                  (if (not (null? tal))
				      (loop (car tal)(cdr tal)))
                                  )
                                 (else
                                  (if (not (null? tal))
				      (loop (car tal)(cdr tal)))
                                  )))
			       ((set-state-status)
                                (let* ((new-state (car state-status))
                                       (new-status (cadr state-status))
                                       (test-id (db:test-get-id test))
                                       (test-run-dir (db:test-get-rundir new-test-dat))
                                       (has-subrun (and (subrun:subrun-test-initialized? test-run-dir)
                                                      (not (subrun:subrun-removed? test-run-dir)))))
                                  (when has-subrun
                                     (common:send-thunk-to-background-thread
                                        (lambda ()
                                          (subrun:set-state-status test-run-dir state status new-state-status)
                                          )
                                        )
                                    )
                                  (debug:print-info 2 *default-log-port* "new state " new-state ", new status " new-status )
                                  (mt:test-set-state-status-by-id run-id test-id new-state new-status #f))
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       ((run-wait)
                                ;; BB TODO - manage has-subrun case
				(debug:print-info 2 *default-log-port* "still waiting, " (length tests) " tests still running")
				(thread-sleep! 10)
				(let ((new-tests (proc-get-tests run-id)))
				  (if (null? new-tests)
				      (debug:print-info 1 *default-log-port* "Run completed according to zero tests matching provided criteria.")
				      (loop (car new-tests)(cdr new-tests)))))
			       ((archive)
                                ;; BB TODO - manage has-subrun case
				(if (and run-dir (not toplevel-with-children))
				    (let ((ddir (conc run-dir "/")))
				      (case (string->symbol (args:get-arg "-archive"))
					((save save-remove keep-html)
					 (if (common:file-exists? ddir)
					     (debug:print-info 0 *default-log-port* "Estimating disk space usage for " test-fulln ": " (common:get-disk-space-used ddir)))))))
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       )))
		       )
		     (if worker-thread (thread-join! worker-thread)))
                   (common:join-backgrounded-threads))))
	   ;; remove the run if zero tests remain
	   (if (eq? action 'remove-runs)
	       (let* ((run-id   (db:get-value-by-header run header "id")) ;; NB// masks run-id from above?
                      (remtests (mt:get-tests-for-run run-id #f '("DELETED") '("n/a") not-in: #t)))
		 (if (null? remtests) ;; no more tests remaining
		     (let* ((dparts  (string-split lasttpath "/"))
			    (runpath (conc "/" (string-intersperse 
						(take dparts (- (length dparts) 1))
						"/"))))
		       (debug:print 1 *default-log-port* "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record")
                       (if (not keep-records)
                           (begin
                             (rmt:delete-run run-id)
                             (rmt:delete-old-deleted-test-records)))
                           ;; (rmt:set-var "DELETED_TESTS" (current-seconds))
		       ;; need to figure out the path to the run dir and remove it if empty
		       ;;    (if (null? (glob (conc runpath "/*")))
		       ;;        (begin
		       ;; 	 (debug:print 1 *default-log-port* "Removing run dir " runpath)
		       ;; 	 (system (conc "rmdir -p " runpath))))
		       )))))
	 ))
     runs)
    ;; (sqlite3:finalize! (db:delay-if-busy tdbdat))
    )
  #t)

(define (runs:remove-test-directory test mode) ;; remove-data-only)
  (let* ((run-dir       (db:test-get-rundir test))    ;; run dir is from the link tree
	 (real-dir      (if (common:file-exists? run-dir)
			    ;; (resolve-pathname run-dir)
			    (common:nice-path run-dir)
			    #f))
         (clean-mode    (or mode 'remove-all))
         (test-id       (db:test-get-id test))
        ;; (lock-key      (conc "test-" test-id))
        ;; (got-lock      (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin
	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id)
	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
	;; 			    (begin
	;; 			      (thread-sleep! 1)
	 ;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)))))))
	 )
    (case clean-mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f))
      ((remove-all)      (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f)))
    (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
    (if (and real-dir 
	     (> (string-length real-dir) 5)
	     (common:file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
	(begin ;; let* ((realpath (resolve-pathname run-dir)))
	  (debug:print-info 1 *default-log-port* "Recursively removing " real-dir)
	  (if (common:file-exists? real-dir)
	      (runs:safe-delete-test-dir real-dir)
	      (debug:print 0 *default-log-port* "WARNING: test dir " real-dir " appears to not exist or is not readable")))
	(if real-dir 
	    (debug:print 0 *default-log-port* "WARNING: directory " real-dir " does not exist")
	    (debug:print 0 *default-log-port* "WARNING: no real directory corrosponding to link " run-dir ", nothing done")))
    (if (symbolic-link? run-dir)
	(begin
	  (debug:print-info 1 *default-log-port* "Removing symlink " run-dir)
	  (handle-exceptions
	   exn
	   (debug:print-error 0 *default-log-port* " Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
	   (delete-file run-dir)))
	(if (directory? run-dir)
	    (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0)
		(debug:print 0 *default-log-port* "WARNING: refusing to remove " run-dir " as it is not empty")
		(handle-exceptions
		 exn
		 (debug:print-error 0 *default-log-port* " Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue")
		 (delete-directory run-dir)))
	    (if (and run-dir
		     (not (member run-dir (list "n/a" "/tmp/badname"))))
		(debug:print 0 *default-log-port* "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
		(debug:print 0 *default-log-port* "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
	    ))
    ;; Only delete the records *after* removing the directory. If things fail we have a record 
    (case clean-mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f))
      (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))
    ;; (rmt:no-sync-del! lock-key)
    ))

;;======================================================================
;; Routines for manipulating runs
;;======================================================================

;; Since many calls to a run require pretty much the same setup 
;; this wrapper is used to reduce the replication of code
(define (general-run-call switchname action-desc proc)
  (let ((runname (or (args:get-arg "-runname")(args:get-arg ":runname")))
	(target  (common:args-get-target)))
    (cond
     ((not target)
      (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the target with -target")
      (exit 3))
     ((not runname)
      (debug:print-error 0 *default-log-port* "Missing required parameter for " switchname ", you must specify the run name with -runname runname")
      (exit 3))
     (else
      (let (;; (db   #f)
	    (keys #f))
	(if (launch:setup)
	    (begin
	      (full-runconfigs-read) ;; cache the run config
	      ;; (launch:cache-config) ;; there are two independent config cache locations, turning this one off for now. MRW.
	      ) ;; do not cache here - need to be sure runconfigs is processed
	    (begin 
	      (debug:print 0 *default-log-port* "Failed to setup, exiting")
	      (exit 1)))

        
	(set! keys (common:get-fields *configdat*))
	;; have enough to process -target or -reqtarg here
	(if (args:get-arg "-reqtarg")
	    (let* ((runconfigf (conc  *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL 
		   (runconfig  (read-config runconfigf #f #t environ-patt: #f)))
	      (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f)
		  (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash)
		    
		  (begin
		    (debug:print-error 0 *default-log-port* "[" (args:get-arg "-reqtarg") "] not found in " runconfigf)
		    ;; (if db (sqlite3:finalize! db))
		    (exit 1)
		    )))
	    (if (args:get-arg "-target")
		(keys:target-set-args keys (args:get-arg "-target" args:arg-hash) args:arg-hash)))
	(if (not (car *configinfo*))
	    (begin
	      (debug:print-error 0 *default-log-port* "Attempted to " action-desc " but run area config file not found")
	      (exit 1))
	    ;; Extract out stuff needed in most or many calls
	    ;; here then call proc
	    (let* ((keyvals    (keys:target->keyval keys target)))
	      (proc target runname keys keyvals)))
	;; (if db (sqlite3:finalize! db))
	(set! *didsomething* #t))))))

;;======================================================================
;; Lock/unlock runs
;;======================================================================

(define (runs:handle-locking target keys runname lock unlock user)
  (let* ((db       #f)
	 (rundat   (mt:get-runs-by-patt keys runname target))
	 (header   (vector-ref rundat 0))
	 (runs     (vector-ref rundat 1)))
    (for-each (lambda (run)
		(let ((run-id (db:get-value-by-header run header "id"))
           (str (if lock 
									"lock"
                  "unlock")))
		  (if (or lock
			  (and unlock
			       (or (args:get-arg "-force")
                (begin
				 (print "Do you really wish to unlock run " run-id "?\n   y/n: ")
				 (equal? "y" (read-line))))))
          (begin 
		      (rmt:lock/unlock-run run-id lock unlock user)
					(debug:print-info 0 *default-log-port* "Done " str " on run id " run-id))
		      (debug:print-info 0 *default-log-port* "Skipping lock/unlock on " run-id))))
	      runs)))
;;======================================================================
;; Rollup runs
;;======================================================================

;; Update the test_meta table for this test
(define (runs:update-test_meta test-name test-conf)
  (let ((currrecord (rmt:testmeta-get-record test-name)))
    (if (not currrecord)
	(begin
	  (set! currrecord (make-vector 11 #f))
	  (rmt:testmeta-add-record test-name)))
    (for-each 
     (lambda (key)
       (let* ((idx (cadr key))
	      (fld (car  key))
	      (val (config-lookup test-conf "test_meta" fld)))
	 ;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val)
	 (if (and val (not (equal? (vector-ref currrecord idx) val)))
	     (begin
	       (print "Updating " test-name " " fld " to " val)
	       (rmt:testmeta-update-field test-name fld val)))))
     '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10)))))

;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..."
;;
(define (runs:get-tests-matching-tags tagpatt)
  (let* ((tagdata (rmt:get-tests-tags))
         (res     '())) ;; list of tests that match one or more tags
    (for-each
     (lambda (row)
       (let* ((tag (car row))
              (tests (cdr row)))
       (if (patt-list-match tag tagpatt)
           (set! res (append tests res)))))
     tagdata)
    res))
    

;; Update test_meta for all tests
(define (runs:update-all-test_meta db)
  (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests)))
    (for-each 
     (lambda (test-name)
       (let* ((test-conf    (mt:lazy-read-test-config test-name)))
	 (if test-conf (runs:update-test_meta test-name test-conf))))
     (hash-table-keys test-names))))



;; This could probably be refactored into one complex query ...
;; NOT PORTED - DO NOT USE YET
;;
#;(define (runs:rollup-run keys runname user keyvals)
  (debug:print 4 *default-log-port* "runs:rollup-run, keys: " keys " -runname " runname " user: " user)
  (let* ((db              #f)
	 ;; register run operates on the main db
	 (new-run-id      (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour")))
	 (prev-tests      (rmt:get-matching-previous-test-run-records new-run-id "%" "%"))
	 (curr-tests      (mt:get-tests-for-run new-run-id "%/%" '() '()))
	 (curr-tests-hash (make-hash-table)))
    (rmt:update-run-event_time new-run-id)
    ;; index the already saved tests by testname and itemdat in curr-tests-hash
    (for-each
     (lambda (testdat)
       (let* ((testname  (db:test-get-testname testdat))
	      (item-path (db:test-get-item-path testdat))
	      (full-name (conc testname "/" item-path)))
	 (hash-table-set! curr-tests-hash full-name testdat)))
     curr-tests)
    ;; NOPE: Non-optimal approach. Try this instead.
    ;;   1. tests are received in a list, most recent first
    ;;   2. replace the rollup test with the new *always*
    (for-each 
     (lambda (testdat)
       (let* ((testname  (db:test-get-testname testdat))
	      (item-path (db:test-get-item-path testdat))
	      (full-name (conc testname "/" item-path))
	      (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f))
	      (test-steps    (rmt:get-steps-for-test (db:test-get-id testdat)))
	      (new-test-record #f))
	 ;; replace these with insert ... select
	 (apply sqlite3:execute 
		db 
		(conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) "
		      "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);")
		new-run-id (cddr (vector->list testdat)))
	 (set! new-testdat (car (mt:get-tests-for-run new-run-id (conc testname "/" item-path) '() '())))
	 (hash-table-set! curr-tests-hash full-name new-testdat) ;; this could be confusing, which record should go into the lookup table?
	 ;; Now duplicate the test steps
	 (debug:print 4 *default-log-port* "Copying records in test_steps from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat))
	 (cdb:remote-run ;; to be replaced, note: this routine is not used currently
	  (lambda ()
	    (sqlite3:execute 
	     db 
	     (conc "INSERT OR REPLACE INTO test_steps (test_id,stepname,state,status,event_time,comment) "
		   "SELECT " (db:test-get-id new-testdat) ",stepname,state,status,event_time,comment FROM test_steps WHERE test_id=?;")
	     (db:test-get-id testdat))
	    ;; Now duplicate the test data
	    (debug:print 4 *default-log-port* "Copying records in test_data from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat))
	    (sqlite3:execute 
	     db 
	     (conc "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment) "
		   "SELECT " (db:test-get-id new-testdat) ",category,variable,value,expected,tol,units,comment FROM test_data WHERE test_id=?;")
	     (db:test-get-id testdat))))
	 ))
     prev-tests)))
	 
     
;; clean cache files
(define (runs:clean-cache target runname toppath)
  (if target
      (if runname
	  (let* ((linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree")))
		 (runtop   (conc linktree "/" target "/" runname))
		 (files    (if (common:file-exists? runtop)
			       (append (glob (conc runtop "/.megatest*"))
				       (glob (conc runtop "/.runconfig*")))
			       '())))
	    (if (null? files)
		(debug:print-info 0 *default-log-port* "No cached megatest or runconfigs files found. None removed.")
		(begin
		  (debug:print-info 0 *default-log-port* "Removing cached files:\n    " (string-intersperse files "\n    "))
		  (for-each 
		   (lambda (f)
		     (handle-exceptions
			 exn
			 (debug:print 0 *default-log-port* "WARNING: Failed to remove file " f)
		       (delete-file f)))
		   files))))
	  (debug:print-error 0 *default-log-port* "-clean-cache requires -runname."))
      (debug:print-error 0 *default-log-port* "-clean-cache requires -target or -reqtarg")))

;;======================================================================
;; strings db sdb
;;======================================================================

;; 
(define (sdb:open fname)
  (let* ((dbpath    (pathname-directory fname))
	 (dbexists  (let ((fe (common:file-exists? fname)))
		      (if fe 
			  fe
			  (begin
			    (create-directory dbpath #t)
			    #f))))
	 (sdb        (sqlite3:open-database fname))
	 (handler   (make-busy-timeout 136000)))
    (sqlite3:set-busy-handler! sdb handler)
    (if (not dbexists)
	(sdb:initialize sdb))
    (sqlite3:execute sdb "PRAGMA synchronous = 1;")
    sdb))

(define (sdb:initialize sdb)
  (sqlite3:execute sdb "CREATE TABLE IF NOT EXISTS strs
                           (id  INTEGER PRIMARY KEY,
                            str TEXT,
                        CONSTRAINT str UNIQUE (str));")
  (sqlite3:execute sdb "CREATE INDEX IF NOT EXISTS strindx ON strs (str);"))

;; (define sumup (let ((a 0))(lambda (x)(set! a (+ x a)) a)))

(define (sdb:register-string sdb str)
  (sqlite3:execute sdb "INSERT OR IGNORE INTO strs (str) VALUES (?);" str))

(define (sdb:string->id sdb str-cache str)
  (let ((id (hash-table-ref/default str-cache str #f)))
    (if (not id)
	(sqlite3:for-each-row
	 (lambda (sid)
	   (set! id sid)
	   (hash-table-set! str-cache str id))
	 sdb
	 "SELECT id FROM strs WHERE str=?;" str))
    id))

(define (sdb:id->string sdb id-cache id)
  (let ((str (hash-table-ref/default id-cache id #f)))
    (if (not str)
	(sqlite3:for-each-row
	 (lambda (istr)
	   (set! str istr)
	   (hash-table-set! id-cache id str))
	 sdb
	 "SELECT str FROM strs WHERE id=?;" id))
    str))

;; Numbers get passed though in both directions
;;
(define (make-sdb:qry fname)
  (let ((sdb    #f)
	(scache (make-hash-table))
	(icache (make-hash-table)))
    (lambda (cmd var)
      (case cmd
	((setup)   (set! sdb (if (not sdb)
				 (sdb:open (if var var fname)))))
	((setdb)    (set! sdb var))
	((getdb)    sdb)
	((finalize) (if sdb
			(begin
			  (sqlite3:finalize! sdb)
			  (set! sdb #f))))
	((getid)     (let ((id (if (or (number? var)
				       (string->number var))
				   var
				   (sdb:string->id sdb scache var))))
		       (if id
			   id
			   (begin
			     (sdb:register-string sdb var)
			     (sdb:string->id sdb scache var)))))
	((getstr)    (if (or (number? var)
			     (string->number var))
			 (sdb:id->string sdb icache var)
			 var))
	((passid)    var)
	((passstr)   var)
	(else #f)))))

;;======================================================================
;; subrun
;;======================================================================

(define (subrun:subrun-test-initialized? test-run-dir)
  (if (and (common:file-exists? (conc test-run-dir "/subrun-area") )
           (common:file-exists? (conc test-run-dir "/testconfig.subrun") ))
      #t
      #f))

(define (subrun:launch-dashboard test-run-dir)
  (if (subrun:subrun-test-initialized? test-run-dir)
      (let* ((subarea (subrun:get-runarea test-run-dir)))
        (if (and subarea (common:file-exists? subarea))
            (system (conc "cd " subarea ";env -i PATH=$PATH DISPLAY=$DISPLAY HOME=$HOME USER=$USER dashboard &"))))))

(define (subrun:subrun-removed? test-run-dir)
  (if (subrun:subrun-test-initialized? test-run-dir)
      (let ((flagfile (conc test-run-dir "/subrun.removed")))
        (if (common:file-exists? flagfile)
            #t
            #f))
      #t))

(define (subrun:set-subrun-removed test-run-dir)
  (let ((flagfile (conc test-run-dir "/subrun.removed")))
    (if (and (subrun:subrun-test-initialized? test-run-dir) (not (common:file-exists? flagfile)))
        (with-output-to-file flagfile
          (lambda () (print (current-seconds)))))))

(define (subrun:unset-subrun-removed test-run-dir)
  (let ((flagfile (conc test-run-dir "/subrun.removed")))
    (if (and (subrun:subrun-test-initialized? test-run-dir) (common:file-exists? flagfile))
        (delete-file flagfile))))


(define (subrun:testconfig-defines-subrun? testconfig)
  (configf:lookup testconfig "subrun" "runwait")) ;; we use runwait as the flag that a subrun is requested

(define (subrun:initialize-toprun-test  testconfig test-run-dir)
  (let ((ra (configf:lookup testconfig "subrun" "run-area"))
        (logpro (configf:lookup testconfig "subrun" "logpro"))
        (symlink-target (conc test-run-dir "/subrun-area"))
        )
    (if (not ra)      ;; when runarea is not set we default to *toppath*. However 
	(let ((fallback-run-area (or *toppath* (conc test-run-dir "/subrun"))))
	  ;; we need to force the setting in the testconfig so it will
          ;; be preserved in the testconfig.subrun file
	  (configf:set-section-var testconfig "subrun" "run-area" fallback-run-area)
	  (set! ra fallback-run-area)))
    (configf:set-section-var testconfig "logpro" "subrun" logpro) ;; append the logpro rules to the logpro section as stepname subrun
    (if (common:file-exists? symlink-target)
        (delete-file symlink-target))
    (create-symbolic-link ra symlink-target)
    (configf:write-alist testconfig "testconfig.subrun")))

(define (subrun:set-state-status test-run-dir state status new-state-status)
  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
      (let* ((action-switches-str
              (conc "-set-state-status "new-state-status
                    (if state (conc " -state "state) "")
                    (if status (conc " -status "status) "")))
             (log-prefix
              (subrun:sanitize-path
               (conc "set-state-status="new-state-status
                     (if state (conc ":state="state) "")
                     (if status (conc "+status="status) ""))))
             (submt-result 
              (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix)))
        submt-result)))

(define (subrun:remove-subrun test-run-dir keep-records )
  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
      (let* ((action-switches-str
              (conc "-remove-runs"
                    (if keep-records "-keep-records " "")
                    ))
             (remove-result
              (subrun:exec-sub-megatest test-run-dir action-switches-str "remove")))
        (if remove-result
            (begin
              (subrun:set-subrun-removed test-run-dir)
              #t)
            #f))
      #t))

(define (subrun:kill-subrun test-run-dir )
  (if (and (not (subrun:subrun-removed? test-run-dir)) (subrun:subrun-test-initialized? test-run-dir))
      (let* ((action-switches-str
              (conc "-kill-runs" ))
             (kill-result
              (subrun:exec-sub-megatest test-run-dir action-switches-str "kill")))
        kill-result)
      #t))

(define (subrun:launch-cmd test-run-dir #!optional (sub-cmd "-run")) ;; BUG: "-run" should be changed to "-rerun-clean" but current doesn't work
  (if (subrun:subrun-removed? test-run-dir)
      (subrun:unset-subrun-removed test-run-dir))      

  (let* ((log-prefix "run")
         (switches (subrun:selector+log-switches test-run-dir log-prefix))
         (run-wait #t)
         (cmd      (conc "megatest " sub-cmd " " switches" "
                         (if run-wait "-run-wait " ""))))
    cmd))


(define (subrun:sanitize-path inpath)
  (let* ((insane-pattern (irregex "[^[a-zA-Z0-9_\\-]")))
    (regex#string-substitute insane-pattern "_" inpath #t)))

(define (subrun:get-runarea test-run-dir)
  (if (subrun:subrun-test-initialized? test-run-dir)
      (let* ((info-alist (subrun:selector+log-alist
                          test-run-dir
                          "foo"))
             (run-area   (if (list? info-alist)
                             (alist-ref "-start-dir" info-alist equal? #f)
                             #f)))
        run-area)
      #f))

(define (subrun:selector+log-alist test-run-dir log-prefix)
  (let* ((switch-def-alist (common:get-param-mapping flavor: 'config))
         (subrunfile   (conc test-run-dir "/testconfig.subrun" ))
         (subrundata   (with-input-from-file subrunfile read))
         (subrunconfig (configf:alist->config subrundata))
         (run-area     (configf:lookup subrunconfig "subrun" "run-area"))
         (defvals      `(("start-dir" . ,(or run-area  ;; default values if not specified in subrun section of tconf
                                             (get-environment-variable "MT_RUN_AREA_HOME")
                                             "/no/rundir/found")) 
                         ("run-name"  . ,(or (get-environment-variable "MT_RUNNAME") "NO-RUNNAME"))
                         ("target"    . ,(or (get-environment-variable "MT_TARGET")  "NO-TARGET"))))
         (switch-alist-pre  (filter-map (lambda (item)
                                          (let* ((config-key (car item))
                                                 (switch     (cdr item))
                                                 (defval     (alist-ref config-key defvals equal? #f))
                                                 (val        (or (configf:lookup subrunconfig "subrun" config-key)
                                                                 defval)))
                                            (if val
                                                (cons switch val)
                                                #f)))
                                        switch-def-alist))

         ;; testpatt may be modified if all three of mode-patt, tag-expr, and testpatt are null
         (mode-patt     (alist-ref "-modepatt" switch-alist-pre equal? #f))
         (tag-expr      (alist-ref "-tagexpr" switch-alist-pre equal? #f))
         (testpatt      (alist-ref "-testpatt" switch-alist-pre equal?
                                   (if (not (or mode-patt tag-expr)) "%" #f))) ;; testpatt is % if not
                                                                               ;; otherwise specified

         ;; define compact-stem for logfile
         (target        (alist-ref "-target" switch-alist-pre equal? #f)) ;; want data-structures alist-ref, not alist-lib alist-ref
         (runname       (alist-ref "-runname" switch-alist-pre equal? #f))


         (compact-stem  (subrun:sanitize-path
                         (conc
                          target
                          "-"
                          runname
                          "-" (or testpatt mode-patt tag-expr "NO-TESTPATT"))))
         (logfile       (conc
                         test-run-dir "/"
                         (if log-prefix
                             (conc (subrun:sanitize-path log-prefix) "-")
                             "")
                         compact-stem
                         ".log"))
         ;; swap out testpatt with modified test-patt and add -log
         (switch-alist  (cons
                         (cons "-log" logfile)
                         (map (lambda (item)
                                (if (equal? (car item) "-testpatt")
                                    (cons "-testpatt" testpatt)
                                    item))
                                switch-alist-pre))))
    switch-alist))
    ;; note - get precmd from subrun section
    ;;   apply to submegatest commands

(define (subrun:get-log-path test-run-dir log-prefix)
  (let* ((alist (subrun:selector+log-alist test-run-dir log-prefix))
         (res   (alist-ref "-log" alist equal? #f)))
    res))

(define (subrun:selector+log-switches test-run-dir log-prefix)
  (let* ((switch-alist (subrun:selector+log-alist test-run-dir log-prefix))
         (res
          (string-intersperse
           (apply
            append
            (map
             (lambda (x)
               (list (car x) (cdr x)))
             switch-alist))
           " ")))
    res))

(define (subrun:exec-sub-megatest test-run-dir action-switches-str log-prefix)
  (let* ((selector-switches  (subrun:selector+log-switches test-run-dir log-prefix))
         (cmd (conc "megatest " selector-switches " " action-switches-str ))
         (pid #f)
         (proc (lambda ()
                 (debug:print-info 0 *default-log-port* "Running sub megatest command: "cmd)
                 ;;(set! pid (process-run "/usr/bin/xterm" (list ))))))
                 (set! pid (process-run "/bin/bash" (list "-c" cmd))))))
    (call-with-environment-variables 
     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
     (lambda  ()
       (common:without-vars proc "^MT_.*")))
    (let processloop ((i 0))
      (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
        (if (eq? pid-val 0)
            (begin
              (thread-sleep! 2)
              (processloop (+ i 1)))
            (begin
              (debug:print-info 0 *default-log-port* "sub megatest " action-switches-str " completed with exit code " exit-code)
              (if (eq? 0 exit-code)
                  (begin
                    #t)
                  (begin
                    #f))))))))



;; (subrun:exec-sub-megatest "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/165/megatest/ext-tests/tests/subrun-usecases/toparea/links/SYSTEM_val/RELEASE_val/go/toptest" "-foo" "foo")

;;======================================================================
;; synchash
;;======================================================================

#;(define (synchash:make)
   (make-hash-table))

;; given an alist of objects '((id obj) ...) 
;;   1. remove unchanged objects from the list
;;   2. create a list of removed objects by id
;;   3. remove removed objects from synchash
;;   4. replace or add new or changed objects to synchash
;;
#;(define (synchash:get-delta indat synchash)
  (let ((deleted '())
	(changed '())
	(found   '())
	(orig-keys (hash-table-keys synchash)))
    (for-each
     (lambda (item)
       (let* ((id  (car  item))
	      (dat (cadr item))
	      (ref (hash-table-ref/default synchash id #f)))
	 (if (not (equal? dat ref)) ;; item changed or new
	     (begin
	       (set! changed (cons item changed))
	       (hash-table-set! synchash id dat)))
	 (set! found (cons id found))))
     indat)
    (for-each 
     (lambda (id)
       (if (not (member id found))
	   (begin
	     (set! deleted (cons id deleted))
	     (hash-table-delete! synchash id))))
     orig-keys)
    (list changed deleted)
    ;; (list indat '()) ;; just for debugging
    ))
    
;; keynum => the field to use as the unique key (usually 0 but can be other field)
;;
#;(define (synchash:client-get proc synckey keynum synchash run-id . params)
  (let* ((data   (rmt:synchash-get run-id proc synckey keynum params))
	 (newdat (car data))
	 (removs (cadr data))
	 (myhash (hash-table-ref/default synchash synckey #f)))
    (if (not myhash)
	(begin
	  (set! myhash (make-hash-table))
	  (hash-table-set! synchash synckey myhash)))
    (for-each 
     (lambda (item)
       (let ((id  (car item))
	     (dat (cadr item)))
	 ;; (debug:print-info 2 *default-log-port* "Processing item: " item)
	 (hash-table-set! myhash id dat)))
     newdat)
    (for-each
     (lambda (id)
       (hash-table-delete! myhash id))
     removs)
    ;; WHICH ONE!?
    ;; data)) ;; return the changed and deleted list
    (list newdat removs))) ;; synchash))

#;(define *synchashes* (make-hash-table))

#;(define (synchash:server-get dbstruct run-id proc synckey keynum params)
  ;; (debug:print-info 2 *default-log-port* "synckey: " synckey ", keynum: " keynum ", params: " params)
  (let* ((dbdat     (db:get-db dbstruct run-id))
	 (db        (db:dbdat-get-db dbdat))
	 (synchash  (hash-table-ref/default *synchashes* synckey #f))
	 (newdat    (apply (case proc
			     ((db:get-runs)                   db:get-runs)
			     ((db:get-tests-for-run-mindata)  db:get-tests-for-run-mindata)
			     ((db:get-test-info-by-ids)       db:get-test-info-by-ids)
			     (else
			      (print "ERROR: sync for hash " proc " not setup! Edits needed in synchash.scm")
			      print))
			   db params))
	 (postdat  #f)
	 (make-indexed (lambda (x)
			 (list (vector-ref x keynum) x))))
    ;; Now process newdat based on the query type
    (set! postdat (case proc
		    ((db:get-runs)
		     ;; (debug:print-info 2 *default-log-port* "Get runs call")
		     (let ((header (vector-ref newdat 0))
			   (data   (vector-ref newdat 1)))
		       ;; (debug:print-info 2 *default-log-port* "header: " header ", data: " data)
		       (cons (list "header" header)         ;; add the header keyed by the word "header"
			     (map make-indexed data))))        ;; add each element keyed by the keynum'th val
		    (else 
		     ;; (debug:print-info 2 *default-log-port* "Non-get runs call")
		     (map make-indexed newdat))))
    ;; (debug:print-info 2 *default-log-port* "postdat: " postdat)
    ;; (if (not indb)(sqlite3:finalize! db))
    (if (not synchash)
	(begin
	  (set! synchash (make-hash-table))
	  (hash-table-set! *synchashes* synckey synchash)))
    (synchash:get-delta postdat synchash)))

;;======================================================================
;; tree
;;======================================================================

;;======================================================================
;; T R E E   S T U F F 
;;======================================================================

;; path is a list of nodes, each the child of the previous
;; this routine returns the id so another node can be added
;; either as a leaf or as a branch
;;
;; BUG: This needs a stop sensor for when a branch is exhausted
;;
(define (tree:find-node obj path)
  ;; start at the base of the tree
  (if (null? path)
      #f ;; or 0 ????
      (let loop ((hed      (car path))
		 (tal      (cdr path))
		 (depth    0)
		 (nodenum  0))
	;; nodes in iup tree are 100% sequential so iterate over nodenum
	(if (iup:attribute obj (conc "DEPTH" nodenum)) ;; end when no more nodes
	    (let ((node-depth (string->number (iup:attribute obj (conc "DEPTH" nodenum))))
		  (node-title (iup:attribute obj (conc "TITLE" nodenum))))
	      (if (and (equal? depth node-depth)
		       (equal? hed   node-title)) ;; yep, this is the one!
		  (if (null? tal) ;; end of the line
		      nodenum
		      (loop (car tal)(cdr tal)(+ depth 1)(+ 1 nodenum)))
		  ;; this is the case where we found part of the hierarchy but not 
		  ;; all of it, i.e. the node-depth went from deep to less deep
		  (if (> depth node-depth) ;; (+ 1 node-depth))
		      #f
		      (loop hed tal depth (+ nodenum 1)))))
	    #f))))

;; top is the top node name zeroeth node VALUE=0
(define (tree:add-node obj top nodelst #!key (userdata #f))
  (let ((curr-top (iup:attribute obj "TITLE0")))
    (if (or (not (string? curr-top))
	    (string-null? curr-top)
	    (string-match "^\\s*$" curr-top))
          (iup:attribute-set! obj "ADDBRANCH0" top))


    
    (cond
     ((not (equal? top (iup:attribute obj "TITLE0")))
      (print "ERROR: top name " top " doesn't match " (iup:attribute obj "TITLE0")))
     ((null? nodelst))
     (else
      (let loop ((hed      (car nodelst))
		 (tal      (cdr nodelst))
		 (depth    1)
		 (pathl    (list top)))
	;; Because the tree dialog changes node numbers when
	;; nodes are added or removed we must look up nodes
	;; each and every time. 0 is the top node so default
	;; to that.
	(let* ((newpath    (append pathl (list hed)))
	       (parentnode (tree:find-node obj pathl))
	       (nodenum    (tree:find-node obj newpath)))
	  ;; Add the branch under lastnode if not found
	  (if (not nodenum)
	      (begin
		(iup:attribute-set! obj (conc "ADDBRANCH" parentnode) hed)
		;; ERROR? ADDING DATA TO PARENT, DONT WE WANT IT ON CREATED NODE?
		(if userdata
		    (iup:attribute-set! obj (conc "USERDATA"   parentnode) userdata))
		(if (null? tal)
		    #t
		    ;; reset to top
		    (loop (car nodelst)(cdr nodelst) 1 (list top)))) 
	      (if (null? tal) ;; if null here then this path has already been added
		  #t
		  (loop (car tal)(cdr tal)(+ depth 1) newpath)))))))))

(define (tree:node->path obj nodenum)
  (let loop ((currnode 0)
	     (path     '()))
    (let* ((node-depth (string->number (iup:attribute obj (conc "DEPTH" currnode))))
	   (node-title (iup:attribute obj (conc "TITLE" currnode)))
	   (trimpath   (if (and (not (null? path))
				(> (length path) node-depth))
			   (take path node-depth)
			   path))
	   (newpath    (append trimpath (list node-title))))
      (if (>= currnode nodenum)
	  newpath
	  (loop (+ currnode 1)
		newpath)))))

(define (tree:delete-node obj top node-path) ;; node-path is a list of strings
  (let ((id  (tree:find-node obj (cons top node-path))))
    (print "Found node to remove " id " for path " top " " node-path)
    (iup:attribute-set! obj (conc "DELNODE" id) "SELECTED")))
	
#|

  (let* ((tb      (iup:treebox
                   #:value 0
                   #:name "Runs"
                   #:expand "YES"
                   #:addexpanded "NO"
                   #:selection-cb
                   (lambda (obj id state)
                     ;; (print "obj: " obj ", id: " id ", state: " state)
                     (let* ((run-path (tree:node->path obj id))
                            (run-id   (tree-path->run-id (cdr run-path))))
                       (if run-id
                           (begin
                             (dboard:data-curr-run-id-set! data run-id)
                             (dashboard:update-run-summary-tab)))
                       ;; (print "path: " (tree:node->path obj id) " run-id: " run-id)
                       ))))
|#

;;======================================================================
;; db
;;======================================================================

;; Use db:test-get* to access
;; Get test data using test_ids. NB// Only works within a single run!!
;;
(define (db:get-test-info-by-ids dbstruct run-id test-ids)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (let ((res '()))
       (sqlite3:for-each-row
	(lambda (a . b)
	  ;;                 0    1       2      3      4        5       6      7        8     9     10      11          12          13       14
	  (set! res (cons (apply vector a b) res)))
	db
	(conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in ("
	      (string-intersperse (map conc test-ids) ",") ");"))
       res))))

;; (db:with-db alldat run-id sqlite3:exec "select blah fgrom blaz;")
;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no
;;
(define (db:with-db alldat run-id r/w proc . params)
  (let* ((have-struct      (alldat? alldat))
         (dbdat            (if have-struct 
		           	  (db:get-db alldat)
		           	  #f))
	 (db               (if have-struct
		           	  (db:dbdat-get-db dbdat)
		           	  alldat))
	 (use-mutex        (> (alldat-api-process-request-count alldat) 25))
	 (db-with-db-mutex (alldat-db-with-db-mutex alldat))
	 (log-port         (alldat-log-port alldat)))
    (if (and use-mutex
	     (common:low-noise-print 120 "over-50-parallel-api-requests"))
	(debug:print-info 0 log-port (alldat-api-process-request-count alldat) " parallel api requests being processed in process " (current-process-id) ", throttling access"))
    (if (common:low-noise-print 600 (conc "parallel-api-requests" (alldat-max-api-process-requests alldat)))
	(debug:print-info 2 log-port "Parallel api request count: " (alldat-api-process-request-count alldat) " max parallel requests: " (alldat-max-api-process-requests alldat)))
    (handle-exceptions
     exn
     (begin
       (print-call-chain (current-error-port))
       (debug:print-error 0 log-port "sqlite3 issue in db:with-db, alldat=" alldat ", run-id=" run-id ", proc=" proc ", params=" params " error: " ((condition-property-accessor 'exn 'message) exn))
       ;; there is no recovering at this time. exit
       (exit 50))
     (if use-mutex (mutex-lock! db-with-db-mutex))
     (let ((res (apply proc db params)))
       (if use-mutex (mutex-unlock! db-with-db-mutex))
       (if dbdat (stack-push! (alldat-dbstack alldat) dbdat))
       res))))


(define db:test-record-qry-selector (string-intersperse db:test-record-fields ","))

;; get a useful subset of the tests data (used in dashboard
;; use db:mintest-get-{id ,run_id,testname ...}
;;
(define (db:get-tests-for-run-mindata dbstruct run-id testpatt states statuses not-in)
  (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f "id,run_id,testname,state,status,event_time,item_path" 0 #f))

;; states and statuses are lists, turn them into ("PASS","FAIL"...) and use NOT IN
;; i.e. these lists define what to NOT show.
;; states and statuses are required to be lists, empty is ok
;; not-in #t = above behaviour, #f = must match
;; mode:
;;  'dashboard - use state = 'COMPLETED' AND status in ( statuses ) OR state in ( states )
;;
(define (db:get-tests-for-run dbstruct run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
  (let* ((qryvalstr       (case qryvals
			    ((shortlist) "id,run_id,testname,item_path,state,status")
			    ((#f)        db:test-record-qry-selector) ;; "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment")
			    (else        qryvals)))
	 (res            '())
	 ;; if states or statuses are null then assume match all when not-in is false
	 (states-qry      (if (null? states) 
			      #f
			      (conc " state "  
				    (if (eq? mode 'dashboard)
					" IN ('"
					(if not-in
					    " NOT IN ('"
					    " IN ('")) 
				    (string-intersperse states   "','")
				    "')")))
	 (statuses-qry    (if (null? statuses)
			      #f
			      (conc " status "
				    (if (eq? mode 'dashboard)
					" IN ('"
					(if not-in 
					    " NOT IN ('"
					    " IN ('") )
				    (string-intersperse statuses "','")
				    "')")))
	 (interim-qry       (conc " AND " (if not-in "NOT " "") "( state='COMPLETED' " (if statuses-qry (conc " AND " statuses-qry " ) ") " ) ")
				  (if states-qry
				      (conc (if not-in " AND " " OR ") states-qry ) ;; " ) ")
				      "")))
	 (states-statuses-qry 
	  (cond 
	   ((and states-qry statuses-qry)
	    (case mode
	      ((dashboard) 
	       (if not-in
		   (conc " AND (state='COMPLETED' AND status NOT IN ('" (string-intersperse statuses "','") "')) "
			 " OR (state != 'COMPLETED' AND state NOT IN ('" (string-intersperse states "','") "')) ")
		   (conc " AND (state='COMPLETED' AND status IN ('" (string-intersperse statuses "','") "')) "
			 " OR (state NOT IN ('COMPLETED','DELETED') AND state IN ('" (string-intersperse states "','") "')) ")))
	      (else       (conc " AND ( " states-qry " AND " statuses-qry " ) "))))
	   (states-qry  
	    (case mode
	      ((dashboard) (conc " AND " (if not-in "NOT " "") " state IN ('" (string-intersperse states    "','") "') ")) ;; interim-qry)
	      (else        (conc " AND " states-qry))))
	   (statuses-qry 
	    (case mode
	      ((dashboard) (conc " AND " (if not-in "NOT " "") " status IN ('" (string-intersperse statuses "','") "') ")) ;; interim-qry)
	      (else        (conc " AND " statuses-qry))))
	   (else "")))
	 (tests-match-qry (tests:match->sqlqry testpatt))
	 (qry             (conc "SELECT " qryvalstr
				(if run-id
				    " FROM tests WHERE run_id=? "
				    " FROM tests WHERE ? > 0 ") ;; should work?
				(if last-update " " " AND state != 'DELETED' ") ;; if using last-update we want deleted tests?
				states-statuses-qry
				(if tests-match-qry (conc " AND (" tests-match-qry ") ") "")
				(if last-update (conc " AND last_update >= " last-update " ") "")
				(case sort-by
				  ((rundir)      " ORDER BY length(rundir) ")
				  ((testname)    (conc " ORDER BY testname " (if sort-order (conc sort-order ",") "") " item_path "))
				  ((statestatus) (conc " ORDER BY state " (if  sort-order (conc sort-order ",") "") " status "))
				  ((event_time)  " ORDER BY event_time ")
				  (else          (if (string? sort-by)
						     (conc " ORDER BY " sort-by " ")
						     " ")))
				(if sort-order sort-order " ")
				(if limit  (conc " LIMIT " limit)   " ")
				(if offset (conc " OFFSET " offset) " ")
				";"
				)))
    (debug:print-info 8 *default-log-port* "db:get-tests-for-run run-id=" run-id ", qry=" qry)
    (db:with-db dbstruct run-id #f
		(lambda (db)
		  (sqlite3:for-each-row 
		   (lambda (a . b) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment)
		     (set! res (cons (apply vector a b) res))) ;; id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment) res)))
		   db
		   qry
		   (or run-id 1) ;; 1 > 0 , for the case where we are seeking tests matching criteral for all runs
		   )))
    (case qryvals
      ((shortlist)(map db:test-short-record->norm res))
      ((#f)       res)
      (else       res))))

(define db:test-record-fields '("id"           "run_id"        "testname"  "state"      "status"      "event_time"
				"host"         "cpuload"       "diskfree"  "uname"      "rundir"      "item_path"
                                "run_duration" "final_logf"    "comment"   "shortdir"   "attemptnum"  "archived" "last_update"))

(define (db:test-short-record->norm inrec)
  ;;  "id,run_id,testname,item_path,state,status"
  ;;  "id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment
  (vector (vector-ref inrec 0) ;; id
	  (vector-ref inrec 1) ;; run_id
	  (vector-ref inrec 2) ;; testname
	  (vector-ref inrec 4) ;; state
	  (vector-ref inrec 5) ;; status
	  -1 "" -1 -1 "" "-" 
	  (vector-ref inrec 3) ;; item-path
	  -1 "-" "-"))

;; if itempath is #f then look only at the testname part
;;
(define (tests:match->sqlqry patterns)
  (if (string? patterns)
      (let ((patts (string-split patterns ",")))
	(if (null? patts) ;;; no pattern(s) means no match, we will do no query
	    #f
	    (let loop ((patt (car patts))
		       (tal  (cdr patts))
		       (res  '()))
	      ;; (print "loop: patt: " patt ", tal " tal)
	      (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt))
		     (test-patt  (cadr patt-parts))
		     (item-patt  (cadddr patt-parts))
		     (test-qry   (db:patt->like "testname" test-patt))
		     (item-qry   (db:patt->like "item_path" item-patt))
		     (qry        (conc "(" test-qry " AND " item-qry ")")))
		;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt)
		(if (null? tal)
		    (string-intersperse (append (reverse res)(list qry)) " OR ")
		    (loop (car tal)(cdr tal)(cons qry res)))))))
      #f))

;; make a query (fieldname like 'patt1' OR fieldname 
(define (db:patt->like fieldname pattstr #!key (comparator " OR "))
  (let ((patts (if (string? pattstr)
		   (string-split pattstr ",")
		   '("%"))))
    (string-intersperse (map (lambda (patt)
			       (let ((wildtype (if (substring-index "%" patt) "LIKE" "GLOB")))
				 (conc fieldname " " wildtype " '" patt "'")))
			     (if (null? patts)
				 '("")
				 patts))
			comparator)))

;; replace header and keystr with a call to runs:get-std-run-fields
;;
;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") )
;; runpatts: patt1,patt2 ...
;;
(define (db:get-runs dbstruct runpatt count offset keypatts)
  (let* ((res       '())
	 (keys       (db:get-keys dbstruct))
	 (runpattstr (db:patt->like "runname" runpatt))
	 (remfields  (list "id" "runname" "state" "status" "owner" "event_time"))
	 (header     (append keys remfields))
	 (keystr     (conc (keys->keystr keys) ","
			   (string-intersperse remfields ",")))
	 (qrystr     (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? "
			   ;; Generate: " AND x LIKE 'keypatt' ..."
			   (if (null? keypatts) ""
			       (conc " AND "
				     (string-join 
				      (map (lambda (keypatt)
					     (let ((key  (car keypatt))
						   (patt (cadr keypatt)))
					       (db:patt->like key patt)))
					   keypatts)
				      " AND ")))
			   " AND state != 'deleted' ORDER BY event_time DESC "
			   (if (number? count)
			       (conc " LIMIT " count)
			       "")
			   (if (number? offset)
			       (conc " OFFSET " offset)
			       ""))))
    (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count)
    (db:with-db dbstruct #f #f
		(lambda (db)		
		  (sqlite3:for-each-row
		   (lambda (a . x)
		     (set! res (cons (apply vector a x) res)))
		   db
		   qrystr
		   )))
    (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " keypatts: " keypatts " offset: " offset " limit: " count)
    (vector header res)))

(define (db:dbdat-get-path dbdat)
  (if (pair? dbdat)
      (cdr dbdat)
      #f))

(define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ...
  (string-intersperse keys ","))

;; ;; legacy handling of structure for managing db's. Refactor this into dbr:?
(define (db:dbdat-get-db dbdat)
  (if (pair? dbdat)
      (car dbdat)
      dbdat))

;; Get/open a database
;;    if run-id => get run specific db
;;    if #f     => get main db
;;    if db already open - return inmem
;;    if db not open, open inmem, rundb and sync then return inmem
;;    inuse gets set automatically for rundb's
;;
(define (db:get-db alldat) ;;  run-id) 
  (if (stack? (alldat-dbstack alldat))
      (if (stack-empty? (alldat-dbstack alldat))
          (let ((newdb (db:open-megatest-db path: (common:get-db-tmp-area alldat))))
            ;; (stack-push! (alldat-dbstack alldat) newdb)
            newdb)
          (stack-pop! (alldat-dbstack alldat)))
      (db:open-db alldat)))

;; This routine creates the db if not already present. It is only called if the db is not already opened
;;
(define (db:open-db alldat #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath
  (let ((toppath     (alldat-areapath alldat))
	(configdat   (alldat-mtconfig alldat))
	(log-port    (alldat-log-port alldat))
	(tmpdb-stack (alldat-dbstack  alldat))) ;; RA => Returns the first reference in alldat
    (if (stack? tmpdb-stack)
	(db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used
        (let* ((max-stale-tmp (configf:lookup-number configdat "server" "filling-db-max-stale-seconds" default: 10))
               (dbpath       (common:get-db-tmp-area alldat))      ;; path to tmp db area
               (dbexists     (file-exists? dbpath))
	       (tmpdbfname   (conc dbpath "/megatest.db"))
	       (dbfexists    (file-exists? tmpdbfname))  ;; (conc dbpath "/megatest.db")))
               (mtdbexists   (file-exists? (conc toppath "/megatest.db")))
							 
               (mtdbmodtime  (if mtdbexists (common:lazy-sqlite-db-modification-time (conc toppath "/megatest.db"))   #f))
	        		 (tmpdbmodtime (if dbfexists  (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) 
               (mtdb         (db:open-megatest-db))
               (mtdbpath     (db:dbdat-get-path mtdb))
               (tmpdb        (db:open-megatest-db path: dbpath)) ;; lock-create-open dbpath db:initialize-main-db))
               (refndb       (db:open-megatest-db path: dbpath name: "megatest_ref.db"))
               (write-access (file-write-access? mtdbpath))
	       
	       ;;(mtdbmodtime (if mtdbexists
	       ;;(common:lazy-sqlite-db-modification-time mtdbpath)
	       ;;#f)) ; moving this before db:open-megatest-db is
	       ;;called. if wal mode is on -WAL and -shm file get
	       ;;created with causing the tmpdbmodtime timestamp
	       ;;always greater than mtdbmodtime (tmpdbmodtime (if
	       ;;dbfexists (common:lazy-sqlite-db-modification-time
	       ;;tmpdbfname) #f))

	       ;;if wal mode is on -WAL and -shm file get created when
	       ;;db:open-megatest-db is called. modtimedelta will
	       ;;always be < 10 so db in tmp not get synced
	       ;;(tmpdbmodtime (if dbfexists (db:get-last-update-time
	       ;;(car tmpdb)) #f)) (fmt (file-modification-time
	       ;;tmpdbfname))
	       
	       (modtimedelta (and mtdbmodtime tmpdbmodtime (- mtdbmodtime tmpdbmodtime))))

	  (handle-exceptions
	   exn
	   (let ((call-chain (get-call-chain))
		 (msg        ((condition-property-accessor 'exn 'message) exn)))
	     (debug:print 0 log-port "ERROR: attempted to drop triggers on MTRA/megatest.db but failed. Error is " msg)
	     (set! write-access #f)) ;; if we failed to drop the triggers then we probably don't have write access
	   (when write-access
		 (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger")
		 (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger")))
          
          ;;(print "mtdbmodtime " mtdbmodtime " tmpdbmodtime "
          ;;tmpdbmodtime " mtdbpath " mtdbpath " " (conc *toppath*
          ;;"/megatest.db")) (debug:print-info 13 log-port
          ;;"db:open-db>> mtdbpath="mtdbpath" mtdbexists="mtdbexists"
          ;;and write-access="write-access)
          (if (and dbexists (not write-access))
              (begin
                (set! *db-write-access* #f)
                (alldat-read-only-set! alldat #t)))
          (alldat-mtdb-set!   alldat mtdb)
          (alldat-tmpdb-set!  alldat tmpdb)
          (alldat-dbstack-set! alldat (make-stack))   ;; why a stack?
          (stack-push! (alldat-dbstack alldat) tmpdb) ;; olddb is already a (cons db path)
          (alldat-refndb-set! alldat refndb)
          ;;	    (mutex-unlock! *rundb-mutex*)
          (if (and  (or (not dbfexists)
			(and modtimedelta
			     (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back
		    do-sync)
	      (begin
		(debug:print 1 log-port "filling db " (db:dbdat-get-path tmpdb) " with data \n    from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta)
		(db:sync-tables (db:sync-all-tables-list alldat) #f mtdb refndb tmpdb)
					;touch tmp db to avoid wal mode wierdness  
		(set! (file-modification-time tmpdbfname) (current-seconds))  
                (debug:print-info 13 log-port "db:sync-all-tables-list done.")
                )
	      (debug:print 4 log-port " db, " (db:dbdat-get-path tmpdb) " already exists or fresh enough, not propogating data from\n     " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) )
	  ;; (db:multi-db-sync alldat 'old2new))  ;; migrate data from megatest.db automatically
          tmpdb))))

(define (db:sync-all-tables-list alldat)
  (append (db:sync-main-list alldat)
	  db:sync-tests-only))

;; just tests, test_steps and test_data tables
(define db:sync-tests-only
  (list
   ;; (list "strs"
   ;;       '("id"             #f)
   ;;       '("str"            #f))
   (list "tests" 
	 '("id"             #f)
	 '("run_id"         #f)
	 '("testname"       #f)
	 '("host"           #f)
	 '("cpuload"        #f)
	 '("diskfree"       #f)
	 '("uname"          #f)
	 '("rundir"         #f)
	 '("shortdir"       #f)
	 '("item_path"      #f)
	 '("state"          #f)
	 '("status"         #f)
	 '("attemptnum"     #f)
	 '("final_logf"     #f)
	 '("logdat"         #f)
	 '("run_duration"   #f)
	 '("comment"        #f)
	 '("event_time"     #f)
	 '("fail_count"     #f)
	 '("pass_count"     #f)
	 '("archived"       #f)
         '("last_update"    #f))
  (list "test_steps"
	 '("id"             #f)
	 '("test_id"        #f)
	 '("stepname"       #f)
	 '("state"          #f)
	 '("status"         #f)
	 '("event_time"     #f)
	 '("comment"        #f)
	 '("logfile"        #f)
         '("last_update"    #f))
   (list "test_data"
	 '("id"             #f)
	 '("test_id"        #f)
	 '("category"       #f)
	 '("variable"       #f)
	 '("value"          #f)
	 '("expected"       #f)
	 '("tol"            #f)
	 '("units"          #f)
	 '("comment"        #f)
	 '("status"         #f)
	 '("type"           #f)
         '("last_update"    #f))))

;; needs db to get keys, this is for syncing all tables
;;
(define (db:sync-main-list alldat)
  (let ((keys  (db:get-keys alldat)))
    (list
     (list "keys"
	   '("id"        #f)
	   '("fieldname" #f)
	   '("fieldtype" #f))
     (list "metadat" '("var" #f) '("val" #f))
     (append (list "runs" 
		   '("id"  #f))
	     (map (lambda (k)(list k #f))
		  (append keys
			  (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update"))))
     (list "test_meta"
	   '("id"             #f)
	   '("testname"       #f)
	   '("owner"          #f)
	   '("description"    #f)
	   '("reviewed"       #f)
	   '("iterated"       #f)
	   '("avg_runtime"    #f)
	   '("avg_disk"       #f)
	   '("tags"           #f)
	   '("jobgroup"       #f)))))

;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) )
;; db's are dbdat's
;;
;; if last-update specified ("field-name" . time-in-seconds)
;;    then sync only records where field-name >= time-in-seconds
;;    IFF field-name exists
;;
(define (db:sync-tables alldat tbls last-update fromdb todb . slave-dbs)
  (let* ((configdat (alldat-mtconfig alldat)))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.")
       (print-call-chain (current-error-port))
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 5 *default-log-port* "exn=" (condition->list exn))
       (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
       (debug:print 0 *default-log-port* " src db:  " (db:dbdat-get-path fromdb))
       (for-each (lambda (dbdat)
		   (let ((dbpath (db:dbdat-get-path dbdat)))
		     (debug:print 0 *default-log-port* " dbpath:  " dbpath)
		     (if (not (db:repair-db dbdat))
			 (begin
			   (debug:print-error 0 *default-log-port* "Failed to rebuild " dbpath ", exiting now.")
			   (exit)))))
		 (cons todb slave-dbs))
       
       0)
     ;; this is the work to be done
     (cond
      ((not fromdb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with fromdb missing")
       -1)
      ((not todb)   (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with todb missing")
       -2)
      ((not (sqlite3:database? (db:dbdat-get-db fromdb)))
       (debug:print-error 0 *default-log-port* "db:sync-tables called with fromdb not a database " fromdb)
       -3)
      ((not (sqlite3:database? (db:dbdat-get-db todb)))
       (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a database " todb)
       -4)

      ((not (file-write-access? (db:dbdat-get-path todb)))
       (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a read-only database " todb)
       -5)
      ((not (null? (let ((readonly-slave-dbs
			  (filter
			   (lambda (dbdat)
			     (not (file-write-access? (db:dbdat-get-path todb))))
			   slave-dbs)))
		     (for-each
		      (lambda (bad-dbdat)
			(debug:print-error
			 0 *default-log-port* "db:sync-tables called with todb not a read-only database " bad-dbdat))
		      readonly-slave-dbs)
		     readonly-slave-dbs))) -6)
      (else
       (let ((stmts       (make-hash-table)) ;; table-field => stmt
	     (all-stmts   '())              ;; ( ( stmt1 value1 ) ( stml2 value2 ))
	     (numrecs     (make-hash-table))
	     (start-time  (current-milliseconds))
	     (tot-count   0))
	 (for-each ;; table
	  (lambda (tabledat)
	    (let* ((tablename        (car tabledat))
		   (fields           (cdr tabledat))
		   (has-last-update  (member "last_update" fields))
		   (use-last-update  (cond
				      ((and has-last-update
					    (member "last_update" fields))
				       #t) ;; if given a number, just use it for all fields
				      ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
				      ((and (pair? last-update)
					    (member (car last-update)    ;; last-update field name
						    (map car fields))) #t)
				      (last-update
				       (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update) ;; found in fields
				       #f)
				      (else
				       #f)))
		   (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
					  (if (number? last-update)
					      last-update
					      (cdr last-update))
					  #f))
		   (last-update-field (if use-last-update
					  (if (number? last-update)
					      "last_update"
					      (car last-update))
					  #f))
		   (num-fields (length fields))
		   (field->num (make-hash-table))
		   (num->field (apply vector (map car fields))) ;; BBHERE
		   (full-sel   (conc "SELECT " (string-intersperse (map car fields) ",") 
				     " FROM " tablename (if use-last-update ;; apply last-update criteria
							    (conc " WHERE " last-update-field " >= " last-update-value)
							    "")
				     ";"))
		   (full-ins   (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) "
				     " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );"))
		   (fromdat    '())
		   (fromdats   '())
		   (totrecords 0)
		   (batch-len  (string->number (or (configf:lookup configdat "sync" "batchsize") "100")))
		   (todat      (make-hash-table))
		   (count      0)

		   (delay-handicap  (string->number (or (configf:lookup configdat "sync" "delay-handicap") "0")))
		   )

	      ;; set up the field->num table
	      (for-each
	       (lambda (field)
		 (hash-table-set! field->num field count)
		 (set! count (+ count 1)))
	       fields)

	      ;; read the source table
	      (sqlite3:for-each-row
	       (lambda (a . b)
		 (set! fromdat (cons (apply vector a b) fromdat))
		 (if (> (length fromdat) batch-len)
		     (begin
		       (set! fromdats (cons fromdat fromdats))
		       (set! fromdat  '())
		       (set! totrecords (+ totrecords 1)))))
	       (db:dbdat-get-db fromdb)
	       full-sel)
	      
	      ;; tack on remaining records in fromdat
	      (if (not (null? fromdat))
		  (set! fromdats (cons fromdat fromdats)))

	      (if (common:low-noise-print 120 "sync-records")
		  (debug:print-info 4 *default-log-port* "found " totrecords " records to sync"))

	      ;; read the target table; BBHERE
	      (sqlite3:for-each-row
	       (lambda (a . b)
		 (hash-table-set! todat a (apply vector a b)))
	       (db:dbdat-get-db todb)
	       full-sel)

	      (when (and delay-handicap (> delay-handicap 0))
		    (debug:print-info 0 *default-log-port* "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured")
		    (thread-sleep! delay-handicap)
		    (debug:print-info 0 *default-log-port* "synthetic sync delay of "delay-handicap" seconds completed")
		    )
	      
	      ;; first pass implementation, just insert all changed rows
	      (for-each 
	       (lambda (targdb)
		 (let* ((db     (db:dbdat-get-db targdb))
			(stmth  (sqlite3:prepare db full-ins)))
		   (for-each
		    (lambda (fromdat-lst)
		      (sqlite3:with-transaction
		       db
		       (lambda ()
			 (for-each ;; 
			  (lambda (fromrow)
			    (let* ((a    (vector-ref fromrow 0))
				   (curr (hash-table-ref/default todat a #f))
				   (same #t))
			      (let loop ((i 0))
				(if (or (not curr)
					(not (equal? (vector-ref fromrow i)(vector-ref curr i))))
				    (set! same #f))
				(if (and same
					 (< i (- num-fields 1)))
				    (loop (+ i 1))))
			      (if (not same)
				  (begin
				    (apply sqlite3:execute stmth (vector->list fromrow))
				    (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))))))
			  fromdat-lst))
		       ))
		    fromdats)
		   (sqlite3:finalize! stmth)))
	       (append (list todb) slave-dbs))))
	  tbls)
	 (let* ((runtime      (- (current-milliseconds) start-time))
		(should-print (or (debug:debug-mode 12)
				  (common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate.
	   (if should-print (debug:print 3 *default-log-port* "INFO: db sync, total run time " runtime " ms"))
	   (for-each 
	    (lambda (dat)
	      (let ((tblname (car dat))
		    (count   (cdr dat)))
		(set! tot-count (+ tot-count count))
		(if (> count 0)
		    (if should-print (debug:print 0 *default-log-port* (format #f "    ~10a ~5a" tblname count))))))
	    (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b))))))
	 tot-count))))))
;; Open the classic megatest.db file (defaults to open in toppath)
;;
;;   NOTE: returns a dbdat not a dbstruct!
;;

;;(define (db:reopen-megatest-db

(define (db:open-megatest-db alldat #!key (path #f)(name #f))
  (let* ((dbdir        (or path (alldat-areapath alldat)))
         (dbpath       (conc  dbdir "/" (or name "megatest.db")))
	 (dbexists     (file-exists? dbpath))
	 (db           (db:lock-create-open dbpath
					    (lambda (db)
                                              (db:initialize-main-db db)
					      ;;(db:initialize-run-id-db db)
					      )))
	 (write-access (file-write-access? dbpath)))
    (debug:print-info 13 *default-log-port* "db:open-megatest-db "dbpath)
    (if (and dbexists (not write-access))
	(set! *db-write-access* #f))
    (cons db dbpath)))

(define (db:initialize-main-db alldat dbdat)
  #;(when (not *configinfo*)
           (launch:setup)) ;; added because Elena was getting stack dump because *configinfo* below was #f.
  (let* ((configdat (alldat-mtconfig alldat))
	 ;; (configdat (car *configinfo*))  ;; tut tut, global warning...
	 (keys     (common:get-fields configdat))
	 (havekeys (> (length keys) 0))
	 (keystr   (keys->keystr keys))
	 (fieldstr (keys:make-key/field-string configdat))
	 (db       (db:dbdat-get-db dbdat)))
    (for-each (lambda (key)
		(let ((keyn key))
		  (if (member (string-downcase keyn)
			      (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count"
				    "pass_count" "contour"))
		      (begin
			(print "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table, you must remove your megatest.db and <linktree>/.db before trying again.")
			(exit 1)))))
	      keys)
    (sqlite3:with-transaction
     db
     (lambda ()
      ;; handle-exceptions
      ;; exn
      ;; (begin
      ;;   (debug:print 0 "ERROR: Failed to create tables. Look at your [fields] section, should be: fieldname TEXT DEFAULT 'yourdefault'")
      ;;   (exit))
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS keys (id INTEGER PRIMARY KEY, fieldname TEXT, fieldtype TEXT, CONSTRAINT keyconstraint UNIQUE (fieldname));")
	(for-each (lambda (key)
		    (sqlite3:execute db "INSERT OR REPLACE INTO keys (fieldname,fieldtype) VALUES (?,?);" key "TEXT"))
		  keys)
	(sqlite3:execute db (conc 
			     "CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, \n			 " 
			     fieldstr (if havekeys "," "") "
			 runname    TEXT DEFAULT 'norun',
                         contour    TEXT DEFAULT '',
			 state      TEXT DEFAULT '',
			 status     TEXT DEFAULT '',
			 owner      TEXT DEFAULT '',
			 event_time TIMESTAMP DEFAULT (strftime('%s','now')),
			 comment    TEXT DEFAULT '',
			 fail_count INTEGER DEFAULT 0,
			 pass_count INTEGER DEFAULT 0,
                         last_update INTEGER DEFAULT (strftime('%s','now')),
			 CONSTRAINT runsconstraint UNIQUE (runname" (if havekeys "," "") keystr "));"))
	(sqlite3:execute db "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE runs SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS run_stats (
                              id     INTEGER PRIMARY KEY,
                              run_id INTEGER,
                              state  TEXT,
                              status TEXT,
                              count  INTEGER,
                              last_update INTEGER DEFAULT (strftime('%s','now')))")
	(sqlite3:execute db "CREATE TRIGGER  IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE run_stats SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_meta (
                                     id          INTEGER PRIMARY KEY,
                                     testname    TEXT DEFAULT '',
                                     author      TEXT DEFAULT '',
                                     owner       TEXT DEFAULT '',
                                     description TEXT DEFAULT '',
                                     reviewed    TIMESTAMP,
                                     iterated    TEXT DEFAULT '',
                                     avg_runtime REAL,
                                     avg_disk    REAL,
                                     tags        TEXT DEFAULT '',
                                     jobgroup    TEXT DEFAULT 'default',
                                CONSTRAINT test_meta_constraint UNIQUE (testname));")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY,
                                action TEXT DEFAULT '',
                                owner TEXT,
                                state TEXT DEFAULT 'new',
                                target TEXT DEFAULT '',
                                name TEXT DEFAULT '',
                                testpatt TEXT DEFAULT '',
                                keylock TEXT,
                                params TEXT,
                                creation_time TIMESTAMP DEFAULT (strftime('%s','now')),
                                execution_time TIMESTAMP);")
	;; archive disk areas, cached info from [archive-disks]
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_disks (
                                id INTEGER PRIMARY KEY,
                                archive_area_name TEXT,
                                disk_path TEXT,
                                last_df INTEGER DEFAULT -1,
                                last_df_time TIMESTAMP DEFAULT (strftime('%s','now')),
                                creation_time TIMESTAMP DEFAULT (strftime('%','now')));")
	;; individual bup (or tar) data chunks
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_blocks (
                                id INTEGER PRIMARY KEY,
                                archive_disk_id INTEGER,
                                disk_path TEXT,
                                last_du INTEGER DEFAULT -1,
                                last_du_time TIMESTAMP DEFAULT (strftime('%s','now')),
                                creation_time TIMESTAMP DEFAULT (strftime('%','now')));")
	;; tests allocated to what chunks. reusing a chunk for a test/item_path is very efficient
	;; NB// the per run/test recording of where the archive is stored is done in the test
	;;      record. 
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_allocations (
                                id INTEGER PRIMARY KEY,
                                archive_block_id INTEGER,
                                testname TEXT,
                                item_path TEXT,
                                creation_time TIMESTAMP DEFAULT (strftime('%','now')));")
	;; move this clean up call somewhere else
	(sqlite3:execute db "DELETE FROM tasks_queue WHERE state='done' AND creation_time < ?;" (- (current-seconds)(* 24 60 60))) ;; remove older than 24 hrs
	(sqlite3:execute db (conc "CREATE INDEX IF NOT EXISTS runs_index ON runs (runname" (if havekeys "," "") keystr ");"))
	;; (sqlite3:execute db "CREATE VIEW runs_tests AS SELECT * FROM runs INNER JOIN tests ON runs.id=tests.run_id;")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS extradat (id INTEGER PRIMARY KEY, run_id INTEGER, key TEXT, val TEXT);")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS metadat (id INTEGER PRIMARY KEY, var TEXT, val TEXT,
                                  CONSTRAINT metadat_constraint UNIQUE (var));")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS access_log (id INTEGER PRIMARY KEY, user TEXT, accessed TIMESTAMP, args TEXT);")
	;; Must do this *after* running patch db !! No more. 
	;; cannot use db:set-var since it will deadlock, hardwire the code here
	(sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" "MEGATEST_VERSION" (common:version-signature))
	(debug:print-info 11 *default-log-port* "db:initialize END") ;; ))))

	;;======================================================================
	;; R U N   S P E C I F I C   D B 
	;;======================================================================
	
	;; (define (db:initialize-run-id-db db)
	;;   (sqlite3:with-transaction 
	;;    db
	;;    (lambda ()
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS tests 
                    (id INTEGER PRIMARY KEY,
                     run_id       INTEGER   DEFAULT -1,
                     testname     TEXT      DEFAULT 'noname',
                     host         TEXT      DEFAULT 'n/a',
                     cpuload      REAL      DEFAULT -1,
                     diskfree     INTEGER   DEFAULT -1,
                     uname        TEXT      DEFAULT 'n/a', 
                     rundir       TEXT      DEFAULT '/tmp/badname',
                     shortdir     TEXT      DEFAULT '/tmp/badname',
                     item_path    TEXT      DEFAULT '',
                     state        TEXT      DEFAULT 'NOT_STARTED',
                     status       TEXT      DEFAULT 'FAIL',
                     attemptnum   INTEGER   DEFAULT 0,
                     final_logf   TEXT      DEFAULT 'logs/final.log',
                     logdat       TEXT      DEFAULT '', 
                     run_duration INTEGER   DEFAULT 0,
                     comment      TEXT      DEFAULT '',
                     event_time   TIMESTAMP DEFAULT (strftime('%s','now')),
                     fail_count   INTEGER   DEFAULT 0,
                     pass_count   INTEGER   DEFAULT 0,
                     archived     INTEGER   DEFAULT 0, -- 0=no, > 1=archive block id where test data can be found
                     last_update  INTEGER DEFAULT (strftime('%s','now')),
                        CONSTRAINT testsconstraint UNIQUE (run_id, testname, item_path));")
	;; deprecated -- (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path, uname);")
        
        (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_run_id_index ON tests (run_id);")  ;; new
        (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_testname_index ON tests (testname,item_path);") ;; new
        (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_state_status_index ON tests (state, status); ") ;; new
        
	(sqlite3:execute db "CREATE TRIGGER  IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE tests SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_steps 
                              (id INTEGER PRIMARY KEY,
                               test_id INTEGER, 
                               stepname TEXT, 
                               state TEXT DEFAULT 'NOT_STARTED', 
                               status TEXT DEFAULT 'n/a',
                               event_time TIMESTAMP,
                               comment TEXT DEFAULT '',
                               logfile TEXT DEFAULT '',
                               last_update  INTEGER DEFAULT (strftime('%s','now')),
                               CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));")
	(sqlite3:execute db "CREATE INDEX IF NOT EXISTS teststeps_index ON tests (run_id, testname, item_path);")
	(sqlite3:execute db "CREATE TRIGGER  IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE test_steps SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_data (id INTEGER PRIMARY KEY,
                                test_id INTEGER,
                                category TEXT DEFAULT '',
                                variable TEXT,
	                        value REAL,
	                        expected REAL,
	                        tol REAL,
                                units TEXT,
                                comment TEXT DEFAULT '',
                                status TEXT DEFAULT 'n/a',
                                type TEXT DEFAULT '',
                                last_update  INTEGER DEFAULT (strftime('%s','now')),
                              CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));")
	(sqlite3:execute db "CREATE INDEX IF NOT EXISTS test_data_index ON test_data (test_id);")
	(sqlite3:execute db "CREATE TRIGGER  IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE test_data SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat (
                              id           INTEGER PRIMARY KEY,
                              test_id      INTEGER,
                              update_time  TIMESTAMP,
                              cpuload      INTEGER DEFAULT -1,
                              diskfree     INTEGER DEFAULT -1,
                              diskusage    INTGER DEFAULT -1,
                              run_duration INTEGER DEFAULT 0);")
	(sqlite3:execute db "CREATE TABLE IF NOT EXISTS archives (
                              id           INTEGER PRIMARY KEY,
                              test_id      INTEGER,
                              state        TEXT DEFAULT 'new',
                              status       TEXT DEFAULT 'n/a',
                              archive_type TEXT DEFAULT 'bup',
                              du           INTEGER,
                              archive_path TEXT);")))
     db)) ;; )

;;======================================================================
;; Some utility stuff moved from common.scm
;;======================================================================


(define (common:get-area-name alldat #!optional (areapath-in #f))
  (let* ((configdat (alldat-mtconfig alldat))
	 (areapath  (or (alldat-areapath alldat)
			(get-environment-variable "MT_RUN_AREA_HOME")
			areapath-in)))
    (or (configf:lookup configdat "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description.
	(configf:lookup configdat "setup" "testsuite" )
	(get-environment-variable "MT_TESTSUITENAME") ;; circulat?
	(if (string? areapath )
	    (pathname-file areapath)
	    #f)))) ;; (pathname-file (current-directory)))))

;; return first path that can be created or already exists and is writable
;;
(define (common:get-create-writeable-dir dirs)
  (if (null? dirs)
      #f
      (let loop ((hed (car dirs))
		 (tal (cdr dirs)))
	(let ((res (or (and (directory? hed)
			    (file-write-access? hed)
			    hed)
		       (handle-exceptions
			   exn
			   (begin
			     ;; TODO add print of exception here
			     ;; (debug:print-info 0 *default-log-port* "could not create " hed ", this might cause problems down the road.")
			     #f)
			(create-directory hed #t)))))
	  (if (and (string? res)
		   (directory? res))
	      res
	      (if (null? tal)
		  #f
		  (loop (car tal)(cdr tal))))))))

;; (define common:get-area-name common:get-area-name)

(define (common:get-db-tmp-area alldat)
  (let* ((dbdir #f)
	 (log-port (alldat-log-port alldat)))
    (if (alldat-tmppath alldat)
	(alldat-tmppath alldat)
	(if (alldat-areapath alldat) ;; common:get-create-writeable-dir
	    (handle-exceptions
	     exn
	     (begin
	       (print-call-chain)
	       (print ((condition-property-accessor 'exn 'message) exn))
	       (debug:print-error 0 log-port "Couldn't create path to " dbdir)
	       (exit 1))
	     (let ((dbpath (common:get-create-writeable-dir
			    (list (conc "/tmp/" (current-user-name)
					"/megatest_localdb/"
					(common:get-area-name alldat) "/"
					(string-translate (alldat-areapath alldat) "/" ".")))))) ;;  #t))))
	       (set! dbdir dbpath) 
	       (alldat-tmppath-set! alldat dbpath)
	       dbpath))
	    #f))))

;; Make the dbstruct, setup up auxillary db's and call for main db at least once
;;
;; called in http-transport and replicated in rmt.scm for *local* access. 
;;
(define (db:setup do-sync alldat #!key (areapath #f))
  (let* ((log-port (alldat-log-port alldat)))
    (cond
     ((alldat-dbstack alldat) alldat) ;; already initialized
     ((not (alldat-areapath alldat))  ;; no top path yet? Just exit
      (debug:print-info 13 log-port "in db:setup, area-path not set; give up and exit.")
      (exit 1))
     (else ;;(common:on-homehost?)
      (debug:print-info 13 log-port "db:setup entered (first time, not cached.)")
      (debug:print-info 13 log-port "Begin db:open-db")
      (db:open-db alldat areapath: areapath do-sync: do-sync)
      (debug:print-info 13 log-port "Done db:open-db")
      ;; (set! *dbstruct-db* dbstruct)
      alldat))))



;; why get the keys from the db? why not get from the *configdat*
;; using common:get-fields?

(define (db:get-keys alldat)
  (if (alldat-db-keys alldat)
      (alldat-db-keys alldat)
      (let ((res '()))
	(db:with-db alldat #f #f
		    (lambda (db)
		      (sqlite3:for-each-row 
		       (lambda (key)
			 (set! res (cons key res)))
		       db
		       "SELECT fieldname FROM keys ORDER BY id DESC;")))
	(alldat-db-keys-set! alldat res)
	res)))


;; return #f to indicate the dbdat should be closed/reopened
;; else return dbdat
;;
(define (db:repair-db dbdat #!key (numtries 1))
  (let* ((dbpath   (db:dbdat-get-path        dbdat))
	 (dbdir    (pathname-directory       dbpath))
	 (fname    (pathname-strip-directory dbpath)))
    (debug:print-info 0 *default-log-port* "Checking db " dbpath " for errors.")
    (cond
     ((not (file-write-access? dbdir))
      (debug:print 0 *default-log-port* "WARNING: can't write to " dbdir ", can't fix " fname)
      #f)

     ;; handle special cases, megatest.db and monitor.db
     ;; 
     ;;  NOPE: apply this same approach to all db files
     ;;
     (else ;; ((equal? fname "megatest.db") ;; this file can be regenerated if needed
      (handle-exceptions
       exn
       (begin
	 ;; (db:move-and-recreate-db dbdat)
	 (if (> numtries 0)
	     (db:repair-db dbdat numtries: (- numtries 1))
	     #f)
	 (debug:print 0 *default-log-port* "FATAL: file " dbpath " was found corrupted, an attempt to fix has been made but you must start over.")
	 (debug:print 0 *default-log-port*
		      "   check the following:\n"
		      "      1. full directories, look in ~/ /tmp and " dbdir "\n"
		      "      2. write access to " dbdir "\n\n"
		      "   if the automatic recovery failed you may be able to recover data by doing \"" 
		      (if (member fname '("megatest.db" "monitor.db"))
			  "megatest -cleanup-db"
			  "megatest -import-megatest.db;megatest -cleanup-db")
		      "\"\n")
	 (exit) ;; we can not safely continue when a db was corrupted - even if fixed.
	 )
       ;; test read/write access to the database
       (let ((db (sqlite3:open-database dbpath)))
	 (cond
	  ((equal? fname "megatest.db")
	   (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED';"))
	  ((equal? fname "main.db")
	   (sqlite3:execute db "DELETE FROM runs WHERE state='deleted';"))
	  ((string-match "\\d.db" fname)
	   (sqlite3:execute db "UPDATE tests SET state='DELETED' WHERE state='DELETED';"))
	  ((equal? fname "monitor.db")
	   (sqlite3:execute "DELETE FROM servers WHERE state LIKE 'defunct%';"))
	  (else
	   (sqlite3:execute db "vacuum;")))
	 
	 (sqlite3:finalize! db)
	 #t))))))

;; open an sql database inside a file lock
;; returns: db existed-prior-to-opening
;; RA => Returns a db handler; sets the lock if opened in writable mode
;;
;; (define *db-open-mutex* (make-mutex))
;;
(define (db:lock-create-open alldat fname initproc)
  (let* ((configdat    (alldat-mtconfig alldat))
	 (parent-dir   (or (pathname-directory fname)
			   (current-directory))) ;; no parent? go local
         (raw-fname    (pathname-file fname))
	 (dir-writable (file-write-access? parent-dir))
	 (file-exists  (file-exists? fname))
	 (file-write   (if file-exists
			   (file-write-access? fname)
			   dir-writable )))
    ;; (mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped.
    (if file-write ;; dir-writable
	(condition-case
         (let* ((lockfname   (conc fname ".lock"))
                (readyfname  (conc parent-dir "/.ready-" raw-fname))
                (readyexists (file-exists? readyfname)))
           (if (not readyexists)
               (common:simple-file-lock-and-wait lockfname))
           (let ((db      (sqlite3:open-database fname)))
             (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000))
             (sqlite3:execute db "PRAGMA synchronous = 0;")
             (if (and (configf:lookup configdat "setup" "tmp_mode") (string-match "^/tmp/.*" fname))
                 (begin
                   ;;(print "DEBUG: Setting tmp_mode for " fname) 
                   (sqlite3:execute db (configf:lookup configdat "setup" "tmp_mode"))
                   )
                 )  
             (if (and (configf:lookup configdat "setup" "nfs_mode") (not (string-match "^/tmp/.*" fname)))
                 (begin
                   ;;(print "DEBUG: Setting nfs_mode for " fname)
                   (sqlite3:execute db (configf:lookup configdat "setup" "nfs_mode"))
                   )
                 )  
             (if (and (not (or (configf:lookup configdat "setup" "tmp_mode") (configf:lookup configdat "setup" "nfs_mode")))  
                      (configf:lookup configdat "setup" "use-wal")
                      (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp
                 (sqlite3:execute db "PRAGMA journal_mode=WAL;")
                 (debug:print 2 *default-log-port* "Creating " fname " in NON-WAL mode."))
             (if (not file-exists)
                 (initproc db))
             (if (not readyexists)
                 (begin
                   (common:simple-file-release-lock lockfname)
                   (with-output-to-file
                       readyfname
                     (lambda ()
                       (print "Ready at " 
                              (seconds->year-work-week/day-time 
                               (current-seconds)))))))
             db))
         (exn (io-error)  (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
         (exn (corrupt)   (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed."))
         (exn (busy)      (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back."))
         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
         (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
        
	(condition-case
         (begin
           (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname)
           (let ((db (sqlite3:open-database fname)))
             ;; (mutex-unlock! *db-open-mutex*)
             db))
         (exn (io-error)  (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
         (exn (corrupt)   (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed."))
         (exn (busy)      (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back."))
         (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
         (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
	)))

;;======================================================================
;;  R E C O R D S
;;======================================================================

;; each db entry is a pair ( db . dbfilepath )
;; I propose this record evolves into the area record
;;
;; MERGED INTO *alldat*
;;
;; (defstruct dbr:dbstruct 
;;   (tmpdb       #f)
;;   (dbstack     #f) ;; stack for tmp db handles, do not initialize with a stack
;;   (mtdb        #f)
;;   (refndb      #f)
;;   (homehost    #f) ;; not used yet
;;   (on-homehost #f) ;; not used yet
;;   (read-only   #f)
;;   )                ;; goal is to converge on one struct for an area but for now it is too confusing
  

;; record for keeping state,status and count for doing roll-ups in
;; iterated tests
;;
(defstruct dbr:counts
  (state #f)
  (status #f)
  (count  0)) 

;;======================================================================
;; SQLITE3 HELPERS
;;======================================================================

(define (db:general-sqlite-error-dump exn stmt . params)
  (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) ;; RADT ... how does this work?
    ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn)
    (print "err-status: " err-status)
    (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn))
    (print-call-chain (current-error-port))))

;; convert to -inline 
;;
(define (db:first-result-default db stmt default . params)
  (handle-exceptions
   exn
   (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
     ;; check for (exn sqlite3) ((condition-property-accessor 'exn 'message) exn)
     (if (eq? err-status 'done)
	 default
	 (begin
	   (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn))
	   (print-call-chain (current-error-port))
	   default)))
   (apply sqlite3:first-result db stmt params)))

;; mod-read:
;;     'mod   modified data
;;     'read  read data
;; Locks the mutex and depending on 'mod or 'read passed, sets the last timestamp in dbstruct
;;
;; (define (db:done-with dbstruct run-id mod-read)
;;   (if (not (sqlite3:database? dbstruct))
;;       (begin
;; 	(mutex-lock! *rundb-mutex*)
;; 	(if (eq? mod-read 'mod)
;; 	    (dbr:dbstruct-mtime-set! dbstruct (current-milliseconds))
;; 	    (dbr:dbstruct-rtime-set! dbstruct (current-milliseconds)))
;; 	(dbr:dbstruct-inuse-set! dbstruct #f)
;; 	(mutex-unlock! *rundb-mutex*))))

;;======================================================================
;; K E E P   F I L E D B   I N   dbstruct
;;======================================================================

;; (define (db:get-filedb dbstruct run-id)
;;   (let ((db (vector-ref dbstruct 2)))
;;     (if db
;; 	db
;; 	(let ((fdb (filedb:open-db (conc *toplevel* "/db/files.db"))))
;; 	  (vector-set! dbstruct 2 fdb)
;; 	  fdb))))
;; 
;; ;; Can also be used to save arbitrary strings
;; ;;
;; (define (db:save-path dbstruct path)
;;   (let ((fdb (db:get-filedb dbstruct)))b
;;     (filedb:register-path fdb path)))
;; 
;; ;; Use to get a path. To get an arbitrary string see next define
;; ;;
;; (define (db:get-path dbstruct id)
;;   (let ((fdb (db:get-filedb dbstruct)))
;;     (filedb:get-path db id)))

;; NB// #f => return dbdir only
;;      (was planned to be;  zeroth db with name=main.db)
;; 
;; If run-id is #f return to create and retrieve the path where the db will live.
;;
;; (define db:dbfile-path common:get-db-tmp-area)

(define (db:set-sync db)
  (let ((syncprag (configf:lookup *configdat* "setup" "sychronous")))
    (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) 


(define (db:get-last-update-time db)
  (let ((last-update-time #f))
    (sqlite3:for-each-row 
     (lambda (lup) 
       (set! last-update-time lup))     
     db    
     "select max(lup) from ( select max(last_update) as lup  from tests union select max(last_update) as lup from runs);")
    last-update-time))

;; sync run to disk if touched
;;
(define (db:sync-touched dbstruct run-id #!key (force-sync #f))
  (let ((tmpdb   (db:get-db dbstruct))
	(mtdb    (dbr:dbstruct-mtdb   dbstruct))
        (refndb  (dbr:dbstruct-refndb dbstruct))
	(start-t (current-seconds)))
    (debug:print-info 4 *default-log-port* "Syncing for run-id: " run-id)
    (mutex-lock! *db-multi-sync-mutex*)
    (let ((update_info (cons (if force-sync 0 *db-last-sync*) "last_update")))
      (mutex-unlock! *db-multi-sync-mutex*)
      (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb))
    (mutex-lock! *db-multi-sync-mutex*)
    (set! *db-last-sync* start-t)
    (set! *db-last-access* start-t)
    (mutex-unlock! *db-multi-sync-mutex*)
    (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)))

(define (db:safely-close-sqlite3-db db #!key (try-num 3))
  (if (<= try-num 0)
      #f
      (handle-exceptions
	  exn
	  (begin
	    (thread-sleep! 3)
	    (sqlite3:interrupt! db)
	    (db:safely-close-sqlite3-db db try-num: (- try-num 1)))
	(if (sqlite3:database? db)
	    (begin
	      (sqlite3:finalize! db)
	      #t)
	    #f))))

;; close all opened run-id dbs
(define (db:close-all dbstruct)
  (if (dbr:dbstruct? dbstruct)
      (handle-exceptions
	  exn
	  (begin
	    (debug:print 0 *default-log-port* "WARNING: Finalizing failed, "  ((condition-property-accessor 'exn 'message) exn))
	    (print-call-chain *default-log-port*))
	;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server.
        (let ((tdbs (map db:dbdat-get-db 
                         (stack->list (dbr:dbstruct-dbstack dbstruct))))
              (mdb (db:dbdat-get-db (dbr:dbstruct-mtdb   dbstruct)))
              (rdb (db:dbdat-get-db (dbr:dbstruct-refndb dbstruct))))
          (map (lambda (db)
		 (db:safely-close-sqlite3-db db))
;; 		 (if (sqlite3:database? db)
;; 		     (sqlite3:finalize! db)))
	       tdbs)
          (db:safely-close-sqlite3-db mdb)     ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb))
          (db:safely-close-sqlite3-db rdb))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb))))))

;;   (let ((locdbs (dbr:dbstruct-locdbs dbstruct)))
;;     (if (hash-table? locdbs)
;; 	(for-each (lambda (run-id)
;; 		    (db:close-run-db dbstruct run-id))
;; 		  (hash-table-keys locdbs)))))

;; (define (db:open-inmem-db)
;;   (let* ((db      (sqlite3:open-database ":memory:"))
;; 	 (handler (make-busy-timeout 3600)))
;;     (sqlite3:set-busy-handler! db handler)
;;     (db:initialize-run-id-db db)
;;     (cons db #f)))


;; use bunch of Unix commands to try to break the lock and recreate the db
;;
(define (db:move-and-recreate-db dbdat)
  (let* ((dbpath   (db:dbdat-get-path        dbdat))
	 (dbdir    (pathname-directory       dbpath))
	 (fname    (pathname-strip-directory dbpath))
	 (fnamejnl (conc fname "-journal"))
	 (tmpname  (conc fname "." (current-process-id)))
	 (tmpjnl   (conc fnamejnl "." (current-process-id))))
    (debug:print-error 0 *default-log-port* "" fname " appears corrupted. Making backup \"old/" fname "\"")
    (system (conc "cd " dbdir ";mkdir -p old;cat " fname " > old/" tmpname))
    (system (conc "rm -f " dbpath))
    (if (file-exists? fnamejnl)
	(begin
	  (debug:print-error 0 *default-log-port* "" fnamejnl " found, moving it to old dir as " tmpjnl)
	  (system (conc "cd " dbdir ";mkdir -p old;cat " fnamejnl " > old/" tmpjnl))
	  (system (conc "rm -f " dbdir "/" fnamejnl))))
    ;; attempt to recreate database
    (system (conc "cd " dbdir ";sqlite3 old/" tmpname " .dump | sqlite3 " fname))))
    
    
(define (db:patch-schema-rundb frundb)
  ;;
  ;; remove this some time after September 2016 (added in version v1.6031
  ;;
  (for-each
   (lambda (table-name)
     (handle-exceptions
      exn
      (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn))
          (debug:print 0 *default-log-port* "Column last_update already added to " table-name " table")
          (db:general-sqlite-error-dump exn "alter table " table-name " ..." #f "none"))
      (sqlite3:execute
       frundb
       (conc "ALTER TABLE " table-name " ADD COLUMN last_update INTEGER DEFAULT 0")))
     (sqlite3:execute
      frundb
      (conc "DROP TRIGGER IF EXISTS update_" table-name "_trigger;"))
     (sqlite3:execute
      frundb
      (conc "CREATE TRIGGER IF NOT EXISTS update_" table-name "_trigger AFTER UPDATE ON " table-name "
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE " table-name " SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;"))
     )
   '("tests" "test_steps" "test_data")))

(define (db:patch-schema-maindb maindb)
  ;;
  ;; remove all these some time after september 2016 (added in v1.6031
  ;;
  (for-each
   (lambda (column type default)
     (handle-exceptions
	 exn
	 (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 0 *default-log-port* "Column " column " already added to runs table")
	     (db:general-sqlite-error-dump exn "alter table runs ..." #f "none"))
       (sqlite3:execute
	maindb
	(conc "ALTER TABLE runs ADD COLUMN " column " " type " DEFAULT " default))))
   (list "last_update" "contour")
   (list "INTEGER"     "TEXT"   )
   (list "0"           "''"   ))
  ;; these schema changes don't need exception handling
  (sqlite3:execute
   maindb
   "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE runs SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;")
  (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS run_stats (
                              id     INTEGER PRIMARY KEY,
                              run_id INTEGER,
                              state  TEXT,
                              status TEXT,
                              count  INTEGER,
                              last_update INTEGER DEFAULT (strftime('%s','now')))")
  (sqlite3:execute maindb "CREATE TRIGGER  IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE run_stats SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;")
  (sqlite3:execute maindb "CREATE TABLE IF NOT EXISTS test_rundat (
                              id           INTEGER PRIMARY KEY,
                              test_id      INTEGER,
                              update_time  TIMESTAMP,
                              cpuload      INTEGER DEFAULT -1,
                              diskfree     INTEGER DEFAULT -1,
                              diskusage    INTGER DEFAULT -1,
                              run_duration INTEGER DEFAULT 0);"))

(define (db:adj-target db)
  (let ((fields    (configf:get-section *configdat* "fields"))
	(field-num 0))
    ;; because we will be refreshing the keys table it is best to clear it here
    (sqlite3:execute db "DELETE FROM keys;")
    (for-each
     (lambda (field)
       (let ((column (car field))
	     (spec   (cadr field)))
	 (handle-exceptions
	  exn
	  (if (string-match ".*duplicate.*" ((condition-property-accessor 'exn 'message) exn))
	      (debug:print 0 *default-log-port* "Target field " column " already exists in the runs table")
	      (db:general-sqlite-error-dump exn "alter table runs ..." #f "none"))
	  ;; Add the column if needed
	  (sqlite3:execute
	   db
	   (conc "ALTER TABLE runs ADD COLUMN " column " " spec)))
	 ;; correct the entry in the keys column
	 (sqlite3:execute
	  db
	  "INSERT INTO keys (id,fieldname,fieldtype) VALUES (?,?,?);"
	  field-num column spec)
	 ;; fill in blanks (not allowed as it would be part of the path
	 (sqlite3:execute
	  db
	  (conc "UPDATE runs SET " column "='x' WHERE " column "='';"))
	 (set! field-num (+ field-num 1))))
     fields)))
  
(define *global-db-store* (make-hash-table))

(define (db:get-access-mode)
  (if (args:get-arg "-use-db-cache") 'cached 'rmt))

;; Add db direct
;;
(define (db:dispatch-query access-mode rmt-cmd db-cmd . params)
  (if (eq? access-mode 'cached)
      (debug:print 2 *default-log-port* "not doing cached calls right now"))
;;      (apply db:call-with-cached-db db-cmd params)
      (apply rmt-cmd params))
;;)

;; return the target db handle so it can be used
;;
(define (db:cache-for-read-only source target #!key (use-last-update #f))
  (if (and (hash-table-ref/default *global-db-store* target #f)
	   (>= (file-modification-time target)(file-modification-time source)))
      (hash-table-ref *global-db-store* target)
      (let* ((toppath   (launch:setup))
	     (targ-db-last-mod (if (file-exists? target)
				   (file-modification-time target)
				   0))
	     (cache-db  (or (hash-table-ref/default *global-db-store* target #f)
			    (db:open-megatest-db path: target)))
	     (source-db (db:open-megatest-db path: source))
	     (curr-time (current-seconds))
	     (res      '())
	     (last-update (if use-last-update (cons "last_update" targ-db-last-mod) #f)))
	(db:sync-tables (db:sync-main-list source-db) last-update source-db cache-db)
	(db:sync-tables db:sync-tests-only last-update source-db cache-db)
	(hash-table-set! *global-db-store* target cache-db)
	cache-db)))

;; ;; call a proc with a cached db
;; ;;
;; (define (db:call-with-cached-db proc . params)
;;   ;; first cache the db in /tmp
;;   (let* ((cname-part (conc "megatest_cache/" (common:get-testsuite-name)))
;; 	 (fname      (conc  (common:get-area-path-signature) ".db"))
;; 	 (cache-dir  (common:get-create-writeable-dir
;; 		      (list (conc "/tmp/" (current-user-name) "/" cname-part)
;; 			    (conc "/tmp/" (current-user-name) "-" cname-part)
;; 			     (conc "/tmp/" (current-user-name) "_" cname-part))))
;; 	 (megatest-db (conc *toppath* "/megatest.db")))
;;     ;; (debug:print-info 0 *default-log-port* "Using cache dir " cache-dir)
;;     (if (not cache-dir)
;; 	(begin
;; 	  (debug:print 0 *default-log-port* "ERROR: Failed to find an area to write the cache db")
;; 	  (exit 1))
;; 	(let* ((th1      (make-thread
;; 			  (lambda ()
;; 			    (if (and (common:file-exists? megatest-db)
;; 				     (file-write-access? megatest-db))
;; 				(begin
;; 				  (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync*
;; 				  (debug:print-info 2 *default-log-port* "Done syncing to megatest.db"))))
;; 			  "call-with-cached-db sync-to-megatest.db"))
;; 	       (cache-db (db:cache-for-read-only
;; 			  megatest-db
;; 			  (conc cache-dir "/" fname)
;; 			  use-last-update: #t)))
;; 	  (thread-start! th1)
;; 	  (apply proc cache-db params)
;; 	  ))))

;; options:
;;
;;  'killservers  - kills all servers
;;  'dejunk       - removes junk records
;;  'adj-testids  - move test-ids into correct ranges
;;  'old2new      - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db
;;  'new2old      - sync /tmp/.../megatest.db to megatest.db and /tmp/.../megatest_ref.db (and update data_synced)
;;  'closeall     - close all opened dbs
;;  'schema       - attempt to apply schema changes
;;  run-ids: '(1 2 3 ...) or #f (for all)
;;
(define (db:multi-db-sync dbstruct . options)
  ;; (if (not (launch:setup))
  ;;    (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.")
  (let* ((mtdb     (dbr:dbstruct-mtdb dbstruct))
	 (tmpdb    (db:get-db dbstruct))
	 (refndb   (dbr:dbstruct-refndb dbstruct))
	 (allow-cleanup #t) ;; (if run-ids #f #t))
	 (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat)))
	 (data-synced 0)) ;; count of changed records (I hope)
    
    (for-each
     (lambda (option)
       
       (case option
	 ;; kill servers
	 ((killservers)
	  (for-each
	   (lambda (server)
	     (match-let (((mod-time host port start-time pid) server))
	       (if (and host pid)
		   (tasks:kill-server host pid))))
	   servers)

          ;; /tmp/bjbarcla/megatest_localdb/fullrun/.nfs.pdx.disks.icf_env_disk001.bjbarcla.gwa.issues.mtdev.matt-bisect.megatest.ext-tests.runs.all.v1.65.1.6524.dev.bb-24-justrollup-f8.rollup.fullrun/megatest.db.lock
          (delete-file* (common:get-sync-lock-filepath *alldat*))
          )
	 
	 ;; clear out junk records
	 ;;
	 ((dejunk)
	  (db:delay-if-busy mtdb) ;; ok to delay on mtdb
	  (when (file-write-access? (db:dbdat-get-path mtdb)) (db:clean-up mtdb))
	  (db:clean-up tmpdb)
	  (db:clean-up refndb))

	 ;; sync runs, test_meta etc.
	 ;;
	 ((old2new)
	  (set! data-synced
	    (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb)
	       data-synced)))
	 
	 ;; now ensure all newdb data are synced to megatest.db
	 ;; do not use the run-ids list passed in to the function
	 ;;
	 ((new2old)
	  (set! data-synced
	    (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb)
	       data-synced)))

	 ((adj-target)
	  (db:adj-target (db:dbdat-get-db mtdb))
	  (db:adj-target (db:dbdat-get-db tmpdb))
	  (db:adj-target (db:dbdat-get-db refndb)))
	 
	 ((schema)
	  (db:patch-schema-maindb (db:dbdat-get-db mtdb))
	  (db:patch-schema-maindb (db:dbdat-get-db tmpdb))
	  (db:patch-schema-maindb (db:dbdat-get-db refndb))
	  (db:patch-schema-rundb  (db:dbdat-get-db mtdb))
	  (db:patch-schema-rundb  (db:dbdat-get-db tmpdb))
	  (db:patch-schema-rundb  (db:dbdat-get-db refndb))))
       
       (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb))
     options)
    data-synced))

(define (db:tmp->megatest.db-sync dbstruct last-update)
  (let* ((mtdb        (dbr:dbstruct-mtdb dbstruct))
	 (tmpdb       (db:get-db dbstruct))
	 (refndb      (dbr:dbstruct-refndb dbstruct))
	 (res         (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb)))
    (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)
    res))

;;;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
;;    if #t use timestamps      : or 'timestamps
;;
;;  NB// no-sync-db is the db handle, not a flag!
;;
(define (db:sync-to-megatest.db dbstruct #!key (no-sync-db #f)) 
  (let* ((start-time         (current-seconds))
	 (last-full-update   (if no-sync-db
				 (db:no-sync-get/default no-sync-db "LAST_FULL_UPDATE" 0)
				 0))
	 (full-sync-needed   (> (- start-time last-full-update) 3600)) ;; every hour do a full sync
	 (last-update        (if full-sync-needed
				 0
				 (if no-sync-db
				     (db:no-sync-get/default no-sync-db "LAST_UPDATE" 0)
				     0))) ;; (or (db:get-var dbstruct "LAST_UPDATE") 0))
	 (sync-needed        (> (- start-time last-update) 6))
	 (res                (if (or sync-needed ;; don't sync if a sync already occurred in the past 6 seconds
				     full-sync-needed)
				 (begin
				   (if no-sync-db
				       (begin
					 (if full-sync-needed (db:no-sync-set no-sync-db "LAST_FULL_UPDATE" start-time))
					 (db:no-sync-set no-sync-db "LAST_UPDATE" start-time)))
				   (db:tmp->megatest.db-sync dbstruct last-update))
				 0))
	 (sync-time           (- (current-seconds) start-time)))
      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
      (if (common:low-noise-print 30 "sync new to old")
          (if sync-needed
              (debug:print-info 0 *default-log-port* "Sync of " res " records from newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
              (debug:print-info 0 *default-log-port* "No sync needed, last updated " (- start-time last-update) " seconds ago")))
      res))

;; keeping it around for debugging purposes only
(define (open-run-close-no-exception-handling  proc idb . params)
  (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params)
  (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...")
  (exit)
  (if (or *db-write-access*
	  (not #t)) ;; was: (member proc * db:all-write-procs *)))
      (let* ((db (cond
		  ((pair? idb)                 (db:dbdat-get-db idb))
		  ((sqlite3:database? idb)     idb)
		  ((not idb)                   (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore"))
		  ((procedure? idb)            (idb))
		  (else   	               (debug:print-error 0 *default-log-port* "cannot open-run-close with #f anymore"))))
	     (res #f))
	(set! res (apply proc db params))
	(if (not idb)(sqlite3:finalize! dbstruct))
	(debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling END" )
	res)
      #f))

(define (open-run-close-exception-handling proc idb . params)
  (handle-exceptions
   exn
   (let ((sleep-time (random 30))
	 (err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
     (case err-status
       ((busy)
	(thread-sleep! sleep-time))
       (else
	(debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
	(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	(debug:print 5 *default-log-port* "exn=" (condition->list exn))
	(debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	(print-call-chain (current-error-port))
	(thread-sleep! sleep-time)
	(debug:print-info 0 *default-log-port* "trying db call one more time....this may never recover, if necessary kill process " (current-process-id) " on host " (get-host-name) " to clean up")))
     (apply open-run-close-exception-handling proc idb params))
   (apply open-run-close-no-exception-handling proc idb params)))

;; (define open-run-close 
(define open-run-close open-run-close-exception-handling)
		;;	   open-run-close-no-exception-handling
;;			   open-run-close-exception-handling)
;;)

;;======================================================================
;; A R C H I V E S
;;======================================================================

;; dneeded is minimum space needed, scan for existing archives that 
;; are on disks with adequate space and already have this test/itempath
;; archived
;;
(define (db:archive-get-allocations dbstruct testname itempath dneeded)
  (let* ((dbdat        (db:get-db dbstruct)) ;; archive tables are in main.db
	 (db           (db:dbdat-get-db dbdat))
	 (res          '())
	 (blocks       '())) ;; a block is an archive chunck that can be added too if there is space
    (sqlite3:for-each-row
     (lambda (id archive-disk-id disk-path last-du last-du-time)
       (set! res (cons (vector id archive-disk-id disk-path last-du last-du-time) res)))
     db
     "SELECT b.id,b.archive_disk_id,b.disk_path,b.last_du,b.last_du_time FROM archive_blocks AS b
        INNER JOIN archive_allocations AS a ON a.archive_block_id=b.id
        WHERE a.testname=? AND a.item_path=?;" 
     testname itempath)
    ;; Now res has list of candidate paths, look in archive_disks for candidate with potential free space
    (if (null? res)
	'()
	(sqlite3:for-each-row
	 (lambda (id archive-area-name disk-path last-df last-df-time)
	   (set! blocks (cons (vector id archive-area-name disk-path last-df last-df-time) blocks)))
	 db 
	 (conc
	  "SELECT d.id,d.archive_area_name,disk_path,last_df,last_df_time FROM archive_disks AS d
             INNER JOIN archive_blocks AS b ON d.id=b.archive_disk_id
             WHERE b.id IN (" (string-intersperse (map conc res) ",") ") AND
         last_df > ?;")
	 dneeded))
    (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
    blocks))
    
;; returns id of the record, register a disk allocated to archiving and record it's last known
;; available space
;;
(define (db:archive-register-disk dbstruct bdisk-name bdisk-path df)
  (let* ((dbdat        (db:get-db dbstruct)) ;; archive tables are in main.db
	 (db           (db:dbdat-get-db dbdat))
	 (res          #f))
    (sqlite3:for-each-row
     (lambda (id)
       (set! res id))
     db
     "SELECT id FROM archive_disks WHERE archive_area_name=? AND disk_path=?;"
     bdisk-name bdisk-path)
    (if res ;; record exists, update df and return id
	(begin
	  (sqlite3:execute db "UPDATE archive_disks SET last_df=?,last_df_time=(strftime('%s','now'))
                                  WHERE archive_area_name=? AND disk_path=?;"
			   df bdisk-name bdisk-path)
          (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
	  res)
	(begin
	  (sqlite3:execute
	   db
	   "INSERT OR REPLACE INTO archive_disks (archive_area_name,disk_path,last_df)
                VALUES (?,?,?);"
	   bdisk-name bdisk-path df)
          (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
	  (db:archive-register-disk dbstruct bdisk-name bdisk-path df)))))

;; record an archive path created on a given archive disk (identified by it's bdisk-id)
;; if path starts with / then it is full, otherwise it is relative to the archive disk
;; preference is to store the relative path.
;;
(define (db:archive-register-block-name dbstruct bdisk-id archive-path #!key (du #f))
  (let* ((dbdat        (db:get-db dbstruct)) ;; archive tables are in main.db
	 (db           (db:dbdat-get-db dbdat))
	 (res          #f))
    ;; first look to see if this path is already registered
    (sqlite3:for-each-row
     (lambda (id)
       (set! res id))
     db
     "SELECT id FROM archive_blocks WHERE archive_disk_id=? AND disk_path=?;"
     bdisk-id archive-path)
    (if res ;; record exists, update du if applicable and return res
	(begin
	  (if du (sqlite3:exectute db "UPDATE archive_blocks SET last_du=?,last_du_time=(strftime('%s','now'))
                                          WHERE archive_disk_id=? AND disk_path=?;"
				   bdisk-id archive-path du))
	  res)
	(begin
	  (sqlite3:execute db "INSERT OR REPLACE INTO archive_blocks (archive_disk_id,disk_path,last_du)
                                                        VALUES (?,?,?);"
			   bdisk-id archive-path (or du 0))
	  (db:archive-register-block-name dbstruct bdisk-id archive-path du: du)))
    (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
    res))


;; The "archived" field in tests is overloaded; 0 = not archived, > 0 archived in block with given id
;;
(define (db:test-set-archive-block-id dbstruct run-id test-id archive-block-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (sqlite3:execute db "UPDATE tests SET archived=? WHERE id=?;"
		      archive-block-id test-id))))
 
;; Look up the archive block info given a block-id
;;
(define (db:test-get-archive-block-info dbstruct archive-block-id)
  (db:with-db
   dbstruct
   #f
   #f
   (lambda (db)
     (let ((res #f))
       (sqlite3:for-each-row 
	;;        0         1           2        3          4           5
	(lambda (id archive-disk-id disk-path last-du last-du-time creation-time)
	  (set! res (vector id archive-disk-id disk-path last-du last-du-time creation-time)))
	db
	"SELECT id,archive_disk_id,disk_path,last_du,last_du_time,creation_time FROM archive_blocks WHERE id=?;"
	archive-block-id)
       res))))

;; (define (db:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey)
;;   (let* ((dbdat        (db:get-db dbstruct #f)) ;; archive tables are in main.db
;; 	 (db           (db:dbdat-get-db dbdat))
;; 	 (res          '())
;; 	 (blocks       '())) ;; a block is an archive chunck that can be added too if there is space
;;     (sqlite3:for-each-row  #f)

;;======================================================================
;; L O G G I N G    D B 
;;======================================================================

(define (open-logging-db)
  (let* ((dbpath    (conc (if *toppath* (conc *toppath* "/") "") "logging.db")) ;; fname)
	 (dbexists  (file-exists? dbpath))
	 (db        (sqlite3:open-database dbpath))
	 (handler   (make-busy-timeout (if (args:get-arg "-override-timeout")
					   (string->number (args:get-arg "-override-timeout"))
					   136000)))) ;; 136000)))
    (sqlite3:set-busy-handler! db handler)
    (if (not dbexists)
	(begin
	  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS log (id INTEGER PRIMARY KEY,event_time TIMESTAMP DEFAULT (strftime('%s','now')),logline TEXT,pwd TEXT,cmdline TEXT,pid INTEGER);")
	  (db:set-sync db) ;; (sqlite3:execute db (conc "PRAGMA synchronous = 0;"))
	  ))
    db))

(define (db:log-local-event . loglst)
  (let ((logline (apply conc loglst)))
    (db:log-event logline)))

(define (db:log-event logline)
  (let ((db (open-logging-db)))
    (sqlite3:execute db "INSERT INTO log (logline,pwd,cmdline,pid) VALUES (?,?,?,?);"
		     logline
		     (current-directory)
		     (string-intersperse (argv) " ")
		     (current-process-id))
    (sqlite3:finalize! db)
    logline))

;;======================================================================
;; D B   U T I L S
;;======================================================================

;;======================================================================
;; M A I N T E N A N C E
;;======================================================================

(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
	 (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
	 (deadtime     (if (and deadtime-str
				(string->number deadtime-str))
			   (string->number deadtime-str)
			   72000))) ;; twenty hours
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (if (number? ovr-deadtime)(set! deadtime ovr-deadtime))
       
       ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
       ;;
       ;; HOWEVER: this code in run:test seems to work fine
       ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
       ;;                     (db:test-get-run_duration testdat)))
       ;;                    600) 
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row 
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (begin
                (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
                (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
              (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
        db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');"
        run-id deadtime)

       ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
       ;;
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
              (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
        db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');"
        run-id)
       
       (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
       (if (and (null? incompleted)
                (null? oldlaunched)
                (null? toplevels))
           #f
           #t)))))

;; given a launch delay (minimum time from last launch) return amount of time to wait
;;
;; (define (db:launch-delay-left dbstruct run-id launch-delay)
  

;;  select end_time-now from
;;      (select testname,item_path,event_time+run_duration as
;;                          end_time,strftime('%s','now') as now from tests where state in
;;      ('RUNNING','REMOTEHOSTSTART','LAUNCHED'));

(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime)
  (let* ((incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
         (deadtime-trim (or ovr-deadtime (configf:lookup-number *configdat* "setup" "deadtime")))
         (server-start-allowance 200)
         (server-overloaded-budget 200)
         (launch-monitor-off-time (or (configf:lookup-number *configdat* "setup" "test-stats-update-period") 30))
         (launch-monitor-on-time-budget 30)
         (launch-monitor-period (+ launch-monitor-off-time launch-monitor-on-time-budget server-overloaded-budget))
         (remotehoststart-deadtime-default (+ server-start-allowance server-overloaded-budget 30))
         (remotehoststart-deadtime (or deadtime-trim remotehoststart-deadtime-default))
         (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))
         (running-deadtime (or deadtime-trim running-deadtime-default)) ;; two minutes (30 seconds between updates, this leaves 3x grace period)
         )
    (db:with-db 
     dbstruct #f #f
     (lambda (db)
       ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
       ;;
       ;; HOWEVER: this code in run:test seems to work fine
       ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
       ;;                     (db:test-get-run_duration testdat)))
       ;;                    600) 
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row 
        (lambda (test-id run-dir uname testname item-path event-time run-duration)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (begin
                (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
                (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
              (begin
                (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))
                (debug:print-info 0 *default-log-port* "Found old test in RUNNING state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration))))
        db
        "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING');"
        run-id running-deadtime)

       
       (sqlite3:for-each-row 
        (lambda (test-id run-dir uname testname item-path event-time run-duration)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (begin
                (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
                (debug:print-info 0 *default-log-port* "Found old toplevel test in RUNNING state, test-id=" test-id))
              (begin
                (debug:print-info 0 *default-log-port* "Found old test in REMOTEHOSTSTART state, test-id=" test-id" exceeded running-deadtime "running-deadtime" now="(current-seconds)" event-time="event-time" run-duration="run-duration)
                (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted)))))
        db
        "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('REMOTEHOSTSTART');"
        run-id remotehoststart-deadtime)

       ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
       ;;
       ;; (db:delay-if-busy dbdat)
       (sqlite3:for-each-row
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
              (begin
                (debug:print-info 0 *default-log-port* "Found old test in LAUNCHED state, test-id=" test-id" 1 day since event_time marked")
                (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched)))))
        db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');"
        run-id)
       
       (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")

       ;; These are defunct tests, do not do all the overhead of set-state-status. Force them to INCOMPLETE.
       ;;
       ;; (db:delay-if-busy dbdat)
       (let* (;; (min-incompleted (filter (lambda (x)
              ;;      		      (let* ((testpath (cadr x))
              ;;      			     (tdatpath (conc testpath "/testdat.db"))
              ;;      			     (dbexists (common:file-exists? tdatpath)))
              ;;      			(or (not dbexists) ;; if no file then something wrong - mark as incomplete
              ;;      			    (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim
              ;;      		    incompleted))
              (min-incompleted-ids (map car incompleted)) ;; do 'em all
              (all-ids             (append min-incompleted-ids (map car oldlaunched))))
         (if (> (length all-ids) 0)
             (begin
               (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as DEAD")
               (for-each
                (lambda (test-id)
                  (db:set-state-status-and-roll-up-items dbstruct run-id test-id 'foo "COMPLETED" "DEAD" "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead."))
                  ;;(db:test-set-state-status dbstruct run-id test-id "COMPLETED" "DEAD" "Test stopped responding while in RUNNING or REMOTEHOSTSTART; presumed dead.")) ;; fix for one aspect of Randy's ticket 1405717332 ;; TODO - fix problem where test goes to COMPLETED/DEAD while in progress, only later to go to COMPLETED/PASS.  ref ticket 220546828
                all-ids))))))))

;; ALL REPLACED BY THE BLOCK ABOVE
;;
;; 	    (sqlite3:execute 
;; 	     db
;; 	     (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? AND id IN (" 
;; 		   (string-intersperse (map conc all-ids) ",")
;; 		   ");")
;;              run-id))))
;; 
;;     ;; Now do rollups for the toplevel tests
;;     ;;
;;     ;; (db:delay-if-busy dbdat)
;;     (for-each
;;      (lambda (toptest)
;;        (let ((test-name (list-ref toptest 3)))
;; ;;	     (run-id    (list-ref toptest 5)))
;; 	 (db:top-test-set-per-pf-counts dbstruct run-id test-name)))
;;      toplevels)))

;; BUG: Probably broken - does not explicitly use run-id in the query
;;
(define (db:top-test-set-per-pf-counts dbstruct run-id test-name)
  (db:general-call dbstruct 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name)))

;; Clean out old junk and vacuum the database
;;
;; Ultimately do something like this:
;;
;; 1. Look at test records either deleted or part of deleted run:
;;    a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown'
;;    b. If test dir gone, delete the test record
;; 2. Look at run records
;;    a. If have tests that are not deleted, set state='unknown'
;;    b. ....
;;
(define (db:clean-up dbdat)
  ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
  (let* ((keep-record-age ( - (current-seconds) (common:hms-string->seconds (or (configf:lookup *configdat* "setup" "delete-record-age") "30d"))))
	 (db         (db:dbdat-get-db dbdat))
	 (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests)+(SELECT count(id) FROM runs);"))
	(statements
	 (map (lambda (stmt)
		(sqlite3:prepare db stmt))
	      (list
	       ;; delete all tests that belong to runs that are 'deleted'
	       (conc "DELETE FROM tests WHERE run_id in (SELECT id FROM runs WHERE state='deleted') and last_update < " keep-record-age ";")
	       ;; delete all tests that are 'DELETED'
	       (conc "DELETE FROM tests WHERE state='DELETED' and last_update < " keep-record-age " ;")
	       ;; delete all tests that have no run
	       (conc "DELETE FROM tests WHERE run_id NOT IN (SELECT DISTINCT id FROM runs) and last_update < " keep-record-age "; ")
	       ;; delete all runs that are state='deleted'
	       (conc "DELETE FROM runs WHERE state='deleted' and last_update < " keep-record-age ";")
	       ;; delete empty runs
	       (conc "DELETE FROM runs WHERE id NOT IN (SELECT DISTINCT r.id FROM runs AS r INNER JOIN tests AS t ON t.run_id=r.id) and last_update < " keep-record-age ";")
	       ;; remove orphaned test_rundat entries
	       (conc "DELETE FROM test_rundat where test_id NOT IN (SELECT id FROM tests);")
	       ;; remove orphaned test_steps entries
	       (conc "DELETE FROM test_steps WHERE test_id NOT IN (SELECT id FROM tests);")
               ;; remove orphaned test_dat entries
	       (conc "DELETE FROM test_data WHERE test_id NOT IN (SELECT id FROM tests);")

	       ))))
    ;; (db:delay-if-busy dbdat)
    ;(debug:print-info 0 *default-log-port*  statements) 
    (sqlite3:with-transaction 
     db
     (lambda ()
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 *default-log-port* "Records count before clean: " tot))
			     count-stmt)
       (map sqlite3:execute statements)
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 *default-log-port* "Records count after  clean: " tot))
			     count-stmt)))
    (map sqlite3:finalize! statements)
    (sqlite3:finalize! count-stmt)
    ;; (db:find-and-mark-incomplete db)
    ;; (db:delay-if-busy dbdat)
    (sqlite3:execute db "VACUUM;")))

;; Clean out old junk and vacuum the database
;;
;; Ultimately do something like this:
;;
;; 1. Look at test records either deleted or part of deleted run:
;;    a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown'
;;    b. If test dir gone, delete the test record
;; 2. Look at run records
;;    a. If have tests that are not deleted, set state='unknown'
;;    b. ....
;;
(define (db:clean-up-rundb dbdat)
  ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
  (let* ((db         (db:dbdat-get-db dbdat))
	 (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);"))
	(statements
	 (map (lambda (stmt)
		(sqlite3:prepare db stmt))
	      (list
	       ;; delete all tests that belong to runs that are 'deleted'
	       ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");")
	       ;; delete all tests that are 'DELETED'
	       "DELETE FROM tests WHERE state='DELETED';"
	       ))))
    ;; (db:delay-if-busy dbdat)
    (sqlite3:with-transaction 
     db
     (lambda ()
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 *default-log-port* "Records count before clean: " tot))
			     count-stmt)
       (map sqlite3:execute statements)
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 *default-log-port* "Records count after  clean: " tot))
			     count-stmt)))
    (map sqlite3:finalize! statements)
    (sqlite3:finalize! count-stmt)
    ;; (db:find-and-mark-incomplete db)
    ;; (db:delay-if-busy dbdat)
    (sqlite3:execute db "VACUUM;")))

;; Clean out old junk and vacuum the database
;;
;; Ultimately do something like this:
;;
;; 1. Look at test records either deleted or part of deleted run:
;;    a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown'
;;    b. If test dir gone, delete the test record
;; 2. Look at run records
;;    a. If have tests that are not deleted, set state='unknown'
;;    b. ....
;;
(define (db:clean-up-maindb dbdat)
  ;; (debug:print 0 *default-log-port* "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
  (let* ((db         (db:dbdat-get-db dbdat))
	 (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM runs);"))
	 (statements
	  (map (lambda (stmt)
		 (sqlite3:prepare db stmt))
	       (list
		;; delete all tests that belong to runs that are 'deleted'
		;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");")
		;; delete all tests that are 'DELETED'
		"DELETE FROM runs WHERE state='deleted';"
		)))
	 (dead-runs '()))
    (sqlite3:for-each-row
     (lambda (run-id)
       (set! dead-runs (cons run-id dead-runs)))
       db
       "SELECT id FROM runs WHERE state='deleted';")
    ;; (db:delay-if-busy dbdat)
    (sqlite3:with-transaction 
     db
     (lambda ()
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 *default-log-port* "Records count before clean: " tot))
			     count-stmt)
       (map sqlite3:execute statements)
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 *default-log-port* "Records count after  clean: " tot))
			     count-stmt)))
    (map sqlite3:finalize! statements)
    (sqlite3:finalize! count-stmt)
    ;; (db:find-and-mark-incomplete db)
    ;; (db:delay-if-busy dbdat)
    (sqlite3:execute db "VACUUM;")
    dead-runs))

;;======================================================================
;; M E T A   G E T   A N D   S E T   V A R S
;;======================================================================

;; returns number if string->number is successful, string otherwise
;; also updates *global-delta*
;;
(define (db:get-var dbstruct var)
  (let* ((res      #f))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row
        (lambda (val)
          (set! res val))
        db
        "SELECT val FROM metadat WHERE var=?;" var)
       ;; convert to number if can
       (if (string? res)
           (let ((valnum (string->number res)))
             (if valnum (set! res valnum))))
       res))))

;; This was part of db:get-var. It was used to estimate the load on
;; the database files.
;;
;; scale by 10, average with current value.
;;     (set! *global-delta* (/ (+ *global-delta* (* (- (current-milliseconds) start-ms)
;; 						 (if throttle throttle 0.01)))
;; 			    2))
;;     (if (> (abs (- *last-global-delta-printed* *global-delta*)) 0.08) ;; don't print all the time, only if it changes a bit
;; 	(begin
;; 	  (debug:print-info 4 *default-log-port* "launch throttle factor=" *global-delta*)
;; 	  (set! *last-global-delta-printed* *global-delta*)))

(define (db:set-var dbstruct var val)
  (db:with-db dbstruct #f #t 
	      (lambda (db)
		(sqlite3:execute db "INSERT OR REPLACE INTO metadat (var,val) VALUES (?,?);" var val))))

(define (db:del-var dbstruct var)
  (db:with-db dbstruct #f #t 
	      (lambda (db)
		(sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var))))

;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================

(define (db:open-no-sync-db)
  (let* ((dbpath (common:get-db-tmp-area *alldat*))
	 (dbname (conc dbpath "/no-sync.db"))
	 (db-exists (file-exists? dbname))
	 (db     (sqlite3:open-database dbname)))
    (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
    (if (not db-exists)
	(begin
	  (sqlite3:execute db "PRAGMA synchronous = 0;")
	  (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));")
	  (sqlite3:execute db "PRAGMA journal_mode=WAL;")))
    db))

;; if we are not a server create a db handle. this is not finalized
;; so watch for problems. I'm still not clear if it is needed to manually
;; finalize sqlite3 dbs with the sqlite3 egg.
;;
(define (db:no-sync-db db-in)
  (mutex-lock! *db-access-mutex*)
  (let ((res (if db-in
                 db-in
                 (let ((db (db:open-no-sync-db)))
                   (set! *no-sync-db* db)
                   db))))
    (mutex-unlock! *db-access-mutex*)
    res))

(define (db:no-sync-set db var val)
  (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val))

(define (db:no-sync-del! db var)
  (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var))

(define (db:no-sync-get/default db var default)
  (let ((res default))
    (sqlite3:for-each-row
     (lambda (val)
       (set! res val))
     (db:no-sync-db db)
     "SELECT val FROM no_sync_metadat WHERE var=?;"
     var)
    (if res
        (let ((newres (if (string? res)
			  (string->number res)
			  #f)))
          (if newres
              newres
              res))
        res)))

(define (db:no-sync-close-db db)
  (db:safely-close-sqlite3-db db))

;; transaction protected lock aquisition
;; either:
;;    fails    returns  (#f . lock-creation-time)
;;    succeeds (returns (#t . lock-creation-time)
;; use (db:no-sync-del! db keyname) to release the lock
;;
(define (db:no-sync-get-lock db-in keyname)
  (let ((db (db:no-sync-db db-in)))
    (sqlite3:with-transaction
     db
     (lambda ()
       (handle-exceptions
	   exn
	   (let ((lock-time (current-seconds)))
	     (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time)
	     `(#t . ,lock-time))
	 `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname)))))))



;; use a global for some primitive caching, it is just silly to
;; re-read the db over and over again for the keys since they never
;; change

;; look up values in a header/data structure
(define (db:get-value-by-header row header field)
  (if (or (null? header) (not row))
      #f
      (let loop ((hed (car header))
                 (tal (cdr header))
                 (n   0))
        (if (equal? hed field)
            (handle-exceptions
             exn
             (begin
               (debug:print 0 *default-log-port* "WARNING: attempt to read non-existant field, row=" row " header=" header " field=" field)
               #f)
             (vector-ref row n))
	    (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1)))))))

;; Accessors for the header/data structure
;; get rows and header from 
(define (db:get-header vec)(vector-ref vec 0))
(define (db:get-rows   vec)(vector-ref vec 1))

;;======================================================================
;;  R U N S
;;======================================================================





(define (db:get-run-times dbstruct run-patt target-patt)
(let ((res `())
           (qry 	(conc "select runname, (max(end_time)-min(event_time))/60 as runtime, target from (select runname, run_id,tests.event_time,tests.event_time+run_duration AS end_time, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname like ? and target like ?) group by run_id ;")))
;(print qry)
(db:with-db 
   dbstruct
   #f ;; this is for the main runs db
   #f ;; does not modify db
   (lambda (db)
            (sqlite3:for-each-row
	(lambda (runname runtime target )
	  (set! res (cons (vector runname runtime target) res)))
	db
        qry 
	run-patt target-patt)
       
       res))))



(define (db:get-run-name-from-id dbstruct run-id)
  (db:with-db 
   dbstruct
   #f ;; this is for the main runs db
   #f ;; does not modify db
   (lambda (db)
     (let ((res #f))
       (sqlite3:for-each-row
	(lambda (runname)
	  (set! res runname))
	db
	"SELECT runname FROM runs WHERE id=?;"
	run-id)
       res))))

(define (db:get-run-key-val dbstruct run-id key)
  (db:with-db
   dbstruct
   #f
   #f
   (lambda (db)
     (let ((res #f))
       (sqlite3:for-each-row
	(lambda (val)
	  (set! res val))
	db
	(conc "SELECT " key " FROM runs WHERE id=?;")
	run-id)
       res))))

;; keys list to key1,key2,key3 ...
(define (runs:get-std-run-fields keys remfields)
  (let* ((header    (append keys remfields))
	 (keystr    (conc (keys->keystr keys) ","
			  (string-intersperse remfields ","))))
    (list keystr header)))


;; register a test run with the db, this accesses the main.db and does NOT
;; use server api
;;
(define (db:register-run dbstruct keyvals runname state status user contour-in)
  (let* ((keys      (map car keyvals))
	 (keystr    (keys->keystr keys))
	 (contour   (or contour-in ""))  ;; empty string to force no hierarcy and be backwards compatible.
	 (comma     (if (> (length keys) 0) "," ""))
	 (andstr    (if (> (length keys) 0) " AND " ""))
	 (valslots  (keys->valslots keys)) ;; ?,?,? ...
	 (allvals   (append (list runname state status user contour) (map cadr keyvals)))
	 (qryvals   (append (list runname) (map cadr keyvals)))
	 (key=?str  (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND ")))
    (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str)
    (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run")
    (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and"
	(db:with-db
	 dbstruct #f #f
	 (lambda (db)
	   (let ((res #f))
	     (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");")
		    allvals)
	     (apply sqlite3:for-each-row 
		    (lambda (id)
		      (set! res id))
		    db
		    (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");")))
		      qry)
		    qryvals)
	     (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? AND state='deleted';" state status res)
	     res))) 
	(begin
	  (debug:print-error 0 *default-log-port* "Called without all necessary keys")
	  #f))))


(define-record simple-run target id runname state status owner event_time)
(define-record-printer (simple-run x out)
  (fprintf out "#,(simple-run ~S ~S ~S ~S)"
	   (simple-run-target x) (simple-run-id x) (simple-run-runname x) (time->string (seconds->local-time (simple-run-event_time x) ))))

;; simple get-runs
;;
(define (db:simple-get-runs dbstruct runpatt count offset target)
    (let* ((res       '())
	   (keys       (db:get-keys dbstruct))
	   (runpattstr (db:patt->like "runname" runpatt))
	   (remfields  (list "id" "runname" "state" "status" "owner" "event_time"))
	   (targstr    (string-intersperse keys "||'/'||"))
	   (keystr     (conc targstr " AS target,"
			     (string-intersperse remfields ",")))
	   (qrystr     (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? "
			     ;; Generate: " AND x LIKE 'keypatt' ..."
			     " AND target LIKE '" target "'"
			     " AND state != 'deleted' ORDER BY event_time DESC "
			     (if (number? count)
				 (conc " LIMIT " count)
				 "")
			     (if (number? offset)
				 (conc " OFFSET " offset)
				 ""))))
    (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
    (db:with-db dbstruct #f #f
		(lambda (db)		
		  (sqlite3:for-each-row
		   (lambda (target id runname state status owner event_time)
		     (set! res (cons (make-simple-run target id runname state status owner event_time) res)))
		   db
		   qrystr
		   )))
    (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
    res))

;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using inmem db)
;;
(define (db:get-changed-run-ids since-time)
  (let* ((dbdir      (common:get-db-tmp-area *alldat*)) ;; (configf:lookup *configdat* "setup" "dbdir"))
	 (alldbs     (glob (conc dbdir "/[0-9]*.db")))
	 (changed    (filter (lambda (dbfile)
			       (> (file-modification-time dbfile) since-time))
			     alldbs)))
    (delete-duplicates
     (map (lambda (dbfile)
	    (let* ((res (string-match ".*\\/(\\d)*\\.db" dbfile)))
	      (if res
		  (string->number (cadr res))
		  (begin
		    (debug:print 2 *default-log-port* "WARNING: Failed to process " dbfile " for run-id")
		    0))))
	  changed))))

;; Get all targets from the db
;;
(define (db:get-targets dbstruct)
  (let* ((res       '())
	 (keys       (db:get-keys dbstruct))
	 (header     keys) ;; (map key:get-fieldname keys))
	 (keystr     (keys->keystr keys))
	 (qrystr     (conc "SELECT " keystr " FROM runs WHERE state != 'deleted';"))
	 (seen       (make-hash-table)))
    (db:with-db
     dbstruct
     #f
     #f
     (lambda (db)
       (sqlite3:for-each-row
	(lambda (a . x)
	  (let ((targ (cons a x)))
	    (if (not (hash-table-ref/default seen targ #f))
		(begin
		  (hash-table-set! seen targ #t)
		  (set! res (cons (apply vector targ) res))))))
	db
	qrystr)
       (debug:print-info 11 *default-log-port* "db:get-targets END qrystr: " qrystr )
       (vector header res)))))

;; just get count of runs
(define (db:get-num-runs dbstruct runpatt)
  (db:with-db
   dbstruct
   #f
   #f
   (lambda (db)
     (let ((numruns 0))
       (debug:print-info 11 *default-log-port* "db:get-num-runs START " runpatt)
       (sqlite3:for-each-row 
	(lambda (count)
	  (set! numruns count))
	db
	"SELECT COUNT(id) FROM runs WHERE runname LIKE ? AND state != 'deleted';" runpatt)
       (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt)
       numruns))))

;; just get count of runs
(define (db:get-runs-cnt-by-patt dbstruct runpatt targetpatt keys)
  (db:with-db
   dbstruct
   #f
   #f
   (lambda (db)
     (let ((numruns 0)
           (qry-str #f)
           (key-patt "")
	     	   (keyvals  (if targetpatt (keys:target->keyval keys targetpatt) '())))
     
    (for-each (lambda (keyval)
		(let* ((key    (car keyval))
		       (patt   (cadr keyval))
		       (fulkey (conc ":" key))
		       (wildtype (if (substring-index "%" patt) "like" "glob")))
         
		  (if patt
		      (set! key-patt (conc key-patt " AND " key " " wildtype " '" patt "'"))
		      (begin
			(debug:print-error 0 *default-log-port* "searching for runs with no pattern set for " fulkey)
			(exit 6)))))
	      keyvals)
    ;(print  runpatt " -- " key-patt)
    (set! qry-str (conc "SELECT COUNT(id) FROM runs WHERE state != 'deleted' AND runname  like '" runpatt "'" key-patt))
    ;(print qry-str )
       
       (sqlite3:for-each-row 
					 (lambda (count)
	  			    (set! numruns count))
				db
				qry-str)
       (debug:print-info 11 *default-log-port* "db:get-num-runs END " runpatt)
       numruns))))


;; (sqlite3#fold-row proc3670 init3671 db-or-stmt3672 . params3673)>
;; 
(define (db:get-raw-run-stats dbstruct run-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (sqlite3:fold-row
	(lambda (res state status count)
	  (cons (list state status count) res))
	'()
	db
	"SELECT state,status,count(id) AS count FROM tests WHERE run_id=? AND NOT(uname='n/a' AND item_path='') GROUP BY state,status;"
	run-id))))

;; Update run_stats for given run_id
;; input data is a list (state status count)
;;
(define (db:update-run-stats dbstruct run-id stats)
  ;; (mutex-lock! *db-transaction-mutex*)
  (db:with-db
   dbstruct
   #f
   #f

   (lambda (db)
     ;; remove previous data
     
     (let* ((stmt1 (sqlite3:prepare db "DELETE FROM run_stats WHERE run_id=? AND state=? AND status=?;"))
	    (stmt2 (sqlite3:prepare db "INSERT INTO run_stats (run_id,state,status,count) VALUES (?,?,?,?);"))
	    (res
	     (sqlite3:with-transaction
	      db
	      (lambda ()
		(for-each
		 (lambda (dat)
		   (sqlite3:execute stmt1 run-id (car dat)(cadr dat))
		   (apply sqlite3:execute stmt2 run-id dat))
		 stats)))))
       (sqlite3:finalize! stmt1)
       (sqlite3:finalize! stmt2)
       ;; (mutex-unlock! *db-transaction-mutex*)
       res))))

(define (db:get-main-run-stats dbstruct run-id)
  (db:with-db
   dbstruct
   #f ;; this data comes from main
   #f
   (lambda (db)
     (sqlite3:fold-row
	(lambda (res state status count)
	  (cons (list state status count) res))
	'()
	db
	"SELECT state,status,count FROM run_stats WHERE run_id=? AND run_id IN (SELECT id FROM runs WHERE state NOT IN ('DELETED','deleted'));"
	run-id))))

(define (db:print-current-query-stats)
  ;; generate stats from *db-api-call-time*
  (let ((ordered-keys (sort (hash-table-keys *db-api-call-time*)
			    (lambda (a b)
			      (let ((sum-a (common:sum (hash-table-ref *db-api-call-time* a)))
				    (sum-b (common:sum (hash-table-ref *db-api-call-time* b))))
				(> sum-a sum-b)))))
	(total        0))
    (for-each
     (lambda (cmd-key)
       (let* ((dat  (hash-table-ref *db-api-call-time* cmd-key))
	      (num  (length dat))
	      (avg  (if (> num 0)
			(/ (common:sum dat)(length dat)))))
	 (set! total (+ total num))
	 (debug:print-info 0 *default-log-port* cmd-key "\tavg: " avg " max: " (common:max dat) " min: " (common:min-max < dat) " num: " (length dat))))
     ordered-keys)
    (debug:print-info 0 *default-log-port* "TOTAL: " total " api calls since start.")))

(define (db:get-all-run-ids dbstruct)
  (db:with-db
   dbstruct
   #f
   #f
   (lambda (db)
     (let ((run-ids '()))
       (sqlite3:for-each-row
	(lambda (run-id)
	  (set! run-ids (cons run-id run-ids)))
	db
	"SELECT id FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;")
    (reverse run-ids)))))

;; get some basic run stats
;;
;; data structure:
;;
;; ( (runname (( state  count ) ... ))
;;   (   ...
;;
(define (db:get-run-stats dbstruct)
  (let* ((totals       (make-hash-table))
	 (curr         (make-hash-table))
	 (res          '())
	 (runs-info    '()))
    ;; First get all the runname/run-ids
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row
	(lambda (run-id runname)
	  (set! runs-info (cons (list run-id runname) runs-info)))
	db
	"SELECT id,runname FROM runs WHERE state != 'deleted' ORDER BY event_time DESC;"))) ;; If you change this to the more logical ASC please adjust calls to db:get-run-stats
    ;; for each run get stats data
    (for-each
     (lambda (run-info)
       ;; get the net state/status counts for this run
       (let* ((run-id   (car  run-info))
	      (run-name (cadr run-info)))
	 (db:with-db
	  dbstruct
	  run-id
	  #f
	  (lambda (db)
	    (sqlite3:for-each-row
	     (lambda (state status count)
	       (let ((netstate (if (equal? state "COMPLETED") status state)))
		 (if (string? netstate)
		     (begin
		       (hash-table-set! totals netstate (+ (hash-table-ref/default totals netstate 0) count))
		       (hash-table-set! curr   netstate (+ (hash-table-ref/default curr   netstate 0) count))))))
	     db
	     "SELECT state,status,count(id) FROM tests AS t WHERE run_id=? GROUP BY state,status ORDER BY state,status DESC;"
             run-id)
	    ;; add the per run counts to res
	    (for-each (lambda (state)
			(set! res (cons (list run-name state (hash-table-ref curr state)) res)))
		      (sort (hash-table-keys curr) string>=))
	    (set! curr (make-hash-table))))))
     runs-info)
    (for-each (lambda (state)
		(set! res (cons (list "Totals" state (hash-table-ref totals state)) res)))
	      (sort (hash-table-keys totals) string>=))
    res))

;; db:get-runs-by-patt
;; get runs by list of criteria
;; register a test run with the db
;;
;; Use: (db:get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;;  to extract info from the structure returned
;;
(define (db:get-runs-by-patt dbstruct keys runnamepatt targpatt offset limit fields last-update  sort-order ) ;; test-name)
  (let* ((tmp      (runs:get-std-run-fields keys (or fields '("id" "runname" "state" "status" "owner" "event_time"))))
	 (keystr   (car tmp))
	 (header   (cadr tmp))
	 (key-patt "")
	 (runwildtype (if (substring-index "%" runnamepatt) "like" "glob"))
	 (qry-str  #f)
	 (keyvals  (if targpatt (keys:target->keyval keys targpatt) '())))
    (for-each (lambda (keyval)
		(let* ((key    (car keyval))
		       (patt   (cadr keyval))
		       (fulkey (conc ":" key))
		       (wildtype (if (substring-index "%" patt) "like" "glob")))
		  (if patt
		      (set! key-patt (conc key-patt " AND " key " " wildtype " '" patt "'"))
		      (begin
			(debug:print-error 0 *default-log-port* "searching for runs with no pattern set for " fulkey)
			(exit 6)))))
	      keyvals)
    (set! qry-str (conc "SELECT " keystr " FROM runs WHERE state != 'deleted' AND runname " runwildtype " ? " key-patt 
			(if last-update
			       (conc " AND last_update >= " last-update " ")
			       " ")
			" ORDER BY event_time " sort-order " " 
			(if limit  (conc " LIMIT " limit)   "")
			(if offset (conc " OFFSET " offset) "")
			";"))
    (debug:print-info 4 *default-log-port* "runs:get-runs-by-patt qry=" qry-str " " runnamepatt)
    ;(print "runs:get-runs-by-patt qry=" qry-str " " runnamepatt)

    (vector header 
            (reverse
             (db:with-db dbstruct #f #f ;; reads db, does not write to it.
                         (lambda (db)
                           (sqlite3:fold-row
                            (lambda (res . r)
                              (cons (list->vector r) res))
                            '()
                            db
                            qry-str
                            runnamepatt)))))))

;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector
;;       this is inconsistent with get-runs but it makes some sense.
;;
(define (db:get-run-info dbstruct run-id)
  ;;(if (hash-table-ref/default *run-info-cache* run-id #f)
  ;;    (hash-table-ref *run-info-cache* run-id)
  (let* ((res       (vector #f #f #f #f))
	 (keys      (db:get-keys dbstruct))
	 (remfields (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")) ;;  "area_id"))
	 (header    (append keys remfields))
	 (keystr    (conc (keys->keystr keys) ","
			  (string-intersperse remfields ","))))
    (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr)
    
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row
	(lambda (a . x)
	  (set! res (apply vector a x)))
	db 
	(conc "SELECT " keystr " FROM runs WHERE id=?;")
	run-id)))
    (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr)
    (let ((finalres (vector header res)))
      ;; (hash-table-set! *run-info-cache* run-id finalres)
      finalres)))

(define (db:set-comment-for-run dbstruct run-id comment)
  (db:with-db
   dbstruct #f #f
   (lambda (db)
     (sqlite3:execute db "UPDATE runs SET comment=? WHERE id=?;" comment ;; (sdb:qry 'getid comment)
		      run-id))))

;; does not (obviously!) removed dependent data. But why not!!?
(define (db:delete-run dbstruct run-id)
  (db:with-db
   dbstruct #f #f
   (lambda (db)
     (sqlite3:with-transaction
      db
      (lambda ()
        (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);" run-id)
        (sqlite3:execute db "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE run_id=?);"  run-id)
        (sqlite3:execute db "UPDATE tests SET state='DELETED',comment='' WHERE run_id=?;" run-id)
        ;; (db:delay-if-busy dbdat)
        (sqlite3:execute db "UPDATE runs SET state='deleted',comment='' WHERE id=?;" run-id))))))

(define (db:update-run-event_time dbstruct run-id)
  (db:with-db
   dbstruct #f #t
   (lambda (db)
     (sqlite3:execute db "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;" run-id))))

(define (db:lock/unlock-run dbstruct run-id lock unlock user)
  (db:with-db
   dbstruct #f #t
   (lambda (db)
     (let ((newlockval (if lock "locked"
			   (if unlock
			       "unlocked"
			       "locked")))) ;; semi-failsafe
       (sqlite3:execute db "UPDATE runs SET state=? WHERE id=?;" newlockval run-id)
       (sqlite3:execute db "INSERT INTO access_log (user,accessed,args) VALUES(?,strftime('%s','now'),?);"
			user (conc newlockval " " run-id))
       (debug:print-info 1 *default-log-port* "" newlockval " run number " run-id)))))

(define (db:set-run-status dbstruct run-id status msg)
  (db:with-db
   dbstruct #f #f
   (lambda (db)
       (if msg
         (sqlite3:execute db "UPDATE runs SET status=?,comment=? WHERE id=?;" status msg run-id)
         (sqlite3:execute db "UPDATE runs SET status=? WHERE id=?;" status run-id)))))

(define (db:set-run-state-status dbstruct run-id state status )
  (db:with-db
   dbstruct #f #f
   (lambda (db)
          (sqlite3:execute db "UPDATE runs SET status=?,state=? WHERE id=?;" status state run-id))))



(define (db:get-run-status dbstruct run-id)
  (let ((res "n/a"))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row 
	(lambda (status)
	  (set! res status))
	db
	"SELECT status FROM runs WHERE id=?;" 
	run-id)
       res))))

(define (db:get-run-state dbstruct run-id)
  (let ((res "n/a"))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row 
	(lambda (status)
	  (set! res status))
	db
	"SELECT state FROM runs WHERE id=?;" 
	run-id)
       res))))


;;======================================================================
;; K E Y S
;;======================================================================

;; get key val pairs for a given run-id
;; ( (FIELDNAME1 keyval1) (FIELDNAME2 keyval2) ... )
(define (db:get-key-val-pairs dbstruct run-id)
  (let* ((keys (db:get-keys dbstruct))
	 (res  '()))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (for-each 
	(lambda (key)
	  (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;")))
	    (sqlite3:for-each-row 
	     (lambda (key-val)
	       (set! res (cons (list key (if (string? key-val) key-val "")) res))) ;; replace non-string bad values with empty string to prevent crashes. This scenario can happen when Megatest is killed on updating the db
	     db qry run-id)))
	keys)))
       (reverse res)))

;; get key vals for a given run-id
(define (db:get-key-vals dbstruct run-id)
  (let* ((keys (db:get-keys dbstruct))
	 (res  '()))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (for-each 
	(lambda (key)
	  (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;")))
	    ;; (db:delay-if-busy dbdat)
	    (sqlite3:for-each-row 
	     (lambda (key-val)
	       (set! res (cons (if (string? key-val) key-val "") res))) ;; check that the key-val is a string for cases where a crash injected bad data in the megatest.db
	     db qry run-id)))
	keys)))
    (let ((final-res (reverse res)))
      (hash-table-set! *keyvals* run-id final-res)
      final-res)))

;; The target is keyval1/keyval2..., cached in *target* as it is used often
(define (db:get-target dbstruct run-id)
  (let* ((keyvals (db:get-key-vals dbstruct run-id))
	 (thekey  (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/")))
    thekey))

;; Get run-ids for runs with same target but different runnames and NOT run-id
;;
(define (db:get-prev-run-ids dbstruct run-id)
  (let* ((keyvals (db:get-key-val-pairs dbstruct run-id))
	 (kvalues (map cadr keyvals))
	 (keys    (rmt:get-keys))
	 (qrystr  (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")))
    (let ((prev-run-ids '()))
      (if (null? keyvals)
          '()
          (begin
            (db:with-db dbstruct #f #f ;; #f means work with the zeroth db - i.e. the runs db
                        (lambda (db)
                          (apply sqlite3:for-each-row
                                 (lambda (id)
                                   (set! prev-run-ids (cons id prev-run-ids)))
                                 db
                                 (conc "SELECT id FROM runs WHERE " qrystr " AND state != 'deleted' AND id != ?;")
                                 (append kvalues (list run-id)))))
            prev-run-ids)))))

;;======================================================================
;;  T E S T S
;;======================================================================

(define (db:get-tests-for-run-state-status dbstruct run-id testpatt)
  (let* ((res            '())
	 (tests-match-qry (tests:match->sqlqry testpatt))
	 (qry             (conc "SELECT id,testname,item_path,state,status FROM tests WHERE run_id=? " 
				(if tests-match-qry (conc " AND (" tests-match-qry ") ") ""))))
    (debug:print-info 8 *default-log-port* "db:get-tests-for-run qry=" qry)
    (db:with-db dbstruct run-id #f
		(lambda (db)
		  (sqlite3:for-each-row
		   (lambda (id testname item-path state status)
		     ;;                      id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment
		     (set! res (cons (vector id run-id testname state status -1         ""     -1      -1       ""    "-"  item-path -1           "-"         "-") res)))
		   db 
		   qry
		   run-id)))
    res))

(define (db:get-testinfo-state-status dbstruct run-id test-id)
  (let ((res            #f))
    (db:with-db dbstruct run-id #f
		(lambda (db)
		  (sqlite3:for-each-row
		   (lambda (run-id testname item-path state status)
		     ;; id,run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment
		     (set! res (vector test-id run-id testname state status -1 "" -1 -1 "" "-" item-path -1 "-" "-")))
		   db 
		   "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=?;" 
		   test-id)))
    res))

;; do not use.
;;
(define (db:get-tests-for-runs dbstruct run-ids testpatt states statuses #!key (not-in #f)(qryvals #f))
  ;; (db:delay-if-busy)
  (let ((res '()))
    (for-each 
     (lambda (run-id)
       (set! res (append 
		  res 
		  (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f qryvals #f 'normal))))
     (if run-ids
	 run-ids
	 (db:get-all-run-ids dbstruct)))
    res))

;; Convert calling routines to get list of run-ids and loop, do not use the get-tests-for-runs
;;

(define (db:delete-test-records dbstruct run-id test-id)
  (db:general-call dbstruct 'delete-test-step-records (list test-id))
  (db:general-call dbstruct 'delete-test-data-records (list test-id))
  (db:with-db
   dbstruct #f #f
   (lambda (db)
     (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id))))

;; 
(define (db:delete-old-deleted-test-records dbstruct)
  (let (;; (run-ids  (db:get-all-run-ids dbstruct))
	(targtime (- (current-seconds)(* 30 24 60 60)))) ;; one month in the past
    (db:with-db
     dbstruct
     0
     #t
     (lambda (db)
       (sqlite3:with-transaction
	db
	(lambda ()
	  (sqlite3:execute db "DELETE FROM test_steps WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_time<?);" targtime)
	  (sqlite3:execute db "DELETE FROM test_data WHERE test_id IN (SELECT id FROM tests WHERE state='DELETED' AND event_time<?);" targtime)
	  (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED' AND event_time<?;" targtime)))))))

;; set tests with state currstate and status currstatus to newstate and newstatus
;; use currstate = #f and or currstatus = #f to apply to any state or status respectively
;; WARNING: SQL injection risk. NB// See new but not yet used "faster" version below
;;
;;  AND NOT (item_path='' AND testname in (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")))
;;  (debug:print 0 *default-log-port* "QRY: " qry)
;;  (db:delay-if-busy)
;;
;; NB// This call only operates on toplevel tests. Consider replacing it with more general call
;;
(define (db:set-tests-state-status dbstruct run-id testnames currstate currstatus newstate newstatus)
  (let ((test-ids '()))
    (for-each
     (lambda (testname)
       (let ((qry (conc "UPDATE tests SET state=?,status=? WHERE "
			(if currstate  (conc "state='" currstate "' AND ") "")
			(if currstatus (conc "status='" currstatus "' AND ") "")
			" run_id=? AND testname LIKE ?;"))
	     (test-id (db:get-test-id dbstruct run-id testname "")))
	 (db:with-db
	  dbstruct
	  run-id
	  #t
	  (lambda (db)
	    (sqlite3:execute db qry
			     (or newstate  currstate "NOT_STARTED")
			     (or newstatus currstate "UNKNOWN")
			     run-id testname)))
	 (if test-id
	     (begin
	       (set! test-ids (cons test-id test-ids))
	       (mt:process-triggers dbstruct run-id test-id newstate newstatus)))))
     testnames)
    test-ids))

;; ;; speed up for common cases with a little logic
;; ;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id
;;
;;      NOTE: run-id is not used
;; ;;
(define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment)
  (db:with-db
   dbstruct
   ;; run-id
   #f
   #t
   (lambda (db)
     (cond
      ((and newstate newstatus newcomment)
       (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment)
			test-id))
      ((and newstate newstatus)
       (sqlite3:execute db "UPDATE tests SET state=?,status=? WHERE id=?;" newstate newstatus test-id))
      (else
       (if newstate   (sqlite3:execute db "UPDATE tests SET state=?   WHERE id=?;" newstate   test-id))
       (if newstatus  (sqlite3:execute db "UPDATE tests SET status=?  WHERE id=?;" newstatus  test-id))
       (if newcomment (sqlite3:execute db "UPDATE tests SET comment=? WHERE id=?;" newcomment ;; (sdb:qry 'getid newcomment)
				       test-id))))))
  (mt:process-triggers dbstruct run-id test-id newstate newstatus))

;; NEW BEHAVIOR: Count tests running in all runs!
;;
(define (db:get-count-tests-running dbstruct run-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (sqlite3:first-result 
      db
      ;; WARNING BUG EDIT ME - merged from v1.55 - not sure what is right here ...
      ;; AND run_id NOT IN (SELECT id FROM runs WHERE state='deleted')
      "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '');"
      ;; "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;" 
      ))))

;; NEW BEHAVIOR: Count tests running in only one run!
;;
(define (db:get-count-tests-actually-running dbstruct run-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (sqlite3:first-result
      db
      ;; WARNING BUG EDIT ME - merged from v1.55 - not sure what is right here ...
      ;; "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id NOT IN (SELECT id FROM runs WHERE state='deleted') AND NOT (uname = 'n/a' AND item_path = '');")
      "SELECT count(id) FROM tests WHERE state in ('RUNNING','REMOTEHOSTSTART','LAUNCHED') AND run_id=?;" 
      run-id)))) ;; NOT IN (SELECT id FROM runs WHERE state='deleted');")

;; NEW BEHAVIOR: Look only at single run with run-id
;; 
;; (define (db:get-running-stats dbstruct run-id)
(define (db:get-count-tests-running-for-run-id dbstruct run-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (sqlite3:first-result
      db
      "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;" run-id))))

;; For a given testname how many items are running? Used to determine
;; probability for regenerating html
;; 
(define (db:get-count-tests-running-for-testname dbstruct run-id testname)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (sqlite3:first-result
      db
      "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;" run-id testname))))


(define (db:get-not-completed-cnt dbstruct run-id)
(db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
      ;(print "SELECT count(id) FROM tests WHERE state not in ('COMPLETED', 'DELETED') AND run_id=" run-id)  
     (sqlite3:first-result
      db
      "SELECT count(id) FROM tests WHERE state not in ('COMPLETED', 'DELETED') AND run_id=?;" run-id))))

(define (db:get-count-tests-running-in-jobgroup dbstruct run-id jobgroup)
  (if (not jobgroup)
      0 ;; 
      (let ((testnames '()))
	;; get the testnames
	(db:with-db
	 dbstruct #f #f
	 (lambda (db)
	   (sqlite3:for-each-row
	    (lambda (testname)
	      (set! testnames (cons testname testnames)))
	    db
	    "SELECT testname FROM test_meta WHERE jobgroup=?"
	    jobgroup)))
	;; get the jobcount NB// EXTEND THIS TO OPPERATE OVER ALL RUNS?
	(if (not (null? testnames))
	    (db:with-db
	     dbstruct
	     run-id
	     #f
	     (lambda (db)
	       (sqlite3:first-result
		db
		(conc "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND testname in ('"
		      (string-intersperse testnames "','")
		      "') AND NOT (uname = 'n/a' AND item_path='');")) ;; should this include the (uname = 'n/a' ...) ???
	       ))
	    0))))

;; tags: '("tag%" "tag2" "%ag6")
;;

;; done with run when:
;;   0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING
(define (db:estimated-tests-remaining dbstruct run-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (sqlite3:first-result
      db
      "SELECT count(id) FROM tests WHERE state in ('LAUNCHED','NOT_STARTED','REMOTEHOSTSTART','RUNNING','KILLREQ') AND run_id=?;")
     run-id)))

;; map run-id, testname item-path to test-id
(define (db:get-test-id dbstruct run-id testname item-path)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (db:first-result-default
      db
      "SELECT id FROM tests WHERE testname=? AND item_path=? AND run_id=?;"
      #f ;; the default
      testname item-path run-id))))

;; overload the unused attemptnum field for the process id of the runscript or 
;; ezsteps step script in progress
;;
(define (db:test-set-top-process-pid dbstruct run-id test-id pid)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (sqlite3:execute db "UPDATE tests SET attemptnum=? WHERE id=?;"
		      pid test-id))))

(define (db:test-get-top-process-pid dbstruct run-id test-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (db:first-result-default 
      db
      "SELECT attemptnum FROM tests WHERE id=?;"
      #f
      test-id))))

;; fields *must* be a non-empty list
;;
(define (db:field->number fieldname fields)
  (if (null? fields)
      #f
      (let loop ((hed  (car fields))
		 (tal  (cdr fields))
		 (indx 0))
	(if (equal? fieldname hed)
	    indx
	    (if (null? tal)
		#f
		(loop (car tal)(cdr tal)(+ indx 1)))))))


;; NOTE: Use db:test-get* to access records
;; NOTE: This needs rundir decoding? Decide, decode here or where used? For the moment decode where used.
(define (db:get-all-tests-info-by-run-id dbstruct run-id)
  (let* ((res '()))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row
	(lambda (id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived)
	  ;;                 0    1       2      3      4        5       6      7        8     9     10      11          12          13       14     15        16
	  (set! res (cons (vector id run-id testname state status event-time host cpuload diskfree uname rundir item-path run-duration final-logf comment shortdir attemptnum archived)
			  res)))
	db
	(conc "SELECT " db:test-record-qry-selector " FROM tests WHERE state != 'DELETED' AND run_id=?;")
	run-id)))
    res))

(define (db:replace-test-records dbstruct run-id testrecs)
  (db:with-db dbstruct run-id #t 
	      (lambda (db)
		(let* ((qmarks (string-intersperse (make-list (length db:test-record-fields) "?") ","))
		       (qrystr (conc "INSERT OR REPLACE INTO tests (" db:test-record-qry-selector ") VALUES (" qmarks ") WHERE run_id=?;"))
		       (qry    (sqlite3:prepare db qrystr)))
		  (debug:print 0 *default-log-port* "INFO: migrating test records for run with id " run-id)
		  (sqlite3:with-transaction
		   db
		   (lambda ()
		     (for-each 
		      (lambda (rec)
			;; (debug:print 0 *default-log-port* "INFO: Inserting values: " (string-intersperse (map conc (vector->list rec)) ",") "\n")
			(apply sqlite3:execute qry (append (vector->list rec)(list run-id))))
		      testrecs)))
		  (sqlite3:finalize! qry)))))

;; map a test-id into the proper range
;;
(define (db:adj-test-id mtdb min-test-id test-id)
  (if (>= test-id min-test-id)
      test-id
      (let loop ((new-id min-test-id))
	(let ((test-id-found #f))
	  (sqlite3:for-each-row 
	   (lambda (id)
	     (set! test-id-found id))
	   (db:dbdat-get-db mtdb)
	   "SELECT id FROM tests WHERE id=?;"
	   new-id)
	  ;; if test-id-found then need to try again
	  (if test-id-found
	      (loop (+ new-id 1))
	      (begin
		(debug:print-info 0 *default-log-port* "New test id " new-id " selected for test with id " test-id)
		(sqlite3:execute mtdb "UPDATE tests SET id=? WHERE id=?;" new-id test-id)))))))

;; move test ids into the 30k * run_id range
;;
(define (db:prep-megatest.db-adj-test-ids mtdb run-id testrecs)
  (debug:print-info 0 *default-log-port* "Adjusting test ids in megatest.db for run " run-id)
  (let ((min-test-id (* run-id 30000)))
    (for-each 
     (lambda (testrec)
       (let* ((test-id (vector-ref testrec (db:field->number "id" db:test-record-fields))))
	 (db:adj-test-id (db:dbdat-get-db mtdb) min-test-id test-id)))
     testrecs)))
	
;; 1. move test ids into the 30k * run_id range
;; 2. move step ids into the 30k * run_id range
;;
(define (db:prep-megatest.db-for-migration mtdb)
  (let* ((run-ids (db:get-all-run-ids mtdb)))
    (for-each 
     (lambda (run-id)
       (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id)))
	 (db:prep-megatest.db-adj-test-ids (db:dbdat-get-db mtdb) run-id testrecs)))
     run-ids)))

;; Get test data using test_id, run-id is not used
;; 
(define (db:get-test-info-by-id dbstruct run-id test-id)
  (db:with-db
   dbstruct
   #f ;; run-id
   #f
   (lambda (db)
     (let ((res #f))
       (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
	(lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update)
	  ;;                0    1       2      3      4        5       6      7        8     9     10      11          12          13           14         15          16
	  (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update)))
	db
	(conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;")
	test-id)
       res))))

;; Use db:test-get* to access
;; Get test data using test_ids. NB// Only works within a single run!!
;;
(define (db:get-test-info-by-ids dbstruct run-id test-ids)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (let ((res '()))
       (sqlite3:for-each-row
	(lambda (a . b)
	  ;;                 0    1       2      3      4        5       6      7        8     9     10      11          12          13       14
	  (set! res (cons (apply vector a b) res)))
	db
	(conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in ("
	      (string-intersperse (map conc test-ids) ",") ");"))
       res))))

(define (db:get-test-info dbstruct run-id test-name item-path)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (let ((res #f))
       (sqlite3:for-each-row
	(lambda (a . b)
	  (set! res (apply vector a b)))
	db
	(conc "SELECT " db:test-record-qry-selector " FROM tests WHERE testname=? AND item_path=? AND run_id=?;")
	test-name item-path run-id)
       res))))

(define (db:test-get-rundir-from-test-id dbstruct run-id test-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (db:first-result-default
      db
      "SELECT rundir FROM tests WHERE id=?;"
      #f ;; default result
      test-id))))

(define (db:get-test-times dbstruct run-name target)
  (let ((res `())
        (qry 	(conc "select testname, item_path, run_duration, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname = ? and target = ?  ;")))
   
  (db:with-db 
    dbstruct
    #f ;; this is for the main runs db
    #f ;; does not modify db
    (lambda (db)
            (sqlite3:for-each-row
	(lambda (test-name item-path test-time target )
	  (set! res (cons (vector test-name item-path test-time) res)))
	db
        qry 
	run-name target)
       res))))

(define (db:get-test-times dbstruct run-name target)
  (let ((res `())
        (qry 	(conc "select testname, item_path, run_duration, " (string-join (db:get-keys dbstruct) " || '/' || ") " as target from tests inner join runs on tests.run_id = runs.id where runs.runname = ? and target = ?  ;")))
   
  (db:with-db 
    dbstruct
    #f ;; this is for the main runs db
    #f ;; does not modify db
    (lambda (db)
            (sqlite3:for-each-row
	(lambda (test-name item-path test-time target )
	  (set! res (cons (vector test-name item-path test-time) res)))
	db
        qry 
	run-name target)
       res))))

;;======================================================================
;; S T E P S
;;======================================================================

(define (db:teststep-set-status! dbstruct run-id test-id teststep-name state-in status-in comment logfile)
  (db:with-db
   dbstruct
   run-id
   #t
   (lambda (db)
     (sqlite3:execute 
      db
      "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);"
      test-id teststep-name state-in status-in (current-seconds)
      (if comment comment "")
      (if logfile logfile "")))))



(define (db:delete-steps-for-test! dbstruct run-id test-id)
  ;; TODO: figure out why status is the key field rather than state (note:  CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state) )
  (db:with-db
   dbstruct
   run-id
   #t
   (lambda (db)
     (sqlite3:execute 
      db
      "UPDATE test_steps set status='DELETED' where test_id=?" ;;  and run_id=? !! - run_id not in table (bummer) TODO: get run_id into schema for test_steps
      test-id))))


;; db-get-test-steps-for-run
(define (db:get-steps-for-test dbstruct run-id test-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (let* ((res '()))
       (sqlite3:for-each-row 
	(lambda (id test-id stepname state status event-time logfile comment)
	  (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment) res)))
	db
	"SELECT id,test_id,stepname,state,status,event_time,logfile,comment FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC;
	test-id)
       (reverse res)))))

 (define (db:get-steps-info-by-id dbstruct  test-step-id)
   (db:with-db
    dbstruct
    #f 
    #f
    (lambda (db)
      (let* ((res (vector #f #f #f #f #f #f #f #f #f)))
        (sqlite3:for-each-row 
       (lambda (id test-id stepname state status event-time logfile comment last-update)
         (set! res (vector id test-id stepname state status event-time (if (string? logfile) logfile "") comment last-update)))
       db
       "SELECT id,test_id,stepname,state,status,event_time,logfile,comment,last_update FROM test_steps WHERE id=? ORDER BY id ASC;" ;; event_time DESC,id ASC;
       test-step-id)
        res))))

(define (db:get-steps-data dbstruct run-id test-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (let ((res '()))
       (sqlite3:for-each-row 
	(lambda (id test-id stepname state status event-time logfile)
	  (set! res (cons (vector id test-id stepname state status event-time (if (string? logfile) logfile "")) res)))
	db
	"SELECT id,test_id,stepname,state,status,event_time,logfile FROM test_steps WHERE status != 'DELETED' AND test_id=? ORDER BY id ASC;" ;; event_time DESC,id ASC;
	test-id)
       (reverse res)))))

;;======================================================================
;; T E S T  D A T A 
;;======================================================================

 (define (db:get-data-info-by-id dbstruct  test-data-id)
   (db:with-db
    dbstruct
    #f 
    #f
    (lambda (db)
      (let* ((res (vector #f #f #f #f #f #f #f #f #f #f #f #f)))
        (sqlite3:for-each-row 
       (lambda (id test-id  category variable value expected tol units comment status type last-update)
         (set! res (vector id test-id  category variable value expected tol units comment status type last-update)))
       db
       "SELECT id,test_id, category, variable, value, expected, tol, units, comment, status, type, last_update FROM test_data WHERE id=? ORDER BY id ASC;" ;; event_time DESC,id ASC;
       test-data-id)
        res))))


;; WARNING: Do NOT call this for the parent test on an iterated test
;; Roll up test_data pass/fail results
;; look at the test_data status field, 
;;    if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS.
;;    if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored
(define (db:test-data-rollup dbstruct run-id test-id status)
  (let* ((fail-count 0)
	 (pass-count 0))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row
	(lambda (fcount pcount)
	  (set! fail-count fcount)
	  (set! pass-count pcount))
	db 
	"SELECT (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'fail') AS fail_count,
             (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'pass') AS pass_count;"
	test-id test-id)
       ;; Now rollup the counts to the central megatest.db
       (db:general-call dbstruct 'pass-fail-counts (list pass-count fail-count test-id))
       ;; if the test is not FAIL then set status based on the fail and pass counts.
       (db:general-call dbstruct 'test_data-pf-rollup (list test-id test-id test-id test-id))))))

;; each section is a rule except "final" which is the final result
;;
;; [rule-5]
;; operator in
;; section LogFileBody
;; desc Output voltage
;; status OK
;; expected 1.9
;; measured 1.8
;; type +/-
;; tolerance 0.1
;; pass 1
;; fail 0
;; 
;; [final]
;; exit-code 6
;; exit-status SKIP
;; message If flagged we are asking for this to exit with code 6
;;
;; recorded in steps table:
;;   category: stepname
;;   variable: rule-N
;;   value:    measured
;;   expected: expected
;;   tol:      tolerance
;;   units:    -
;;   comment:  desc or message
;;   status:   status
;;   type:     type
;; 
(define (db:logpro-dat->csv dat stepname)
  (let ((res '()))
    (for-each
     (lambda (entry-name)
       (if (equal? entry-name "final")
	   (set! res (append 
		      res
		      (list
		       (list stepname
			     entry-name
			     (configf:lookup dat entry-name "exit-code")    ;; 0 ;; Value
			     0                                              ;; 1 ;; Expected
			     0                                              ;; 2 ;; Tolerance
			     "n/a"					    ;; 3 ;; Units
			     (configf:lookup dat entry-name "message")      ;; 4 ;; Comment
			     (configf:lookup dat entry-name "exit-status")  ;; 5 ;; Status
			     "logpro"                                       ;; 6 ;; Type
			     ))))
	   (let* ((value     (or (configf:lookup dat entry-name "measured")  "n/a"))
		  (expected  (or (configf:lookup dat entry-name "expected")  0.0))
		  (tolerance (or (configf:lookup dat entry-name "tolerance") 0.0))
		  (comment   (or (configf:lookup dat entry-name "comment")
				 (configf:lookup dat entry-name "desc")      "n/a"))
		  (status    (or (configf:lookup dat entry-name "status")    "n/a"))
		  (type      (or (configf:lookup dat entry-name "expected")  "n/a")))
	     (set! res (append
			res  
			(list (list stepname
				    entry-name 
				    value        ;; 0
				    expected     ;; 1
				    tolerance    ;; 2
				    "n/a"        ;; 3 Units
				    comment      ;; 4
				    status       ;; 5
				    type         ;; 6
				    )))))))
     (hash-table-keys dat))
    res))

;; $MT_MEGATEST -load-test-data << EOF
;; foo,bar,   1.2,  1.9, >
;; foo,rab, 1.0e9, 10e9, 1e9
;; foo,bla,   1.2,  1.9, <
;; foo,bal,   1.2,  1.2, <   ,     ,Check for overload
;; foo,alb,   1.2,  1.2, <=  , Amps,This is the high power circuit test
;; foo,abl,   1.2,  1.3, 0.1
;; foo,bra,   1.2, pass, silly stuff
;; faz,bar,    10,  8mA,     ,     ,"this is a comment"
;; EOF

(define (db:csv->test-data dbstruct run-id test-id csvdata)
  (debug:print 4 *default-log-port* "test-id " test-id ", csvdata: " csvdata)
  (db:with-db
   dbstruct #f #f
   (lambda (db)
     (let* ((csvlist (csv->list (make-csv-reader
				 (open-input-string csvdata)
				 '((strip-leading-whitespace? #t)
				   (strip-trailing-whitespace? #t)))))) ;; (csv->list csvdata)))
       (for-each
	(lambda (csvrow)
	  (let* ((padded-row  (take (append csvrow (list #f #f #f #f #f #f #f #f #f)) 9))
		 (category    (list-ref padded-row 0))
		 (variable    (list-ref padded-row 1))
		 (value       (any->number-if-possible (list-ref padded-row 2)))
		 (expected    (any->number-if-possible (list-ref padded-row 3)))
		 (tol         (any->number-if-possible (list-ref padded-row 4))) ;; >, <, >=, <=, or a number
		 (units       (list-ref padded-row 5))
		 (comment     (list-ref padded-row 6))
		 (status      (let ((s (list-ref padded-row 7)))
				(if (and (string? s)(or (string-match (regexp "^\\s*$") s)
							(string-match (regexp "^n/a$") s)))
				    #f
				    s))) ;; if specified on the input then use, else calculate
		 (type        (list-ref padded-row 8)))
	    ;; look up expected,tol,units from previous best fit test if they are all either #f or ''
	    (debug:print 4 *default-log-port* "BEFORE: category: " category " variable: " variable " value: " value 
			 ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment " type: " type)
	    
	    (if (and (or (not expected)(equal? expected ""))
		     (or (not tol)     (equal? expected ""))
		     (or (not units)   (equal? expected "")))
		(let-values (((new-expected new-tol new-units)(tdb:get-prev-tol-for-test #f test-id category variable)))
		  (set! expected new-expected)
		  (set! tol      new-tol)
		  (set! units    new-units)))
	    
	    (debug:print 4 *default-log-port* "AFTER:  category: " category " variable: " variable " value: " value 
			 ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment)
	    ;; calculate status if NOT specified
	    (if (and (not status)(number? expected)(number? value)) ;; need expected and value to be numbers
		(if (number? tol) ;; if tol is a number then we do the standard comparison
		    (let* ((max-val (+ expected tol))
			   (min-val (- expected tol))
			   (result  (and (>=  value min-val)(<= value max-val))))
		      (debug:print 4 *default-log-port* "max-val: " max-val " min-val: " min-val " result: " result)
		      (set! status (if result "pass" "fail")))
		    (set! status ;; NB// need to assess each one (i.e. not return operator since need to act if not valid op.
		      (case (string->symbol tol) ;; tol should be >, <, >=, <=
			((>)  (if (>  value expected) "pass" "fail"))
			((<)  (if (<  value expected) "pass" "fail"))
			((>=) (if (>= value expected) "pass" "fail"))
			((<=) (if (<= value expected) "pass" "fail"))
			(else (conc "ERROR: bad tol comparator " tol))))))
	    (debug:print 4 *default-log-port* "AFTER2: category: " category " variable: " variable " value: " value 
			 ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment)
	    ;; (db:delay-if-busy dbdat)
	    (sqlite3:execute db "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment,status,type) VALUES (?,?,?,?,?,?,?,?,?,?);"
			     test-id category variable value expected tol units (if comment comment "") status type)))
	csvlist)))))

;; This routine moved from tdb.scm, tdb:read-test-data
;;
(define (db:read-test-data dbstruct run-id test-id categorypatt)
  (let* ((res '()))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row 
	(lambda (id test_id category variable value expected tol units comment status type)
	  (set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
	db
	"SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? ORDER BY category,variable;" test-id categorypatt)
       (reverse res)))))

;; This routine moved from tdb.scm, :read-test-data
;;
(define (db:read-test-data* dbstruct run-id test-id categorypatt varpatt)
  (let* ((res '()))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row 
	(lambda (id test_id category variable value expected tol units comment status type)
	  (set! res (cons (vector id test_id category variable value expected tol units comment status type) res)))
	db
	"SELECT id,test_id,category,variable,value,expected,tol,units,comment,status,type FROM test_data WHERE test_id=? AND category LIKE ? AND variable LIKE ? ORDER BY category,variable;" test-id categorypatt varpatt)
       (reverse res)))))


;;======================================================================
;; Misc. test related queries
;;======================================================================

(define (db:get-run-ids-matching-target dbstruct keynames target res runname testpatt statepatt statuspatt)
  (db:with-db
   dbstruct #f #f
   (lambda (db)
     (let* ((row-ids '())
	    (keystr (string-intersperse 
		     (map (lambda (key val)
			    (conc key " like '" val "'"))
			  keynames 
			  (string-split target "/"))
		     " AND "))
	    ;; (testqry (tests:match->sqlqry testpatt))
	    (runsqry (sqlite3:prepare db (conc "SELECT id FROM runs WHERE " keystr " AND runname LIKE '" runname "';"))))
       ;; (debug:print 8 *default-log-port* "db:test-get-paths-matching-keynames-target-new\n  runsqry=" runsqry "\n  tstsqry=" testqry)
       (sqlite3:for-each-row
	(lambda (rid)
	  (set! row-ids (cons rid row-ids)))
	runsqry)
       (sqlite3:finalize! runsqry)
       row-ids))))

;; finds latest matching all patts for given run-id
;;
(define (db:test-get-paths-matching-keynames-target-new dbstruct run-id keynames target res testpatt statepatt statuspatt runname)
  (let* ((testqry (tests:match->sqlqry testpatt))
	 (tstsqry (conc "SELECT rundir FROM tests WHERE run_id=? AND " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;")))
    (db:with-db
     dbstruct
     run-id
     #f
     (lambda (db)
       (sqlite3:for-each-row 
	(lambda (p)
	  (set! res (cons p res)))
	db
	tstsqry
	run-id)
       res))))

(define (db:test-toplevel-num-items dbstruct run-id testname)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (let ((res 0))
       (sqlite3:for-each-row
	(lambda (num-items)
	  (set! res num-items))
	db
	"SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND state NOT IN ('DELETED');"
	run-id
	testname)
       res))))

;;======================================================================
;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS
;;======================================================================

;; ; This is to be the big daddy call NOPE: Replaced by db:set-state-status-and-roll-up-items
;; ;
;; define (db:test-set-state-status dbstruct run-id test-id state status msg)
;;  (let ((dbdat  (db:get-db dbstruct run-id)))
;;    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
;; 	(db:general-call dbdat 'set-test-start-time (list test-id)))
;;    ;; (if msg
;;    ;; 	(db:general-call dbdat 'state-status-msg (list state status msg test-id))
;;    ;; 	(db:general-call dbdat 'state-status     (list state status test-id)))
;;    (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg)
;;    ;; process the test_data table
;;    (if (and test-id state status (equal? status "AUTO")) 
;; 	(db:test-data-rollup dbstruct run-id test-id status))
;;    (mt:process-triggers dbstruct run-id test-id state status)))

;; state is the priority rollup of all states
;; status is the priority rollup of all completed statesfu
;;
;; if test-name is an integer work off that instead of test-name test-path
;;
(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
  ;; establish info on incoming test followed by info on top level test
  ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
  (let* ((testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
			   (db:get-test-info       dbstruct run-id test-name item-path)))
	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)
			   (db:test-get-testname testdat)
			   test-name))
	 (item-path    (db:test-get-item-path testdat))
         (tl-testdat   (db:get-test-info dbstruct run-id test-name ""))
         (tl-test-id   (if tl-testdat
			   (db:test-get-id tl-testdat)
			   #f)))
    (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) 
	(db:general-call dbstruct 'set-test-start-time (list test-id)))
    (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                 ;; NB// Pass the db so it is part fo the transaction
                 (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status
                 (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item
                     (let* ((state-status-counts  (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
			      						  (state-stauses (db:roll-up-rules state-status-counts state status))
                          (newstate (car state-stauses))
                          (newstatus (cadr state-stauses)))
                       (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts)  " state-status-counts: "
							(apply conc
                  (map (lambda (x)
                     (conc
                     		(with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
                              state-status-counts))); end debug:print
   
                       (if tl-test-id
			   (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
		       ))))))
         (mutex-unlock! *db-transaction-mutex*)
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
         tr-res)))))

(define (db:roll-up-rules state-status-counts state status)
		(let* ((running     (length (filter (lambda (x)
                          (member (dbr:counts-state x) *common:running-states*))
                                 state-status-counts)))
           (bad-not-started      (length (filter (lambda (x)
                                      (and (equal? (dbr:counts-state x) "NOT_STARTED") 
                                        (not (member (dbr:counts-status x)  *common:not-started-ok-statuses*))))
																	state-status-counts)))
           (all-curr-states      (common:special-sort  ;; worst -> best (sort of)
                                    (delete-duplicates
                                      (if (and state (not (member state *common:dont-roll-up-states*)))
                                          (cons state (map dbr:counts-state state-status-counts))
                                          (map dbr:counts-state state-status-counts)))
                                                  *common:std-states* >))
           (all-curr-statuses    (common:special-sort  ;; worst -> best
                                    (delete-duplicates
                                      (if (and state status (not (member state *common:dont-roll-up-states*)))
                                          (cons status (map dbr:counts-status state-status-counts))
                                          (map dbr:counts-status state-status-counts)))
                                                   *common:std-statuses* >))
           (non-completes        (filter (lambda (x)
							 										 (not (member x (cons "COMPLETED" *common:dont-roll-up-states*))))
						       									all-curr-states))
			     (preq-fails        (filter (lambda (x)
							 								(equal? x "PREQ_FAIL"))
						       							all-curr-statuses))
           (num-non-completes (length non-completes))
 					 (newstate          (cond
															((> running 0)           "RUNNING")            ;; anything running, call the situation running
                              ((> (length preq-fails) 0) "NOT_STARTED")
															((> bad-not-started 0)   "COMPLETED")          ;; we have an ugly situation, it is completed in the sense we cannot do more.
															((> num-non-completes 0) (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED
															(else                    (car all-curr-states))))
           (newstatus         (cond
                              ((> (length preq-fails) 0)  "PREQ_FAIL")
                              ((or (> bad-not-started 0)
                                   (and (equal? newstate "NOT_STARTED")
                                      (> num-non-completes 0)))
                                            "STARTED")
                              (else (car all-curr-statuses)))))
 					(debug:print-info 2 *default-log-port*
                                         "\n--> probe db:set-state-status-and-roll-up-items: "
                                         "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts)
                                         "\n--> running:             "running
                                         "\n--> bad-not-started:     "bad-not-started
                                         "\n--> non-non-completes:   "num-non-completes
                                         "\n--> non-completes:       "non-completes
                                         "\n--> all-curr-states:     "all-curr-states
                                         "\n--> all-curr-statuses:     "all-curr-statuses
                                         "\n--> newstate              "newstate
                                         "\n--> newstatus            "newstatus
                                         "\n\n")

                        ;; NB// Pass the db so it is part of the transaction
         (list newstate newstatus)))

(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status)
    (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                   (let* ((state-status-counts  (db:get-all-state-status-counts-for-run dbstruct run-id))
													(state-stauses (db:roll-up-rules state-status-counts #f #f ))
                          (newstate (car state-stauses))
                          (newstatus (cadr state-stauses))) 
                    (if (or (not (eq? newstate curr-state)) (not (eq?  newstatus curr-status)))
                   (db:set-run-state-status dbstruct run-id newstate newstatus )))))))
         (mutex-unlock! *db-transaction-mutex*)
         tr-res))))


(define (db:get-all-state-status-counts-for-run dbstruct run-id)
 (let* ((test-count-recs (db:with-db
                                  dbstruct #f #f
                                  (lambda (db)
                                    (sqlite3:map-row
                                     (lambda (state status count)
                                        (make-dbr:counts state: state status: status count: count))
                                     db
                                     "SELECT state,status,count(id) FROM tests WHERE run_id=?  GROUP BY state,status;"
                                     run-id )))))
   test-count-recs))


;; BBnote: db:get-all-state-status-counts-for-test returns dbr:counts object aggregating state and status of items of a given test, *not including rollup state/status*
(define (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path item-state-in item-status-in)
  (let* ((test-info   (db:get-test-info dbstruct run-id test-name item-path))
         (item-state  (or item-state-in (db:test-get-state test-info))) 
         (item-status (or item-status-in (db:test-get-status test-info)))
         (other-items-count-recs (db:with-db
                                  dbstruct #f #f
                                  (lambda (db)
                                    (sqlite3:map-row
                                     (lambda (state status count)
                                       (make-dbr:counts state: state status: status count: count))
                                     db
                                     ;; ignore current item because we have changed its value in the current transation so this select will see the old value.
                                     "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND item_path !=? GROUP BY state,status;"
                                     run-id test-name item-path))))

         ;; add current item to tally outside of sql query
         (match-countrec-lambda (lambda (countrec) 
                                  (and (equal? (dbr:counts-state  countrec) item-state)
                                       (equal? (dbr:counts-status countrec) item-status))))

         (already-have-count-rec-list
          (filter match-countrec-lambda other-items-count-recs)) ;; will have either 0 or 1 count recs depending if another item shares this item's state/status
         
         (updated-count-rec    (if (null? already-have-count-rec-list)
                                   (make-dbr:counts state: item-state status: item-status count: 1)
                                   (let* ((our-count-rec (car already-have-count-rec-list))
                                          (new-count (add1 (dbr:counts-count our-count-rec))))
                                     (make-dbr:counts state: item-state status: item-status count: new-count))))

         (nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec))))
         
         (unrelated-rec-list   
          (filter nonmatch-countrec-lambda other-items-count-recs)))
    
    (cons updated-count-rec unrelated-rec-list)))

;; (define (db:get-all-item-states db run-id test-name)
;;   (sqlite3:map-row 
;;    (lambda (a) a)
;;    db
;;    "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?"
;;    run-id test-name))
;; 
;; (define (db:get-all-item-statuses db run-id test-name)
;;   (sqlite3:map-row 
;;    (lambda (a) a)
;;    db
;;    "SELECT DISTINCT status FROM tests WHERE item_path != '' AND state != 'DELETED' AND state='COMPLETED' AND run_id=? AND testname=?"
;;    run-id test-name))

(define (db:test-get-logfile-info dbstruct run-id test-name)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (let ((res #f))
       (sqlite3:for-each-row 
	(lambda (path final_logf)
	  ;; (let ((path       (sdb:qry 'getstr path-id))
	  ;;       (final_logf (sdb:qry 'getstr final_logf-id)))
	  (set! logf final_logf)
	  (set! res (list path final_logf))
	  (if (directory? path)
	      (debug:print 2 *default-log-port* "Found path: " path)
	      (debug:print 2 *default-log-port* "No such path: " path))) ;; )
	db
	"SELECT rundir,final_logf FROM tests WHERE testname=? AND item_path='' AND run_id=?;"
	test-name run-id)
       res))))

;;======================================================================
;; A G R E G A T E D   T R A N S A C T I O N   D B   W R I T E S 
;;======================================================================

(define db:queries 
  (list '(update-run-duration     "UPDATE tests SET run_duration=? WHERE id=?;")

	;; TESTS
	'(register-test          "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');")
	;; Test state and status
	'(set-test-state         "UPDATE tests SET state=?   WHERE id=?;")
	'(set-test-status        "UPDATE tests SET state=?   WHERE id=?;")
	'(state-status           "UPDATE tests SET state=?,status=? WHERE id=?;") ;; D/ONE
	'(state-status-msg       "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") ;; DONE
	;; Test comment
	'(set-test-comment       "UPDATE tests SET comment=? WHERE id=?;")
	'(set-test-start-time    "UPDATE tests SET event_time=strftime('%s','now') WHERE id=?;") ;; DONE
	'(pass-fail-counts       "UPDATE tests SET pass_count=?,fail_count=? WHERE id=?;")
	;; test_data-pf-rollup is used to set a tests PASS/FAIL based on the pass/fail info from the steps
	'(test_data-pf-rollup    "UPDATE tests
                                    SET status=CASE WHEN (SELECT fail_count FROM tests WHERE id=?) > 0 
                                      THEN 'FAIL'
                                    WHEN (SELECT pass_count FROM tests WHERE id=?) > 0 AND 
                                      (SELECT status FROM tests WHERE id=?) NOT IN ('WARN','FAIL')
                                    THEN 'PASS'
                                    ELSE status
                                    END WHERE id=?;") ;; DONE
	'(test-set-log            "UPDATE tests SET final_logf=? WHERE id=?;")      ;; DONE
	;; '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?")        ;; DONE
	;; '(test-set-rundir         "UPDATE tests SET rundir=? AND testname=? AND item_path=?;") ;; DONE
	'(test-set-rundir-shortdir "UPDATE tests SET rundir=?,shortdir=? WHERE testname=? AND item_path=? AND run_id=?;")    ;; BROKEN!!! NEEDS run-id
	'(delete-tests-in-state   ;; "DELETE FROM tests WHERE state=?;")                  ;; DONE
	  "UPDATE tests SET state='DELETED' WHERE state=?")
	'(tests:test-set-toplog   "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';")
	'(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") ;; DONE
	'(update-uname-host       "UPDATE tests SET uname=?,host=? WHERE id=?;")       ;; DONE
        '(update-test-rundat      "INSERT INTO test_rundat (test_id,update_time,cpuload,diskfree,diskusage,run_duration) VALUES (?,?,?,?,?,?);")
	'(update-test-state       "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")
	'(update-test-status      "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")
	;; stuff for set-state-status-and-roll-up-items
	'(update-pass-fail-counts "UPDATE tests 
             SET fail_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('FAIL','CHECK','INCOMPLETE','ABORT')),
                 pass_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED'))
             WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE  ;; BROKEN!!! NEEDS run-id
	'(top-test-set-running  "UPDATE tests SET state='RUNNING' WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE   ;; BROKEN!!! NEEDS run-id

	;; NOT USED
	;;
	;; Might be the following top-test-set-per-pf-counts query could be better based off of something like this:
	;;
	;; select state,status,count(state) from tests where run_id=59 AND testname='runfirst' group by state,status;
	;;
	'(top-test-set-per-pf-counts "UPDATE tests
                       SET state=CASE 
                                   WHEN (SELECT count(id) FROM tests 
                                                WHERE testname=?
                                                     AND item_path != '' 
                                                     AND status NOT IN ('n/a')
                                                     AND state in ('NOT_STARTED')) > 0 THEN 'UNKNOWN'
                                   WHEN (SELECT count(id) FROM tests 
                                                WHERE testname=?
                                                     AND item_path != '' 
                                                     AND (status NOT IN ('TEN_STRIKES','BLOCKED') OR status IN ('INCOMPLETE'))
                                                     AND state in ('RUNNING','NOT_STARTED','LAUNCHED','REMOTEHOSTSTART')) > 0 THEN 'RUNNING'
                                   WHEN (SELECT count(id) FROM tests 
                                                WHERE testname=?
                                                     AND item_path != '' 
                                                     AND state NOT IN ('COMPLETED','DELETED')) = 0 THEN 'COMPLETED'
                                   WHEN (SELECT count(id) FROM tests 
                                                WHERE testname=?
                                                     AND item_path != '' 
                                                     AND state = 'NOT_STARTED') > 0 THEN 'NOT_STARTED'
                                   ELSE 'UNKNOWN' END,
                            status=CASE 
                                  WHEN fail_count > 0 THEN 'FAIL' 
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE testname=?
                                              AND item_path != ''
                                              AND state IN ('BLOCKED','INCOMPLETE')) > 0 THEN 'FAIL'
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE testname=?
                                              AND item_path != ''
                                              AND state NOT IN ('DELETED')
                                              AND status IN ('INCOMPLETE','ABORT')) > 0 THEN 'ABORT'
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE testname=?
                                              AND item_path != ''
                                              AND state NOT IN ('DELETED')
                                              AND status = 'AUTO') > 0 THEN 'AUTO'
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE testname=?
                                              AND item_path != ''
                                              AND state NOT IN ('DELETED')
                                              AND status IN ('STUCK/INCOMPLETE', 'INCOMPLETE')) > 0 THEN 'INCOMPLETE'
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE testname=?
                                              AND item_path != ''
                                              AND state IN ('COMPLETED','STUCK/INCOMPLETE','INCOMPLETE')
                                              AND status = 'FAIL') > 0 THEN 'FAIL'
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE testname=?
                                              AND item_path != ''
                                              AND state NOT IN ('DELETED')
                                              AND status = 'CHECK') > 0 THEN 'CHECK'
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE testname=?
                                              AND item_path != ''
                                              AND state NOT IN ('DELETED')
                                              AND status = 'SKIP') > 0 THEN 'SKIP'
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE testname=?
                                              AND item_path != ''
                                              AND state NOT IN ('DELETED')
                                              AND status = 'WARN') > 0 THEN 'WARN'
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE testname=?
                                              AND item_path != ''
                                              AND state NOT IN ('DELETED')
                                              AND status = 'WAIVED') > 0 THEN 'WAIVED'
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE testname=? 
                                              AND item_path != ''
                                              AND state NOT IN ('DELETED')
                                              AND status NOT IN ('PASS','FAIL','WARN','WAIVED')) > 0 THEN 'ABORT'
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE testname=?
                                              AND item_path != ''
                                              AND state='NOT_STARTED') > 0 THEN 'n/a'
                                  WHEN (SELECT count(id) FROM tests
                                         WHERE testname=?
                                              AND item_path != ''
                                              AND state = 'COMPLETED'
                                              AND status = 'PASS') > 0 THEN 'PASS'
                                  WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' 
                                  ELSE 'UNKNOWN' END
                       WHERE testname=? AND item_path='';") ;; DONE  ;; BROKEN!!! NEEDS run-id

	;; STEPS
	'(delete-test-step-records "UPDATE test_steps SET status='DELETED' WHERE test_id=?;")
	'(delete-test-data-records "UPDATE test_data  SET status='DELETED' WHERE test_id=?;") ;; using status since no state field
	))

(define (db:lookup-query qry-name)
  (let ((q (alist-ref qry-name db:queries)))
    (if q (car q) #f)))

;; do not run these as part of the transaction
(define db:special-queries   '(rollup-tests-pass-fail
			       ;; db:set-state-status-and-roll-up-items  ;; WHY NOT!?
			       login
			       immediate
			       flush
			       sync
			       set-verbosity
			       killserver
			       ))

(define (db:login dbstruct calling-path calling-version client-signature)
  (cond 
   ((not (equal? calling-path *toppath*))
    (list #f "Login failed due to mismatch paths: " calling-path ", " *toppath*))
   ;; ((not (equal? *run-id* run-id))
   ;;  (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*))
   ((not (equal? megatest-version calling-version))
    (list #t (conc "Login warning due to mismatch megatest version: " calling-version ", " megatest-version)))
   (else
    (hash-table-set! *logged-in-clients* client-signature (current-seconds))
    '(#t "successful login"))))

(define (db:general-call dbstruct stmtname params)
  (let ((query (let ((q (alist-ref (if (string? stmtname)
				       (string->symbol stmtname)
				       stmtname)
				   db:queries)))
 		 (if q (car q) #f))))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (apply sqlite3:execute db query params)
       #t))))

;; get a summary of state and status counts to calculate a rollup
;;
(define (db:get-state-status-summary dbstruct run-id testname)
  (let ((res   '()))
    (db:with-db
     dbstruct #f #f
     (sqlite3:for-each-row
      (lambda (state status count)
	(set! res (cons (vector state status count) res)))
      db
      "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;"
      run-id testname)
     res)))

(define (db:get-latest-host-load dbstruct raw-hostname)
  (let* ((hostname (string-substitute "\\..*$" "" raw-hostname))
         (res  (cons -1 0)))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row
        (lambda (cpuload update-time)  (set! res (cons cpuload update-time)))
        db
        "SELECT tr.cpuload, tr.update_time FROM test_rundat tr, tests t WHERE t.host=? AND tr.cpuload != -1  AND tr.test_id=t.id ORDER BY tr.update_time DESC LIMIT 1;"
        hostname))) res ))

(define (db:set-top-level-from-items dbstruct run-id testname)
  (let* ((summ  (db:get-state-status-summary dbstruct run-id testname))
	 (find  (lambda (state status)
		  (if (null? summ) 
		      #f
		      (let loop ((hed (car summ))
				 (tal (cdr summ)))
			(if (and (string-match state  (vector-ref hed 0))
				 (string-match status (vector-ref hed 1)))
			    hed
			    (if (null? tal)
				#f
				(loop (car tal)(cdr tal)))))))))


      ;;;     E D I T M E ! !


    (cond
     ((> (find "COMPLETED" ".*") 0) #f))))
		   
    

;; get the previous records for when these tests were run where all keys match but runname
;; NB// Merge this with test:get-previous-test-run-records? This one looks for all matching tests
;; can use wildcards. Also can likely be factored in with get test paths?
;;
;; Run this remotely!!
;;
(define (db:get-matching-previous-test-run-records dbstruct run-id test-name item-path)
  (let* ((keys    (db:get-keys dbstruct))
	 (selstr  (string-intersperse keys ","))
	 (qrystr  (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND "))
	 (keyvals #f)
	 (tests-hash (make-hash-table)))
    ;; first look up the key values from the run selected by run-id
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row 
	(lambda (a . b)
	  (set! keyvals (cons a b)))
	db
	(conc "SELECT " selstr " FROM runs WHERE id=? ORDER BY event_time DESC;") run-id)))
    (if (not keyvals)
	'()
	(let ((prev-run-ids '()))
	  (db:with-db
	   dbstruct #f #f
	   (lambda (db)
	     (apply sqlite3:for-each-row
		    (lambda (id)
		      (set! prev-run-ids (cons id prev-run-ids)))
		    db
		    (conc "SELECT id FROM runs WHERE " qrystr " AND id != ?;") (append keyvals (list run-id)))))
	  ;; collect all matching tests for the runs then
	  ;; extract the most recent test and return that.
	  (debug:print 4 *default-log-port* "selstr: " selstr ", qrystr: " qrystr ", keyvals: " keyvals 
		       ", previous run ids found: " prev-run-ids)
	  (if (null? prev-run-ids) '()  ;; no previous runs? return null
	      (let loop ((hed (car prev-run-ids))
			 (tal (cdr prev-run-ids)))
		(let ((results (db:get-tests-for-run dbstruct hed (conc test-name "/" item-path) '() '() #f #f #f #f #f #f #f 'normal)))
		  (debug:print 4 *default-log-port* "Got tests for run-id " run-id ", test-name " test-name 
			       ", item-path " item-path " results: " (intersperse results "\n"))
		  ;; Keep only the youngest of any test/item combination
		  (for-each 
		   (lambda (testdat)
		     (let* ((full-testname (conc (db:test-get-testname testdat) "/" (db:test-get-item-path testdat)))
			    (stored-test   (hash-table-ref/default tests-hash full-testname #f)))
		       (if (or (not stored-test)
			       (and stored-test
				    (> (db:test-get-event_time testdat)(db:test-get-event_time stored-test))))
			   ;; this test is younger, store it in the hash
			   (hash-table-set! tests-hash full-testname testdat))))
		   results)
		  (if (null? tal)
		      (map cdr (hash-table->alist tests-hash)) ;; return a list of the most recent tests
		      (loop (car tal)(cdr tal))))))))))

;; Function recursively checks if <db>.journal exists; if yes means db busy; call itself after delayed interval
;; return the sqlite3 db handle if possible
;; 
(define (db:delay-if-busy dbdat #!key (count 6))
  (if (not (configf:lookup *configdat* "server" "delay-on-busy")) 
      (and dbdat (db:dbdat-get-db dbdat))
      (if dbdat
	  (let* ((dbpath (db:dbdat-get-path dbdat))
		 (db     (db:dbdat-get-db   dbdat)) ;; we'll return this so (db:delay--if-busy can be called inline
		 (dbfj   (conc dbpath "-journal")))
	    (if (handle-exceptions
		 exn
		 (begin
		   (debug:print-info 0 *default-log-port* "WARNING: failed to test for existance of " dbfj)
		   (thread-sleep! 1)
		   (db:delay-if-busy count (- count 1))) 
		 (common:file-exists? dbfj))
		(case count
		  ((6)
		   (thread-sleep! 0.2)
		   (db:delay-if-busy count: 5))
		  ((5)
		   (thread-sleep! 0.4)
		   (db:delay-if-busy count: 4))
		  ((4)
		   (thread-sleep! 0.8)
		   (db:delay-if-busy count: 3))
		  ((3)
		   (thread-sleep! 1.6)
		   (db:delay-if-busy count: 2))
		  ((2)
		   (thread-sleep! 3.2)
		   (db:delay-if-busy count: 1))
		  ((1)
		   (thread-sleep! 6.4)
		   (db:delay-if-busy count: 0))
		  (else
		   (debug:print-info 0 *default-log-port* "delaying db access due to high database load.")
		   (thread-sleep! 12.8))))
	    db) 
	  "bogus result from db:delay-if-busy")))

(define (db:test-get-records-for-index-file dbstruct run-id test-name)
  (let ((res '()))
    (db:with-db
     dbstruct
     run-id
     #f
     (lambda (db)
       (sqlite3:for-each-row 
	(lambda (id itempath state status run_duration logf comment)
	  (set! res (cons (vector id itempath state status run_duration logf comment) res)))
	db
	"SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE testname=? AND item_path != '' AND run_id=?;" ;; BUG! WHY NO run_id?
	test-name
	run-id)
       res))))

;;======================================================================
;; Tests meta data
;;======================================================================

;; returns a hash table of tags to tests
;;
(define (db:get-tests-tags dbstruct)
  (db:with-db
   dbstruct #f #f
   (lambda (db)
     (let* ((res     (make-hash-table)))
       (sqlite3:for-each-row
	(lambda (testname tags-in)
	  (let ((tags (string-split tags-in ",")))
	    (for-each
	     (lambda (tag)
	       (hash-table-set! res tag
				(delete-duplicates
				 (cons testname (hash-table-ref/default res tag '())))))
	     tags)))
	db
	"SELECT testname,tags FROM test_meta")
       (hash-table->alist res)))))

;; read the record given a testname
(define (db:testmeta-get-record dbstruct testname)
  (let ((res   #f))
    (db:with-db
     dbstruct
     #f
     #f
     (lambda (db)
       (sqlite3:for-each-row
	(lambda (id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup)
	  (set! res (vector id testname author owner description reviewed iterated avg_runtime avg_disk tags jobgroup)))
	db
	"SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta WHERE testname=?;"
	testname)
       res))))

;; create a new record for a given testname
(define (db:testmeta-add-record dbstruct testname)
  (db:with-db dbstruct #f #f 
	      (lambda (db)
		(sqlite3:execute 
		 db
		 "INSERT OR IGNORE INTO test_meta (testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags) VALUES (?,'','','','','','','','');" testname))))

;; update one of the testmeta fields
(define (db:testmeta-update-field dbstruct testname field value)
  (db:with-db dbstruct #f #f 
	      (lambda (db)
		(sqlite3:execute 
		 db
		 (conc "UPDATE test_meta SET " field "=? WHERE testname=?;") value testname))))

(define (db:testmeta-get-all dbstruct)
  (db:with-db dbstruct #f #f 
	      (lambda (db)
		(let ((res '()))
		  (sqlite3:for-each-row
		   (lambda (a . b)
		     (set! res (cons (apply vector a b) res)))
		   db
		   "SELECT id,testname,author,owner,description,reviewed,iterated,avg_runtime,avg_disk,tags,jobgroup FROM test_meta;")
		  res))))

;;======================================================================
;; M I S C   M A N A G E M E N T   I T E M S 
;;======================================================================

;; A routine to map itempaths using a itemmap
;; patha and pathb must be strings or this will fail
;;
;; path-b is waiting on path-a
;;
(define (db:compare-itempaths test-b-name path-a path-b itemmaps )
  (debug:print-info 6 *default-log-port* "ITEMMAPS: " itemmaps)
  (let* ((itemmap    (tests:lookup-itemmap itemmaps test-b-name)))
    (if itemmap
	(let ((path-b-mapped (db:multi-pattern-apply path-b itemmap)))
	  (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap ", path: " path-b ", mapped path: " path-b-mapped)
	  (equal? path-a path-b-mapped))
	(equal? path-b path-a))))

;; A routine to convert test/itempath using a itemmap
;; NOTE: to process only an itempath (i.e. no prepended testname)
;;       just call db:multi-pattern-apply
;;
(define (db:convert-test-itempath path-in itemmap)
  (debug:print-info 6 *default-log-port* "ITEMMAP is " itemmap)
  (let* ((path-parts  (string-split path-in "/"))
	 (test-name   (if (null? path-parts) "" (car path-parts)))
	 (item-path   (string-intersperse (if (null? path-parts) '() (cdr path-parts)) "/")))
    (conc test-name "/" 
	  (db:multi-pattern-apply item-path itemmap))))

;; patterns are:
;;    "rx1"  "replacement1"\n
;;    "rx2"  "replacement2"
;; etc.
;;
(define (db:multi-pattern-apply item-path itemmap)
  (let ((all-patts (string-split itemmap "\n")))
    (if (null? all-patts)
	item-path
	(let loop ((hed (car all-patts))
		   (tal (cdr all-patts))
		   (res item-path))
	  (let* ((parts (string-split hed))
		 (patt  (car parts))

		 (repl  (if (> (length parts) 1)(cadr parts) ""))

		 (newr  (if (and patt repl)
			    (begin
                              (handle-exceptions
                               exn
                               (begin
                                  (debug:print 0 *default-log-port*
                                  "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl)
                                 res)
                              (string-substitute patt repl res))


                              )
			    (begin
                              (debug:print 0 *default-log-port*
                               "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl)
			      res))))
	    (if (null? tal)
		newr
		(loop (car tal)(cdr tal) newr)))))))




;; the new prereqs calculation, looks also at itempath if specified
;; all prereqs must be met
;;    if prereq test with itempath='' is COMPLETED and PASS, WARN, CHECK, or WAIVED then prereq is met
;;    if prereq test with itempath=ref-item-path and COMPLETED with PASS, WARN, CHECK, or WAIVED then prereq is met
;;
;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED)
;;       mode 'toplevel means that tests must be COMPLETED only
;;       mode 'itemmatch or 'itemwait means that tests items must be COMPLETED and (PASS|WARN|WAIVED|CHECK) [[ NB// NOT IMPLEMENTED YET ]]
;;       mode 'exclusive means this test/item cannot run if the same test/item is LAUNCHED,REMOTEHOSTSTART or RUNNING
;;
;; IDEA for consideration:
;;    1. collect all tests "upstream"
;;    2. any NOT completed and good? if yes => return those as prereqs not met, if no => return null list
;; 
;; (define (db:get-prereqs-not-met dbstruct run-id waitons ref-item-path mode)
(define (db:get-prereqs-not-met dbstruct run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmap #f))
  ;; BBnote - rollup of an itemized test's overall state/status done in db:set-state-status-and-roll-up-items
  (append
   (if (member 'exclusive mode)
       (let ((running-tests (db:get-tests-for-run dbstruct
						  #f  ;; run-id of #f means for all runs. 
						  (if (string=? ref-item-path "")   ;; testpatt
						      ref-test-name
						      (conc ref-test-name "/" ref-item-path))
						  '("LAUNCHED" "REMOTEHOSTSTART" "RUNNING") ;; states
						  '()          ;; statuses
						  #f           ;; offset
						  #f           ;; limit
						  #f           ;; not-in
						  #f           ;; sort by
						  #f           ;; sort order
						  'shortlist   ;; query type
						  0            ;; last update, beginning of time ....
						  #f           ;; mode
						  )))
	;;(map (lambda (testdat)
	;;	(if (equal? (db:test-get-item-path testdat) "")
	;;	    (db:test-get-testname testdat)
	;;	    (conc (db:test-get-testname testdat)
	;;		  "/"
	;;		  (db:test-get-item-path testdat))))
	 running-tests) ;; calling functions want the entire data
       '())

   ;; collection of: for each waiton -
   ;;   if this ref-test-name is an item in an itemized test and mode is itemwait/itemmatch:
   ;;     if waiton is not itemized - if waiton is not both completed and in ok status, add as unmet prerequisite
   ;;     if waiton is itemized:
   ;;           and waiton's items are not expanded, add as unmet prerequisite
   ;;           else if matching waiton item is not both completed and in an ok status, add as unmet prerequisite
   ;;   else
   ;;    if waiton toplevel is not in both completed and ok status, add as unmet prerequisite

   (if (or (not waitons)
	   (null? waitons))
       '()
       (let* ((ref-test-itemized-mode (not (null? (lset-intersection eq? mode '(itemmatch itemwait))))) ;; how is this different from using member?
              (ref-test-toplevel-mode (not (null? (lset-intersection eq? mode '(toplevel)))))
              (ref-test-is-toplevel   (equal? ref-item-path ""))
              (ref-test-is-item       (not ref-test-is-toplevel))
              (unmet-pre-reqs '())
	      (result         '())
              (unmet-prereq-items '())
              )
	 (for-each  ; waitons
	  (lambda (waitontest-name)
	    ;; by getting the tests with matching name we are looking only at the matching test 
	    ;; and related sub items
	    ;; next should be using mt:get-tests-for-run?

            (let (;(waiton-is-itemized ...)
                  ;(waiton-items-are-expanded ...)
                  (waiton-tests             (db:get-tests-for-run-state-status dbstruct run-id waitontest-name))
		  (ever-seen         #f)
		  (parent-waiton-met #f)
		  (item-waiton-met   #f)

                  )
	      (for-each ; test expanded from waiton
	       (lambda (waiton-test) 
		 (let* ((waiton-state             (db:test-get-state waiton-test))
			(waiton-status            (db:test-get-status waiton-test))
			(waiton-item-path         (db:test-get-item-path waiton-test)) ;; BB- this is the upstream itempath
			(waiton-test-name         (db:test-get-testname waiton-test))
                        (waiton-is-toplevel       (equal? waiton-item-path ""))
                        (waiton-is-item           (not waiton-is-toplevel))
			(waiton-is-completed      (member waiton-state  *common:ended-states*))
			(waiton-is-running        (member waiton-state  *common:running-states*))
			(waiton-is-killed         (member waiton-state  *common:badly-ended-states*))
			(waiton-is-ok             (member waiton-status *common:well-ended-states*)) 
			;;                                              testname-b    path-a    path-b
			(same-itempath            (db:compare-itempaths ref-test-name waiton-item-path ref-item-path itemmaps)) ;; (equal? ref-item-path waiton-item-path)))
			(real-ref-test-name       (car (string-split ref-test-name "/"))) ;; I THINK ref-test-name SHOULD NEVER HAVE THE ITEM_PATH!
			(test-and-ref-are-same    (equal? real-ref-test-name waiton-test-name)))
		   (debug:print 4 *default-log-port* "waiton-test-name " waiton-test-name " ref-test-name: " ref-test-name " test-and-ref-are-same: " test-and-ref-are-same)
		   (set! ever-seen #t)
                   ;;(BB> "***consider waiton "waiton-test"/"waiton-item-path"***")
                   (cond
                    ;; case 0 - toplevel of an itemized test, at least one item in prereq has completed
                    ((and waiton-is-item ref-test-is-toplevel ref-test-itemized-mode waiton-is-completed)
                     (set! parent-waiton-met #t))

                    ;; case 1, non-item (parent test) is 
		    ((and waiton-is-toplevel ;; this is the parent test of the waiton being examined
			  waiton-is-completed
                          ;;(BB> "cond1")
			  (or waiton-is-ok ref-test-toplevel-mode)) ;;  itemmatch itemwait))))))
		     (set! parent-waiton-met #t))
		    ;; Special case for toplevel and KILLED
		    ((and waiton-is-toplevel ;; this is the parent test
			  waiton-is-killed
			  (member 'toplevel mode))
                     ;;(BB> "cond2")
		     (set! parent-waiton-met #t))
		    ;; For itemwait mode IFF the previous matching item is good the set parent-waiton-met
                    ((and ref-test-itemized-mode ref-test-is-item same-itempath)
                     ;;(BB> "cond3")
		     (if (and waiton-is-completed (or waiton-is-ok ref-test-toplevel-mode)) 
                         (set! item-waiton-met #t)
                         (set! unmet-prereq-items (cons waiton-test unmet-prereq-items)))
                     (if (and waiton-is-toplevel ;; if upstream rollup test is completed, parent-waiton-met is set
			      (or waiton-is-completed waiton-is-running))
			 (set! parent-waiton-met #t)))
		    ;; normal checking of parent items, any parent or parent item not ok blocks running
		    ((and waiton-is-completed
			  (or waiton-is-ok 
			      (member 'toplevel mode))              ;; toplevel does not block on FAIL
			  (and waiton-is-ok (member 'itemmatch mode) ;; itemmatch blocks on not ok ;; TODO: THIS IS PROBABLY A BUG.  ITEMMATCH AND ITEMWAIT ARE SYNONYMS!! WHAT HAPPENED OT ITEMWAIT???
                               ))
                     ;;(BB> "cond4")
		     (set! item-waiton-met #t))
                    ((and waiton-is-completed waiton-is-ok same-itempath)
                     ;;(BB> "cond5")
                     (set! item-waiton-met #t))
		    ((and waiton-is-completed waiton-is-ok test-and-ref-are-same) ;; probably from [waitons] table
		     (set! item-waiton-met #t))
                    (else
                     #t
                     ;;(BB> "condelse")
                     ))))
               waiton-tests)
	      ;; both requirements, parent and item-waiton must be met to NOT add item to
	      ;; prereq's not met list
               ;; (BB>
               ;;  "\n* waiton-tests           "waiton-tests
               ;;  "\n* parent-waiton-met      "parent-waiton-met
               ;;  "\n* item-waiton-met        "item-waiton-met
               ;;  "\n* ever-seen              "ever-seen
               ;;  "\n* ref-test-itemized-mode "ref-test-itemized-mode
               ;;  "\n* unmet-prereq-items     "unmet-prereq-items
               ;;  "\n* result (pre)           "result
               ;;  "\n* ever-seen              "ever-seen
               ;;  "\n")

              (cond
               ((and ref-test-itemized-mode ref-test-is-item (not (null? unmet-prereq-items)))
                (set! result (append unmet-prereq-items result)))
               ((not (or parent-waiton-met item-waiton-met))
                (set! result (append (if (null? waiton-tests) (list waitontest-name) waiton-tests) result))) ;; appends the string if the full record is not available
	      ;; if the test is not found then clearly the waiton is not met...
	      ;; (if (not ever-seen)(set! result (cons waitontest-name result)))))
               ((not ever-seen)
                (set! result (append (if (null? waiton-tests)(list waitontest-name) waiton-tests) result))))))
	  waitons)
	 (delete-duplicates result)))))

;;======================================================================
;; To sync individual run
;;======================================================================
#;(define (db:get-run-record-ids dbstruct target run keynames test-patt)
  (let ((backcons (lambda (lst item)(cons item lst))))
    (db:with-db
     dbstruct #f #f 
     (lambda (db)
       (let* ((keystr (string-intersperse 
		       (map (lambda (key val)
			      (conc key " like '" val "'"))
			    keynames 
			    (string-split target "/"))
		       " AND "))
	      (run-qry (conc "SELECT id FROM runs  WHERE " keystr  " and runname='" run"'"))
	      (test-qry (conc "SELECT id FROM tests WHERE run_id in (" run-qry ") and testname like '" test-patt "'")))
         (print run-qry)
         (print test-qry) 
	 `((runs       . ,(fold-row backcons '() db run-qry))
	   (tests      . ,(fold-row backcons '() db test-qry))
	   (test_steps . ,(fold-row backcons '() db (conc "SELECT id FROM test_steps WHERE test_id in (" test-qry ")")))
	   (test_data  . ,(fold-row backcons '() db (conc "SELECT id FROM test_data  WHERE test_id in (" test-qry ")" )))
	   ))))))

;;======================================================================
;; Just for sync, procedures to make sync easy
;;======================================================================

;; get an alist of record ids changed since time since-time
;;   '((runs . (1 2 3 ...))(steps . (5 6 7 ...) ...))
;;
#;(define (db:get-changed-record-ids dbstruct since-time)
  ;; no transaction, allow the db to be accessed between the big queries
  (let ((backcons (lambda (lst item)(cons item lst))))
    (db:with-db
     dbstruct #f #f 
     (lambda (db)
       `((runs       . ,(fold-row backcons '() db "SELECT id FROM runs  WHERE last_update>=?" since-time))
	 (tests      . ,(fold-row backcons '() db "SELECT id FROM tests WHERE last_update>=?" since-time))
	 (test_steps . ,(fold-row backcons '() db "SELECT id FROM test_steps WHERE last_update>=?" since-time))
	 (test_data  . ,(fold-row backcons '() db "SELECT id FROM test_data  WHERE last_update>=?" since-time))
	 ;; (test_meta  . ,(fold-row backcons '() db "SELECT id FROM test_meta  WHERE last_update>?" since-time))
	 (run_stats  . ,(fold-row backcons '() db "SELECT id FROM run_stats  WHERE last_update>=?" since-time))
	 )))))

;;======================================================================
;; Extract ods file from the db
;;======================================================================

;; NOT REWRITTEN YET!!!!!

;; runspatt is a comma delimited list of run patterns
;; keypatt-alist must contain *all* keys with an associated pattern: '( ("KEY1" "%") .. )
(define (db:extract-ods-file dbstruct outputfile keypatt-alist runspatt pathmod)
  (let* ((keysstr  (string-intersperse (map car keypatt-alist) ","))
	 (keyqry   (string-intersperse (map (lambda (p)(conc (car p) " LIKE ? ")) keypatt-alist) " AND "))
	 (numkeys  (length keypatt-alist))
	 (test-ids '())
	 (dbdat    (db:get-db dbstruct))
	 (db       (db:dbdat-get-db dbdat))
	 (windows  (and pathmod (substring-index "\\" pathmod)))
	 (tempdir  (conc "/tmp/" (current-user-name) "/" runspatt "_" (random 10000) "_" (current-process-id)))
	 (runsheader (append (list "Run Id" "Runname") ; 0 1
			     (map car keypatt-alist)   ; + N = length keypatt-alist
			     (list "Testname"          ; 2
				   "Item Path"         ; 3 
				   "Description"       ; 4 
				   "State"             ; 5 
				   "Status"            ; 6  
				   "Final Log"         ; 7 
				   "Run Duration"      ; 8 
				   "When Run"          ; 9 
				   "Tags"              ; 10
				   "Run Owner"         ; 11
				   "Comment"           ; 12
				   "Author"            ; 13
				   "Test Owner"        ; 14
				   "Reviewed"          ; 15
				   "Diskfree"          ; 16
				   "Uname"             ; 17
				   "Rundir"            ; 18
				   "Host"              ; 19
				   "Cpu Load"          ; 20
				   )))
	 (results (list runsheader))			 
	 (testdata-header (list "Run Id" "Testname" "Item Path" "Category" "Variable" "Value" "Expected" "Tol" "Units" "Status" "Comment"))
	 (mainqry (conc "SELECT
              t.testname,r.id,runname," keysstr ",t.testname,
              t.item_path,tm.description,t.state,t.status,
              final_logf,run_duration, 
              strftime('%m/%d/%Y %H:%M:%S',datetime(t.event_time,'unixepoch'),'localtime'),
              tm.tags,r.owner,t.comment,
              author,
              tm.owner,reviewed,
              diskfree,uname,rundir,
              host,cpuload
            FROM tests AS t JOIN runs AS r ON t.run_id=r.id JOIN test_meta AS tm ON tm.testname=t.testname
            WHERE runname LIKE ? AND " keyqry ";")))
    (debug:print 2 *default-log-port* "Using " tempdir " for constructing the ods file. keyqry: " keyqry " keystr: " keysstr " with keys: " (map cadr keypatt-alist)
		 "\n      mainqry: " mainqry)
    ;; "Expected Value"
    ;; "Value Found"
    ;; "Tolerance"
    (apply sqlite3:for-each-row
	   (lambda (test-id . b)
	     (set! test-ids (cons test-id test-ids))   ;; test-id is now testname
	     (set! results (append results ;; note, drop the test-id
				   (list
				    (if pathmod
					(let* ((vb        (apply vector b))
					       (keyvals   (let loop ((i    0)
								     (res '()))
							    (if (>= i numkeys)
								res
								(loop (+ i 1)
								      (append res (list (vector-ref vb (+ i 2))))))))
					       (runname   (vector-ref vb 1))
					       (testname  (vector-ref vb (+  2 numkeys)))
					       (item-path (vector-ref vb (+  3 numkeys)))
					       (final-log (vector-ref vb (+  7 numkeys)))
					       (run-dir   (vector-ref vb (+ 18 numkeys)))
					       (log-fpath (conc run-dir "/"  final-log))) ;; (string-intersperse keyvals "/") "/" testname "/" item-path "/"
					  (debug:print 4 *default-log-port* "log: " log-fpath " exists: " (common:file-exists? log-fpath))
					  (vector-set! vb (+ 7 numkeys) (if (common:file-exists? log-fpath)
									    (let ((newpath (conc pathmod "/"
												 (string-intersperse keyvals "/")
												 "/" runname "/" testname "/"
												 (if (string=? item-path "") "" (conc "/" item-path))
												 final-log)))
									      ;; for now throw away newpath and use the log-fpath conc'd with pathmod
									      (set! newpath (conc pathmod log-fpath))
									      (if windows (string-translate newpath "/" "\\") newpath))
									    (if (debug:debug-mode 1)
										(conc final-log " not-found")
										"")))
					  (vector->list vb))
					b)))))
	   db
	   mainqry
	   runspatt (map cadr keypatt-alist))
    (debug:print 2 *default-log-port* "Found " (length test-ids) " records")
    (set! results (list (cons "Runs" results)))
    ;; now, for each test, collect the test_data info and add a new sheet
    (for-each
     (lambda (test-id)
       (let ((test-data (list testdata-header))
	     (curr-test-name #f))
	 (sqlite3:for-each-row
	  (lambda (run-id testname item-path category variable value expected tol units status comment)
	    (set! curr-test-name testname)
	    (set! test-data (append test-data (list (list run-id testname item-path category variable value expected tol units status comment)))))
	  db 
	  ;; "SELECT run_id,testname,item_path,category,variable,td.value AS value,expected,tol,units,td.status AS status,td.comment AS comment FROM test_data AS td INNER JOIN tests ON tests.id=td.test_id WHERE test_id=?;"
	  "SELECT run_id,testname,item_path,category,variable,td.value AS value,td.expected,td.tol,td.units,td.status AS status,td.comment AS comment FROM test_data AS td INNER JOIN tests ON tests.id=td.test_id WHERE testname=?;"
	  test-id)
	 (if curr-test-name
	     (set! results (append results (list (cons curr-test-name test-data)))))
	 ))
     (sort (delete-duplicates test-ids) string<=))
    (system (conc "mkdir -p " tempdir))
    ;; (pp results)
    (ods:list->ods 
     tempdir
     (if (string-match (regexp "^[/~]+.*") outputfile) ;; full path?
	 outputfile
	 (begin
	   (debug:print 0 *default-log-port* "WARNING: path given, " outputfile " is relative, prefixing with current directory")
	   (conc (current-directory) "/" outputfile)))
     results)
    ;; brutal clean up
    (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
    (system "rm -rf tempdir")))

;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%")
;; tiresome setup for rmtmod (and other mods) goes here
;; (set-fn 'db:dbfile-path common:get-db-tmp-area)

(define (keys:make-key/field-string confdat)
  (let ((fields (configf:get-section confdat "fields")))
    (string-join
     (map (lambda (field)(conc (car field) " " (cadr field)))
	  fields)
     ",")))

;;======================================================================
;;  T E S T   L A U N C H I N G   P E R   I T E M   W I T H   H O S T   T Y P E S
;;======================================================================
;; 
;; [hosts]
;; arm cubie01 cubie02
;; x86_64 zeus xena myth01
;; allhosts #{g hosts arm} #{g hosts x86_64}
;; 
;; [host-types]
;; general #MTLOWESTLOAD #{g hosts allhosts}
;; arm     #MTLOWESTLOAD #{g hosts arm}
;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo
;;
;; [host-rules]
;; # maxnload   => max normalized load
;; # maxnjobs   => max jobs per cpu
;; # maxjobrate => max jobs per second
;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 
;; 
;; [launchers]
;; envsetup general
;; xor/%/n 4C16G
;; % nbgeneral
;; 
;; [jobtools]
;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match.
;; flexi-launcher yes  
;; launcher nbfake
;;
(define (common:get-launcher configdat testname itempath)
  (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher")))
    (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher
	     (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no")))
	(let* ((launchers         (hash-table-ref/default configdat "launchers" '())))
	  (if (null? launchers)
	      fallback-launcher
	      (let loop ((hed (car launchers))
			 (tal (cdr launchers)))
		(let ((patt      (car hed))
		      (host-type (cadr hed)))
		  (if (tests:match patt testname itempath)
		      (begin
			(debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type)
			(let ((launcher (configf:lookup configdat "host-types" host-type)))
			  (if launcher
			      (let* ((launcher-parts (string-split launcher))
				     (launcher-exe   (car launcher-parts)))
				(if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline
				    (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat))
						    (count     100))
				      (if targ-host
					  (conc "remrun " targ-host)
					  (if (> count 0)
					      (begin
						(debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type)
						(thread-sleep! (- 101 count))
						(host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)
							   (- count 1)))
					      (begin
						(debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type)
						(exit)))))
				    launcher))
			      (begin
				(debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type)
				(if (null? tal)
				    fallback-launcher
				    (loop (car tal)(cdr tal)))))))
		      ;; no match, try again
		      (if (null? tal)
			  fallback-launcher
			  (loop (car tal)(cdr tal))))))))
	fallback-launcher)))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;
(define (server:ping host-port-in #!key (do-exit #f))
  (let ((host:port (if (not host-port-in) ;; use read-dotserver to find
		       #f ;; (server:check-if-running *toppath*)
		;; (if (number? host-port-in) ;; we were handed a server-id
		;; 	   (let ((srec (tasks:get-server-by-id (db:delay-if-busy (tasks:open-db)) host-port-in)))
		;; 	     ;; (print "srec: " srec " host-port-in: " host-port-in)
		;; 	     (if srec
		;; 		 (conc (vector-ref srec 3) ":" (vector-ref srec 4))
		;; 		 (conc "no such server-id " host-port-in)))
		       host-port-in))) ;; )
    (let* ((host-port (if host:port
			  (let ((slst (string-split   host:port ":")))
			    (if (eq? (length slst) 2)
				(list (car slst)(string->number (cadr slst)))
				#f))
			  #f)))
;;	   (toppath       (launch:setup)))
      ;; (print "host-port=" host-port)
      (if (not host-port)
	  (begin
	    (if host-port-in
		(debug:print 0 *default-log-port*  "ERROR: bad host:port"))
	    (if do-exit (exit 1))
	    #f)
	  (let* ((iface      (car host-port))
		 (port       (cadr host-port))
		 (server-dat (http-transport:client-connect iface port))
		 (login-res  (rmt:login-no-auto-client-setup server-dat)))
	    (if (and (list? login-res)
		     (car login-res))
		(begin
		  ;; (print "LOGIN_OK")
		  (if do-exit (exit 0))
		  #t)
		(begin
		  ;; (print "LOGIN_FAILED")
		  (if do-exit (exit 1))
		  #f)))))))

;; run ping in separate process, safest way in some cases
;;
(define (server:ping-server ifaceport)
  (with-input-from-pipe 
   (conc (common:get-megatest-exe) " -ping " ifaceport)
   (lambda ()
     (let loop ((inl (read-line))
		(res "NOREPLY"))
       (if (eof-object? inl)
	   (case (string->symbol res)
	     ((NOREPLY)  #f)
	     ((LOGIN_OK) #t)
	     (else       #f))
	   (loop (read-line) inl))))))

;; NOT USED (well, ok, was referenced in rpc-transport but otherwise
;; not used).
;;
(define (server:login toppath)
  (lambda (toppath)
    (set! *db-last-access* (current-seconds)) ;; might not be needed.
    (if (equal? *toppath* toppath)
	#t
	#f)))


;; NB// some of these probably needed
;; (include "key_records.scm")
;; (include "db_records.scm")
;; (include "run_records.scm")
;; (include "test_records.scm")
(include "js-path.scm")


;; Call this one to do all the work and get a standardized list of tests
;;   gets paths from configs and finds valid tests 
;;   returns hash of testname --> fullpath
;;
(define (tests:get-all)
  (let* ((test-search-path   (tests:get-tests-search-path *configdat*)))
    (tests:get-valid-tests (make-hash-table) test-search-path)))

(define (tests:get-tests-search-path cfgdat)
  (let ((paths (let ((section (if cfgdat
				  (configf:get-section cfgdat "tests-paths")
				  #f)))
		 (if section
		     (map cadr section)
		     '()))))
    (filter (lambda (d)
	      (if (directory-exists? d)
		  d
		  (begin
		    (if (common:low-noise-print 60 "tests:get-tests-search-path" d)
			(debug:print 0 *default-log-port* "WARNING: problem with directory " d ", dropping it from tests path"))
		    #f)))
	    (append paths (list (conc *toppath* "/tests"))))))

(define (tests:get-valid-tests test-registry tests-paths)
  (if (null? tests-paths) 
      test-registry
      (let loop ((hed (car tests-paths))
		 (tal (cdr tests-paths)))
	(if (common:file-exists? hed)
	    (for-each (lambda (test-path)
			(let* ((tname   (last (string-split test-path "/")))
			       (tconfig (conc test-path "/testconfig")))
			  (if (and (not (hash-table-ref/default test-registry tname #f))
				   (common:file-exists? tconfig))
			      (hash-table-set! test-registry tname test-path))))
		      (glob (conc hed "/*"))))
	(if (null? tal)
	    test-registry
	    (loop (car tal)(cdr tal))))))

(define (tests:filter-test-names-not-matched test-names test-patts)
  (delete-duplicates
   (filter (lambda (testname)
	     (not (tests:match test-patts testname #f)))
	   test-names)))


(define (tests:filter-test-names test-names test-patts)
  (delete-duplicates
   (filter (lambda (testname)
	     (tests:match test-patts testname #f))
	   test-names)))

;; itemmap is a list of testname patterns to maps
;;     test1 .*/bar/(\d+) foo/\1
;;     %     foo/([^/]+)  \1/bar
;;
;; # NOTE: the line with the single % could be the result of
;; #       itemmap entry in requirements (legacy). The itemmap
;; #       requirements entry is deprecated
;;
(define (tests:get-itemmaps tconfig)
  (let ((base-itemmap  (configf:lookup tconfig "requirements" "itemmap"))
	(itemmap-table (configf:get-section tconfig "itemmap")))
    (append (if base-itemmap
		(list (list "%" base-itemmap))
		'())
	    (if itemmap-table
		itemmap-table
		'()))))

;; given a list of itemmaps (testname . map), return the first match
;;
(define (tests:lookup-itemmap itemmaps testname)
  (let ((best-matches (filter (lambda (itemmap)
				(tests:match (car itemmap) testname #f))
			      itemmaps)))
    (if (null? best-matches)
	#f
	(let ((res (car best-matches)))
	  ;; (debug:print 0 *default-log-port* "res=" res)
	  (cond
	   ((string? res) res) ;;; FIX THE ROOT CAUSE HERE ....
	   ((null? res)   #f)
	   ((string? (cdr res)) (cdr res))  ;; it is a pair
	   ((string? (cadr res))(cadr res)) ;; it is a list
	   (else cadr res))))))

;; return items given config
;;
(define (tests:get-items tconfig)
  (let ((items      (hash-table-ref/default tconfig "items" #f)) ;; items 4
	(itemstable (hash-table-ref/default tconfig "itemstable" #f))) 
    ;; if either items or items table is a proc return it so test running
    ;; process can know to call items:get-items-from-config
    ;; if either is a list and none is a proc go ahead and call get-items
    ;; otherwise return #f - this is not an iterated test
    (cond
     ((procedure? items)      
      (debug:print-info 4 *default-log-port* "items is a procedure, will calc later")
      items)            ;; calc later
     ((procedure? itemstable)
      (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later")
      itemstable)       ;; calc later
     ((filter (lambda (x)
		(let ((val (car x)))
		  (if (procedure? val) val #f)))
	      (append (if (list? items) items '())
		      (if (list? itemstable) itemstable '())))
      'have-procedure)
     ((or (list? items)(list? itemstable)) ;; calc now
      (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n"
			"    items: " items " itemstable: " itemstable)
      (items:get-items-from-config tconfig))
     (else #f))))                           ;; not iterated


;; returns waitons waitors tconfigdat
;;
(define (tests:get-waitons test-name all-tests-registry)
   (let* ((config  (tests:get-testconfig test-name #f all-tests-registry 'return-procs))) ;; assuming no problems with immediate evaluation, this could be simplified ('return-procs -> #t)
     (let ((instr (if config 
		      (config-lookup config "requirements" "waiton")
		      (begin ;; No config means this is a non-existant test
			(debug:print-error 0 *default-log-port* "non-existent required test \"" test-name "\"")
			(exit 1))))
	   (instr2 (if config
		       (config-lookup config "requirements" "waitor")
		       "")))
       (debug:print-info 8 *default-log-port* "waitons string is " instr ", waitors string is " instr2)
       (let ((newwaitons
	      (string-split (cond
			     ((procedure? instr) ;; here 
			      (let ((res (instr)))
				(debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " test-name)
				res))
			     ((string? instr)     instr)
			     (else 
			      ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name)
			      ""))))
	     (newwaitors
	      (string-split (cond
			     ((procedure? instr2)
			      (let ((res (instr2)))
				(debug:print-info 8 *default-log-port* "waitor procedure results in string " res " for test " test-name)
				res))
			     ((string? instr2)     instr2)
			     (else 
			      ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " test-name)
			      "")))))
	 (values
	  ;; the waitons
	  (filter (lambda (x)
		    (if (hash-table-ref/default all-tests-registry x #f)
			#t
			(begin
			  (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x)
			  #f)))
		  newwaitons)
	  (filter (lambda (x)
		    (if (hash-table-ref/default all-tests-registry x #f)
			#t
			(begin
			  (debug:print-error 0 *default-log-port* "test " test-name " has unrecognised waiton testname " x)
			  #f)))
		  newwaitors)
	  config)))))
					     
;; given waiting-test that is waiting on waiton-test extend test-patt appropriately
;;
;;  genlib/testconfig               sim/testconfig
;;  genlib/sch                      sim/sch/cell1
;;
;;  [requirements]                  [requirements]
;;                                  mode itemwait
;;                                  # trim off the cell to determine what to run for genlib
;;                                  itemmap /.*
;;
;;                                  waiting-test is waiting on waiton-test so we need to create a pattern for waiton-test given waiting-test and itemmap
;; BB> (tests:extend-test-patts "normal-second/2" "normal-second" "normal-first" '())
;; observed -> "normal-first/2,normal-first/,normal-second/2,normal-second/"
;; expected -> "normal-first,normal-second/2,normal-second/"
;; testpatt = normal-second/2
;; waiting-test = normal-second
;; waiton-test = normal-first
;; itemmaps = ()

(define (tests:extend-test-patts test-patt waiting-test waiton-test itemmaps itemized-waiton)
  (cond
   (itemized-waiton
    (let* ((itemmap          (tests:lookup-itemmap itemmaps waiton-test))
           (patts            (string-split test-patt ","))
           (waiting-test-len (+ (string-length waiting-test) 1))
           (patts-waiton     (map (lambda (x)  ;; for each incoming patt that matches the waiting test
                                    (let* ((modpatt (if itemmap (db:convert-test-itempath x itemmap) x)) 
                                           (newpatt (conc waiton-test "/" (substring modpatt waiting-test-len (string-length modpatt)))))
                                      ;; (conc waiting-test "/," waiting-test "/" (substring modpatt waiton-test-len (string-length modpatt)))))
                                      ;; (print "in map, x=" x ", newpatt=" newpatt)
                                      newpatt))
                                  (filter (lambda (x)
                                            (eq? (substring-index (conc waiting-test "/") x) 0)) ;; is this patt pertinent to the waiting test
                                          patts)))
           (extended-test-patt   (append patts (if (null? patts-waiton)
                                                   (list (conc waiton-test "/%")) ;; really shouldn't add the waiton forcefully like this
                                                   patts-waiton)))
           (extended-test-patt-with-toplevels
            (fold (lambda (testpatt-item accum )
                    (let ((my-match (string-match "^([^%\\/]+)\\/.+$" testpatt-item)))
                      (cons testpatt-item
                            (if my-match
                                (cons
                                 (conc (cadr my-match) "/")
                                 accum)
                                accum))))
                  '()
                  extended-test-patt)))
      (string-intersperse (delete-duplicates extended-test-patt-with-toplevels) ",")))
   (else ;; not waiting on items, waiting on entire waiton test.
    (let* ((patts (string-split test-patt ","))
           (new-patts (if (member waiton-test patts)
                          patts
                          (cons waiton-test patts))))
      (string-intersperse (delete-duplicates new-patts) ",")))))


  
;; tests:glob-like-match 
(define (tests:glob-like-match patt str) 
  (let ((like (substring-index "%" patt)))
    (let* ((notpatt  (equal? (substring-index "~" patt) 0))
	   (newpatt  (if notpatt (substring patt 1) patt))
	   (finpatt  (if like
			(string-substitute (regexp "%") ".*" newpatt #f)
			(string-substitute (regexp "\\*") ".*" newpatt #f)))
	   (res      #f))
      ;; (print "tests:glob-like-match => notpatt: " notpatt ", newpatt: " newpatt ", finpatt: " finpatt)
      (set! res (string-match (regexp finpatt (if like #t #f)) str))
      (if notpatt (not res) res))))

;; if itempath is #f then look only at the testname part
;;
(define (tests:match patterns testname itempath #!key (required '()))
  (if (string? patterns)
      (let ((patts (append (string-split patterns ",") required)))
	(if (null? patts) ;;; no pattern(s) means no match
	    #f
	    (let loop ((patt (car patts))
		       (tal  (cdr patts)))
	      ;; (print "loop: patt: " patt ", tal " tal)
	      (if (string=? patt "")
		  #f ;; nothing ever matches empty string - policy
		  (let* ((patt-parts (string-match (regexp "^([^\\/]*)(\\/(.*)|)$") patt))
			 (test-patt  (cadr patt-parts))
			 (item-patt  (cadddr patt-parts)))
		    ;; special case: test vs. test/
		    ;;   test  => "test" "%"
		    ;;   test/ => "test" ""
		    (if (and (not (substring-index "/" patt)) ;; no slash in the original
			     (or (not item-patt)
				 (equal? item-patt "")))      ;; should always be true that item-patt is ""
			(set! item-patt "%"))
		    ;; (print "tests:match => patt-parts: " patt-parts ", test-patt: " test-patt ", item-patt: " item-patt)
		    (if (and (tests:glob-like-match test-patt testname)
			     (or (not itempath)
				 (tests:glob-like-match (if item-patt item-patt "") itempath)))
			#t
			(if (null? tal)
			    #f
			    (loop (car tal)(cdr tal)))))))))))

;; Check for waiver eligibility
;;
(define (tests:check-waiver-eligibility testdat prev-testdat)
  (let* ((test-registry (make-hash-table))
	 (testconfig  (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f))
	 (test-rundir ;; (sdb:qry 'passstr 
	  (db:test-get-rundir testdat)) ;; )
	 (prev-rundir ;; (sdb:qry 'passstr 
	  (db:test-get-rundir prev-testdat)) ;; )
	 (waivers     (if testconfig (configf:section-vars testconfig "waivers") '()))
	 (waiver-rx   (regexp "^(\\S+)\\s+(.*)$"))
	 (diff-rule   "diff %file1% %file2%")
	 (logpro-rule "diff %file1% %file2% | logpro %waivername%.logpro %waivername%.html"))
    (if (not (common:file-exists? test-rundir))
	(begin
	  (debug:print-error 0 *default-log-port* "test run directory is gone, cannot propagate waiver")
	  #f)
	(begin
	  (push-directory test-rundir)
	  (let ((result (if (null? waivers)
			    #f
			    (let loop ((hed (car waivers))
				       (tal (cdr waivers)))
			      (debug:print 0 *default-log-port* "INFO: Applying waiver rule \"" hed "\"")
			      (let* ((waiver      (configf:lookup testconfig "waivers" hed))
				     (wparts      (if waiver (string-match waiver-rx waiver) #f))
				     (waiver-rule (if wparts (cadr wparts)  #f))
				     (waiver-glob (if wparts (caddr wparts) #f))
				     (logpro-file (if waiver
						      (let ((fname (conc hed ".logpro")))
							(if (common:file-exists? fname)
							    fname 
							    (begin
							      (debug:print 0 *default-log-port* "INFO: No logpro file " fname " falling back to diff")
							      #f)))
						      #f))
				     ;; if rule by name of waiver-rule is found in testconfig - use it
				     ;; else if waivername.logpro exists use logpro-rule
				     ;; else default to diff-rule
				     (rule-string (let ((rule (configf:lookup testconfig "waiver_rules" waiver-rule)))
						    (if rule
							rule
							(if logpro-file
							    logpro-rule
							    (begin
							      (debug:print 0 *default-log-port* "INFO: No logpro file " logpro-file " found, using diff rule")
							      diff-rule)))))
				     ;; (string-substitute "%file1%" "foofoo.txt" "This is %file1% and so is this %file1%." #t)
				     (processed-cmd (string-substitute 
						     "%file1%" (conc test-rundir "/" waiver-glob)
						     (string-substitute
						      "%file2%" (conc prev-rundir "/" waiver-glob)
						      (string-substitute
						       "%waivername%" hed rule-string #t) #t) #t))
				     (res            #f))
				(debug:print 0 *default-log-port* "INFO: waiver command is \"" processed-cmd "\"")
				(if (eq? (system processed-cmd) 0)
				    (if (null? tal)
					#t
					(loop (car tal)(cdr tal)))
				    #f))))))
	    (pop-directory)
	    result)))))

;; Do not rpc this one, do the underlying calls!!!
(define (tests:test-set-status! run-id test-id state status comment dat #!key (work-area #f))
  (let* ((real-status status)
	 (otherdat    (if dat dat (make-hash-table)))
	 (testdat     (rmt:get-test-info-by-id run-id test-id))
	 (test-name   (db:test-get-testname  testdat))
	 (item-path   (db:test-get-item-path testdat))
	 ;; before proceeding we must find out if the previous test (where all keys matched except runname)
	 ;; was WAIVED if this test is FAIL

	 ;; NOTES:
	 ;;  1. Is the call to test:get-previous-run-record remotified?
	 ;;  2. Add test for testconfig waiver propagation control here
	 ;;
	 (prev-test   (if (equal? status "FAIL")
			  (rmt:get-previous-test-run-record run-id test-name item-path)
			  #f))
	 (waived   (if prev-test
		       (if prev-test ;; true if we found a previous test in this run series
			   (let ((prev-status  (db:test-get-status  prev-test))
				 (prev-state   (db:test-get-state   prev-test))
				 (prev-comment (db:test-get-comment prev-test)))
			     (debug:print 4 *default-log-port* "prev-status " prev-status ", prev-state " prev-state ", prev-comment " prev-comment)
			     (if (and (equal? prev-state  "COMPLETED")
				      (equal? prev-status "WAIVED"))
				 (if comment
				     comment
				     prev-comment) ;; waived is either the comment or #f
				 #f))
			   #f)
		       #f)))
    (if (and waived 
	     (tests:check-waiver-eligibility testdat prev-test))
	(set! real-status "WAIVED"))

    (debug:print 4 *default-log-port* "real-status " real-status ", waived " waived ", status " status)

    ;; update the primary record IF state AND status are defined
    (if (and state status)
	(begin
	  (rmt:set-state-status-and-roll-up-items run-id test-id item-path state real-status (if waived waived comment))
	  ;; (mt:process-triggers run-id test-id state real-status) ;; triggers are called in test-set-state-status
	  ))
    
    ;; if status is "AUTO" then call rollup (note, this one modifies data in test
    ;; run area, it does remote calls under the hood.
    ;; (if (and test-id state status (equal? status "AUTO")) 
    ;; 	(rmt:test-data-rollup run-id test-id status))

    ;; add metadata (need to do this way to avoid SQL injection issues)

    ;; :first_err
    ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f)))
    ;;   (if val
    ;;       (sqlite3:execute db "UPDATE tests SET first_err=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))
    ;; 
    ;; ;; :first_warn
    ;; (let ((val (hash-table-ref/default otherdat ":first_warn" #f)))
    ;;   (if val
    ;;       (sqlite3:execute db "UPDATE tests SET first_warn=? WHERE run_id=? AND testname=? AND item_path=?;" val run-id test-name item-path)))

    (let ((category (hash-table-ref/default otherdat ":category" ""))
	  (variable (hash-table-ref/default otherdat ":variable" ""))
	  (value    (hash-table-ref/default otherdat ":value"    #f))
	  (expected (hash-table-ref/default otherdat ":expected" "n/a"))
	  (tol      (hash-table-ref/default otherdat ":tol"      "n/a"))
	  (units    (hash-table-ref/default otherdat ":units"    ""))
	  (type     (hash-table-ref/default otherdat ":type"     ""))
	  (dcomment (hash-table-ref/default otherdat ":comment"  "")))
      (debug:print 4 *default-log-port* 
		   "category: " category ", variable: " variable ", value: " value
		   ", expected: " expected ", tol: " tol ", units: " units)
      (if (and value) ;; require only value; BB was- all three required
	  (let ((dat (conc category ","
			   variable ","
			   value    ","
			   expected ","
			   tol      ","
			   units    ","
			   dcomment ",," ;; extra comma for status
			   type     )))
	    ;; This was run remote, don't think that makes sense. Perhaps not, but that is the easiest path for the moment.
	    (rmt:csv->test-data run-id test-id
				dat)
	    ;; This was added in check-in a5adfa3f9a. Message was: "...added delay in set-values to allow for delayed write on server start"
	    ;; I'm inserting an arbitrary rmt: call to force/ensure that the server is available to (hopefully) prevent a communication issue.
	    (rmt:get-var "MEGATEST_VERSION") ;; this does NOTHING but ensure the server is reachable. This is almost certainly NOT needed :)
            ;; BB - commentiong out arbitrary 10 second wait (thread-sleep! 10) ;; add 10 second delay before quit incase rmt needs time to start a server.
            )))
      
    ;; need to update the top test record if PASS or FAIL and this is a subtest
    ;;;;;; (if (not (equal? item-path ""))
    ;;;;;;     (rmt:set-state-status-and-roll-up-items run-id test-name item-path state status #f) ;;;;;)

    (if (or (and (string? comment)
		 (string-match (regexp "\\S+") comment))
	    waived)
	(let ((cmt  (if waived waived comment)))
	  (rmt:general-call 'set-test-comment run-id cmt test-id)))))

(define (tests:test-set-toplog! run-id test-name logf) 
  (rmt:general-call 'tests:test-set-toplog run-id logf run-id test-name))

(define (tests:summarize-items run-id test-id test-name force)
  ;; if not force then only update the record if one of these is true:
  ;;   1. logf is "log/final.log
  ;;   2. logf is same as outputfilename
  (let* ((outputfilename (conc "megatest-rollup-" test-name ".html"))
	 (orig-dir       (current-directory))
	 (logf-info      (rmt:test-get-logfile-info run-id test-name))
	 (logf           (if logf-info (cadr logf-info) #f))
	 (path           (if logf-info (car  logf-info) #f)))
    ;; This query finds the path and changes the directory to it for the test
    (if (and (string? path)
	     (directory? path)) ;; can get #f here under some wierd conditions. why, unknown ...
	(begin
	  (debug:print 4 *default-log-port* "Found path: " path)
	  (change-directory path))
	;; (set! outputfilename (conc path "/" outputfilename)))
	(debug:print-error 0 *default-log-port* "summarize-items for run-id=" run-id ", test-name=" test-name ", no such path: " path))
    (debug:print 4 *default-log-port* "summarize-items with logf " logf ", outputfilename " outputfilename " and force " force)
    (if (or (equal? logf "logs/final.log")
	    (equal? logf outputfilename)
	    force)
	(let ((my-start-time (current-seconds))
	      (lockf         (conc outputfilename ".lock")))
	  (let loop ((have-lock  (common:simple-file-lock lockf)))
	    (if have-lock
		(let ((script (configf:lookup *configdat* "testrollup" test-name)))
		  (print "Obtained lock for " outputfilename)
		  (rmt:set-state-status-and-roll-up-items run-id test-name "" #f #f #f)
		  (if script
		      (system (conc script " > " outputfilename " & "))
		      (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename))
		  (common:simple-file-release-lock lockf)
		  (change-directory orig-dir)
		  ;; NB// tests:test-set-toplog! is remote internal...
		  (tests:test-set-toplog! run-id test-name outputfilename))
		;; didn't get the lock, check to see if current update started later than this 
		;; update, if so we can exit without doing any work
		(if (> my-start-time (handle-exceptions
					 exn
					 0
				       (file-modification-time lockf)))
		    ;; we started since current re-gen in flight, delay a little and try again
		    (begin
		      (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it")
		      (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds
		      (loop (common:simple-file-lock lockf))))))))))

(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)
  (let ((counts              (make-hash-table))
	(statecounts         (make-hash-table))
	(outtxt              "")
	(tot                 0)
	(testdat             (rmt:test-get-records-for-index-file run-id test-name)))
    (with-output-to-file outputfilename
      (lambda ()
	(set! outtxt (conc outtxt "<html><title>Summary: " test-name 
			   "</title><body><h2>Summary for " test-name "</h2>"))
	(for-each
	 (lambda (testrecord)
	   (let ((id             (vector-ref testrecord 0))
		 (itempath       (vector-ref testrecord 1))
		 (state          (vector-ref testrecord 2))
		 (status         (vector-ref testrecord 3))
		 (run_duration   (vector-ref testrecord 4))
		 (logf           (vector-ref testrecord 5))
		 (comment        (vector-ref testrecord 6)))
	     (hash-table-set! counts status (+ 1 (hash-table-ref/default counts status 0)))
	     (hash-table-set! statecounts state (+ 1 (hash-table-ref/default statecounts state 0)))
	     (set! outtxt (conc outtxt "<tr>"
				;; "<td><a href=\"" itempath "/" logf "\"> " itempath "</a></td>" 
				"<td><a href=\"" itempath "/test-summary.html\"> " itempath "</a></td>" 
				"<td>" state    "</td>" 
				"<td><font color=" (common:get-color-from-status status)
				">"   status   "</font></td>"
				"<td>" (if (equal? comment "")
					   "&nbsp;"
					   comment) "</td>"
					   "</tr>"))))
	 (if (list? testdat)
	     testdat
	     (begin
	       (print "ERROR: failed to get records with rmt:test-get-records-for-index-file run-id=" run-id "test-name=" test-name)
	       '())))
	
	(print "<table><tr><td valign=\"top\">")
	;; Print out stats for status
	(set! tot 0)
	(print "<table cellspacing=\"0\" border=\"1\"><tr><td colspan=\"2\"><h2>State stats</h2></td></tr>")
	(for-each (lambda (state)
		    (set! tot (+ tot (hash-table-ref statecounts state)))
		    (print "<tr><td>" state "</td><td>" (hash-table-ref statecounts state) "</td></tr>"))
		  (hash-table-keys statecounts))
	(print "<tr><td>Total</td><td>" tot "</td></tr></table>")
	(print "</td><td valign=\"top\">")
	;; Print out stats for state
	(set! tot 0)
	(print "<table cellspacing=\"0\" border=\"1\"><tr><td colspan=\"2\"><h2>Status stats</h2></td></tr>")
	(for-each (lambda (status)
		    (set! tot (+ tot (hash-table-ref counts status)))
		    (print "<tr><td><font color=\"" (common:get-color-from-status status) "\">" status
			   "</font></td><td>" (hash-table-ref counts status) "</td></tr>"))
		  (hash-table-keys counts))
	(print "<tr><td>Total</td><td>" tot "</td></tr></table>")
	(print "</td></td></tr></table>")
	
	(print "<table cellspacing=\"0\" border=\"1\">" 
	       "<tr><td>Item</td><td>State</td><td>Status</td><td>Comment</td>"
	       outtxt "</table></body></html>")
	;; (release-dot-lock outputfilename)
	;;(rmt:update-run-stats 
	;; run-id
	;; (hash-table-map
	;;  state-status-counts
	;;  (lambda (key val)
	;;	(append key (list val)))))
	))))

(define tests:css-jscript-block
#<<EOF
<style type="text/css">
ul.LinkedList { display: block; }
/* ul.LinkedList ul { display: none; } */
.HandCursorStyle { cursor: pointer; cursor: hand; }  /* For IE */
th {background-color: #8c8c8c;}
td.test {background-color: #d9dbdd;}
td.PASS {background-color: #347533;}
td.FAIL {background-color: #cc2812;}
td.SKIP{background-color: #FFD733;}
td.WARN {background-color: #EA8724;}
td.WAIVED {background-color: #838A12;}
td.ABORT{background-color: #EA24B7;}
.PASS .link, .SKIP .link, .WARN .link,.WAIVED .link,.ABORT .link, .FAIL .link{color: #FFFFFF;}


</style>


  <script type="text/JavaScript">

    function filtersome() {
  $("tr").show();
  $(".test").filter(
    function() {
      var names = $('#testname').val().split(',');
      var good=1;
      for (var i=0, len=names.length; i<len; i++) {
        var uname=names[i];
        console.log("Trying to check for " + uname); 
        if($(this).text().indexOf(uname) != -1) {
          good= 0;
          console.log("Found "+uname);
        }
      }
      return good; 
    }
  ).parent().hide();
//  $(".sum").show();
}
  
    // Add this to the onload event of the BODY element
    function addEvents() {
      activateTree(document.getElementById("LinkedList1"));
    }

    // This function traverses the list and add links 
    // to nested list items
    function activateTree(oList) {
      // Collapse the tree
      for (var i=0; i < oList.getElementsByTagName("ul").length; i++) {
        oList.getElementsByTagName("ul")[i].style.display="none";            
      }                                                                  
      // Add the click-event handler to the list items
      if (oList.addEventListener) {
        oList.addEventListener("click", toggleBranch, false);
      } else if (oList.attachEvent) { // For IE
        oList.attachEvent("onclick", toggleBranch);
      }
      // Make the nested items look like links
      addLinksToBranches(oList);
    }

    // This is the click-event handler
    function toggleBranch(event) {
      var oBranch, cSubBranches;
      if (event.target) {
        oBranch = event.target;
      } else if (event.srcElement) { // For IE
        oBranch = event.srcElement;
      }
      cSubBranches = oBranch.getElementsByTagName("ul");
      if (cSubBranches.length > 0) {
        if (cSubBranches[0].style.display == "block") {
          cSubBranches[0].style.display = "none";
        } else {
          cSubBranches[0].style.display = "block";
        }
      }
    }

    // This function makes nested list items look like links
    function addLinksToBranches(oList) {
      var cBranches = oList.getElementsByTagName("li");
      var i, n, cSubBranches;
      if (cBranches.length > 0) {
        for (i=0, n = cBranches.length; i < n; i++) {
          cSubBranches = cBranches[i].getElementsByTagName("ul");
          if (cSubBranches.length > 0) {
            addLinksToBranches(cSubBranches[0]);
            cBranches[i].className = "HandCursorStyle";
            cBranches[i].style.color = "blue";
            cSubBranches[0].style.color = "black";
            cSubBranches[0].style.cursor = "auto";
          }
        }
      }
    }
  </script>
EOF
)

(define tests:css-jscript-block-dynamic 
#<<EOF
           <script src= ./jquery3.1.0.js></script> 
EOF
)

(define  (test:js-block javascript-lib)
   (conc  "<script src=" javascript-lib "></script>" ))


(define tests:css-jscript-block-static (test:js-block *java-script-lib*))

(define (tests:css-jscript-block-cond dynamic) 
      (if (equal? dynamic  #t)
       tests:css-jscript-block-dynamic
       tests:css-jscript-block-static))

       
(define (tests:run-record->test-path run numkeys)
   (append (take (vector->list run) numkeys)
	   (list (vector-ref run (+ 1 numkeys)))))


(define (tests:get-rest-data runs header numkeys)
   (let ((resh (make-hash-table)))
   (for-each
     (lambda (run)
        (let* ((run-id (db:get-value-by-header run header "id"))
               (run-dir      (tests:run-record->test-path run numkeys))
	       (test-data    (rmt:get-tests-for-run
				   run-id
                                   "%"       ;; testnamepatt
				   '()        ;; states
				   '()        ;; statuses
				   #f         ;; offset
				   #f         ;; num-to-get
				   #f         ;; hide/not-hide
				   #f         ;; sort-by
				   #f         ;; sort-order
				   #f         ;; 'shortlist                           ;; qrytype
                                   0         ;; last update
				   #f)))
            
            (map (lambda (test)
                 (let* ((test-name (vector-ref test 2))
                        (test-html-path (conc (vector-ref test 10) "/" (vector-ref test 13)))
                        (test-item (conc test-name ":" (vector-ref test 11)))
                        (test-status (vector-ref test 4)))
                         
                (if (not (hash-table-ref/default resh test-name  #f))
                      (hash-table-set! resh test-name   (make-hash-table)))
                (if (not (hash-table-ref/default (hash-table-ref/default resh test-name  #f)  test-item  #f))
                       (hash-table-set! (hash-table-ref/default resh test-name  #f) test-item   (make-hash-table))) 
               (hash-table-set!  (hash-table-ref/default (hash-table-ref/default resh test-name  #f) test-item #f) run-id (list test-status test-html-path)))) 
        test-data)))
      runs)
   resh))


;; tests:genrate dashboard body 
;;

(define (tests:dashboard-body page pg-size keys numkeys  total-runs linktree area-name get-prev-links get-next-links flag run-patt target-patt)
  (let* ((start (* page pg-size)) 
	       ;(runsdat   (rmt:get-runs "%" pg-size start (map (lambda (x)(list x "%")) keys)))
         (runsdat   (rmt:get-runs-by-patt  keys run-patt target-patt start pg-size #f 0 sort-order: "desc"))
                    ; db:get-runs-by-patt   keys runnamepatt targpatt offset limit fields last-update   
	       (header    (vector-ref runsdat 0))
	       (runs      (vector-ref runsdat 1))
         (ctr 0)
         (test-runs-hash (tests:get-rest-data runs header numkeys))
         (test-list (hash-table-keys test-runs-hash))) 
  
  (s:html tests:css-jscript-block (tests:css-jscript-block-cond flag)
		   (s:title "Summary for " area-name)
		   (s:body 'onload "addEvents();"
                          (get-prev-links page linktree)
                          (get-next-links page linktree total-runs)
                           
			   (s:h1 "Summary for " area-name)
                           (s:h3 "Filter" )
                           (s:input 'type "text"  'name "testname" 'id "testname" 'length "30" 'onkeyup "filtersome()")
			   ;; top list
         
			   (s:table 'id "LinkedList1" 'border "1" 'cellspacing 0
                            (map (lambda (key)
				 (let* ((res (s:tr 'class "something" 
				  (s:th key )
                                   (map (lambda (run)
                                   (s:th  (vector-ref run ctr)))
                                  runs))))
                             (set! ctr (+ ctr 1))
                               res))
                               keys)
                               (s:tr
				 (s:th "Run Name")
                                  (map (lambda (run)
                                   (s:th (db:get-value-by-header run header "runname")))
                                  runs))
                              
                               (map (lambda (test-name)
                                 (let* ((item-hash (hash-table-ref/default test-runs-hash test-name  #f))
                                         (item-keys (sort (hash-table-keys item-hash) string<=?))) 
                                          (map (lambda (item-name)  
  		                             (let* ((res (s:tr  'class item-name
				                         (s:td  item-name 'class "test" )
                                                           (map (lambda (run)
                                                               (let* ((run-test (hash-table-ref/default item-hash item-name  #f))
                                                                      (run-id (db:get-value-by-header run header "id"))
                                                                      (result (hash-table-ref/default run-test run-id "n/a"))
                                                                      ;(relative-path (get-relative-path)) 
                                                                      (status (if (string? result)
									                                                            	result
										                                                            (car result)))
                                                                        (link (if (string? result)
										                                                            result
                                                                                (if (equal? flag #t) 
                                                                                (s:a (car result) 'href (conc "./test_log?runid=" run-id "&testname="  item-name ))
  																																						  (s:a (car result) 'href (string-substitute  (conc linktree "/")  "" (cadr result)  "-"))))))
                                                                       (s:td  link 'class status)))
                                                                runs))))
                                                        res))
                                                   item-keys)))
                               test-list)))))) 

;; (tests:create-html-tree "test-index.html")
;;
(define (tests:create-html-tree outf)
   (let* ((lockfile  (conc outf ".lock"))
	 			 (runs-to-process '())
         (linktree  (common:get-linktree))
         (area-name (common:get-area-name *alldat*))
	  		 (keys      (rmt:get-keys))
	  		 (numkeys   (length keys))
         (run-patt (or (args:get-arg "-run-patt")
                        (args:get-arg "-runname")
                        "%"))
         (target (or  (args:get-arg "-target-patt") 
											(args:get-arg "-target")
                      "%"))
         (targlist (string-split target "/"))
         (numtarg  (length targlist))  
         (targtweaked (if (> numkeys numtarg)
			   								(append targlist (make-list (- numkeys numtarg) "%"))
			  								targlist))
         (target-patt (string-join targtweaked "/"))
         ;(total-runs  (rmt:get-num-runs "%")) ;;this needs to be changed to filter by target
          (total-runs (rmt:get-runs-cnt-by-patt run-patt target-patt keys )) 
         (pg-size 10))
    (if (common:simple-file-lock lockfile)
        (begin
         ;(print total-runs)    
        (let loop ((page 0))
	(let* ((oup       (open-output-file (or outf (conc linktree "/page" page ".html"))))
               (get-prev-links (lambda (page linktree )   
                            (let* ((link  (if (not (eq? page 0))
                                   (s:a "&lt;&lt;prev" 'href (conc  "page" (- page 1) ".html"))
                                   (s:a "" 'href (conc   "page"  page ".html")))))
                               link)))
               (get-next-links (lambda (page linktree total-runs)   
                            (let* ((link  (if (> total-runs (+ 10 (* page pg-size)))
                                   (s:a "next&gt;&gt;" 'href (conc  "page"  (+ page 1) ".html"))
                                   (s:a "" 'href (conc   "page" page  ".html")))))
                               link))) )
          (print "total runs: " total-runs) 
          (s:output-new
	   			 oup
	   					(tests:dashboard-body page pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #f run-patt target-patt)) ;; update this function
          (close-output-port oup)
         ; (set! page (+ 1 page))
          (if (> total-runs (* (+ 1 page) pg-size))
           (loop (+ 1  page)))))
	  (common:simple-file-release-lock lockfile))
	            
	#f)))


(define (tests:readlines filename)
  (call-with-input-file filename
    (lambda (p)
      (let loop ((line (read-line p))
                 (result '()))
        (if (eof-object? line)
            (reverse result)
            (loop (read-line p) (cons line result)))))))

(define (tests:get-test-log run-id test-name item-name)
  (let* ((test-data    (rmt:get-tests-for-run
				   (string->number run-id)
                                    test-name      ;; testnamepatt
				   '()        ;; states
				   '()        ;; statuses
				   #f         ;; offset
				   #f         ;; num-to-get
				   #f         ;; hide/not-hide
				   #f         ;; sort-by
				   #f         ;; sort-order
				   #f         ;; 'shortlist                           ;; qrytype
                                   0         ;; last update
				   #f))
         (path "")
         (found 0))
    (debug:print-info 0 *default-log-port* "found: " found )

   (let loop ((hed (car test-data))
		 (tal (cdr test-data)))
          (debug:print-info 0 *default-log-port* "item: " (vector-ref hed 11) (vector-ref hed 10) "/" (vector-ref hed 13))

	(if (equal? (vector-ref hed 11) item-name)
            (begin
              (set! found 1) 
	      (set! path (conc (vector-ref hed 10) "/" (vector-ref hed 13)))))
	    (if (and (not (null? tal)) (equal? found 0))
		(loop (car tal)(cdr tal))))
   (if (equal? path "")
     "<H2>Data not found</H2>"
     (string-join (tests:readlines path) "\n"))))


(define (tests:dynamic-dboard page)
;(define (tests:create-html-tree o)
 (let* (
;(page "1")
          (linktree  (common:get-linktree))
         (area-name (common:get-area-name *alldat*))
	       (keys      (rmt:get-keys))
	       (numkeys   (length keys))
         (targtweaked (make-list numkeys "%"))
         (target-patt (string-join targtweaked "/"))
         (total-runs  (rmt:get-num-runs "%"))
         (pg-size 10)
         (pg (if (equal? page #f)
                 0
                 (- (string->number page) 1)))
          (get-prev-links  (lambda (pg linktree)
                           (debug:print-info 0 *default-log-port* "val: " (- 1 pg))
                          (let* ((link  (if (not (eq? pg 0))
                               (s:a  "&lt;&lt;prev " 'href (conc  "dashboard?page="  pg  ))
                               (s:a "" 'href (conc  "dashboard?page=" pg)))))
                               link)))
          (get-next-links   (lambda (pg linktree total-runs)  
                            (debug:print-info 0 *default-log-port* "val: " pg)
                             (debug:print-info 0 *default-log-port* "val: " total-runs " size" pg-size)
 
                            (let* ((link  (if (> total-runs (+ 10 (* pg pg-size)))
                              (s:a  "next&gt;&gt; "  'href (conc  "dashboard?page="  (+ pg 2)  ))
                             (s:a "" 'href (conc  "dashboard?page=" pg  )))))
                             link)))
         (html-body (tests:dashboard-body pg pg-size keys numkeys total-runs linktree area-name get-prev-links get-next-links #t "%" target-patt))) ;; update tis function
        html-body))

(define (tests:create-html-summary outf)
 (let* ((lockfile  (conc outf ".lock"))
        (linktree  (common:get-linktree))
				(keys      (rmt:get-keys))
        (area-name (common:get-area-name *alldat*))
        (run-patt (or (args:get-arg "-run-patt")
                        (args:get-arg "-runname")
                        "%"))
        (target (or (args:get-arg "-target-patt")
                        (args:get-arg "-target")
                        "%"))
         (targlist (string-split target "/"))
         (numkeys  (length keys))
	       (numtarg  (length targlist))  
         (targtweaked (if (> numkeys numtarg)
			   								(append targlist (make-list (- numkeys numtarg) "%"))
			  								targlist))
        (target-patt (string-join targtweaked "/")))
    (if (common:simple-file-lock lockfile)
        (begin
          (let* (;(runsdat1   (rmt:get-runs run-patt #f #f (map (lambda (x)(list x "%")) keys)))
                 (runsdat   (rmt:get-runs-by-patt  keys run-patt target-patt #f #f #f 0))
					       (runs      (vector-ref runsdat 1))
                 (header      (vector-ref runsdat 0))
        	       (oup       (open-output-file (or outf (conc linktree "/targets.html"))))
                 (target-hash (test:create-target-hash runs header (length keys))))
           (test:create-target-html target-hash oup area-name linktree)
          (test:create-run-html  runs area-name linktree (length keys) header))
	  (common:simple-file-release-lock lockfile))
	#f)))

(define (test:get-test-hash test-data)
	(let ((resh (make-hash-table)))
    	(map (lambda (test)
        (let* ((test-name (vector-ref test 2))
               (test-html-path (if (file-exists? (conc (vector-ref test 10) "/test-summary.html"))
																 (conc (vector-ref test 10) "/test-summary.html" )
							 									 (conc (vector-ref test 10) "/" (vector-ref test 13))))
               (test-item  (vector-ref test 11))
               (test-status (vector-ref test 4)))
               (if (not (hash-table-ref/default resh test-item  #f))
                   (hash-table-set! resh test-item   (make-hash-table)))
               (hash-table-set! (hash-table-ref/default resh test-item  #f) test-name (list test-status test-html-path)))) 
        test-data)
resh))

(define (test:get-data->b-keys ordered-data a-keys)
  (delete-duplicates
   (sort (apply
	  append
	  (map (lambda (sub-key)
		 (let ((subdat (hash-table-ref ordered-data sub-key)))
		   (hash-table-keys subdat)))
	       a-keys))
	 string>=?)))


(define (test:create-run-html runs area-name linktree numkeys header)
  (map (lambda (run)
		 (let* ((target (string-join (take (vector->list run) numkeys) "/"))
						(run-name (db:get-value-by-header run header "runname"))
            (run-time (seconds->work-week/day-time (db:get-value-by-header run header "event_time")))
						(oup (if (file-exists? (conc linktree "/" target "/" run-name))
                        (open-output-file (conc linktree "/" target "/" run-name "/run.html"))
                         #f))
            (run-id (db:get-value-by-header run header "id"))
            (test-data    (rmt:get-tests-for-run
				  								 run-id
                           "%"       ;; testnamepatt
				  								 '()        ;; states
				   								 '()        ;; statuses
				  								 	#f         ;; offset
				  						 			#f         ;; num-to-get
				   									#f         ;; hide/not-hide
				  								  #f         ;; sort-by
				   									#f         ;; sort-order
				   									#f         ;; 'shortlist                           ;; qrytype
                            0         ;; last update
				  									#f))
            (item-test-hash (test:get-test-hash test-data))
            (items  (hash-table-keys item-test-hash))
 						(test-names (test:get-data->b-keys item-test-hash items)))
    (if oup
      (begin 
     (s:output-new
	   oup
	   (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f)
		   (s:title "Runs View " run-name)
		   (s:body
		     (s:h1 "Runs View " )
         (s:h3 "Target" target)
				 (s:p 
					(s:b "Run name" ) run-name)
         (s:p 
					(s:b "Run Date" ) run-time)
         (s:table 'border 1 'cellspacing 0
           (s:tr
           (s:th "Items")
           (map (lambda (test)
            (s:th test))
           test-names))  
           (map (lambda (item) 
					  (let* ((test-hash (hash-table-ref/default item-test-hash item  #f)))
								 (if test-hash
                  (begin
									(s:tr
					  			(s:td 'class "test" item)
            			(map (lambda (test)
						  		(let* ((test-details (hash-table-ref/default test-hash test  #f))
												(status (if test-details
																(car test-details)))
                        (link (if test-details 
														(string-substitute  (conc linktree "/" target "/" run-name "/")  "" (cadr test-details) "-"))))
                   (if test-details
											(s:td 'class status
												(s:a 'class "link" 'href link status ))
                      (s:td "")))) 			
									test-names))))))
				  (sort items string<=?))))))
		(close-output-port oup))
    (debug:print-info 0 "Skip: Dirctory structure " linktree "/" target "/" run-name " does not exist. Megatest will not create run.html"))))
runs))

(define (test:create-target-hash runs header numkeys)
  (let ((resh (make-hash-table)))
   (for-each
     (lambda (run)
        (let* ((run-name (db:get-value-by-header run header "runname"))
               (target   (string-join (take (vector->list run) numkeys) "/"))
               (run-list (hash-table-ref/default resh target  #f)))
               
               (if (not run-list)
                   (hash-table-set! resh target   (list run-name))
                   (hash-table-set! resh target   (cons run-name run-list)))))
      runs)
   resh))

(define (test:get-max-run-cnt target-hash targets)
   (let* ((cnt 0 ))
   (map (lambda (target)
        (let* ((runs  (hash-table-ref/default target-hash target  #f))
               (run-length (if runs
																(length runs)
                                 0)))
  
              (if (< cnt run-length)
               (set! cnt  run-length)))) 
		targets) 
cnt))
 
(define (test:pad-runs target-hash targets max-row-length)
 (map (lambda (target)
        (let loop ((run-list  (hash-table-ref/default target-hash target  #f)))
               (if (< (length run-list) max-row-length)
                 (begin  
               		 (hash-table-set! target-hash target   (cons "" run-list))
               		 (loop (hash-table-ref/default target-hash target  #f) ))))) 
		targets)
   target-hash)

(define (test:create-target-html target-hash oup area-name linktree)
  (let* ((targets (hash-table-keys target-hash))
         (max-row-length (test:get-max-run-cnt target-hash targets))
         (pad-runs-hash (test:pad-runs target-hash targets max-row-length)))
   (s:output-new
	   oup
	   (s:html tests:css-jscript-block (tests:css-jscript-block-cond #f)

		   (s:title "Target View " area-name)
		   (s:body
		   (s:h1 "Target View " area-name)
					(s:table 'id "LinkedList1" 'border "1" 'cellspacing 0
             (s:tr 'class "something" 
               (s:th "Target")
								(s:th 'colspan max-row-length "Runs"))                                              
                (let* ((tbl (map (lambda (target)
                      (s:tr
                      (s:td 'class "test" target)
										  (let* ((runs  (hash-table-ref/default target-hash target  #f))
														 (rest-row (map (lambda (run)
																				(if (equal? run "")
																						(s:td run)
                                            (if (file-exists?(conc linktree "/" target "/" run ))
																						(begin 
																							(s:td 
																							(s:a 'href (conc  target "/" run "/run.html") run))))))
																				(reverse runs))))
                              rest-row)))
                                   targets)))
                           tbl)))))
          (close-output-port oup)))


(define (tests:create-html-tree-old outf)
   (let* ((lockfile  (conc outf ".lock"))
	 (runs-to-process '()))
    (if (common:simple-file-lock lockfile)
	(let* ((linktree  (common:get-linktree))
	       (oup       (open-output-file (or outf (conc linktree "/runs-index.html"))))
	       (area-name (common:get-area-name *alldat*))
	       (keys      (rmt:get-keys))
	       (numkeys   (length keys))
	       (runsdat   (rmt:get-runs "%" #f #f (map (lambda (x)(list x "%")) keys)))
	       (header    (vector-ref runsdat 0))
	       (runs      (vector-ref runsdat 1))
	       (runtreedat (map (lambda (x)
				  (tests:run-record->test-path x numkeys))
				runs))
	       (runs-htree (common:list->htree runtreedat)))
	  (set! runs-to-process runs)
	  (s:output-new
	   oup
	   (s:html tests:css-jscript-block
		   (s:title "Summary for " area-name)
		   (s:body 'onload "addEvents();"
			   (s:h1 "Summary for " area-name)
			   ;; top list
			   (s:ul 'id "LinkedList1" 'class "LinkedList"
				 (s:li
				  "Runs"
				  (common:htree->html runs-htree
						      '()
						      (lambda (x p)
							(let* ((targ-path (string-intersperse p "/"))
                                                               (full-path (conc linktree "/" targ-path))
                                                               (run-name  (car (reverse p))))
                                                          (if (and (common:file-exists? full-path)
                                                                   (directory?   full-path)
                                                                   (file-write-access? full-path))
                                                              (s:a run-name 'href (conc targ-path "/run-summary.html"))
                                                              (begin
                                                                (debug:print 0 *default-log-port* "INFO: Can't create " targ-path "/run-summary.html")
                                                                (conc run-name " (Not able to create summary at " targ-path ")")))))))))))
          (close-output-port oup)
	  (common:simple-file-release-lock lockfile)
               
	  (for-each
	   (lambda (run)
	     (let* ((test-subpath (tests:run-record->test-path run numkeys))
		    (run-id       (db:get-value-by-header run header "id"))
                    (run-dir      (tests:run-record->test-path run numkeys))
		    (test-dats    (rmt:get-tests-for-run
				   run-id
                                   "%/"       ;; testnamepatt
				   '()        ;; states
				   '()        ;; statuses
				   #f         ;; offset
				   #f         ;; num-to-get
				   #f         ;; hide/not-hide
				   #f         ;; sort-by
				   #f         ;; sort-order
				   #f         ;; 'shortlist                           ;; qrytype
                                   0         ;; last update
				   #f))
                    (tests-tree-dat (map (lambda (test-dat)
                                         ;; (tests:run-record->test-path x numkeys))
                                         (let* ((test-name  (db:test-get-testname test-dat))
                                                (item-path  (db:test-get-item-path test-dat))
                                                (full-name  (db:test-make-full-name test-name item-path))
                                                (path-parts (string-split full-name)))
                                           path-parts))
                                       test-dats))
                    (tests-htree (common:list->htree tests-tree-dat))
                    (html-dir    (conc linktree "/" (string-intersperse run-dir "/")))
                    (html-path   (conc html-dir "/run-summary.html"))
                    (oup         (if (and (common:file-exists? html-dir)
                                          (directory?   html-dir)
                                          (file-write-access? html-dir))
                                     (open-output-file  html-path)
                                     #f)))
               ;; (print "run-dir: " run-dir ", tests-tree-dat: " tests-tree-dat)
               (if oup
                   (begin
                     (s:output-new
                      oup
                      (s:html tests:css-jscript-block
                              (s:title "Summary for " area-name)
                              (s:body 'onload "addEvents();"
                                      (s:h1 "Summary for " (string-intersperse run-dir "/"))
                                      ;; top list
                                      (s:ul 'id "LinkedList1" 'class "LinkedList"
                                            (s:li
                                             "Tests"
                                             (common:htree->html tests-htree
                                                                 '()
                                                                 (lambda (x p)
                                                                   (let* ((targ-path (string-intersperse p "/"))
                                                                          (test-name (car p))
                                                                          (item-path ;; (if (> (length p) 2) ;; test-name + run-name
                                                                           (string-intersperse p "/"))
                                                                          (full-targ (conc html-dir "/" targ-path))
                                                                          (std-file  (conc full-targ "/test-summary.html"))
                                                                          (alt-file  (conc full-targ "/megatest-rollup-" test-name ".html"))
                                                                          (html-file (if (common:file-exists? alt-file)
                                                                                         alt-file
                                                                                         std-file))
                                                                          (run-name  (car (reverse p))))
                                                                     (if (and (not (common:file-exists? full-targ))
                                                                              (directory? full-targ)
                                                                              (file-write-access? full-targ))
                                                                         (tests:summarize-test 
                                                                          run-id 
                                                                          (rmt:get-test-id run-id test-name item-path)))
                                                                     (if (common:file-exists? full-targ)
                                                                         (s:a run-name 'href html-file)
                                                                         (begin
                                                                           (debug:print 0 *default-log-port* "ERROR: can't access " full-targ)
                                                                           (conc "No summary for " run-name)))))
                                                                 ))))))
                     (close-output-port oup)))))
           runs)
          #t)
	#f)))







;; CHECK - WAS THIS ADDED OR REMOVED? MANUAL MERGE WITH API STUFF!!!
;;
;; get a pretty table to summarize steps
;;
;; (define (dcommon:process-steps-table steps);; db test-id #!key (work-area #f))
(define (tests:process-steps-table steps);; db test-id #!key (work-area #f))
;;  (let ((steps   (db:get-steps-for-test db test-id work-area: work-area)))
    ;; organise the steps for better readability
    (let ((res (make-hash-table)))
      (for-each 
       (lambda (step)
	 (debug:print 6 *default-log-port* "step=" step)
	 (let ((record (hash-table-ref/default 
			res 
			(tdb:step-get-stepname step)
			;;           0                      1    2    3       4         5       6       7
			;;        stepname                start end status Duration  Logfile Comment  first-id
			(vector (tdb:step-get-stepname step) ""   "" ""     ""        ""     ""       #f))))
	   (debug:print 6 *default-log-port* "record(before) = " record 
			"\nid:       " (tdb:step-get-id step)
			"\nstepname: " (tdb:step-get-stepname step)
			"\nstate:    " (tdb:step-get-state step)
			"\nstatus:   " (tdb:step-get-status step)
			"\ntime:     " (tdb:step-get-event_time step))
	   (if (not (vector-ref record 7))(vector-set! record 7 (tdb:step-get-id step))) ;; do not clobber the id if previously set
	   (case (string->symbol (tdb:step-get-state step))
	     ((start)(vector-set! record 1 (tdb:step-get-event_time step))
	      (vector-set! record 3 (if (equal? (vector-ref record 3) "")
					(tdb:step-get-status step)))
	      (if (> (string-length (tdb:step-get-logfile step))
		     0)
		  (vector-set! record 5 (tdb:step-get-logfile step))))
	     ((end)  
	      (vector-set! record 2 (any->number (tdb:step-get-event_time step)))
	      (vector-set! record 3 (tdb:step-get-status step))
	      (vector-set! record 4 (let ((startt (any->number (vector-ref record 1)))
					  (endt   (any->number (vector-ref record 2))))
				      (debug:print 4 *default-log-port* "record[1]=" (vector-ref record 1) 
						   ", startt=" startt ", endt=" endt
						   ", get-status: " (tdb:step-get-status step))
				      (if (and (number? startt)(number? endt))
					  (seconds->hr-min-sec (- endt startt)) "-1")))
	      (if (> (string-length (tdb:step-get-logfile step))
		     0)
		  (vector-set! record 5 (tdb:step-get-logfile step)))
	      (if (> (string-length (tdb:step-get-comment step))
		     0)
		  (vector-set! record 6 (tdb:step-get-comment step))))
	     (else
	      (vector-set! record 2 (tdb:step-get-state step))
	      (vector-set! record 3 (tdb:step-get-status step))
	      (vector-set! record 4 (tdb:step-get-event_time step))
	      (vector-set! record 6 (tdb:step-get-comment step))))
	   (hash-table-set! res (tdb:step-get-stepname step) record)
	   (debug:print 6 *default-log-port* "record(after)  = " record 
			"\nid:       " (tdb:step-get-id step)
			"\nstepname: " (tdb:step-get-stepname step)
			"\nstate:    " (tdb:step-get-state step)
			"\nstatus:   " (tdb:step-get-status step)
			"\ntime:     " (tdb:step-get-event_time step))))
       ;; (else   (vector-set! record 1 (tdb:step-get-event_time step)))
       (sort steps (lambda (a b)
		     (cond
		      ((<   (tdb:step-get-event_time a)(tdb:step-get-event_time b)) #t)
		      ((eq? (tdb:step-get-event_time a)(tdb:step-get-event_time b)) 
		       (<   (tdb:step-get-id a)        (tdb:step-get-id b)))
		      (else #f)))))
      res))

;; 
;;
(define (tests:get-compressed-steps run-id test-id)
  (let* ((steps-data  (rmt:get-steps-for-test run-id test-id)) ;;      0       1    2    3       4       5       6      7       
	 (comprsteps  (tests:process-steps-table steps-data))) ;; #<stepname start end status Duration Logfile Comment id>
    (map (lambda (x)
	   ;; take advantage of the \n on time->string
	   (vector    ;; we are constructing basically the original vector but collapsing start end records
	    (vector-ref x 0)                              ;; id        0
	    (let ((s (vector-ref x 1)))
	      (if (number? s)(seconds->time-string s) s)) ;; starttime 1
	    (let ((s (vector-ref x 2)))
	      (if (number? s)(seconds->time-string s) s)) ;; endtime   2
	    (vector-ref x 3)                              ;; status    3    
	    (vector-ref x 4)                              ;; duration  4
	    (vector-ref x 5)                              ;; logfile   5
	    (vector-ref x 6)                              ;; comment   6
	    (vector-ref x 7)))                            ;; id        7
	 (sort (hash-table-values comprsteps)
	       (lambda (a b)
		 (let ((time-a (vector-ref a 1))
		       (time-b (vector-ref b 1))
		       (id-a   (vector-ref a 7))
		       (id-b   (vector-ref b 7)))
		   (if (and (number? time-a)(number? time-b))
		       (if (< time-a time-b)
			   #t
			   (if (eq? time-a time-b)
			       (< id-a id-b)
			       ;; (string<? (conc (vector-ref a 2))
			       ;;	    (conc (vector-ref b 2)))
			       #f))
		       (string<? (conc time-a)(conc time-b)))))))))


;; summarize test in to a file test-summary.html in the test directory
;;
(define (tests:summarize-test run-id test-id)
  (let* ((test-dat  (rmt:get-test-info-by-id run-id test-id))
	 (out-dir   (db:test-get-rundir test-dat))
	 (out-file  (conc out-dir "/test-summary.html")))
    ;; first verify we are able to write the output file
    (if (not (file-write-access? out-dir))
	(debug:print 0 *default-log-port* "ERROR: cannot write test-summary.html to " out-dir)
	(let* (;; (steps-dat (rmt:get-steps-for-test run-id test-id))
	       (test-name (db:test-get-testname test-dat))
	       (item-path (db:test-get-item-path test-dat))
	       (full-name (db:test-make-full-name test-name item-path))
	       (oup       (open-output-file out-file))
	       (status    (db:test-get-status   test-dat))
	       (color     (common:get-color-from-status status))
	       (logf      (db:test-get-final_logf test-dat))
	       (steps-dat (tests:get-compressed-steps run-id test-id)))
	  ;; (dcommon:get-compressed-steps #f 1 30045)
	  ;; (#("wasting_time" "23:36:13" "23:36:21" "0" "8.0s" "wasting_time.log"))
	  
	  (s:output-new
	   oup
	   (s:html
	    (s:title "Summary for " full-name)
	    (s:body 
	     (s:h2 "Summary for " full-name)
	     (s:table 'cellspacing "0" 'border "1"
		      (s:tr (s:td "run id")   (s:td (db:test-get-run_id   test-dat))
			    (s:td "test id")  (s:td (db:test-get-id       test-dat)))
		      (s:tr (s:td "testname") (s:td test-name)
			    (s:td "itempath") (s:td item-path))
		      (s:tr (s:td "state")    (s:td (db:test-get-state    test-dat))
			    (s:td "status")   (s:td (s:a 'href logf (s:font 'color color status))))
		      (s:tr (s:td "TestDate") (s:td (seconds->work-week/day-time 
						     (db:test-get-event_time test-dat)))
			    (s:td "Duration") (s:td (seconds->hr-min-sec (db:test-get-run_duration test-dat)))))
	     (s:h3 "Log files")
	     (s:table 
	      'cellspacing "0" 'border "1"
	      (s:tr (s:td "Final log")(s:td (s:a 'href logf logf))))
	     (s:table
	      'cellspacing "0" 'border "1"
	      (s:tr (s:td "Step Name")(s:td "Start")(s:td "End")(s:td "Status")(s:td "Duration")(s:td "Log File"))
	      (map (lambda (step-dat)
		     (s:tr (s:td (tdb:steps-table-get-stepname step-dat))
			   (s:td (tdb:steps-table-get-start    step-dat))
			   (s:td (tdb:steps-table-get-end      step-dat))
			   (s:td (tdb:steps-table-get-status   step-dat))
			   (s:td (tdb:steps-table-get-runtime  step-dat))
			   (s:td (let ((step-log (tdb:steps-table-get-log-file step-dat)))
				   (s:a 'href step-log step-log)))))
		   steps-dat))
	     )))
	  (close-output-port oup)))))
	  
	  
;; MUST BE CALLED local!
;;
(define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '()))
  ;; BUG: Move the values derived from args to parameters and push to megatest.scm
  (let* ((testpatt   (or (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%"))
	 (statepatt  (or (args:get-arg "-state")   (args:get-arg ":state")    "%"))
	 (statuspatt (or (args:get-arg "-status")  (args:get-arg ":status")   "%"))
	 (runname    (or (args:get-arg "-runname") (args:get-arg ":runname")  "%"))
	 (paths-from-db (rmt:test-get-paths-matching-keynames-target-new keynames target res
					testpatt
					statepatt
					statuspatt
					runname)))
    (if fnamepatt
	(apply append 
	       (map (lambda (p)
		      (if (directory-exists? p)
			  (let ((glob-query (conc p "/" fnamepatt)))
			    (handle-exceptions
				exn
				(with-input-from-pipe
				    (conc "echo " glob-query)
				  read-lines)  ;; we aren't going to try too hard. If glob breaks it is likely because someone tried to do */*/*.log or similar
			      (glob glob-query)))
			  '()))
		    paths-from-db))
	paths-from-db)))

			      
;;======================================================================
;; Gather data from test/task specifications
;;======================================================================

;; (define (tests:get-valid-tests testsdir test-patts) ;;  #!key (test-names '()))
;;   (let ((tests (glob (conc testsdir "/tests/*")))) ;; " (string-translate patt "%" "*")))))
;;     (set! tests (filter (lambda (test)(common:file-exists? (conc test "/testconfig"))) tests))
;;     (delete-duplicates
;;      (filter (lambda (testname)
;; 	       (tests:match test-patts testname #f))
;; 	     (map (lambda (testp)
;; 		    (last (string-split testp "/")))
;; 		  tests)))))

(define (tests:get-test-path-from-environment)
  (if (and (getenv "MT_LINKTREE")
	   (getenv "MT_TARGET")
	   (getenv "MT_RUNNAME")
	   (getenv "MT_TEST_NAME")
	   (getenv "MT_ITEMPATH"))
      (conc (getenv "MT_LINKTREE")  "/"
	    (getenv "MT_TARGET")    "/"
	    (getenv "MT_RUNNAME")   "/"
	    (getenv "MT_TEST_NAME")
	    (if (and (getenv "MT_ITEMPATH")
                     (not (string=? "" (getenv "MT_ITEMPATH"))))
		(conc "/" (getenv "MT_ITEMPATH"))
                ""))
      #f))

;; if .testconfig exists in test directory read and return it
;; else if have cached copy in *testconfigs* return it IFF there is a section "have fulldata"
;; else read the testconfig file
;;   if have path to test directory save the config as .testconfig and return it
;;
(define (tests:get-testconfig test-name item-path test-registry system-allowed #!key (force-create #f)(allow-write-cache #t)(wait-a-minute #f))
  (let* ((use-cache    (common:use-cache?))
	 (cache-path   (tests:get-test-path-from-environment))
	 (cache-file   (and cache-path (conc cache-path "/.testconfig")))
	 (cache-exists (and cache-file
			    (not force-create)  ;; if force-create then pretend there is no cache to read
			    (common:file-exists? cache-file)))
	 (cached-dat   (if (and (not force-create)
				cache-exists
				use-cache)
			   (handle-exceptions
			    exn
			    #f ;; any issues, just give up with the cached version and re-read
			    (configf:read-alist cache-file))
			   #f))
         (test-full-name (if (and item-path (not (string-null? item-path)))
                             (conc test-name "/" item-path)
                             test-name)))
    (if cached-dat
	cached-dat
	(let ((dat (hash-table-ref/default *testconfigs* test-full-name #f)))
	  (if (and  dat ;; have a locally cached version
		    (hash-table-ref/default dat "have fulldata" #f)) ;; marked as good data?
	      dat
	      ;; no cached data available
	      (let* ((treg         (or test-registry
				       (tests:get-all)))
		     (test-path    (or (hash-table-ref/default treg test-name #f)
                                       (let* ((local-tcdir (conc (getenv "MT_LINKTREE") "/"
                                                                 (getenv "MT_TARGET") "/"
                                                                 (getenv "MT_RUNNAME") "/"
                                                                 test-name "/" item-path))
                                              (local-tcfg (conc local-tcdir "/testconfig")))
                                         (if (common:file-exists? local-tcfg)
                                             local-tcdir
                                             #f))
				       (conc *toppath* "/tests/" test-name)))
		     (test-configf (conc test-path "/testconfig"))
		     (testexists   (let loopa ((tries-left 30))
                                     (cond
                                      (
                                       (and (common:file-exists? test-configf)(file-read-access? test-configf))
                                       #t)
                                      (
                                       (common:file-exists? test-configf)
                                       (debug:print 0 *default-log-port* "WARNING: Cannot read testconfig file: "test-configf)
                                       #f)
                                      (
                                       (and wait-a-minute (> tries-left 0))
                                       (thread-sleep! 10)
                                       (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf" will retry in 10 seconds.  Tries left: "tries-left) ;; BB: this fires
                                       (loopa (sub1 tries-left)))
                                      (else
                                       (debug:print 0 *default-log-port* "WARNING: testconfig file does not exist: "test-configf) ;; BB: this fires
                                       #f))))
		     (tcfg         (if testexists
				       (read-config test-configf #f system-allowed
						    environ-patt: (if system-allowed
								      "pre-launch-env-vars"
								      #f))
				       #f)))
		(if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data
		(if tcfg (hash-table-set! *testconfigs* test-full-name tcfg))
		(if (and testexists
			 cache-file
			 (file-write-access? cache-path)
			 allow-write-cache)
		    (let ((tpath (conc cache-path "/.testconfig")))
		      (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath)
                      (if (and tcfg (not (common:in-running-test?)))
                          (configf:write-alist tcfg tpath))))
		tcfg))))))
  
;; sort tests by priority and waiton
;; Move test specific stuff to a test unit FIXME one of these days
(define (tests:sort-by-priority-and-waiton test-records)
  (if (eq? (hash-table-size test-records) 0)
      '()
      (let* ((mungepriority (lambda (priority)
			      (if priority
				  (let ((tmp (any->number priority)))
				    (if tmp tmp (begin (debug:print-error 0 *default-log-port* "bad priority value " priority ", using 0") 0)))
				  0)))
	     (all-tests      (hash-table-keys test-records))
	     (all-waited-on  (let loop ((hed (car all-tests))
					(tal (cdr all-tests))
					(res '()))
			       (let* ((trec    (hash-table-ref test-records hed))
				      (waitons (or (tests:testqueue-get-waitons trec) '())))
				 (if (null? tal)
				     (append res waitons)
				     (loop (car tal)(cdr tal)(append res waitons))))))
	     (sort-fn1 
	      (lambda (a b)
		(let* ((a-record   (hash-table-ref test-records a))
		       (b-record   (hash-table-ref test-records b))
		       (a-waitons  (or (tests:testqueue-get-waitons a-record) '()))
		       (b-waitons  (or (tests:testqueue-get-waitons b-record) '()))
		       (a-config   (tests:testqueue-get-testconfig  a-record))
		       (b-config   (tests:testqueue-get-testconfig  b-record))
		       (a-raw-pri  (config-lookup a-config "requirements" "priority"))
		       (b-raw-pri  (config-lookup b-config "requirements" "priority"))
		       (a-priority (mungepriority a-raw-pri))
		       (b-priority (mungepriority b-raw-pri)))
		  (tests:testqueue-set-priority! a-record a-priority)
		  (tests:testqueue-set-priority! b-record b-priority)
		  ;; (debug:print 0 *default-log-port* "a=" a ", b=" b ", a-waitons=" a-waitons ", b-waitons=" b-waitons)
		  (cond
		   ;; is 
		   ((member a b-waitons)          ;; is b waiting on a?
		    ;; (debug:print 0 *default-log-port* "case1")
		    #t)
		   ((member b a-waitons)          ;; is a waiting on b?
		    ;; (debug:print 0 *default-log-port* "case2")
		    #f)
		   ((and (not (null? a-waitons))  ;; both have waitons - do not disturb
			 (not (null? b-waitons)))
		    ;; (debug:print 0 *default-log-port* "case2.1")
		    #t)
		   ((and (null? a-waitons)        ;; no waitons for a but b has waitons
			 (not (null? b-waitons)))
		    ;; (debug:print 0 *default-log-port* "case3")
		    #f)
		   ((and (not (null? a-waitons))  ;; a has waitons but b does not
			 (null? b-waitons)) 
		    ;; (debug:print 0 *default-log-port* "case4")
		    #t)
		   ((not (eq? a-priority b-priority)) ;; use
		    (> a-priority b-priority))
		   (else
		    ;; (debug:print 0 *default-log-port* "case5")
		    (string>? a b))))))
	     
	     (sort-fn2
	      (lambda (a b)
		(> (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records a)))
		   (mungepriority (tests:testqueue-get-priority (hash-table-ref test-records b)))))))
	;; (let ((dot-res (tests:run-dot (tests:tests->dot test-records) "plain")))
	;;   (debug:print "dot-res=" dot-res))
	;; (let ((data (map cdr (filter
	;;     		  (lambda (x)(equal? "node" (car x)))
	;;     		  (map string-split (tests:easy-dot test-records "plain"))))))
	;;   (map car (sort data (lambda (a b)
	;;     		    (> (string->number (caddr a))(string->number (caddr b)))))))
	;; ))
	(sort all-tests sort-fn1)))) ;; avoid dealing with deleted tests, look at the hash table

(define (tests:easy-dot test-records outtype)
  (let-values (((fd temp-path) (file-mkstemp (conc "/tmp/" (current-user-name) ".XXXXXX"))))
    (let ((all-testnames (hash-table-keys test-records))
	  (temp-port     (open-output-file* fd)))
      ;; (format temp-port "This file is ~A.~%" temp-path)
      (format temp-port "digraph tests {\n")
      (format temp-port "  size=4,8\n")
      ;; (format temp-port "   splines=none\n")
      (for-each
       (lambda (testname)
	 (let* ((testrec (hash-table-ref test-records testname))
		(waitons (or (tests:testqueue-get-waitons testrec) '())))
	   (for-each
	    (lambda (waiton)
	      (format temp-port (conc "   " waiton " -> " testname " [splines=ortho]\n")))
	    waitons)))
       all-testnames)
      (format temp-port "}\n")
      (close-output-port temp-port)
      (with-input-from-pipe
       (conc "env -i PATH=$PATH dot -T" outtype " < " temp-path)
       (lambda ()
	 (let ((res (read-lines)))
	   ;; (delete-file temp-path)
	   res))))))

(define (tests:write-dot-file test-records fname sizex sizey)
  (if (file-write-access? (pathname-directory fname))
      (with-output-to-file fname
	(lambda ()
	  (map print (tests:tests->dot test-records sizex sizey))))))

(define (tests:tests->dot test-records sizex sizey)
  (let ((all-testnames (hash-table-keys test-records)))
    (if (null? all-testnames)
	'()
	(let loop ((hed (car all-testnames))
		   (tal (cdr all-testnames))
		   (res (list "digraph tests {"
			      (conc " size=\"" (or sizex 11) "," (or sizey 11) "\";")
			      " ratio=0.95;"
			      )))
	  (let* ((testrec (hash-table-ref test-records hed))
		 (waitons (or (tests:testqueue-get-waitons testrec) '()))
		 (newres  (append res
				  (if (null? waitons)
				      (list (conc "   \"" hed "\" [shape=box];"))
				      (map (lambda (waiton)
					     (conc "   \"" waiton "\" -> \"" hed "\" [shape=box];"))
					   waitons)
				      ))))
	    (if (null? tal)
		(append newres (list "}"))
		(loop (car tal)(cdr tal) newres)
		))))))

;; (tests:run-dot (list "digraph tests {" "a -> b" "}") "plain")

(define (tests:run-dot indat outtype) ;; outtype is plain, fig, dot, etc. http://www.graphviz.org/content/output-formats
  (let-values (((inp oup pid)(process "env -i PATH=$PATH dot" (list "-T" outtype))))
    (with-output-to-port oup
      (lambda ()
	(map print indat)))
    (close-output-port oup)
    (let ((res (with-input-from-port inp
		 (lambda ()
		   (read-lines)))))
      (close-input-port inp)
      res)))

;; read data from tmp file or create if not exists
;; if exists regen in background
;;
(define (tests:lazy-dot testrecords  outtype sizex sizey)
  (let ((dfile (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dot"))
	(fname (conc "/tmp/." (current-user-name) "-" (server:mk-signature) ".dotdat")))
    (tests:write-dot-file testrecords dfile sizex sizey)
    (if (common:file-exists? fname)
	(let ((res (with-input-from-file fname
		     (lambda ()
		       (read-lines)))))
	  (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname "&"))
	  res)
	(begin
	  (system (conc "env -i PATH=$PATH dot -T " outtype " < " dfile " > " fname))
	  (with-input-from-file fname
	    (lambda ()
	      (read-lines)))))))
	  

;; for each test:
;;   
(define (tests:filter-non-runnable run-id testkeynames testrecordshash)
  (let ((runnables '()))
    (for-each
     (lambda (testkeyname)
       (let* ((test-record (hash-table-ref testrecordshash testkeyname))
	      (test-name   (tests:testqueue-get-testname  test-record))
	      (itemdat     (tests:testqueue-get-itemdat   test-record))
	      (item-path   (tests:testqueue-get-item_path test-record))
	      (waitons     (tests:testqueue-get-waitons   test-record))
	      (keep-test   #t)
	      (test-id     (rmt:get-test-id run-id test-name item-path))
	      (tdat        (rmt:get-testinfo-state-status run-id test-id)))
	 (if tdat
	     (begin
	       ;; Look at the test state and status
	       (if (or (and (member (db:test-get-status tdat) 
				    '("PASS" "WARN" "WAIVED" "CHECK" "SKIP"))
			    (equal? (db:test-get-state tdat) "COMPLETED"))
		       (member (db:test-get-state tdat)
				    '("INCOMPLETE" "KILLED")))
		   (set! keep-test #f))

	       ;; examine waitons for any fails. If it is FAIL or INCOMPLETE then eliminate this test
	       ;; from the runnable list
	       (if keep-test
		   (for-each (lambda (waiton)
			       ;; for now we are waiting only on the parent test
			       (let* ((parent-test-id (rmt:get-test-id run-id waiton ""))
				      (wtdat          (rmt:get-testinfo-state-status run-id test-id)))
				 (if (or (and (equal? (db:test-get-state wtdat) "COMPLETED")
					      (member (db:test-get-status wtdat) '("FAIL" "ABORT")))
					 (member (db:test-get-status wtdat)  '("KILLED"))
					 (member (db:test-get-state wtdat)   '("INCOMPETE")))
				 ;; (if (or (member (db:test-get-status wtdat)
				 ;;        	 '("FAIL" "KILLED"))
				 ;;         (member (db:test-get-state wtdat)
				 ;;        	 '("INCOMPETE")))
				     (set! keep-test #f)))) ;; no point in running this one again
			     waitons))))
	 (if keep-test (set! runnables (cons testkeyname runnables)))))
     testkeynames)
    runnables))

;;======================================================================
;; refactoring this block into tests:get-full-data from line 263 of runs.scm
;;======================================================================
;; hed is the test name
;; test-records is a hash of test-name => test record
(define (tests:get-full-data test-names test-records required-tests all-tests-registry)
  (if (not (null? test-names))
      (let loop ((hed (car test-names))
		 (tal (cdr test-names)))         ;; 'return-procs tells the config reader to prep running system but return a proc
	(debug:print-info 4 *default-log-port* "hed=" hed " at top of loop")
        ;; don't know item-path at this time, let the testconfig get the top level testconfig
	(let* ((config  (tests:get-testconfig hed #f all-tests-registry 'return-procs))
	       (waitons (let ((instr (if config 
					 (config-lookup config "requirements" "waiton")
					 (begin ;; No config means this is a non-existant test
					   (debug:print-error 0 *default-log-port* "non-existent required test \"" hed "\", grep through your testconfigs to find and remove or create the test. Discarding and continuing.")
					     ""))))
			  (debug:print-info 8 *default-log-port* "waitons string is " instr)
			  (string-split (cond
					 ((procedure? instr)
					  (let ((res (instr)))
					    (debug:print-info 8 *default-log-port* "waiton procedure results in string " res " for test " hed)
					    res))
					 ((string? instr)     instr)
					 (else 
					  ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print-error 0 *default-log-port* "something went wrong in processing waitons for test " hed)
					  ""))))))
	  (if (not config) ;; this is a non-existant test called in a waiton. 
	      (if (null? tal)
		  test-records
		  (loop (car tal)(cdr tal)))
	      (begin
		(debug:print-info 8 *default-log-port* "waitons: " waitons)
		;; check for hed in waitons => this would be circular, remove it and issue an
		;; error
		(if (member hed waitons)
		    (begin
		      (debug:print-error 0 *default-log-port* "test " hed " has listed itself as a waiton, please correct this!")
		      (set! waitons (filter (lambda (x)(not (equal? x hed))) waitons))))
		
		;; (items   (items:get-items-from-config config)))
		(if (not (hash-table-ref/default test-records hed #f))
		    (hash-table-set! test-records
				     hed (vector hed     ;; 0
						 config  ;; 1
						 waitons ;; 2
						 (config-lookup config "requirements" "priority")     ;; priority 3
						 (let ((items      (hash-table-ref/default config "items" #f)) ;; items 4
						       (itemstable (hash-table-ref/default config "itemstable" #f))) 
						   ;; if either items or items table is a proc return it so test running
						   ;; process can know to call items:get-items-from-config
						   ;; if either is a list and none is a proc go ahead and call get-items
						   ;; otherwise return #f - this is not an iterated test
						   (cond
						    ((procedure? items)      
						     (debug:print-info 4 *default-log-port* "items is a procedure, will calc later")
						     items)            ;; calc later
						    ((procedure? itemstable)
						     (debug:print-info 4 *default-log-port* "itemstable is a procedure, will calc later")
						     itemstable)       ;; calc later
						    ((filter (lambda (x)
							       (let ((val (car x)))
								 (if (procedure? val) val #f)))
							     (append (if (list? items) items '())
								     (if (list? itemstable) itemstable '())))
						     'have-procedure)
						    ((or (list? items)(list? itemstable)) ;; calc now
						     (debug:print-info 4 *default-log-port* "items and itemstable are lists, calc now\n"
								       "    items: " items " itemstable: " itemstable)
						     (items:get-items-from-config config))
						    (else #f)))                           ;; not iterated
						 #f      ;; itemsdat 5
						 #f      ;; spare - used for item-path
						 )))
		(for-each 
		 (lambda (waiton)
		   (if (and waiton (not (member waiton test-names)))
		       (begin
			 (set! required-tests (cons waiton required-tests))
			 (set! test-names (cons waiton test-names))))) ;; was an append, now a cons
		 waitons)
		(let ((remtests (delete-duplicates (append waitons tal))))
		  (if (not (null? remtests))
		      (loop (car remtests)(cdr remtests))
		      test-records))))))))

;;======================================================================
;; test steps
;;======================================================================

;; teststep-set-status! used to be here

(define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat)
  (let* ((testdat   (rmt:get-test-info-by-id run-id test-id)))
    (and testdat
	 (equal? (test:get-state testdat) "KILLREQ"))))

(define (test:tdb-get-rundat-count tdb)
  (if tdb
      (let ((res 0))
	(sqlite3:for-each-row
	 (lambda (count)
	   (set! res count))
	 tdb
	 "SELECT count(id) FROM test_rundat;")
	res))
  0)

(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)
  (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1))
  (if (and cpuload diskfree)
      (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id))
  (if minutes 
      (rmt:general-call 'update-run-duration run-id minutes test-id))
  (if (and uname hostname)
      (rmt:general-call 'update-uname-host run-id uname hostname test-id)))
  
;; This one is for running with no db access (i.e. via rmt: internally)
(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries)
;; (define (tests:set-full-meta-info test-id run-id minutes work-area)
;;  (let ((remtries 10))
  (let* ((cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory)))
	 (uname    (get-uname "-srvpio"))
	 (hostname (get-host-name)))
    (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)))
    
;; (define (tests:set-partial-meta-info test-id run-id minutes work-area)
#;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries)
  (let* ((cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory)))
	 (remtries 10))
    (handle-exceptions
     exn
     (if (> remtries 0)
	 (begin
	   (print-call-chain (current-error-port))
	   (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times")
	   (set! remtries (- remtries 1))
	   (thread-sleep! 10)
	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
	   (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up")
	   (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	   (debug:print 5 *default-log-port* "exn=" (condition->list exn))
	   (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	   (print-call-chain (current-error-port))))
     (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
  )))
	 
;;======================================================================
;; A R C H I V I N G
;;======================================================================

(define (test:archive db test-id)
  #f)

(define (test:archive-tests db keynames target)
  #f)


;; from metadat lookup MEGATEST_VERSION
;;
(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB
  (rmt:get-var "MEGATEST_VERSION"))

(define (common:set-last-run-version)
  (rmt:set-var "MEGATEST_VERSION" (common:version-signature)))

(define (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params alldat #!key (remretries 5))
  (let* ((ro-queries     (alldat-read-only-queries alldat))
	 (qry-is-write   (not (member cmd ro-queries)))
	 (db-file-path   (common:get-db-tmp-area alldat)) ;;  0))
	 (dbstruct-local (db:setup #t))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
	 (read-only      (not (file-write-access? db-file-path)))
	 (start          (current-milliseconds))
	 (resdat         (if (not (and read-only qry-is-write))
			     (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
			       (handle-exceptions ;; there has been a
						  ;; long history of
						  ;; receiving strange
						  ;; errors from
						  ;; values returned
						  ;; by the client
						  ;; when things go
						  ;; wrong..
				exn ;;  This is an attempt to detect that situation and recover gracefully
				(begin
				  (debug:print 0 log-port "ERROR: bad data from server " v " message: "
					       ((condition-property-accessor 'exn 'message) exn))
				  (vector #t '())) ;; should always
						   ;; get a vector but
						   ;; if something
						   ;; goes wrong
						   ;; return a dummy
				(if (and (vector? v)
					 (> (vector-length v) 1))
				    (let ((newvec (vector (vector-ref v 0)(vector-ref v 1))))
				      newvec) ;; by copying the vector
					      ;; while inside the
					      ;; error handler we
					      ;; should force the
					      ;; detection of a
					      ;; corrupted record
				    (vector #t '()))))  ;; we could also check that the returned types are valid
			     (vector #t '())))
	 (success        (vector-ref resdat 0))
	 (res            (vector-ref resdat 1))
	 (duration       (- (current-milliseconds) start)))
    (if (and read-only qry-is-write)
        (debug:print 0 log-port "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
    (if (not success)
	(if (> remretries 0)
	    (begin
	      (debug:print-error 0 log-port "local query failed. Trying again.")
	      (thread-sleep! (/ (random 5000) 1000)) ;; some random delay 
	      (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params alldat remretries: (- remretries 1)))
	    (begin
	      (debug:print-error 0 log-port "too many retries in rmt:open-qry-close-locally, giving up")
	      #f))
	(begin
	  ;; (rmt:update-db-stats run-id cmd params duration)
	  ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
	  #;(if qry-is-write
	      (let ((start-time (current-seconds)))
		(mutex-lock! multi-sync-mutex)
		(set! *db-last-access* start-time)  ;; THIS IS PROBABLY USELESS? (we are on a client)
                (mutex-unlock! multi-sync-mutex)))))
    res))

(define (rmtmod:calc-ro-mode areadat toppath)
  (if (and areadat
	   (alldat-ro-mode-checked areadat))
      (alldat-ro-mode areadat)
      (let* ((dbfile  (conc toppath "/megatest.db"))
	     (ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or areadat to figure this out in future
	(if areadat
	    (begin
	      (alldat-ro-mode-set! areadat ro-mode)
	      (alldat-ro-mode-checked-set! areadat #t)
	      ro-mode)
	    ro-mode))))

(define (extras-readonly-mode rmt-mutex log-port cmd params)
  ;;(mutex-unlock! rmt-mutex)
  (debug:print-info 12 log-port "rmt:send-receive, case 3")
  (debug:print 0 log-port "WARNING: write transaction requested on a readonly area.  cmd="cmd" params="params)
  #f)

(define (extras-transport-failed log-port rmt-mutex attemptnum areadat areapath cmd rid params alldat)
  (debug:print 0 log-port "WARNING: communication failed. Trying again, try num: " attemptnum)
  ;;(mutex-lock! rmt-mutex)
  (alldat-conndat-set!    areadat #f)
  (exec-fn 'http-transport:close-connections area-dat: areadat)
  (alldat-server-url-set! areadat #f)
  ;;(mutex-unlock! rmt-mutex)
  (debug:print-info 12 log-port "rmt:send-receive, case  9.1")
  (rmt:send-receive-orig log-port areadat rmt-mutex areapath cmd rid params alldat attemptnum: (+ attemptnum 1)))
  
(define (extras-transport-succeded log-port rmt-mutex attemptnum areadat areapath res params rid cmd alldat)
  (if (and (vector? res)
	   (eq? (vector-length res) 2)
	   (eq? (vector-ref res 1) 'overloaded)) ;; since we are
						 ;; looking at the
						 ;; data to carry the
						 ;; error we'll use a
						 ;; fairly obtuse
						 ;; combo to minimise
						 ;; the chances of
						 ;; some sort of
						 ;; collision.  this
						 ;; is the case where
						 ;; the returned data
						 ;; is bad or the
						 ;; server is
						 ;; overloaded and we
						 ;; want to ease off
						 ;; the queries
      (let ((wait-delay (+ attemptnum (* attemptnum 10))))
	(debug:print 0 log-port "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
	;;(mutex-lock! rmt-mutex)
	(exec-fn 'http-transport:close-connections area-dat: areadat)
	;; (set! *areadat* #f) ;; force starting over
	(alldat-server-url-set! areadat #f) ;; I am hoping this will force a redo on server connection. NOT TESTED
	;;(mutex-unlock! rmt-mutex)
	(thread-sleep! wait-delay)
	(rmt:send-receive-orig log-port areadat rmt-mutex areapath cmd rid params alldat attemptnum: (+ attemptnum 1)))
      res)) ;; All good, return res

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
;;  add multi-sync-mutex 
;;
(define (rmt:send-receive-orig log-port areadat rmt-mutex toppath multi-sync-mutex cmd rid params alldat #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  #;(common:telemetry-log (conc "rmt:"(->string cmd))
                        payload: `((rid . ,rid)
                                   (params . ,params)))
                          
  
  ;; do all the prep locked under the rmt-mutex
  ;;(mutex-lock! rmt-mutex)
  
  ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in areadat
  ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
  ;; 3. do the query, if on homehost use local access
  ;;
  (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value

	 (readonly-mode (rmtmod:calc-ro-mode areadat toppath)))

    ;; (assert (not (pair? (alldat-hh-dat areadat))))
    
    ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
    (cond
     ;; give up if more than 15 attempts
     ((> attemptnum 15)
      (debug:print 0 log-port "ERROR: 15 tries to start/connect to server. Giving up.")
      (exit 1))

     ;; readonly mode, read request-  handle it - case 2
     ((and readonly-mode
           (member cmd api:read-only-queries)) 
      ;; (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case 2")
      (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params alldat)
      )

     ;; readonly mode, write request.  Do nothing, return #f
     (readonly-mode (extras-readonly-mode rmt-mutex log-port cmd params))

     ;; This block was for pre-emptively resetting the connection if there had been no communication for some time.
     ;; I don't think it adds any value. If the server is not there, just fail and start a new connection.
     ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout)
     ;;
     ;; reset the connection if it has been unused too long
     ((and areadat
           (alldat-conndat areadat)
	   (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on
	      (+ (http-transport:server-dat-get-last-access (alldat-conndat areadat))
		 (alldat-server-timeout areadat))))
      (debug:print-info 0 log-port "Connection to " (alldat-server-url areadat) " expired due to no accesses, forcing new connection.")
      (exec-fn 'http-transport:close-connections area-dat: areadat)
      (alldat-conndat-set! areadat #f) ;; invalidate the connection, thus forcing a new connection.
      ;; (mutex-unlock! rmt-mutex)
      (rmt:send-receive-orig log-port areadat rmt-mutex toppath multi-sync-mutex cmd rid params alldat attemptnum: attemptnum))
     

     ;; on homehost and this is a read
     ((and (not (alldat-force-server areadat)) ;; honor forced use of server, i.e. server NOT required
	   (pair? (alldat-hh-dat areadat))
	   (cdr (alldat-hh-dat areadat))       ;; on homehost
           (member cmd api:read-only-queries))   ;; this is a read
      ;; (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  5")
      (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params alldat))

     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (alldat-hh-dat areadat))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (alldat-server-url areadat)             ;; have a server
           (not (exec-fn 'server:ping (alldat-server-url areadat))))  ;; server has died. NOTE: this is not a cheap call! Need better approach.
      ;; (set! *areadat* (make-remote)) ;; WARNING - broken this.
      (alldat-force-server-set! areadat (exec-fn 'common:force-server?))
      ;; (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  6")
      (rmt:send-receive-orig log-port areadat rmt-mutex toppath  multi-sync-mutex cmd rid params alldat attemptnum: attemptnum))

     ;; on homehost and this is a write, we already have a server
     ((and (not (alldat-force-server areadat))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (alldat-hh-dat areadat))           ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (alldat-server-url areadat))            ;; have a server
      ;;(mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  4.1")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd 0 params alldat))

     ;;  on homehost, no server contact made and this is a write, passively start a server 
     ((and (not (alldat-force-server areadat))     ;; honor forced use of server, i.e. server NOT required
	   (cdr (alldat-hh-dat areadat))           ;; have homehost
           (not (alldat-server-url areadat))       ;; no connection yet
	   (not (member cmd api:read-only-queries))) ;; not a read-only query
      (debug:print-info 12 log-port "rmt:send-receive, case  8")
      (let ((server-url  (exec-fn 'server:check-if-running toppath))) ;; (server:read-dotserver->url toppath))) ;; (server:check-if-running toppath))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if server-url
	    (alldat-server-url-set! areadat server-url) ;; the string can be consumed by the client setup if needed
	    (if (exec-fn 'common:force-server?)
		(exec-fn 'server:start-and-wait toppath)
		(exec-fn 'server:kind-run toppath))))
      (alldat-force-server-set! areadat (exec-fn 'common:force-server?))
      ;; (mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case  8.1")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd 0 params alldat))

     ((or (and (alldat-force-server areadat)              ;; we are forcing a server and don't yet have a connection to one
	       (not (alldat-conndat areadat)))
	  (and (not (cdr (alldat-hh-dat areadat)))        ;; not on a homehost 
	       (not (alldat-conndat areadat))))           ;; and no connection
      (debug:print-info 12 log-port "rmt:send-receive, case 9, hh-dat: " (alldat-hh-dat areadat) " conndat: " (alldat-conndat areadat))
      ;;(mutex-unlock! rmt-mutex)
      (if (not (exec-fn 'server:check-if-running toppath)) ;; who knows, maybe one has started up?
	  (exec-fn 'server:start-and-wait toppath))
      (alldat-conndat-set! areadat (rmt:get-connection-info areadat toppath)) ;; calls client:setup which calls client:setup-http
      (rmt:send-receive-orig log-port areadat rmt-mutex toppath multi-sync-mutex cmd rid params alldat attemptnum: attemptnum)) ;; TODO: add back-off timeout as

     ;; all set up if get this far, dispatch the query
     ((and (not (alldat-force-server areadat))
	   (cdr (alldat-hh-dat areadat))) ;; we are on homehost
      ;;(mutex-unlock! rmt-mutex)
      (debug:print-info 12 log-port "rmt:send-receive, case 10")
      (rmt:open-qry-close-locally  log-port multi-sync-mutex cmd (if rid rid 0) params alldat))

     ;; not on homehost, do server query
     (else (extras-case-11 log-port rmt-mutex areadat toppath cmd params attemptnum rid alldat)))))

(define (extras-case-11 log-port rmt-mutex areadat areapath cmd params attemptnum rid alldat)
  ;; (mutex-unlock! rmt-mutex)
  (debug:print-info 12 log-port "rmt:send-receive, case  9")
  ;; (mutex-lock! rmt-mutex)
  (let* ((conninfo (alldat-conndat areadat))
	 (dat      (case (alldat-transport areadat)
		     ((http) (condition-case ;; handling here has
					     ;; caused a lot of
					     ;; problems. However it
					     ;; is needed to deal with
					     ;; attemtped
					     ;; communication to
					     ;; servers that have gone
					     ;; away
			      (exec-fn 'http-transport:client-api-send-receive 0 conninfo cmd params)
			      ((commfail)(vector #f "communications fail"))
			      ((exn)(vector #f "other fail" (print-call-chain)))))
		     (else
		      (debug:print 0 log-port "ERROR: transport " (alldat-transport areadat) " not supported")
		      (exit))))
	 (success  (if (vector? dat) (vector-ref dat 0) #f))
	 (res      (if (vector? dat) (vector-ref dat 1) #f)))
    (if (and (vector? conninfo) (< 5 (vector-length conninfo)))
	(http-transport:server-dat-update-last-access conninfo) ;; refresh access time
	(begin
	  (debug:print 0 log-port "INFO: Should not get here! conninfo=" conninfo)
	  (set! conninfo #f)
	  (alldat-conndat-set! areadat #f) ;; NOTE: *areadat* is global copy of areadat. Purpose: factor out global.
	  (exec-fn 'http-transport:close-connections  area-dat: areadat)))
    (debug:print-info 13 log-port "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " areadat = " areadat)
    ;; (mutex-unlock! rmt-mutex)
    (if success ;; success only tells us that the transport was
	;; successful, have to examine the data to see if
	;; there was a detected issue at the other end
	(extras-transport-succeded log-port rmt-mutex attemptnum areadat areapath res params rid cmd alldat)
	(extras-transport-failed log-port rmt-mutex attemptnum areadat areapath cmd rid params alldat)
	)))

;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info areadat areapath #!key (area-dat #f)) ;; TODO: push areapath down.
  (let* (;; (areadat (or area-dat areadat))
	 (cinfo     (if (alldat? areadat)
			(alldat-conndat areadat)
			#f)))
	  (if cinfo
	      cinfo
	      (if (exec-fn 'server:check-if-running areapath)
		  (exec-fn 'client:setup areadat areapath)
		  #f))))

;;======================================================================
;; ulex and steps stuff
;;======================================================================

(define (rmtmod:setup-ulex toppath)
  (ulex:make-area
   dbdir:   (conc toppath "/ulexdb")
   pktsdir: (conc toppath "/pkts") 
   ))



(define (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat)
  #f)

;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;

;; generate entries for ~/.megatestrc with the following
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u

;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; (define *runremote* (make-remote))

;; this entry point can decide based on cmd whether to dispatch to old api calls via remote or via ulex
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f))
  (let* ((areapath      *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
	 (alldat     (or area-dat
			    *alldat*)))
    ;; ensure we have a record for our connection for given area
    (if (not (alldat-hh-dat alldat))
	(begin
	  (alldat-server-timeout-set! alldat (server:expiration-timeout))
	  (alldat-hh-dat-set!         alldat (common:get-homehost))
	  )) ;; new alldat will come from this on next iteration

    ;; ensure we have a homehost record and mtconfig, do this here instead of in -orig
    (if (or (not (alldat-mtconfig *alldat*))
	    (not (alldat-hh-dat alldat))
	    (not (pair? (alldat-hh-dat alldat))))  ;; not on homehost
	(begin
	  (alldat-hh-dat-set! alldat (common:get-homehost))
	  (alldat-mtconfig-set! *alldat* *configdat*)
	  (alldat-areapath-set! *alldat* *toppath*)
	  (alldat-areadat-set!  *alldat* alldat) ;; TODO: converge usage of alldat and area-dat
	  ))

    (if (member cmd '(blah))
	(begin
	  (mutex-lock! *send-receive-mutex*)
	  (let ((ulex:conn (alldat-ulex:conn alldat)))
	    (if (not ulex:conn)(alldat-ulex:conn-set! alldat (rmtmod:setup-ulex areapath)))
	    (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat)))
	(rmt:send-receive-orig *default-log-port* alldat *rmt-mutex* areapath *db-multi-sync-mutex*
			       cmd rid params *alldat* attemptnum: attemptnum area-dat: area-dat))))