Megatest

mtmod.scm at [6faaffbe15]
Login

File mtmod.scm artifact 700ce1d5a9 part of check-in 6faaffbe15


;;======================================================================
;; Copyright 2019, Matthew Welland.
;; 
;; This file is part of Megatest.
;; 
;;     Megatest is free software: you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation, either version 3 of the License, or
;;     (at your option) any later version.
;; 
;;     Megatest is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit mtmod))
(declare (uses mtconfigf))
(declare (uses commonmod))
(declare (uses rmtmod))
(declare (uses testsmod))

(module mtmod
	*
	
(import scheme chicken data-structures extras posix)

(import (prefix mtconfigf configf:)
	commonmod
	rmtmod
	testsmod)


;; This is the Megatest API. All generally "useful" routines will be wrapped or extended
;; here.

;;======================================================================
;;  R U N S
;;======================================================================

;; runs:get-runs-by-patt
;; get runs by list of criteria
;; register a test run with the db
;;
;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;;  to extract info from the structure returned
;;
(define (mt:get-runs-by-patt keys runnamepatt targpatt)
  (let loop ((runsdat  (rmt:get-runs-by-patt keys runnamepatt targpatt 0 500 #f 0))
	     (res      '())
	     (offset   0)
	     (limit    500))
    ;; (print "runsdat: " runsdat)
    (let* ((header    (vector-ref runsdat 0))
	   (runslst   (vector-ref runsdat 1))
	   (full-list (append res runslst))
	   (have-more (eq? (length runslst) limit)))
      ;; (debug:print 0 *default-log-port* "header: " header " runslst: " runslst " have-more: " have-more)
      (if have-more 
	  (let ((new-offset (+ offset limit))
		(next-batch (rmt:get-runs-by-patt keys runnamepatt targpatt offset limit #f 0)))
	    (debug:print-info 4 *default-log-port* "More than " limit " runs, have " (length full-list) " runs so far.")
	    (debug:print-info 0 *default-log-port* "next-batch: " next-batch)
	    (loop next-batch
		  full-list
		  new-offset
		  limit))
	 (vector header full-list)))))

;;======================================================================
;;  T E S T S
;;======================================================================

(define (mt:get-tests-for-run run-id testpatt states status #!key (not-in #t) (sort-by 'event_time) (sort-order "ASC") (qryvals #f)(last-update #f))
  (let loop ((testsdat (rmt:get-tests-for-run run-id testpatt states status 0 500 not-in sort-by sort-order qryvals last-update 'normal))
	     (res      '())
	     (offset   0)
	     (limit    500))
    (let* ((full-list (append res testsdat))
	   (have-more (eq? (length testsdat) limit)))
      (if have-more 
	  (let ((new-offset (+ offset limit)))
	    (debug:print-info 4 *default-log-port* "More than " limit " tests, have " (length full-list) " tests so far.")
	    (loop (rmt:get-tests-for-run run-id testpatt states status new-offset limit not-in sort-by sort-order qryvals last-update 'normal)
		  full-list
		  new-offset
		  limit))
	  full-list))))

(define (mt:lazy-get-prereqs-not-met run-id waitons ref-item-path #!key (mode '(normal))(itemmaps #f) )
  (let* ((key    (list run-id waitons ref-item-path mode))
	 (res    (hash-table-ref/default *pre-reqs-met-cache* key #f))
	 (useres (let ((last-time (if (vector? res) (vector-ref res 0) #f)))
		   (if last-time
		       (< (current-seconds)(+ last-time 5))
		       #f))))
    (if useres
	(let ((result (vector-ref res 1)))
	  (debug:print 4 *default-log-port* "Using lazy value res: " result)
	  result)
	(let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmaps: itemmaps)))
	  (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres))
	  newres))))

(define (mt:get-run-stats dbstruct run-id)
;;  Get run stats from local access, move this ... but where?
  (db:get-run-stats dbstruct run-id))

(define (mt:discard-blocked-tests run-id failed-test tests test-records)
  (if (null? tests)
      tests
      (begin
	(debug:print-info 1 *default-log-port* "Discarding tests from " tests " that are waiting on " failed-test)
	(let loop ((testn (car tests))
		   (remt  (cdr tests))
		   (res   '()))
	  (let* ((test-dat (hash-table-ref/default test-records testn (vector #f #f '())))
		 (waitons  (vector-ref test-dat 2)))
	    ;; (print "mt:discard-blocked-tests run-id: " run-id " failed-test: " failed-test " testn: " testn " with waitons: " waitons)
	    (if (null? remt)
		(let ((new-res (reverse res)))
		  ;; (print "       new-res: " new-res)
		  new-res)
		(loop (car remt)
		      (cdr remt)
		      (if (member failed-test waitons)
			  (begin
			    (debug:print 0 *default-log-port* "Discarding test " testn "(" test-dat ") due to " failed-test)
			    res)
			  (cons testn res)))))))))

;;======================================================================
;;  S T A T E   A N D   S T A T U S   F O R   T E S T S 
;;======================================================================

;; speed up for common cases with a little logic
(define (mt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
  (if (not (and run-id test-id))
      (begin
	(debug:print-error 0 *default-log-port* "bad data handed to mt:test-set-state-status-by-id, run-id=" run-id ", test-id=" test-id ", newstate=" newstate)
	(print-call-chain (current-error-port))
	#f)
      (begin
	;; cond
	;; ((and newstate newstatus newcomment)
	;;  (rmt:general-call 'state-status-msg run-id newstate newstatus newcomment test-id))
	;; ((and newstate newstatus)
	;;  (rmt:general-call 'state-status run-id newstate newstatus test-id))
	;; (else
	;;  (if newstate   (rmt:general-call 'set-test-state   run-id newstate   test-id))
	;;  (if newstatus  (rmt:general-call 'set-test-status  run-id newstatus  test-id))
	;;  (if newcomment (rmt:general-call 'set-test-comment run-id newcomment test-id))))
	(rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment)
	;; (mt:process-triggers run-id test-id newstate newstatus)
	#t)))


(define (mt:test-set-state-status-by-id-unless-completed run-id test-id newstate newstatus newcomment)
  (let* ((test-vec   (rmt:get-testinfo-state-status run-id test-id))
         (state     (vector-ref test-vec 3)))
    (if (equal? state "COMPLETED")
        #t
        (rmt:set-state-status-and-roll-up-items run-id test-id #f newstate newstatus newcomment))))

  
(define (mt:test-set-state-status-by-testname run-id test-name item-path new-state new-status new-comment)
  ;(let ((test-id (rmt:get-test-id run-id test-name item-path)))
  (rmt:set-state-status-and-roll-up-items run-id test-name item-path new-state new-status new-comment)
  ;; (mt:process-triggers run-id test-id new-state new-status)
  #t);)
	;;(mt:test-set-state-status-by-id run-id test-id new-state new-status new-comment)))

(define (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path new-state new-status new-comment)
  (let ((test-id (rmt:get-test-id run-id test-name item-path)))
    (mt:test-set-state-status-by-id-unless-completed run-id test-id new-state new-status new-comment)))
    
;; kill any runner processes (i.e. processes handling -runtests) that match target/runname
;; 
;; do a remote call to get the task queue info but do the killing as self here.
;;
(define (tasks:kill-runner target run-name testpatt)
  (let ((records    (rmt:tasks-find-task-queue-records target run-name testpatt "running" "run-tests"))
	(hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string
    (if (null? records)
	(debug:print 0 *default-log-port* "No run launching processes found for " target " / " run-name " with testpatt " (or testpatt "* no testpatt specified! *"))
	(debug:print 0 *default-log-port* "Found " (length records) " run(s) to kill."))
    (for-each 
     (lambda (record)
       (let* ((param-key (list-ref record 8))
	      (match-dat (string-search hostpid-rx param-key)))
	 (if match-dat
	     (let ((hostname  (cadr match-dat))
		   (pid       (string->number (caddr match-dat))))
	       (debug:print 0 *default-log-port* "Sending SIGINT to process " pid " on host " hostname)
	       (if (equal? (get-host-name) hostname)
		   (if (process:alive? pid)
		       (begin
			 (handle-exceptions
			  exn
			  (begin
			    (debug:print 0 *default-log-port* "Kill of process " pid " on host " hostname " failed.")
			    (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
			    #t)
			  (process-signal pid signal/int)
			  (thread-sleep! 5)
			  (if (process:alive? pid)
			      (process-signal pid signal/kill)))))
		   ;;  (call-with-environment-variables
		   (let ((old-targethost (getenv "TARGETHOST")))
		     (setenv "TARGETHOST" hostname)
		     (setenv "TARGETHOST_LOGF" "server-kills.log")
		     (system (conc "nbfake kill " pid))
		     (if old-targethost (setenv "TARGETHOST" old-targethost))
		     (unsetenv "TARGETHOST")
		     (unsetenv "TARGETHOST_LOGF"))))
	     (debug:print-error 0 *default-log-port* "no record or improper record for " target "/" run-name " in tasks_queue in main.db"))))
     records)))

(define (task:get-run-times)
   (let* ( 
           (run-patt (if (args:get-arg "-run-patt")
                        (args:get-arg "-run-patt")
                        "%"))
           (target-patt (if (args:get-arg "-target-patt")
                        (args:get-arg "-target-patt")
                        "%"))
 
           (run-times  (rmt:get-run-times  run-patt target-patt )))
   (if (eq? (length run-times) 0)
     (begin
       (print "Data not found!!")
       (exit)))
   (if (equal? (args:get-arg "-dumpmode") "json")
       (task:print-runtime-as-json run-times)
         (if (equal? (args:get-arg "-dumpmode") "csv")
	     (task:print-runtime run-times ",")
	     (task:print-runtime run-times "  ")))))

 (define (task:get-test-times)
   (let* ((runname (if (args:get-arg "-runname")
                        (args:get-arg "-runname")
                        #f))
           (target (if (args:get-arg "-target")
                        (args:get-arg "-target")
                        #f))
 
           (test-times  (rmt:get-test-times  runname target )))
   (if (not runname)
      (begin
      (print "Error: Missing argument -runname")
      (exit))) 
    (if (string-contains runname "%")
      (begin
      (print "Error: Invalid runname, '%' not allowed  (" runname ") ")
      (exit)))
    (if (not target)
      (begin
      (print "Error: Missing argument -target")
      (exit)))
     (if  (string-contains target "%")
      (begin
      (print "Error: Invalid target, '%' not allowed  (" target ") ")
      (exit)))
 
   (if (eq? (length test-times) 0)
     (begin
       (print "Data not found!!")
       (exit)))
   (if (equal? (args:get-arg "-dumpmode") "json")
       (task:print-testtime-as-json test-times)
         (if (equal? (args:get-arg "-dumpmode") "csv")
	     (task:print-testtime test-times ",")
	     (task:print-testtime test-times "  ")))))



;; gets mtpg-run-id and syncs the record if different
;;
(define (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time)
  (let* ((runs-ht (hash-table-ref cached-info 'runs))
	 (runinf  (hash-table-ref/default runs-ht run-id #f))
         (area-id (vector-ref area-info 0)))
       (if runinf
	runinf ;; already cached
	(let* ((run-dat    (rmt:get-run-info run-id))               ;; NOTE: get-run-info returns a vector < row header >
	       (run-name   (rmt:get-run-name-from-id run-id))
	       (row        (db:get-rows run-dat))                   ;; yes, this returns a single row
	       (header     (db:get-header run-dat))
	       (state      (db:get-value-by-header row header "state"))
	       (status     (db:get-value-by-header row header "status"))
	       (owner      (db:get-value-by-header row header "owner"))
	       (event-time (db:get-value-by-header row header "event_time"))
	       (comment    (db:get-value-by-header row header "comment"))
	       (fail-count (db:get-value-by-header row header "fail_count"))
	       (pass-count (db:get-value-by-header row header "pass_count"))
         (db-contour (db:get-value-by-header row header "contour"))
	       (contour    (if (args:get-arg "-prepend-contour") 
                                 (if (and db-contour (not (equal? db-contour ""))  (string? db-contour )) 
                                           (begin 
                                            (debug:print-info 1 *default-log-port*  "db-contour") 
 						db-contour)
					    (args:get-arg "-contour"))))
         (run-tag (if (args:get-arg "-run-tag")
                            (args:get-arg "-run-tag")
									""))
         (last-update (db:get-value-by-header row header "last_update"))
	       (keytarg    (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target"))
	       			(conc "MT_CONTOUR/MT_AREA/" (string-intersperse (rmt:get-keys) "/")) (string-intersperse (rmt:get-keys) "/"))) ;; e.g. version/iteration/platform
	       (target     (if (or (args:get-arg "-prepend-contour") (args:get-arg "-prefix-target")) 
	       			(conc (or (args:get-arg "-prefix-target") (conc contour "/" (common:get-area-name) "/")) (rmt:get-target run-id)) (rmt:get-target run-id)))                 ;; e.g. v1.63/a3e1/ubuntu
	       (spec-id    (pgdb:get-ttype dbh keytarg))
	       (publish-time (if (args:get-arg "-cp-eventtime-to-publishtime")
                            event-time
                           (current-seconds))) 
	       (new-run-id (pgdb:get-run-id dbh spec-id target run-name area-id)))
         (if new-run-id
	         (begin ;; let ((run-record (pgdb:get-run-info dbh new-run-id))
		        (hash-table-set! runs-ht run-id new-run-id)
		;; ensure key fields are up to date
     ;; if last_update == pgdb_last_update do not update smallest-last-update-time  
    (let* ((pgdb-last-update (pgdb:get-run-last-update dbh new-run-id))
           (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
     (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
        (hash-table-set! smallest-last-update-time "smallest-time" last-update)))
		(pgdb:refresh-run-info
		 dbh
		 new-run-id
		 state status owner event-time comment fail-count pass-count area-id last-update publish-time)
     (debug:print-info 0 *default-log-port* "Working on run-id " run-id " pgdb-id "  new-run-id )
     (if (not (equal? run-tag ""))
      (task:add-run-tag dbh new-run-id run-tag))
		new-run-id) 
      
	      (if (equal? state "deleted")
          (begin 
          (debug:print-info 1 *default-log-port*  "Warning: Run with id " run-id " was created after previous sync and deleted before the sync") #f)
          (if (handle-exceptions
		        exn
		        (begin (print-call-chain)
              (print ((condition-property-accessor 'exn 'message) exn))     
			      #f)
            
            (pgdb:insert-run
		     dbh
		     spec-id target run-name state status owner event-time comment fail-count pass-count  area-id last-update publish-time))
		       (let* ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
             (if (or (not smallest-time) (< last-update smallest-time))
        				(hash-table-set! smallest-last-update-time "smallest-time" last-update))
             (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
		  #f)))))))

(define (task:add-run-tag dbh run-id tag) 
  (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag)))
   (if (not tag-info)
     (begin   
     (if (handle-exceptions
	   exn
	   (begin 
               (debug:print-info 1 *default-log-port*  ((condition-property-accessor 'exn 'message) exn))     
	   #f)
	   (pgdb:insert-tag  dbh   tag))
                       (set! tag-info (pgdb:get-tag-info-by-name dbh tag))
		  #f)))
     ;;add to area_tags
     (handle-exceptions
	   exn
	   (begin 
               (debug:print-info 1 *default-log-port*  ((condition-property-accessor 'exn 'message) exn))     
	   #f)
           (if (not (pgdb:is-run-taged-with-a-tag dbh (vector-ref tag-info 0)  run-id))  
	   (pgdb:insert-run-tag  dbh   (vector-ref tag-info 0)  run-id)))))


(define (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time)
 ; (print "Sync Steps " test-step-ids )
  (let ((test-ht (hash-table-ref cached-info 'tests))
        (step-ht (hash-table-ref cached-info 'steps)))
    (for-each
     (lambda (test-step-id)
        (let* ((test-step-info  (rmt:get-steps-info-by-id test-step-id))
               (step-id (tdb:step-get-id test-step-info))
               (test-id  (tdb:step-get-test_id    test-step-info))   
	       (stepname (tdb:step-get-stepname  test-step-info))
	       (state (tdb:step-get-state test-step-info))	
	       (status (tdb:step-get-status test-step-info))	
	       (event_time (tdb:step-get-event_time  test-step-info))	
	       (comment  (tdb:step-get-comment test-step-info))	
	       (logfile (tdb:step-get-logfile test-step-info))	
         (last-update (tdb:step-get-last_update test-step-info))
	       (pgdb-test-id  (hash-table-ref/default test-ht test-id #f))
				 (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))
         (pgdb-step-id (if pgdb-test-id 
                         (pgdb:get-test-step-id dbh pgdb-test-id stepname state)
                          #f)))
    (if step-id
      (begin  
        (if pgdb-test-id
           (begin 
                (if  pgdb-step-id
                   (begin
                    (debug:print-info 1 *default-log-port*  "Updating existing test-step with test-id: " test-id " and step-id " step-id " pgdb test id: " pgdb-test-id " pgdb step id " pgdb-step-id )
										(let* ((pgdb-last-update (pgdb:get-test-step-last-update dbh pgdb-step-id)))
         (if (and  (> last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
                    (pgdb:update-test-step dbh pgdb-step-id pgdb-test-id stepname state status event_time comment logfile last-update))
                    (begin
 		      (debug:print-info 1 *default-log-port*  "Inserting test-step with test-id: " test-id " and step-id " step-id  " pgdb test id: " pgdb-test-id)
                     (if (or (not smallest-time) (< last-update smallest-time))
        				      (hash-table-set! smallest-last-update-time "smallest-time" last-update))
                      (pgdb:insert-test-step dbh pgdb-test-id stepname state status event_time comment logfile last-update )
                      (set! pgdb-step-id  (pgdb:get-test-step-id dbh pgdb-test-id stepname state))))
                (hash-table-set! step-ht step-id pgdb-step-id ))
           (debug:print-info 1 *default-log-port*  "Error: Test not cashed")))
      (debug:print-info 1 *default-log-port*  "Error: Could not get test step info for step id " test-step-id ))))	;; this is a wierd senario need to debug      	
   test-step-ids)))

(define (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time)
  (let ((test-ht (hash-table-ref cached-info 'tests))
        (data-ht (hash-table-ref cached-info 'data)))
    (for-each
     (lambda (test-data-id)
        (let* ((test-data-info  (rmt:get-data-info-by-id test-data-id))
               (data-id (db:test-data-get-id  test-data-info))
               (test-id  (db:test-data-get-test_id   test-data-info))   
	       (category  (db:test-data-get-category  test-data-info))
	       (variable  (db:test-data-get-variable test-data-info))	
	       (value (db:test-data-get-value  test-data-info))	
               (expected (db:test-data-get-expected  test-data-info))
               (tol (db:test-data-get-tol  test-data-info))
               (units (db:test-data-get-units  test-data-info))     
	       (comment  (db:test-data-get-comment test-data-info))	
               (status (db:test-data-get-status test-data-info))	
	       (type (db:test-data-get-type test-data-info))
				 (last-update (db:test-data-get-last_update test-data-info))
				 (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))
   	
	       (pgdb-test-id  (hash-table-ref/default test-ht test-id #f))
               (pgdb-data-id (if pgdb-test-id 
                                 (pgdb:get-test-data-id dbh pgdb-test-id category variable)
                                  #f)))
    (if data-id
      (begin
        (if pgdb-test-id
           (begin 
                (if  pgdb-data-id
                   (begin
                    (debug:print-info 1 *default-log-port*  "Updating existing test-data with test-id: " test-id " and  data-id " data-id " pgdb test id: " pgdb-test-id " pgdb data id " pgdb-data-id)
                    (let* ((pgdb-last-update (pgdb:get-test-data-last-update dbh pgdb-data-id)))
         (if (and  (>  last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time)))
        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
                    (pgdb:update-test-data dbh pgdb-data-id pgdb-test-id  category variable value expected tol units comment status type last-update))
                    (begin
 		      (debug:print-info 1 *default-log-port*  "Inserting test-data with test-id: " test-id " and data-id " data-id " pgdb test id: " pgdb-test-id)
                       (if (handle-exceptions
		      exn
		      (begin (print-call-chain)
                              (print ((condition-property-accessor 'exn 'message) exn))     
			#f)
                     
                    (pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type last-update))
		       ;(tasks:run-id->mtpg-run-id dbh cached-info run-id area-info)
                      (begin
                      ;(pgdb:insert-test-data dbh pgdb-test-id category variable value expected tol units comment status type )
											(if (or (not smallest-time) (< last-update smallest-time))
        								(hash-table-set! smallest-last-update-time "smallest-time" last-update))
                      (set! pgdb-data-id  (pgdb:get-test-data-id dbh pgdb-test-id  category variable)))
		   #f)))
                (hash-table-set! data-ht data-id pgdb-data-id ))
             (begin
                 (debug:print-info 1 *default-log-port*  "Error: Test not in pgdb"))))

      (debug:print-info 1 *default-log-port*  "Error: Could not get test data info for data id " test-data-id ))))	;; this is a wierd senario need to debug      	
   test-data-ids)))



(define (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time)
  (let ((test-ht (hash-table-ref cached-info 'tests)))
    (for-each
     (lambda (test-id)
      ; (print test-id)
       (let* ((test-info    (rmt:get-test-info-by-id #f test-id))
	      (run-id       (db:test-get-run_id    test-info)) ;; look these up in db_records.scm
	      (test-id      (db:test-get-id        test-info))
	      (test-name    (db:test-get-testname  test-info))
	      (item-path    (db:test-get-item-path test-info))
	      (state        (db:test-get-state     test-info))
	      (status       (db:test-get-status    test-info))
	      (host         (db:test-get-host      test-info))
        (pid          (db:test-get-process_id test-info)) 
	      (cpuload      (db:test-get-cpuload   test-info))
	      (diskfree     (db:test-get-diskfree  test-info))
	      (uname        (db:test-get-uname     test-info))
	      (run-dir      (db:test-get-rundir    test-info))
	      (log-file     (db:test-get-final_logf test-info))
	      (run-duration (db:test-get-run_duration test-info))
	      (comment      (db:test-get-comment   test-info))
	      (event-time   (db:test-get-event_time test-info))
	      (archived     (db:test-get-archived  test-info))
        (last-update  (db:test-get-last_update  test-info))
	      (pgdb-run-id  (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
        (smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f))       
	      (pgdb-test-id (if pgdb-run-id 
				(begin
                                  ;(print pgdb-run-id)    
                                 (pgdb:get-test-id dbh pgdb-run-id test-name item-path))
                                 #f)))
	 ;; "id"           "run_id"        "testname"  "state"      "status"      "event_time"
	 ;; "host"         "cpuload"       "diskfree"  "uname"      "rundir"      "item_path"
	 ;; "run_duration" "final_logf"    "comment"   "shortdir"   "attemptnum"  "archived"
         (if pgdb-run-id
           (begin
	   (if pgdb-test-id ;; have a record
	     (begin ;; let ((key-name (conc run-id "/" test-name "/" item-path)))
	       (debug:print-info 0 *default-log-port*  "Updating existing test with run-id: " run-id " and test-id: " test-id " pgdb run id: " pgdb-run-id "  pgdb-test-id "  pgdb-test-id)
         (let* ((pgdb-last-update (pgdb:get-test-last-update dbh pgdb-test-id)))
         (if (and  (>  last-update pgdb-last-update) (or (not smallest-time) (< last-update smallest-time))) ;;if last-update is same as pgdb-last-update then it is safe to assume the records are identical and we can use a larger last update time.
        (hash-table-set! smallest-last-update-time "smallest-time" last-update))) 
	       (pgdb:update-test dbh pgdb-test-id pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid))
	     (begin 
           (debug:print-info 0 *default-log-port*  "Inserting test with run-id: " run-id " and test-id: " test-id  " pgdb run id: " pgdb-run-id)
           (pgdb:insert-test dbh pgdb-run-id test-name item-path state status host cpuload diskfree uname run-dir log-file run-duration comment event-time archived last-update pid)
            (if (or (not smallest-time) (< last-update smallest-time))
        				(hash-table-set! smallest-last-update-time "smallest-time" last-update))
           (set! pgdb-test-id (pgdb:get-test-id dbh pgdb-run-id test-name item-path))))
           (hash-table-set! test-ht test-id pgdb-test-id))
           (debug:print-info 1 *default-log-port*  "WARNING: Skipping run with run-id:" run-id ". This run was created after privious sync and removed before this sync."))))
     test-ids)))

(define (task:add-area-tag dbh area-info tag) 
  (let* ((tag-info (pgdb:get-tag-info-by-name dbh tag)))
   (if (not tag-info)
     (begin   
     (if (handle-exceptions
	   exn
	   (begin 
               (debug:print-info 1 *default-log-port*  ((condition-property-accessor 'exn 'message) exn))     
	   #f)
	   (pgdb:insert-tag  dbh   tag))
                       (set! tag-info (pgdb:get-tag-info-by-name dbh tag))
		  #f)))
     ;;add to area_tags
     (handle-exceptions
	   exn
	   (begin 
               (debug:print-info 1 *default-log-port*  ((condition-property-accessor 'exn 'message) exn))     
	   #f)
           (if (not (pgdb:is-area-taged-with-a-tag dbh (vector-ref tag-info 0)  (vector-ref area-info 0)))  
	   (pgdb:insert-area-tag  dbh   (vector-ref tag-info 0)  (vector-ref area-info 0))))))

(define (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) 
  (for-each
     (lambda (run-id)
      (debug:print-info 1 *default-log-port*   "Check if run with " run-id " needs to be synced" )
       (tasks:run-id->mtpg-run-id dbh cached-info run-id area-info smallest-last-update-time))
run-ids))


;; get runs changed since last sync
;; (define (tasks:sync-test-data dbh cached-info area-info)
;;   (let* ((

(define (tasks:sync-to-postgres configdat dest)
  (print "In sync")
  (let* ((dbh         (pgdb:open configdat dbname: dest))
	 (area-info   (pgdb:get-area-by-path dbh *toppath*))
	 (cached-info (make-hash-table))
	 (start       (current-seconds))
   (test-patt   (if (args:get-arg "-testpatt")
											(args:get-arg "-testpatt")
                      "%"))
   (target         (if (args:get-arg "-target")
														 (args:get-arg "-target")
													#f))
    (run-name         (if (args:get-arg "-runname")
														 (args:get-arg "-runname")
													#f)))
     (if (and target  (not run-name))
       (begin
					(print "Error: Provide runname")
          (exit 1)))
     (if (and (not target)  run-name)
       (begin
					(print "Error: Provide target")
          (exit 1)))
    ;(print "123")
    ;(exit 1) 
    (for-each (lambda (dtype)
		(hash-table-set! cached-info dtype (make-hash-table)))
	      '(runs targets tests steps data))
    (hash-table-set! cached-info 'start start) ;; when done we'll set sync times to this
    (if area-info
	(let* ((last-sync-time (vector-ref area-info 3))
	       (smallest-last-update-time  (make-hash-table))
         (changed      (if (and target run-name)
                            (rmt:get-run-record-ids target run-name (rmt:get-keys) test-patt)
                            (rmt:get-changed-record-ids last-sync-time)))
	       (run-ids        (alist-ref 'runs       changed))
	       (test-ids       (alist-ref 'tests      changed))
	       (test-step-ids  (alist-ref 'test_steps changed))
	       (test-data-ids  (alist-ref 'test_data  changed))
	       (run-stat-ids   (alist-ref 'run_stats  changed))
         (area-tag    (if (args:get-arg "-area-tag") 
                                 (args:get-arg "-area-tag")
                                 (if (args:get-arg "-area") 
                                   (args:get-arg "-area") 
                                   ""))))
           (if (and (equal? area-tag "") (not (pgdb:is-area-taged dbh (vector-ref area-info 0))))
            (set! area-tag *default-area-tag*)) 
           (if (not (equal? area-tag "")) 
             (task:add-area-tag dbh area-info area-tag)) 
	  (if (or (not (null? test-ids)) (not (null? run-ids)))
	      (begin
                (debug:print-info 0 *default-log-port*  "syncing runs")   
	              (tasks:sync-run-data dbh cached-info run-ids area-info smallest-last-update-time) 
                (debug:print-info 0 *default-log-port*  "syncing tests")
		            (tasks:sync-tests-data dbh cached-info test-ids area-info smallest-last-update-time)
                (debug:print-info 0 *default-log-port*  "syncing test steps")
                (tasks:sync-test-steps dbh cached-info test-step-ids smallest-last-update-time)
								(debug:print-info 0 *default-log-port*  "syncing test data")
                (tasks:sync-test-gen-data dbh cached-info test-data-ids smallest-last-update-time)
                (print "----------done---------------")))
     (let*  ((smallest-time (hash-table-ref/default smallest-last-update-time "smallest-time" #f)))
     (debug:print-info 0 "smallest-time :" smallest-time  " last-sync-time " last-sync-time)
    (if (not (and target run-name)) 
	  (if (or (and smallest-time (> smallest-time last-sync-time)) (and smallest-time (eq? last-sync-time 0)))
				(pgdb:write-sync-time dbh area-info smallest-time))))) ;;this needs to be changed
	(if (tasks:set-area dbh configdat)
	    (tasks:sync-to-postgres configdat dest)
	    (begin
	      (debug:print 0 *default-log-port* "ERROR: unable to create an area record")
	      #f)))))

;;======================================================================
;; L O C K I N G   M E C H A N I S M S 
;;======================================================================

;; faux-lock is deprecated. Please use simple-lock below
;;
(define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t))
  (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count
      (if (> wait-time 0)
	  (begin
	    (thread-sleep! 1)
	    (if (eq? wait-time 1) ;; only one second left, steal the lock
		(begin
		  (debug:print-info 0 *default-log-port* "stealing lock for " keyname)
		  (common:faux-unlock keyname force: #t)))
	    (common:faux-lock keyname wait-time: (- wait-time 1)))
	  #f)
      (begin
        (rmt:no-sync-set keyname (conc (current-process-id)))
        (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))))

(define (common:faux-unlock keyname #!key (force #f))
  (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f))))
      (begin
        (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname))
        #t)
      #f))

;; simple lock. improve and converge on this one.
;;
(define (common:simple-lock keyname)
  (rmt:no-sync-get-lock keyname))

(define (common:simple-unlock keyname #!key (force #f))
  (rmt:no-sync-del! keyname))

;;======================================================================
;; db based host calls
;;======================================================================

;;======================================================================
;;  T E S T   L A U N C H I N G   P E R   I T E M   W I T H   H O S T   T Y P E S
;;======================================================================
;; 
;; [hosts]
;; arm cubie01 cubie02
;; x86_64 zeus xena myth01
;; allhosts #{g hosts arm} #{g hosts x86_64}
;; 
;; [host-types]
;; general #MTLOWESTLOAD #{g hosts allhosts}
;; arm     #MTLOWESTLOAD #{g hosts arm}
;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo
;;
;; [host-rules]
;; # maxnload   => max normalized load
;; # maxnjobs   => max jobs per cpu
;; # maxjobrate => max jobs per second
;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 
;; 
;; [launchers]
;; envsetup general
;; xor/%/n 4C16G
;; % nbgeneral
;; 
;; [jobtools]
;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match.
;; flexi-launcher yes  
;; launcher nbfake
;;
(define (common:get-launcher configdat testname itempath)
  (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher")))
    (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher
	     (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no")))
	(let* ((launchers         (hash-table-ref/default configdat "launchers" '())))
	  (if (null? launchers)
	      fallback-launcher
	      (let loop ((hed (car launchers))
			 (tal (cdr launchers)))
		(let ((patt      (car hed))
		      (host-type (cadr hed)))
		  (if (tests:match patt testname itempath)
		      (begin
			(debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type)
			(let ((launcher (configf:lookup configdat "host-types" host-type)))
			  (if launcher
			      (let* ((launcher-parts (string-split launcher))
				     (launcher-exe   (car launcher-parts)))
				(if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline
				    (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat))
						    (count     100))
				      (if targ-host
					  (conc "remrun " targ-host)
					  (if (> count 0)
					      (begin
						(debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type)
						(thread-sleep! (- 101 count))
						(host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)
							   (- count 1)))
					      (begin
						(debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type)
						(exit)))))
				    launcher))
			      (begin
				(debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type)
				(if (null? tal)
				    fallback-launcher
				    (loop (car tal)(cdr tal)))))))
		      ;; no match, try again
		      (if (null? tal)
			  fallback-launcher
			  (loop (car tal)(cdr tal))))))))
	fallback-launcher)))

;; ideally put all this info into the db, no need to preserve it across moving homehost
;;
;; return list of
;;  ( reachable? cpuload update-time )
(define (common:get-host-info hostname)
  (let* ((loadinfo                         (rmt:get-latest-host-load hostname)) ;; if this host happens to have been recently used by a test reuse the load data
         (load                             (car loadinfo))
         (load-sample-time                 (cdr loadinfo))
         (load-sample-age                  (- (current-seconds) load-sample-time))
         (loadinfo-timeout-seconds         6) ;; this was 20 seconds, seems way too lax. Switch to 6 seconds
         (host-last-update-timeout-seconds 4)
         (host-rec (hash-table-ref/default *host-loads* hostname #f))
         )
    (cond
     ((< load-sample-age loadinfo-timeout-seconds)
      (list #t
            load-sample-time
            load))
     ((and host-rec
           (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds)))
      (list #t
            (host-last-update host-rec)
            (host-last-cpuload host-rec )))
     ((common:unix-ping hostname)
      (list #t
            (current-seconds)
            (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) ;; this is cheaper than you might think. get-normalized-cpu-load is cached for up to 5 seconds
     (else
      (list #f 0 -1) ;; bad host, don't use!
      ))))

;; see defstruct host at top of file.
;;    host: reachable last-update last-used last-cpuload
;;
(define (common:update-host-loads-table hosts-raw)
  (let* ((hosts (filter (lambda (x)
                          (string-match (regexp "^\\S+$") x))
                        hosts-raw)))
    (for-each
     (lambda (hostname)
       (let* ((rec       (let ((h (hash-table-ref/default *host-loads* hostname #f)))
                          (if h
                              h
                              (let ((h (make-host)))
                                (hash-table-set! *host-loads* hostname h)
                                h))))
              (host-info         (common:get-host-info hostname))
              (is-reachable      (car host-info))
              (last-reached-time (cadr host-info))
              (load              (caddr host-info)))
         (host-reachable-set!    rec is-reachable)
         (host-last-update-set!  rec last-reached-time)
         (host-last-cpuload-set! rec load)))
     hosts)))

;; go through the hosts from least recently used to most recently used, pick the first that meets the load criteral from the
;; [host-rules] section.
;;
(define (common:get-least-loaded-host hosts-raw host-type configdat)
  (let* ((rdat       (configf:lookup configdat "host-rules" host-type))
	 (rules      (common:val->alist (or rdat "") convert: #t))   ;; maxnload, maxnjobs, maxjobrate
	 (maxnload   (common:alist-ref/default 'maxnload rules 1.5)) ;; max normalized load
	 (maxnjobs   (common:alist-ref/default 'maxnjobs rules 1.5)) ;; max normalized number of jobs
	 (maxjobrate (common:alist-ref/default 'maxjobrate rules (/ 1 6))) ;; max rate of submitting jobs to a given host in jobs/second
	 (hosts      (filter (lambda (x)
			       (string-match (regexp "^\\S+$") x))
			     hosts-raw))
         ;; (best-host #f)
	 (get-rec    (lambda (hostname)
		       ;; (print "get-rec hostname=" hostname)
		       (let ((h (hash-table-ref/default *host-loads* hostname #f)))
			 (if h
			     h
			     (let ((h (make-host)))
			       (hash-table-set! *host-loads* hostname h)
			       h)))))
         (best-load 99999)
         (curr-time (current-seconds))
	 (get-hosts-sorted (lambda (hosts)
			     (sort hosts (lambda (a b)
					   (let ((a-rec (get-rec a))
						 (b-rec (get-rec b)))
					     ;; (print "a=" a " a-rec=" a-rec " host-last-used=" (host-last-used a-rec))
					     ;; (print "b=" b " b-rec=" b-rec " host-last-used=" (host-last-used b-rec))
					     (< (host-last-used a-rec)
						(host-last-used b-rec))))))))
    (debug:print 0 *default-log-port* "INFO: hosts-sorted=" (get-hosts-sorted hosts))
    (if (null? hosts)
	#f ;; no hosts to select from. All done and giving up now.
	(let ((hosts-sorted (get-hosts-sorted hosts)))
	  (common:update-host-loads-table hosts)
	  (let loop ((hostname  (car hosts-sorted))
		     (tal       (cdr hosts-sorted))
		     (best-host #f))
	    (let* ((rec       (get-rec hostname))
		   (reachable (host-reachable     rec))
		   (load      (host-last-cpuload  rec))
		   (last-used (host-last-used     rec))
		   (delta     (- curr-time last-used))
		   (job-rate  (if (> delta 0)
				  (/ 1 delta)
				  999)) ;; jobs per second
		   (new-best  
		    (cond
		     ((not reachable)
		      (debug:print 0 *default-log-port* "Skipping host " hostname " as it cannot be reached.")
		      best-host)
		     ((and (< load maxnload)        ;; load is acceptable
			   (< job-rate maxjobrate)) ;; job rate is acceptable
		      (set! best-load load)
		      hostname)
		     (else best-host))))
	      (debug:print 0 *default-log-port* "INFO: Trying host " hostname " with load " load ", last used " delta " seconds ago, with job-rate " job-rate " for running a test." )
	      (if new-best
		  (begin ;; found a host, return it
		    (debug:print 0 *default-log-port* "INFO: Found host: " new-best " load: " load " last-used: " delta " seconds ago, with job-rate: " job-rate)
		    (host-last-used-set! rec curr-time)
		    new-best)
		  (if (null? tal) #f (loop (car tal)(cdr tal) best-host)))))))))

(define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f))
  (let* ((loadavg (common:get-cpu-load remote-host))
	 (numcpus (if (<= 1 numcpus-in) ;; not possible to have zero.  If we get 1, it's possible that we got the previous default, and we should check again
		      (common:get-num-cpus remote-host)
		      numcpus-in))
	 (maxload (if force-maxload
		      maxload-in
		      (max maxload-in 0.5))) ;; so maxload must be greater than 0.5 for now BUG - FIXME?
	 (first   (car loadavg))
	 (next    (cadr loadavg))
	 (adjload (* maxload (max 1 numcpus))) ;; possible bug where numcpus (or could be maxload) is zero, crude fallback is to at least use 1
	 (loadjmp (- first next))
         (adjwait (min (+ 300 (random 10)) (abs (* (+ (random 10)(/ (- 1000 count) 10) waitdelay) (- first adjload) ))  ))) ;; add some randomness to the time to break any alignment where netbatch dumps many jobs to machines simultaneously
    (debug:print-info 1 *default-log-port* "Checking cpuload on " (or remote-host "localhost") ", maxload: " maxload
		      ", load: " first ", adjload: " adjload ", loadjmp: " loadjmp)
    (cond
     ((and (> first adjload)
	   (> count 0))
      (debug:print-info 0 *default-log-port* "server start delayed " adjwait " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))
     ((and (> loadjmp numcpus)
	   (> count 0))
      (debug:print-info 0 *default-log-port* "waiting " adjwait " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
      (thread-sleep! adjwait)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)))))

(define (common:wait-for-homehost-load maxload msg)
  (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local.
                     #f
                     (common:get-homehost)))
         (hh     (if hh-dat (car hh-dat) #f))
         (numcpus (common:get-num-cpus hh)))
    (common:wait-for-normalized-load maxload msg hh)))

(define (common:get-num-cpus remote-host)
  (let* ((actual-host (or remote-host (get-host-name))))
    (or (common:get-cached-info actual-host "num-cpus" age: 86400) ;; hosts had better not be changing the number of cpus too often!
	(let* ((proc   (lambda ()
			 (let loop ((numcpu 0)
				    (inl    (read-line)))
			   (if (eof-object? inl)
			       (begin
				 (common:write-cached-info remote-host "num-cpus" numcpu)
				 numcpu)
			       (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
					 (+ numcpu 1)
					 numcpu)
				     (read-line))))))
	       (result (if remote-host
			   (with-input-from-pipe 
			       (conc "ssh " remote-host " cat /proc/cpuinfo")
			     proc)
			   (with-input-from-file "/proc/cpuinfo" proc))))
	  (common:write-cached-info actual-host "num-cpus" result)
	  result))))

;; wait for normalized cpu load to drop below maxload
;;
(define (common:wait-for-normalized-load maxload msg remote-host)
  (let ((num-cpus (common:get-num-cpus remote-host)))
    (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host)))

;;======================================================================
;; D E B U G G I N G   S T U F F 
;;======================================================================

;; (define *verbosity*         1)
;; (define *logging*           #f)

(define (common:set-last-run-version)
  (rmt:set-var "MEGATEST_VERSION" (common:version-signature)))

;; postive number if megatest version > db version
;; negative number if megatest version < db version
(define (common:version-db-delta)
         (- megatest-version (common:get-last-run-version-number)))

(define (common:version-changed?)
  (not (equal? (common:get-last-run-version)
               (common:version-signature))))

;; from metadat lookup MEGATEST_VERSION
;;
(define (common:get-last-run-version) ;; RADT => How does this work in send-receive function??; assume it is the value saved in some DB
  (rmt:get-var "MEGATEST_VERSION"))

(define (common:get-last-run-version-number)
  (string->number 
   (substring (common:get-last-run-version) 0 6)))

(define (common:api-changed?)
  (not (equal? (substring (->string megatest-version) 0 4)
               (substring (conc (common:get-last-run-version)) 0 4))))

;; '(print (string-intersperse (map cadr (hash-table-ref/default (read-config "megatest.config" \#f \#t) "disks" '"'"'("none" ""))) "\n"))'
(define (common:get-disks #!key (configf #f))
  (hash-table-ref/default 
   (or configf (configf:read-config "megatest.config" #f #t))
   "disks" '("none" "")))

;;======================================================================
;; watchdog and exit procedures
;;======================================================================

;;======================================================================
;; E X I T   H A N D L I N G
;;======================================================================

(define (common:run-sync?)
    (and (common:on-homehost?)
	 (args:get-arg "-server")))

;;   (let ((ohh (common:on-homehost?))
;; 	(srv (args:get-arg "-server")))
;;     (and ohh srv)))
    ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)

(define *watchdog* (make-thread
		    (lambda ()
		      (handle-exceptions
			  exn
			  (begin
			    (print-call-chain)
			    (print " message: " ((condition-property-accessor 'exn 'message) exn)))
			(common:watchdog)))
		    "Watchdog thread"))

;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:readonly-watchdog dbstruct)
  (thread-sleep! 0.05) ;; delay for startup
  (debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.")
  ;; sync megatest.db to /tmp/.../megatst.db
  (let* ((sync-cool-off-duration   3)
        (golden-mtdb     (dbr:dbstruct-mtdb dbstruct))
        (golden-mtpath   (db:dbdat-get-path golden-mtdb))
        (tmp-mtdb        (dbr:dbstruct-tmpdb dbstruct))
        (tmp-mtpath      (db:dbdat-get-path tmp-mtdb)))
    (debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.")
    (let loop ((last-sync-time 0))
      (debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath)
      (let* ((duration-since-last-sync (- (current-seconds) last-sync-time)))
        (debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync)
        (if (and (not *time-to-exit*)
                 (< duration-since-last-sync sync-cool-off-duration))
            (thread-sleep! (- sync-cool-off-duration duration-since-last-sync)))
        (if (not *time-to-exit*)
            (let ((golden-mtdb-mtime (file-modification-time golden-mtpath))
                  (tmp-mtdb-mtime    (file-modification-time tmp-mtpath)))
	      (if (> golden-mtdb-mtime tmp-mtdb-mtime)
		  (if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back
		      (let ((res (db:multi-db-sync dbstruct 'old2new)))
			(debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))))
              (loop (current-seconds)))
            #t)))
    (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath)))


;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(define (common:watchdog)
  (debug:print-info 13 *default-log-port* "common:watchdog entered.")
  (if (launch:setup)
      (if (common:on-homehost?)
	  (let ((dbstruct (db:setup #t)))
	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
	    (cond
	     ((dbr:dbstruct-read-only dbstruct)
	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
	      (common:readonly-watchdog dbstruct))
	     (else
	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
              (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync")))
                (cond
                 ((equal? syncer "brute-force-sync")
                  (server:writable-watchdog-bruteforce dbstruct))
                 ((equal? syncer "delta-sync")
                  (server:writable-watchdog-deltasync dbstruct))
                 (else
                  (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.")
                  (exit 1)))
                ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
                )))
	    (debug:print-info 13 *default-log-port* "watchdog done."))
	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))


(define (std-exit-procedure)
  ;;(common:telemetry-log-close)
  (on-exit (lambda () 0))
  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
  (let ((no-hurry  (if *time-to-exit* ;; hurry up
		       #f
		       (begin
			 (set! *time-to-exit* #t)
			 #t))))
    (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.")
    (if (and no-hurry (debug:debug-mode 18))
	(rmt:print-db-stats))
    (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds
                              (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
			      (if *task-db*    
				  (let ((db (cdr *task-db*)))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  ;; (vector-set! *task-db* 0 #f)
					  (set! *task-db* #f)))))
                              (http-client#close-all-connections!)
                              ;; (if (and *runremote*
                              ;;          (remote-conndat *runremote*))
                              ;;     (begin
                              ;;       (http-client#close-all-connections!))) ;; for http-client
                              (if (not (eq? *default-log-port* (current-error-port)))
                                  (close-output-port *default-log-port*))
			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
	  (th2 (make-thread (lambda ()
			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
			      (if no-hurry
                                  (begin
                                    (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff
                                  (begin
      				  (thread-sleep! 2)))
      			      (debug:print 4 *default-log-port* " ... done")
      			      )
			    "clean exit")))
      (thread-start! th1)
      (thread-start! th2)
      (thread-join! th1)
      )
    )

  0)

(define (std-signal-handler signum)
  ;; (signal-mask! signum)
  (set! *time-to-exit* #t) 
  ;;(debug:print-info 13 *default-log-port* "got signal "signum)
  (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly")
  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway
  (exit))

(define (special-signal-handler signum)
  ;; (signal-mask! signum)
  (set! *time-to-exit* #t)
  ;;(debug:print-info 13 *default-log-port* "got signal "signum)
  (debug:print-error 0 *default-log-port* "Received signal " signum " sending email befor exiting!!")
  ;;TODO send email to notify admin contact listed in the config that the lisner got killed
  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway
  (exit))


(set-signal-handler! signal/int  std-signal-handler)  ;; ^C
(set-signal-handler! signal/term std-signal-handler)

;; (set-signal-handler! signal/stop std-signal-handler)  ;; ^Z NO, do NOT handle ^Z!

;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;; Do NOT check if not on homehost!
;;
(define (common:exit-on-version-changed)
  (if (common:on-homehost?)
      (if (common:api-changed?)
	  (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
                (dbfile  (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
                (read-only (not (file-write-access? dbfile)))
                (dbstruct (db:setup #t)))
	    (debug:print 0 *default-log-port*
			 "WARNING: Version mismatch!\n"
			 "   expected: " (common:version-signature) "\n"
			 "   got:      " (common:get-last-run-version))
            (cond
             ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
             ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
                   (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db
              (debug:print 0 *default-log-port* "   I see you are the owner of megatest.config, attempting to cleanup and reset to new version")
              (handle-exceptions
               exn
               (begin
                 (debug:print 0 *default-log-port* "Failed to switch versions.")
                 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
                 (print-call-chain (current-error-port))
                 (exit 1))
               (common:cleanup-db dbstruct)))
             ((not (common:file-exists? mtconf))
              (debug:print 0 *default-log-port* "   megatest.config does not exist in this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             ((not (common:file-exists? dbfile))
              (debug:print 0 *default-log-port* "   megatest.db does not exist in this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             ((not (eq? (current-user-id)(file-owner mtconf)))
              (debug:print 0 *default-log-port* "   You do not own megatest.db in this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             (read-only
              (debug:print 0 *default-log-port* "   You have read-only access to this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             (else
              (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"")
              (exit 1)))))))
;;      (begin
;;	(debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
;;	(exit 1))))

;; Move me elsewhere ...
;; RADT => Why do we meed the version check here, this is called only if version misma
;;
(define (common:cleanup-db dbstruct #!key (full #f))
  (apply db:multi-db-sync 
   dbstruct
   'schema
   ;; 'new2old
   'killservers
   'adj-target
   ;; 'old2new
   'new2old
   ;; (if full
       '(dejunk)
       ;; '())
       )
  (if (common:api-changed?)
      (common:set-last-run-version)))

)