Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -759,22 +759,23 @@ (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" this-wd-num="this-wd-num))))))) ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage (define (common:watchdog) (debug:print-info 13 *default-log-port* "common:watchdog entered.") - (if (common:on-homehost?) - (let ((dbstruct (db:setup #t))) - (debug:print-info 13 *default-log-port* "after db:setup with dbstruct="dbstruct) - (cond - ((dbr:dbstruct-read-only dbstruct) - (debug:print-info 13 *default-log-port* "loading read-only watchdog") - (common:readonly-watchdog dbstruct)) - (else - (debug:print-info 13 *default-log-port* "loading writable-watchdog.") - (common:writable-watchdog dbstruct))) - (debug:print-info 13 *default-log-port* "watchdog done.")) - (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))) + (if (launch:setup) + (if (common:on-homehost?) + (let ((dbstruct (db:setup #t))) + (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct) + (cond + ((dbr:dbstruct-read-only dbstruct) + (debug:print-info 13 *default-log-port* "loading read-only watchdog") + (common:readonly-watchdog dbstruct)) + (else + (debug:print-info 13 *default-log-port* "loading writable-watchdog.") + (common:writable-watchdog dbstruct))) + (debug:print-info 13 *default-log-port* "watchdog done.")) + (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost")))) (define (std-exit-procedure) (on-exit (lambda () 0)) ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -937,71 +937,71 @@ ;; 'closeall - close all opened dbs ;; 'schema - attempt to apply schema changes ;; run-ids: '(1 2 3 ...) or #f (for all) ;; (define (db:multi-db-sync dbstruct . options) - (if (not (launch:setup)) - (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") - (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) - (tmpdb (db:get-db dbstruct)) - (refndb (dbr:dbstruct-refndb dbstruct)) - (allow-cleanup #t) ;; (if run-ids #f #t)) - (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) - (data-synced 0)) ;; count of changed records (I hope) - - (for-each - (lambda (option) - - (case option - ;; kill servers - ((killservers) - (for-each - (lambda (server) - (match-let (((mod-time host port start-time pid) server)) - (if (and host pid) - (tasks:kill-server host pid)))) - servers)) - - ;; clear out junk records - ;; - ((dejunk) - (db:delay-if-busy mtdb) ;; ok to delay on mtdb - (db:clean-up mtdb) - (db:clean-up tmpdb) - (db:clean-up refndb)) - - ;; sync runs, test_meta etc. - ;; - ((old2new) - (set! data-synced - (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb) - data-synced))) - - ;; now ensure all newdb data are synced to megatest.db - ;; do not use the run-ids list passed in to the function - ;; - ((new2old) - (set! data-synced - (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) - data-synced))) - - ((adj-target) - (db:adj-target (db:dbdat-get-db mtdb)) - (db:adj-target (db:dbdat-get-db tmpdb)) - (db:adj-target (db:dbdat-get-db refndb))) - - ((schema) - (db:patch-schema-maindb (db:dbdat-get-db mtdb)) - (db:patch-schema-maindb (db:dbdat-get-db tmpdb)) - (db:patch-schema-maindb (db:dbdat-get-db refndb)) - (db:patch-schema-rundb (db:dbdat-get-db mtdb)) - (db:patch-schema-rundb (db:dbdat-get-db tmpdb)) - (db:patch-schema-rundb (db:dbdat-get-db refndb)))) - - (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)) - options) - data-synced))) + ;; (if (not (launch:setup)) + ;; (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.") + (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) + (tmpdb (db:get-db dbstruct)) + (refndb (dbr:dbstruct-refndb dbstruct)) + (allow-cleanup #t) ;; (if run-ids #f #t)) + (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))) + (data-synced 0)) ;; count of changed records (I hope) + + (for-each + (lambda (option) + + (case option + ;; kill servers + ((killservers) + (for-each + (lambda (server) + (match-let (((mod-time host port start-time pid) server)) + (if (and host pid) + (tasks:kill-server host pid)))) + servers)) + + ;; clear out junk records + ;; + ((dejunk) + (db:delay-if-busy mtdb) ;; ok to delay on mtdb + (db:clean-up mtdb) + (db:clean-up tmpdb) + (db:clean-up refndb)) + + ;; sync runs, test_meta etc. + ;; + ((old2new) + (set! data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb) + data-synced))) + + ;; now ensure all newdb data are synced to megatest.db + ;; do not use the run-ids list passed in to the function + ;; + ((new2old) + (set! data-synced + (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb) + data-synced))) + + ((adj-target) + (db:adj-target (db:dbdat-get-db mtdb)) + (db:adj-target (db:dbdat-get-db tmpdb)) + (db:adj-target (db:dbdat-get-db refndb))) + + ((schema) + (db:patch-schema-maindb (db:dbdat-get-db mtdb)) + (db:patch-schema-maindb (db:dbdat-get-db tmpdb)) + (db:patch-schema-maindb (db:dbdat-get-db refndb)) + (db:patch-schema-rundb (db:dbdat-get-db mtdb)) + (db:patch-schema-rundb (db:dbdat-get-db tmpdb)) + (db:patch-schema-rundb (db:dbdat-get-db refndb)))) + + (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)) + options) + data-synced)) ;; keeping it around for debugging purposes only (define (open-run-close-no-exception-handling proc idb . params) (debug:print-info 11 *default-log-port* "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params) (print "I don't work anymore. open-run-close-no-exception-handling needs fixing or removing...") Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -857,13 +857,18 @@ (toppath (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath (target (common:args-get-target)) (sections (if target (list "default" target) #f)) ;; for runconfigs (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig)) - (mtcachef (car cachefiles)) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) - (rccachef (cdr cachefiles)) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash))) - ) ;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?))))) + ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ... + (mtcachef (if (null? cachefiles) + #f + (car cachefiles))) ;; (and cachedir (conc cachedir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) + (rccachef (if (null? cachefiles) + #f + (cdr cachefiles)))) ;; (and cachedir (conc cachedir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash))) + ;; (cancreate (and cachedir (common:file-exists? cachedir)(file-write-access? cachedir) (not (common:in-running-test?))))) (set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource ;;(BB> "launch:setup-body -- cachefiles="cachefiles) (cond ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME ((and (not force-reread) mtcachef (common:file-exists? mtcachef) (get-environment-variable "MT_RUN_AREA_HOME") use-cache) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -389,11 +389,19 @@ ;; The watchdog is to keep an eye on things like db sync etc. ;; ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage -(define *watchdog* (make-thread common:watchdog "Watchdog thread")) +(define *watchdog* (make-thread + (lambda () + (handle-exceptions + exn + (begin + (print-call-chain) + (print " message: " ((condition-property-accessor 'exn 'message) exn))) + (common:watchdog))) + "Watchdog thread")) ;;(if (not (args:get-arg "-server")) ;; (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog (let* ((no-watchdog-args '("-list-runs" Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -514,11 +514,17 @@ (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) (let* ((keep-going #t) (run-queue-retries 5) (th1 (make-thread (lambda () - (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)) + (handle-exceptions + exn + (begin + (print-call-chain) + (print " message: " ((condition-property-accessor 'exn 'message) exn))) + (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests + (any->number reglen) all-tests-registry))) ;; (handle-exceptions ;; exn ;; (begin ;; (print-call-chain (current-error-port)) ;; (debug:print-error 0 *default-log-port* "failure in runs:run-tests-queue thread, error: " ((condition-property-accessor 'exn 'message) exn))