Index: common_records.scm ================================================================== --- common_records.scm +++ common_records.scm @@ -39,10 +39,11 @@ ;; (define-syntax common:handle-exceptions ;; (syntax-rules () ;; ((_ exn-in errstmt ...)(handle-exceptions exn-in errstmt ...)))) +;; this works, why didn't I use it more? (define-syntax common:debug-handle-exceptions (syntax-rules () ((_ debug exn errstmt body ...) (if debug (begin body ...) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -498,11 +498,11 @@ (lambda () (handle-exceptions exn (begin (print-call-chain) - (print " message: " ((condition-property-accessor 'exn 'message) exn))) + (print " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) (common:watchdog))) "Watchdog thread")) ;;(if (not (args:get-arg "-server")) ;; (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog @@ -552,11 +552,11 @@ ;; (if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server (handle-exceptions exn (begin - (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn)) + (print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) ) (let* ((tl (or (args:get-arg "-log")(launch:setup))) ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name (conc tl "/logs/server-" (current-process-id) "-" (get-host-name) ".log"))) (oup (open-logfile logf))) @@ -602,12 +602,14 @@ (printf "Preparing to exit with exit code ~A ...\n" exit-code) (for-each (lambda (pid) (handle-exceptions - exn - #t + exn + (begin + (printf "process reap failed. exn=~A\n" exn) + #t) (let-values (((pid-val exit-status exit-code) (process-wait pid #t))) (if (or (eq? pid-val pid) (eq? pid-val 0)) (begin (printf "Sending signal/term to ~A\n" pid) Index: mt.scm ================================================================== --- mt.scm +++ mt.scm @@ -190,11 +190,11 @@ ;; (mutex-lock! *triggers-mutex*) (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* " Exception in mt:process-triggers for run-id="run-id" test-id="test-id" newstate="newstate" newstatus="newstatus - "\n error: " ((condition-property-accessor 'exn 'message) exn) + "\n error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn "\n test-rundir="test-rundir "\n test-name="test-name "\n item-path="item-path "\n state="state "\n status="status Index: process.scm ================================================================== --- process.scm +++ process.scm @@ -196,12 +196,14 @@ (equal? rpid pid))))) (define (process:alive-on-host? host pid) (let ((cmd (conc "ssh " host " ps -o pid= -p " pid))) (handle-exceptions - exn - #f ;; anything goes wrong - assume the process in NOT running. + exn + (begin + (debug:print 0 *default-log-port* "failed to identify if process " pid ", on host " host " is alive. exn=" exn) + #f) ;; anything goes wrong - assume the process in NOT running. (with-input-from-pipe cmd (lambda () (let loop ((inl (read-line))) (if (eof-object? inl) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -415,13 +415,15 @@ res)) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) (res (handle-exceptions - exn - #f - (http-transport:client-api-send-receive run-id connection-info cmd params)))) + exn + (begin + (print "transport failed. exn=" exn) + #f) + (http-transport:client-api-send-receive run-id connection-info cmd params)))) (if (and res (vector-ref res 0)) (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! #f))) ;; ;; Wrap json library for strings (why the ports crap in the first place?) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -168,11 +168,13 @@ ;; (define (server:logf-get-start-info logf) (let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs (handle-exceptions exn - (list #f #f #f) ;; no idea what went wrong, call it a bad server + (begin + (print "failed to get server info from " logf ", exn=" exn) + (list #f #f #f)) ;; no idea what went wrong, call it a bad server (with-input-from-file logf (lambda () (let loop ((inl (read-line)) (lnum 0)) @@ -214,11 +216,13 @@ (let loop ((hed (car server-logs)) (tal (cdr server-logs)) (res '())) (let* ((mod-time (handle-exceptions exn - (current-seconds) ;; 0 + (begin + (print "failed to get modification time on " hed ", exn=" exn) + (current-seconds)) ;; 0 (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted (down-time (- (current-seconds) mod-time)) (serv-dat (if (or (< num-serv-logs 10) (< down-time 900)) ;; day-seconds)) (server:logf-get-start-info hed) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -73,11 +73,11 @@ (configf:lookup *configdat* "setup" "dbdir") (conc (common:get-linktree) "/.db")))) (handle-exceptions exn (begin - (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir) + (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir ", exn=" exn) (exit 1)) (if (not (directory? dbdir))(create-directory dbdir #t))) dbdir)) ;; If file exists AND Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -552,11 +552,13 @@ (tests:test-set-toplog! run-id test-name outputfilename)) ;; didn't get the lock, check to see if current update started later than this ;; update, if so we can exit without doing any work (if (> my-start-time (handle-exceptions exn - 0 + (begin + (print "failed to get mod time on " lockf ", exn=" exn) + 0) (file-modification-time lockf))) ;; we started since current re-gen in flight, delay a little and try again (begin (debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it") (thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds @@ -1504,13 +1506,15 @@ (map (lambda (p) (if (directory-exists? p) (let ((glob-query (conc p "/" fnamepatt))) (handle-exceptions exn + (begin + (print "built-in glob on " glob-query ", failed, try using the shell. exn=" exn) (with-input-from-pipe - (conc "echo " glob-query) - read-lines) ;; we aren't going to try too hard. If glob breaks it is likely because someone tried to do */*/*.log or similar + (conc "echo " glob-query) + read-lines)) ;; we aren't going to try too hard. If glob breaks it is likely because someone tried to do */*/*.log or similar (glob glob-query))) '())) paths-from-db)) paths-from-db))) @@ -1559,13 +1563,15 @@ (common:file-exists? cache-file))) (cached-dat (if (and (not force-create) cache-exists use-cache) (handle-exceptions - exn - #f ;; any issues, just give up with the cached version and re-read - (configf:read-alist cache-file)) + exn + (begin + (debug:print 0 *default-log-port* "failed to read " cache-file ", exn=" exn) + #f) ;; any issues, just give up with the cached version and re-read + (configf:read-alist cache-file)) #f)) (test-full-name (if (and item-path (not (string-null? item-path))) (conc test-name "/" item-path) test-name))) (if cached-dat