Index: commonmod.scm ================================================================== --- commonmod.scm +++ commonmod.scm @@ -2067,12 +2067,14 @@ (unsetenv "TARGETHOST_LOGF") (unsetenv "TARGETHOST")))) (define (server:get-logs-list area-path) - (let* ((server-logs-cmd (conc "grep -iL exiting " area-path "/logs/server-*-*.log")) - (server-logs (string-split (string-chomp (with-input-from-pipe server-logs-cmd read-string))))) + (let* (;; (server-logs-cmd (conc "grep -iL exiting " area-path "/logs/server-*-*.log")) + ;; (server-logs (string-split (string-chomp (with-input-from-pipe server-logs-cmd read-string)))) + (server-logs (glob (conc area-path"/logs/server-*-*.log"))) + ) server-logs)) ;; get a list of servers with all relevant data ;; ( mod-time host port start-time pid ) ;; @@ -2219,12 +2221,14 @@ ;; given a path to a server log return: host port startseconds ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let (define (server:logf-get-start-info logf) (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id - (dbprep-rx (regexp "^SERVER: dbprep")) - (dbprep-found 0)) + (dbprep-rx (regexp "^SERVER: dbprep")) + (exiting-rx (regexp ".*exiting promptly.*")) + (dbprep-found #f) + (exiting-found #f)) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn) (list #f #f #f #f)) ;; no idea what went wrong, call it a bad server @@ -2232,36 +2236,38 @@ logf (lambda () (let loop ((inl (read-line)) (lnum 0)) (if (not (eof-object? inl)) - (let ((mlst (string-match server-rx inl)) - (dbprep (string-match dbprep-rx inl)) - ) - (if dbprep - (set! dbprep-found 1) - ) + (let ((mlst (string-match server-rx inl)) + (dbprep (string-match dbprep-rx inl)) + (exiting (string-match exiting-rx inl))) + (if dbprep (set! dbprep-found #t)) + (if exiting (set! exiting-found #t)) (if (not mlst) (if (< lnum 500) ;; give up if more than 500 lines of server log read (loop (read-line)(+ lnum 1)) (begin - (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) - (list #f #f #f #f))) + (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) + (list #f #f #f #f))) (let ((dat (cdr mlst))) (list (car dat) ;; host (string->number (cadr dat)) ;; port (string->number (caddr dat)) (cadr (cddr dat)))))) - (begin - (if dbprep-found - (begin - (debug:print-info 0 *default-log-port* "Server is in dbprep at " (current-seconds)) - (thread-sleep! 25) - ) - (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds)) - ) - (list #f #f #f #f))))))))) + (begin + (cond + (dbprep-found + (debug:print-info 0 *default-log-port* "Server is in dbprep at " (current-seconds)) + (thread-sleep! 25)) + (exiting-found + (debug:print-info 0 *default-log-port* "Removing server log "logf" as the server exited due to signal") + (delete-file* logf) + (thread-sleep! 1)) + (else + (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds)))) + (list #f #f #f #f))))))))) ;;====================================================================== ;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load ;; Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -406,12 +406,12 @@ (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back do-sync) (begin (debug:print 1 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb) - ;touch tmp db to avoid wal mode wierdness - (set! (file-modification-time tmpdbfname) (current-seconds)) + ;; touch tmp db to avoid wal mode wierdness + (set-file-times! tmpdbfname (current-seconds)) (debug:print-info 13 *default-log-port* "db:sync-all-tables-list done.") ) (debug:print 4 *default-log-port* " db, " (db:dbdat-get-path tmpdb) " already exists or fresh enough, not propogating data from\n " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) ) ;; (db:multi-db-sync dbstruct 'old2new)) ;; migrate data from megatest.db automatically tmpdb)))) Index: debugprint.scm ================================================================== --- debugprint.scm +++ debugprint.scm @@ -60,22 +60,23 @@ #t)) (define (debug:debug-mode n) (let* ((vb (verbosity))) (cond - ((and (number? vb) ;; number number - (number? n)) - (<= n vb)) - ((and (list? vb) ;; list number - (number? n)) - (member n vb)) - ((and (list? vb) ;; list list - (list? n)) - (not (null? (lset-intersection! eq? vb n)))) - ((and (number? vb) - (list? n)) - (member vb n))))) + ((and (number? vb) ;; number number + (number? n)) + (<= n vb)) + ((and (list? vb) ;; list number + (number? n)) + (member n vb)) + ((and (list? vb) ;; list list + (list? n)) + (not (null? (lset-intersection! eq? vb n)))) + ((and (number? vb) + (list? n)) + (member vb n)) + (else #f)))) (define (debug:print n e . params) (if (debug:debug-mode n) (with-output-to-port (or e (current-error-port)) (lambda () Index: http-transportmod.scm ================================================================== --- http-transportmod.scm +++ http-transportmod.scm @@ -598,11 +598,11 @@ ;;(BB> "http-transport:server-shutdown called") (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) ;; ;; start_shutdown ;; - (set! (bdat-time-to-exit *bdat*) #t) ;; tell on-exit to be fast as we've already cleaned up + (bdat-time-to-exit-set! *bdat* #t) ;; tell on-exit to be fast as we've already cleaned up (portlogger:open-run-close portlogger:set-port port "released") (thread-sleep! 1) ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) Index: launchmod.scm ================================================================== --- launchmod.scm +++ launchmod.scm @@ -2341,20 +2341,20 @@ (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, (bdat-time-to-exit *bdat*) = " (bdat-time-to-exit *bdat*)" pid="(current-process-id)" mtpath="golden-mtpath))) ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage (define (init-watchdog) - (set! (bdat-watchdog-set! *bdat*) - (make-thread - (lambda () - (handle-exceptions - exn - (begin - (print-call-chain) - (print " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) - (common:watchdog))) - "Watchdog thread")) + (bdat-watchdog-set! *bdat* + (make-thread + (lambda () + (handle-exceptions + exn + (begin + (print-call-chain) + (print " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) + (common:watchdog))) + "Watchdog thread")) (start-watchdog)) (define (start-watchdog) ;;(if (not (args:get-arg "-server")) ;; (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog @@ -2371,26 +2371,26 @@ "-show-runconfig" "-show-config" "-show-cmdinfo" "-cleanup-db" )) - (no-watchdog-argvals (list '("-archive" . "replicate-db"))) - (start-watchdog-specail-arg-val (let loop ((hed (car no-watchdog-argvals)) - (tail (cdr no-watchdog-argvals))) + (no-watchdog-argvals (list '("-archive" . "replicate-db"))) + (start-watchdog-special-arg-val (let loop ((hed (car no-watchdog-argvals)) + (tail (cdr no-watchdog-argvals))) ;; (print "hed" hed " arg " (args:get-arg (car hed)) " val:" (cdr hed) " eql" (equal? (args:get-arg (car hed)) (cdr hed))) (if (equal? (args:get-arg (car hed)) (cdr hed)) #f (if (null? tail) #t (loop (car tail) (cdr tail)))))) (no-watchdog-args-vals (filter (lambda (x) x) (map args:get-arg no-watchdog-args))) - (start-watchdog (and (null? no-watchdog-args-vals) start-watchdog-specail-arg-val))) - ;(print "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals " start-watchdog-specail-arg-val:" start-watchdog-specail-arg-val " start-watchdog:" start-watchdog) + (start-watchdog (and (null? no-watchdog-args-vals) start-watchdog-special-arg-val))) + ;; (print "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals " start-watchdog-special-arg-val:" start-watchdog-special-arg-val " start-watchdog:" start-watchdog) (if start-watchdog (thread-start! (bdat-watchdog *bdat*))))) - + (define (server:writable-watchdog-deltasync dbstruct) (thread-sleep! 0.054) ;; delay for startup (let ((legacy-sync (common:run-sync?)) (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) (debug-mode (debug:debug-mode 1)) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -777,11 +777,11 @@ (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target")))) (if targ (setenv "MT_TARGET" targ))) ;; The watchdog is to keep an eye on things like db sync etc. ;; - +(init-watchdog) ;; (define (debug:debug-mode n) ;; (cond ;; ((and (number? *verbosity*) ;; number number ;; (number? n))