Index: debugprint.scm ================================================================== --- debugprint.scm +++ debugprint.scm @@ -6,10 +6,12 @@ ;;(import scheme chicken data-structures extras files ports) (import scheme chicken.base chicken.string + chicken.time + chicken.time.posix chicken.port chicken.process-context chicken.process-context.posix (prefix mtargs args:) @@ -21,11 +23,11 @@ ;; debug stuff ;;====================================================================== (define verbosity (make-parameter '())) (define *default-log-port* (current-error-port)) -(define debug:print-logger (make-parameter #f)) ;; se to a proc to call on every logging print +(define debug:print-logger (make-parameter #f)) ;; set to a proc to call on every logging print (define (debug:setup) (let ((debugstr (or (args:get-arg "-debug") (args:get-arg "-debug-noprop") (get-environment-variable "MT_DEBUG_MODE")))) @@ -109,17 +111,25 @@ (if (debug:print-logger) ;; NOTE: turn params into string to keep things simple for now ((debug:print-logger)(conc "REMOTE ("(get-host-name)", pid="(current-process-id)") " (string-intersperse (map conc params) " ") "; " (string-intersperse (command-line-arguments) " "))))) -(define (debug:print n e . params) +(define debug:enable-timestamp (make-parameter #t)) + +(define (debug:timestamp) + (if (debug:enable-timestamp) + (conc (time->string + (seconds->local-time (current-seconds)) "%H:%M:%S") " ") + "")) + + (define (debug:print n e . params) (if (debug:debug-mode n) (with-output-to-port (or e (current-error-port)) (lambda () ;; (if *logging* ;; (db:log-event (apply conc params)) - (apply print params) + (apply print (debug:timestamp) params) (debug:handle-remote-logging params) ))) #t ;; only here to make remote stuff happy. It'd be nice to fix that ... ) @@ -126,31 +136,31 @@ (define (debug:print-error n e . params) ;; normal print (if (debug:debug-mode n) (with-output-to-port (if (port? e) e (current-error-port)) (lambda () - (apply print "ERROR: " params) + (apply print "ERROR: " (debug:timestamp) params) (debug:handle-remote-logging (cons "ERROR: " params)) ))) ;; pass important messages to stderr (if (and (eq? n 0)(not (eq? e (current-error-port)))) (with-output-to-port (current-error-port) (lambda () - (apply print "ERROR: " params) + (apply print "ERROR: " (debug:timestamp) params) )))) (define (debug:print-info n e . params) (if (debug:debug-mode n) (with-output-to-port (if (port? e) e (current-error-port)) (lambda () - (apply print "INFO: (" n ") " params) ;; res) + (apply print "INFO: (" n ") "(debug:timestamp) params) ;; res) (debug:handle-remote-logging (cons "INFO: " params)) )))) (define (debug:print-warn n e . params) (if (debug:debug-mode n) (with-output-to-port (if (port? e) e (current-error-port)) (lambda () - (apply print "WARN: (" n ") " params) ;; res) + (apply print "WARN: (" n ") " (debug:timestamp) params) ;; res) (debug:handle-remote-logging (cons "WARN: " params)) )))) ) Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -223,14 +223,15 @@ (< (current-seconds) (conndat-expires conn))) #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died ((and conn (>= (current-seconds)(conndat-expires conn))) (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.") - (rmt:drop-conn remdat apath ".db/main.db") ;; (hash-table-delete! conns fullpath) ;; clean up + (rmt:drop-conn remdat apath ".db/main.db") ;; (rmt:open-main-connection remdat apath)) (else ;; Below we will find or create and connect to main + (debug:print-info 0 *default-log-port* "rmt:open-main-connection - starting from scratch") (let* ((dbname (db:run-id->dbname #f)) (the-srv (rmt:find-main-server myconn apath dbname)) (start-main-srv (lambda () ;; call IF there is no the-srv found (mutex-lock! *connstart-mutex*) (if (> (- (current-seconds) *last-main-start*) 5) ;; at least four seconds since last attempt to start main server @@ -259,38 +260,40 @@ ipaddr: ipaddr port: port srvpkt: the-srv srvkey: srvkey ;; generated by rmt:get-signature on the server side lastmsg: (current-seconds) - expires: (+ (current-seconds) 60) ;; this needs to be gathered during the ping + expires: (+ (current-seconds) + (server:expiration-timeout) + -2) ;; this needs to be gathered during the ping ))) (hash-table-set! conns fullpath new-the-srv))) #t))))) ;; NB// sinfo is a servdat struct ;; (define (rmt:general-open-connection sinfo apath dbname #!key (num-tries 5)) (assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db") - (let* ((mdbname (db:run-id->dbname #f)) + (let* ((mdbname ".db/main.db") ;; (db:run-id->dbname #f)) TODO: put this back to the lookup when stable (fullname (db:dbname->path apath dbname)) (conns (servdat-conns sinfo)) - (mconn (rmt:get-conn sinfo apath mdbname))) - (if (and mconn + (mconn (rmt:get-conn sinfo apath ".db/main.db")) + (dconn (rmt:get-conn sinfo apath dbname))) + #;(if (and mconn (not (debug:print-logger))) (begin (debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.") (debug:print-logger rmt:log-to-main))) (cond - ((or (not mconn) ;; no channel open to main? - (< (conndat-expires mconn)(+ (current-seconds) 2))) ;; restablish connection if less than 2 seconds on the lease - (if mconn ;; previously opened - clean up NB// consolidate this with the similar code in open main above - (begin - (debug:print-info 0 *default-log-port* "Clearing out connection to main that has expired.") - (hash-table-set! conns fullname #f))) + ((and mconn + dconn + (< (current-seconds)(conndat-expires dconn))) + #t) ;; good to go + ((not mconn) ;; no channel open to main? open it... (rmt:open-main-connection sinfo apath) - (rmt:general-open-connection sinfo apath mdbname)) - ((not (rmt:get-conn sinfo apath dbname)) ;; no channel open to dbname? + (rmt:general-open-connection sinfo apath dbname num-tries: (- num-tries 1))) + ((not dconn) ;; no channel open to dbname? (let* ((res (rmt:send-receive-real sinfo apath mdbname 'get-server `(,apath ,dbname)))) (case res ((server-started) (if (> num-tries 0) (begin @@ -319,18 +322,19 @@ ;; socket: (open-nn-connection (conc host":"port)) ;; TODO - open ulex connection? ipaddr: ipaddr port: port srvkey: servkey lastmsg: (current-seconds) - expires: (+ (current-seconds) 60)))) + expires: (+ (current-seconds) + (server:expiration-timeout) + -2)))) (else (debug:print-info 0 *default-log-port* "return data from starting server did not match host port servkey pid ipaddr apath dbname " res))) res) (begin (debug:print-info 0 *default-log-port* "Unexpected result: " res) res))))))) - #t)) ;;====================================================================== ;; FOR DEBUGGING SET TO #t @@ -347,28 +351,33 @@ (if *localmode* (api:execute-requests *dbstruct* cmd params) (begin (rmt:open-main-connection sinfo apath) (if rid (rmt:general-open-connection sinfo apath dbname)) + (if (not (member cmd '(log-to-main))) + (debug:print-info 0 *default-log-port* "rmt:send-receive "cmd" params="params)) (rmt:send-receive-real sinfo apath dbname cmd params))))) ;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed ;; sometime in the future ;; (define (rmt:send-receive-real sinfo apath dbname cmd params) + (assert (not (eq? 'primordial (thread-name (current-thread)))) "FATAL: Do not call rmt:send-receive-real in the primodial thread.") (let* ((cdat (rmt:get-conn sinfo apath dbname))) (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened") (let* ((uconn (servdat-uconn sinfo)) ;; get the interface to ulex ;; then send-receive using the ulex layer to host-port stored in cdat - (res (send-receive uconn (conndat-hostport cdat) cmd params))) + (res (send-receive uconn (conndat-hostport cdat) cmd params)) + #;(th1 (make-thread (lambda () + (set! res (send-receive uconn (conndat-hostport cdat) cmd params))) + "send-receive thread"))) + ;; (thread-start! th1) + ;; (thread-join! th1) ;; gratuitious thread stuff is so that mailbox is not used in primordial thead ;; since we accessed the server we can bump the expires time up (conndat-expires-set! cdat (+ (current-seconds) (server:expiration-timeout) -2)) ;; two second margin for network time misalignments etc. - #;(if (member res '("#")) ;; TODO - fix this in string->sexpr - #f - res) res))) ;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed ;; sometime in the future. ;; @@ -1646,14 +1655,13 @@ (servdat-uconn *db-serv-info*)) (let* ((uconn (servdat-uconn *db-serv-info*))) (wait-and-close uconn)) (let* ((port (portlogger:open-run-close portlogger:find-port)) (handler-proc (lambda (rem-host-port qrykey cmd params) ;; - ;;(let* ((prms (alist-ref 'params params))) - ;; (api:execute-requests *dbstruct-db* cmd prms))))) - (assert (list? params) "FATAL: handler called with non-list params") - (api:execute-requests *dbstruct-db* cmd params)))) + (set! *db-last-access* (current-seconds)) + (assert (list? params) "FATAL: handler called with non-list params") + (api:execute-requests *dbstruct-db* cmd params)))) ;; (api:process-request *dbstuct-db* (if (not *db-serv-info*) (set! *db-serv-info* (make-servdat host: hostn port: port))) (let* ((uconn (run-listener handler-proc port)) (rport (udat-port uconn))) ;; the real port ADDED tests/simplerun/debug.scm Index: tests/simplerun/debug.scm ================================================================== --- /dev/null +++ tests/simplerun/debug.scm @@ -0,0 +1,30 @@ +(import big-chicken trace rmtmod apimod dbmod ulex srfi-18) +(trace-call-sites #t) +(trace + ;; db:get-tests-for-run + ;; rmt:general-open-connection + ;; rmt:open-main-connection + ;; rmt:drop-conn + ;; rmt:send-receive + ;; rmt:log-to-main + ) + +(define th1 + (make-thread + (lambda () +(let loop ((r 1) + (i 1)) + (print "register-test "r" test"i) + (rmt:register-test r "test1" (conc "item_" i)) + (if (< i 1000) + (loop r (+ i 1)) + (if (< r 100) + (begin + (print "get-tests-for-run "r) + (rmt:get-tests-for-run r "%" '() '() 0 #f #f #f #f #f 0 #f) + (loop (+ r 1) 0))))) +))) +(thread-start! th1) +(thread-join! th1) + + Index: tests/simplerun/megatest.config ================================================================== --- tests/simplerun/megatest.config +++ tests/simplerun/megatest.config @@ -21,10 +21,13 @@ [setup] # Adjust max_concurrent_jobs to limit how much you load your machines max_concurrent_jobs 50 +[server] +timeout 3600 + # Uncomment this to make the in-mem db into a disk based db (slower but good for debug) # be aware that some unit tests will fail with this due to persistent data # # tmpdb /tmp Index: ulex/ulex.scm ================================================================== --- ulex/ulex.scm +++ ulex/ulex.scm @@ -218,26 +218,28 @@ (send uconn host-port 'ping cmd data)) (else (let* ((cmbox (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse? (qrykey (car cmbox)) (mbox (cdr cmbox)) - (mbox-time (current-milliseconds))) - (if (eq? (send uconn host-port qrykey cmd data) 'ack) - (let* ((mbox-timeout-secs (if (eq? 'primordial (thread-name (current-thread))) + (mbox-time (current-milliseconds)) + (sres (send uconn host-port qrykey cmd data))) ;; short res + (if (eq? sres 'ack) + (let* ((mbox-timeout-secs 120 #;(if (eq? 'primordial (thread-name (current-thread))) #f 120)) ;; timeout) (mbox-timeout-result 'MBOX_TIMEOUT) (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) (mbox-receive-time (current-milliseconds))) - (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it? + ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it? + (hash-table-delete! (udat-mboxes uconn) qrykey) (if (eq? res 'MBOX_TIMEOUT) (begin (print "WARNING: mbox timed out for query "cmd", with data "data) #f) ;; convert to raising exception? res)) (begin - (print "ERROR: Communication failed?") + (print "ERROR: Communication failed? Got "sres) #f)))))) ;; #f means failed to communicate ;;====================================================================== ;; responder side ;;======================================================================