Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -3642,11 +3642,11 @@ tabdat (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "runname" "%") (dboard:tabdat-numruns tabdat) (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%") ;; generate key patterns from the target stored in tabdat - (let* ((dbkeys (dboard:tabdat-dbkeys tabdat))) + (let* ((dbkeys (dboard:tabdat-dbkeys tabdat))) (let ((fres (if (dboard:tabdat-target tabdat) (let ((ptparts (append (dboard:tabdat-target tabdat)(make-list (length dbkeys) "%")))) (map (lambda (k v)(list k v)) dbkeys ptparts)) (let ((res '())) (for-each (lambda (key) Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -693,11 +693,13 @@ ;; NOTE: touched logic is disabled/not done ;; sync run to disk if touched ;; (define (db:sync-inmem->disk dbstruct apath dbfile #!key (force-sync #f)) - #f) ;; disabled + (if #f + (debug:print-info 0 *default-log-port* "syncing "*toppath*" "dbfile" at "(current-seconds)) + #f)) ;; disabled ;; (let* ((dbdat (db:get-dbdat dbstruct apath dbfile)) ;; (dbfullname (conc apath "/" dbfile)) ;; (db (db:open-run-db dbfullname db:initialize-db)) ;; (dbr:dbdat-db dbdat)) ;; (inmem (dbr:dbdat-inmem dbdat)) ;; (start-t (current-seconds)) Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -2148,11 +2148,10 @@ (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")")) (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it.")) #;(loop (+ count 1) bad-sync-count start-time) )) - (debug:print-info 0 *default-log-port* "syncing "*toppath*" "dbname" at "(current-seconds)) (db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t) (mutex-unlock! *heartbeat-mutex*) ;; when things go wrong we don't want to be doing the various Index: ulex/ulex.scm ================================================================== --- ulex/ulex.scm +++ ulex/ulex.scm @@ -219,11 +219,11 @@ ;; (define (send udata host-port qrykey cmd params) (let* ((my-host-port (udat-host-port udata)) ;; remote will return to this (isme #f #;(equal? host-port my-host-port)) ;; calling myself? ;; dat is a self-contained work block that can be sent or handled locally - (dat (list my-host-port qrykey cmd params))) + (dat (list my-host-port qrykey cmd params #;(cons (current-seconds)(current-milliseconds))))) (cond (isme (ulex-handler udata dat)) ;; no transmission needed (else (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC? exn @@ -240,74 +240,88 @@ #f)))) (close-input-port inp) (close-output-port oup) ;; (mutex-unlock! *send-mutex*) res)))))))) ;; res will always be 'ack unless return-method is direct + +(define (send-via-polling uconn host-port cmd data) + (let* ((qrykey (make-cookie uconn)) + (sres (send uconn host-port qrykey cmd data))) + (case sres + ((ack) + (let loop ((start-time (current-milliseconds))) + (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout + (begin + (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data) + #f) + (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash + (if result ;; result is '(status . result-data) or #f for nothing yet + (begin + (hash-table-delete! (udat-mboxes uconn) qrykey) + (cdr result)) + (begin + (thread-sleep! 0.01) + (loop start-time))))))) + (else + (print "ULEX ERROR: Communication failed? sres="sres) + #f)))) + +(define (send-via-mailbox uconn host-port cmd data) + (let* ((cmbox (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse? + (qrykey (car cmbox)) + (mbox (cdr cmbox)) + (mbox-time (current-milliseconds)) + (sres (send uconn host-port qrykey cmd data))) ;; short res + (if (eq? sres 'ack) + (let* ((mbox-timeout-secs 120 #;(if (eq? 'primordial (thread-name (current-thread))) + #f + 120)) ;; timeout) + (mbox-timeout-result 'MBOX_TIMEOUT) + (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) + (mbox-receive-time (current-milliseconds))) + ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it? + (hash-table-delete! (udat-mboxes uconn) qrykey) + (if (eq? res 'MBOX_TIMEOUT) + (begin + (print "WARNING: mbox timed out for query "cmd", with data "data + ", waiting for response from "host-port".") + + ;; here it might make sense to clean up connection records and force clean start? + ;; NO. The progam using ulex needs to do the reset. Right thing here is exception + + #f) ;; convert to raising exception? + res)) + (begin + (print "ERROR: Communication failed? Got "sres) + #f)))) ;; send a request to the given host-port and register a mailbox in udata ;; wait for the mailbox data and return it ;; (define (send-receive uconn host-port cmd data) - (cond - ((member cmd '(ping goodbye)) ;; these are immediate - (send uconn host-port 'ping cmd data)) - ((eq? (work-method) 'direct) - ;; the result from send will be the actual result, not an 'ack - (send uconn host-port 'direct cmd data)) - (else - (case (return-method) - ((polling) - (let* ((qrykey (make-cookie uconn)) - (sres (send uconn host-port qrykey cmd data))) - (case sres - ((ack) - (let loop ((start-time (current-milliseconds))) - (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout - (begin - (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data) - #f) - (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash - (if result ;; result is '(status . result-data) or #f for nothing yet - (begin - (hash-table-delete! (udat-mboxes uconn) qrykey) - (cdr result)) - (begin - (thread-sleep! 0.01) - (loop start-time))))))) - (else - (print "ULEX ERROR: Communication failed? sres="sres) - #f)))) - ((mailbox) - (let* ((cmbox (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse? - (qrykey (car cmbox)) - (mbox (cdr cmbox)) - (mbox-time (current-milliseconds)) - (sres (send uconn host-port qrykey cmd data))) ;; short res - (if (eq? sres 'ack) - (let* ((mbox-timeout-secs 120 #;(if (eq? 'primordial (thread-name (current-thread))) - #f - 120)) ;; timeout) - (mbox-timeout-result 'MBOX_TIMEOUT) - (res (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result)) - (mbox-receive-time (current-milliseconds))) - ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it? - (hash-table-delete! (udat-mboxes uconn) qrykey) - (if (eq? res 'MBOX_TIMEOUT) - (begin - (print "WARNING: mbox timed out for query "cmd", with data "data", waiting for response from "host-port".") - - ;; here it might make sense to clean up connection records and force clean start? - ;; NO. The progam using ulex needs to do the reset. Right thing here is exception - - #f) ;; convert to raising exception? - res)) - (begin - (print "ERROR: Communication failed? Got "sres) - #f)))) - (else - (print "ULEX ERROR: unrecognised return-method "(return-method)".") - #f))))) + (let* ((start-time (current-milliseconds)) + (result (cond + ((member cmd '(ping goodbye)) ;; these are immediate + (send uconn host-port 'ping cmd data)) + ((eq? (work-method) 'direct) + ;; the result from send will be the actual result, not an 'ack + (send uconn host-port 'direct cmd data)) + (else + (case (return-method) + ((polling) + (send-via-polling uconn host-port cmd data)) + ((mailbox) + (send-via-mailbox uconn host-port cmd data)) + (else + (print "ULEX ERROR: unrecognised return-method "(return-method)".") + #f)))))) + ;; this is ONLY for development and debugging. It will be removed once Ulex is stable. + (if (< 5000 (- (current-milliseconds) start-time)) + (print "ULEX WARNING: round-trip took over 5 seconds; " + cmd", host-port="host-port", data="data)) + result)) + ;;====================================================================== ;; responder side ;;====================================================================== @@ -316,11 +330,11 @@ ;; Reserved cmds; ack ping goodbye response ;; (define (ulex-handler uconn rdat) (assert (list? rdat) "FATAL: ulex-handler give rdat as not list") (match rdat ;; (string-split controldat) - ((rem-host-port qrykey cmd params) + ((rem-host-port qrykey cmd params);; timedata) ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params) (case cmd ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack) ((ping) ;; (print "Got Ping!")