Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -17,11 +17,11 @@ (use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) ;; (use zmq) (import (prefix sqlite3 sqlite3:)) -(use spiffy uri-common intarweb http-client spiffy-request-vars) +(use spiffy uri-common intarweb http-client spiffy-request-vars uri-common intarweb) (declare (unit client)) (declare (uses common)) (declare (uses db)) @@ -75,11 +75,17 @@ ;; ;; DEBUG STUFF ;; (if (eq? *transport-type* 'fs)(begin (print "ERROR!!!!!!! refusing to run with transport " *transport-type*)(exit 99))) (debug:print-info 11 "Using transport type of " *transport-type* (if hostinfo (conc " to connect to " hostinfo) "")) (case *transport-type* - ((fs)(if (not *megatest-db*)(set! *megatest-db* (open-db)))) + ((fs) ;; (if (not *megatest-db*)(set! *megatest-db* (open-db)))) + ;; we are not doing fs any longer. let's cheat and start up a server + (set! *transport-type* #f) + (system "megatest -list-servers | grep alive || megatest -server - -daemonize && sleep 3") + (thread-sleep! 1) + (if (> numtries 0) + (client:setup numtries: (- numtries 1)))) ((http) (http-transport:client-connect (tasks:hostinfo-get-interface hostinfo) (tasks:hostinfo-get-port hostinfo))) ((zmq) (zmq-transport:client-connect (tasks:hostinfo-get-interface hostinfo) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -1264,12 +1264,17 @@ (debug:print-info 11 "zdat=" zdat) (let* ((res #f) (rawdat (http-transport:client-send-receive serverdat zdat)) (tmp #f)) (debug:print-info 11 "Sent " zdat ", received " rawdat) - (set! tmp (db:string->obj rawdat)) - (vector-ref tmp 2)))) + (if rawdat + (begin + (set! tmp (db:string->obj rawdat)) + (vector-ref tmp 2)) + (begin + (debug:print 0 "ERROR: Communication with the server failed. Exiting if possible") + (exit 1)))))) ((zmq) (handle-exceptions exn (begin (debug:print-info 0 "cdb:client-call timeout or error. Trying again in 5 seconds") Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -11,11 +11,11 @@ (require-extension (srfi 18) extras tcp s11n) (use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) (import (prefix sqlite3 sqlite3:)) -(use spiffy uri-common intarweb http-client spiffy-request-vars) +(use spiffy uri-common intarweb http-client spiffy-request-vars uri-common intarweb) ;; Configurations for server (tcp-buffer-size 2048) (max-connections 2048) @@ -146,17 +146,17 @@ (set! *runremote* (list ipaddrstr portnum)) ;; (open-run-close tasks:remove-server-records tasks:open-db) (open-run-close tasks:server-register tasks:open-db (current-process-id) - ipaddrstr portnum 0 'live 'http) - (print "INFO: Trying to start server on " ipaddrstr ":" portnum) + ipaddrstr portnum 0 'startup 'http) + (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum) ;; This starts the spiffy server ;; NEED WAY TO SET IP TO #f TO BIND ALL (start-server bind-address: ipaddrstr port: portnum) (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum) - (print "INFO: server has been stopped"))) + (debug:print 1 "INFO: server has been stopped"))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== @@ -164,65 +164,82 @@ ;; C L I E N T S ;;====================================================================== (define *http-mutex* (make-mutex)) +;; (system "megatest -list-servers | grep alive || megatest -server - -daemonize && sleep 4") + ;; ;; ;; 1 Hello, world! Goodbye Dolly ;; Send msg to serverdat and receive result (define (http-transport:client-send-receive serverdat msg) - (let* ((url (http-transport:make-server-url serverdat)) - (fullurl (conc url "/ctrl")) ;; (conc url "/?dat=" msg))) - (numretries 0)) + (let* (;; (url (http-transport:make-server-url serverdat)) + (fullurl (caddr serverdat)) ;; (conc url "/ctrl")) ;; (conc url "/?dat=" msg))) + (numretries 10) + (res #f)) (handle-exceptions exn - (begin - (debug:print 0 "WARNING: possible communication error") - (if (< numretries 200) + (begin + (print "ERROR IN http-transport:client-send-receive " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! 2) + (if (> numretries 0) (http-transport:client-send-receive serverdat msg))) (begin (debug:print-info 11 "fullurl=" fullurl "\n") ;; set up the http-client here - (max-retry-attempts 100) + (max-retry-attempts 5) + ;; consider all requests indempotent (retry-request? (lambda (request) - (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10)) - (set! numretries (+ numretries 1)) - #t)) + #t)) ;; (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10)) + (set! numretries (- numretries 1)) + ;; #t)) ;; send the data and get the response ;; extract the needed info from the http data and ;; process and return it. - (mutex-lock! *http-mutex*) - (let* ((res (with-input-from-request fullurl - ;; #f - ;; msg - (list (cons 'dat msg)) - read-string))) - (close-all-connections!) - (mutex-unlock! *http-mutex*) + (let* ((send-recieve (lambda () + (mutex-lock! *http-mutex*) + (set! res (with-input-from-request + fullurl + (list (cons 'dat msg)) + read-string)) + (close-all-connections!) + (mutex-unlock! *http-mutex*))) + (time-out (lambda () + (thread-sleep! 5) + (if (not res) + (debug:print 0 "ERROR: communication with the server timed out. Exiting.")))) + (th1 (make-thread send-recieve "with-input-from-request")) + (th2 (make-thread time-out "time out"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + (thread-terminate! th2) (debug:print-info 11 "got res=" res) (let ((match (string-search (regexp "(.*)<.body>") res))) (debug:print-info 11 "match=" match) (let ((final (cadr match))) (debug:print-info 11 "final=" final) final))))))) (define (http-transport:client-connect iface port) (let* ((login-res #f) - (serverdat (list iface port))) + (uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) + (serverdat (list iface port uri-dat))) (set! login-res (client:login serverdat)) (if (and (not (null? login-res)) (car login-res)) (begin (debug:print-info 2 "Logged in and connected to " iface ":" port) (set! *runremote* serverdat) serverdat) (begin - (debug:print-info 0 "Failed to login or connect to " iface ":" port) - (set! *runremote* #f) - (set! *transport-type* 'fs) - #f)))) + (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port) + (exit 1))))) +;; (set! *runremote* #f) +;; (set! *transport-type* 'fs) +;; #f)))) ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; @@ -233,11 +250,12 @@ (let* ((server-info (let loop () (let ((sdat #f)) (mutex-lock! *heartbeat-mutex*) (set! sdat *runremote*) (mutex-unlock! *heartbeat-mutex*) - (if sdat sdat + (if sdat + sdat (begin (sleep 4) (loop)))))) (iface (car server-info)) (port (cadr server-info)) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -26,11 +26,11 @@ (let* ((dbpath (conc *toppath* "/monitor.db")) (exists (file-exists? dbpath)) (mdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout 36000))) (sqlite3:set-busy-handler! mdb handler) - (sqlite3:execute mdb (conc "PRAGMA synchronous = 1;")) + (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) (if (not exists) (begin (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, action TEXT DEFAULT '', owner TEXT, @@ -142,11 +142,17 @@ (if hostname hostname iface)(if pid pid port)) res)) (define (tasks:server-update-heartbeat mdb server-id) (debug:print-info 0 "Heart beat update of server id=" server-id) - (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id)) + (handle-exceptions + exn + (begin + (debug:print 0 "WARNING: probable timeout on monitor.db access") + (thread-sleep! 1) + (tasks:server-update-heartbeat mdb server-id)) + (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id))) ;; alive servers keep the heartbeat field upto date with seconds every 6 or so seconds (define (tasks:server-alive? mdb server-id #!key (iface #f)(hostname #f)(port #f)(pid #f)) (let* ((server-id (if server-id server-id