Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -50,89 +50,92 @@ ;; These are called by the server on recipt of /api calls ;; - keep it simple, only return the actual result of the call, i.e. no meta info here ;; (define (api:execute-requests dbstruct cmd params) - (case (string->symbol cmd) - ;; SERVERS - ((start-server) (apply server:kind-run params)) - ((kill-server) (set! *server-run* #f)) - - ;; KEYS - ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) - ((get-keys) (db:get-keys dbstruct)) - - ;; TESTS - ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) - ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) - ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) - ((test-set-state-status-by-id) (apply db:test-set-state-status-by-id dbstruct params)) - ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) - ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) - ((delete-test-records) (apply db:delete-test-records dbstruct params)) - ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) - ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) - ((test-set-status-state) (apply db:test-set-status-state dbstruct params)) - ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) - ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) - ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) - ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) - ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) - ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) - ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) - ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) - ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) - ((roll-up-pass-fail-counts) (apply db:roll-up-pass-fail-counts dbstruct params)) - ((update-fail-pass-counts) (apply db:general-call dbstruct 'update-pass-fail-counts params)) - ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) - - ;; RUNS - ((get-run-info) (apply db:get-run-info dbstruct params)) - ((get-run-status) (apply db:get-run-status dbstruct params)) - ((set-run-status) (apply db:set-run-status dbstruct params)) - ((register-run) (apply db:register-run dbstruct params)) - ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) - ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) - ((get-test-id) (apply db:get-test-id dbstruct params)) - ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params)) - ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) - ((delete-run) (apply db:delete-run dbstruct params)) - ((get-runs) (apply db:get-runs dbstruct params)) - ((get-all-run-ids) (db:get-all-run-ids dbstruct)) - ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) - ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) - ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) - ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) - ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) - ((find-and-mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) - - ;; STEPS - ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) - - ;; TEST DATA - ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) - ((csv->test-data) (apply db:csv->test-data dbstruct params)) - ((get-steps-data) (apply db:get-steps-data dbstruct params)) - - ;; MISC - ((login) (apply db:login dbstruct params)) - ((general-call) (let ((stmtname (car params)) - (run-id (cadr params)) - (realparams (cddr params))) - (db:with-db dbstruct run-id #t ;; these are all for modifying the db - (lambda (db) - (db:general-call db stmtname realparams))))) - ((sync-inmem->db) (db:sync-touched dbstruct run-id force-sync: #t)) - ((sdb-qry) (apply sdb:qry params)) - ((ping) (current-process-id)) - - ;; TESTMETA - ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) - ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) - ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) - (else - (list "ERROR" 0)))) + (let ((res + (case (string->symbol cmd) + ;; SERVERS + ((start-server) (apply server:kind-run params)) + ((kill-server) (set! *server-run* #f)) + + ;; KEYS + ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) + ((get-keys) (db:get-keys dbstruct)) + + ;; TESTS + ((test-toplevel-num-items) (apply db:test-toplevel-num-items dbstruct params)) + ((get-test-info-by-id) (apply db:get-test-info-by-id dbstruct params)) + ((test-get-rundir-from-test-id) (apply db:test-get-rundir-from-test-id dbstruct params)) + ((test-set-state-status-by-id) (apply db:test-set-state-status-by-id dbstruct params)) + ((get-count-tests-running) (apply db:get-count-tests-running dbstruct params)) + ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params)) + ((delete-test-records) (apply db:delete-test-records dbstruct params)) + ;; ((delete-test-step-records) (apply db:delete-test-step-records dbstruct params)) + ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params)) + ((test-set-status-state) (apply db:test-set-status-state dbstruct params)) + ((get-previous-test-run-record) (apply db:get-previous-test-run-record dbstruct params)) + ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params)) + ((test-get-logfile-info) (apply db:test-get-logfile-info dbstruct params)) + ((test-get-records-for-index-file) (apply db:test-get-records-for-index-file dbstruct params)) + ((get-testinfo-state-status) (apply db:get-testinfo-state-status dbstruct params)) + ((test-set-top-process-pid) (apply db:test-set-top-process-pid dbstruct params)) + ((test-get-top-process-pid) (apply db:test-get-top-process-pid dbstruct params)) + ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params)) + ((get-prereqs-not-met) (apply db:get-prereqs-not-met dbstruct params)) + ((roll-up-pass-fail-counts) (apply db:roll-up-pass-fail-counts dbstruct params)) + ((update-fail-pass-counts) (apply db:general-call dbstruct 'update-pass-fail-counts params)) + ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params)) + + ;; RUNS + ((get-run-info) (apply db:get-run-info dbstruct params)) + ((get-run-status) (apply db:get-run-status dbstruct params)) + ((set-run-status) (apply db:set-run-status dbstruct params)) + ((register-run) (apply db:register-run dbstruct params)) + ((set-tests-state-status) (apply db:set-tests-state-status dbstruct params)) + ((get-tests-for-run) (apply db:get-tests-for-run dbstruct params)) + ((get-test-id) (apply db:get-test-id dbstruct params)) + ((get-tests-for-runs-mindata) (apply db:get-tests-for-runs-mindata dbstruct params)) + ((get-run-name-from-id) (apply db:get-run-name-from-id dbstruct params)) + ((delete-run) (apply db:delete-run dbstruct params)) + ((get-runs) (apply db:get-runs dbstruct params)) + ((get-all-run-ids) (db:get-all-run-ids dbstruct)) + ((get-prev-run-ids) (apply db:get-prev-run-ids dbstruct params)) + ((get-run-ids-matching-target) (apply db:get-run-ids-matching-target dbstruct params)) + ((get-runs-by-patt) (apply db:get-runs-by-patt dbstruct params)) + ((lock/unlock-run) (apply db:lock/unlock-run dbstruct params)) + ((update-run-event_time) (apply db:update-run-event_time dbstruct params)) + ((find-and-mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) + + ;; STEPS + ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) + + ;; TEST DATA + ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) + ((csv->test-data) (apply db:csv->test-data dbstruct params)) + ((get-steps-data) (apply db:get-steps-data dbstruct params)) + + ;; MISC + ((login) (apply db:login dbstruct params)) + ((general-call) (let ((stmtname (car params)) + (run-id (cadr params)) + (realparams (cddr params))) + (db:with-db dbstruct run-id #t ;; these are all for modifying the db + (lambda (db) + (db:general-call db stmtname realparams))))) + ((sync-inmem->db) (db:sync-touched dbstruct run-id force-sync: #t)) + ((sdb-qry) (apply sdb:qry params)) + ((ping) (current-process-id)) + + ;; TESTMETA + ((testmeta-get-record) (apply db:testmeta-get-record dbstruct params)) + ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) + ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params))))) + (vector #t res))) + ;; NO ELSE - let it return undef + ;;(else + ;; (list "ERROR" 0)))) ;; http-server send-response ;; api:process-request ;; db:* ;; Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -2287,11 +2287,11 @@ (base64:base64-encode (z3:encode-buffer (with-output-to-string (lambda ()(serialize obj))))) #t)) - ((zmq nm)(with-output-to-string (lambda ()(serialize obj)))) + ((zmq nmsg)(with-output-to-string (lambda ()(serialize obj)))) (else obj))) (define (db:string->obj msg #!key (transport 'http)) (case transport ;; ((fs) msg) @@ -2304,11 +2304,11 @@ (regexp "_") "=" msg #t))) (lambda ()(deserialize))) (begin (debug:print 0 "ERROR: reception failed. Received " msg " but cannot translate it.") #f))) ;; crude reply for when things go awry - ((zmq nm)(with-input-from-string msg (lambda ()(deserialize)))) + ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize)))) (else msg))) (define (db:test-set-status-state dbstruct run-id test-id status state msg) (let ((dbdat (db:get-db dbstruct run-id))) (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) Index: nmsg-transport.scm ================================================================== --- nmsg-transport.scm +++ nmsg-transport.scm @@ -69,18 +69,20 @@ (server-thread (make-thread (lambda () (nmsg-transport:try-start-server dbstruct run-id start-port server-id)) "server thread")) (tdbdat (tasks:open-db))) (thread-start! server-thread) - (if (nmsg-transport:ping hostn start-port) + (if (nmsg-transport:ping hostn start-port timeout: 2 expected-key: (current-process-id)) (begin (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep") - (set! *server-info* (list hostn start-port)) ;; probably not needed anymore? + (set! *server-info* (list hostn start-port)) ;; probably not needed anymore? currently used by keep-running (thread-sleep! 3) ;; give some margin for queries to complete before switching from file based access to server based access (set! *inmemdb* dbstruct) (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running") - (thread-start! nmsg-transport:keep-running) + (thread-start! (make-thread + (lambda ()(nmsg-transport:keep-running server-id)) + "keep running")) (thread-join! server-thread)) (begin (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature") (portlogger:open-run-close portlogger:set-failed start-port) (nmsg-transport:run dbstruct hostn run-id server-id))))) @@ -95,76 +97,18 @@ ((and (>= (string-length msg-in) 4) (equal? (substring msg-in 0 4) "ping")) (nn-send repsoc (conc (current-process-id))) (loop (nn-recv repsoc))) (else - (let* ((dat (db:string->obj msg-in transport: 'nm)) + (let* ((dat (db:string->obj msg-in transport: 'nmsg)) (cmd (vector-ref dat 0)) (params (vector-ref dat 1)) (result (api:execute-requests dbstruct cmd params)) - (newdat (db:obj->string result transport: 'nm))) + (newdat (db:obj->string result transport: 'nmsg))) (nn-send repsoc newdat) (loop (nn-recv repsoc)))))))) -;; run nmsg-transport:keep-running in a parallel thread to monitor that the db is being -;; used and to shutdown after sometime if it is not. -;; -(define (nmsg-transport:keep-running) - ;; if none running or if > 20 seconds since - ;; server last used then start shutdown - ;; This thread waits for the server to come alive - (let* ((server-info (let loop () - (let ((sdat #f)) - (mutex-lock! *heartbeat-mutex*) - (set! sdat *server-info*) - (mutex-unlock! *heartbeat-mutex*) - (if sdat sdat - (begin - (debug:print 12 "WARNING: server not started yet, waiting few seconds before trying again") - (sleep 4) - (loop)))))) - (iface (cadr server-info)) - (pullport (caddr server-info)) - (pubport (cadddr server-info)) ;; id interface pullport pubport) - ;; (nmsg-sockets (nmsg-transport:client-connect iface pullport pubport)) - (last-access 0)) - (debug:print-info 11 "heartbeat started for nmsg server on " iface " " pullport " " pubport) - (let loop ((count 0)) - (thread-sleep! 4) ;; no need to do this very often - ;; NB// sync currently does NOT return queue-length - ;; GET REAL QUEUE LENGTH FROM THE VARIABLE - (let ((queue-len 0)) ;; FOR NOW DO NOT DO THIS (cdb:client-call nmsg-sockets 'sync #t 1))) - ;; (print "Server running, count is " count) - (if (< count 1) ;; 3x3 = 9 secs aprox - (loop (+ count 1))) - - ;; NOTE: Get rid of this mechanism! It really is not needed... - ;; (open-run-close tasks:server-update-heartbeat tasks:open-db (car server-info)) - - ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access - (mutex-lock! *heartbeat-mutex*) - (set! last-access *last-db-access*) - (mutex-unlock! *heartbeat-mutex*) - (if (> (+ last-access - ;; (* 50 60 60) ;; 48 hrs - ;; 60 ;; one minute - ;; (* 60 60) ;; one hour - (* 45 60) ;; 45 minutes, until the db deletion bug is fixed. - ) - (current-seconds)) - (begin - (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) - (loop 0)) - (begin - (debug:print-info 0 "Starting to shutdown the server.") - ;; need to delete only *my* server entry (future use) - (set! *time-to-exit* #t) - (open-run-close tasks:server-deregister-self tasks:open-db (get-host-name)) - (thread-sleep! 1) - (debug:print-info 0 "Max cached queries was " *max-cache-size*) - (debug:print-info 0 "Server shutdown complete. Exiting") - (exit))))))) ;; all routes though here end in exit ... ;; (define (nmsg-transport:launch run-id) (let* ((tdbdat (tasks:open-db)) @@ -223,27 +167,27 @@ ;; ping the server at host:port ;; return the open socket if successful (return-socket == #t) ;; expect the key expected-key returned in payload ;; send our-key or #f as payload ;; -(define (nmsg-transport:ping hostn port #!key (return-socket #t)(expected-key #f)(our-key #f)) +(define (nmsg-transport:ping hostn port #!key (timeout 3)(return-socket #t)(expected-key #f)(our-key #f)) ;; send a random number along with pid and check that we get it back (let* ((req (nn-socket 'req)) (host (if (or (not hostn) (equal? hostn "-")) ;; use localhost (get-host-name) hostn)) (success #f) (keepwaiting #t) - (dat (db:obj->string (vector "ping" our-key) transport: 'nm)) + (dat (db:obj->string (vector "ping" our-key) transport: 'nmsg)) (ping (make-thread (lambda () (nn-send req dat) (let* ((result (nn-recv req)) - (key (vector-ref (db:string->obj result transport: 'nm) 1))) - (if (or (not expect-key) ;; just getting a reply is good enough then - (equal? (conc (current-process-id)) expected-key)) + (key (vector-ref (db:string->obj result transport: 'nmsg) 1))) + (if (or (not expected-key) ;; just getting a reply is good enough then + (equal? key expected-key)) (begin ;; (print "ping, success: received \"" result "\"") (set! success #t)) (begin ;; (print "ping, failed: received key \"" result "\"") @@ -252,11 +196,11 @@ "ping")) (timeout (make-thread (lambda () (let loop ((count 0)) (thread-sleep! 1) (print "still waiting after count seconds...") - (if (and keepwaiting (< count 10)) + (if (and keepwaiting (< count timeout)) ;; yes, this is very aproximate (loop (+ count 1)))) (if keepwaiting (begin (print "timeout waiting for ping") (thread-terminate! ping)))) @@ -263,14 +207,14 @@ "timeout"))) (nn-connect req (conc "tcp://" host ":" port)) (handle-exceptions exn (begin - (print-call-chain) - (print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) - (print "exn=" (condition->list exn)) - (print "ping failed to connect to " host ":" port)) + ;; (print-call-chain) + ;; (print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + ;; (print "exn=" (condition->list exn)) + (debug:print-info 1 "ping failed to connect to " host ":" port)) (thread-start! timeout) (thread-start! ping) (thread-join! ping) (if success (thread-terminate! timeout))) (if return-socket @@ -297,40 +241,39 @@ #f)))) ;; run nmsg-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; -(define (nmsg-transport:keep-running) +(define (nmsg-transport:keep-running server-id) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (let* ((server-info (let loop () (let ((sdat #f)) (mutex-lock! *heartbeat-mutex*) - (set! sdat *runremote*) + (set! sdat *server-info*) (mutex-unlock! *heartbeat-mutex*) - (if sdat sdat + (if sdat + (begin + (debug:print-info 0 "keep-running got sdat=" sdat) + sdat) (begin - (sleep 4) + (thread-sleep! 0.5) (loop)))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) - (tdb (tasks:open-db)) - (spid (tasks:server-get-server-id tdb #f iface port #f))) - (print "Keep-running got server pid " spid ", using iface " iface " and port " port) + (tdbdat (tasks:open-db))) + (print "Keep-running got server pid " server-id ", using iface " iface " and port " port) (let loop ((count 0)) (thread-sleep! 4) ;; no need to do this very often ;; NB// sync currently does NOT return queue-length (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1))) ;; (print "Server running, count is " count) (if (< count 1) ;; 3x3 = 9 secs aprox (loop (+ count 1))) - ;; NOTE: Get rid of this mechanism! It really is not needed... - (tasks:server-update-heartbeat tdb spid) - ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access (mutex-lock! *heartbeat-mutex*) (set! last-access *last-db-access*) (mutex-unlock! *heartbeat-mutex*) (if (> (+ last-access @@ -339,21 +282,19 @@ ;; (* 60 60) ;; one hour (* 45 60) ;; 45 minutes, until the db deletion bug is fixed. ) (current-seconds)) (begin - (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) + (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (loop 0)) (begin (debug:print-info 0 "Starting to shutdown the server.") - ;; need to delete only *my* server entry (future use) (set! *time-to-exit* #t) - (tasks:server-deregister-self tdb (get-host-name)) - (thread-sleep! 1) - (debug:print-info 0 "Max cached queries was " *max-cache-size*) + (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running") (debug:print-info 0 "Server shutdown complete. Exiting") - (exit))))))) + ;; (exit) + )))))) (define (nmsg-transport:client-signal-handler signum) (handle-exceptions exn Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -86,11 +86,11 @@ (jparams (db:obj->string params))) (if connection-info ;; use the server if have connection info (let* ((dat (case *transport-type* ((http)(http-transport:client-api-send-receive run-id connection-info cmd jparams)) - ((nm) (nm-transport:client-api-send-receive run-id connection-info cmd jparams)) + ((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info cmd jparams)) (else (exit)))) (res (if (vector? dat) (vector-ref dat 1) #f)) (success (if (vector? dat) (vector-ref dat 0) #f))) (http-transport:server-dat-update-last-access connection-info) (if success