Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -48,15 +48,15 @@ ((rpc) (rpc:client-connect iface port)) ((http) (http:client-connect iface port)) ((zmq) (zmq:client-connect iface port)) (else (rpc:client-connect iface port)))) -(define (client:setup run-id #!key (remaining-tries 10) (failed-connects 0)) +(define (client:setup run-id area-dat #!key (remaining-tries 10) (failed-connects 0)) (case (server:get-transport) - ((rpc) (rpc-transport:client-setup run-id)) ;;(client:setup-rpc run-id)) - ((http)(client:setup-http run-id)) - (else (rpc-transport:client-setup run-id)))) ;; (client:setup-rpc run-id)))) + ((rpc) (rpc-transport:client-setup run-id area-dat)) + ((http)(client:setup-http run-id area-dat)) + (else (rpc-transport:client-setup run-id area-dat)))) ;; (define (client:login-no-auto-setup server-info run-id) ;; (case (server:get-transport) ;; ((rpc) (rpc:login-no-auto-client-setup server-info run-id)) ;; ((http) (rmt:login-no-auto-client-setup server-info run-id)) @@ -83,11 +83,11 @@ ;; (if (member remaining-tries '(3 4 6)) ;; (begin ;; login failed ;; (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info) ;; (hash-table-delete! (common:get-remote remote) run-id) ;; (open-run-close tasks:server-force-clean-run-record -;; tasks:open-db +;; (lambda ()(tasks:open-db area-dat)) ;; run-id ;; (car host-info) ;; (cadr host-info) ;; " client:setup (host-info=#t)") ;; (thread-sleep! 5) @@ -95,11 +95,11 @@ ;; (begin ;; (debug:print 25 "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info) ;; (thread-sleep! 5) ;; (client:setup run-id remaining-tries: (- remaining-tries 1)))))) ;; ;; YUK: rename server-dat here -;; (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id))) +;; (let* ((server-dat (open-run-close tasks:get-server (lambda ()(tasks:open-db area-dat)) run-id))) ;; (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) ;; (if server-dat ;; (let* ((iface (tasks:hostinfo-get-interface server-dat)) ;; (port (tasks:hostinfo-get-port server-dat)) ;; (start-res (http-transport:client-connect iface port)) @@ -112,11 +112,11 @@ ;; (if (member remaining-tries '(2 5)) ;; (begin ;; login failed ;; (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) ;; (hash-table-delete! (common:get-remote remote) run-id) ;; (open-run-close tasks:server-force-clean-run-record -;; tasks:open-db +;; (lambda ()(tasks:open-db area-dat)) ;; run-id ;; (tasks:hostinfo-get-interface server-dat) ;; (tasks:hostinfo-get-port server-dat) ;; " client:setup (server-dat = #t)") ;; (thread-sleep! 2) @@ -128,37 +128,38 @@ ;; (thread-sleep! 5) ;; (client:setup run-id remaining-tries: (- remaining-tries 1)))))) ;; (begin ;; no server registered ;; (if (eq? remaining-tries 2) ;; (begin -;; ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") +;; ;; (open-run-close tasks:server-clean-out-old-records-for-run-id (lambda ()(tasks:open-db area-dat)) run-id " client:setup (server-dat=#f)") ;; (client:setup run-id remaining-tries: 10)) ;; (begin ;; (thread-sleep! 2) ;; (debug:print 25 "INFO: client:setup start-res (not defined here), run-id=" run-id ", server-dat=" server-dat) -;; (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 3) +;; (if (< (open-run-close tasks:num-in-available-state (lambda ()(tasks:open-db area-dat)) run-id) 3) ;; (begin -;; ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)") +;; ;; (open-run-close tasks:server-clean-out-old-records-for-run-id (lambda ()(tasks:open-db area-dat)) run-id " client:setup (server-dat=#f)") ;; (server:try-running run-id))) ;; (thread-sleep! 10) ;; give server a little time to start up ;; (client:setup run-id remaining-tries: (- remaining-tries 1))))))))))) ;; Do all the connection work, look up the transport type and set up the ;; connection if required. ;; ;; There are two scenarios. -;; 1. We are a test manager and we received *transport-type* and (common:get-remote remote) via cmdline +;; 1. We are a test manager and we received transport-type and (common:get-remote remote) via cmdline ;; 2. We are a run tests, list runs or other interactive process and we must figure out -;; *transport-type* and (common:get-remote remote) from the monitor.db +;; transport-type and (common:get-remote remote) from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove (common:get-remote remote) stuff ;; -(define (client:setup-http run-id #!key (remaining-tries 10) (failed-connects 0)(remote #f)) +(define (client:setup-http run-id area-dat #!key (remaining-tries 10) (failed-connects 0)(remote #f)) (debug:print-info 2 "client:setup remaining-tries=" remaining-tries) - (let* ((tdbdat (tasks:open-db))) + (let* ((tdbdat (tasks:open-db area-dat)) + (transport-type (megatest:area-transport area-dat))) (if (<= remaining-tries 0) (begin (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) (exit 1)) (let* ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id))) @@ -165,14 +166,14 @@ (debug:print-info 4 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if server-dat (let* ((iface (tasks:hostinfo-get-interface server-dat)) (hostname (tasks:hostinfo-get-hostname server-dat)) (port (tasks:hostinfo-get-port server-dat)) - (start-res (case *transport-type* + (start-res (case transport-type ((http)(http-transport:client-connect iface port)) ((nmsg)(nmsg-transport:client-connect hostname port)))) - (ping-res (case *transport-type* + (ping-res (case transport-type ((http)(rmt:login-no-auto-client-setup start-res run-id)) ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id))) (if logininfo (car (vector-ref logininfo 1)) #f)))))) @@ -182,11 +183,11 @@ (common:set-remote! remote run-id start-res) (debug:print-info 2 "connected to " (http-transport:server-dat-make-url start-res)) start-res) (begin ;; login failed but have a server record, clean out the record and try again (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) - (case *transport-type* + (case transport-type ((http)(http-transport:close-connections run-id))) (common:del-remote! remote run-id) (tasks:kill-server-run-id run-id) (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id @@ -196,19 +197,19 @@ (if (> remaining-tries 8) (thread-sleep! (+ 1 (random 5))) ;; spread out the starts a little (thread-sleep! (+ 15 (random 20)))) ;; it isn't going well. give it plenty of time (server:try-running run-id) (thread-sleep! 5) ;; give server a little time to start up - (client:setup run-id remaining-tries: (- remaining-tries 1)) + (client:setup run-id area-dat remaining-tries: (- remaining-tries 1)) ))) (begin ;; no server registered (let ((num-available (tasks:num-in-available-state (db:dbdat-get-db tdbdat) run-id))) (debug:print-info 0 "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available) (if (< num-available 2) (server:try-running run-id)) (thread-sleep! (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. - (client:setup run-id remaining-tries: (- remaining-tries 1))))))))) + (client:setup run-id area-dat remaining-tries: (- remaining-tries 1))))))))) ;; keep this as a function to ease future (define (client:start run-id server-info) (http-transport:client-connect (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info))) Index: dashboard-guimonitor.scm ================================================================== --- dashboard-guimonitor.scm +++ dashboard-guimonitor.scm @@ -175,18 +175,18 @@ (iup:attribute-set! tabtop "TABTITLE3" "Tools") tabtop)))) ;; BUG: Remember to re-instate this!!!! ;; (on-exit (lambda () -;; (let ((tdb (tasks:open-db))) +;; (let ((tdb (tasks:open-db area-dat))) ;; ;; (print "On-exit called") ;; (tasks:remove-monitor-record tdb) ;; (sqlite3:finalize! tdb)))) -(define (gui-monitor db) +(define (gui-monitor db area-dat) (let ((keys (db:get-keys db)) - (tdb (tasks:open-db))) + (tdb (tasks:open-db area-dat))) (tasks:register-monitor db tdb) ;;; let the other monitors know we are here (control-panel db tdb keys) ;(tasks:remove-monitor-record db) ;(sqlite3:finalize! db) )) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -418,11 +418,11 @@ (keydat (if testdat (db:get-key-val-pairs dbstruct run-id) #f)) (rundat (if testdat (db:get-run-info dbstruct run-id) #f)) (runname (if testdat (db:get-value-by-header (db:get-rows rundat) (db:get-header rundat) "runname") #f)) - (tdb (tdb:open-test-db-by-test-id-local dbstruct run-id test-id)) + (tdb (tdb:open-test-db-by-test-id-local dbstruct area-dat run-id test-id)) ;; These next two are intentional bad values to ensure errors if they should not ;; get filled in properly. (logfile "/this/dir/better/not/exist") (rundir (if testdat (db:test-get-rundir testdat) @@ -695,11 +695,11 @@ (db:test-data-get-tol x) (db:test-data-get-status x) (db:test-data-get-units x) (db:test-data-get-type x) (db:test-data-get-comment x))) - (tdb:open-run-close-db-by-test-id-local dbstruct run-id test-id #f tdb:read-test-data test-id "%"))) + (tdb:open-run-close-db-by-test-id-local dbstruct area-dat run-id test-id #f tdb:read-test-data test-id "%"))) "\n"))) (if (not (equal? currval newval)) (iup:attribute-set! test-data "VALUE" newval ))))) ;; "TITLE" newval))))) test-data)) ;;(dashboard:run-controls) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -649,11 +649,11 @@ (run-ids (if run-ids run-ids (if toppath (begin (db:delay-if-busy mtdb area-dat) (db:get-all-run-ids mtdb))))) - (tdbdat (tasks:open-db)) + (tdbdat (tasks:open-db area-dat)) (servers (tasks:get-all-servers (db:delay-if-busy tdbdat area-dat)))) ;; kill servers (if (member 'killservers options) (for-each Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -512,12 +512,12 @@ (iup:attribute-set! stats-matrix "WIDTHDEF" "40") (iup:vbox ;; (iup:label "Run statistics" #:expand "HORIZONTAL") stats-matrix))) -(define (dcommon:servers-table) - (let* ((tdbdat (tasks:open-db)) +(define (dcommon:servers-table area-dat) + (let* ((tdbdat (tasks:open-db area-dat)) (colnum 0) (rownum 0) (servers-matrix (iup:matrix #:expand "YES" #:numcol 7 #:numcol-visible 7 Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -69,11 +69,11 @@ (ipaddrstr (let ((ipstr (if (string=? "-" hostn) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") (server:get-best-guess-address hostname) #f))) (if ipstr ipstr hostn))) ;; hostname))) - (start-port (portlogger:open-run-close portlogger:find-port)) + (start-port (portlogger:open-run-close portlogger:find-port area-dat)) (link-tree-path (configf:lookup configdat "setup" "linktree"))) ;; (set! db *inmemdb*) (debug:print-info 0 "portlogger recommended port: " start-port) (root-path (if link-tree-path link-tree-path @@ -132,18 +132,18 @@ (if (< portnum 64000) (begin (debug:print 0 "WARNING: attempt to start server failed. Trying again ...") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 "exn=" (condition->list exn)) - (portlogger:open-run-close portlogger:set-failed portnum) + (portlogger:open-run-close portlogger:set-failed area-dat portnum) (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port") (thread-sleep! 0.1) ;; get_next_port goes here (http-transport:try-start-server run-id ipaddrstr - (portlogger:open-run-close portlogger:find-port) + (portlogger:open-run-close portlogger:find-port area-dat) server-id area-dat)) (begin (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id ipaddrstr portnum " http-transport:try-start-server") (print "ERROR: Tried and tried but could not start the server")))) @@ -365,16 +365,16 @@ server-dat)) ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; -(define (http-transport:keep-running server-id run-id) +(define (http-transport:keep-running server-id run-id area-dat) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (debug:print-info 0 "Starting the sync-back, keep alive thread in server for run-id=" run-id) - (let* ((tdbdat (tasks:open-db)) + (let* ((tdbdat (tasks:open-db area-dat)) (server-info (let loop ((start-time (current-seconds)) (changed #t) (last-sdat "not this")) (let ((sdat #f)) (thread-sleep! 0.01) @@ -413,11 +413,11 @@ ;; inmemdb is a dbstruct (condition-case (db:sync-touched *inmemdb* *run-id* force-sync: #t) ((sync-failed)(cond ((> bad-sync-count 10) ;; time to give up - (http-transport:server-shutdown server-id port)) + (http-transport:server-shutdown server-id port area-dat)) (else ;; (> bad-sync-count 0) ;; we've had a fail or two, delay and loop (thread-sleep! 5) (loop count server-state (+ bad-sync-count 1))))) ((exn) (debug:print 0 "ERROR: error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server") @@ -446,11 +446,11 @@ ;; (db:get-db *inmemdb* #t) (db:get-db *inmemdb* run-id) (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")) (begin ;; gotta exit nicely (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision") - (http-transport:server-shutdown server-id port)))))) + (http-transport:server-shutdown server-id port area-dat)))))) (if (< count 1) ;; 3x3 = 9 secs aprox (loop (+ count 1) 'running bad-sync-count)) ;; Check that iface and port have not changed (can happen if server port collides) @@ -487,23 +487,23 @@ ;; ;; (if (tasks:server-am-i-the-server? tdb run-id) ;; (tasks:server-set-state! tdb server-id "running")) ;; (loop 0 server-state bad-sync-count)) - (http-transport:server-shutdown server-id port))))) + (http-transport:server-shutdown server-id port area-dat))))) -(define (http-transport:server-shutdown server-id port) - (let ((tdbdat (tasks:open-db))) +(define (http-transport:server-shutdown server-id port area-dat) + (let ((tdbdat (tasks:open-db area-dat))) (debug:print-info 0 "Starting to shutdown the server.") ;; need to delete only *my* server entry (future use) (set! *time-to-exit* #t) (if *inmemdb* (db:sync-touched *inmemdb* *run-id* force-sync: #t)) ;; ;; start_shutdown ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") - (portlogger:open-run-close portlogger:set-port port "released") + (portlogger:open-run-close portlogger:set-port area-dat port "released") (thread-sleep! 5) (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Number of cached writes " *number-of-writes*) (debug:print-info 0 "Average cached write time " (if (eq? *number-of-writes* 0) @@ -525,30 +525,30 @@ ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (http-transport:launch run-id area-dat) - (let* ((tdbdat (tasks:open-db))) + (let* ((tdbdat (tasks:open-db area-dat))) (set! *run-id* run-id) (if (args:get-arg "-daemonize") (begin (daemon:ize) (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it (begin (current-error-port *alt-log-file*) (current-output-port *alt-log-file*))))) - (if (server:check-if-running run-id) + (if (server:check-if-running run-id area-dat) (begin (debug:print 0 "INFO: Server for run-id " run-id " already running") (exit 0))) - (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)) + (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id area-dat)) (remtries 4)) (if (not server-id) (if (> remtries 0) (begin (thread-sleep! 2) - (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id) + (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id area-dat) (- remtries 1))) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " http-transport:launch") @@ -562,11 +562,11 @@ run-id server-id area-dat)) "Server run")) (th3 (make-thread (lambda () (debug:print-info 0 "Server monitor thread started") - (http-transport:keep-running server-id run-id)) + (http-transport:keep-running server-id run-id area-dat)) "Keep running"))) (thread-start! th2) (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. (thread-start! th3) (set! *didsomething* #t) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -874,11 +874,11 @@ (set! cmdparms (base64:base64-encode (z3:encode-buffer (with-output-to-string (lambda () ;; (list 'hosts hosts) (write (list (list 'testpath test-path) - (list 'transport (conc (megatest:area-transport area-dat))) ;;; *transport-type*)) + (list 'transport (conc (megatest:area-transport area-dat))) ;; ;; (list 'serverinf *server-info*) (list 'toppath toppath) (list 'work-area work-area) (list 'test-name test-name) (list 'runscript runscript) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -651,11 +651,11 @@ (let ((tl (launch:setup-for-run *area-dat*)) (run-id (and (args:get-arg "-run-id") (string->number (args:get-arg "-run-id"))))) (if run-id (begin - (server:launch run-id) + (server:launch run-id *area-dat*) (set! *didsomething* #t)) (debug:print 0 "ERROR: server requires run-id be specified with -run-id"))) ;; Not a server? This section will decide how to communicate ;; @@ -688,11 +688,11 @@ (if (or (args:get-arg "-list-servers") (args:get-arg "-stop-server")) (let ((tl (launch:setup-for-run *area-dat*))) (if tl - (let* ((tdbdat (tasks:open-db)) + (let* ((tdbdat (tasks:open-db *area-dat*)) (servers (tasks:get-all-servers (db:delay-if-busy tdbdat))) (fmtstr "~5a~12a~8a~20a~24a~10a~10a~10a~10a\n") (servers-to-kill '()) (killinfo (args:get-arg "-stop-server")) (khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f)) @@ -1051,11 +1051,11 @@ (lambda (target runname keys keyvals) ;; ;; May or may not implement it this way ... ;; ;; Insert this run into the tasks queue - ;; (open-run-close tasks:add tasks:open-db + ;; (open-run-close tasks:add (lambda ()(tasks:open-db *area-dat*)) ;; "runtests" ;; user ;; target ;; runname ;; (args:get-arg "-runtests") Index: nmsg-transport.scm ================================================================== --- nmsg-transport.scm +++ nmsg-transport.scm @@ -63,15 +63,15 @@ ;; S E R V E R ;;====================================================================== (define (nmsg-transport:run dbstruct area-dat hostn run-id server-id #!key (retrynum 1000)) (debug:print 2 "Attempting to start the server ...") - (let* ((start-port (portlogger:open-run-close portlogger:find-port)) + (let* ((start-port (portlogger:open-run-close portlogger:find-port area-dat)) (server-thread (make-thread (lambda () (nmsg-transport:try-start-server dbstruct run-id start-port server-id)) "server thread")) - (tdbdat (tasks:open-db))) + (tdbdat (tasks:open-db area-dat))) (thread-start! server-thread) (thread-sleep! 0.1) (if (nmsg-transport:ping hostn start-port timeout: 2 expected-key: (current-process-id)) (let ((interface (if (equal? hostn "-")(get-host-name) hostn))) (tasks:server-set-interface-port (db:delay-if-busy tdbdat) server-id interface start-port) @@ -86,11 +86,11 @@ (thread-join! server-thread)) (if (> retrynum 0) (begin (debug:print 0 "WARNING: Failed to connect to server (self) on host " hostn ":" start-port ", trying again.") (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature") - (portlogger:open-run-close portlogger:set-failed start-port) + (portlogger:open-run-close portlogger:set-failed area-dat start-port) (nmsg-transport:run dbstruct area-dat hostn run-id server-id)) (begin (debug:print 0 "ERROR: could not find an open port to start server on. Giving up") (exit 1)))))) @@ -106,11 +106,11 @@ (loop (nn-recv repsoc)))))) ;; all routes though here end in exit ... ;; (define (nmsg-transport:launch run-id area-dat) - (let* ((tdbdat (tasks:open-db)) + (let* ((tdbdat (tasks:open-db area-dat)) (dbstruct (db:setup run-id)) (hostn (or (args:get-arg "-server") "-"))) (set! *run-id* run-id) (set! *inmemdb* dbstruct) ;; with nbfake daemonize isn't really needed @@ -120,21 +120,21 @@ ;; (daemon:ize) ;; (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it ;; (begin ;; (current-error-port *alt-log-file*) ;; (current-output-port *alt-log-file*))))) - (if (server:check-if-running run-id) + (if (server:check-if-running run-id area-dat) (begin (debug:print-info 0 "Server for run-id " run-id " already running") (exit 0))) (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)) (remtries 4)) (if (not server-id) (if (> remtries 0) (begin (thread-sleep! 2) - (if (not (server:check-if-running run-id)) + (if (not (server:check-if-running run-id area-dat)) (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id) (- remtries 1)) (begin (debug:print-info 0 "Another server took the slot, exiting") (exit 0)))) @@ -271,11 +271,11 @@ (thread-sleep! 0.5) (loop)))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) - (tdbdat (tasks:open-db)) + (tdbdat (tasks:open-db area-dat)) (server-timeout (let ((tmo (configf:lookup (megatest:area-configdat area-dat) "server" "timeout"))) (if (and (string? tmo) (string->number tmo)) (* 60 60 (string->number tmo)) ;; (* 3 24 60 60) ;; default to three days Index: olddashboard.scm ================================================================== --- olddashboard.scm +++ olddashboard.scm @@ -474,12 +474,12 @@ (iup:attribute-set! stats-matrix "WIDTHDEF" "40") (iup:vbox ;; (iup:label "Run statistics" #:expand "HORIZONTAL") stats-matrix))) -(define (dcommon:servers-table) - (let* ((tdbdat (tasks:open-db)) +(define (dcommon:servers-table area-dat) + (let* ((tdbdat (tasks:open-db area-dat)) (colnum 0) (rownum 0) (servers-matrix (iup:matrix #:expand "YES" #:numcol 7 #:numcol-visible 7 @@ -2126,11 +2126,11 @@ (define *monitor-db-path* (conc *dbdir* "/monitor.db")) (define *last-monitor-update-time* 0) ;; Force creation of the db in case it isn't already there. -(tasks:open-db) +(tasks:open-db area-dat) (define (dashboard:get-youngest-run-db-mod-time) (handle-exceptions exn (begin Index: portlogger.scm ================================================================== --- portlogger.scm +++ portlogger.scm @@ -17,11 +17,11 @@ (declare (uses db)) ;; lsof -i -(define (portlogger:open-db fname) +(define (portlogger:open-db fname area-dat) (let* ((avail (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away (exists (file-exists? fname)) (db (if avail (sqlite3:open-database fname) (begin @@ -36,11 +36,11 @@ ;; port INTEGER PRIMARY KEY, ;; state TEXT DEFAULT 'not-used', ;; fail_count INTEGER DEFAULT 0, ;; update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")))) (sqlite3:set-busy-handler! db handler) - (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") + (db:set-sync db area-dat) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") ;; (if (not exists) ;; needed with IF NOT EXISTS? (sqlite3:execute db "CREATE TABLE IF NOT EXISTS ports ( port INTEGER PRIMARY KEY, @@ -47,11 +47,11 @@ state TEXT DEFAULT 'not-used', fail_count INTEGER DEFAULT 0, update_time TIMESTAMP DEFAULT (strftime('%s','now')) );") db)) -(define (portlogger:open-run-close proc . params) +(define (portlogger:open-run-close proc area-dat . params) (let* ((fname (conc "/tmp/." (current-user-name) "-portlogger.db")) (avail (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away (handle-exceptions exn (begin @@ -60,11 +60,11 @@ (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 "exn=" (condition->list exn)) (if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it (print-call-chain (current-error-port))) (let* (;; (lock (obtain-dot-lock fname 2 9 10)) - (db (portlogger:open-db fname)) + (db (portlogger:open-db fname area-dat)) (res (apply proc db params))) (sqlite3:finalize! db) ;; (release-dot-lock fname) res)))) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -76,12 +76,12 @@ (let ((cinfo (common:get-remote remote run-id))) (if cinfo cinfo ;; NB// can cache the answer for server running for 10 seconds ... ;; ;; (and (not (rmt:write-frequency-over-limit? cmd run-id)) - (if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id) - (client:setup run-id remote: remote) + (if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db area-dat)) run-id) + (client:setup run-id area-dat remote: remote) #f)))) (define (rmt:discard-old-connections area-dat) ;; clean out old connections (mutex-lock! *db-multi-sync-mutex*) @@ -105,17 +105,18 @@ (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id (define (rmt:send-receive cmd rid params area-dat #!key (attemptnum 1)(remote #f)) ;; start attemptnum at 1 so the modulo below works as expected (rmt:discard-old-connections area-dat) ;; (mutex-lock! *send-receive-mutex*) - (let* ((run-id (if rid rid 0)) + (let* ((transport-type (megatest:area-transport area-dat)) + (run-id (if rid rid 0)) (configdat (megatest:area-configdat area-dat)) (connection-info (rmt:get-connection-info run-id area-dat))) ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also) (if connection-info ;; use the server if have connection info - (let* ((dat (case *transport-type* + (let* ((dat (case transport-type ((http)(condition-case (http-transport:client-api-send-receive run-id connection-info cmd params) ((commfail)(vector #f "communications fail")) ((exn)(vector #f "other fail")))) ((nmsg)(condition-case @@ -126,11 +127,11 @@ (res (if (vector? dat) (vector-ref dat 1) #f))) (if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info)) (if success (begin ;; (mutex-unlock! *send-receive-mutex*) - (case *transport-type* + (case ((http) res) ;; (db:string->obj res)) ((nmsg) res))) ;; (vector-ref res 1))) (begin ;; let ((new-connection-info (client:setup run-id))) (debug:print 0 "WARNING: Communication failed, trying call to rmt:send-receive again.") ;; (case *transport-type* @@ -138,11 +139,11 @@ (common:del-remote! remote run-id) ;; don't keep using the same connection ;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. ;; (if (eq? (modulo attemptnum 5) 0) ;; (tasks:kill-server-run-id run-id tag: "api-send-receive-failed")) ;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications - (tasks:start-and-wait-for-server (tasks:open-db) run-id 15) + (tasks:start-and-wait-for-server (tasks:open-db area-dat) run-id 15) ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1)))))) ;; no longer killing the server in http-transport:client-api-send-receive ;; may kill it here but what are the criteria? ;; start with three calls then kill server @@ -159,11 +160,11 @@ (let ((faststart (configf:lookup configdat "server" "faststart"))) (common:del-remote! remote run-id) ;; (mutex-unlock! *send-receive-mutex*) (if (and faststart (equal? faststart "no")) (begin - (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10) + (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db area-dat)) run-id 10) (thread-sleep! (random 5)) ;; give some time to settle and minimize collison? (rmt:send-receive cmd rid params area-dat attemptnum: (+ attemptnum 1))) (begin (server:kind-run run-id area-dat) (rmt:open-qry-close-locally cmd run-id params area-dat)))) @@ -376,11 +377,11 @@ (define (rmt:open-test-db-by-test-id run-id test-id area-dat #!key (work-area #f)) (let* ((test-path (if (string? work-area) work-area (rmt:test-get-rundir-from-test-id run-id test-id area-dat)))) (debug:print 3 "TEST PATH: " test-path) - (open-test-db test-path))) + (open-test-db test-path area-dat))) ;; WARNING: This currently bypasses the transaction wrapped writes system (define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment area-dat) (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment) area-dat)) Index: rpc-transport.scm ================================================================== --- rpc-transport.scm +++ rpc-transport.scm @@ -37,30 +37,30 @@ ;; all routes though here end in exit ... ;; ;; start_server? ;; -(define (rpc-transport:launch run-id) +(define (rpc-transport:launch run-id area-dat) (set! *run-id* run-id) (if (args:get-arg "-daemonize") (daemon:ize)) - (if (server:check-if-running run-id) + (if (server:check-if-running run-id area-dat) (begin (debug:print 0 "INFO: Server for run-id " run-id " already running") (exit 0))) - (let loop ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id)) + (let loop ((server-id (open-run-close tasks:server-lock-slot (lambda ()(tasks:open-db area-dat))run-id)) (remtries 4)) (if (not server-id) (if (> remtries 0) (begin (thread-sleep! 2) - (loop (open-run-close tasks:server-lock-slot tasks:open-db run-id) + (loop (open-run-close tasks:server-lock-slot (lambda ()(tasks:open-db area-dat)) run-id) (- remtries 1))) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") - (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db " rpc-transport:launch"))) + (open-run-close tasks:server-delete-records-for-this-pid (lambda ()(tasks:open-db area-dat)) " rpc-transport:launch"))) (begin (rpc-transport:run (if (args:get-arg "-server")(args:get-arg "-server") "-") run-id server-id) (exit))))) (define (rpc-transport:run hostn run-id server-id area-dat) @@ -76,11 +76,11 @@ (ipaddrstr (let ((ipstr (if (string=? "-" hostn) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") (server:get-best-guess-address hostname) #f))) (if ipstr ipstr hostn))) ;; hostname))) - (start-port (open-run-close tasks:server-get-next-port tasks:open-db)) + (start-port (open-run-close tasks:server-get-next-port (lambda ()(tasks:open-db area-dat)))) (link-tree-path (configf:lookup configdat "setup" "linktree")) (rpc:listener (rpc-transport:find-free-port-and-open (rpc:default-server-port))) (th1 (make-thread (lambda () ((rpc:make-server rpc:listener) #t)) @@ -93,15 +93,15 @@ (ipaddrstr (if (string=? "-" hostn) (server:get-best-guess-address hostname) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") #f)) (portnum (rpc:default-server-port)) (host:port (conc (if ipaddrstr ipaddrstr hostname) ":" portnum)) - (tdb (tasks:open-db))) + (tdb (tasks:open-db area-dat))) (thread-start! th1) (set! db *inmemdb*) (open-run-close tasks:server-set-interface-port - tasks:open-db + (lambda ()(tasks:open-db area-dat)) server-id ipaddrstr portnum) (debug:print 0 "Server started on " host:port) ;; (trace rpc:publish-procedure!) @@ -111,11 +111,11 @@ ;;====================================================================== ;; ;; end of publish-procedure section ;;====================================================================== ;; (on-exit (lambda () - (open-run-close tasks:server-set-state! tasks:open-db server-id "stopped"))) + (open-run-close tasks:server-set-state! (lambda ()(tasks:open-db area-dat)) server-id "stopped"))) (set! *rpc:listener* rpc:listener) (tasks:server-set-state! tdb server-id "running") (set! *inmemdb* (db:setup run-id)) ;; if none running or if > 20 seconds since @@ -128,11 +128,11 @@ (begin (debug:print-info 0 "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *last-db-access*)) (loop (+ 1 count))) (begin (debug:print-info 0 "Starting to shutdown the server side") - (open-run-close tasks:server-delete-record tasks:open-db server-id " rpc-transport:try-start-server stop") + (open-run-close tasks:server-delete-record (lambda ()(tasks:open-db area-dat)) server-id " rpc-transport:try-start-server stop") (thread-sleep! 10) (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Server shutdown complete. Exiting") )))))) @@ -179,11 +179,11 @@ server-dat) (begin (server:try-running run-id) (thread-sleep! 2) (rpc-transport:client-setup run-id area-dat remtries: (- remtries 1))))) - (let* ((server-db-info (open-run-close tasks:get-server tasks:open-db run-id))) + (let* ((server-db-info (open-run-close tasks:get-server (lambda ()(tasks:open-db area-dat)) run-id))) (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if server-db-info (let* ((iface (tasks:hostinfo-get-interface server-db-info)) (port (tasks:hostinfo-get-port server-db-info)) (server-dat (list iface port #f #f #f)) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -222,19 +222,19 @@ (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names #f) ;; (hash-table-keys all-tests-registry)) (test-names #f) ;; (tests:filter-test-names all-test-names test-patts)) (required-tests #f) ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) - (tdbdat (tasks:open-db))) + (tdbdat (tasks:open-db area-dat))) (if (tasks:need-server run-id area-dat)(tasks:start-and-wait-for-server tdbdat run-id 10)) (set-signal-handler! signal/int (lambda (signum) (signal-mask! signum) (print "Received signal " signum ", cleaning up before exit. Please wait...") - (let ((tdbdat (tasks:open-db))) + (let ((tdbdat (tasks:open-db area-dat))) (rmt:tasks-set-state-given-param-key task-key "killed")) (print "Killed by signal " signum ". Exiting") (exit))) ;; register this run in monitor.db @@ -919,11 +919,11 @@ (string->number mcj) 1))) ;; length of the register queue ahead (reglen (if (number? reglen-in) reglen-in 1)) (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle (last-time-some-running (current-seconds)) - (tdbdat (tasks:open-db))) + (tdbdat (tasks:open-db area-dat))) ;; Initialize the test-registery hash with tests that already have a record ;; convert state to symbol and use that as the hash value (for-each (lambda (trec) (let ((id (db:test-get-id trec)) @@ -1428,11 +1428,11 @@ ;; NB// should pass in keys? ;; (define (runs:operate-on action target runnamepatt testpatt area-dat #!key (state #f)(status #f)(new-state-status #f)(mode 'remove-all)(options '())) (common:clear-caches) ;; clear all caches (let* ((db #f) - (tdbdat (tasks:open-db)) + (tdbdat (tasks:open-db area-dat)) (keys (rmt:get-keys area-dat)) (rundat (mt:get-runs-by-patt keys runnamepatt target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1)) (states (if state (string-split state ",") '())) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -48,15 +48,15 @@ ;; all routes though here end in exit ... ;; ;; start_server ;; (define (server:launch run-id area-dat) - (case *transport-type* + (case (megatest:area-transport area-dat) ((http)(http-transport:launch run-id area-dat)) ((nmsg)(nmsg-transport:launch run-id area-dat)) ((rpc) (rpc-transport:launch run-id area-dat)) - (else (debug:print 0 "ERROR: unknown server type " *transport-type*)))) + (else (debug:print 0 "ERROR: unknown server type " (megatest:area-transport area-dat))))) ;; (else (debug:print 0 "ERROR: No known transport set, transport=" transport ", using rpc") ;; (rpc-transport:launch run-id))))) ;;====================================================================== ;; S E R V E R U T I L I T I E S @@ -82,11 +82,11 @@ (argv))))))) ;; When using zmq this would send the message back (two step process) ;; with spiffy or rpc this simply returns the return data to be returned ;; -(define (server:reply return-addr query-sig success/fail result #!key (remote #f)) +(define (server:reply return-addr query-sig success/fail result area-dat #!key (remote #f)) (debug:print-info 11 "server:reply return-addr=" return-addr ", result=" result) ;; (send-message pubsock target send-more: #t) ;; (send-message pubsock (case (server:get-transport) ((rpc) (db:obj->string (vector success/fail query-sig result))) @@ -95,11 +95,11 @@ (let ((pub-socket (vector-ref (common:get-remote remote #f) 1))) (send-message pub-socket return-addr send-more: #t) (send-message pub-socket (db:obj->string (vector success/fail query-sig result))))) ((fs) result) (else - (debug:print 0 "ERROR: unrecognised transport type: " *transport-type*) + (debug:print 0 "ERROR: unrecognised transport type: " (megatest:area-transport area-dat)) result))) ;; Given a run id start a server process ### NOTE ### > file 2>&1 ;; if the run-id is zero and the target-host is set ;; try running on that host @@ -157,21 +157,21 @@ (define (server:try-running run-id area-dat) (if (eq? run-id 0) (server:run run-id area-dat) (rmt:start-server run-id))) -(define (server:check-if-running run-id) - (let ((tdbdat (tasks:open-db))) +(define (server:check-if-running run-id area-dat) + (let ((tdbdat (tasks:open-db area-dat))) (let loop ((server (tasks:get-server (db:delay-if-busy tdbdat) run-id)) (trycount 0)) (if server ;; note: client:start will set (common:get-remote remote). this needs to be changed ;; also, client:start will login to the server, also need to change that. ;; ;; client:start returns #t if login was successful. ;; - (let ((res (case *transport-type* + (let ((res (case (megatest:area-transport area-dat) ((http)(server:ping-server run-id (tasks:hostinfo-get-interface server) (tasks:hostinfo-get-port server))) ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server) (tasks:hostinfo-get-port server) @@ -186,12 +186,12 @@ res))) #f)))) ;; called in megatest.scm, host-port is string hostname:port ;; -(define (server:ping run-id host:port) - (let ((tdbdat (tasks:open-db))) +(define (server:ping run-id host:port area-dat) + (let ((tdbdat (tasks:open-db area-dat))) (let* ((host-port (let ((slst (string-split host:port ":"))) (if (eq? (length slst) 2) (list (car slst)(string->number (cadr slst))) #f))) (toppath (launch:setup-for-run)) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -100,11 +100,11 @@ (handler (make-busy-timeout 36000))) (if (and exists (not write-access)) (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control (sqlite3:set-busy-handler! mdb handler) - (db:set-sync mdb) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) + (db:set-sync mdb area-dat) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) ;; (if (or (and (not exists) ;; (file-write-access? toppath)) ;; (not (file-read-access? dbpath))) ;; (begin ;; @@ -166,21 +166,21 @@ (define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3)) (define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) (define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) -(define (tasks:server-lock-slot mdb run-id) +(define (tasks:server-lock-slot mdb run-id area-dat) (tasks:server-clean-out-old-records-for-run-id mdb run-id " tasks:server-lock-slot") (if (< (tasks:num-in-available-state mdb run-id) 4) (begin - (tasks:server-set-available mdb run-id) + (tasks:server-set-available mdb run-id area-dat) (thread-sleep! (/ (random 1500) 1000)) ;; (thread-sleep! 2) ;; Try removing this. It may not be needed. (tasks:server-am-i-the-server? mdb run-id)) #f)) ;; register that this server may come online (first to register goes though with the process) -(define (tasks:server-set-available mdb run-id) +(define (tasks:server-set-available mdb run-id area-dat) (sqlite3:execute mdb "INSERT INTO servers (pid,hostname,port,pubport,start_time, priority,state,mt_version,heartbeat, interface,transport,run_id) VALUES(?, ?, ?, ?, strftime('%s','now'), ?, ?, ?,-1,?, ?, ?);" (current-process-id) ;; pid @@ -190,11 +190,11 @@ (random 1000) ;; priority (used a tiebreaker on get-available) "available" ;; state (common:version-signature) ;; mt_version -1 ;; interface ;; (conc (server:get-transport)) ;; transport - (conc *transport-type*) ;; transport + (conc (megatest:area-transport area-dat)) ;; transport run-id )) (define (tasks:num-in-available-state mdb run-id) (let ((res 0)) Index: tdb.scm ================================================================== --- tdb.scm +++ tdb.scm @@ -40,11 +40,11 @@ ;;====================================================================== ;; T E S T S P E C I F I C D B ;;====================================================================== ;; Create the sqlite db for the individual test(s) -(define (open-test-db work-area) +(define (open-test-db work-area area-dat) (debug:print-info 11 "open-test-db " work-area) (if (and work-area (directory? work-area) (file-read-access? work-area)) (let* ((dbpath (conc work-area "/testdat.db")) @@ -71,11 +71,11 @@ (if (and tdb-writeable *db-write-access*) (sqlite3:set-busy-handler! db handler)) (if (not dbexists) (begin - (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = FULL;") + (db:set-sync db area-dat) ;; (sqlite3:execute db "PRAGMA synchronous = FULL;") (debug:print-info 11 "Initialized test database " dbpath) (tdb:testdb-initialize db))) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") (debug:print-info 11 "open-test-db END (sucessful)" work-area) ;; now let's test that everything is correct @@ -97,31 +97,31 @@ ;; provide an in-mem db (this is dangerous!) (tdb:testdb-initialize baddb) baddb))) ;; find and open the testdat.db file for an existing test -(define (tdb:open-test-db-by-test-id test-id #!key (work-area #f)) +(define (tdb:open-test-db-by-test-id test-id area-dat #!key (work-area #f)) (let* ((test-path (if work-area work-area (rmt:test-get-rundir-from-test-id test-id)))) (debug:print 3 "TEST PATH: " test-path) - (open-test-db test-path))) + (open-test-db test-path area-dat))) ;; find and open the testdat.db file for an existing test -(define (tdb:open-test-db-by-test-id-local dbstruct run-id test-id #!key (work-area #f)) +(define (tdb:open-test-db-by-test-id-local dbstruct area-dat run-id test-id #!key (work-area #f)) (let* ((test-path (if work-area work-area (db:test-get-rundir-from-test-id dbstruct run-id test-id)))) (debug:print 3 "TEST PATH: " test-path) - (open-test-db test-path))) + (open-test-db test-path area-dat))) ;; find and open the testdat.db file for an existing test -(define (tdb:open-run-close-db-by-test-id-local dbstruct run-id test-id work-area proc . params) +(define (tdb:open-run-close-db-by-test-id-local dbstruct area-dat run-id test-id work-area proc . params) (let* ((test-path (if work-area work-area (db:test-get-rundir-from-test-id dbstruct run-id test-id))) - (tdb (open-test-db test-path))) + (tdb (open-test-db test-path area-dat))) (apply proc tdb params))) (define (tdb:testdb-initialize db) (debug:print 11 "db:testdb-initialize START") (sqlite3:with-transaction @@ -368,14 +368,14 @@ (conc (vector-ref b 2))) #f)) (string running 0) (if (> remtries 0) (begin (thread-sleep! 1) (loop (- remtries 1) (tasks:server-running-or-starting? (db:delay-if-busy - (tasks:open-db)) + (tasks:open-db *area-dat*) *area-dat*) run-id))))))) (test "did server become available" #t (let loop ((remtries 10) - (res (tasks:get-server (db:delay-if-busy (tasks:open-db)) run-id))) + (res (tasks:get-server (db:delay-if-busy (tasks:open-db *area-dat*) *area-dat*) run-id))) (if res (vector? res) (begin (if (> remtries 0) (begin (thread-sleep! 1.1) - (loop (- remtries 1)(tasks:get-server (db:delay-if-busy (tasks:open-db)) run-id))) + (loop (- remtries 1)(tasks:get-server (db:delay-if-busy (tasks:open-db *area-dat*) *area-dat*) run-id))) res))))) ) (list 0 1)) (define user (current-user-name)) @@ -117,18 +117,18 @@ (for-each (lambda (run-id) ;; test killing server ;; (tasks:kill-server-run-id run-id) -(test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)) +(test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db *area-dat*) *area-dat*) run-id)) ) (list 0 1)) ;; Tests to assess reading/writing while servers are starting/stopping (define start-time (current-seconds)) (let loop ((test-state 'start)) - (let* ((server-dats (tasks:get-server-records (db:delay-if-busy (tasks:open-db)) run-id)) + (let* ((server-dats (tasks:get-server-records (db:delay-if-busy (tasks:open-db *area-dat*) *area-dat*) run-id)) (first-dat (if (not (null? server-dats)) (car server-dats) #f))) (map (lambda (dat) (apply print (intersperse (vector->list dat) ", "))) @@ -163,17 +163,17 @@ ;; ;; (test "setup for run" #t (begin (launch:setup-for-run) ;; (string? (getenv "MT_RUN_AREA_HOME")))) ;; ;; (test "server-register, get-best-server" #t (let ((res #f)) -;; (open-run-close tasks:server-register tasks:open-db 1 "bob" 1234 100 'live 'http) -;; (set! res (open-run-close tasks:get-best-server tasks:open-db)) +;; (open-run-close tasks:server-register tasks:open-db *area-dat* 1 "bob" 1234 100 'live 'http) +;; (set! res (open-run-close tasks:get-best-server tasks:open-db *area-dat*)) ;; (number? (vector-ref res 3)))) ;; ;; (test "de-register server" #f (let ((res #f)) -;; (open-run-close tasks:server-deregister tasks:open-db "bob" port: 1234) -;; (vector? (open-run-close tasks:get-best-server tasks:open-db)))) +;; (open-run-close tasks:server-deregister tasks:open-db *area-dat* "bob" port: 1234) +;; (vector? (open-run-close tasks:get-best-server tasks:open-db *area-dat*)))) ;; ;; (define server-pid #f) ;; ;; ;; Not sure how the following should work, replacing it with system of megatest -server ;; ;; (test "launch server" #t (let ((pid (process-fork (lambda () @@ -183,19 +183,19 @@ ;; ;; (number? pid))) ;; (system "../../bin/megatest -server - -debug 22 > server.log 2> server.log &") ;; ;; (let loop ((n 10)) ;; (thread-sleep! 1) ;; need to wait for server to start. -;; (let ((res (open-run-close tasks:get-best-server tasks:open-db))) +;; (let ((res (open-run-close tasks:get-best-server tasks:open-db *area-dat*))) ;; (print "tasks:get-best-server returned " res) ;; (if (and (not res) ;; (> n 0)) ;; (loop (- n 1))))) ;; ;; (test "get-best-server" #t (begin ;; (client:launch) -;; (let ((dat (open-run-close tasks:get-best-server tasks:open-db))) +;; (let ((dat (open-run-close tasks:get-best-server tasks:open-db *area-dat*))) ;; (vector? dat)))) ;; ;; (define *keys* (keys:config-get-fields *configdat*)) ;; (define *keyvals* (keys:target->keyval *keys* "a/b/c")) ;; Index: zmq-transport.scm ================================================================== --- zmq-transport.scm +++ zmq-transport.scm @@ -110,11 +110,11 @@ ;; what to do when we quit ;; ;; (on-exit (lambda () ;; (if (and toppath *server-info*) -;; (open-run-close tasks:server-deregister-self tasks:open-db (car *server-info*)) +;; (open-run-close tasks:server-deregister-self (lambda ()(tasks:open-db area-dat)) (car *server-info*)) ;; (let loop () ;; (let ((queue-len 0)) ;; (thread-sleep! (random 5)) ;; (mutex-lock! *incoming-mutex*) ;; (set! queue-len (length *incoming-data*)) @@ -148,11 +148,11 @@ (loop (cons packet queue-lst))))))) ;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; -(define (zmq-transport:keep-running) +(define (zmq-transport:keep-running area-dat) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (let* ((server-info (let loop () (let ((sdat #f)) @@ -178,11 +178,14 @@ ;; (print "Server running, count is " count) (if (< count 1) ;; 3x3 = 9 secs aprox (loop (+ count 1))) ;; NOTE: Get rid of this mechanism! It really is not needed... - (open-run-close tasks:server-update-heartbeat tasks:open-db (car server-info)) + (open-run-close tasks:server-update-heartbeat + (lambda () + (tasks:open-db area-dat)) + (car server-info)) ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access (mutex-lock! *heartbeat-mutex*) (set! last-access *last-db-access*) (mutex-unlock! *heartbeat-mutex*) @@ -198,11 +201,14 @@ (loop 0)) (begin (debug:print-info 0 "Starting to shutdown the server.") ;; need to delete only *my* server entry (future use) (set! *time-to-exit* #t) - (open-run-close tasks:server-deregister-self tasks:open-db (get-host-name)) + (open-run-close tasks:server-deregister-self + (lambda () + (tasks:open-db area-dat)) + (get-host-name)) (thread-sleep! 1) (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Server shutdown complete. Exiting") (exit))))))) @@ -234,11 +240,11 @@ (p2 (caddr s2))) (set! *runremote* #f) (debug:print 0 "Server started on " ipaddrstr " ports " p1 " and " p2) (mutex-lock! *heartbeat-mutex*) (set! *server-info* (open-run-close tasks:server-register - tasks:open-db + (lambda ()(tasks:open-db area-dat)) (current-process-id) ipaddrstr p1 0 'live 'zmq @@ -304,11 +310,11 @@ #f)))) ;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; -(define (zmq-transport:keep-running) +(define (zmq-transport:keep-running area-dat) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (let* ((server-info (let loop () (let ((sdat #f)) @@ -320,11 +326,11 @@ (sleep 4) (loop)))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) - (tdb (tasks:open-db)) + (tdb (tasks:open-db area-dat)) (spid (tasks:server-get-server-id tdb #f iface port #f))) (print "Keep-running got server pid " spid ", using iface " iface " and port " port) (let loop ((count 0)) (thread-sleep! 4) ;; no need to do this very often ;; NB// sync currently does NOT return queue-length