Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -160,11 +160,11 @@ (transport-type (megatest:area-transport area-dat))) (if (<= remaining-tries 0) (begin (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) (exit 1)) - (let* ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id))) + (let* ((server-dat (tasks:get-server (db:delay-if-busy tdbdat area-dat) run-id))) (debug:print-info 4 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if server-dat (let* ((iface (tasks:hostinfo-get-interface server-dat)) (hostname (tasks:hostinfo-get-hostname server-dat)) (port (tasks:hostinfo-get-port server-dat)) @@ -187,11 +187,11 @@ (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (case transport-type ((http)(http-transport:close-connections run-id))) (common:del-remote! remote run-id) (tasks:kill-server-run-id run-id) - (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) + (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat area-dat) run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat) " client:setup (server-dat = #t)") (if (> remaining-tries 8) Index: dcommon.scm ================================================================== --- dcommon.scm +++ dcommon.scm @@ -523,11 +523,11 @@ #:numcol-visible 7 #:numlin-visible 5 )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () - (let ((servers (tasks:get-all-servers (db:delay-if-busy tdbdat)))) + (let ((servers (tasks:get-all-servers (db:delay-if-busy tdbdat area-dat)))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) Index: http-transport.scm ================================================================== --- http-transport.scm +++ http-transport.scm @@ -143,16 +143,16 @@ ipaddrstr (portlogger:open-run-close portlogger:find-port area-dat) server-id area-dat)) (begin - (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id ipaddrstr portnum " http-transport:try-start-server") + (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat area-dat) run-id ipaddrstr portnum " http-transport:try-start-server") (print "ERROR: Tried and tried but could not start the server")))) ;; any error in following steps will result in a retry (set! *server-info* (list ipaddrstr portnum)) (tasks:server-set-interface-port - (db:delay-if-busy tdbdat) + (db:delay-if-busy tdbdat area-dat) server-id ipaddrstr portnum) (debug:print 0 "INFO: Trying to start server on " ipaddrstr ":" portnum) ;; This starts the spiffy server ;; NEED WAY TO SET IP TO #f TO BIND ALL @@ -161,11 +161,11 @@ (start-server port: portnum bind-address: (if (equal? config-hostname "-") ipaddrstr config-hostname)) (start-server port: portnum)) ;; (portlogger:open-run-close portlogger:set-port portnum "released") - (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id ipaddrstr portnum " http-transport:try-start-server") + (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat area-dat) run-id ipaddrstr portnum " http-transport:try-start-server") (debug:print 1 "INFO: server has been stopped")))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== @@ -390,11 +390,11 @@ (debug:print-info 0 "Still waiting, last-sdat=" last-sdat) (sleep 4) (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes (begin (debug:print 0 "ERROR: transport appears to have died, exiting server " server-id " for run " run-id) - (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature") + (tasks:server-delete-record (db:delay-if-busy tdbdat area-dat) server-id "failed to start, never received server alive signature") (exit)) (loop start-time (equal? sdat last-sdat) sdat))))))) (iface (car server-info)) @@ -419,11 +419,11 @@ (else ;; (> bad-sync-count 0) ;; we've had a fail or two, delay and loop (thread-sleep! 5) (loop count server-state (+ bad-sync-count 1))))) ((exn) (debug:print 0 "ERROR: error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server") - (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed") + (tasks:server-delete-record (db:delay-if-busy tdbdat area-dat) server-id " http-transport:keep-running crashed") (exit))) (set! sync-time (- (current-milliseconds) start-time)) (set! rem-time (quotient (- 4000 sync-time) 1000)) (debug:print 2 "SYNC: time= " sync-time ", rem-time=" rem-time) @@ -434,22 +434,22 @@ ;; ;; no *inmemdb* yet, set running after our first pass through and start the db ;; (if (eq? server-state 'available) - (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers + (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat area-dat) run-id))) ;; try to ensure no double registering of servers (if (equal? new-server-id server-id) (begin - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep") + (tasks:server-set-state! (db:delay-if-busy tdbdat area-dat) server-id "dbprep") (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access (set! *inmemdb* (db:setup run-id)) ;; force initialization ;; (db:get-db *inmemdb* #t) (db:get-db *inmemdb* run-id) - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")) + (tasks:server-set-state! (db:delay-if-busy tdbdat area-dat) server-id "running")) (begin ;; gotta exit nicely - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision") + (tasks:server-set-state! (db:delay-if-busy tdbdat area-dat) server-id "collision") (http-transport:server-shutdown server-id port area-dat)))))) (if (< count 1) ;; 3x3 = 9 secs aprox (loop (+ count 1) 'running bad-sync-count)) @@ -498,11 +498,11 @@ (set! *time-to-exit* #t) (if *inmemdb* (db:sync-touched *inmemdb* *run-id* force-sync: #t)) ;; ;; start_shutdown ;; - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") + (tasks:server-set-state! (db:delay-if-busy tdbdat area-dat) server-id "shutting-down") (portlogger:open-run-close portlogger:set-port area-dat port "released") (thread-sleep! 5) (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Number of cached writes " *number-of-writes*) (debug:print-info 0 "Average cached write time " @@ -517,11 +517,11 @@ "n/a (no queries)" (/ *total-non-write-delay* *number-non-write-queries*)) " ms") (debug:print-info 0 "Server shutdown complete. Exiting") - (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running complete") + (tasks:server-delete-record (db:delay-if-busy tdbdat area-dat) server-id " http-transport:keep-running complete") (exit))) ;; all routes though here end in exit ... ;; ;; start_server? @@ -538,22 +538,22 @@ (current-output-port *alt-log-file*))))) (if (server:check-if-running run-id area-dat) (begin (debug:print 0 "INFO: Server for run-id " run-id " already running") (exit 0))) - (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id area-dat)) + (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat area-dat) run-id area-dat)) (remtries 4)) (if (not server-id) (if (> remtries 0) (begin (thread-sleep! 2) - (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id area-dat) + (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat area-dat) run-id area-dat) (- remtries 1))) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") - (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " http-transport:launch") + (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat area-dat) " http-transport:launch") )) (let* ((th2 (make-thread (lambda () (debug:print-info 0 "Server run thread started") (http-transport:run (if (args:get-arg "-server") Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -689,11 +689,11 @@ (if (or (args:get-arg "-list-servers") (args:get-arg "-stop-server")) (let ((tl (launch:setup-for-run *area-dat*))) (if tl (let* ((tdbdat (tasks:open-db *area-dat*)) - (servers (tasks:get-all-servers (db:delay-if-busy tdbdat))) + (servers (tasks:get-all-servers (db:delay-if-busy tdbdat *area-dat*))) (fmtstr "~5a~12a~8a~20a~24a~10a~10a~10a~10a\n") (servers-to-kill '()) (killinfo (args:get-arg "-stop-server")) (khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f)) (sid (if killinfo (if (substring-index ":" killinfo) #f (string->number killinfo)) #f))) @@ -718,13 +718,13 @@ ;; (zmq-sockets (if status (server:client-connect hostname port) #f))) ;; no need to login as status of #t indicates we are connecting to correct ;; server (if (equal? state "dead") (if (> last-update (* 25 60 60)) ;; keep records around for slighly over a day. - (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid action: 'delete)) + (tasks:server-deregister (db:delay-if-busy tdbdat *area-dat*) hostname pullport: pullport pid: pid action: 'delete)) (if (> last-update 20) ;; Mark as dead if not updated in last 20 seconds - (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid))) + (tasks:server-deregister (db:delay-if-busy tdbdat *area-dat*) hostname pullport: pullport pid: pid))) (format #t fmtstr id mt-ver pid hostname (conc interface ":" pullport) pubport last-update (if status "alive" "dead") transport) (if (or (equal? id sid) (equal? sid 0)) ;; kill all/any (begin Index: nmsg-transport.scm ================================================================== --- nmsg-transport.scm +++ nmsg-transport.scm @@ -72,24 +72,24 @@ (tdbdat (tasks:open-db area-dat))) (thread-start! server-thread) (thread-sleep! 0.1) (if (nmsg-transport:ping hostn start-port timeout: 2 expected-key: (current-process-id)) (let ((interface (if (equal? hostn "-")(get-host-name) hostn))) - (tasks:server-set-interface-port (db:delay-if-busy tdbdat) server-id interface start-port) - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep") + (tasks:server-set-interface-port (db:delay-if-busy tdbdat area-dat) server-id interface start-port) + (tasks:server-set-state! (db:delay-if-busy tdbdat area-dat) server-id "dbprep") (set! *server-info* (list hostn start-port)) ;; probably not needed anymore? currently used by keep-running (thread-sleep! 3) ;; give some margin for queries to complete before switching from file based access to server based access ;; (set! *inmemdb* dbstruct) - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running") + (tasks:server-set-state! (db:delay-if-busy tdbdat area-dat) server-id "running") (thread-start! (make-thread (lambda ()(nmsg-transport:keep-running server-id run-id area-dat)) "keep running")) (thread-join! server-thread)) (if (> retrynum 0) (begin (debug:print 0 "WARNING: Failed to connect to server (self) on host " hostn ":" start-port ", trying again.") - (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature") + (tasks:server-delete-record (db:delay-if-busy tdbdat area-dat) server-id "failed to start, never received server alive signature") (portlogger:open-run-close portlogger:set-failed area-dat start-port) (nmsg-transport:run dbstruct area-dat hostn run-id server-id)) (begin (debug:print 0 "ERROR: could not find an open port to start server on. Giving up") (exit 1)))))) @@ -124,26 +124,26 @@ ;; (current-output-port *alt-log-file*))))) (if (server:check-if-running run-id area-dat) (begin (debug:print-info 0 "Server for run-id " run-id " already running") (exit 0))) - (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)) + (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat area-dat) run-id)) (remtries 4)) (if (not server-id) (if (> remtries 0) (begin (thread-sleep! 2) (if (not (server:check-if-running run-id area-dat)) - (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id) + (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat area-dat) run-id) (- remtries 1)) (begin (debug:print-info 0 "Another server took the slot, exiting") (exit 0)))) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") - (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " http-transport:launch") + (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat area-dat) " http-transport:launch") )) ;; locked in a server id, try to start up (nmsg-transport:run dbstruct area-dat hostn run-id server-id)) (set! *didsomething* #t) (exit)))) @@ -303,11 +303,11 @@ (loop 0)) (begin (debug:print-info 0 "Starting to shutdown the server.") (set! *time-to-exit* #t) (db:sync-touched *inmemdb* run-id force-sync: #t) - (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running") + (tasks:server-delete-record (db:delay-if-busy tdbdat area-dat) server-id " http-transport:keep-running") (debug:print-info 0 "Server shutdown complete. Exiting") (exit) )))))) ;;====================================================================== Index: olddashboard.scm ================================================================== --- olddashboard.scm +++ olddashboard.scm @@ -485,11 +485,11 @@ #:numcol-visible 7 #:numlin-visible 5 )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () - (let ((servers (tasks:get-all-servers (db:delay-if-busy tdbdat)))) + (let ((servers (tasks:get-all-servers (db:delay-if-busy tdbdat area-dat)))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -159,11 +159,11 @@ (server:run run-id area-dat) (rmt:start-server run-id))) (define (server:check-if-running run-id area-dat) (let ((tdbdat (tasks:open-db area-dat))) - (let loop ((server (tasks:get-server (db:delay-if-busy tdbdat) run-id)) + (let loop ((server (tasks:get-server (db:delay-if-busy tdbdat area-dat) run-id)) (trycount 0)) (if server ;; note: client:start will set (common:get-remote remote). this needs to be changed ;; also, client:start will login to the server, also need to change that. ;; @@ -179,11 +179,11 @@ ;; if the server didn't respond we must remove the record (if res #t (begin (debug:print-info 0 "server at " server " not responding, removing record") - (tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id + (tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat area-dat) run-id " server:check-if-running") res))) #f)))) ;; called in megatest.scm, host-port is string hostname:port @@ -193,11 +193,11 @@ (let* ((host-port (let ((slst (string-split host:port ":"))) (if (eq? (length slst) 2) (list (car slst)(string->number (cadr slst))) #f))) (toppath (launch:setup-for-run)) - (server-db-dat (if (not host-port)(tasks:get-server (db:delay-if-busy tdbdat) run-id) #f))) + (server-db-dat (if (not host-port)(tasks:get-server (db:delay-if-busy tdbdat area-dat) run-id) #f))) (if (not run-id) (begin (debug:print 0 "ERROR: must specify run-id when doing ping, -run-id n") (print "ERROR: No run-id") (exit 1)) Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -379,23 +379,23 @@ ;; (else ;; #f)))) ;; try to start a server and wait for it to be available ;; -(define (tasks:start-and-wait-for-server tdbdat run-id delay-max-tries) +(define (tasks:start-and-wait-for-server tdbdat run-id delay-max-tries area-dat) ;; ensure a server is running for this run - (let loop ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id)) + (let loop ((server-dat (tasks:get-server (db:delay-if-busy tdbdat area-dat) run-id)) (delay-time 0)) (if (and (not server-dat) (< delay-time delay-max-tries)) (begin (if (common:low-noise-print 60 "tasks:start-and-wait-for-server" run-id) (debug:print 0 "Try starting server for run-id " run-id)) (thread-sleep! (/ (random 2000) 1000)) (server:kind-run run-id) (thread-sleep! (min delay-time 1)) - (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1)))))) + (loop (tasks:get-server (db:delay-if-busy tdbdat area-dat) run-id)(+ delay-time 1)))))) (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) @@ -430,19 +430,19 @@ ;; look up a server by run-id and send it a kill, also delete the record for that server ;; (define (tasks:kill-server-run-id run-id area-dat #!key (tag "default")) (let* ((tdbdat (tasks:open-db area-dat)) - (sdat (tasks:get-server (db:delay-if-busy tdbdat) run-id))) + (sdat (tasks:get-server (db:delay-if-busy tdbdat area-dat) run-id))) (if sdat (let ((hostname (vector-ref sdat 6)) (pid (vector-ref sdat 5)) (server-id (vector-ref sdat 0))) - (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "killed") + (tasks:server-set-state! (db:delay-if-busy tdbdat area-dat) server-id "killed") (debug:print-info 0 "Killing server " server-id " for run-id " run-id " on host " hostname " with pid " pid) (tasks:kill-server hostname pid) - (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id tag) ) + (tasks:server-delete-record (db:delay-if-busy tdbdat area-dat) server-id tag) ) (debug:print-info 0 "No server found for run-id " run-id ", nothing to kill")) ;; (sqlite3:finalize! tdb) )) ;;====================================================================== @@ -698,16 +698,16 @@ (sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" param-key state-patt action-patt test-patt))))) -(define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt) +(define (tasks:find-task-queue-records dbstruct area-dat target run-name test-patt state-patt action-patt) ;; (handle-exceptions ;; exn ;; '() ;; (sqlite3:first-row - (let ((db (db:delay-if-busy (db:get-db dbstruct #f))) + (let ((db (db:delay-if-busy (db:get-db dbstruct #f) area-dat)) (res '())) (sqlite3:for-each-row (lambda (a . b) (set! res (cons (cons a b) res))) db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue