@@ -155,14 +155,16 @@ (http-transport:try-start-server run-id ipaddrstr (portlogger:open-run-close portlogger:find-port) server-id)) (begin + (tasks:wait-on-busy-monitor.db) (tasks:server-force-clean-run-record (tasks:get-db) run-id ipaddrstr portnum " http-transport:try-start-server") (print "ERROR: Tried and tried but could not start the server")))) ;; any error in following steps will result in a retry (set! *server-info* (list ipaddrstr portnum)) + (tasks:wait-on-busy-monitor.db) (tasks:server-set-interface-port (tasks:get-db) server-id ipaddrstr portnum) (debug:print 0 "INFO: Trying to start server on " ipaddrstr ":" portnum) @@ -173,10 +175,11 @@ (start-server port: portnum bind-address: (if (equal? config-hostname "-") ipaddrstr config-hostname)) (start-server port: portnum)) ;; (portlogger:open-run-close portlogger:set-port portnum "released") + (tasks:wait-on-busy-monitor.db) (tasks:server-force-clean-run-record (tasks:get-db) run-id ipaddrstr portnum " http-transport:try-start-server") (debug:print 1 "INFO: server has been stopped")))) ;;====================================================================== ;; S E R V E R U T I L I T I E S @@ -256,10 +259,11 @@ (close-all-connections!)) (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries) (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1))) (begin (mutex-unlock! *http-mutex*) + (tasks:wait-on-busy-monitor.db) (tasks:kill-server-run-id run-id) #f)) (begin (debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n") ;; set up the http-client here @@ -369,10 +373,11 @@ (debug:print-info 0 "Still waiting, last-sdat=" last-sdat) (sleep 4) (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes (let ((tdb (tasks:open-db))) (debug:print 0 "ERROR: transport appears to have died, exiting server " server-id " for run " run-id) + (tasks:wait-on-busy-monitor.db) (tasks:server-delete-record tdb server-id "failed to start, never received server alive signature") (sqlite3:finalize! tdb) (exit)) (loop start-time (equal? sdat last-sdat) @@ -404,10 +409,11 @@ ;; ;; set_running after our first pass through and start the db ;; (if (eq? server-state 'available) (begin + (tasks:wait-on-busy-monitor.db) (tasks:server-set-state! tdb server-id "dbprep") (thread-sleep! 5) ;; give some margin for queries to complete before switching from file based access to server based access (set! *inmemdb* (db:setup run-id)) (tasks:server-set-state! tdb server-id "running"))) @@ -463,10 +469,11 @@ (loop 0 server-state)) (begin (debug:print-info 0 "Starting to shutdown the server.") ;; need to delete only *my* server entry (future use) (set! *time-to-exit* #t) + (tasks:wait-on-busy-monitor.db) ;; wait here in addition to just before the shutting-down (if *inmemdb* (db:sync-touched *inmemdb* *run-id* force-sync: #t)) ;; ;; start_shutdown ;; (tasks:server-set-state! tdb server-id "shutting-down") @@ -486,10 +493,11 @@ "n/a (no queries)" (/ *total-non-write-delay* *number-non-write-queries*)) " ms") (debug:print-info 0 "Server shutdown complete. Exiting") + (tasks:wait-on-busy-monitor.db) (tasks:server-delete-record tdb server-id " http-transport:keep-running") (exit)))))) ;; all routes though here end in exit ... ;; @@ -506,21 +514,24 @@ (current-output-port *alt-log-file*))))) (if (server:check-if-running run-id) (begin (debug:print 0 "INFO: Server for run-id " run-id " already running") (exit 0))) + (tasks:wait-on-busy-monitor.db) (let loop ((server-id (tasks:server-lock-slot (tasks:get-db) run-id)) (remtries 4)) (if (not server-id) (if (> remtries 0) (begin (thread-sleep! 2) + (tasks:wait-on-busy-monitor.db) (loop (tasks:server-lock-slot (tasks:get-db) run-id) (- remtries 1))) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") + (tasks:wait-on-busy-monitor.db) (tasks:server-delete-records-for-this-pid (tasks:get-db) " http-transport:launch") )) (let* ((th2 (make-thread (lambda () (debug:print-info 0 "Server run thread started") (http-transport:run