Index: lock-queue.scm ================================================================== --- lock-queue.scm +++ lock-queue.scm @@ -143,16 +143,24 @@ (handle-exceptions exn (begin (debug:print 0 "WARNING: Failed to release queue lock. Will try again in few seconds") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) - (thread-sleep! 10) + (thread-sleep! (/ count 10)) (if (> count 0) (begin (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat)) (lock-queue:release-lock fname test-id count: (- count 1))) - #f)) + (let ((journal (conc fname "-journal"))) + ;; If we've tried ten times and failed there is a serious problem + ;; try to remove the lock db and allow it to be recreated + (handle-exceptions + exn + #f + (if (file-exists? journal)(delete-file journal)) + (if (file-exists? fname) (delete-file fname)) + #f)))) (sqlite3:execute (lock-queue:db-dat-get-db dbdat) "DELETE FROM runlocks WHERE test_id=?;" test-id) (sqlite3:finalize! (lock-queue:db-dat-get-db dbdat))))) (define (lock-queue:steal-lock dbdat test-id #!key (count 10)) (debug:print-info 0 "Attempting to steal lock at " (lock-queue:db-dat-get-path dbdat)) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -90,10 +90,11 @@ (http-transport:server-dat-update-last-access connection-info) (if res (db:string->obj res) (let ((new-connection-info (client:setup run-id))) (debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.") + (hash-table-delete! *runremote* run-id) (rmt:send-receive cmd run-id params)))) (let ((max-avg-qry (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "-1")))) (debug:print-info 4 "no server and read-only query, bypassing normal channel") ;; (if (rmt:write-frequency-over-limit? cmd run-id)(server:kind-run run-id)) (let ((curr-max (rmt:get-max-query-average)))