Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -1860,15 +1860,18 @@ ;; if we are not a server create a db handle. this is not finalized ;; so watch for problems. I'm still not clear if it is needed to manually ;; finalize sqlite3 dbs with the sqlite3 egg. ;; (define (db:no-sync-db db-in) - (if db-in - db-in - (let ((db (db:open-no-sync-db))) - (set! *no-sync-db* db) - db))) + (mutex-lock! *db-access-mutex*) + (let ((res (if db-in + db-in + (let ((db (db:open-no-sync-db))) + (set! *no-sync-db* db) + db)))) + (mutex-unlock! *db-access-mutex*) + res)) (define (db:no-sync-set db var val) (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) (define (db:no-sync-del! db var) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -1961,11 +1961,25 @@ (let* ((run-dir (db:test-get-rundir test)) ;; run dir is from the link tree (real-dir (if (common:file-exists? run-dir) ;; (resolve-pathname run-dir) (common:nice-path run-dir) #f)) - (clean-mode (or mode 'remove-all))) + (clean-mode (or mode 'remove-all)) + (test-id (db:test-get-id test)) + (lock-key (conc "test-" test-id)) + (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) + (expire-time (+ (current-seconds) 30))) ;; give up on getting the lock and steal it after 15 seconds + (if (car lock) + #t + (if (> (current-seconds) expire-time) + (begin + (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to clean test with id " test-id) + (rmt:no-sync-del! lock-key) ;; destroy the lock + (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; + (begin + (thread-sleep! 1) + (loop (rmt:no-sync-get-lock lock-key) expire-time))))))) (case clean-mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f)) ((remove-all) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f))) (debug:print-info 1 *default-log-port* "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) @@ -2001,11 +2015,12 @@ )) ;; Only delete the records *after* removing the directory. If things fail we have a record (case clean-mode ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) (db:test-get-state test)(db:test-get-status test) #f)) ((archive-remove) (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f)) - (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))))) + (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))) + (rmt:no-sync-del! lock-key))) ;;====================================================================== ;; Routines for manipulating runs ;;======================================================================