From: Somnath Roy Date: Tue, 16 Sep 2014 00:23:51 +0000 (-0700) Subject: FileStore: Race condition during object delete is fixed X-Git-Tag: v0.86~25^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=86a4bed6732b18ee3dcf729d7e5fc784b6680843;p=ceph.git FileStore: Race condition during object delete is fixed There was a race condition (and hence an OSD crash) between lfn_unlink and lfn_open. The cause was that lfn_open called the FDCache lookup without taking the index lock. The lookup increases the reference count, so Clear is unable to delete those FDs and they are leaked. The assert within FDCache clear was being hit because of this. Fixes: #9480 Signed-off-by: Somnath Roy --- diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index e91bda69d37..f22ca6bb25c 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -230,23 +230,9 @@ int FileStore::lfn_open(coll_t cid, oid.generation == ghobject_t::NO_GEN )); assert(outfd); int r = 0; - bool need_lock = true; - if (!replaying) { - *outfd = fdcache.lookup(oid); - if (*outfd) { - if (!index) { - return 0; - } else { - if (!((*index).index)) { - r = get_index(cid, index); - return r; - } - } - } - } - int flags = O_RDWR; + if (create) flags |= O_CREAT; @@ -254,7 +240,6 @@ int FileStore::lfn_open(coll_t cid, if (!index) { index = &index2; } - if (!((*index).index)) { r = get_index(cid, index); } else { @@ -266,6 +251,16 @@ int FileStore::lfn_open(coll_t cid, if (need_lock) { ((*index).index)->access_lock.get_write(); } + if (!replaying) { + *outfd = fdcache.lookup(oid); + if (*outfd) { + if (need_lock) { + ((*index).index)->access_lock.put_write(); + } + return 0; + } + } + IndexedPath path2; IndexedPath *path = &path2;