From: Somnath Roy Date: Tue, 16 Sep 2014 00:23:51 +0000 (-0700) Subject: FileStore: Race condition during object delete is fixed X-Git-Tag: v0.86~25^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=86a4bed6732b18ee3dcf729d7e5fc784b6680843;p=ceph.git FileStore: Race condition during object delete is fixed There was a race condition (and hence an OSD crash) between lfn_unlink and lfn_open. The cause was that lfn_open called the FDCache lookup without taking the index lock. The lookup increases the reference count, so Clear is not able to delete those FDs and they are leaked. The assert within FDCache clear was being hit because of this. Fixes: #9480 Signed-off-by: Somnath Roy --- diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index e91bda69d371..f22ca6bb25cc 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -230,23 +230,9 @@ int FileStore::lfn_open(coll_t cid, oid.generation == ghobject_t::NO_GEN )); assert(outfd); int r = 0; - bool need_lock = true; - if (!replaying) { - *outfd = fdcache.lookup(oid); - if (*outfd) { - if (!index) { - return 0; - } else { - if (!((*index).index)) { - r = get_index(cid, index); - return r; - } - } - } - } - int flags = O_RDWR; + if (create) flags |= O_CREAT; @@ -254,7 +240,6 @@ int FileStore::lfn_open(coll_t cid, if (!index) { index = &index2; } - if (!((*index).index)) { r = get_index(cid, index); } else { @@ -266,6 +251,16 @@ int FileStore::lfn_open(coll_t cid, if (need_lock) { ((*index).index)->access_lock.get_write(); } + if (!replaying) { + *outfd = fdcache.lookup(oid); + if (*outfd) { + if (need_lock) { + ((*index).index)->access_lock.put_write(); + } + return 0; + } + } + IndexedPath path2; IndexedPath *path = &path2;