From: Sage Weil Date: Fri, 27 Dec 2013 21:42:54 +0000 (-0800) Subject: osd/ReplicatedPG: refuse to flush when older dirty clones are present X-Git-Tag: v0.77~22^2~32 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c91166eca022de7ac2dd73894af0ea5517fc8b71;p=ceph.git osd/ReplicatedPG: refuse to flush when older dirty clones are present If the next oldest clone is dirty, we cannot flush. That is, we must always flush starting with the oldest dirty clone. Note that we can never have a sequence like dirty -> clean -> dirty, because clones are only dirty on creation, are created in order, and cannot be flushed (cleaned) out of order. Thus checking the previous clone is sufficient (and thankfully cheap). Signed-off-by: Sage Weil --- diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 1e5d368697f..3da324ef819 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -5242,6 +5242,35 @@ int ReplicatedPG::start_flush(OpContext *ctx, bool blocking) << " " << (blocking ? 
"blocking" : "non-blocking/best-effort") << dendl; + // verify there are no older dirty clones + SnapSet& snapset = ctx->obc->ssc->snapset; + { + dout(20) << " snapset " << snapset << dendl; + vector<snapid_t>::reverse_iterator p = snapset.clones.rbegin(); + while (p != snapset.clones.rend() && *p >= soid.snap) + ++p; + if (p != snapset.clones.rend()) { + hobject_t next = soid; + next.snap = *p; + assert(next.snap < soid.snap); + ObjectContextRef older_obc = get_object_context(next, false); + if (older_obc) { + dout(20) << __func__ << " next oldest clone is " << older_obc->obs.oi + << dendl; + if (older_obc->obs.oi.is_dirty()) { + dout(10) << __func__ << " next oldest clone is dirty: " + << older_obc->obs.oi << dendl; + return -EBUSY; + } + } else { + dout(20) << __func__ << " next oldest clone " << next + << " is not present; implicitly clean" << dendl; + } + } else { + dout(20) << __func__ << " no older clones" << dendl; + } + } + if (blocking) ctx->obc->start_block(); diff --git a/src/test/librados/tier.cc b/src/test/librados/tier.cc index 1a714c5bddc..7c0ed607f5e 100644 --- a/src/test/librados/tier.cc +++ b/src/test/librados/tier.cc @@ -1204,6 +1204,129 @@ TEST(LibRadosTier, Flush) { ASSERT_EQ(0, destroy_one_pool_pp(base_pool_name, cluster)); } +TEST(LibRadosTier, FlushSnap) { + Rados cluster; + std::string base_pool_name = get_temp_pool_name(); + std::string cache_pool_name = base_pool_name + "-cache"; + ASSERT_EQ("", create_one_pool_pp(base_pool_name, cluster)); + ASSERT_EQ(0, cluster.pool_create(cache_pool_name.c_str())); + IoCtx cache_ioctx; + ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx)); + IoCtx base_ioctx; + ASSERT_EQ(0, cluster.ioctx_create(base_pool_name.c_str(), base_ioctx)); + + // configure cache + bufferlist inbl; + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier add\", \"pool\": \"" + base_pool_name + + "\", \"tierpool\": \"" + cache_pool_name + "\"}", + inbl, NULL, NULL)); + ASSERT_EQ(0, 
cluster.mon_command( + "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + base_pool_name + + "\", \"overlaypool\": \"" + cache_pool_name + "\"}", + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name + + "\", \"mode\": \"writeback\"}", + inbl, NULL, NULL)); + + // wait for maps to settle + cluster.wait_for_latest_osdmap(); + + // create object + { + bufferlist bl; + bl.append("a"); + ObjectWriteOperation op; + op.write_full(bl); + ASSERT_EQ(0, base_ioctx.operate("foo", &op)); + } + + // create a snapshot, clone + vector<uint64_t> my_snaps(1); + ASSERT_EQ(0, base_ioctx.selfmanaged_snap_create(&my_snaps[0])); + ASSERT_EQ(0, base_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0], + my_snaps)); + { + bufferlist bl; + bl.append("b"); + ObjectWriteOperation op; + op.write_full(bl); + ASSERT_EQ(0, base_ioctx.operate("foo", &op)); + } + + // and another + my_snaps.resize(2); + my_snaps[1] = my_snaps[0]; + ASSERT_EQ(0, base_ioctx.selfmanaged_snap_create(&my_snaps[0])); + ASSERT_EQ(0, base_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0], + my_snaps)); + { + bufferlist bl; + bl.append("c"); + ObjectWriteOperation op; + op.write_full(bl); + ASSERT_EQ(0, base_ioctx.operate("foo", &op)); + } + + // verify the object is present in the cache tier + { + ObjectIterator it = cache_ioctx.objects_begin(); + ASSERT_TRUE(it != cache_ioctx.objects_end()); + ASSERT_TRUE(it->first == string("foo")); + ++it; + ASSERT_TRUE(it == cache_ioctx.objects_end()); + } + + // verify the object is NOT present in the base tier + { + ObjectIterator it = base_ioctx.objects_begin(); + ASSERT_TRUE(it == base_ioctx.objects_end()); + } + + // flush on head (should fail) + { + ObjectReadOperation op; + op.cache_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, base_ioctx.aio_operate( + "foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_safe(); + 
ASSERT_EQ(-EBUSY, completion->get_return_value()); + completion->release(); + } + // flush on recent snap (should fail) + base_ioctx.snap_set_read(my_snaps[0]); + { + ObjectReadOperation op; + op.cache_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, base_ioctx.aio_operate( + "foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_safe(); + ASSERT_EQ(-EBUSY, completion->get_return_value()); + completion->release(); + } + + // tear down tiers + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + base_pool_name + + "\"}", + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier remove\", \"pool\": \"" + base_pool_name + + "\", \"tierpool\": \"" + cache_pool_name + "\"}", + inbl, NULL, NULL)); + + base_ioctx.close(); + cache_ioctx.close(); + + cluster.pool_delete(cache_pool_name.c_str()); + ASSERT_EQ(0, destroy_one_pool_pp(base_pool_name, cluster)); +} + TEST(LibRadosTier, FlushWriteRaces) { Rados cluster; std::string base_pool_name = get_temp_pool_name();