From: Jason Dillaman Date: Sun, 15 Oct 2017 21:26:25 +0000 (-0400) Subject: librbd: batch large object map updates into multiple chunks X-Git-Tag: v12.2.2~86^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f8bfd9458d44d96d4ef56a28195a9e36fc0190c6;p=ceph.git librbd: batch large object map updates into multiple chunks Fixes: http://tracker.ceph.com/issues/21797 Signed-off-by: Jason Dillaman (cherry picked from commit 04c5d12acc045731fbf1d0ddce276b5743d9fd89) --- diff --git a/src/librbd/ObjectMap.cc b/src/librbd/ObjectMap.cc index 0930e6a56a4..257554dc171 100644 --- a/src/librbd/ObjectMap.cc +++ b/src/librbd/ObjectMap.cc @@ -111,10 +111,10 @@ bool ObjectMap::object_may_exist(uint64_t object_no) const } template -bool ObjectMap::update_required(uint64_t object_no, uint8_t new_state) { +bool ObjectMap::update_required(const ceph::BitVector<2>::Iterator& it, + uint8_t new_state) { assert(m_image_ctx.object_map_lock.is_wlocked()); - uint8_t state = (*this)[object_no]; - + uint8_t state = *it; if ((state == new_state) || (new_state == OBJECT_PENDING && state == OBJECT_NONEXISTENT) || (new_state == OBJECT_NONEXISTENT && state != OBJECT_PENDING)) { @@ -224,7 +224,7 @@ void ObjectMap::detained_aio_update(UpdateOperation &&op) { BlockGuardCell *cell; int r = m_update_guard->detain({op.start_object_no, op.end_object_no}, - &op, &cell); + &op, &cell); if (r < 0) { lderr(cct) << "failed to detain object map update: " << cpp_strerror(r) << dendl; @@ -297,13 +297,14 @@ void ObjectMap::aio_update(uint64_t snap_id, uint64_t start_object_no, return; } - uint64_t object_no; - for (object_no = start_object_no; object_no < end_object_no; ++object_no) { - if (update_required(object_no, new_state)) { + auto it = m_object_map.begin() + start_object_no; + auto end_it = m_object_map.begin() + end_object_no; + for (; it != end_it; ++it) { + if (update_required(it, new_state)) { break; } } - if (object_no == end_object_no) { + if (it == end_it) { ldout(cct, 20) << "object map update not required" << dendl; m_image_ctx.op_work_queue->queue(on_finish, 0); return; diff --git a/src/librbd/ObjectMap.h b/src/librbd/ObjectMap.h index 427ecdf165a..ebd1a9ba310 100644 --- a/src/librbd/ObjectMap.h +++ b/src/librbd/ObjectMap.h @@ -69,15 +69,15 @@ public: const ZTracer::Trace &parent_trace, T *callback_object) { assert(start_object_no < end_object_no); if (snap_id == CEPH_NOSNAP) { - uint64_t object_no; - for (object_no = start_object_no; object_no < end_object_no; - ++object_no) { - if (update_required(object_no, new_state)) { + auto it = m_object_map.begin() + start_object_no; + auto end_it = m_object_map.begin() + end_object_no; + for (; it != end_it; ++it) { + if (update_required(it, new_state)) { break; } } - if (object_no == end_object_no) { + if (it == end_it) { return false; } @@ -133,7 +133,8 @@ private: uint64_t end_object_no, uint8_t new_state, const boost::optional ¤t_state, const ZTracer::Trace &parent_trace, Context *on_finish); - bool update_required(uint64_t object_no, uint8_t new_state); + bool update_required(const ceph::BitVector<2>::Iterator &it, + uint8_t new_state); }; diff --git a/src/librbd/object_map/UpdateRequest.cc b/src/librbd/object_map/UpdateRequest.cc index 8c0ec69de8d..9ceb28a1773 100644 --- a/src/librbd/object_map/UpdateRequest.cc +++ b/src/librbd/object_map/UpdateRequest.cc @@ -7,28 +7,44 @@ #include "common/dout.h" #include "librbd/ImageCtx.h" #include "librbd/ObjectMap.h" +#include "librbd/Utils.h" #include "cls/lock/cls_lock_client.h" #include #define dout_subsys ceph_subsys_rbd #undef dout_prefix -#define dout_prefix *_dout << "librbd::object_map::UpdateRequest: " +#define dout_prefix *_dout << "librbd::object_map::UpdateRequest: " << this \ + << " " << __func__ << ": " namespace librbd { namespace object_map { +namespace { + +// keep aligned to bit_vector 4K block sizes +const uint64_t MAX_OBJECTS_PER_UPDATE = 256 * (1 << 10); + +} + template void UpdateRequest::send() { + update_object_map(); +} + +template +void UpdateRequest::update_object_map() { assert(m_image_ctx.snap_lock.is_locked()); assert(m_image_ctx.object_map_lock.is_locked()); CephContext *cct = m_image_ctx.cct; - // safe to update in-memory state first without handling rollback since any - // failures will invalidate the object map + // break very large requests into manageable batches + m_update_end_object_no = MIN( + m_end_object_no, m_update_start_object_no + MAX_OBJECTS_PER_UPDATE); + std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id, m_snap_id)); - ldout(cct, 20) << this << " updating object map" - << ": ictx=" << &m_image_ctx << ", oid=" << oid << ", [" - << m_start_object_no << "," << m_end_object_no << ") = " + ldout(cct, 20) << "ictx=" << &m_image_ctx << ", oid=" << oid << ", " + << "[" << m_update_start_object_no << "," + << m_update_end_object_no << ") = " << (m_current_state ? stringify(static_cast(*m_current_state)) : "") << "->" << static_cast(m_new_state) @@ -38,10 +54,12 @@ void UpdateRequest::send() { if (m_snap_id == CEPH_NOSNAP) { rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", ""); } - cls_client::object_map_update(&op, m_start_object_no, m_end_object_no, - m_new_state, m_current_state); + cls_client::object_map_update(&op, m_update_start_object_no, + m_update_end_object_no, m_new_state, + m_current_state); - librados::AioCompletion *rados_completion = create_callback_completion(); + auto rados_completion = librbd::util::create_rados_callback< + UpdateRequest, &UpdateRequest::handle_update_object_map>(this); std::vector snaps; int r = m_image_ctx.md_ctx.aio_operate( oid, rados_completion, &op, 0, snaps, @@ -51,26 +69,53 @@ void UpdateRequest::send() { } template -void UpdateRequest::finish_request() { - RWLock::RLocker snap_locker(m_image_ctx.snap_lock); - RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock); - ldout(m_image_ctx.cct, 20) << this << " on-disk object map updated" - << dendl; +void UpdateRequest::handle_update_object_map(int r) { + ldout(m_image_ctx.cct, 20) << "r=" << r << dendl; + + { + RWLock::RLocker snap_locker(m_image_ctx.snap_lock); + RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock); + update_in_memory_object_map(); + + if (m_update_end_object_no < m_end_object_no) { + m_update_start_object_no = m_update_end_object_no; + update_object_map(); + return; + } + } + + // no more batch updates to send + complete(r); +} + +template +void UpdateRequest::update_in_memory_object_map() { + assert(m_image_ctx.snap_lock.is_locked()); + assert(m_image_ctx.object_map_lock.is_locked()); // rebuilding the object map might update on-disk only if (m_snap_id == m_image_ctx.snap_id) { - for (uint64_t object_no = m_start_object_no; - object_no < MIN(m_end_object_no, m_object_map.size()); - ++object_no) { - uint8_t state = m_object_map[object_no]; + ldout(m_image_ctx.cct, 20) << dendl; + + auto it = m_object_map.begin() + + MIN(m_update_start_object_no, m_object_map.size()); + auto end_it = m_object_map.begin() + + MIN(m_update_end_object_no, m_object_map.size()); + for (; it != end_it; ++it) { + auto state_ref = *it; + uint8_t state = state_ref; if (!m_current_state || state == *m_current_state || (*m_current_state == OBJECT_EXISTS && state == OBJECT_EXISTS_CLEAN)) { - m_object_map[object_no] = m_new_state; + state_ref = m_new_state; } } } } +template +void UpdateRequest::finish_request() { +} + } // namespace object_map } // namespace librbd diff --git a/src/librbd/object_map/UpdateRequest.h b/src/librbd/object_map/UpdateRequest.h index 175160752da..cb9804d07c9 100644 --- a/src/librbd/object_map/UpdateRequest.h +++ b/src/librbd/object_map/UpdateRequest.h @@ -41,7 +41,8 @@ public: const ZTracer::Trace &parent_trace, Context *on_finish) : Request(image_ctx, snap_id, on_finish), m_object_map(*object_map), m_start_object_no(start_object_no), m_end_object_no(end_object_no), - m_new_state(new_state), m_current_state(current_state), + m_update_start_object_no(start_object_no), m_new_state(new_state), + m_current_state(current_state), m_trace(util::create_trace(image_ctx, "update object map", parent_trace)) { m_trace.event("start"); @@ -56,12 +57,35 @@ protected: void finish_request() override; private: + /** + * @verbatim + * + * + * | + * |/------------------\ + * v | (repeat in batches) + * UPDATE_OBJECT_MAP -----/ + * | + * v + * + * + * @endverbatim + */ + ceph::BitVector<2> &m_object_map; uint64_t m_start_object_no; uint64_t m_end_object_no; + uint64_t m_update_start_object_no; + uint64_t m_update_end_object_no = 0; uint8_t m_new_state; boost::optional m_current_state; ZTracer::Trace m_trace; + + void update_object_map(); + void handle_update_object_map(int r); + + void update_in_memory_object_map(); + }; } // namespace object_map diff --git a/src/test/librbd/object_map/test_mock_UpdateRequest.cc b/src/test/librbd/object_map/test_mock_UpdateRequest.cc index 7f47be28176..453e49cd636 100644 --- a/src/test/librbd/object_map/test_mock_UpdateRequest.cc +++ b/src/test/librbd/object_map/test_mock_UpdateRequest.cc @@ -18,12 +18,22 @@ namespace object_map { using ::testing::_; using ::testing::DoDefault; +using ::testing::InSequence; using ::testing::Return; using ::testing::StrEq; class TestMockObjectMapUpdateRequest : public TestMockFixture { public: - void expect_update(librbd::ImageCtx *ictx, uint64_t snap_id, int r) { + void expect_update(librbd::ImageCtx *ictx, uint64_t snap_id, + uint64_t start_object_no, uint64_t end_object_no, + uint8_t new_state, + const boost::optional& current_state, int r) { + bufferlist bl; + ::encode(start_object_no, bl); + ::encode(end_object_no, bl); + ::encode(new_state, bl); + ::encode(current_state, bl); + std::string oid(ObjectMap<>::object_map_name(ictx->id, snap_id)); if (snap_id == CEPH_NOSNAP) { EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), @@ -33,11 +43,13 @@ public: if (r < 0) { EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), - exec(oid, _, StrEq("rbd"), StrEq("object_map_update"), _, _, _)) + exec(oid, _, StrEq("rbd"), StrEq("object_map_update"), + ContentsEqual(bl), _, _)) .WillOnce(Return(r)); } else { EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), - exec(oid, _, StrEq("rbd"), StrEq("object_map_update"), _, _, _)) + exec(oid, _, StrEq("rbd"), StrEq("object_map_update"), + ContentsEqual(bl), _, _)) .WillOnce(DoDefault()); } } @@ -92,7 +104,7 @@ TEST_F(TestMockObjectMapUpdateRequest, UpdateHeadOnDisk) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); ASSERT_EQ(0, acquire_exclusive_lock(*ictx)); - expect_update(ictx, CEPH_NOSNAP, 0); + expect_update(ictx, CEPH_NOSNAP, 0, 1, OBJECT_NONEXISTENT, OBJECT_EXISTS, 0); ceph::BitVector<2> object_map; object_map.resize(1); @@ -122,7 +134,7 @@ TEST_F(TestMockObjectMapUpdateRequest, UpdateSnapOnDisk) { "snap1")); uint64_t snap_id = ictx->snap_id; - expect_update(ictx, snap_id, 0); + expect_update(ictx, snap_id, 0, 1, OBJECT_NONEXISTENT, OBJECT_EXISTS, 0); ceph::BitVector<2> object_map; object_map.resize(1); @@ -148,7 +160,8 @@ TEST_F(TestMockObjectMapUpdateRequest, UpdateOnDiskError) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); ASSERT_EQ(0, acquire_exclusive_lock(*ictx)); - expect_update(ictx, CEPH_NOSNAP, -EINVAL); + expect_update(ictx, CEPH_NOSNAP, 0, 1, OBJECT_NONEXISTENT, OBJECT_EXISTS, + -EINVAL); expect_invalidate(ictx); ceph::BitVector<2> object_map; @@ -178,7 +191,8 @@ TEST_F(TestMockObjectMapUpdateRequest, RebuildSnapOnDisk) { ASSERT_EQ(CEPH_NOSNAP, ictx->snap_id); uint64_t snap_id = ictx->snap_info.rbegin()->first; - expect_update(ictx, snap_id, 0); + expect_update(ictx, snap_id, 0, 1, OBJECT_EXISTS_CLEAN, + boost::optional(), 0); expect_unlock_exclusive_lock(*ictx); ceph::BitVector<2> object_map; @@ -199,5 +213,40 @@ TEST_F(TestMockObjectMapUpdateRequest, RebuildSnapOnDisk) { ASSERT_NE(OBJECT_EXISTS_CLEAN, object_map[0]); } +TEST_F(TestMockObjectMapUpdateRequest, BatchUpdate) { + REQUIRE_FEATURE(RBD_FEATURE_OBJECT_MAP); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + librbd::NoOpProgressContext no_progress; + ASSERT_EQ(0, ictx->operations->resize(712312 * ictx->get_object_size(), false, + no_progress)); + ASSERT_EQ(0, acquire_exclusive_lock(*ictx)); + + InSequence seq; + expect_update(ictx, CEPH_NOSNAP, 0, 262144, OBJECT_NONEXISTENT, OBJECT_EXISTS, + 0); + expect_update(ictx, CEPH_NOSNAP, 262144, 524288, OBJECT_NONEXISTENT, + OBJECT_EXISTS, 0); + expect_update(ictx, CEPH_NOSNAP, 524288, 712312, OBJECT_NONEXISTENT, + OBJECT_EXISTS, 0); + expect_unlock_exclusive_lock(*ictx); + + ceph::BitVector<2> object_map; + object_map.resize(712312); + + C_SaferCond cond_ctx; + AsyncRequest<> *req = new UpdateRequest<>( + *ictx, &object_map, CEPH_NOSNAP, 0, object_map.size(), OBJECT_NONEXISTENT, + OBJECT_EXISTS, {}, &cond_ctx); + { + RWLock::RLocker snap_locker(ictx->snap_lock); + RWLock::WLocker object_map_locker(ictx->object_map_lock); + req->send(); + } + ASSERT_EQ(0, cond_ctx.wait()); +} + } // namespace object_map } // namespace librbd