From: Venky Shankar Date: Sat, 15 Oct 2016 11:48:30 +0000 (+0530) Subject: librbd: batch ObjectMap updates upon trim X-Git-Tag: v11.1.0~398^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=05653b7c512334533b801013f7e426363237301b;p=ceph.git librbd: batch ObjectMap updates upon trim Shrinking a clone which has snapshots and does not share a majority of objects with its parent (i.e., there are fewer objects to be copied up) involves a huge number of object map updates -- two (pre, post) per object. This results in lots of requests to be sent to OSDs, especially when trimming a gigantic image. This situation can be optimized by sending batched ObjectMap updates for an object range, thereby significantly cutting down OSD traffic and resulting in faster trim times. Fixes: http://tracker.ceph.com/issues/17356 Signed-off-by: Venky Shankar --- diff --git a/src/librbd/AioObjectRequest.h b/src/librbd/AioObjectRequest.h index 2c7a14866cf1..9ae2a84327c0 100644 --- a/src/librbd/AioObjectRequest.h +++ b/src/librbd/AioObjectRequest.h @@ -330,11 +330,14 @@ private: class AioObjectTrim : public AbstractAioObjectWrite { public: + // we'd need to only conditionally specify if a post object map + // update is needed. pre update is decided as usual (by checking + // the state of the object in the map). 
AioObjectTrim(ImageCtx *ictx, const std::string &oid, uint64_t object_no, - const ::SnapContext &snapc, Context *completion) + const ::SnapContext &snapc, Context *completion, + bool post_object_map_update) : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion, - true) { - } + true), m_post_object_map_update(post_object_map_update) { } protected: virtual void add_write_ops(librados::ObjectWriteOperation *wr) { @@ -350,8 +353,11 @@ protected: } virtual bool post_object_map_update() { - return true; + return m_post_object_map_update; } + +private: + bool m_post_object_map_update; }; class AioObjectTruncate : public AbstractAioObjectWrite { diff --git a/src/librbd/operation/TrimRequest.cc b/src/librbd/operation/TrimRequest.cc index 3992fb75e21c..f0ba3209ffa6 100644 --- a/src/librbd/operation/TrimRequest.cc +++ b/src/librbd/operation/TrimRequest.cc @@ -46,7 +46,7 @@ public: ldout(image_ctx.cct, 10) << "removing (with copyup) " << oid << dendl; AioObjectRequest<> *req = new AioObjectTrim(&image_ctx, oid, m_object_no, - m_snapc, this); + m_snapc, this, false); req->send(); return 0; } @@ -131,8 +131,18 @@ bool TrimRequest::should_complete(int r) RWLock::RLocker owner_lock(image_ctx.owner_lock); switch (m_state) { + case STATE_PRE_COPYUP: + ldout(cct, 5) << " PRE_COPYUP" << dendl; + send_copyup_objects(); + break; + case STATE_COPYUP_OBJECTS: ldout(cct, 5) << " COPYUP_OBJECTS" << dendl; + send_post_copyup(); + break; + + case STATE_POST_COPYUP: + ldout(cct, 5) << " POST_COPYUP" << dendl; send_pre_remove(); break; @@ -170,58 +180,33 @@ bool TrimRequest::should_complete(int r) template void TrimRequest::send() { - send_copyup_objects(); + send_pre_copyup(); } -template +template void TrimRequest::send_copyup_objects() { I &image_ctx = this->m_image_ctx; assert(image_ctx.owner_lock.is_locked()); - assert(image_ctx.exclusive_lock == nullptr || - image_ctx.exclusive_lock->is_lock_owner()); - if (m_delete_start >= m_num_objects) { - send_clean_boundary(); - 
return; - } + ldout(image_ctx.cct, 5) << this << " send_copyup_objects: " + << " start object=" << m_copyup_start << ", " + << " end object=" << m_copyup_end << dendl; + m_state = STATE_COPYUP_OBJECTS; ::SnapContext snapc; - bool has_snapshots; - uint64_t parent_overlap; { RWLock::RLocker snap_locker(image_ctx.snap_lock); RWLock::RLocker parent_locker(image_ctx.parent_lock); - snapc = image_ctx.snapc; - has_snapshots = !image_ctx.snaps.empty(); - int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &parent_overlap); - assert(r == 0); } - // copyup is only required for portion of image that overlaps parent - uint64_t copyup_end = Striper::get_num_objects(image_ctx.layout, - parent_overlap); - // TODO: protect against concurrent shrink and snap create? - if (copyup_end <= m_delete_start || !has_snapshots) { - send_pre_remove(); - return; - } - - uint64_t copyup_start = m_delete_start; - m_delete_start = copyup_end; - - ldout(image_ctx.cct, 5) << this << " send_copyup_objects: " - << " start object=" << copyup_start << ", " - << " end object=" << copyup_end << dendl; - m_state = STATE_COPYUP_OBJECTS; - Context *ctx = this->create_callback_context(); typename AsyncObjectThrottle::ContextFactory context_factory( boost::lambda::bind(boost::lambda::new_ptr >(), boost::lambda::_1, &image_ctx, snapc, boost::lambda::_2)); AsyncObjectThrottle *throttle = new AsyncObjectThrottle( - this, image_ctx, context_factory, ctx, &m_prog_ctx, copyup_start, - copyup_end); + this, image_ctx, context_factory, ctx, &m_prog_ctx, m_copyup_start, + m_copyup_end); throttle->start_ops(image_ctx.concurrent_management_ops); } @@ -245,6 +230,68 @@ void TrimRequest::send_remove_objects() { throttle->start_ops(image_ctx.concurrent_management_ops); } +template +void TrimRequest::send_pre_copyup() { + I &image_ctx = this->m_image_ctx; + assert(image_ctx.owner_lock.is_locked()); + + if (m_delete_start >= m_num_objects) { + send_clean_boundary(); + return; + } + + bool has_snapshots; + uint64_t 
parent_overlap; + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + RWLock::RLocker parent_locker(image_ctx.parent_lock); + + has_snapshots = !image_ctx.snaps.empty(); + int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &parent_overlap); + assert(r == 0); + } + + // copyup is only required for portion of image that overlaps parent + m_copyup_end = Striper::get_num_objects(image_ctx.layout, parent_overlap); + + // TODO: protect against concurrent shrink and snap create? + // skip to remove if no copyup is required. + if (m_copyup_end <= m_delete_start || !has_snapshots) { + send_pre_remove(); + return; + } + + m_copyup_start = m_delete_start; + m_delete_start = m_copyup_end; + + bool copyup_objects = false; + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.object_map == nullptr) { + copyup_objects = true; + } else { + ldout(image_ctx.cct, 5) << this << " send_pre_copyup: " + << " copyup_start=" << m_copyup_start + << " copyup_end=" << m_copyup_end << dendl; + m_state = STATE_PRE_COPYUP; + + assert(image_ctx.exclusive_lock->is_lock_owner()); + + Context *ctx = this->create_callback_context(); + RWLock::WLocker object_map_locker(image_ctx.object_map_lock); + if (!image_ctx.object_map->aio_update(m_copyup_start, m_copyup_end, + OBJECT_PENDING, OBJECT_EXISTS, ctx)) { + delete ctx; + copyup_objects = true; + } + } + } + + if (copyup_objects) { + send_copyup_objects(); + } +} + template void TrimRequest::send_pre_remove() { I &image_ctx = this->m_image_ctx; @@ -286,6 +333,39 @@ void TrimRequest::send_pre_remove() { } } +template +void TrimRequest::send_post_copyup() { + I &image_ctx = this->m_image_ctx; + assert(image_ctx.owner_lock.is_locked()); + + bool pre_remove_objects = false; + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.object_map == nullptr) { + pre_remove_objects = true; + } else { + ldout(image_ctx.cct, 5) << this << " send_post_copyup:" + << " copyup_start=" << m_copyup_start + << " copyup_end=" << 
m_copyup_end << dendl; + m_state = STATE_POST_COPYUP; + + assert(image_ctx.exclusive_lock->is_lock_owner()); + + Context *ctx = this->create_callback_context(); + RWLock::WLocker object_map_locker(image_ctx.object_map_lock); + if (!image_ctx.object_map->aio_update(m_copyup_start, m_copyup_end, + OBJECT_NONEXISTENT, OBJECT_PENDING, ctx)) { + delete ctx; + pre_remove_objects = true; + } + } + } + + if (pre_remove_objects) { + send_pre_remove(); + } +} + template void TrimRequest::send_post_remove() { I &image_ctx = this->m_image_ctx; @@ -364,7 +444,7 @@ void TrimRequest::send_clean_boundary() { AioObjectRequest<> *req; if (p->offset == 0) { req = new AioObjectTrim(&image_ctx, p->oid.name, p->objectno, snapc, - req_comp); + req_comp, true); } else { req = new AioObjectTruncate(&image_ctx, p->oid.name, p->objectno, p->offset, snapc, req_comp); diff --git a/src/librbd/operation/TrimRequest.h b/src/librbd/operation/TrimRequest.h index 58312813dfc4..814d18023097 100644 --- a/src/librbd/operation/TrimRequest.h +++ b/src/librbd/operation/TrimRequest.h @@ -34,14 +34,17 @@ protected: * @verbatim * * . . . . > STATE_FINISHED . . . . . . . . . - * | . . - * | . . . . . . . . . . . . . - * | . . - * v . . - * STATE_COPYUP_OBJECTS . . . . . - * | . . . - * | . . . - * v v v . + * | . . . . . . . . . . > . . . . . . . . . . + * | / . . + * STATE_PRE_COPYUP ---> STATE_COPYUP_OBJECTS . . + * | . . + * /-----------------------/ v . + * | . . + * v . . + * STATE_POST_COPYUP. . . > . . . + * | . . . . . . . . . . < . . . . . . . . . . + * | | . . + * v v v . * STATE_PRE_REMOVE ---> STATE_REMOVE_OBJECTS . * | . . . * /-----------------------/ . . . . . . . . 
@@ -64,7 +67,9 @@ protected: */ enum State { + STATE_PRE_COPYUP, STATE_COPYUP_OBJECTS, + STATE_POST_COPYUP, STATE_PRE_REMOVE, STATE_REMOVE_OBJECTS, STATE_POST_REMOVE, @@ -83,14 +88,21 @@ private: uint64_t m_new_size; ProgressContext &m_prog_ctx; + uint64_t m_copyup_start; + uint64_t m_copyup_end; + TrimRequest(ImageCtxT &image_ctx, Context *on_finish, uint64_t original_size, uint64_t new_size, ProgressContext &prog_ctx); + void send_pre_copyup(); void send_copyup_objects(); - void send_remove_objects(); + void send_post_copyup(); + void send_pre_remove(); + void send_remove_objects(); void send_post_remove(); + void send_clean_boundary(); void send_finish(int r); };