From 4240846f5cf1525952ac42c0d6eddf9dfa459ce4 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 22 Oct 2012 17:57:08 -0700 Subject: [PATCH] librbd: use assert_exists() to simplify copyup check Previously we would explicitly STAT the object to see if it exists before sending the write to the OSD. Instead, send the write optimistically, and assert that the object already exists. This avoids an extra round trip in the optimistic/common case, and makes the existence check in the initial first-write case more expensive because we send the data payload along. Signed-off-by: Sage Weil --- src/librbd/AioRequest.cc | 52 +++++++++++++++++----------------------- src/librbd/AioRequest.h | 28 +++++++++++----------- 2 files changed, 36 insertions(+), 44 deletions(-) diff --git a/src/librbd/AioRequest.cc b/src/librbd/AioRequest.cc index 1e535d13d1d8d..363aa2610f3e7 100644 --- a/src/librbd/AioRequest.cc +++ b/src/librbd/AioRequest.cc @@ -110,7 +110,7 @@ namespace librbd { /** read **/ AbstractWrite::AbstractWrite() - : m_state(LIBRBD_AIO_WRITE_FINAL), + : m_state(LIBRBD_AIO_WRITE_FLAT), m_parent_overlap(0) {} AbstractWrite::AbstractWrite(ImageCtx *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, uint64_t len, @@ -119,10 +119,9 @@ namespace librbd { const ::SnapContext &snapc, librados::snap_t snap_id, Context *completion, bool hide_enoent) - : AioRequest(ictx, oid, object_no, object_off, len, snap_id, completion, hide_enoent) + : AioRequest(ictx, oid, object_no, object_off, len, snap_id, completion, hide_enoent), + m_state(LIBRBD_AIO_WRITE_FLAT) { - m_state = LIBRBD_AIO_WRITE_FINAL; - m_object_image_extents = objectx; m_parent_overlap = object_overlap; @@ -138,13 +137,10 @@ namespace librbd { void AbstractWrite::guard_write() { if (has_parent()) { - m_state = LIBRBD_AIO_WRITE_CHECK_EXISTS; - m_read.stat(NULL, NULL, NULL); + m_state = LIBRBD_AIO_WRITE_GUARD; + m_write.assert_exists(); + ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl; } - ldout(m_ictx->cct, 20) << __func__ << " has_parent = " << has_parent() - << " m_state = " << m_state << " check exists = " - << LIBRBD_AIO_WRITE_CHECK_EXISTS << dendl; - } bool AbstractWrite::should_complete(int r) @@ -154,13 +150,9 @@ namespace librbd { bool finished = true; switch (m_state) { - case LIBRBD_AIO_WRITE_CHECK_EXISTS: - ldout(m_ictx->cct, 20) << "WRITE_CHECK_EXISTS" << dendl; - if (r < 0 && r != -ENOENT) { - ldout(m_ictx->cct, 20) << "error checking for object existence" << dendl; - break; - } - finished = false; + case LIBRBD_AIO_WRITE_GUARD: + ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl; + if (r == -ENOENT) { Mutex::Locker l(m_ictx->snap_lock); Mutex::Locker l2(m_ictx->parent_lock); @@ -171,24 +163,29 @@ namespace librbd { m_state = LIBRBD_AIO_WRITE_COPYUP; read_from_parent(m_object_image_extents); + finished = false; + break; + } + if (r < 0) { + ldout(m_ictx->cct, 20) << "error checking for object existence" << dendl; break; } - ldout(m_ictx->cct, 20) << "no need to read from parent" << dendl; - m_state = LIBRBD_AIO_WRITE_FINAL; - send(); break; + case LIBRBD_AIO_WRITE_COPYUP: ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl; - m_state = LIBRBD_AIO_WRITE_FINAL; + m_state = LIBRBD_AIO_WRITE_GUARD; if (r < 0) return should_complete(r); send_copyup(); finished = false; break; - case LIBRBD_AIO_WRITE_FINAL: - ldout(m_ictx->cct, 20) << "WRITE_FINAL" << dendl; + + case LIBRBD_AIO_WRITE_FLAT: + ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl; // nothing to do break; + default: lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; assert(0); @@ -201,13 +198,8 @@ namespace librbd { librados::AioCompletion *rados_completion = librados::Rados::aio_create_completion(this, NULL, rados_req_cb); int r; - if (m_state == LIBRBD_AIO_WRITE_CHECK_EXISTS) { - assert(m_read.size()); - r = m_ioctx.aio_operate(m_oid, rados_completion, &m_read, &m_read_data); - } else { - assert(m_write.size()); - r = m_ioctx.aio_operate(m_oid, rados_completion, &m_write); - } + assert(m_write.size()); + r = m_ioctx.aio_operate(m_oid, rados_completion, &m_write); rados_completion->release(); return r; } diff --git a/src/librbd/AioRequest.h b/src/librbd/AioRequest.h index ce1ad8db87745..259af88dda716 100644 --- a/src/librbd/AioRequest.h +++ b/src/librbd/AioRequest.h @@ -109,24 +109,25 @@ namespace librbd { private: /** - * Writes go through the following state machine to - * deal with layering: + * Writes go through the following state machine to deal with + * layering: + * * need copyup - * LIBRBD_AIO_CHECK_EXISTS ---------------> LIBRBD_AIO_WRITE_COPYUP - * | | - * | no overlap or object exists | parent data read - * | | - * v | - * LIBRBD_AIO_WRITE_FINAL <--------------------------/ + * LIBRBD_AIO_WRITE_GUARD ---------------> LIBRBD_AIO_WRITE_COPYUP + * | ^ | + * v \------------------------------/ + * done + * ^ + * | + * LIBRBD_AIO_WRITE_FLAT * - * By default images start in LIBRBD_AIO_WRITE_FINAL. - * If the write may need a copyup, it will start in - * LIBRBD_AIO_WRITE_CHECK_EXISTS instead. + * Writes start in LIBRBD_AIO_WRITE_GUARD or _FLAT, depending on whether + * there is a parent or not. */ enum write_state_d { - LIBRBD_AIO_WRITE_CHECK_EXISTS, + LIBRBD_AIO_WRITE_GUARD, LIBRBD_AIO_WRITE_COPYUP, - LIBRBD_AIO_WRITE_FINAL + LIBRBD_AIO_WRITE_FLAT }; protected: @@ -135,7 +136,6 @@ namespace librbd { write_state_d m_state; vector > m_object_image_extents; uint64_t m_parent_overlap; - librados::ObjectReadOperation m_read; librados::ObjectWriteOperation m_write; librados::ObjectWriteOperation m_copyup; -- 2.39.5