]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: merge copyup object map update states
authorJason Dillaman <dillaman@redhat.com>
Tue, 2 Apr 2019 15:51:36 +0000 (11:51 -0400)
committerJason Dillaman <dillaman@redhat.com>
Wed, 15 May 2019 20:05:36 +0000 (16:05 -0400)
The object map HEAD and HEAD/snapshot update states have been
simplified and merged into a single state. This also fixes
several potential race conditions and an issue where CoR might
incorrectly mark the HEAD object has exists+dirty.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
(cherry picked from commit 5b90dbe0ae4be3dccfc3fbc0c8d5eb1ed7bb3168)

src/librbd/io/CopyupRequest.cc
src/librbd/io/CopyupRequest.h

index a79350d6f862ce697d5ed4519b947e0d1c4d41ef..d96b3a344920fa4c705949b7c4cedc52bad96050 100644 (file)
@@ -35,37 +35,54 @@ namespace {
 class C_UpdateObjectMap : public C_AsyncObjectThrottle<> {
 public:
   C_UpdateObjectMap(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
-                    uint64_t object_no, const std::vector<uint64_t> *snap_ids,
+                    uint64_t object_no, uint8_t head_object_map_state,
+                    const std::vector<uint64_t> *snap_ids,
                     const ZTracer::Trace &trace, size_t snap_id_idx)
     : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_no(object_no),
-      m_snap_ids(*snap_ids), m_trace(trace), m_snap_id_idx(snap_id_idx)
+      m_head_object_map_state(head_object_map_state), m_snap_ids(*snap_ids),
+      m_trace(trace), m_snap_id_idx(snap_id_idx)
   {
   }
 
   int send() override {
+    ceph_assert(m_image_ctx.owner_lock.is_locked());
+    if (m_image_ctx.exclusive_lock == nullptr) {
+      return 1;
+    }
+    ceph_assert(m_image_ctx.exclusive_lock->is_lock_owner());
+
+    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+    if (m_image_ctx.object_map == nullptr) {
+      return 1;
+    }
+
     uint64_t snap_id = m_snap_ids[m_snap_id_idx];
     if (snap_id == CEPH_NOSNAP) {
-      RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
-      RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock);
-      ceph_assert(m_image_ctx.exclusive_lock->is_lock_owner());
-      ceph_assert(m_image_ctx.object_map != nullptr);
-      bool sent = m_image_ctx.object_map->aio_update<Context>(
-        CEPH_NOSNAP, m_object_no, OBJECT_EXISTS, {}, m_trace, false, this);
-      return (sent ? 0 : 1);
+      return update_head();
+    } else {
+      return update_snapshot(snap_id);
     }
+  }
 
+  int update_head() {
+    RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock);
+    bool sent = m_image_ctx.object_map->aio_update<Context>(
+      CEPH_NOSNAP, m_object_no, m_head_object_map_state, {}, m_trace, false,
+      this);
+    return (sent ? 0 : 1);
+  }
+
+  int update_snapshot(uint64_t snap_id) {
     uint8_t state = OBJECT_EXISTS;
-    if (m_image_ctx.test_features(RBD_FEATURE_FAST_DIFF) &&
-        m_snap_id_idx + 1 < m_snap_ids.size()) {
+    if (m_image_ctx.test_features(RBD_FEATURE_FAST_DIFF,
+                                  m_image_ctx.snap_lock) &&
+        m_snap_id_idx > 0) {
+      // first snapshot should be exists+dirty since it contains
+      // the copyup data -- later snapshots inherit the data.
       state = OBJECT_EXISTS_CLEAN;
     }
 
-    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
     RWLock::RLocker object_map_locker(m_image_ctx.object_map_lock);
-    if (m_image_ctx.object_map == nullptr) {
-      return 1;
-    }
-
     bool sent = m_image_ctx.object_map->aio_update<Context>(
       snap_id, m_object_no, state, {}, m_trace, true, this);
     ceph_assert(sent);
@@ -74,6 +91,7 @@ public:
 
 private:
   uint64_t m_object_no;
+  uint8_t m_head_object_map_state;
   const std::vector<uint64_t> &m_snap_ids;
   const ZTracer::Trace &m_trace;
   size_t m_snap_id_idx;
@@ -177,7 +195,7 @@ void CopyupRequest<I>::handle_read_from_parent(int r) {
   }
   m_ictx->copyup_list_lock.Unlock();
 
-  update_object_map_head();
+  update_object_maps();
 }
 
 template <typename I>
@@ -250,138 +268,60 @@ void CopyupRequest<I>::handle_deep_copy(int r) {
   m_ictx->copyup_list_lock.Unlock();
   m_ictx->snap_lock.put_read();
 
-  update_object_map_head();
+  update_object_maps();
 }
 
 template <typename I>
-void CopyupRequest<I>::update_object_map_head() {
-  auto cct = m_ictx->cct;
-  ldout(cct, 20) << "oid=" << m_oid << dendl;
-
-  {
-    RWLock::RLocker owner_locker(m_ictx->owner_lock);
-    RWLock::RLocker snap_locker(m_ictx->snap_lock);
-    if (m_ictx->object_map != nullptr) {
-      bool copy_on_read = m_pending_requests.empty();
-      ceph_assert(m_ictx->exclusive_lock->is_lock_owner());
-
-      RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
-
-      if (!m_ictx->snaps.empty()) {
-        if (m_deep_copy) {
-          // don't copy ids for the snaps updated by object deep copy or
-          // that don't overlap
-          std::set<uint64_t> deep_copied;
-          for (auto &it : m_ictx->migration_info.snap_map) {
-            if (it.first != CEPH_NOSNAP) {
-              deep_copied.insert(it.second.front());
-            }
-          }
-          std::copy_if(m_ictx->snaps.begin(), m_ictx->snaps.end(),
-                       std::back_inserter(m_snap_ids),
-                       [this, cct, &deep_copied](uint64_t snap_id) {
-                         if (deep_copied.count(snap_id)) {
-                           return false;
-                         }
-                         RWLock::RLocker parent_locker(m_ictx->parent_lock);
-                         uint64_t parent_overlap = 0;
-                         int r = m_ictx->get_parent_overlap(snap_id,
-                                                            &parent_overlap);
-                         if (r < 0) {
-                           ldout(cct, 5) << "failed getting parent overlap for "
-                                         << "snap_id: " << snap_id << ": "
-                                         << cpp_strerror(r) << dendl;
-                         }
-                         if (parent_overlap == 0) {
-                           return false;
-                         }
-                         std::vector<std::pair<uint64_t, uint64_t>> extents;
-                         Striper::extent_to_file(cct, &m_ictx->layout,
-                                                 m_object_no, 0,
-                                                 m_ictx->layout.object_size,
-                                                 extents);
-                         auto overlap = m_ictx->prune_parent_extents(
-                             extents, parent_overlap);
-                         return overlap > 0;
-                       });
-        } else {
-          m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(),
-                            m_ictx->snaps.end());
-        }
-      }
-      if (copy_on_read &&
-          (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) {
-        m_snap_ids.insert(m_snap_ids.begin(), CEPH_NOSNAP);
-        object_map_locker.unlock();
-        snap_locker.unlock();
-        owner_locker.unlock();
-
-        update_object_maps();
-        return;
-      }
-
-      bool may_update = false;
-      uint8_t new_state;
-      uint8_t current_state = (*m_ictx->object_map)[m_object_no];
+void CopyupRequest<I>::update_object_maps() {
+  RWLock::RLocker owner_locker(m_ictx->owner_lock);
+  RWLock::RLocker snap_locker(m_ictx->snap_lock);
+  if (m_ictx->object_map == nullptr) {
+    snap_locker.unlock();
+    owner_locker.unlock();
 
-      auto r_it = m_pending_requests.rbegin();
-      if (r_it != m_pending_requests.rend()) {
-        auto req = *r_it;
-        new_state = req->get_pre_write_object_map_state();
+    copyup();
+    return;
+  }
 
-        ldout(cct, 20) << req->get_op_type() << " object no "
-                       << m_object_no << " current state "
-                       << stringify(static_cast<uint32_t>(current_state))
-                       << " new state " << stringify(static_cast<uint32_t>(new_state))
-                       << dendl;
-        may_update = true;
-      }
+  auto cct = m_ictx->cct;
+  ldout(cct, 20) << "oid=" << m_oid << dendl;
 
-      if (may_update && (new_state != current_state) &&
-          m_ictx->object_map->aio_update<
-              CopyupRequest<I>,
-              &CopyupRequest<I>::handle_update_object_map_head>(
-            CEPH_NOSNAP, m_object_no, new_state, current_state, m_trace,
-            false, this)) {
-        return;
-      }
+  if (!m_ictx->snaps.empty()) {
+    if (m_deep_copy) {
+      compute_deep_copy_snap_ids();
+    } else {
+      m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.rbegin(),
+                        m_ictx->snaps.rend());
     }
   }
 
-  update_object_maps();
-}
-
-template <typename I>
-void CopyupRequest<I>::handle_update_object_map_head(int r) {
-  ldout(m_ictx->cct, 20) << "oid=" << m_oid << ", "
-                         << "r=" << r << dendl;
-
-  if (r < 0) {
-    lderr(m_ictx->cct) << "failed to update head object map: "
-                       << cpp_strerror(r) << dendl;
-    finish(r);
-    return;
+  bool copy_on_read = m_pending_requests.empty();
+  uint8_t head_object_map_state = OBJECT_EXISTS;
+  if (copy_on_read && !m_snap_ids.empty() &&
+      m_ictx->test_features(RBD_FEATURE_FAST_DIFF, m_ictx->snap_lock)) {
+    // HEAD is non-dirty since data is tied to first snapshot
+    head_object_map_state = OBJECT_EXISTS_CLEAN;
   }
 
-  update_object_maps();
-}
-
-template <typename I>
-void CopyupRequest<I>::update_object_maps() {
-  if (m_snap_ids.empty()) {
-    // no object map update required
-    copyup();
-    return;
+  auto r_it = m_pending_requests.rbegin();
+  if (r_it != m_pending_requests.rend()) {
+    // last write-op determines the final object map state
+    head_object_map_state = (*r_it)->get_pre_write_object_map_state();
   }
 
-  // update object maps for HEAD and all existing snapshots
-  ldout(m_ictx->cct, 20) << "oid=" << m_oid << dendl;
+  RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
+  if ((*m_ictx->object_map)[m_object_no] != head_object_map_state) {
+    // (maybe) need to update the HEAD object map state
+    m_snap_ids.push_back(CEPH_NOSNAP);
+  }
+  object_map_locker.unlock();
+  snap_locker.unlock();
 
-  RWLock::RLocker owner_locker(m_ictx->owner_lock);
+  ceph_assert(m_ictx->exclusive_lock->is_lock_owner());
   AsyncObjectThrottle<>::ContextFactory context_factory(
     boost::lambda::bind(boost::lambda::new_ptr<C_UpdateObjectMap>(),
-    boost::lambda::_1, m_ictx, m_object_no, &m_snap_ids, m_trace,
-    boost::lambda::_2));
+    boost::lambda::_1, m_ictx, m_object_no, head_object_map_state, &m_snap_ids,
+    m_trace, boost::lambda::_2));
   auto ctx = util::create_context_callback<
     CopyupRequest<I>, &CopyupRequest<I>::handle_update_object_maps>(this);
   auto throttle = new AsyncObjectThrottle<>(
@@ -403,6 +343,11 @@ void CopyupRequest<I>::handle_update_object_maps(int r) {
     return;
   }
 
+  copyup();
+}
+
+template <typename I>
+void CopyupRequest<I>::copyup() {
   m_ictx->copyup_list_lock.Lock();
   if (!m_copyup_required) {
     m_ictx->copyup_list_lock.Unlock();
@@ -413,11 +358,6 @@ void CopyupRequest<I>::handle_update_object_maps(int r) {
   }
   m_ictx->copyup_list_lock.Unlock();
 
-  copyup();
-}
-
-template <typename I>
-void CopyupRequest<I>::copyup() {
   ldout(m_ictx->cct, 20) << "oid=" << m_oid << dendl;
 
   m_ictx->snap_lock.get_read();
@@ -600,6 +540,48 @@ bool CopyupRequest<I>::is_update_object_map_required(int r) {
   return it->second[0] != CEPH_NOSNAP;
 }
 
+template <typename I>
+void CopyupRequest<I>::compute_deep_copy_snap_ids() {
+  ceph_assert(m_ictx->snap_lock.is_locked());
+
+  // don't copy ids for the snaps updated by object deep copy or
+  // that don't overlap
+  std::set<uint64_t> deep_copied;
+  for (auto &it : m_ictx->migration_info.snap_map) {
+    if (it.first != CEPH_NOSNAP) {
+      deep_copied.insert(it.second.front());
+    }
+  }
+
+  RWLock::RLocker parent_locker(m_ictx->parent_lock);
+  std::copy_if(m_ictx->snaps.rbegin(), m_ictx->snaps.rend(),
+               std::back_inserter(m_snap_ids),
+               [this, cct=m_ictx->cct, &deep_copied](uint64_t snap_id) {
+      if (deep_copied.count(snap_id)) {
+        return false;
+      }
+
+      uint64_t parent_overlap = 0;
+      int r = m_ictx->get_parent_overlap(snap_id,
+                                         &parent_overlap);
+      if (r < 0) {
+        ldout(cct, 5) << "failed getting parent overlap for snap_id: "
+                      << snap_id << ": " << cpp_strerror(r) << dendl;
+      }
+      if (parent_overlap == 0) {
+        return false;
+      }
+      std::vector<std::pair<uint64_t, uint64_t>> extents;
+      Striper::extent_to_file(cct, &m_ictx->layout,
+                              m_object_no, 0,
+                              m_ictx->layout.object_size,
+                              extents);
+      auto overlap = m_ictx->prune_parent_extents(
+          extents, parent_overlap);
+      return overlap > 0;
+    });
+}
+
 } // namespace io
 } // namespace librbd
 
index 8cc060424608cbc26e31911665ca1cc4dcd4f1c8..24d5323849171a8b0a7f8a003c9340e8b8c9de65 100644 (file)
@@ -53,25 +53,26 @@ private:
    *
    *              <start>
    *                 |
+   *      /---------/ \---------\
+   *      |                     |
+   *      v                     v
+   *  READ_FROM_PARENT      DEEP_COPY
+   *      |                     |
+   *      \---------\ /---------/
+   *                 |
+   *                 v (skip if not needed)
+   *         UPDATE_OBJECT_MAPS
+   *                 |
+   *                 v (skip if not needed)
+   *              COPYUP
+   *                 |
    *                 v
-   *    . . .STATE_READ_FROM_PARENT. . .
-   *    . .          |                 .
-   *    . .          v                 .
-   *    . .  STATE_OBJECT_MAP_HEAD     v (copy on read /
-   *    . .          |                 .  no HEAD rev. update)
-   *    v v          v                 .
-   *    . .    STATE_OBJECT_MAP. . . . .
-   *    . .          |
-   *    . .          v
-   *    . . . . > STATE_COPYUP
-   *    .            |
-   *    .            v
-   *    . . . . > <finish>
+   *              <finish>
    *
    * @endverbatim
    *
-   * The _OBJECT_MAP state is skipped if the object map isn't enabled or if
-   * an object map update isn't required. The _COPYUP state is skipped if
+   * The OBJECT_MAP state is skipped if the object map isn't enabled or if
+   * an object map update isn't required. The COPYUP state is skipped if
    * no data was read from the parent *and* there are no additional ops.
    */
 
@@ -103,9 +104,6 @@ private:
   void deep_copy();
   void handle_deep_copy(int r);
 
-  void update_object_map_head();
-  void handle_update_object_map_head(int r);
-
   void update_object_maps();
   void handle_update_object_maps(int r);
 
@@ -121,6 +119,7 @@ private:
   bool is_update_object_map_required(int r);
   bool is_deep_copy() const;
 
+  void compute_deep_copy_snap_ids();
 };
 
 } // namespace io