]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: fix traversal behavior in scan_mapped_space to avoid replay... 49605/head
authormyoungwon oh <ohmyoungwon@gmail.com>
Tue, 14 Feb 2023 09:39:54 +0000 (18:39 +0900)
committermyoungwon oh <ohmyoungwon@gmail.com>
Fri, 17 Feb 2023 08:25:05 +0000 (17:25 +0900)
1. Traverse the allocations from backref-tree leaf-node entries;
2. Traverse the pending alloc-deltas by sequence;
3. Traverse the allocations from backref-tree internal-node entries;

Signed-off-by: Myoungwon Oh <myoungwon.oh@samsung.com>
src/crimson/os/seastore/backref/btree_backref_manager.cc

index 5c47001e77976722e24bce9fd8ff7984c49eb832..c1405ebc18d465b889e223a8600e0291cf48bf99 100644 (file)
@@ -298,91 +298,116 @@ BtreeBackrefManager::scan_mapped_space(
     [this, c, FNAME](auto &scan_visitor)
   {
     auto block_size = cache.get_block_size();
-    BackrefBtree::mapped_space_visitor_t f =
-      [&scan_visitor, block_size, FNAME, c](
-        paddr_t paddr, extent_len_t len,
-        depth_t depth, extent_types_t type) {
-      TRACET("tree node {}~{} {}, depth={} used",
-             c.trans, paddr, len, type, depth);
-      ceph_assert(paddr.is_absolute());
-      ceph_assert(len > 0 && len % block_size == 0);
-      ceph_assert(depth >= 1);
-      ceph_assert(is_backref_node(type));
-      return scan_visitor(paddr, len, type, L_ADDR_NULL);
-    };
-    return seastar::do_with(
-      std::move(f),
-      [this, c, &scan_visitor, block_size, FNAME](auto &tree_visitor)
+    // traverse leaf-node entries
+    return with_btree<BackrefBtree>(
+      cache, c,
+      [c, &scan_visitor, block_size, FNAME](auto &btree)
     {
-      return with_btree<BackrefBtree>(
-        cache, c,
-        [c, &scan_visitor, &tree_visitor, block_size, FNAME](auto &btree)
-      {
-        return BackrefBtree::iterate_repeat(
-          c,
-          btree.lower_bound(
-            c,
-            P_ADDR_MIN,
-            &tree_visitor),
-          [c, &scan_visitor, block_size, FNAME](auto &pos) {
-            if (pos.is_end()) {
-              return BackrefBtree::iterate_repeat_ret_inner(
-                interruptible::ready_future_marker{},
-                seastar::stop_iteration::yes);
-            }
-            TRACET("tree value {}~{} {}~{} {} used",
-                   c.trans,
-                   pos.get_key(),
-                   pos.get_val().len,
-                   pos.get_val().laddr,
-                   pos.get_val().len,
-                   pos.get_val().type);
-            ceph_assert(pos.get_key().is_absolute());
-            ceph_assert(pos.get_val().len > 0 &&
-                        pos.get_val().len % block_size == 0);
-            ceph_assert(!is_backref_node(pos.get_val().type));
-            ceph_assert(pos.get_val().laddr != L_ADDR_NULL);
-            scan_visitor(
-                pos.get_key(),
-                pos.get_val().len,
-                pos.get_val().type,
-                pos.get_val().laddr);
-            return BackrefBtree::iterate_repeat_ret_inner(
-              interruptible::ready_future_marker{},
-              seastar::stop_iteration::no);
-          },
-          &tree_visitor
-        );
-      });
+      return BackrefBtree::iterate_repeat(
+       c,
+       btree.lower_bound(
+         c,
+         P_ADDR_MIN),
+       [c, &scan_visitor, block_size, FNAME](auto &pos) {
+         if (pos.is_end()) {
+           return BackrefBtree::iterate_repeat_ret_inner(
+             interruptible::ready_future_marker{},
+             seastar::stop_iteration::yes);
+         }
+         TRACET("tree value {}~{} {}~{} {} used",
+                c.trans,
+                pos.get_key(),
+                pos.get_val().len,
+                pos.get_val().laddr,
+                pos.get_val().len,
+                pos.get_val().type);
+         ceph_assert(pos.get_key().is_absolute());
+         ceph_assert(pos.get_val().len > 0 &&
+                     pos.get_val().len % block_size == 0);
+         ceph_assert(!is_backref_node(pos.get_val().type));
+         ceph_assert(pos.get_val().laddr != L_ADDR_NULL);
+         scan_visitor(
+             pos.get_key(),
+             pos.get_val().len,
+             pos.get_val().type,
+             pos.get_val().laddr);
+         return BackrefBtree::iterate_repeat_ret_inner(
+           interruptible::ready_future_marker{},
+           seastar::stop_iteration::no);
+       }
+      );
     }).si_then([this, &scan_visitor, c, FNAME, block_size] {
+      // traverse alloc-deltas in order
       auto &backref_entry_mset = cache.get_backref_entry_mset();
       DEBUGT("scan {} backref entries", c.trans, backref_entry_mset.size());
       for (auto &backref_entry : backref_entry_mset) {
-        if (backref_entry.laddr == L_ADDR_NULL) {
-          TRACET("backref entry {}~{} {} free",
-                 c.trans,
-                 backref_entry.paddr,
-                 backref_entry.len,
-                 backref_entry.type);
-        } else {
-          TRACET("backref entry {}~{} {}~{} {} used",
-                 c.trans,
-                 backref_entry.paddr,
-                 backref_entry.len,
-                 backref_entry.laddr,
-                 backref_entry.len,
-                 backref_entry.type);
-        }
-        ceph_assert(backref_entry.paddr.is_absolute());
-        ceph_assert(backref_entry.len > 0 &&
-                    backref_entry.len % block_size == 0);
-        ceph_assert(!is_backref_node(backref_entry.type));
-        scan_visitor(
-            backref_entry.paddr,
-            backref_entry.len,
-            backref_entry.type,
-            backref_entry.laddr);
+       if (backref_entry.laddr == L_ADDR_NULL) {
+         TRACET("backref entry {}~{} {} free",
+                c.trans,
+                backref_entry.paddr,
+                backref_entry.len,
+                backref_entry.type);
+       } else {
+         TRACET("backref entry {}~{} {}~{} {} used",
+                c.trans,
+                backref_entry.paddr,
+                backref_entry.len,
+                backref_entry.laddr,
+                backref_entry.len,
+                backref_entry.type);
+       }
+       ceph_assert(backref_entry.paddr.is_absolute());
+       ceph_assert(backref_entry.len > 0 &&
+                   backref_entry.len % block_size == 0);
+       ceph_assert(!is_backref_node(backref_entry.type));
+       scan_visitor(
+           backref_entry.paddr,
+           backref_entry.len,
+           backref_entry.type,
+           backref_entry.laddr);
       }
+    }).si_then([this, &scan_visitor, block_size, c, FNAME] {
+      BackrefBtree::mapped_space_visitor_t f =
+       [&scan_visitor, block_size, FNAME, c](
+         paddr_t paddr, extent_len_t len,
+         depth_t depth, extent_types_t type) {
+       TRACET("tree node {}~{} {}, depth={} used",
+              c.trans, paddr, len, type, depth);
+       ceph_assert(paddr.is_absolute());
+       ceph_assert(len > 0 && len % block_size == 0);
+       ceph_assert(depth >= 1);
+       ceph_assert(is_backref_node(type));
+       return scan_visitor(paddr, len, type, L_ADDR_NULL);
+      };
+      return seastar::do_with(
+       std::move(f),
+       [this, c](auto &tree_visitor)
+      {
+       // traverse internal-node entries
+       return with_btree<BackrefBtree>(
+         cache, c,
+         [c, &tree_visitor](auto &btree)
+       {
+         return BackrefBtree::iterate_repeat(
+           c,
+           btree.lower_bound(
+             c,
+             P_ADDR_MIN,
+             &tree_visitor),
+           [](auto &pos) {
+             if (pos.is_end()) {
+               return BackrefBtree::iterate_repeat_ret_inner(
+                 interruptible::ready_future_marker{},
+                 seastar::stop_iteration::yes);
+             }
+             return BackrefBtree::iterate_repeat_ret_inner(
+               interruptible::ready_future_marker{},
+               seastar::stop_iteration::no);
+           },
+           &tree_visitor
+         );
+       });
+      });
     });
   });
 }