git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd/object_metadata_helper: Introduce calc_*_subsets
author Matan Breizman <mbreizma@redhat.com>
Mon, 9 Jan 2023 14:07:20 +0000 (16:07 +0200)
committer Matan Breizman <mbreizma@redhat.com>
Thu, 4 Jan 2024 15:18:11 +0000 (15:18 +0000)
Signed-off-by: Matan Breizman <mbreizma@redhat.com>
src/crimson/osd/CMakeLists.txt
src/crimson/osd/object_metadata_helper.cc [new file with mode: 0644]
src/crimson/osd/object_metadata_helper.h [new file with mode: 0644]
src/crimson/osd/replicated_recovery_backend.h
src/test/crimson/CMakeLists.txt
src/test/crimson/test_calc_subsets.cc [new file with mode: 0644]

index 65fb7201f7661562a007cf33208e1986650b0c9d..c77da7575a50acd6f1bb035406de28063ac60864 100644 (file)
@@ -15,6 +15,7 @@ add_executable(crimson-osd
   pg_shard_manager.cc
   object_context.cc
   object_context_loader.cc
+  object_metadata_helper.cc
   ops_executer.cc
   osd_operation.cc
   osd_operations/client_request.cc
diff --git a/src/crimson/osd/object_metadata_helper.cc b/src/crimson/osd/object_metadata_helper.cc
new file mode 100644 (file)
index 0000000..3016b0a
--- /dev/null
@@ -0,0 +1,222 @@
+#include "crimson/osd/object_metadata_helper.h"
+
+namespace {
+  // File-local accessor for the logger of the ceph_subsys_osd subsystem;
+  // every debug message in this file goes through it.
+  seastar::logger& logger() {
+    auto& osd_logger = crimson::get_logger(ceph_subsys_osd);
+    return osd_logger;
+  }
+}
+
+namespace crimson::osd {
+
+/*
+ *   The content of a clone object may already overlap with the
+ *   next older and the next newer clone objects.
+ *   Reuse the overlaps recorded for those neighbouring clones
+ *   instead of pushing the whole clone object to the replica.
+ */
+
+/**
+ * Compute how the clone object `soid` should be recovered.
+ *
+ * data_subset starts as [0, size) of the clone, where size comes from
+ * snapset.clone_size. Walking towards older and then towards newer clones,
+ * the first neighbour that is not missing and sorts before `last_backfill`
+ * contributes the accumulated overlap as a clone range; those ranges are
+ * removed from data_subset.
+ *
+ * @param snapset       clones, clone_size and clone_overlap are read.
+ *                      Lookups use find(), so no entries are inserted.
+ * @param soid          clone being recovered; soid.snap must be listed in
+ *                      snapset.clones (asserted).
+ * @param missing       consulted through is_missing() to reject neighbours.
+ * @param last_backfill only neighbours ordered before it are usable.
+ * @return the extents to push (data_subset) and the extents to clone
+ *         from neighbouring clones (clone_subsets).
+ */
+subsets_t calc_clone_subsets(
+  SnapSet& snapset, const hobject_t& soid,
+  const pg_missing_t& missing,
+  const hobject_t &last_backfill)
+{
+  subsets_t subsets;
+  logger().debug("{}: {} clone_overlap {} ",
+                 __func__, soid, snapset.clone_overlap);
+
+  // A clone without a recorded size is treated as empty. operator[] would
+  // insert a zero entry into the caller's snapset, hence find().
+  uint64_t size = 0;
+  if (const auto size_iter = snapset.clone_size.find(soid.snap);
+      size_iter != snapset.clone_size.end()) {
+    size = size_iter->second;
+  }
+  if (size) {
+    subsets.data_subset.insert(0, size);
+  }
+
+  // TODO: make sure CEPH_FEATURE_OSD_CACHEPOOL is not supported in Crimson
+  // Skips clone subsets if caching was enabled (allow_incomplete_clones).
+
+#ifndef UNIT_TESTS_BUILT
+  if (!crimson::common::local_conf()->osd_recover_clone_overlap) {
+    logger().debug("{} {} -- osd_recover_clone_overlap is disabled",
+                   __func__, soid);
+    return subsets;
+  }
+#endif
+
+  if (snapset.clones.empty()) {
+    logger().debug("{} {} -- no clones", __func__, soid);
+    return subsets;
+  }
+
+  const auto soid_snap_iter = std::find(snapset.clones.begin(),
+                                        snapset.clones.end(),
+                                        soid.snap);
+  assert(soid_snap_iter != snapset.clones.end());
+  const auto soid_snap_index =
+    static_cast<std::size_t>(soid_snap_iter - snapset.clones.begin());
+
+  // Narrow `acc` to its intersection with the overlap recorded for `snap`.
+  // A snap without a recorded overlap shares nothing, which is what
+  // intersecting with a default-constructed interval_set would yield.
+  const auto narrow_to_overlap =
+    [&snapset](interval_set<uint64_t>& acc, snapid_t snap) {
+      if (const auto overlap_iter = snapset.clone_overlap.find(snap);
+          overlap_iter != snapset.clone_overlap.end()) {
+        acc.intersection_of(overlap_iter->second);
+      } else {
+        acc.clear();
+      }
+    };
+
+  // any overlap with next older clone?
+  interval_set<uint64_t> cloning;
+  interval_set<uint64_t> prev;
+  if (size) {
+    prev.insert(0, size);
+  }
+  // visits soid_snap_index - 1 down to 0
+  for (std::size_t i = soid_snap_index; i-- > 0; ) {
+    hobject_t clone = soid;
+    clone.snap = snapset.clones[i];
+    // clone_overlap of i holds the overlap between i to i+1
+    narrow_to_overlap(prev, snapset.clones[i]);
+    if (!missing.is_missing(clone) && clone < last_backfill) {
+      logger().debug("{} {} has prev {} overlap {}",
+                     __func__, soid, clone, prev);
+      subsets.clone_subsets[clone] = prev;
+      cloning.union_of(prev);
+      break;
+    }
+    logger().debug("{} {} does not have prev {} overlap {}",
+                   __func__, soid, clone, prev);
+  }
+
+  // overlap with next newest?
+  interval_set<uint64_t> next;
+  if (size) {
+    next.insert(0, size);
+  }
+  for (std::size_t i = soid_snap_index + 1;
+       i < snapset.clones.size(); ++i) {
+    hobject_t clone = soid;
+    clone.snap = snapset.clones[i];
+    // clone_overlap of i-1 holds the overlap between i-1 to i
+    narrow_to_overlap(next, snapset.clones[i - 1]);
+    if (!missing.is_missing(clone) && clone < last_backfill) {
+      logger().debug("{} {} has next {} overlap {}",
+                     __func__, soid, clone, next);
+      subsets.clone_subsets[clone] = next;
+      cloning.union_of(next);
+      break;
+    }
+    logger().debug("{} {} does not have next {} overlap {}",
+                   __func__, soid, clone, next);
+  }
+
+#ifndef UNIT_TESTS_BUILT
+  if (cloning.num_intervals() >
+      crimson::common::local_conf().get_val<uint64_t>
+      ("osd_recover_clone_overlap_limit")) {
+    logger().debug("skipping clone, too many holes");
+    subsets.clone_subsets.clear();
+    cloning.clear();
+  }
+#endif
+
+  // what's left for us to push?
+  subsets.data_subset.subtract(cloning);
+  logger().debug("{} {} data_subsets {} clone_subsets {}",
+                 __func__, soid, subsets.data_subset, subsets.clone_subsets);
+  return subsets;
+}
+
+/*
+ * Instead of pushing the whole object to the replica,
+ * make use of:
+ * 1) ObjectCleanRegion - push modified content only.
+ *    - See: dev/osd_internals/partial_object_recovery
+ * 2) The modified content may already overlap with the
+ *    next older clone object. Use the existing clone
+ *    object overlap as well.
+ */
+
+/**
+ * Compute how the head object `head` should be recovered.
+ *
+ * data_subset is [0, obj_size) intersected with the dirty regions stored
+ * in the missing item of `head`. The newest clone that is not missing and
+ * sorts before `last_backfill` may then supply part of that data: the
+ * accumulated clone overlap, restricted to data_subset, is recorded as a
+ * clone range and removed from data_subset.
+ *
+ * @param obj_size      size of the head object.
+ * @param snapset       clones and clone_overlap are read. Lookups use
+ *                      find(), so no entries are inserted.
+ * @param head          object being recovered; must be present in
+ *                      `missing` (asserted).
+ * @param missing       provides the dirty regions of `head` and is
+ *                      consulted through is_missing() to reject clones.
+ * @param last_backfill only clones ordered before it are usable.
+ * @return the extents to push (data_subset) and, when a usable clone
+ *         exists, the extents to clone from it (clone_subsets).
+ */
+subsets_t calc_head_subsets(
+  uint64_t obj_size,
+  SnapSet& snapset,
+  const hobject_t& head,
+  const pg_missing_t& missing,
+  const hobject_t &last_backfill)
+{
+  logger().debug("{}: {} clone_overlap {} ",
+                 __func__, head, snapset.clone_overlap);
+
+  subsets_t subsets;
+
+  // 1) Calculate modified content only
+  if (obj_size) {
+    subsets.data_subset.insert(0, obj_size);
+  }
+  assert(missing.get_items().contains(head));
+  // bind by reference: the item is only read, no need to copy it
+  const pg_missing_item& missing_item = missing.get_items().at(head);
+  // let data_subset store only the modified content of the object.
+  subsets.data_subset.intersection_of(missing_item.clean_regions.get_dirty_regions());
+  logger().debug("{} {} data_subset {}",
+                 __func__, head, subsets.data_subset);
+
+  // TODO: make sure CEPH_FEATURE_OSD_CACHEPOOL is not supported in Crimson
+  // Skips clone subsets if caching was enabled (allow_incomplete_clones).
+
+#ifndef UNIT_TESTS_BUILT
+  if (!crimson::common::local_conf()->osd_recover_clone_overlap) {
+    logger().debug("{} {} -- osd_recover_clone_overlap is disabled",
+                   __func__, head);
+    return subsets;
+  }
+#endif
+
+  if (snapset.clones.empty()) {
+    logger().debug("{} {} -- no clones", __func__, head);
+    return subsets;
+  }
+
+  // 2) Find any overlap with next older clone
+  interval_set<uint64_t> cloning;
+  interval_set<uint64_t> prev;
+  hobject_t clone = head;
+  if (obj_size) {
+    prev.insert(0, obj_size);
+  }
+  // visits the clones from the last entry of snapset.clones down to the first
+  for (std::size_t i = snapset.clones.size(); i-- > 0; ) {
+    clone.snap = snapset.clones[i];
+    // let prev store only the overlap with clone i; a clone without a
+    // recorded overlap shares nothing. find() keeps the caller's snapset
+    // untouched, whereas operator[] would insert an empty entry.
+    if (const auto overlap_iter = snapset.clone_overlap.find(clone.snap);
+        overlap_iter != snapset.clone_overlap.end()) {
+      prev.intersection_of(overlap_iter->second);
+    } else {
+      prev.clear();
+    }
+    if (!missing.is_missing(clone) && clone < last_backfill) {
+      logger().debug("{} {} has prev {} overlap {}",
+                     __func__, head, clone, prev);
+      cloning = prev;
+      break;
+    }
+    logger().debug("{} {} does not have prev {} overlap {}",
+                   __func__, head, clone, prev);
+  }
+
+  // let cloning store only the overlap with data_subset
+  cloning.intersection_of(subsets.data_subset);
+  if (cloning.empty()) {
+    logger().debug("skipping clone, nothing needs to clone");
+    return subsets;
+  }
+
+#ifndef UNIT_TESTS_BUILT
+  if (cloning.num_intervals() >
+      crimson::common::local_conf().get_val<uint64_t>
+      ("osd_recover_clone_overlap_limit")) {
+    logger().debug("skipping clone, too many holes");
+    subsets.clone_subsets.clear();
+    cloning.clear();
+  }
+#endif
+
+  // what's left for us to push?
+  // When cloning was skipped above there is nothing to record: leave
+  // clone_subsets empty instead of adding an entry with no extents.
+  if (!cloning.empty()) {
+    subsets.clone_subsets[clone] = cloning;
+    subsets.data_subset.subtract(cloning);
+  }
+  logger().debug("{} {} data_subsets {} clone_subsets {}",
+                 __func__, head, subsets.data_subset, subsets.clone_subsets);
+
+  return subsets;
+}
+
+/*
+ * Store computed subsets in `recovery_info`: clone_subsets is assigned to
+ * clone_subset and data_subset is assigned to copy_subset.
+ */
+void set_subsets(
+  const subsets_t& subsets,
+  ObjectRecoveryInfo& recovery_info)
+{
+  const auto& [data_ranges, clone_ranges] = subsets;
+  recovery_info.clone_subset = clone_ranges;
+  recovery_info.copy_subset = data_ranges;
+}
+
+
+}
diff --git a/src/crimson/osd/object_metadata_helper.h b/src/crimson/osd/object_metadata_helper.h
new file mode 100644 (file)
index 0000000..927fc48
--- /dev/null
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "osd/osd_types_fmt.h"
+
+namespace crimson::osd {
+  /// Result of a subset calculation for one object.
+  struct subsets_t {
+    /// Extents of the object left to be pushed.
+    interval_set<uint64_t> data_subset;
+    /// Extents that overlap with another clone, keyed by that clone.
+    std::map<hobject_t, interval_set<uint64_t>> clone_subsets;
+  };
+
+  /// Subsets for recovering the clone `soid`: starts from the size recorded
+  /// in `snapset` and looks for a usable next older and next newer clone,
+  /// i.e. one that is not in `missing` and sorts before `last_backfill`.
+  subsets_t calc_clone_subsets(
+    SnapSet& snapset, const hobject_t& soid,
+    const pg_missing_t& missing,
+    const hobject_t &last_backfill);
+  /// Subsets for recovering the head object `head` of size `obj_size`:
+  /// starts from the dirty regions stored for `head` in `missing` and looks
+  /// for a usable older clone, i.e. one that is not in `missing` and sorts
+  /// before `last_backfill`. `head` must be present in `missing`.
+  subsets_t calc_head_subsets(
+    uint64_t obj_size,
+    SnapSet& snapset,
+    const hobject_t& head,
+    const pg_missing_t& missing,
+    const hobject_t &last_backfill);
+  /// Copies `subsets` into the copy_subset / clone_subset fields of
+  /// `recovery_info`.
+  void set_subsets(
+    const subsets_t& subsets,
+    ObjectRecoveryInfo& recovery_info);
+}
index 16d6369a91f3256efa78282848288df85b93cd1f..8fdaf9a10f8c8aade692c9586455a1cd211cca9b 100644 (file)
@@ -6,6 +6,7 @@
 #include "crimson/common/interruptible_future.h"
 #include "crimson/osd/pg_interval_interrupt_condition.h"
 #include "crimson/osd/recovery_backend.h"
+#include "crimson/osd/object_metadata_helper.h"
 
 #include "messages/MOSDPGPull.h"
 #include "messages/MOSDPGPush.h"
index c943ff885464a0b12e3e9c6c080570a6983d55b7..e1a5dfe73dfd3bafb8f4e4958e6c1a08598e6f1a 100644 (file)
@@ -71,6 +71,13 @@ add_ceph_unittest(unittest-seastar-lru
   --memory 256M --smp 1)
 target_link_libraries(unittest-seastar-lru crimson GTest::Main)
 
+# object_metadata_helper.cc is compiled directly into the test executable.
+add_executable(unittest-seastar-calc-subsets
+  test_calc_subsets.cc
+  ${PROJECT_SOURCE_DIR}/src/crimson/osd/object_metadata_helper.cc)
+add_ceph_unittest(unittest-seastar-calc-subsets
+  --memory 256M --smp 1)
+target_link_libraries(unittest-seastar-calc-subsets crimson GTest::Main)
+
 add_executable(unittest-fixed-kv-node-layout
   test_fixed_kv_node_layout.cc)
 add_ceph_unittest(unittest-fixed-kv-node-layout)
diff --git a/src/test/crimson/test_calc_subsets.cc b/src/test/crimson/test_calc_subsets.cc
new file mode 100644 (file)
index 0000000..7d23810
--- /dev/null
@@ -0,0 +1,255 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "gtest/gtest.h"
+#include "crimson/osd/object_metadata_helper.h"
+
+
+// Head object with a single dirty extent and an empty snap set: the dirty
+// extent is the only data expected back and no clone ranges are expected.
+TEST(head_subsets, dirty_region)
+{
+  hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"};
+  hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"};
+  uint64_t obj_size = 10;
+  SnapSet empty_ss;
+
+  // Missing item for the head with one dirty extent.
+  const uint64_t dirty_off = 3;
+  const uint64_t dirty_len = 2;
+  pg_missing_item item;
+  item.clean_regions.mark_data_region_dirty(dirty_off, dirty_len);
+  pg_missing_t missing;
+  missing.add(head, std::move(item));
+
+  interval_set<uint64_t> expect_data_region;
+  expect_data_region.insert(dirty_off, dirty_len);
+
+  const crimson::osd::subsets_t result = crimson::osd::calc_head_subsets(
+    obj_size, empty_ss, head, missing, last_backfill);
+
+  EXPECT_TRUE(result.clone_subsets.empty());
+  EXPECT_TRUE(result.data_subset == expect_data_region);
+}
+
+// Head object whose missing item has no region marked dirty: both the data
+// subset and the clone subsets are expected to be empty.
+TEST(head_subsets, head_all_clean)
+{
+  hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"};
+  hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"};
+  uint64_t obj_size = 10;
+  SnapSet empty_ss;
+
+  // Default-constructed missing item, nothing marked dirty.
+  pg_missing_item item;
+  pg_missing_t missing;
+  missing.add(head, std::move(item));
+
+  const crimson::osd::subsets_t result = crimson::osd::calc_head_subsets(
+    obj_size, empty_ss, head, missing, last_backfill);
+
+  EXPECT_TRUE(result.clone_subsets.empty());
+  EXPECT_TRUE(result.data_subset.empty());
+}
+
+// Head object marked fully dirty with an empty snap set: the whole object,
+// [0, obj_size), is expected in the data subset and nothing can be cloned.
+TEST(head_subsets, all_dirty)
+{
+  uint64_t obj_size = 10;
+  SnapSet empty_ss;
+  hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"};
+  pg_missing_t missing;
+  pg_missing_item item;
+  item.clean_regions.mark_fully_dirty();
+  missing.add(head, std::move(item));
+  hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"};
+
+  // Pin the exact extent rather than only its total length.
+  interval_set<uint64_t> expect_data_region;
+  expect_data_region.insert(0, obj_size);
+
+// ****
+
+  crimson::osd::subsets_t result =
+    crimson::osd::calc_head_subsets(obj_size,
+                                    empty_ss,
+                                    head,
+                                    missing,
+                                    last_backfill);
+
+  EXPECT_TRUE(result.clone_subsets.empty());
+  EXPECT_TRUE(result.data_subset == expect_data_region);
+}
+
+// Fully dirty head with one clone that overlaps it: the overlap is expected
+// as a clone range and the rest of the object as data to push.
+TEST(head_subsets, clone_overlap)
+{
+  uint64_t obj_size = 10;
+  SnapSet ss;
+  hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"};
+  pg_missing_t missing;
+  pg_missing_item item;
+  item.clean_regions.mark_fully_dirty();
+  missing.add(head, std::move(item));
+  hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"};
+
+  // Clone object and its overlap with the next (here: the head).
+  hobject_t clone = head;
+  clone.snap = 0;
+  const uint64_t overlap_off = 2;
+  const uint64_t overlap_len = 2;
+  interval_set<uint64_t> overlap;
+  overlap.insert(overlap_off, overlap_len);
+
+  // Snapset:
+  ss.clones.push_back(clone.snap);
+  ss.clone_overlap[clone.snap] = overlap;
+
+  // Expected intervals: the overlap is cloned, everything else is pushed.
+  interval_set<uint64_t> expect_clone_subset;
+  expect_clone_subset.insert(overlap_off, overlap_len);
+  interval_set<uint64_t> expect_data_subset;
+  expect_data_subset.insert(0, obj_size);
+  expect_data_subset.subtract(expect_clone_subset);
+
+// ****
+
+  crimson::osd::subsets_t result =
+    crimson::osd::calc_head_subsets(obj_size,
+                                    ss,
+                                    head,
+                                    missing,
+                                    last_backfill);
+  EXPECT_TRUE(result.clone_subsets[clone] == expect_clone_subset);
+  EXPECT_TRUE(result.data_subset == expect_data_subset);
+}
+
+// Head with one dirty extent and one clone whose overlap partly covers that
+// extent: the covered part is expected as a clone range, the remainder of
+// the dirty extent as data to push.
+TEST(head_subsets, dirty_region_and_clone_overlap)
+{
+  hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"};
+  hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"};
+  uint64_t obj_size = 100;
+
+  // Missing item for the head with one dirty extent.
+  const uint64_t dirty_off = 3;
+  const uint64_t dirty_len = 2;
+  pg_missing_item item;
+  item.clean_regions.mark_data_region_dirty(dirty_off, dirty_len);
+  pg_missing_t missing;
+  missing.add(head, std::move(item));
+
+  // Single clone and its overlap with the next (here: the head).
+  hobject_t clone = head;
+  clone.snap = 0;
+  const uint64_t overlap_off = 2;
+  const uint64_t overlap_len = 2;
+  interval_set<uint64_t> overlap;
+  overlap.insert(overlap_off, overlap_len);
+
+  SnapSet ss;
+  ss.clones.push_back(clone.snap);
+  ss.clone_overlap[clone.snap] = overlap;
+
+  // Expected: clone range = overlap restricted to the dirty extent,
+  // data = dirty extent without that clone range.
+  interval_set<uint64_t> expect_data_region;
+  expect_data_region.insert(dirty_off, dirty_len);
+  interval_set<uint64_t> expect_clone_subset;
+  expect_clone_subset.insert(overlap_off, overlap_len);
+  expect_clone_subset.intersection_of(expect_data_region);
+  expect_data_region.subtract(expect_clone_subset);
+
+  crimson::osd::subsets_t result = crimson::osd::calc_head_subsets(
+    obj_size, ss, head, missing, last_backfill);
+
+  EXPECT_TRUE(result.clone_subsets[clone] == expect_clone_subset);
+  EXPECT_TRUE(result.data_subset == expect_data_region);
+}
+
+// Recovering the middle clone (snap 1) of three clones: the overlap with
+// the next older clone and the overlap with the next newer clone are both
+// expected as clone ranges, and the rest of the clone as data to push.
+TEST(clone_subsets, overlap)
+{
+  uint64_t clone_size = 10;
+  SnapSet ss;
+  hobject_t clone{object_t{"foo"}, "foo", 1, 42, 0, "nspace"};
+  ss.clone_size[1] = clone_size;
+  ss.clones.push_back(snapid_t(0));
+  ss.clones.push_back(snapid_t(1));
+  ss.clones.push_back(snapid_t(2));
+  pg_missing_t missing;
+  pg_missing_item item;
+  missing.add(clone, std::move(item));
+  hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"};
+
+  // Next older clone: clone_overlap of the older clone holds its overlap
+  // with the clone under recovery.
+  hobject_t older_clone = clone;
+  older_clone.snap = 0;
+  interval_set<uint64_t> expect_clone_subset1;
+  expect_clone_subset1.insert(4, 2);
+  ss.clone_overlap[older_clone.snap] = expect_clone_subset1;
+
+  // Next newest clone: clone_overlap of the clone under recovery holds its
+  // overlap with the newer clone.
+  hobject_t newest_clone = clone;
+  newest_clone.snap = 2;
+  interval_set<uint64_t> expect_clone_subset2;
+  expect_clone_subset2.insert(2, 2);
+  ss.clone_overlap[clone.snap] = expect_clone_subset2;
+
+  // Whatever is not covered by a clone range has to be pushed.
+  interval_set<uint64_t> expect_data_subset;
+  expect_data_subset.insert(0, clone_size);
+  expect_data_subset.subtract(expect_clone_subset1);
+  expect_data_subset.subtract(expect_clone_subset2);
+
+// ****
+
+  crimson::osd::subsets_t result =
+    crimson::osd::calc_clone_subsets(ss,
+                                     clone,
+                                     missing,
+                                     last_backfill);
+  EXPECT_TRUE(result.clone_subsets[older_clone] == expect_clone_subset1);
+  EXPECT_TRUE(result.clone_subsets[newest_clone] == expect_clone_subset2);
+  EXPECT_TRUE(result.data_subset == expect_data_subset);
+}