git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: introduce laddr_hint_t and associated factory methods
author: Zhang Song <zhangsong02@qianxin.com>
Wed, 14 May 2025 08:34:00 +0000 (16:34 +0800)
committer: Xuehan Xu <xuxuehan@qianxin.com>
Sun, 24 May 2026 04:06:20 +0000 (12:06 +0800)
Signed-off-by: Zhang Song <zhangsong02@qianxin.com>
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/os/seastore/seastore_types.cc
src/crimson/os/seastore/seastore_types.h

index 0d599436265459f46c18c4ef43588ab5b8357950..69bede32c90e42aeaf03d559ad2abc8b5745a071 100644 (file)
@@ -154,6 +154,271 @@ std::ostream &operator<<(std::ostream &out, const laddr_offset_t &laddr_offset)
             << "+0x" << std::hex << laddr_offset.get_offset() << std::dec;
 }
 
+// Print a hint in the form
+// "laddr_hint_t(<addr>, condition=<name>, policy=<name>, block_size=<n>)".
+// Enumerator values outside the declared set are treated as unreachable.
+std::ostream &operator<<(std::ostream &out, const laddr_hint_t &hint) {
+  // Spelling of each conflict condition enumerator.
+  const auto condition_name =
+    [](laddr_conflict_condition_t cond) -> const char * {
+    switch (cond) {
+    case laddr_conflict_condition_t::object_prefix_at_object_id:
+      return "object_prefix_at_object_id";
+    case laddr_conflict_condition_t::clone_prefix_at_clone_id:
+      return "clone_prefix_at_clone_id";
+    case laddr_conflict_condition_t::all_at_object_content:
+      return "all_at_object_content";
+    case laddr_conflict_condition_t::all_at_block_offset:
+      return "all_at_block_offset";
+    case laddr_conflict_condition_t::all_at_never:
+      return "all_at_never";
+    default:
+      __builtin_unreachable();
+    }
+  };
+  // Spelling of each conflict policy enumerator.
+  const auto policy_name =
+    [](laddr_conflict_policy_t pol) -> const char * {
+    switch (pol) {
+    case laddr_conflict_policy_t::gen_random:
+      return "gen_random";
+    case laddr_conflict_policy_t::linear_search:
+      return "linear_search";
+    default:
+      __builtin_unreachable();
+    }
+  };
+
+  out << "laddr_hint_t(" << hint.addr << ", condition=";
+  out << condition_name(hint.condition);
+  out << ", policy=";
+  out << policy_name(hint.policy);
+  out << ", block_size=" << hint.block_size << ")";
+  return out;
+}
+
+namespace {
+// Per-thread source of uniformly distributed 64-bit values used to randomize
+// fields of laddr hints.
+struct random_generator_t {
+  // Returns the calling thread's generator; it is constructed (and seeded)
+  // the first time a thread calls this.
+  static random_generator_t &get() {
+    static thread_local random_generator_t r{};
+    return r;
+  }
+  // Seeds the engine once from std::random_device and spans the whole
+  // uint64_t range.
+  random_generator_t()
+      : eng(std::random_device{}()),
+        global(0, std::numeric_limits<uint64_t>::max()) {}
+  // A 64-bit Mersenne Twister is used instead of std::default_random_engine:
+  // the latter is implementation-defined and commonly a small linear
+  // congruential engine, which is a poor fit for drawing full 64-bit values.
+  std::mt19937_64 eng;
+  std::uniform_int_distribution<uint64_t> global;
+  // Draws the next value in [0, UINT64_MAX].
+  uint64_t operator()() { return global(eng); }
+};
+// Returns (block_size >> laddr_t::UNIT_SHIFT) - 1, i.e. the low-bit mask
+// corresponding to block_size once it is expressed in units of
+// 2^UNIT_SHIFT bytes.
+//
+// block_size must be a non-zero power of two and must not shift down to
+// zero: in that case the subtraction would wrap to an all-ones mask and
+// rand_field_aligned() would only ever produce 0.
+uint64_t get_block_size_mask(uint64_t block_size) {
+  assert(block_size != 0 && (block_size & (block_size - 1)) == 0);
+  const uint64_t units_per_block = block_size >> laddr_t::UNIT_SHIFT;
+  assert(units_per_block != 0);
+  return units_per_block - 1;
+}
+// Draw one 64-bit value from the calling thread's random_generator_t.
+uint64_t rand_field() {
+  auto &generator = random_generator_t::get();
+  return generator();
+}
+// Draw a random 64-bit value with every bit of
+// get_block_size_mask(block_size) cleared.
+uint64_t rand_field_aligned(uint64_t block_size) {
+  const uint64_t low_bits = get_block_size_mask(block_size);
+  return rand_field() & ~low_bits;
+}
+} // namespace
+
+// Debug-only check that `addr` carries the given shard, pool and reversed
+// crush hash.  The body is wrapped in do { } while (0) so the macro expands
+// to exactly one statement (safe under an unbraced if/else), and the
+// arguments are parenthesized so arbitrary expressions can be passed.
+// Callers supply the trailing semicolon.
+#define CHECK_OBJECT_INFO(addr, shard, pool, crush)                            \
+  do {                                                                         \
+    assert((addr).match_shard_bits(shard));                                    \
+    assert((addr).match_pool_bits(pool));                                      \
+    assert((addr).get_reversed_hash() == (crush));                             \
+  } while (0)
+
+// Build a hint whose address is L_ADDR_MIN with only the object content
+// replaced by a random value aligned via rand_field_aligned(block_size).
+// The resulting address is asserted to be a global address.  The hint uses
+// the all_at_object_content condition with the linear_search policy.
+laddr_hint_t laddr_hint_t::create_global_md_hint(extent_len_t block_size) {
+  const auto content = rand_field_aligned(block_size);
+  laddr_hint_t hint{
+    L_ADDR_MIN.with_object_content(content),
+    laddr_conflict_condition_t::all_at_object_content,
+    laddr_conflict_policy_t::linear_search,
+    block_size
+  };
+  assert(hint.addr.is_global_address());
+  return hint;
+}
+
+// Build a hint for an onode extent: the address starts from L_ADDR_MIN, gets
+// the given shard, pool and reversed crush hash, and a random object content
+// aligned via rand_field_aligned(block_size).  The result is asserted to be
+// an onode extent address.  The hint uses the all_at_object_content
+// condition with the linear_search policy.
+laddr_hint_t laddr_hint_t::create_onode_hint(
+  laddr_shard_t shard,
+  laddr_pool_t pool,
+  laddr_crush_hash_t crush,
+  extent_len_t block_size)
+{
+  laddr_hint_t hint{
+    L_ADDR_MIN,
+    laddr_conflict_condition_t::all_at_object_content,
+    laddr_conflict_policy_t::linear_search,
+    block_size
+  };
+
+  // Fill in the address fields in place.
+  auto &target = hint.addr;
+  target.set_shard(shard);
+  target.set_pool(pool);
+  target.set_reversed_hash(crush);
+  target.set_object_content(rand_field_aligned(block_size));
+
+  CHECK_OBJECT_INFO(target, shard, pool, crush);
+  assert(target.is_onode_extent_address());
+  return hint;
+}
+
+// Case 1 of the laddr_hint_t construction table: data hint for a fresh
+// object.  Shard, pool and reversed crush hash are fixed on top of
+// L_ADDR_MIN, then find_next_random() picks the local object id
+// (object_prefix_at_object_id condition, gen_random policy).  The result is
+// asserted to be a non-metadata address with a zero byte offset.
+laddr_hint_t laddr_hint_t::create_fresh_object_data_hint(
+  laddr_shard_t shard,
+  laddr_pool_t pool,
+  laddr_crush_hash_t crush,
+  extent_len_t block_size)
+{
+  // Assemble the fixed part of the address first.
+  laddr_t prefix = L_ADDR_MIN;
+  prefix.set_shard(shard);
+  prefix.set_pool(pool);
+  prefix.set_reversed_hash(crush);
+
+  laddr_hint_t hint{
+    prefix,
+    laddr_conflict_condition_t::object_prefix_at_object_id,
+    laddr_conflict_policy_t::gen_random,
+    block_size
+  };
+  hint.find_next_random();
+
+  CHECK_OBJECT_INFO(hint.addr, shard, pool, crush);
+  assert(!hint.addr.is_metadata());
+  assert(hint.addr.get_offset_bytes() == 0);
+  return hint;
+}
+
+// Case 2 of the laddr_hint_t construction table: metadata hint for a fresh
+// object.  Starts from create_fresh_object_data_hint() and turns the
+// metadata flag on.  The asserts verify that the object info and clone
+// prefix are unchanged by that, and that the byte offset is still zero
+// (the same set of checks as create_clone_object_md_hint()).
+laddr_hint_t laddr_hint_t::create_fresh_object_md_hint(
+  laddr_shard_t shard,
+  laddr_pool_t pool,
+  laddr_crush_hash_t crush,
+  extent_len_t block_size)
+{
+  auto hint = create_fresh_object_data_hint(shard, pool, crush, block_size);
+  // Only read by the asserts below, hence [[maybe_unused]] for NDEBUG builds.
+  [[maybe_unused]] const laddr_t data_addr = hint.addr;
+
+  hint.addr.set_metadata(true);
+
+  CHECK_OBJECT_INFO(hint.addr, shard, pool, crush);
+  assert(hint.addr.get_clone_prefix() == data_addr.get_clone_prefix());
+  assert(hint.addr.is_metadata());
+  assert(hint.addr.get_offset_bytes() == 0);
+  return hint;
+}
+
+// Case 5 of the laddr_hint_t construction table: data hint for cloning an
+// existing onode.  Shard, pool, reversed crush hash and local object id are
+// fixed on top of L_ADDR_MIN, then find_next_random() picks the local clone
+// id (clone_prefix_at_clone_id condition, gen_random policy).  The result is
+// asserted to keep the object id, be non-metadata and have a zero byte
+// offset.
+laddr_hint_t laddr_hint_t::create_clone_object_data_hint(
+  laddr_shard_t shard,
+  laddr_pool_t pool,
+  laddr_crush_hash_t crush,
+  local_object_id_t id,
+  extent_len_t block_size)
+{
+  // Assemble the fixed object prefix first.
+  laddr_t prefix = L_ADDR_MIN;
+  prefix.set_shard(shard);
+  prefix.set_pool(pool);
+  prefix.set_reversed_hash(crush);
+  prefix.set_local_object_id(id);
+
+  laddr_hint_t hint{
+    prefix,
+    laddr_conflict_condition_t::clone_prefix_at_clone_id,
+    laddr_conflict_policy_t::gen_random,
+    block_size
+  };
+  hint.find_next_random();
+
+  CHECK_OBJECT_INFO(hint.addr, shard, pool, crush);
+  assert(hint.addr.get_local_object_id() == id);
+  assert(!hint.addr.is_metadata());
+  assert(hint.addr.get_offset_bytes() == 0);
+  return hint;
+}
+
+// Case 6 of the laddr_hint_t construction table: metadata hint for cloning
+// an existing onode.  Starts from create_clone_object_data_hint() and turns
+// the metadata flag on.  The asserts verify that the object info and clone
+// prefix are unchanged by that, and that the byte offset is still zero
+// (the same set of checks as create_fresh_object_md_hint()).
+laddr_hint_t laddr_hint_t::create_clone_object_md_hint(
+  laddr_shard_t shard,
+  laddr_pool_t pool,
+  laddr_crush_hash_t crush,
+  local_object_id_t id,
+  extent_len_t block_size)
+{
+  auto hint = create_clone_object_data_hint(shard, pool, crush, id, block_size);
+  // Only read by the asserts below, hence [[maybe_unused]] for NDEBUG builds.
+  [[maybe_unused]] const laddr_t data_addr = hint.addr;
+
+  hint.addr.set_metadata(true);
+
+  CHECK_OBJECT_INFO(hint.addr, shard, pool, crush);
+  assert(hint.addr.get_clone_prefix() == data_addr.get_clone_prefix());
+  assert(hint.addr.is_metadata());
+  assert(hint.addr.get_offset_bytes() == 0);
+  return hint;
+}
+
+// Case 7 of the laddr_hint_t construction table: data hint inside an
+// existing object.  The given clone prefix is used as the address verbatim
+// (all_at_never condition, linear_search policy); it is asserted to be a
+// non-metadata address with a zero block offset.
+laddr_hint_t laddr_hint_t::create_object_data_hint(
+  laddr_t clone_prefix,
+  extent_len_t block_size)
+{
+  assert(!clone_prefix.is_metadata());
+  assert(clone_prefix.get_offset_blocks() == 0);
+
+  return {
+    clone_prefix,
+    laddr_conflict_condition_t::all_at_never,
+    laddr_conflict_policy_t::linear_search,
+    block_size
+  };
+}
+
+// Case 8 of the laddr_hint_t construction table: metadata hint inside an
+// existing object.  The clone prefix gets its metadata flag set, then
+// find_next_random() picks the block offset (all_at_block_offset condition,
+// gen_random policy).
+laddr_hint_t laddr_hint_t::create_object_md_hint(
+  laddr_t clone_prefix,
+  extent_len_t block_size)
+{
+  // clone_prefix is a by-value copy, so it can be adjusted directly.
+  clone_prefix.set_metadata(true);
+
+  laddr_hint_t hint{
+    clone_prefix,
+    laddr_conflict_condition_t::all_at_block_offset,
+    laddr_conflict_policy_t::gen_random,
+    block_size
+  };
+  hint.find_next_random();
+  return hint;
+}
+
+// Re-roll, in place, the field of `addr` selected by `condition`, retrying
+// until the re-rolled value differs from the current one.  Only valid when
+// `policy` is gen_random (asserted).  The all_at_never condition has no field
+// to re-roll and aborts.
+void laddr_hint_t::find_next_random() {
+  assert(policy == laddr_conflict_policy_t::gen_random);
+
+  // Snapshot of the address before re-rolling: the loops compare against it,
+  // and the asserts use it to verify that the other fields are untouched.
+  auto orig_addr = addr;
+  switch (condition) {
+  case laddr_conflict_condition_t::object_prefix_at_object_id:
+    // New random local object id; retry while the address is unchanged or
+    // is_object_address() rejects it.
+    do {
+      addr.set_local_object_id(rand_field());
+    } while (orig_addr == addr || !addr.is_object_address());
+    assert(orig_addr.get_shard() == addr.get_shard());
+    assert(orig_addr.get_pool() == addr.get_pool());
+    assert(orig_addr.get_reversed_hash() == addr.get_reversed_hash());
+    assert(orig_addr.get_object_content() == addr.get_object_content());
+    assert(addr.is_object_address());
+    break;
+  case laddr_conflict_condition_t::clone_prefix_at_clone_id:
+    // New random local clone id; retry while the clone id is unchanged.
+    // The object prefix, metadata flag and byte offset must be preserved.
+    do {
+      addr.set_local_clone_id(rand_field());
+    } while (orig_addr.get_local_clone_id() == addr.get_local_clone_id());
+    assert(orig_addr.get_object_prefix() == addr.get_object_prefix());
+    assert(orig_addr.is_metadata() == addr.is_metadata());
+    assert(orig_addr.get_offset_bytes() == addr.get_offset_bytes());
+    assert(addr.is_object_address());
+    break;
+  case laddr_conflict_condition_t::all_at_object_content:
+    // New random aligned object content; retry while the address is
+    // unchanged.
+    // NOTE(review): the factory methods in this file pair this condition
+    // with linear_search, which would trip the policy assert above -- confirm
+    // whether this branch has a caller.
+    do {
+      addr.set_object_content(rand_field_aligned(block_size));
+    } while (orig_addr == addr);
+    assert(orig_addr.get_object_prefix() == addr.get_object_prefix());
+    assert(addr.is_global_address() || addr.is_onode_extent_address());
+    break;
+  case laddr_conflict_condition_t::all_at_block_offset:
+    // New random aligned block offset; retry while the byte offset is
+    // unchanged.  Object info, metadata flag and clone id must be preserved.
+    do {
+      addr.set_offset_by_blocks(rand_field_aligned(block_size));
+    } while (orig_addr.get_offset_bytes() == addr.get_offset_bytes());
+    assert(orig_addr.get_object_info() == addr.get_object_info());
+    assert(orig_addr.is_metadata() == addr.is_metadata());
+    assert(orig_addr.get_local_clone_id() == addr.get_local_clone_id());
+    // NOTE(review): get_block_size_mask() is derived from
+    // block_size >> laddr_t::UNIT_SHIFT but is applied to a byte offset
+    // here -- confirm the units are intended.
+    assert((addr.get_offset_bytes() & get_block_size_mask(block_size)) == 0);
+    break;
+  case laddr_conflict_condition_t::all_at_never:
+    // No break follows; presumably ceph_abort() does not return -- confirm.
+    ceph_abort("impossible conflict case");
+  default:
+    __builtin_unreachable();
+  }
+}
+
 std::ostream &operator<<(std::ostream &out, const pladdr_t &pladdr)
 {
   out << "pladdr(";
index 9bbb35495c87f85abe3d27247beb94f7f6f57141..0979ec1c1cfb29d8e32530e4b7b97eb571ed1022 100644 (file)
@@ -1624,6 +1624,150 @@ constexpr laddr_t L_ADDR_MAX = laddr_t::from_raw_uint(laddr_t::RAW_VALUE_MAX);
 // laddr_t built from raw value 0.
 constexpr laddr_t L_ADDR_MIN = laddr_t::from_raw_uint(0);
 // The null laddr_t is the same value as L_ADDR_MAX.
 constexpr laddr_t L_ADDR_NULL = L_ADDR_MAX;
 
+// This enum specifies the conflict condition for laddr allocation.  It
+// selects how laddr_hint_t::conflict_with() compares addresses and which
+// field laddr_hint_t::find_next_random() re-rolls.
+enum class laddr_conflict_condition_t {
+  // Fixed shard, pool, and reversed_hash, allocate a unique local object id.
+  // conflict_with() compares object prefixes.
+  object_prefix_at_object_id,
+  // Fixed object prefix, allocate a unique local clone id.
+  // conflict_with() compares clone prefixes.
+  clone_prefix_at_clone_id,
+  // Fixed object prefix, allocate a unique object content value.
+  // conflict_with() compares whole addresses.
+  all_at_object_content,
+  // Fixed clone prefix, allocate a unique block offset.
+  // conflict_with() compares whole addresses.
+  all_at_block_offset,
+  // Fixed laddr, conflicts never occur.  find_next_random() aborts for this
+  // condition.
+  all_at_never,
+};
+
+// The behavior of handling laddr allocation conflict
+// see BtreeLBAManager::search_insert_pos()
+enum class laddr_conflict_policy_t {
+  // Find appropriate address by following the lba iterator.  The
+  // laddr_hint_t factory methods pair this policy with
+  // laddr_conflict_condition_t::{all_at_object_content, all_at_never}.
+  // NOTE(review): this comment used to name
+  // "laddr_conflict_policy_t::{all_at_object_content, all_at_block_offset}";
+  // those enumerators belong to laddr_conflict_condition_t, and
+  // create_object_md_hint() pairs all_at_block_offset with gen_random --
+  // confirm the intended restriction.
+  linear_search,
+  // Generate a new random hint (see laddr_hint_t::find_next_random(), which
+  // asserts this policy).
+  gen_random,
+};
+
+// A candidate logical address for allocation together with the rule for
+// detecting a conflict (`condition`) and the way to react to one (`policy`).
+// The struct is initialized by position ({addr, condition, policy,
+// block_size}) throughout, so the member order below must not change.
+struct laddr_hint_t {
+  // The hinted address.
+  laddr_t addr;
+  // Which comparison conflict_with() performs and which field
+  // find_next_random() re-rolls.
+  laddr_conflict_condition_t condition;
+  // How a conflict is resolved: linear search or a new random hint.
+  laddr_conflict_policy_t policy;
+  // Block size used by find_next_random() to align random values.
+  extent_len_t block_size;
+
+  // Hint for exactly `laddr`: all_at_never condition, linear_search policy.
+  static laddr_hint_t create_as_fixed(
+    laddr_t laddr,
+    extent_len_t block_size = laddr_t::UNIT_SIZE)
+  {
+    return {
+      laddr,
+      laddr_conflict_condition_t::all_at_never,
+      laddr_conflict_policy_t::linear_search,
+      block_size
+    };
+  }
+  // Hint with a random, aligned object content on top of L_ADDR_MIN;
+  // asserted to be a global address.
+  static laddr_hint_t create_global_md_hint(
+    extent_len_t block_size = laddr_t::UNIT_SIZE);
+  // Hint with the given shard/pool/hash and a random, aligned object content;
+  // asserted to be an onode extent address.
+  static laddr_hint_t create_onode_hint(
+    laddr_shard_t shard,
+    laddr_pool_t pool,
+    laddr_crush_hash_t crush,
+    extent_len_t block_size);
+
+  // According to the state of Onode, there are 6 valid cases when constructing
+  // laddr hint (Y: the field is already known, N: it has to be allocated):
+  // |No.|object id|clone id|is metadata|description|
+  // | 1 | N | N | N | write data to a fresh object                       |
+  // | 2 | N | N | Y | write omap/xattr to a fresh object                 |
+  // | 3 | N | Y | N | invalid case                                       |
+  // | 4 | N | Y | Y | invalid case                                       |
+  // | 5 | Y | N | N | clone existing onode                               |
+  // | 6 | Y | N | Y | clone existing onode that might only contain omap  |
+  // | 7 | Y | Y | N | it might occur if first write omap then write data |
+  // | 8 | Y | Y | Y | allocate omap extents in existing onode            |
+
+  // 1
+  static laddr_hint_t create_fresh_object_data_hint(
+    laddr_shard_t shard,
+    laddr_pool_t pool,
+    laddr_crush_hash_t crush,
+    extent_len_t block_size);
+  // 2
+  static laddr_hint_t create_fresh_object_md_hint(
+    laddr_shard_t shard,
+    laddr_pool_t pool,
+    laddr_crush_hash_t crush,
+    extent_len_t block_size);
+  // 5
+  static laddr_hint_t create_clone_object_data_hint(
+    laddr_shard_t shard,
+    laddr_pool_t pool,
+    laddr_crush_hash_t crush,
+    local_object_id_t object_id,
+    extent_len_t block_size);
+  // 6
+  static laddr_hint_t create_clone_object_md_hint(
+    laddr_shard_t shard,
+    laddr_pool_t pool,
+    laddr_crush_hash_t crush,
+    local_object_id_t object_id,
+    extent_len_t block_size);
+  // 7
+  static laddr_hint_t create_object_data_hint(
+    laddr_t clone_prefix,
+    extent_len_t block_size);
+  // 8
+  static laddr_hint_t create_object_md_hint(
+    laddr_t clone_prefix,
+    extent_len_t block_size);
+
+  // Re-roll the field selected by `condition`; requires the gen_random
+  // policy.
+  void find_next_random();
+
+  // Returns whether `other` conflicts with this hint: equal object prefix for
+  // object_prefix_at_object_id, equal clone prefix for
+  // clone_prefix_at_clone_id, and an identical address for the remaining
+  // conditions.
+  bool conflict_with(laddr_t other) const {
+    switch (condition) {
+    case laddr_conflict_condition_t::object_prefix_at_object_id:
+      assert(addr.is_object_address());
+      return addr.get_object_prefix() == other.get_object_prefix();
+    case laddr_conflict_condition_t::clone_prefix_at_clone_id:
+      assert(addr.is_object_address());
+      return addr.get_clone_prefix() == other.get_clone_prefix();
+    case laddr_conflict_condition_t::all_at_object_content:
+    case laddr_conflict_condition_t::all_at_block_offset:
+    case laddr_conflict_condition_t::all_at_never:
+      return addr == other;
+    default:
+      __builtin_unreachable();
+    }
+  }
+
+  // Returns the address derived from `addr` for the current condition:
+  // addr.with_object_content(0) for object_prefix_at_object_id,
+  // addr.with_offset_by_blocks(0).without_metadata() for
+  // clone_prefix_at_clone_id, and `addr` itself otherwise.
+  // NOTE(review): presumably the lowest address that could conflict with
+  // this hint -- confirm against the caller.
+  laddr_t lower_boundary() const {
+    switch (condition) {
+    case laddr_conflict_condition_t::object_prefix_at_object_id:
+      assert(addr.is_object_address());
+      return addr.with_object_content(0);
+    case laddr_conflict_condition_t::clone_prefix_at_clone_id:
+      assert(addr.is_object_address());
+      return addr.with_offset_by_blocks(0).without_metadata();
+    case laddr_conflict_condition_t::all_at_object_content:
+    case laddr_conflict_condition_t::all_at_block_offset:
+    case laddr_conflict_condition_t::all_at_never:
+      return addr;
+    default:
+      __builtin_unreachable();
+    }
+  }
+
+  // Member-wise equality over all four fields.
+  bool operator==(const laddr_hint_t&) const = default;
+};
+// Streams a textual description of the hint (address, condition, policy and
+// block size).
+std::ostream &operator<<(std::ostream &out, const laddr_hint_t &hint);
+
+// Null hint: L_ADDR_NULL address, all_at_never condition, gen_random policy
+// and a block size of zero.
+constexpr laddr_hint_t LADDR_HINT_NULL{
+  /*addr=*/       L_ADDR_NULL,
+  /*condition=*/  laddr_conflict_condition_t::all_at_never,
+  /*policy=*/     laddr_conflict_policy_t::gen_random,
+  /*block_size=*/ 0
+};
+
 struct __attribute__((packed)) laddr_le_t {
   ceph_le64 low64;
   ceph_le64 high64;
@@ -3489,6 +3633,7 @@ template <> struct fmt::formatter<crimson::os::seastore::journal_seq_t> : fmt::o
 // fmt::formatter specializations deriving from fmt::ostream_formatter, one
 // per seastore type.
 template <> struct fmt::formatter<crimson::os::seastore::backend_type_t> : fmt::ostream_formatter {};
 template <> struct fmt::formatter<crimson::os::seastore::journal_tail_delta_t> : fmt::ostream_formatter {};
 template <> struct fmt::formatter<crimson::os::seastore::laddr_t> : fmt::ostream_formatter {};
+template <> struct fmt::formatter<crimson::os::seastore::laddr_hint_t> : fmt::ostream_formatter {};
 template <> struct fmt::formatter<crimson::os::seastore::laddr_offset_t> : fmt::ostream_formatter {};
 template <> struct fmt::formatter<crimson::os::seastore::laddr_list_t> : fmt::ostream_formatter {};
 template <> struct fmt::formatter<crimson::os::seastore::omap_root_t> : fmt::ostream_formatter {};