From 88ac6d938ff28041d92b8ce828e59cd08a791387 Mon Sep 17 00:00:00 2001 From: Bill Scales <156200352+bill-scales@users.noreply.github.com> Date: Thu, 6 Mar 2025 09:44:00 +0000 Subject: [PATCH] osd: EC optimizations: add shard_versions to object_info_t EC optimized pools do not always update every shard for every write I/O, this includes not updating the object_info_t (OI attribute). This means different shards can have OI indicating the object is at different versions. When an I/O updates a subset of the shards, the OI for the updated shards will record the old version number for the unmodified shards in the shard_versions map. The latest OI therefore has a record of the expected version number for all the shards which can be used to work out what needs to be backfilled. An empty shard_versions map implies that the OI attribute should be the same on all shards. Signed-off-by: Bill Scales --- src/osd/osd_types.cc | 16 ++++++++++++++-- src/osd/osd_types.h | 2 ++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 366d98a6c2d..6068e6a1b1f 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -6439,7 +6439,7 @@ void object_info_t::encode(ceph::buffer::list& bl, uint64_t features) const for (auto i = watchers.cbegin(); i != watchers.cend(); ++i) { old_watchers.insert(make_pair(i->first.second, i->second)); } - ENCODE_START(17, 8, bl); + ENCODE_START(18, 8, bl); encode(soid, bl); encode(myoloc, bl); //Retained for compatibility encode((__u32)0, bl); // was category, no longer used @@ -6473,13 +6473,14 @@ void object_info_t::encode(ceph::buffer::list& bl, uint64_t features) const if (has_manifest()) { encode(manifest, bl); } + encode(shard_versions, bl); ENCODE_FINISH(bl); } void object_info_t::decode(ceph::buffer::list::const_iterator& bl) { object_locator_t myoloc; - DECODE_START_LEGACY_COMPAT_LEN(17, 8, 8, bl); + DECODE_START_LEGACY_COMPAT_LEN(18, 8, 8, bl); map old_watchers; decode(soid, bl); 
decode(myoloc, bl); @@ -6565,6 +6566,9 @@ void object_info_t::decode(ceph::buffer::list::const_iterator& bl) decode(manifest, bl); } } + if (struct_v >= 18) { + decode(shard_versions, bl); + } DECODE_FINISH(bl); } @@ -6604,6 +6608,14 @@ void object_info_t::dump(Formatter *f) const f->close_section(); } f->close_section(); + f->open_array_section("shard_versions"); + for (auto p = shard_versions.cbegin(); p != shard_versions.cend(); ++p) { + f->open_object_section("shard"); + f->dump_int("id", int(p->first)); + f->dump_stream("version") << p->second; + f->close_section(); + } + f->close_section(); } void object_info_t::generate_test_instances(list& o) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index c1786b56d7a..b521f82183b 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -6088,6 +6088,8 @@ struct object_info_t { struct object_manifest_t manifest; + std::map shard_versions; + void copy_user_bits(const object_info_t& other); bool test_flag(flag_t f) const { -- 2.39.5