From 553048fbf97af999783deb7e992c8ecfa5e55500 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 11 Oct 2017 14:17:39 -0500 Subject: [PATCH] osd/OSDMap: track newly removed and purged snaps in each epoch Instead of maintaining a set of snapids that have been removed over all time, note just the newly removed and newly purged snaps in each OSDMap epoch. This is easier to consume for both the Objecter and OSD. Also keep the interval of snaps that have been removed but not purged in each OSDMap. This is extremely convenient because it frees the OSDs from having to maintain this information in parallel even when they may not have PGs belonging to those pools. These structures will be large right when the upgrade happens and the pg_pool_t::removed_snaps gets copied to the new fields, but in the steady state they will be relatively small, reflecting only the set of snaps that are currently being removed. This also provides convenient visibility into the "trimming snaps" set that the cluster is working on. 
Signed-off-by: Sage Weil --- src/osd/OSDMap.cc | 142 ++++++++++++++++++++++++++++++++++++++++++++-- src/osd/OSDMap.h | 28 +++++++++ 2 files changed, 164 insertions(+), 6 deletions(-) diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 4588a34ab4418..c7b13a77d6981 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -472,10 +472,13 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const ENCODE_START(8, 7, bl); { - uint8_t v = 5; + uint8_t v = 6; if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { v = 3; } + if (!HAVE_FEATURE(features, SERVER_MIMIC)) { + v = 5; + } ENCODE_START(v, 1, bl); // client-usable data ::encode(fsid, bl); ::encode(epoch, bl); @@ -512,6 +515,10 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const ::encode(new_pg_upmap_items, bl); ::encode(old_pg_upmap_items, bl); } + if (v >= 6) { + ::encode(new_removed_snaps, bl); + ::encode(new_purged_snaps, bl); + } ENCODE_FINISH(bl); // client-usable data } @@ -693,7 +700,7 @@ void OSDMap::Incremental::decode(bufferlist::iterator& bl) return; } { - DECODE_START(5, bl); // client-usable data + DECODE_START(6, bl); // client-usable data ::decode(fsid, bl); ::decode(epoch, bl); ::decode(modified, bl); @@ -736,6 +743,10 @@ void OSDMap::Incremental::decode(bufferlist::iterator& bl) ::decode(new_pg_upmap_items, bl); ::decode(old_pg_upmap_items, bl); } + if (struct_v >= 6) { + ::decode(new_removed_snaps, bl); + ::decode(new_purged_snaps, bl); + } DECODE_FINISH(bl); // client-usable data } @@ -1053,6 +1064,37 @@ void OSDMap::Incremental::dump(Formatter *f) const f->dump_string("old", erasure_code_profile.c_str()); } f->close_section(); + + f->open_array_section("new_removed_snaps"); + for (auto& p : new_removed_snaps) { + f->open_object_section("pool"); + f->dump_int("pool", p.first); + f->open_array_section("snaps"); + for (auto q = p.second.begin(); q != p.second.end(); ++q) { + f->open_object_section("interval"); + f->dump_unsigned("begin", q.get_start()); + 
f->dump_unsigned("length", q.get_len()); + f->close_section(); + } + f->close_section(); + f->close_section(); + } + f->close_section(); + f->open_array_section("new_purged_snaps"); + for (auto& p : new_purged_snaps) { + f->open_object_section("pool"); + f->dump_int("pool", p.first); + f->open_array_section("snaps"); + for (auto q = p.second.begin(); q != p.second.end(); ++q) { + f->open_object_section("interval"); + f->dump_unsigned("begin", q.get_start()); + f->dump_unsigned("length", q.get_len()); + f->close_section(); + } + f->close_section(); + f->close_section(); + } + f->close_section(); } void OSDMap::Incremental::generate_test_instances(list& o) @@ -1602,6 +1644,24 @@ int OSDMap::apply_incremental(const Incremental &inc) pools[pool.first].last_change = epoch; } + new_removed_snaps = inc.new_removed_snaps; + new_purged_snaps = inc.new_purged_snaps; + for (auto p = new_removed_snaps.begin(); + p != new_removed_snaps.end(); + ++p) { + removed_snaps_queue[p->first].union_of(p->second); + } + for (auto p = new_purged_snaps.begin(); + p != new_purged_snaps.end(); + ++p) { + auto q = removed_snaps_queue.find(p->first); + assert(q != removed_snaps_queue.end()); + q->second.subtract(p->second); + if (q->second.empty()) { + removed_snaps_queue.erase(q); + } + } + for (const auto &pname : inc.new_pool_names) { auto pool_name_entry = pool_name.find(pname.first); if (pool_name_entry != pool_name.end()) { @@ -2306,10 +2366,13 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const ENCODE_START(8, 7, bl); { - uint8_t v = 6; + uint8_t v = 7; if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { v = 3; } + if (!HAVE_FEATURE(features, SERVER_MIMIC)) { + v = 6; + } ENCODE_START(v, 1, bl); // client-usable data // base ::encode(fsid, bl); @@ -2372,14 +2435,21 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const if (v >= 6) { ::encode(crush_version, bl); } + if (v >= 7) { + ::encode(new_removed_snaps, bl); + ::encode(new_purged_snaps, bl); + } ENCODE_FINISH(bl); 
// client-usable data } { - uint8_t target_v = 5; + uint8_t target_v = 6; if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { target_v = 1; } + if (!HAVE_FEATURE(features, SERVER_MIMIC)) { + target_v = 5; + } ENCODE_START(target_v, 1, bl); // extended, osd-only data ::encode(osd_addrs->hb_back_addr, bl, features); ::encode(osd_info, bl); @@ -2407,6 +2477,9 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const ::encode(require_min_compat_client, bl); ::encode(require_osd_release, bl); } + if (target_v >= 6) { + ::encode(removed_snaps_queue, bl); + } ENCODE_FINISH(bl); // osd-only data } @@ -2582,7 +2655,7 @@ void OSDMap::decode(bufferlist::iterator& bl) * Since we made it past that hurdle, we can use our normal paths. */ { - DECODE_START(6, bl); // client-usable data + DECODE_START(7, bl); // client-usable data // base ::decode(fsid, bl); ::decode(epoch, bl); @@ -2640,11 +2713,15 @@ void OSDMap::decode(bufferlist::iterator& bl) if (struct_v >= 6) { ::decode(crush_version, bl); } + if (struct_v >= 7) { + ::decode(new_removed_snaps, bl); + ::decode(new_purged_snaps, bl); + } DECODE_FINISH(bl); // client-usable data } { - DECODE_START(5, bl); // extended, osd-only data + DECODE_START(6, bl); // extended, osd-only data ::decode(osd_addrs->hb_back_addr, bl); ::decode(osd_info, bl); ::decode(blacklist, bl); @@ -2693,6 +2770,9 @@ void OSDMap::decode(bufferlist::iterator& bl) require_osd_release = 0; } } + if (struct_v >= 6) { + ::decode(removed_snaps_queue, bl); + } DECODE_FINISH(bl); // osd-only data } @@ -2882,6 +2962,52 @@ void OSDMap::dump(Formatter *f) const f->close_section(); dump_erasure_code_profiles(erasure_code_profiles, f); + + f->open_array_section("removed_snaps_queue"); + for (auto& p : removed_snaps_queue) { + f->open_object_section("pool"); + f->dump_int("pool", p.first); + f->open_array_section("snaps"); + for (auto q = p.second.begin(); q != p.second.end(); ++q) { + f->open_object_section("interval"); + f->dump_unsigned("begin", q.get_start()); + 
f->dump_unsigned("length", q.get_len()); + f->close_section(); + } + f->close_section(); + f->close_section(); + } + f->close_section(); + f->open_array_section("new_removed_snaps"); + for (auto& p : new_removed_snaps) { + f->open_object_section("pool"); + f->dump_int("pool", p.first); + f->open_array_section("snaps"); + for (auto q = p.second.begin(); q != p.second.end(); ++q) { + f->open_object_section("interval"); + f->dump_unsigned("begin", q.get_start()); + f->dump_unsigned("length", q.get_len()); + f->close_section(); + } + f->close_section(); + f->close_section(); + } + f->close_section(); + f->open_array_section("new_purged_snaps"); + for (auto& p : new_purged_snaps) { + f->open_object_section("pool"); + f->dump_int("pool", p.first); + f->open_array_section("snaps"); + for (auto q = p.second.begin(); q != p.second.end(); ++q) { + f->open_object_section("interval"); + f->dump_unsigned("begin", q.get_start()); + f->dump_unsigned("length", q.get_len()); + f->close_section(); + } + f->close_section(); + f->close_section(); + } + f->close_section(); } void OSDMap::generate_test_instances(list& o) @@ -2968,6 +3094,10 @@ void OSDMap::print_pools(ostream& out) const if (!pool.second.removed_snaps.empty()) out << "\tremoved_snaps " << pool.second.removed_snaps << "\n"; + auto p = removed_snaps_queue.find(pool.first); + if (p != removed_snaps_queue.end()) { + out << "\tremoved_snaps_queue " << p->second << "\n"; + } } out << std::endl; } diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index 0deaae4d7c0d2..d817a632a017b 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -343,6 +343,10 @@ class OSDMap { public: MEMPOOL_CLASS_HELPERS(); + typedef interval_set< + snapid_t, + mempool::osdmap::flat_map> snap_interval_set_t; + class Incremental { public: MEMPOOL_CLASS_HELPERS(); @@ -389,6 +393,8 @@ public: mempool::osdmap::map> new_pg_upmap; mempool::osdmap::map>> new_pg_upmap_items; mempool::osdmap::set old_pg_upmap, old_pg_upmap_items; + mempool::osdmap::map 
new_removed_snaps; + mempool::osdmap::map new_purged_snaps; string cluster_snapshot; @@ -523,6 +529,15 @@ private: mempool::osdmap::unordered_map blacklist; + /// queue of snaps to remove + mempool::osdmap::map removed_snaps_queue; + + /// removed_snaps additions this epoch + mempool::osdmap::map new_removed_snaps; + + /// removed_snaps removals this epoch + mempool::osdmap::map new_purged_snaps; + epoch_t cluster_snapshot_epoch; string cluster_snapshot; bool new_blacklist_entries; @@ -1140,6 +1155,19 @@ public: return false; } + const mempool::osdmap::map& + get_removed_snaps_queue() const { + return removed_snaps_queue; + } + const mempool::osdmap::map& + get_new_removed_snaps() const { + return new_removed_snaps; + } + const mempool::osdmap::map& + get_new_purged_snaps() const { + return new_purged_snaps; + } + int64_t lookup_pg_pool_name(const string& name) const { auto p = name_pool.find(name); if (p == name_pool.end()) -- 2.39.5