From 49833c3bb264949b8126796997a95a95b50af411 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 12 Oct 2017 15:44:48 -0500 Subject: [PATCH] mon/OSDMonitor: share snaps removed during a map gap If a client requests a map older than the mon's oldest, share with them snaps deleted during the gap too. Signed-off-by: Sage Weil --- src/messages/MOSDMap.h | 22 +++++++++++++++++++--- src/mon/OSDMonitor.cc | 34 +++++++++++++++++++++++++++++----- src/mon/OSDMonitor.h | 4 ++++ 3 files changed, 52 insertions(+), 8 deletions(-) diff --git a/src/messages/MOSDMap.h b/src/messages/MOSDMap.h index 865642cf417c3..96b40012e140e 100644 --- a/src/messages/MOSDMap.h +++ b/src/messages/MOSDMap.h @@ -22,7 +22,8 @@ class MOSDMap : public Message { - static const int HEAD_VERSION = 3; + static const int HEAD_VERSION = 4; /* version 4 appends gap_removed_snaps at the end of the payload */ + static const int COMPAT_VERSION = 3; /* handed to the Message constructor and stored in header.compat_version by encode_payload */ public: uuid_d fsid; @@ -30,6 +31,11 @@ class MOSDMap : public Message { map incremental_maps; epoch_t oldest_map =0, newest_map = 0; + // if we are fetching maps from the mon and have to jump a gap + // (client's next needed map is older than mon's oldest) we can + // share removed snaps from the gap here. 
+ mempool::osdmap::map gap_removed_snaps; /* NOTE(review): the template arguments look stripped from this copy of the patch; presumably keyed by pool id with OSDMap::snap_interval_set_t values, as used by get_removed_snaps_range - confirm against the original commit */ + epoch_t get_first() const { epoch_t e = 0; map::const_iterator i = maps.begin(); @@ -56,9 +62,9 @@ class MOSDMap : public Message { } - MOSDMap() : Message(CEPH_MSG_OSD_MAP, HEAD_VERSION) { } + MOSDMap() : Message(CEPH_MSG_OSD_MAP, HEAD_VERSION, COMPAT_VERSION) { } MOSDMap(const uuid_d &f) - : Message(CEPH_MSG_OSD_MAP, HEAD_VERSION), + : Message(CEPH_MSG_OSD_MAP, HEAD_VERSION, COMPAT_VERSION), fsid(f), oldest_map(0), newest_map(0) { } private: @@ -78,9 +84,13 @@ public: oldest_map = 0; newest_map = 0; } + if (header.version >= 4) { /* the field is only read when the received header.version is 4 or later */ + ::decode(gap_removed_snaps, p); + } } void encode_payload(uint64_t features) override { header.version = HEAD_VERSION; + header.compat_version = COMPAT_VERSION; ::encode(fsid, payload); if ((features & CEPH_FEATURE_PGID64) == 0 || (features & CEPH_FEATURE_PGPOOL3) == 0 || @@ -93,6 +103,7 @@ public: header.version = 1; // old old_client version else if ((features & CEPH_FEATURE_OSDENC) == 0) header.version = 2; // old pg_pool_t + header.compat_version = 0; // reencode maps using old format // @@ -138,6 +149,9 @@ public: ::encode(oldest_map, payload); ::encode(newest_map, payload); } + if (header.version >= 4) { /* header.version may have been lowered to 1 or 2 above for peers lacking newer features, in which case the field is omitted */ + ::encode(gap_removed_snaps, payload); + } } const char *get_type_name() const override { return "osdmap"; } @@ -145,6 +159,8 @@ public: out << "osd_map(" << get_first() << ".." << get_last(); if (oldest_map || newest_map) out << " src has " << oldest_map << ".." 
<< newest_map; + if (!gap_removed_snaps.empty()) + out << " +gap_removed_snaps"; out << ")"; } }; diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 37dd8988ab031..13bf749b2a29e 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -2966,18 +2966,20 @@ void OSDMonitor::send_incremental(epoch_t first, } if (first < get_first_committed()) { + MOSDMap *m = new MOSDMap(osdmap.get_fsid()); + m->oldest_map = get_first_committed(); + m->newest_map = osdmap.get_epoch(); + + // share removed snaps during the gap + get_removed_snaps_range(first, m->oldest_map, &m->gap_removed_snaps); /* uses the requested epoch as the range start, so it has to run before first is overwritten by the next statement */ + first = get_first_committed(); bufferlist bl; int err = get_version_full(first, bl); assert(err == 0); assert(bl.length()); - dout(20) << "send_incremental starting with base full " << first << " " << bl.length() << " bytes" << dendl; - - MOSDMap *m = new MOSDMap(osdmap.get_fsid()); - m->oldest_map = get_first_committed(); - m->newest_map = osdmap.get_epoch(); m->maps[first] = bl; if (req) { @@ -3010,6 +3012,28 @@ void OSDMonitor::send_incremental(epoch_t first, } } +void OSDMonitor::get_removed_snaps_range( + epoch_t start, epoch_t end, + mempool::osdmap::map *gap_removed_snaps) /* NOTE(review): template arguments on the map type look stripped from this copy of the patch - confirm against the original commit */ +{ /* For each pool in osdmap.get_pools(), unions the snap interval sets read from the mon store under OSD_SNAP_PREFIX for epochs start up to but excluding end, and stores the result in *gap_removed_snaps under the pool id. */ + // we only care about pools that exist now. 
+ for (auto& p : osdmap.get_pools()) { + auto& t = (*gap_removed_snaps)[p.first]; /* NOTE(review): operator[] presumably creates an initially empty entry for every pool, even when no snaps are found below - confirm that is intended */ + for (epoch_t epoch = start; epoch < end; ++epoch) { /* half-open range: end itself is not read */ + string k = make_snap_epoch_key(p.first, epoch); + bufferlist v; + mon->store->get(OSD_SNAP_PREFIX, k, v); /* return value is not checked; an empty buffer is treated as nothing recorded for this pool and epoch */ + if (v.length()) { + auto q = v.begin(); + OSDMap::snap_interval_set_t snaps; + ::decode(snaps, q); + t.union_of(snaps); /* fold this epoch's intervals into the set accumulated for the pool */ + } + } + dout(10) << __func__ << " " << p.first << " " << t << dendl; + } +} + int OSDMonitor::get_version(version_t ver, bufferlist& bl) { if (inc_osd_cache.lookup(ver, &bl)) { diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 70ca0e6bbcc10..abb1926b167a4 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -531,6 +531,10 @@ public: send_incremental(op, start); } + void get_removed_snaps_range( + epoch_t start, epoch_t end, + mempool::osdmap::map *gap_removed_snaps); + int get_version(version_t ver, bufferlist& bl) override; int get_version_full(version_t ver, bufferlist& bl) override; -- 2.39.5