From 1c2eb40651b306434bd3f126aa0c17f73ed61b62 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 6 Aug 2018 12:54:55 -0500 Subject: [PATCH] mon/OSDMonitor: clean temps and upmaps in encode_pending, efficiently - do not rebuild the next map when we already have it - do this work in encode_pending, not create_pending, so we get bad values before they are published. Signed-off-by: Sage Weil --- src/mon/OSDMonitor.cc | 13 ++++++------- src/osd/OSDMap.cc | 31 ++++++++++++++----------------- src/osd/OSDMap.h | 7 +++++-- src/test/osd/TestOSDMap.cc | 21 +++++++++++++++++---- 4 files changed, 42 insertions(+), 30 deletions(-) diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index c30a18ee9e5..dc7aefd493e 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -695,10 +695,6 @@ void OSDMonitor::create_pending() dout(10) << "create_pending e " << pending_inc.epoch << dendl; - // clean up pg_temp, primary_temp - OSDMap::clean_temps(cct, osdmap, &pending_inc); - dout(10) << "create_pending did clean_temps" << dendl; - // safety checks (this shouldn't really happen) { if (osdmap.backfillfull_ratio <= 0) { @@ -1069,6 +1065,12 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t) tmp.deepish_copy_from(osdmap); tmp.apply_incremental(pending_inc); + // clean pg_temp mappings + OSDMap::clean_temps(cct, osdmap, tmp, &pending_inc); + + // clean inappropriate pg_upmap/pg_upmap_items (if any) + osdmap.maybe_remove_pg_upmaps(cct, osdmap, tmp, &pending_inc); + // update creating pgs first so that we can remove the created pgid and // process the pool flag removal below in the same osdmap epoch. auto pending_creatings = update_pending_pgs(pending_inc, tmp); @@ -1399,9 +1401,6 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t) } } - // clean inappropriate pg_upmap/pg_upmap_items (if any) - osdmap.maybe_remove_pg_upmaps(cct, osdmap, &pending_inc); - // features for osdmap and its incremental uint64_t features; diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index b78ddb2b957..ddc3ab8f27e 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -1641,18 +1641,17 @@ void OSDMap::dedup(const OSDMap *o, OSDMap *n) } void OSDMap::clean_temps(CephContext *cct, - const OSDMap& osdmap, Incremental *pending_inc) + const OSDMap& oldmap, + const OSDMap& tmpmap, + Incremental *pending_inc) { ldout(cct, 10) << __func__ << dendl; - OSDMap tmpmap; - tmpmap.deepish_copy_from(osdmap); - tmpmap.apply_incremental(*pending_inc); for (auto pg : *tmpmap.pg_temp) { // if pool does not exist, remove any existing pg_temps associated with // it. we don't care about pg_temps on the pending_inc either; if there // are new_pg_temp entries on the pending, clear them out just as well. - if (!osdmap.have_pg_pool(pg.first.pool())) { + if (!tmpmap.have_pg_pool(pg.first.pool())) { ldout(cct, 10) << __func__ << " removing pg_temp " << pg.first << " for nonexistent pool " << pg.first.pool() << dendl; pending_inc->new_pg_temp[pg.first].clear(); @@ -1679,7 +1678,7 @@ void OSDMap::clean_temps(CephContext *cct, if (raw_up == pg.second) { ldout(cct, 10) << __func__ << " removing pg_temp " << pg.first << " " << pg.second << " that matches raw_up mapping" << dendl; - if (osdmap.pg_temp->count(pg.first)) + if (oldmap.pg_temp->count(pg.first)) pending_inc->new_pg_temp[pg.first].clear(); else pending_inc->new_pg_temp.erase(pg.first); @@ -1704,7 +1703,7 @@ void OSDMap::clean_temps(CephContext *cct, ldout(cct, 10) << __func__ << " removing primary_temp " << pgid << " -> " << real_primary << " (unnecessary/redundant)" << dendl; - if (osdmap.primary_temp->count(pgid)) + if (oldmap.primary_temp->count(pgid)) pending_inc->new_primary_temp[pgid] = -1; else pending_inc->new_primary_temp.erase(pgid); @@ -1713,13 +1712,11 @@ void OSDMap::clean_temps(CephContext *cct, } void OSDMap::maybe_remove_pg_upmaps(CephContext *cct, - const OSDMap& osdmap, + const OSDMap& oldmap, + const OSDMap& tmpmap, Incremental *pending_inc) { ldout(cct, 10) << __func__ << dendl; - OSDMap tmpmap; - tmpmap.deepish_copy_from(osdmap); - tmpmap.apply_incremental(*pending_inc); set to_check; set to_cancel; map> rule_weight_map; @@ -1814,10 +1811,10 @@ void OSDMap::maybe_remove_pg_upmaps(CephContext *cct, << dendl; pending_inc->new_pg_upmap.erase(it); } - if (osdmap.pg_upmap.count(pg)) { + if (oldmap.pg_upmap.count(pg)) { ldout(cct, 10) << __func__ << " cancel invalid pg_upmap entry " - << osdmap.pg_upmap.find(pg)->first << "->" - << osdmap.pg_upmap.find(pg)->second + << oldmap.pg_upmap.find(pg)->first << "->" + << oldmap.pg_upmap.find(pg)->second << dendl; pending_inc->old_pg_upmap.insert(pg); } @@ -1831,11 +1828,11 @@ void OSDMap::maybe_remove_pg_upmaps(CephContext *cct, << dendl; pending_inc->new_pg_upmap_items.erase(it); } - if (osdmap.pg_upmap_items.count(pg)) { + if (oldmap.pg_upmap_items.count(pg)) { ldout(cct, 10) << __func__ << " cancel invalid " << "pg_upmap_items entry " - << osdmap.pg_upmap_items.find(pg)->first << "->" - << osdmap.pg_upmap_items.find(pg)->second + << oldmap.pg_upmap_items.find(pg)->first << "->" + << oldmap.pg_upmap_items.find(pg)->second << dendl; pending_inc->old_pg_upmap_items.insert(pg); } diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index d3e8853f32d..f1ef8f3189c 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -1006,7 +1006,8 @@ public: uint64_t get_up_osd_features() const; void maybe_remove_pg_upmaps(CephContext *cct, - const OSDMap& osdmap, + const OSDMap& oldmap, + const OSDMap& nextmap, Incremental *pending_inc); int apply_incremental(const Incremental &inc); @@ -1014,7 +1015,9 @@ public: /// try to re-use/reference addrs in oldmap from newmap static void dedup(const OSDMap *oldmap, OSDMap *newmap); - static void clean_temps(CephContext *cct, const OSDMap& osdmap, + static void clean_temps(CephContext *cct, + const OSDMap& oldmap, + const OSDMap& nextmap, Incremental *pending_inc); // serialize, unserialize diff --git a/src/test/osd/TestOSDMap.cc b/src/test/osd/TestOSDMap.cc index eb291426ffc..c812daab47c 100644 --- a/src/test/osd/TestOSDMap.cc +++ b/src/test/osd/TestOSDMap.cc @@ -368,7 +368,10 @@ TEST_F(OSDMapTest, CleanTemps) { osdmap.apply_incremental(pgtemp_map); - OSDMap::clean_temps(g_ceph_context, osdmap, &pending_inc); + OSDMap tmpmap; + tmpmap.deepish_copy_from(osdmap); + tmpmap.apply_incremental(pending_inc); + OSDMap::clean_temps(g_ceph_context, osdmap, tmpmap, &pending_inc); EXPECT_TRUE(pending_inc.new_pg_temp.count(pga) && pending_inc.new_pg_temp[pga].size() == 0); @@ -418,7 +421,10 @@ TEST_F(OSDMapTest, KeepsNecessaryTemps) { OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); - OSDMap::clean_temps(g_ceph_context, osdmap, &pending_inc); + OSDMap tmpmap; + tmpmap.deepish_copy_from(osdmap); + tmpmap.apply_incremental(pending_inc); + OSDMap::clean_temps(g_ceph_context, osdmap, tmpmap, &pending_inc); EXPECT_FALSE(pending_inc.new_pg_temp.count(pgid)); EXPECT_FALSE(pending_inc.new_primary_temp.count(pgid)); } @@ -654,7 +660,10 @@ TEST_F(OSDMapTest, CleanPGUpmaps) { { // STEP-2: apply cure OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); - osdmap.maybe_remove_pg_upmaps(g_ceph_context, osdmap, &pending_inc); + OSDMap tmpmap; + tmpmap.deepish_copy_from(osdmap); + tmpmap.apply_incremental(pending_inc); + osdmap.maybe_remove_pg_upmaps(g_ceph_context, osdmap, tmpmap, &pending_inc); osdmap.apply_incremental(pending_inc); { // validate pg_upmap is gone (reverted) @@ -788,7 +797,11 @@ TEST_F(OSDMapTest, CleanPGUpmaps) { { // STEP-4: apply cure OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); - osdmap.maybe_remove_pg_upmaps(g_ceph_context, osdmap, &pending_inc); + OSDMap tmpmap; + tmpmap.deepish_copy_from(osdmap); + tmpmap.apply_incremental(pending_inc); + osdmap.maybe_remove_pg_upmaps(g_ceph_context, osdmap, tmpmap, + &pending_inc); osdmap.apply_incremental(pending_inc); { // validate pg_upmap_items is gone (reverted) -- 2.39.5