From 4b1d6b05d251a08c17105c310e2a3167c4be2891 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 24 Apr 2017 18:54:16 -0400 Subject: [PATCH] osd/OSDMap: move to 'osdmap' mempool We leave a few things out: - strings (too annoying, and they are small) - erasure code profiles (ditto) Signed-off-by: Sage Weil --- src/mgr/DaemonServer.cc | 2 +- src/mon/OSDMonitor.cc | 46 +++++++----- src/mon/OSDMonitor.h | 16 +++-- src/mon/PGMap.cc | 2 +- src/mon/PGMap.h | 2 +- src/osd/OSDMap.cc | 17 ++--- src/osd/OSDMap.h | 143 +++++++++++++++++++++---------------- src/test/osd/TestOSDMap.cc | 12 ++-- 8 files changed, 137 insertions(+), 103 deletions(-) diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index f54d903dbd214..fe0729d7484e9 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -572,7 +572,7 @@ bool DaemonServer::handle_command(MCommand *m) string no_increasing; cmd_getval(g_ceph_context, cmdmap, "no_increasing", no_increasing); string out_str; - map new_weights; + mempool::osdmap::map new_weights; r = cluster_state.with_pgmap([&](const PGMap& pgmap) { return cluster_state.with_osdmap([&](const OSDMap& osdmap) { return reweight::by_utilization(osdmap, pgmap, diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index a31d7bce40a0d..128b3fde0938b 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1093,7 +1093,9 @@ void OSDMonitor::prime_pg_temp( { Mutex::Locker l(prime_pg_temp_lock); // do not touch a mapping if a change is pending - pending_inc.new_pg_temp.emplace(pgid, acting); + pending_inc.new_pg_temp.emplace( + pgid, + mempool::osdmap::vector(acting.begin(), acting.end())); } } @@ -2631,7 +2633,7 @@ bool OSDMonitor::preprocess_pgtemp(MonOpRequestRef op) { MOSDPGTemp *m = static_cast(op->get_req()); dout(10) << "preprocess_pgtemp " << *m << dendl; - vector empty; + mempool::osdmap::vector empty; int from = m->get_orig_source().num(); size_t ignore_cnt = 0; @@ -2695,9 +2697,10 @@ bool 
OSDMonitor::preprocess_pgtemp(MonOpRequestRef op) // change? // NOTE: we assume that this will clear pg_primary, so consider // an existing pg_primary field to imply a change - if (p->second.size() && (osdmap.pg_temp->count(p->first) == 0 || - (*osdmap.pg_temp)[p->first] != p->second || - osdmap.primary_temp->count(p->first))) + if (p->second.size() && + (osdmap.pg_temp->count(p->first) == 0 || + !vectors_equal((*osdmap.pg_temp)[p->first], p->second) || + osdmap.primary_temp->count(p->first))) return false; } @@ -2744,7 +2747,8 @@ bool OSDMonitor::prepare_pgtemp(MonOpRequestRef op) << ": pool has been removed" << dendl; continue; } - pending_inc.new_pg_temp[p->first] = p->second; + pending_inc.new_pg_temp[p->first] = + mempool::osdmap::vector(p->second.begin(), p->second.end()); // unconditionally clear pg_primary (until this message can encode // a change for that, too.. at which point we need to also fix @@ -3136,10 +3140,11 @@ void OSDMonitor::check_pg_creates_sub(Subscription *sub) } } -void OSDMonitor::scan_for_creating_pgs(const map& pools, - const set& removed_pools, - utime_t modified, - creating_pgs_t* creating_pgs) const +void OSDMonitor::scan_for_creating_pgs( + const mempool::osdmap::map& pools, + const mempool::osdmap::set& removed_pools, + utime_t modified, + creating_pgs_t* creating_pgs) const { for (auto& p : pools) { int64_t poolid = p.first; @@ -5243,9 +5248,10 @@ bool OSDMonitor::validate_crush_against_features(const CrushWrapper *newcrush, return false; } -bool OSDMonitor::erasure_code_profile_in_use(const map &pools, - const string &profile, - ostream *ss) +bool OSDMonitor::erasure_code_profile_in_use( + const mempool::osdmap::map &pools, + const string &profile, + ostream *ss) { bool found = false; for (map::const_iterator p = pools.begin(); @@ -7500,7 +7506,8 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, new_pg_temp.push_back(osd); } - pending_inc.new_pg_temp[pgid] = new_pg_temp; + pending_inc.new_pg_temp[pgid] = 
mempool::osdmap::vector( + new_pg_temp.begin(), new_pg_temp.end()); ss << "set " << pgid << " pg_temp mapping to " << new_pg_temp; goto update; } else if (prefix == "osd primary-temp") { @@ -7602,7 +7609,8 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, new_pg_upmap.push_back(osd); } - pending_inc.new_pg_upmap[pgid] = new_pg_upmap; + pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector( + new_pg_upmap.begin(), new_pg_upmap.end()); ss << "set " << pgid << " pg_upmap mapping to " << new_pg_upmap; goto update; } else if (prefix == "osd rm-pg-upmap") { @@ -7718,7 +7726,9 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, new_pg_upmap_items.push_back(make_pair(from, to)); } - pending_inc.new_pg_upmap_items[pgid] = new_pg_upmap_items; + pending_inc.new_pg_upmap_items[pgid] = + mempool::osdmap::vector>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); ss << "set " << pgid << " pg_upmap_items mapping to " << new_pg_upmap_items; goto update; } else if (prefix == "osd rm-pg-upmap-items") { @@ -7849,7 +7859,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, << cmd_vartype_stringify(cmdmap["weights"]) << "'"; goto reply; } - pending_inc.new_weight = std::move(weights); + pending_inc.new_weight.insert(weights.begin(), weights.end()); wait_for_finished_proposal( op, new Monitor::C_Command(mon, op, 0, rs, rdata, get_last_committed() + 1)); @@ -8913,7 +8923,7 @@ done: string no_increasing; cmd_getval(g_ceph_context, cmdmap, "no_increasing", no_increasing); string out_str; - map new_weights; + mempool::osdmap::map new_weights; err = reweight::by_utilization(osdmap, mon->pgmon()->pg_map, oload, diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 4674241592f77..c6735e3656827 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -331,9 +331,10 @@ private: const string &ruleset_name, int *crush_ruleset, ostream *ss); - bool erasure_code_profile_in_use(const map &pools, - const string &profile, - ostream *ss); + bool 
erasure_code_profile_in_use( + const mempool::osdmap::map &pools, + const string &profile, + ostream *ss); int parse_erasure_code_profile(const vector &erasure_code_profile, map *erasure_code_profile_map, ostream *ss); @@ -452,10 +453,11 @@ private: creating_pgs_t update_pending_pgs(const OSDMap::Incremental& inc); void trim_creating_pgs(creating_pgs_t *creating_pgs, const PGMap& pgm); - void scan_for_creating_pgs(const std::map& pools, - const std::set& removed_pools, - utime_t modified, - creating_pgs_t* creating_pgs) const; + void scan_for_creating_pgs( + const mempool::osdmap::map& pools, + const mempool::osdmap::set& removed_pools, + utime_t modified, + creating_pgs_t* creating_pgs) const; pair get_parent_pg(pg_t pgid) const; void update_creating_pgs(); void check_pg_creates_subs(); diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 9e5524865f37e..57d95805a5144 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -2860,7 +2860,7 @@ int reweight::by_utilization( int max_osds, bool by_pg, const set *pools, bool no_increasing, - map* new_weights, + mempool::osdmap::map* new_weights, std::stringstream *ss, std::string *out_str, Formatter *f) diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h index 05ccea3f03fc6..506f86d9300c3 100644 --- a/src/mon/PGMap.h +++ b/src/mon/PGMap.h @@ -453,7 +453,7 @@ namespace reweight { int max_osds, bool by_pg, const set *pools, bool no_increasing, - map* new_weights, + mempool::osdmap::map* new_weights, std::stringstream *ss, std::string *out_str, Formatter *f); diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 1c70174b0c0f6..10223af137e36 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -1355,7 +1355,7 @@ void OSDMap::clean_temps(CephContext *cct, vector raw_up; int primary; tmpmap.pg_to_raw_up(pg.first, &raw_up, &primary); - if (raw_up == pg.second) { + if (vectors_equal(raw_up, pg.second)) { ldout(cct, 10) << __func__ << " removing pg_temp " << pg.first << " " << pg.second << " that matches raw_up mapping" << 
dendl; if (osdmap.pg_temp->count(pg.first)) @@ -1714,7 +1714,7 @@ void OSDMap::_apply_remap(const pg_pool_t& pi, pg_t raw_pg, vector *raw) co return; } } - *raw = p->second; + *raw = vector(p->second.begin(), p->second.end()); return; } @@ -2364,7 +2364,7 @@ void OSDMap::decode(bufferlist::iterator& bl) ::decode(*pg_temp, bl); ::decode(*primary_temp, bl); if (struct_v >= 2) { - osd_primary_affinity.reset(new vector<__u32>); + osd_primary_affinity.reset(new mempool::osdmap::vector<__u32>); ::decode(*osd_primary_affinity, bl); if (osd_primary_affinity->empty()) osd_primary_affinity.reset(); @@ -2461,8 +2461,9 @@ void OSDMap::post_decode() _calc_up_osd_features(); } -void OSDMap::dump_erasure_code_profiles(const map > &profiles, - Formatter *f) +void OSDMap::dump_erasure_code_profiles( + const mempool::osdmap::map>& profiles, + Formatter *f) { f->open_object_section("erasure_code_profiles"); for (const auto &profile : profiles) { @@ -3321,7 +3322,7 @@ int OSDMap::clean_pg_upmaps( vector raw; int primary; pg_to_raw_osds(p.first, &raw, &primary); - if (raw == p.second) { + if (vectors_equal(raw, p.second)) { ldout(cct, 10) << " removing redundant pg_upmap " << p.first << " " << p.second << dendl; pending_inc->old_pg_upmap.insert(p.first); @@ -3332,7 +3333,7 @@ int OSDMap::clean_pg_upmaps( vector raw; int primary; pg_to_raw_osds(p.first, &raw, &primary); - vector> newmap; + mempool::osdmap::vector> newmap; for (auto& q : p.second) { if (std::find(raw.begin(), raw.end(), q.first) != raw.end()) { newmap.push_back(q); @@ -3530,7 +3531,7 @@ int OSDMap::calc_pg_upmaps( continue; } assert(orig != out); - vector>& rmi = tmp.pg_upmap_items[pg]; + auto& rmi = tmp.pg_upmap_items[pg]; for (unsigned i = 0; i < out.size(); ++i) { if (orig[i] != out[i]) { rmi.push_back(make_pair(orig[i], out[i])); diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index 6151d09493c23..691a413b60e35 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -40,6 +40,17 @@ using namespace std; class 
CephContext; class CrushWrapper; +// FIXME C++11 does not have std::equal for two differently-typed containers. +// use this until we move to c++14 +template +bool vectors_equal(A a, B b) +{ + return + a.size() == b.size() && + (a.empty() || + memcmp((char*)&a[0], (char*)&b[0], sizeof(a[0]) * a.size()) == 0); +} + /* * we track up to two intervals during which the osd was alive and @@ -125,32 +136,32 @@ public: // incremental int32_t new_max_osd; - map new_pools; - map new_pool_names; - set old_pools; - map > new_erasure_code_profiles; - vector old_erasure_code_profiles; - map new_up_client; - map new_up_cluster; - map new_state; // XORed onto previous state. - map new_weight; - map > new_pg_temp; // [] to remove - map new_primary_temp; // [-1] to remove - map new_primary_affinity; - map new_up_thru; - map > new_last_clean_interval; - map new_lost; - map new_uuid; - map new_xinfo; - - map new_blacklist; - vector old_blacklist; - map new_hb_back_up; - map new_hb_front_up; - - map> new_pg_upmap; - map>> new_pg_upmap_items; - set old_pg_upmap, old_pg_upmap_items; + mempool::osdmap::map new_pools; + mempool::osdmap::map new_pool_names; + mempool::osdmap::set old_pools; + mempool::osdmap::map > new_erasure_code_profiles; + mempool::osdmap::vector old_erasure_code_profiles; + mempool::osdmap::map new_up_client; + mempool::osdmap::map new_up_cluster; + mempool::osdmap::map new_state; // XORed onto previous state. 
+ mempool::osdmap::map new_weight; + mempool::osdmap::map > new_pg_temp; // [] to remove + mempool::osdmap::map new_primary_temp; // [-1] to remove + mempool::osdmap::map new_primary_affinity; + mempool::osdmap::map new_up_thru; + mempool::osdmap::map > new_last_clean_interval; + mempool::osdmap::map new_lost; + mempool::osdmap::map new_uuid; + mempool::osdmap::map new_xinfo; + + mempool::osdmap::map new_blacklist; + mempool::osdmap::vector old_blacklist; + mempool::osdmap::map new_hb_back_up; + mempool::osdmap::map new_hb_front_up; + + mempool::osdmap::map> new_pg_upmap; + mempool::osdmap::map>> new_pg_upmap_items; + mempool::osdmap::set old_pg_upmap, old_pg_upmap_items; string cluster_snapshot; @@ -198,7 +209,7 @@ public: return i != new_erasure_code_profiles.end(); } void set_erasure_code_profile(const string &name, - const map &profile) { + const map& profile) { new_erasure_code_profiles[name] = profile; } @@ -222,33 +233,33 @@ private: vector osd_state; struct addrs_s { - vector > client_addr; - vector > cluster_addr; - vector > hb_back_addr; - vector > hb_front_addr; + mempool::osdmap::vector > client_addr; + mempool::osdmap::vector > cluster_addr; + mempool::osdmap::vector > hb_back_addr; + mempool::osdmap::vector > hb_front_addr; entity_addr_t blank; }; ceph::shared_ptr osd_addrs; - vector<__u32> osd_weight; // 16.16 fixed point, 0x10000 = "in", 0 = "out" - vector osd_info; - ceph::shared_ptr< map > > pg_temp; // temp pg mapping (e.g. while we rebuild) - ceph::shared_ptr< map > primary_temp; // temp primary mapping (e.g. while we rebuild) - ceph::shared_ptr< vector<__u32> > osd_primary_affinity; ///< 16.16 fixed point, 0x10000 = baseline + mempool::osdmap::vector<__u32> osd_weight; // 16.16 fixed point, 0x10000 = "in", 0 = "out" + mempool::osdmap::vector osd_info; + ceph::shared_ptr< mempool::osdmap::map > > pg_temp; // temp pg mapping (e.g. while we rebuild) + ceph::shared_ptr< mempool::osdmap::map > primary_temp; // temp primary mapping (e.g. 
while we rebuild) + ceph::shared_ptr< mempool::osdmap::vector<__u32> > osd_primary_affinity; ///< 16.16 fixed point, 0x10000 = baseline // remap (post-CRUSH, pre-up) - map> pg_upmap; ///< remap pg - map>> pg_upmap_items; ///< remap osds in up set + mempool::osdmap::map> pg_upmap; ///< remap pg + mempool::osdmap::map>> pg_upmap_items; ///< remap osds in up set - map pools; - map pool_name; - map > erasure_code_profiles; - map name_pool; + mempool::osdmap::map pools; + mempool::osdmap::map pool_name; + mempool::osdmap::map > erasure_code_profiles; + mempool::osdmap::map name_pool; - ceph::shared_ptr< vector > osd_uuid; - vector osd_xinfo; + ceph::shared_ptr< mempool::osdmap::vector > osd_uuid; + mempool::osdmap::vector osd_xinfo; - ceph::unordered_map blacklist; + mempool::osdmap::unordered_map blacklist; epoch_t cluster_snapshot_epoch; string cluster_snapshot; @@ -278,9 +289,9 @@ private: num_osd(0), num_up_osd(0), num_in_osd(0), max_osd(0), osd_addrs(std::make_shared()), - pg_temp(std::make_shared>>()), - primary_temp(std::make_shared>()), - osd_uuid(std::make_shared>()), + pg_temp(std::make_shared>>()), + primary_temp(std::make_shared>()), + osd_uuid(std::make_shared>()), cluster_snapshot_epoch(0), new_blacklist_entries(false), cached_up_osd_features(0), @@ -297,12 +308,12 @@ public: void deepish_copy_from(const OSDMap& o) { *this = o; - primary_temp.reset(new map(*o.primary_temp)); - pg_temp.reset(new map >(*o.pg_temp)); - osd_uuid.reset(new vector(*o.osd_uuid)); + primary_temp.reset(new mempool::osdmap::map(*o.primary_temp)); + pg_temp.reset(new mempool::osdmap::map >(*o.pg_temp)); + osd_uuid.reset(new mempool::osdmap::vector(*o.osd_uuid)); if (o.osd_primary_affinity) - osd_primary_affinity.reset(new vector<__u32>(*o.osd_primary_affinity)); + osd_primary_affinity.reset(new mempool::osdmap::vector<__u32>(*o.osd_primary_affinity)); // NOTE: this still references shared entity_addr_t's. 
osd_addrs.reset(new addrs_s(*o.osd_addrs)); @@ -343,8 +354,11 @@ public: return nearfull_ratio; } void count_full_nearfull_osds(int *full, int *backfill, int *nearfull) const; - void get_full_osd_util(const ceph::unordered_map &osd_stat, - map *full, map *backfill, map *nearfull) const; + void get_full_osd_util( + const ceph::unordered_map &osd_stat, + map *full, + map *backfill, + map *nearfull) const; /***** cluster state *****/ /* osds */ @@ -408,8 +422,9 @@ public: void set_primary_affinity(int o, int w) { assert(o < max_osd); if (!osd_primary_affinity) - osd_primary_affinity.reset(new vector<__u32>(max_osd, - CEPH_OSD_DEFAULT_PRIMARY_AFFINITY)); + osd_primary_affinity.reset( + new mempool::osdmap::vector<__u32>( + max_osd, CEPH_OSD_DEFAULT_PRIMARY_AFFINITY)); (*osd_primary_affinity)[o] = w; } unsigned get_primary_affinity(int o) const { @@ -430,10 +445,11 @@ public: map &profile_map, ostream *ss); void set_erasure_code_profile(const string &name, - const map &profile) { + const map& profile) { erasure_code_profiles[name] = profile; } - const map &get_erasure_code_profile(const string &name) const { + const map &get_erasure_code_profile( + const string &name) const { static map empty; auto i = erasure_code_profiles.find(name); if (i == erasure_code_profiles.end()) @@ -441,7 +457,7 @@ public: else return i->second; } - const map > &get_erasure_code_profiles() const { + const mempool::osdmap::map > &get_erasure_code_profiles() const { return erasure_code_profiles; } @@ -763,10 +779,10 @@ public: int64_t get_pool_max() const { return pool_max; } - const map& get_pools() const { + const mempool::osdmap::map& get_pools() const { return pools; } - map& get_pools() { + mempool::osdmap::map& get_pools() { return pools; } const string& get_pool_name(int64_t p) const { @@ -933,8 +949,9 @@ public: string get_flag_string() const; static string get_flag_string(unsigned flags); - static void dump_erasure_code_profiles(const map > &profiles, - Formatter *f); + static void 
dump_erasure_code_profiles( + const mempool::osdmap::map > &profiles, + Formatter *f); void dump(Formatter *f) const; static void generate_test_instances(list& o); bool check_new_blacklist_entries() const { return new_blacklist_entries; } diff --git a/src/test/osd/TestOSDMap.cc b/src/test/osd/TestOSDMap.cc index a8692d6043d91..e833ab2e4ff5a 100644 --- a/src/test/osd/TestOSDMap.cc +++ b/src/test/osd/TestOSDMap.cc @@ -235,7 +235,8 @@ TEST_F(OSDMapTest, PGTempRespected) { // apply pg_temp to osdmap OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1); - pgtemp_map.new_pg_temp[pgid] = new_acting_osds; + pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector( + new_acting_osds.begin(), new_acting_osds.end()); osdmap.apply_incremental(pgtemp_map); osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, @@ -276,7 +277,8 @@ TEST_F(OSDMapTest, CleanTemps) { int up_primary, acting_primary; osdmap.pg_to_up_acting_osds(pga, &up_osds, &up_primary, &acting_osds, &acting_primary); - pgtemp_map.new_pg_temp[pga] = up_osds; + pgtemp_map.new_pg_temp[pga] = mempool::osdmap::vector( + up_osds.begin(), up_osds.end()); pgtemp_map.new_primary_temp[pga] = up_primary; } pg_t pgb = osdmap.raw_pg_to_pg(pg_t(1, 0)); @@ -285,7 +287,8 @@ TEST_F(OSDMapTest, CleanTemps) { int up_primary, acting_primary; osdmap.pg_to_up_acting_osds(pgb, &up_osds, &up_primary, &acting_osds, &acting_primary); - pending_inc.new_pg_temp[pgb] = up_osds; + pending_inc.new_pg_temp[pgb] = mempool::osdmap::vector( + up_osds.begin(), up_osds.end()); pending_inc.new_primary_temp[pgb] = up_primary; } @@ -334,7 +337,8 @@ TEST_F(OSDMapTest, KeepsNecessaryTemps) { if (i == (int)get_num_osds()) FAIL() << "did not find unused OSD for temp mapping"; - pgtemp_map.new_pg_temp[pgid] = up_osds; + pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector( + up_osds.begin(), up_osds.end()); pgtemp_map.new_primary_temp[pgid] = up_osds[1]; osdmap.apply_incremental(pgtemp_map); -- 2.39.5