From 40bdcb88504aea6288d461d29d24d5b0bf7aeebc Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Wed, 19 Feb 2014 12:16:46 -0800 Subject: [PATCH] osd/,mon/: add (up|acting)_primary to pg_stat_t We use pg_stat_t information to determine pg create targeting. Fixes: #7481 Signed-off-by: Samuel Just Reviewed-by: Greg Farnum --- src/mon/PGMap.cc | 4 +++- src/mon/PGMonitor.cc | 34 ++++++++++++++++++++++------------ src/osd/PG.cc | 8 +++++++- src/osd/osd_types.cc | 25 +++++++++++++++++++++++-- src/osd/osd_types.h | 8 +++++++- 5 files changed, 62 insertions(+), 17 deletions(-) diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 56376d8b6ce36..ad2aacbf6db09 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -622,7 +622,7 @@ void PGMap::dump_osd_stats(Formatter *f) const void PGMap::dump_pg_stats_plain(ostream& ss, const ceph::unordered_map& pg_stats) const { - ss << "pg_stat\tobjects\tmip\tdegr\tunf\tbytes\tlog\tdisklog\tstate\tstate_stamp\tv\treported\tup\tacting\tlast_scrub\tscrub_stamp\tlast_deep_scrub\tdeep_scrub_stamp" << std::endl; + ss << "pg_stat\tobjects\tmip\tdegr\tunf\tbytes\tlog\tdisklog\tstate\tstate_stamp\tv\treported\tup\tup_primary\tacting\tacting_primary\tlast_scrub\tscrub_stamp\tlast_deep_scrub\tdeep_scrub_stamp" << std::endl; for (ceph::unordered_map::const_iterator i = pg_stats.begin(); i != pg_stats.end(); ++i) { const pg_stat_t &st(i->second); @@ -640,7 +640,9 @@ void PGMap::dump_pg_stats_plain(ostream& ss, << "\t" << st.version << "\t" << st.reported_epoch << ":" << st.reported_seq << "\t" << st.up + << "\t" << st.up_primary << "\t" << st.acting + << "\t" << st.acting_primary << "\t" << st.last_scrub << "\t" << st.last_scrub_stamp << "\t" << st.last_deep_scrub << "\t" << st.last_deep_scrub_stamp << std::endl; diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index a858a55061093..43683b67fc97d 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -1071,15 +1071,25 @@ void PGMonitor::map_pg_creates() pg_stat_t& s = 
pg_map.pg_stat[pgid]; if (s.parent_split_bits) on = s.parent; - vector acting; - int nrep = osdmap->pg_to_acting_osds(on, acting); - if (s.acting.size()) { - pg_map.creating_pgs_by_osd[s.acting[0]].erase(pgid); - if (pg_map.creating_pgs_by_osd[s.acting[0]].size() == 0) - pg_map.creating_pgs_by_osd.erase(s.acting[0]); + vector up, acting; + int up_primary, acting_primary; + osdmap->pg_to_up_acting_osds( + on, + &up, + &up_primary, + &acting, + &acting_primary); + + if (s.acting_primary != -1) { + pg_map.creating_pgs_by_osd[s.acting_primary].erase(pgid); + if (pg_map.creating_pgs_by_osd[s.acting_primary].size() == 0) + pg_map.creating_pgs_by_osd.erase(s.acting_primary); } + s.up = up; + s.up_primary = up_primary; s.acting = acting; + s.acting_primary = acting_primary; // don't send creates for localized pgs if (pgid.preferred() >= 0) @@ -1089,8 +1099,8 @@ void PGMonitor::map_pg_creates() if (s.parent_split_bits) continue; - if (nrep) { - pg_map.creating_pgs_by_osd[acting[0]].insert(pgid); + if (acting_primary != -1) { + pg_map.creating_pgs_by_osd[acting_primary].insert(pgid); } else { dout(20) << "map_pg_creates " << pgid << " -> no osds in epoch " << mon->osdmon()->osdmap.get_epoch() << ", skipping" << dendl; @@ -1159,8 +1169,8 @@ bool PGMonitor::check_down_pgs() p != pg_map.pg_stat.end(); ++p) { if ((p->second.state & PG_STATE_STALE) == 0 && - p->second.acting.size() && - osdmap->is_down(p->second.acting[0])) { + p->second.acting_primary != -1 && + osdmap->is_down(p->second.acting_primary)) { dout(10) << " marking pg " << p->first << " stale with acting " << p->second.acting << dendl; map::iterator q = pending_inc.pg_stat_updates.find(p->first); @@ -1529,12 +1539,12 @@ bool PGMonitor::preprocess_command(MMonCommand *m) r = -ENOENT; goto reply; } - if (!pg_map.pg_stat[pgid].acting.size()) { + if (pg_map.pg_stat[pgid].acting_primary == -1) { /* -1 is the "no acting primary" value; bail out before it is used as an osd id */ ss << "pg " << pgid << " has no primary osd"; r = -EAGAIN; goto reply; } - int osd = pg_map.pg_stat[pgid].acting[0]; + int osd 
= pg_map.pg_stat[pgid].acting_primary; if (!mon->osdmon()->osdmap.is_up(osd)) { ss << "pg " << pgid << " primary osd." << osd << " not up"; r = -EAGAIN; diff --git a/src/osd/PG.cc b/src/osd/PG.cc index bfb48feb880e8..5812b8bbb7bf7 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2341,7 +2341,9 @@ void PG::init( past_intervals.swap(pi); info.stats.up = up; + info.stats.up_primary = new_up_primary; info.stats.acting = acting; + info.stats.acting_primary = new_acting_primary; info.stats.mapping_epoch = info.history.same_interval_since; if (backfill) { @@ -4638,9 +4640,13 @@ void PG::start_peering_interval( new_acting_primary); if (info.stats.up != up || - info.stats.acting != acting) { + info.stats.acting != acting || + info.stats.up_primary != new_up_primary || + info.stats.acting_primary != new_acting_primary) { info.stats.up = up; + info.stats.up_primary = new_up_primary; info.stats.acting = acting; + info.stats.acting_primary = new_acting_primary; info.stats.mapping_epoch = info.history.same_interval_since; } diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index d8af4c1097f26..f0a82a8a56ee2 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -1541,6 +1541,8 @@ void pg_stat_t::dump(Formatter *f) const f->open_array_section("acting"); for (vector::const_iterator p = acting.begin(); p != acting.end(); ++p) f->dump_int("osd", *p); f->close_section(); + f->dump_int("up_primary", up_primary); /* emitted after the "acting" array is closed so it is not an array member */ + f->dump_int("acting_primary", acting_primary); } @@ -1554,12 +1556,14 @@ void pg_stat_t::dump_brief(Formatter *f) const f->open_array_section("acting"); for (vector::const_iterator p = acting.begin(); p != acting.end(); ++p) f->dump_int("osd", *p); f->close_section(); + f->dump_int("up_primary", up_primary); /* emitted after the "acting" array is closed so it is not an array member */ + f->dump_int("acting_primary", acting_primary); } void pg_stat_t::encode(bufferlist &bl) const { - ENCODE_START(14, 8, bl); + ENCODE_START(15, 8, bl); ::encode(version, bl); ::encode(reported_seq, bl); ::encode(reported_epoch, bl); @@ -1589,12 +1593,14 
@@ void pg_stat_t::encode(bufferlist &bl) const ::encode(last_clean_scrub_stamp, bl); ::encode(last_became_active, bl); ::encode(dirty_stats_invalid, bl); + ::encode(up_primary, bl); + ::encode(acting_primary, bl); ENCODE_FINISH(bl); } void pg_stat_t::decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(14, 8, 8, bl); + DECODE_START_LEGACY_COMPAT_LEN(15, 8, 8, bl); ::decode(version, bl); ::decode(reported_seq, bl); ::decode(reported_epoch, bl); @@ -1681,6 +1687,13 @@ void pg_stat_t::decode(bufferlist::iterator &bl) // encoder may not have supported num_objects_dirty accounting. dirty_stats_invalid = true; } + if (struct_v >= 15) { + ::decode(up_primary, bl); + ::decode(acting_primary, bl); + } else { + up_primary = up.size() ? up[0] : -1; + acting_primary = acting.size() ? acting[0] : -1; + } DECODE_FINISH(bl); } @@ -1716,7 +1729,15 @@ void pg_stat_t::generate_test_instances(list& o) a.log_size = 99; a.ondisk_log_size = 88; a.up.push_back(123); + a.up_primary = 123; a.acting.push_back(456); + a.acting_primary = 456; + o.push_back(new pg_stat_t(a)); + + a.up.push_back(124); + a.up_primary = 124; + a.acting.push_back(124); + a.acting_primary = 124; o.push_back(new pg_stat_t(a)); } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 732439cb941bb..f74723bbbaac1 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1287,6 +1287,10 @@ struct pg_stat_t { /// maintained starting from pool creation) bool dirty_stats_invalid; + /// up, acting primaries + int up_primary; + int acting_primary; + pg_stat_t() : reported_seq(0), reported_epoch(0), @@ -1296,7 +1300,9 @@ struct pg_stat_t { stats_invalid(false), log_size(0), ondisk_log_size(0), mapping_epoch(0), - dirty_stats_invalid(false) + dirty_stats_invalid(false), + up_primary(-1), + acting_primary(-1) { } epoch_t get_effective_last_epoch_clean() const { -- 2.39.5