From: Samuel Just Date: Wed, 12 Feb 2014 18:44:45 +0000 (-0800) Subject: osd/: extend pg_interval_t to include primary X-Git-Tag: v0.78~163^2~26 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5db3b2dc2cefac8af702f23f64e6b2a49697be2f;p=ceph.git osd/: extend pg_interval_t to include primary Otherwise, we cannot correctly determine up_from/up_thru for old intervals. Also, we need this information to determine when a new interval starts due to a new primary without a change in the acting set. Signed-off-by: Samuel Just --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 15c38833ade5..7cd40ac775f6 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2100,6 +2100,7 @@ struct pistate { epoch_t start, end; vector old_acting, old_up; epoch_t same_interval_since; + int primary; }; void OSD::build_past_intervals_parallel() @@ -2151,7 +2152,9 @@ void OSD::build_past_intervals_parallel() continue; vector acting, up; - cur_map->pg_to_up_acting_osds(pg->info.pgid.pgid, up, acting); + int primary; + cur_map->pg_to_up_acting_osds( + pg->info.pgid.pgid, &up, 0, &acting, &primary); if (p.same_interval_since == 0) { dout(10) << __func__ << " epoch " << cur_epoch << " pg " << pg->info.pgid @@ -2160,12 +2163,15 @@ void OSD::build_past_intervals_parallel() p.same_interval_since = cur_epoch; p.old_up = up; p.old_acting = acting; + p.primary = primary; continue; } assert(last_map); std::stringstream debug; bool new_interval = pg_interval_t::check_new_interval( + p.primary, + primary, p.old_acting, acting, p.old_up, up, p.same_interval_since, diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 954c0339901f..1e36d3fd90f0 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -606,10 +606,13 @@ void PG::generate_past_intervals() } OSDMapRef last_map, cur_map; + int primary = -1; + int old_primary = -1; vector acting, up, old_acting, old_up; cur_map = osd->get_map(cur_epoch); - cur_map->pg_to_up_acting_osds(get_pgid().pgid, up, acting); + 
cur_map->pg_to_up_acting_osds( + get_pgid().pgid, &up, 0, &acting, &primary); epoch_t same_interval_since = cur_epoch; dout(10) << __func__ << " over epochs " << cur_epoch << "-" << end_epoch << dendl; @@ -618,12 +621,16 @@ void PG::generate_past_intervals() last_map.swap(cur_map); old_up.swap(up); old_acting.swap(acting); + old_primary = primary; cur_map = osd->get_map(cur_epoch); - cur_map->pg_to_up_acting_osds(get_pgid().pgid, up, acting); + cur_map->pg_to_up_acting_osds( + get_pgid().pgid, &up, 0, &acting, &primary); std::stringstream debug; bool new_interval = pg_interval_t::check_new_interval( + old_primary, + primary, old_acting, acting, old_up, @@ -2093,10 +2100,14 @@ void PG::update_heartbeat_peers() set new_peers; if (is_primary()) { - for (unsigned i=0; i::iterator p = peer_info.begin(); p != peer_info.end(); ++p) @@ -4584,6 +4595,8 @@ void PG::start_peering_interval( } else { std::stringstream debug; bool new_interval = pg_interval_t::check_new_interval( + oldprimary.osd, + new_acting_primary, oldacting, newacting, oldup, newup, info.history.same_interval_since, diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index caae5782b646..d8af4c1097f2 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -2056,23 +2056,30 @@ ostream &operator<<(ostream &lhs, const pg_notify_t &notify) void pg_interval_t::encode(bufferlist& bl) const { - ENCODE_START(2, 2, bl); + ENCODE_START(3, 2, bl); ::encode(first, bl); ::encode(last, bl); ::encode(up, bl); ::encode(acting, bl); ::encode(maybe_went_rw, bl); + ::encode(primary, bl); ENCODE_FINISH(bl); } void pg_interval_t::decode(bufferlist::iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); + DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl); ::decode(first, bl); ::decode(last, bl); ::decode(up, bl); ::decode(acting, bl); ::decode(maybe_went_rw, bl); + if (struct_v >= 3) { + ::decode(primary, bl); + } else { + if (acting.size()) + primary = acting[0]; + } DECODE_FINISH(bl); } @@ -2104,6 +2111,8 @@ void 
pg_interval_t::generate_test_instances(list& o) } bool pg_interval_t::check_new_interval( + int old_primary, + int new_primary, const vector &old_acting, const vector &new_acting, const vector &old_up, @@ -2118,7 +2127,8 @@ bool pg_interval_t::check_new_interval( std::ostream *out) { // remember past interval - if (new_acting != old_acting || new_up != old_up || + if (old_primary != new_primary || + new_acting != old_acting || new_up != old_up || (!(lastmap->get_pools().count(pool_id))) || (lastmap->get_pools().find(pool_id)->second.min_size != osdmap->get_pools().find(pool_id)->second.min_size) || @@ -2129,24 +2139,25 @@ bool pg_interval_t::check_new_interval( i.last = osdmap->get_epoch() - 1; i.acting = old_acting; i.up = old_up; + i.primary = old_primary; - if (!i.acting.empty() && + if (!i.acting.empty() && i.primary != -1 && i.acting.size() >= lastmap->get_pools().find(pool_id)->second.min_size) { if (out) *out << "generate_past_intervals " << i << ": not rw," - << " up_thru " << lastmap->get_up_thru(i.acting[0]) - << " up_from " << lastmap->get_up_from(i.acting[0]) + << " up_thru " << lastmap->get_up_thru(i.primary) + << " up_from " << lastmap->get_up_from(i.primary) << " last_epoch_clean " << last_epoch_clean << std::endl; - if (lastmap->get_up_thru(i.acting[0]) >= i.first && - lastmap->get_up_from(i.acting[0]) <= i.first) { + if (lastmap->get_up_thru(i.primary) >= i.first && + lastmap->get_up_from(i.primary) <= i.first) { i.maybe_went_rw = true; if (out) *out << "generate_past_intervals " << i - << " : primary up " << lastmap->get_up_from(i.acting[0]) - << "-" << lastmap->get_up_thru(i.acting[0]) + << " : primary up " << lastmap->get_up_from(i.primary) + << "-" << lastmap->get_up_thru(i.primary) << " includes interval" << std::endl; } else if (last_epoch_clean >= i.first && @@ -2168,8 +2179,8 @@ bool pg_interval_t::check_new_interval( i.maybe_went_rw = false; if (out) *out << "generate_past_intervals " << i - << " : primary up " << 
lastmap->get_up_from(i.acting[0]) - << "-" << lastmap->get_up_thru(i.acting[0]) + << " : primary up " << lastmap->get_up_from(i.primary) + << "-" << lastmap->get_up_thru(i.primary) << " does not include interval" << std::endl; } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index fe0493436b5a..c6895f5f3aa7 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1634,8 +1634,9 @@ struct pg_interval_t { vector up, acting; epoch_t first, last; bool maybe_went_rw; + int primary; - pg_interval_t() : first(0), last(0), maybe_went_rw(false) {} + pg_interval_t() : first(0), last(0), maybe_went_rw(false), primary(-1) {} void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bl); @@ -1647,6 +1648,8 @@ struct pg_interval_t { * if an interval was closed out. */ static bool check_new_interval( + int old_primary, ///< [in] primary as of lastmap + int new_primary, ///< [in] primary as of osdmap const vector &old_acting, ///< [in] acting as of lastmap const vector &new_acting, ///< [in] acting as of osdmap const vector &old_up, ///< [in] up as of lastmap @@ -2685,11 +2688,10 @@ public: void dec(list *requeue) { assert(count > 0); assert(requeue); - assert(requeue->empty()); count--; if (count == 0) { state = RWNONE; - requeue->swap(waiters); + requeue->splice(requeue->end(), waiters); } } void put_read(list *requeue) { diff --git a/src/test/osd/types.cc b/src/test/osd/types.cc index a0d660fa92a5..6380211cf811 100644 --- a/src/test/osd/types.cc +++ b/src/test/osd/types.cc @@ -151,6 +151,8 @@ TEST(pg_interval_t, check_new_interval) new_acting.push_back(osd_id); new_acting.push_back(osd_id + 1); vector old_acting = new_acting; + int old_primary = osd_id; + int new_primary = osd_id; vector new_up; new_up.push_back(osd_id); vector old_up = new_up; @@ -166,7 +168,9 @@ TEST(pg_interval_t, check_new_interval) map past_intervals; ASSERT_TRUE(past_intervals.empty()); - ASSERT_FALSE(pg_interval_t::check_new_interval(old_acting, + 
ASSERT_FALSE(pg_interval_t::check_new_interval(old_primary, + new_primary, + old_acting, new_acting, old_up, new_up, @@ -192,7 +196,9 @@ TEST(pg_interval_t, check_new_interval) map past_intervals; ASSERT_TRUE(past_intervals.empty()); - ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary, + new_primary, + old_acting, new_acting, old_up, new_up, @@ -215,13 +221,15 @@ TEST(pg_interval_t, check_new_interval) // { vector new_acting; - int new_primary = osd_id + 1; - new_acting.push_back(new_primary); + int _new_primary = osd_id + 1; + new_acting.push_back(_new_primary); map past_intervals; ASSERT_TRUE(past_intervals.empty()); - ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary, + new_primary, + old_acting, new_acting, old_up, new_up, @@ -232,6 +240,7 @@ TEST(pg_interval_t, check_new_interval) pool_id, pgid, &past_intervals)); + old_primary = new_primary; ASSERT_EQ((unsigned int)1, past_intervals.size()); ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first); ASSERT_EQ(osdmap->get_epoch() - 1, past_intervals[same_interval_since].last); @@ -244,13 +253,15 @@ TEST(pg_interval_t, check_new_interval) // { vector new_up; - int new_primary = osd_id + 1; - new_up.push_back(new_primary); + int _new_primary = osd_id + 1; + new_up.push_back(_new_primary); map past_intervals; ASSERT_TRUE(past_intervals.empty()); - ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary, + new_primary, + old_acting, new_acting, old_up, new_up, @@ -285,7 +296,9 @@ TEST(pg_interval_t, check_new_interval) map past_intervals; ASSERT_TRUE(past_intervals.empty()); - ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary, + new_primary, + old_acting, new_acting, old_up, new_up, @@ -320,7 +333,9 @@ TEST(pg_interval_t, 
check_new_interval) map past_intervals; ASSERT_TRUE(past_intervals.empty()); - ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary, + new_primary, + old_acting, new_acting, old_up, new_up, @@ -350,7 +365,9 @@ TEST(pg_interval_t, check_new_interval) ostringstream out; ASSERT_TRUE(past_intervals.empty()); - ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary, + new_primary, + old_acting, new_acting, old_up, new_up, @@ -398,7 +415,9 @@ TEST(pg_interval_t, check_new_interval) map past_intervals; ASSERT_TRUE(past_intervals.empty()); - ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary, + new_primary, + old_acting, new_acting, old_up, new_up, @@ -429,7 +448,9 @@ TEST(pg_interval_t, check_new_interval) map past_intervals; ASSERT_TRUE(past_intervals.empty()); - ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary, + new_primary, + old_acting, new_acting, old_up, new_up, @@ -470,7 +491,9 @@ TEST(pg_interval_t, check_new_interval) map past_intervals; ASSERT_TRUE(past_intervals.empty()); - ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary, + new_primary, + old_acting, new_acting, old_up, new_up, @@ -515,7 +538,9 @@ TEST(pg_interval_t, check_new_interval) map past_intervals; ASSERT_TRUE(past_intervals.empty()); - ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary, + new_primary, + old_acting, new_acting, old_up, new_up,