From ee9f04c5f8674bc2e8e6c5d92e9193d904215cb5 Mon Sep 17 00:00:00 2001 From: Loic Dachary Date: Sun, 28 Jul 2013 13:50:43 +0200 Subject: [PATCH] check_new_interval must compare old acting with old osdmap When trying to establish if the old acting set is either empty or smaller than the min_size of the osdmap, pg_interval_t::check_new_interval compares with the min_size of the new osdmap. Since the goal is to try to determine if the previous interval may have been writeable, it should not enter the if when there were not enough osds in the acting set ( i.e. < min_size ). But it may enter it anyway if min_size was decremented in the new osdmap. A complete set of unit tests were added to cover the logic of check_new_interval. The parameters are prepared to describe a situation where the function returns false (i.e. no new interval). Each case is described in a separate bloc that introduces the minimal changes to demonstrate the intended test case. Because a number of cases have the same output while implementing a different logic, the debug output is parsed to differentiate between them. A test case demonstrating the problem ( check_new_interval must compare old acting with old osdmap ) is added, with a link to the bug number for future reference. The problem is fixed. The text of two debug messages are slightly changed to make the maintenance of the test that match them easier. http://tracker.ceph.com/issues/5780 refs #5780 Signed-off-by: Loic Dachary Reviewed-by: Sage Weil Reviewed-by: Samuel Just --- src/osd/osd_types.cc | 5 +- src/test/test_osd_types.cc | 416 +++++++++++++++++++++++++++++++++++++ 2 files changed, 419 insertions(+), 2 deletions(-) diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 02c1ef7b69d18..fbd5cbbe9a06f 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -1715,7 +1715,7 @@ bool pg_interval_t::check_new_interval( if (!i.acting.empty() && i.acting.size() >= - osdmap->get_pools().find(pool_id)->second.min_size) { + lastmap->get_pools().find(pool_id)->second.min_size) { if (out) *out << "generate_past_intervals " << i << ": not rw," @@ -1730,6 +1730,7 @@ bool pg_interval_t::check_new_interval( *out << "generate_past_intervals " << i << " : primary up " << lastmap->get_up_from(i.acting[0]) << "-" << lastmap->get_up_thru(i.acting[0]) + << " includes interval" << std::endl; } else if (last_epoch_clean >= i.first && last_epoch_clean <= i.last) { @@ -1758,7 +1759,7 @@ bool pg_interval_t::check_new_interval( } else { i.maybe_went_rw = false; if (out) - *out << "generate_past_intervals " << i << " : empty" << std::endl; + *out << "generate_past_intervals " << i << " : acting set is too small" << std::endl; } return true; } else { diff --git a/src/test/test_osd_types.cc b/src/test/test_osd_types.cc index 7a43b7b892a01..fa4ae6163ac7f 100644 --- a/src/test/test_osd_types.cc +++ b/src/test/test_osd_types.cc @@ -17,6 +17,7 @@ #include "include/types.h" #include "osd/osd_types.h" +#include "osd/OSDMap.h" #include "gtest/gtest.h" #include "common/Thread.h" @@ -117,6 +118,421 @@ TEST(hobject, prefixes5) ASSERT_EQ(prefixes_out, prefixes_correct); } +TEST(pg_interval_t, check_new_interval) +{ + // + // Create a situation where osdmaps are the same so that + // each test case can diverge from it using minimal code. + // + int osd_id = 1; + epoch_t epoch = 40; + std::tr1::shared_ptr osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + std::tr1::shared_ptr lastmap(new OSDMap()); + lastmap->set_max_osd(10); + lastmap->set_state(osd_id, CEPH_OSD_EXISTS); + lastmap->set_epoch(epoch); + epoch_t same_interval_since = epoch; + epoch_t last_epoch_clean = same_interval_since; + int64_t pool_id = 200; + int pg_num = 4; + __u8 min_size = 2; + { + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + inc.new_up_thru[osd_id] = epoch + 1; + osdmap->apply_incremental(inc); + lastmap->apply_incremental(inc); + } + vector new_acting; + new_acting.push_back(osd_id); + new_acting.push_back(osd_id + 1); + vector old_acting = new_acting; + vector new_up; + new_up.push_back(osd_id); + vector old_up = new_up; + pg_t pgid; + pgid.set_pool(pool_id); + + // + // Do nothing if there are no modifications in + // acting, up or pool size and that the pool is not + // being split + // + { + map past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_FALSE(pg_interval_t::check_new_interval(old_acting, + new_acting, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pool_id, + pgid, + &past_intervals)); + ASSERT_TRUE(past_intervals.empty()); + } + + // + // pool did not exist in the old osdmap + // + { + std::tr1::shared_ptr lastmap(new OSDMap()); + lastmap->set_max_osd(10); + lastmap->set_state(osd_id, CEPH_OSD_EXISTS); + lastmap->set_epoch(epoch); + + map past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + new_acting, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pool_id, + pgid, + &past_intervals)); + ASSERT_EQ((unsigned int)1, past_intervals.size()); + ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first); + ASSERT_EQ(osdmap->get_epoch() - 1, past_intervals[same_interval_since].last); + ASSERT_EQ(osd_id, past_intervals[same_interval_since].acting[0]); + ASSERT_EQ(osd_id, past_intervals[same_interval_since].up[0]); + } + + // + // The acting set has changed + // + { + vector new_acting; + int new_primary = osd_id + 1; + new_acting.push_back(new_primary); + + map past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + new_acting, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pool_id, + pgid, + &past_intervals)); + ASSERT_EQ((unsigned int)1, past_intervals.size()); + ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first); + ASSERT_EQ(osdmap->get_epoch() - 1, past_intervals[same_interval_since].last); + ASSERT_EQ(osd_id, past_intervals[same_interval_since].acting[0]); + ASSERT_EQ(osd_id, past_intervals[same_interval_since].up[0]); + } + + // + // The up set has changed + // + { + vector new_up; + int new_primary = osd_id + 1; + new_up.push_back(new_primary); + + map past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + new_acting, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pool_id, + pgid, + &past_intervals)); + ASSERT_EQ((unsigned int)1, past_intervals.size()); + ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first); + ASSERT_EQ(osdmap->get_epoch() - 1, past_intervals[same_interval_since].last); + ASSERT_EQ(osd_id, past_intervals[same_interval_since].acting[0]); + ASSERT_EQ(osd_id, past_intervals[same_interval_since].up[0]); + } + + // + // PG is splitting + // + { + std::tr1::shared_ptr osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + int new_pg_num = pg_num ^ 2; + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(new_pg_num); + osdmap->apply_incremental(inc); + + map past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + new_acting, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pool_id, + pgid, + &past_intervals)); + ASSERT_EQ((unsigned int)1, past_intervals.size()); + ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first); + ASSERT_EQ(osdmap->get_epoch() - 1, past_intervals[same_interval_since].last); + ASSERT_EQ(osd_id, past_intervals[same_interval_since].acting[0]); + ASSERT_EQ(osd_id, past_intervals[same_interval_since].up[0]); + } + + // + // PG size has changed + // + { + std::tr1::shared_ptr osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + __u8 new_min_size = min_size + 1; + inc.new_pools[pool_id].min_size = new_min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + osdmap->apply_incremental(inc); + + map past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + new_acting, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pool_id, + pgid, + &past_intervals)); + ASSERT_EQ((unsigned int)1, past_intervals.size()); + ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first); + ASSERT_EQ(osdmap->get_epoch() - 1, past_intervals[same_interval_since].last); + ASSERT_EQ(osd_id, past_intervals[same_interval_since].acting[0]); + ASSERT_EQ(osd_id, past_intervals[same_interval_since].up[0]); + } + + // + // The old acting set was empty : the previous interval could not + // have been rw + // + { + vector old_acting; + + map past_intervals; + + ostringstream out; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + new_acting, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pool_id, + pgid, + &past_intervals, + &out)); + ASSERT_EQ((unsigned int)1, past_intervals.size()); + ASSERT_FALSE(past_intervals[same_interval_since].maybe_went_rw); + ASSERT_NE(string::npos, out.str().find("acting set is too small")); + } + + // + // The old acting set did not have enough osd : it could + // not have been rw + // + { + vector old_acting; + old_acting.push_back(osd_id); + + // + // see http://tracker.ceph.com/issues/5780 + // the size of the old acting set should be compared + // with the min_size of the old osdmap + // + // The new osdmap is created so that it triggers the + // bug. + // + std::tr1::shared_ptr osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + __u8 new_min_size = old_acting.size(); + inc.new_pools[pool_id].min_size = new_min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + osdmap->apply_incremental(inc); + + ostringstream out; + + map past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + new_acting, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pool_id, + pgid, + &past_intervals, + &out)); + ASSERT_EQ((unsigned int)1, past_intervals.size()); + ASSERT_FALSE(past_intervals[same_interval_since].maybe_went_rw); + ASSERT_NE(string::npos, out.str().find("acting set is too small")); + } + + // + // The acting set changes. The old acting set primary was up during the + // previous interval and may have been rw. + // + { + vector new_acting; + new_acting.push_back(osd_id + 4); + new_acting.push_back(osd_id + 5); + + ostringstream out; + + map past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + new_acting, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pool_id, + pgid, + &past_intervals, + &out)); + ASSERT_EQ((unsigned int)1, past_intervals.size()); + ASSERT_TRUE(past_intervals[same_interval_since].maybe_went_rw); + ASSERT_NE(string::npos, out.str().find("includes interval")); + } + // + // The acting set changes. The old acting set primary was not up + // during the old interval but last_epoch_clean is in the + // old interval and it may have been rw. + // + { + vector new_acting; + new_acting.push_back(osd_id + 4); + new_acting.push_back(osd_id + 5); + + std::tr1::shared_ptr lastmap(new OSDMap()); + lastmap->set_max_osd(10); + lastmap->set_state(osd_id, CEPH_OSD_EXISTS); + lastmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + inc.new_up_thru[osd_id] = epoch - 10; + lastmap->apply_incremental(inc); + + ostringstream out; + + map past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + new_acting, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pool_id, + pgid, + &past_intervals, + &out)); + ASSERT_EQ((unsigned int)1, past_intervals.size()); + ASSERT_TRUE(past_intervals[same_interval_since].maybe_went_rw); + ASSERT_NE(string::npos, out.str().find("presumed to have been rw")); + } + + // + // The acting set changes. The old acting set primary was not up + // during the old interval and last_epoch_clean is before the + // old interval : the previous interval could not possibly have + // been rw. + // + { + vector new_acting; + new_acting.push_back(osd_id + 4); + new_acting.push_back(osd_id + 5); + + epoch_t last_epoch_clean = epoch - 10; + + std::tr1::shared_ptr lastmap(new OSDMap()); + lastmap->set_max_osd(10); + lastmap->set_state(osd_id, CEPH_OSD_EXISTS); + lastmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + inc.new_up_thru[osd_id] = last_epoch_clean; + lastmap->apply_incremental(inc); + + ostringstream out; + + map past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting, + new_acting, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pool_id, + pgid, + &past_intervals, + &out)); + ASSERT_EQ((unsigned int)1, past_intervals.size()); + ASSERT_FALSE(past_intervals[same_interval_since].maybe_went_rw); + ASSERT_NE(string::npos, out.str().find("does not include interval")); + } +} + TEST(pg_t, split) { pg_t pgid(0, 0, -1); -- 2.39.5