From c053499011eab0e7ce43fc8847c0c5cd5dbd6f2e Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Mon, 21 Sep 2015 15:56:35 -0700 Subject: [PATCH] osd/: eliminate unnecessary pg_hit_set_history_t::current_info The only field actually relevant from this structure is .begin, which duplicates the information in hit_set_start_stamp less well. The problem is that the starting point of the currently open hit set is ephemeral state which shouldn't go into the pg_info_t structure. This also caused 13185 since pg_info_t.hit_set.current_info gets default constructed with use_gmt = true regardless of the pool setting. This becomes a problem in hit_set_persist since the oid is generated using the pool setting, rather than the use_gmt value in current_info which is placed into the history list. That discrepancy then causes a crash in hit set trim. There would also be a related bug if the pool setting is changed between when current_info is constructed and when it is written out. Since current_info isn't actually useful, I'm removing it so that we don't later rely on invalid fields. Fixes: 13185 Signed-off-by: Samuel Just --- src/osd/ReplicatedPG.cc | 43 ++++++++++++++++++----------------------- src/osd/osd_types.cc | 16 +++++++-------- src/osd/osd_types.h | 1 - 3 files changed, 27 insertions(+), 33 deletions(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 439c355eb1440..b968bdb5fde90 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1208,9 +1208,7 @@ void ReplicatedPG::do_pg_op(OpRequestRef op) p != info.hit_set.history.end(); ++p) ls.push_back(make_pair(p->begin, p->end)); - if (info.hit_set.current_info.begin) - ls.push_back(make_pair(info.hit_set.current_info.begin, utime_t())); - else if (hit_set) + if (hit_set) ls.push_back(make_pair(hit_set_start_stamp, utime_t())); ::encode(ls, osd_op.outdata); } @@ -1219,9 +1217,7 @@ void ReplicatedPG::do_pg_op(OpRequestRef op) case CEPH_OSD_OP_PG_HITSET_GET: { utime_t stamp(osd_op.op.hit_set_get.stamp); - if ((info.hit_set.current_info.begin && - stamp >= info.hit_set.current_info.begin) || - stamp >= hit_set_start_stamp) { + if (hit_set_start_stamp && stamp >= hit_set_start_stamp) { // read the current in-memory HitSet, not the version we've // checkpointed. if (!hit_set) { @@ -10907,14 +10903,6 @@ void ReplicatedPG::hit_set_persist() return; } - utime_t start = info.hit_set.current_info.begin; - if (!start) - start = hit_set_start_stamp; - oid = get_hit_set_archive_object(start, now, pool.info.use_gmt_hitset); - // If the current object is degraded we skip this persist request - if (scrubber.write_blocked_by_scrub(oid, get_sort_bitwise())) - return; - // If backfill is in progress and we could possibly overlap with the // hit_set_* objects, back off. Since these all have // hobject_t::hash set to pgid.ps(), and those sort first, we can @@ -10935,16 +10923,25 @@ void ReplicatedPG::hit_set_persist() } } - if (!info.hit_set.current_info.begin) - info.hit_set.current_info.begin = hit_set_start_stamp; + + pg_hit_set_info_t new_hset = pg_hit_set_info_t(pool.info.use_gmt_hitset); + new_hset.begin = hit_set_start_stamp; + new_hset.end = now; + oid = get_hit_set_archive_object( + new_hset.begin, + new_hset.end, + new_hset.using_gmt); + + // If the current object is degraded we skip this persist request + if (scrubber.write_blocked_by_scrub(oid, get_sort_bitwise())) + return; hit_set->seal(); ::encode(*hit_set, bl); - info.hit_set.current_info.end = now; dout(20) << __func__ << " archive " << oid << dendl; if (agent_state) { - agent_state->add_hit_set(info.hit_set.current_info.begin, hit_set); + agent_state->add_hit_set(new_hset.begin, hit_set); uint32_t size = agent_state->hit_set_map.size(); if (size >= pool.info.hit_set_count) { size = pool.info.hit_set_count > 0 ? pool.info.hit_set_count - 1: 0; @@ -10953,8 +10950,8 @@ void ReplicatedPG::hit_set_persist() } // hold a ref until it is flushed to disk - hit_set_flushing[info.hit_set.current_info.begin] = hit_set; - flush_time = info.hit_set.current_info.begin; + hit_set_flushing[new_hset.begin] = hit_set; + flush_time = new_hset.begin; ObjectContextRef obc = get_object_context(oid, true); repop = simple_repop_create(obc); @@ -10965,13 +10962,11 @@ void ReplicatedPG::hit_set_persist() ctx->updated_hset_history = info.hit_set; pg_hit_set_history_t &updated_hit_set_hist = *(ctx->updated_hset_history); - updated_hit_set_hist.current_last_update = info.last_update; - updated_hit_set_hist.current_info.version = ctx->at_version; + new_hset.version = ctx->at_version; - updated_hit_set_hist.history.push_back(updated_hit_set_hist.current_info); + updated_hit_set_hist.history.push_back(new_hset); hit_set_create(); - updated_hit_set_hist.current_info = pg_hit_set_info_t(pool.info.use_gmt_hitset); // fabricate an object_info_t and SnapSet obc->obs.oi.version = ctx->at_version; diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index e33acc08e82a4..7f1fbfc60f15e 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -4004,7 +4004,10 @@ void pg_hit_set_history_t::encode(bufferlist& bl) const utime_t dummy_stamp; ::encode(dummy_stamp, bl); } - ::encode(current_info, bl); + { + pg_hit_set_info_t dummy_info; + ::encode(dummy_info, bl); + } ::encode(history, bl); ENCODE_FINISH(bl); } @@ -4017,7 +4020,10 @@ void pg_hit_set_history_t::decode(bufferlist::iterator& p) utime_t dummy_stamp; ::decode(dummy_stamp, p); } - ::decode(current_info, p); + { + pg_hit_set_info_t dummy_info; + ::decode(dummy_info, p); + } ::decode(history, p); DECODE_FINISH(p); } @@ -4025,9 +4031,6 @@ void pg_hit_set_history_t::decode(bufferlist::iterator& p) void pg_hit_set_history_t::dump(Formatter *f) const { f->dump_stream("current_last_update") << current_last_update; - f->open_object_section("current_info"); - current_info.dump(f); - f->close_section(); f->open_array_section("history"); for (list::const_iterator p = history.begin(); p != history.end(); ++p) { @@ -4043,9 +4046,6 @@ void pg_hit_set_history_t::generate_test_instances(list& ls.push_back(new pg_hit_set_history_t); ls.push_back(new pg_hit_set_history_t); ls.back()->current_last_update = eversion_t(1, 2); - ls.back()->current_info.begin = utime_t(2, 4); - ls.back()->current_info.end = utime_t(62, 24); - ls.back()->history.push_back(ls.back()->current_info); ls.back()->history.push_back(pg_hit_set_info_t()); } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 832d8d07996de..4b22a3c9b1a9c 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1740,7 +1740,6 @@ WRITE_CLASS_ENCODER(pg_hit_set_info_t) */ struct pg_hit_set_history_t { eversion_t current_last_update; ///< last version inserted into current set - pg_hit_set_info_t current_info; ///< metadata about the current set list history; ///< archived sets, sorted oldest -> newest void encode(bufferlist &bl) const; -- 2.39.5