From 8358fb8946d8809d695092baa4a6abf5d5b5e265 Mon Sep 17 00:00:00 2001 From: Loic Dachary Date: Wed, 18 Nov 2015 18:08:58 +0100 Subject: [PATCH] revert: osd: use GMT time for hitsets "Merge pull request #5825 from tchaikov/wip-12848-hammer" This reverts commit 39544718dc2f09bcfdc632ac72fd2a3cda87687e, reversing changes made to 4ad97162026e1eb6e6e948ddf3eb39f711431e45. http://tracker.ceph.com/issues/13812 Fixes: #13812 Signed-off-by: Loic Dachary --- src/common/config_opts.h | 1 - src/include/ceph_features.h | 2 - src/mon/MonCommands.h | 2 +- src/mon/OSDMonitor.cc | 38 ----------------- src/osd/HitSet.cc | 3 ++ src/osd/ReplicatedPG.cc | 84 +++++++------------------------------ src/osd/ReplicatedPG.h | 5 +-- src/osd/osd_types.cc | 32 ++------------ src/osd/osd_types.h | 11 ++--- 9 files changed, 28 insertions(+), 150 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 0f67da5a3afe8..dd376a6f724cf 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -497,7 +497,6 @@ OPTION(osd_client_message_cap, OPT_U64, 100) // num client messages OPTION(osd_pg_bits, OPT_INT, 6) // bits per osd OPTION(osd_pgp_bits, OPT_INT, 6) // bits per osd OPTION(osd_crush_chooseleaf_type, OPT_INT, 1) // 1 = host -OPTION(osd_pool_use_gmt_hitset, OPT_BOOL, true) // try to use gmt for hitset archive names if all osds in cluster support it. OPTION(osd_pool_default_crush_rule, OPT_INT, -1) // deprecated for osd_pool_default_crush_replicated_ruleset OPTION(osd_pool_default_crush_replicated_ruleset, OPT_INT, CEPH_DEFAULT_CRUSH_REPLICATED_RULESET) OPTION(osd_pool_erasure_code_stripe_width, OPT_U32, OSD_POOL_ERASURE_CODE_STRIPE_WIDTH) // in bytes diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h index 205e18fb12c83..781df1b3003b1 100644 --- a/src/include/ceph_features.h +++ b/src/include/ceph_features.h @@ -64,7 +64,6 @@ // duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY #define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49) /* overlap w/ above */ #define CEPH_FEATURE_MON_METADATA (1ULL<<50) -#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54) /* ... */ #define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55) @@ -152,7 +151,6 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) { CEPH_FEATURE_MDS_QUOTA | \ CEPH_FEATURE_CRUSH_V4 | \ CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY | \ - CEPH_FEATURE_OSD_HITSET_GMT | \ CEPH_FEATURE_HAMMER_0_94_4 | \ 0ULL) diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 4ddf7ba6d0532..a75b067362ccb 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -634,7 +634,7 @@ COMMAND("osd pool get " \ "get pool parameter ", "osd", "r", "cli,rest") COMMAND("osd pool set " \ "name=pool,type=CephPoolname " \ - "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|write_fadvise_dontneed " \ + "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|write_fadvise_dontneed " \ "name=val,type=CephString " \ "name=force,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \ "set pool parameter to ", "osd", "rw", "cli,rest") diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index f4ac262bd699f..2f08e41bdf632 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -16,7 +16,6 @@ * */ -#include #include #include "OSDMonitor.h" @@ -1573,9 +1572,6 @@ void OSDMonitor::take_all_failures(list& ls) failure_info.clear(); } -static bool uses_gmt_hitset(const std::pair& pool) { - return pool.second.use_gmt_hitset; -} // boot -- @@ -1645,19 +1641,6 @@ bool OSDMonitor::preprocess_boot(MOSDBoot *m) } } - if (std::find_if(osdmap.get_pools().begin(), - osdmap.get_pools().end(), - uses_gmt_hitset) != osdmap.get_pools().end()) { - assert(osdmap.get_num_up_osds() == 0 || - osdmap.get_up_osd_features() & CEPH_FEATURE_OSD_HITSET_GMT); - if (!(m->osd_features & CEPH_FEATURE_OSD_HITSET_GMT)) { - dout(0) << __func__ << " one or more pools uses GMT hitsets but osd at " - << m->get_orig_source_inst() - << " doesn't announce support -- ignore" << dendl; - goto ignore; - } - } - // already booted? if (osdmap.is_up(from) && osdmap.get_inst(from) == m->get_orig_source_inst()) { @@ -3101,7 +3084,6 @@ bool OSDMonitor::preprocess_command(MMonCommand *m) if (!p->is_tier() && (var == "hit_set_type" || var == "hit_set_period" || var == "hit_set_count" || var == "hit_set_fpp" || - var == "use_gmt_hitset" || var == "target_max_objects" || var == "target_max_bytes" || var == "cache_target_full_ratio" || var == "cache_target_dirty_ratio" || @@ -3154,8 +3136,6 @@ bool OSDMonitor::preprocess_command(MMonCommand *m) BloomHitSet::Params *bloomp = static_cast(p->hit_set_params.impl.get()); f->dump_float("hit_set_fpp", bloomp->get_fpp()); } - } else if (var == "use_gmt_hitset") { - f->dump_bool("use_gmt_hitset", p->use_gmt_hitset); } else if (var == "target_max_objects") { f->dump_unsigned("target_max_objects", p->target_max_objects); } else if (var == "target_max_bytes") { @@ -3213,8 +3193,6 @@ bool OSDMonitor::preprocess_command(MMonCommand *m) } BloomHitSet::Params *bloomp = static_cast(p->hit_set_params.impl.get()); ss << "hit_set_fpp: " << bloomp->get_fpp(); - } else if (var == "use_gmt_hitset") { - ss << "use_gmt_hitset: " << p->use_gmt_hitset << "\n"; } else if (var == "target_max_objects") { ss << "target_max_objects: " << p->target_max_objects; } else if (var == "target_max_bytes") { @@ -4097,11 +4075,6 @@ int OSDMonitor::prepare_new_pool(string& name, uint64_t auid, pi->set_flag(pg_pool_t::FLAG_NOPGCHANGE); if (g_conf->osd_pool_default_flag_nosizechange) pi->set_flag(pg_pool_t::FLAG_NOSIZECHANGE); - if (g_conf->osd_pool_use_gmt_hitset && - (osdmap.get_up_osd_features() & CEPH_FEATURE_OSD_HITSET_GMT)) - pi->use_gmt_hitset = true; - else - pi->use_gmt_hitset = false; pi->size = size; pi->min_size = min_size; @@ -4445,17 +4418,6 @@ int OSDMonitor::prepare_command_pool_set(map &cmdmap, } BloomHitSet::Params *bloomp = static_cast(p.hit_set_params.impl.get()); bloomp->set_fpp(f); - } else if (var == "use_gmt_hitset") { - if (val == "true" || (interr.empty() && n == 1)) { - if (!(osdmap.get_up_osd_features() & CEPH_FEATURE_OSD_HITSET_GMT)) { - ss << "not all OSDs support GMT hit set."; - return -EINVAL; - } - p.use_gmt_hitset = true; - } else { - ss << "expecting value 'true' or '1'"; - return -EINVAL; - } } else if (var == "debug_fake_ec_pool") { if (val == "true" || (interr.empty() && n == 1)) { p.flags |= pg_pool_t::FLAG_DEBUG_FAKE_EC_POOL; diff --git a/src/osd/HitSet.cc b/src/osd/HitSet.cc index 597b1f7d8f560..700da5d4a83a6 100644 --- a/src/osd/HitSet.cc +++ b/src/osd/HitSet.cc @@ -36,6 +36,9 @@ HitSet::HitSet(const HitSet::Params& params) impl.reset(new ExplicitObjectHitSet(static_cast(params.impl.get()))); break; + case TYPE_NONE: + break; + default: assert (0 == "unknown HitSet type"); } diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 0fad912f496e6..59f41a1119f9b 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1135,7 +1135,7 @@ void ReplicatedPG::do_pg_op(OpRequestRef op) p != info.hit_set.history.end(); ++p) { if (stamp >= p->begin && stamp <= p->end) { - oid = get_hit_set_archive_object(p->begin, p->end, p->using_gmt); + oid = get_hit_set_archive_object(p->begin, p->end); break; } } @@ -10134,19 +10134,10 @@ hobject_t ReplicatedPG::get_hit_set_current_object(utime_t stamp) return hoid; } -hobject_t ReplicatedPG::get_hit_set_archive_object(utime_t start, - utime_t end, - bool using_gmt) +hobject_t ReplicatedPG::get_hit_set_archive_object(utime_t start, utime_t end) { ostringstream ss; - ss << "hit_set_" << info.pgid.pgid << "_archive_"; - if (using_gmt) { - start.gmtime(ss) << "_"; - end.gmtime(ss); - } else { - start.localtime(ss) << "_"; - end.localtime(ss); - } + ss << "hit_set_" << info.pgid.pgid << "_archive_" << start << "_" << end; hobject_t hoid(sobject_t(ss.str(), CEPH_NOSNAP), "", info.pgid.ps(), info.pgid.pool(), cct->_conf->osd_hit_set_namespace); @@ -10165,19 +10156,12 @@ void ReplicatedPG::hit_set_clear() void ReplicatedPG::hit_set_setup() { if (!is_active() || - !is_primary()) { - hit_set_clear(); - return; - } - - if (is_active() && is_primary() && - (!pool.info.hit_set_count || - !pool.info.hit_set_period || - pool.info.hit_set_params.get_type() == HitSet::TYPE_NONE)) { + !is_primary() || + !pool.info.hit_set_count || + !pool.info.hit_set_period || + pool.info.hit_set_params.get_type() == HitSet::TYPE_NONE) { hit_set_clear(); - - // only primary is allowed to remove all the hit set objects - hit_set_remove_all(); + //hit_set_remove_all(); // FIXME: implement me soon return; } @@ -10189,46 +10173,6 @@ void ReplicatedPG::hit_set_setup() hit_set_apply_log(); } -void ReplicatedPG::hit_set_remove_all() -{ - // If any archives are degraded we skip this - for (list::iterator p = info.hit_set.history.begin(); - p != info.hit_set.history.end(); - ++p) { - hobject_t aoid = get_hit_set_archive_object(p->begin, p->end, p->using_gmt); - - // Once we hit a degraded object just skip - if (is_degraded_or_backfilling_object(aoid)) - return; - if (scrubber.write_blocked_by_scrub(aoid)) - return; - } - - if (!info.hit_set.history.empty()) { - list::reverse_iterator p = info.hit_set.history.rbegin(); - assert(p != info.hit_set.history.rend()); - hobject_t oid = get_hit_set_archive_object(p->begin, p->end, p->using_gmt); - assert(!is_degraded_or_backfilling_object(oid)); - ObjectContextRef obc = get_object_context(oid, false); - assert(obc); - - RepGather *repop = simple_repop_create(obc); - OpContext *ctx = repop->ctx; - ctx->at_version = get_next_version(); - ctx->updated_hset_history = info.hit_set; - utime_t now = ceph_clock_now(cct); - ctx->mtime = now; - hit_set_trim(repop, 0); - info.stats.stats.add(ctx->delta_stats); - simple_repop_submit(repop); - } - - info.hit_set = pg_hit_set_history_t(); - if (agent_state) { - agent_state->discard_hit_sets(); - } -} - void ReplicatedPG::hit_set_create() { utime_t now = ceph_clock_now(NULL); @@ -10330,7 +10274,7 @@ void ReplicatedPG::hit_set_persist() for (list::iterator p = info.hit_set.history.begin(); p != info.hit_set.history.end(); ++p) { - hobject_t aoid = get_hit_set_archive_object(p->begin, p->end, p->using_gmt); + hobject_t aoid = get_hit_set_archive_object(p->begin, p->end); // Once we hit a degraded object just skip further trim if (is_degraded_or_backfilling_object(aoid)) @@ -10339,8 +10283,10 @@ void ReplicatedPG::hit_set_persist() return; } - oid = get_hit_set_archive_object(start, now, pool.info.use_gmt_hitset); + oid = get_hit_set_archive_object(start, now); // If the current object is degraded we skip this persist request + if (is_degraded_or_backfilling_object(oid)) + return; if (scrubber.write_blocked_by_scrub(oid)) return; @@ -10431,7 +10377,7 @@ void ReplicatedPG::hit_set_persist() updated_hit_set_hist.history.push_back(updated_hit_set_hist.current_info); hit_set_create(); - updated_hit_set_hist.current_info = pg_hit_set_info_t(pool.info.use_gmt_hitset); + updated_hit_set_hist.current_info = pg_hit_set_info_t(); updated_hit_set_hist.current_last_stamp = utime_t(); // fabricate an object_info_t and SnapSet @@ -10494,7 +10440,7 @@ void ReplicatedPG::hit_set_trim(RepGather *repop, unsigned max) for (unsigned num = updated_hit_set_hist.history.size(); num > max; --num) { list::iterator p = updated_hit_set_hist.history.begin(); assert(p != updated_hit_set_hist.history.end()); - hobject_t oid = get_hit_set_archive_object(p->begin, p->end, p->using_gmt); + hobject_t oid = get_hit_set_archive_object(p->begin, p->end); assert(!is_degraded_or_backfilling_object(oid)); @@ -10779,7 +10725,7 @@ void ReplicatedPG::agent_load_hit_sets() continue; } - hobject_t oid = get_hit_set_archive_object(p->begin, p->end, p->using_gmt); + hobject_t oid = get_hit_set_archive_object(p->begin, p->end); if (is_unreadable_object(oid)) { dout(10) << __func__ << " unreadable " << oid << ", waiting" << dendl; break; diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 6dbcd191394a3..48e0def334ef8 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -901,12 +901,9 @@ protected: bool hit_set_apply_log(); ///< apply log entries to update in-memory HitSet void hit_set_trim(RepGather *repop, unsigned max); ///< discard old HitSets void hit_set_in_memory_trim(); ///< discard old in memory HitSets - void hit_set_remove_all(); hobject_t get_hit_set_current_object(utime_t stamp); - hobject_t get_hit_set_archive_object(utime_t start, - utime_t end, - bool using_gmt); + hobject_t get_hit_set_archive_object(utime_t start, utime_t end); // agent boost::scoped_ptr agent_state; diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index b301b72c491ea..94ca0832313ef 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -926,7 +926,6 @@ void pg_pool_t::dump(Formatter *f) const f->close_section(); // hit_set_params f->dump_unsigned("hit_set_period", hit_set_period); f->dump_unsigned("hit_set_count", hit_set_count); - f->dump_bool("use_gmt_hitset", use_gmt_hitset); f->dump_unsigned("min_read_recency_for_promote", min_read_recency_for_promote); f->dump_unsigned("stripe_width", get_stripe_width()); f->dump_unsigned("expected_num_objects", expected_num_objects); @@ -1239,7 +1238,7 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const return; } - ENCODE_START(21, 5, bl); + ENCODE_START(17, 5, bl); ::encode(type, bl); ::encode(size, bl); ::encode(crush_ruleset, bl); @@ -1281,15 +1280,12 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const ::encode(last_force_op_resend, bl); ::encode(min_read_recency_for_promote, bl); ::encode(expected_num_objects, bl); - ::encode(uint32_t(.6 * 1e6), bl); - ::encode(uint32_t(1), bl); - ::encode(use_gmt_hitset, bl); ENCODE_FINISH(bl); } void pg_pool_t::decode(bufferlist::iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN(21, 5, 5, bl); + DECODE_START_LEGACY_COMPAT_LEN(17, 5, 5, bl); ::decode(type, bl); ::decode(size, bl); ::decode(crush_ruleset, bl); @@ -1401,19 +1397,6 @@ void pg_pool_t::decode(bufferlist::iterator& bl) } else { expected_num_objects = 0; } - if (struct_v >= 19) { - uint32_t dummy; - ::decode(dummy, bl); - } - if (struct_v >= 20) { - uint32_t dummy; - ::decode(dummy, bl); - } - if (struct_v >= 21) { - ::decode(use_gmt_hitset, bl); - } else { - use_gmt_hitset = false; - } DECODE_FINISH(bl); calc_pg_masks(); } @@ -3806,25 +3789,19 @@ void pg_create_t::generate_test_instances(list& o) void pg_hit_set_info_t::encode(bufferlist& bl) const { - ENCODE_START(2, 1, bl); + ENCODE_START(1, 1, bl); ::encode(begin, bl); ::encode(end, bl); ::encode(version, bl); - ::encode(using_gmt, bl); ENCODE_FINISH(bl); } void pg_hit_set_info_t::decode(bufferlist::iterator& p) { - DECODE_START(2, p); + DECODE_START(1, p); ::decode(begin, p); ::decode(end, p); ::decode(version, p); - if (struct_v >= 2) { - ::decode(using_gmt, p); - } else { - using_gmt = false; - } DECODE_FINISH(p); } @@ -3833,7 +3810,6 @@ void pg_hit_set_info_t::dump(Formatter *f) const f->dump_stream("begin") << begin; f->dump_stream("end") << end; f->dump_stream("version") << version; - f->dump_stream("using_gmt") << using_gmt; } void pg_hit_set_info_t::generate_test_instances(list& ls) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 6477180a3abac..52d45d293a1e7 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1035,7 +1035,6 @@ public: HitSet::Params hit_set_params; ///< The HitSet params to use on this pool uint32_t hit_set_period; ///< periodicity of HitSet segments (seconds) uint32_t hit_set_count; ///< number of periods to retain - bool use_gmt_hitset; ///< use gmt to name the hitset archive object uint32_t min_read_recency_for_promote; ///< minimum number of HitSet to check before promote uint32_t stripe_width; ///< erasure coded stripe size in bytes @@ -1064,7 +1063,6 @@ public: hit_set_params(), hit_set_period(0), hit_set_count(0), - use_gmt_hitset(true), min_read_recency_for_promote(0), stripe_width(0), expected_num_objects(0) @@ -1602,11 +1600,10 @@ WRITE_CLASS_ENCODER_FEATURES(pool_stat_t) struct pg_hit_set_info_t { utime_t begin, end; ///< time interval eversion_t version; ///< version this HitSet object was written - bool using_gmt; ///< use gmt for creating the hit_set archive object name - pg_hit_set_info_t(bool using_gmt = true) - : using_gmt(using_gmt) {} - pg_hit_set_info_t(utime_t b, bool using_gmt) - : begin(b), using_gmt(using_gmt) {} + + pg_hit_set_info_t() {} + pg_hit_set_info_t(utime_t b) + : begin(b) {} void encode(bufferlist &bl) const; void decode(bufferlist::iterator &bl); -- 2.47.3