From 25f0afc8735c2ccb920af720a476cfcf69affa85 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 17 Jul 2015 14:51:12 -0400 Subject: [PATCH] osd: allow sort order to randomly revert to nibblewise via debug config Note that if this option is misused and applied to some but not all OSDs, they will disagree on the sort order and get confused, potentially losing data. It must be global to the cluster in order to be useful. Signed-off-by: Sage Weil --- src/common/config_opts.h | 1 + src/osd/PG.cc | 12 +++++++++++- src/osd/PG.h | 5 ++++- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index b21f69a788ba7..ff083fb0349ab 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -693,6 +693,7 @@ OPTION(osd_debug_verify_stray_on_activate, OPT_BOOL, false) OPTION(osd_debug_skip_full_check_in_backfill_reservation, OPT_BOOL, false) OPTION(osd_debug_reject_backfill_probability, OPT_DOUBLE, 0) OPTION(osd_debug_inject_copyfrom_error, OPT_BOOL, false) // inject failure during copyfrom completion +OPTION(osd_debug_randomize_hobject_sort_order, OPT_BOOL, false) OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking OPTION(osd_num_op_tracker_shard, OPT_U32, 32) // The number of shards for holding the ops OPTION(osd_op_history_size, OPT_U32, 20) // Max number of completed ops to track diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 628bd130023ab..ef3a969caaec2 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -217,7 +217,8 @@ PG::PG(OSDService *o, OSDMapRef curmap, pg_id(p), peer_features(CEPH_FEATURES_SUPPORTED_DEFAULT), acting_features(CEPH_FEATURES_SUPPORTED_DEFAULT), - upacting_features(CEPH_FEATURES_SUPPORTED_DEFAULT) + upacting_features(CEPH_FEATURES_SUPPORTED_DEFAULT), + randomly_sort_nibblewise(false) { #ifdef PG_DEBUG_REFS osd->add_pgid(p, this); @@ -4730,6 +4731,15 @@ void PG::start_peering_interval( upacting_features &= osdmap->get_xinfo(*p).features; } + if 
(g_conf->osd_debug_randomize_hobject_sort_order) { + // randomly use a nibblewise sort (when we otherwise might have + // done bitwise) based on some *deterministic* function such that + // all peers/osds will agree. + randomly_sort_nibblewise = (info.history.same_interval_since + info.pgid.ps()) & 1; + } else { + randomly_sort_nibblewise = false; + } + dout(10) << " up " << oldup << " -> " << up << ", acting " << oldacting << " -> " << acting << ", acting_primary " << old_acting_primary << " -> " << new_acting_primary diff --git a/src/osd/PG.h b/src/osd/PG.h index 6789872aaa8a6..8e4de79de14cf 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -2016,6 +2016,8 @@ public: uint64_t acting_features; uint64_t upacting_features; + bool randomly_sort_nibblewise; + public: const spg_t& get_pgid() const { return pg_id; } int get_nrep() const { return acting.size(); } @@ -2031,7 +2033,8 @@ public: /// true if we will sort hobjects bitwise for this pg interval bool get_sort_bitwise() const { - return get_min_upacting_features() & CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT; + return (get_min_upacting_features() & CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT) && + !randomly_sort_nibblewise; } void init_primary_up_acting( -- 2.39.5