git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
revert: osd: use GMT time for hitsets 6644/head
author Loic Dachary <loic@dachary.org>
Wed, 18 Nov 2015 17:08:58 +0000 (18:08 +0100)
committer Loic Dachary <ldachary@redhat.com>
Thu, 19 Nov 2015 00:07:20 +0000 (01:07 +0100)
"Merge pull request #5825 from tchaikov/wip-12848-hammer"

This reverts commit 39544718dc2f09bcfdc632ac72fd2a3cda87687e, reversing
changes made to 4ad97162026e1eb6e6e948ddf3eb39f711431e45.

http://tracker.ceph.com/issues/13812 Fixes: #13812

Signed-off-by: Loic Dachary <loic@dachary.org>
src/common/config_opts.h
src/include/ceph_features.h
src/mon/MonCommands.h
src/mon/OSDMonitor.cc
src/osd/HitSet.cc
src/osd/ReplicatedPG.cc
src/osd/ReplicatedPG.h
src/osd/osd_types.cc
src/osd/osd_types.h

index 0f67da5a3afe86664ba664665401cb58344c2f0c..dd376a6f724cf81e459b0647c267c4ebea16cabc 100644 (file)
@@ -497,7 +497,6 @@ OPTION(osd_client_message_cap, OPT_U64, 100)              // num client messages
 OPTION(osd_pg_bits, OPT_INT, 6)  // bits per osd
 OPTION(osd_pgp_bits, OPT_INT, 6)  // bits per osd
 OPTION(osd_crush_chooseleaf_type, OPT_INT, 1) // 1 = host
-OPTION(osd_pool_use_gmt_hitset, OPT_BOOL, true) // try to use gmt for hitset archive names if all osds in cluster support it.
 OPTION(osd_pool_default_crush_rule, OPT_INT, -1) // deprecated for osd_pool_default_crush_replicated_ruleset
 OPTION(osd_pool_default_crush_replicated_ruleset, OPT_INT, CEPH_DEFAULT_CRUSH_REPLICATED_RULESET)
 OPTION(osd_pool_erasure_code_stripe_width, OPT_U32, OSD_POOL_ERASURE_CODE_STRIPE_WIDTH) // in bytes
index 205e18fb12c83bbc222f8f99f961aa279afb1194..781df1b3003b1d15c6969de158448ff891b6f382 100644 (file)
@@ -64,7 +64,6 @@
 // duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
 #define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49)  /* overlap w/ above */
 #define CEPH_FEATURE_MON_METADATA (1ULL<<50)
-#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54)
 /* ... */
 #define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55)
 
@@ -152,7 +151,6 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) {
         CEPH_FEATURE_MDS_QUOTA | \
          CEPH_FEATURE_CRUSH_V4 |            \
          CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY |           \
-     CEPH_FEATURE_OSD_HITSET_GMT |                       \
         CEPH_FEATURE_HAMMER_0_94_4 |            \
         0ULL)
 
index 4ddf7ba6d05329c2c7fcdbd41cdd8dcc4c78f622..a75b067362ccb5c074d206358e9f7afc1e6af13a 100644 (file)
@@ -634,7 +634,7 @@ COMMAND("osd pool get " \
        "get pool parameter <var>", "osd", "r", "cli,rest")
 COMMAND("osd pool set " \
        "name=pool,type=CephPoolname " \
-       "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|write_fadvise_dontneed " \
+       "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|write_fadvise_dontneed " \
        "name=val,type=CephString " \
        "name=force,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \
        "set pool parameter <var> to <val>", "osd", "rw", "cli,rest")
index f4ac262bd699fc4716e8dc9e3e59f8b221609fa8..2f08e41bdf632756080e8ba5a6ee44b1e9b51dac 100644 (file)
@@ -16,7 +16,6 @@
  * 
  */
 
-#include <algorithm>
 #include <sstream>
 
 #include "OSDMonitor.h"
@@ -1573,9 +1572,6 @@ void OSDMonitor::take_all_failures(list<MOSDFailure*>& ls)
   failure_info.clear();
 }
 
-static bool uses_gmt_hitset(const std::pair<int64_t, pg_pool_t>& pool) {
-  return pool.second.use_gmt_hitset;
-}
 
 // boot --
 
@@ -1645,19 +1641,6 @@ bool OSDMonitor::preprocess_boot(MOSDBoot *m)
     }
   }
 
-  if (std::find_if(osdmap.get_pools().begin(),
-                  osdmap.get_pools().end(),
-                  uses_gmt_hitset) != osdmap.get_pools().end()) {
-    assert(osdmap.get_num_up_osds() == 0 ||
-          osdmap.get_up_osd_features() & CEPH_FEATURE_OSD_HITSET_GMT);
-    if (!(m->osd_features & CEPH_FEATURE_OSD_HITSET_GMT)) {
-      dout(0) << __func__ << " one or more pools uses GMT hitsets but osd at "
-             << m->get_orig_source_inst()
-             << " doesn't announce support -- ignore" << dendl;
-      goto ignore;
-    }
-  }
-
   // already booted?
   if (osdmap.is_up(from) &&
       osdmap.get_inst(from) == m->get_orig_source_inst()) {
@@ -3101,7 +3084,6 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
     if (!p->is_tier() &&
         (var == "hit_set_type" || var == "hit_set_period" ||
          var == "hit_set_count" || var == "hit_set_fpp" ||
-        var == "use_gmt_hitset" ||
          var == "target_max_objects" || var == "target_max_bytes" ||
          var == "cache_target_full_ratio" ||
          var == "cache_target_dirty_ratio" ||
@@ -3154,8 +3136,6 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
          BloomHitSet::Params *bloomp = static_cast<BloomHitSet::Params*>(p->hit_set_params.impl.get());
          f->dump_float("hit_set_fpp", bloomp->get_fpp());
        }
-      } else if (var == "use_gmt_hitset") {
-       f->dump_bool("use_gmt_hitset", p->use_gmt_hitset);
       } else if (var == "target_max_objects") {
         f->dump_unsigned("target_max_objects", p->target_max_objects);
       } else if (var == "target_max_bytes") {
@@ -3213,8 +3193,6 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
        }
        BloomHitSet::Params *bloomp = static_cast<BloomHitSet::Params*>(p->hit_set_params.impl.get());
        ss << "hit_set_fpp: " << bloomp->get_fpp();
-      } else if (var == "use_gmt_hitset") {
-       ss << "use_gmt_hitset: " << p->use_gmt_hitset << "\n";
       } else if (var == "target_max_objects") {
         ss << "target_max_objects: " << p->target_max_objects;
       } else if (var == "target_max_bytes") {
@@ -4097,11 +4075,6 @@ int OSDMonitor::prepare_new_pool(string& name, uint64_t auid,
     pi->set_flag(pg_pool_t::FLAG_NOPGCHANGE);
   if (g_conf->osd_pool_default_flag_nosizechange)
     pi->set_flag(pg_pool_t::FLAG_NOSIZECHANGE);
-  if (g_conf->osd_pool_use_gmt_hitset &&
-      (osdmap.get_up_osd_features() & CEPH_FEATURE_OSD_HITSET_GMT))
-    pi->use_gmt_hitset = true;
-  else
-    pi->use_gmt_hitset = false;
 
   pi->size = size;
   pi->min_size = min_size;
@@ -4445,17 +4418,6 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
     }
     BloomHitSet::Params *bloomp = static_cast<BloomHitSet::Params*>(p.hit_set_params.impl.get());
     bloomp->set_fpp(f);
-  } else if (var == "use_gmt_hitset") {
-    if (val == "true" || (interr.empty() && n == 1)) {
-      if (!(osdmap.get_up_osd_features() & CEPH_FEATURE_OSD_HITSET_GMT)) {
-       ss << "not all OSDs support GMT hit set.";
-       return -EINVAL;
-      }
-      p.use_gmt_hitset = true;
-    } else {
-      ss << "expecting value 'true' or '1'";
-      return -EINVAL;
-    }
   } else if (var == "debug_fake_ec_pool") {
     if (val == "true" || (interr.empty() && n == 1)) {
       p.flags |= pg_pool_t::FLAG_DEBUG_FAKE_EC_POOL;
index 597b1f7d8f560e176837c3a0b141930f5cacc89a..700da5d4a83a6def7db9040280f0f8f252efa2cc 100644 (file)
@@ -36,6 +36,9 @@ HitSet::HitSet(const HitSet::Params& params)
     impl.reset(new ExplicitObjectHitSet(static_cast<ExplicitObjectHitSet::Params*>(params.impl.get())));
     break;
 
+  case TYPE_NONE:
+    break;
+
   default:
     assert (0 == "unknown HitSet type");
   }
index 0fad912f496e6c1b21569187ad954b0f10b7f5ee..59f41a1119f9b50cd5ddeb529d66c39cd1f051e8 100644 (file)
@@ -1135,7 +1135,7 @@ void ReplicatedPG::do_pg_op(OpRequestRef op)
               p != info.hit_set.history.end();
               ++p) {
            if (stamp >= p->begin && stamp <= p->end) {
-             oid = get_hit_set_archive_object(p->begin, p->end, p->using_gmt);
+             oid = get_hit_set_archive_object(p->begin, p->end);
              break;
            }
          }
@@ -10134,19 +10134,10 @@ hobject_t ReplicatedPG::get_hit_set_current_object(utime_t stamp)
   return hoid;
 }
 
-hobject_t ReplicatedPG::get_hit_set_archive_object(utime_t start,
-                                                  utime_t end,
-                                                  bool using_gmt)
+hobject_t ReplicatedPG::get_hit_set_archive_object(utime_t start, utime_t end)
 {
   ostringstream ss;
-  ss << "hit_set_" << info.pgid.pgid << "_archive_";
-  if (using_gmt) {
-    start.gmtime(ss) << "_";
-    end.gmtime(ss);
-  } else {
-    start.localtime(ss) << "_";
-    end.localtime(ss);
-  }
+  ss << "hit_set_" << info.pgid.pgid << "_archive_" << start << "_" << end;
   hobject_t hoid(sobject_t(ss.str(), CEPH_NOSNAP), "",
                 info.pgid.ps(), info.pgid.pool(),
                 cct->_conf->osd_hit_set_namespace);
@@ -10165,19 +10156,12 @@ void ReplicatedPG::hit_set_clear()
 void ReplicatedPG::hit_set_setup()
 {
   if (!is_active() ||
-      !is_primary()) {
-    hit_set_clear();
-    return;
-  }
-
-  if (is_active() && is_primary() &&
-      (!pool.info.hit_set_count ||
-       !pool.info.hit_set_period ||
-       pool.info.hit_set_params.get_type() == HitSet::TYPE_NONE)) {
+      !is_primary() ||
+      !pool.info.hit_set_count ||
+      !pool.info.hit_set_period ||
+      pool.info.hit_set_params.get_type() == HitSet::TYPE_NONE) {
     hit_set_clear();
-
-    // only primary is allowed to remove all the hit set objects
-    hit_set_remove_all();
+    //hit_set_remove_all();  // FIXME: implement me soon
     return;
   }
 
@@ -10189,46 +10173,6 @@ void ReplicatedPG::hit_set_setup()
   hit_set_apply_log();
 }
 
-void ReplicatedPG::hit_set_remove_all()
-{
-  // If any archives are degraded we skip this
-  for (list<pg_hit_set_info_t>::iterator p = info.hit_set.history.begin();
-       p != info.hit_set.history.end();
-       ++p) {
-    hobject_t aoid = get_hit_set_archive_object(p->begin, p->end, p->using_gmt);
-
-    // Once we hit a degraded object just skip
-    if (is_degraded_or_backfilling_object(aoid))
-      return;
-    if (scrubber.write_blocked_by_scrub(aoid))
-      return;
-  }
-
-  if (!info.hit_set.history.empty()) {
-    list<pg_hit_set_info_t>::reverse_iterator p = info.hit_set.history.rbegin();
-    assert(p != info.hit_set.history.rend());
-    hobject_t oid = get_hit_set_archive_object(p->begin, p->end, p->using_gmt);
-    assert(!is_degraded_or_backfilling_object(oid));
-    ObjectContextRef obc = get_object_context(oid, false);
-    assert(obc);
-
-    RepGather *repop = simple_repop_create(obc);
-    OpContext *ctx = repop->ctx;
-    ctx->at_version = get_next_version();
-    ctx->updated_hset_history = info.hit_set;
-    utime_t now = ceph_clock_now(cct);
-    ctx->mtime = now;
-    hit_set_trim(repop, 0);
-    info.stats.stats.add(ctx->delta_stats);
-    simple_repop_submit(repop);
-  }
-
-  info.hit_set = pg_hit_set_history_t();
-  if (agent_state) {
-    agent_state->discard_hit_sets();
-  }
-}
-
 void ReplicatedPG::hit_set_create()
 {
   utime_t now = ceph_clock_now(NULL);
@@ -10330,7 +10274,7 @@ void ReplicatedPG::hit_set_persist()
   for (list<pg_hit_set_info_t>::iterator p = info.hit_set.history.begin();
        p != info.hit_set.history.end();
        ++p) {
-    hobject_t aoid = get_hit_set_archive_object(p->begin, p->end, p->using_gmt);
+    hobject_t aoid = get_hit_set_archive_object(p->begin, p->end);
 
     // Once we hit a degraded object just skip further trim
     if (is_degraded_or_backfilling_object(aoid))
@@ -10339,8 +10283,10 @@ void ReplicatedPG::hit_set_persist()
       return;
   }
 
-  oid = get_hit_set_archive_object(start, now, pool.info.use_gmt_hitset);
+  oid = get_hit_set_archive_object(start, now);
   // If the current object is degraded we skip this persist request
+  if (is_degraded_or_backfilling_object(oid))
+    return;
   if (scrubber.write_blocked_by_scrub(oid))
     return;
 
@@ -10431,7 +10377,7 @@ void ReplicatedPG::hit_set_persist()
 
   updated_hit_set_hist.history.push_back(updated_hit_set_hist.current_info);
   hit_set_create();
-  updated_hit_set_hist.current_info = pg_hit_set_info_t(pool.info.use_gmt_hitset);
+  updated_hit_set_hist.current_info = pg_hit_set_info_t();
   updated_hit_set_hist.current_last_stamp = utime_t();
 
   // fabricate an object_info_t and SnapSet
@@ -10494,7 +10440,7 @@ void ReplicatedPG::hit_set_trim(RepGather *repop, unsigned max)
   for (unsigned num = updated_hit_set_hist.history.size(); num > max; --num) {
     list<pg_hit_set_info_t>::iterator p = updated_hit_set_hist.history.begin();
     assert(p != updated_hit_set_hist.history.end());
-    hobject_t oid = get_hit_set_archive_object(p->begin, p->end, p->using_gmt);
+    hobject_t oid = get_hit_set_archive_object(p->begin, p->end);
 
     assert(!is_degraded_or_backfilling_object(oid));
 
@@ -10779,7 +10725,7 @@ void ReplicatedPG::agent_load_hit_sets()
          continue;
        }
 
-       hobject_t oid = get_hit_set_archive_object(p->begin, p->end, p->using_gmt);
+       hobject_t oid = get_hit_set_archive_object(p->begin, p->end);
        if (is_unreadable_object(oid)) {
          dout(10) << __func__ << " unreadable " << oid << ", waiting" << dendl;
          break;
index 6dbcd191394a3b499d32cc06556f7d388a563f90..48e0def334ef8893ec32caddc125f796e384f0f7 100644 (file)
@@ -901,12 +901,9 @@ protected:
   bool hit_set_apply_log(); ///< apply log entries to update in-memory HitSet
   void hit_set_trim(RepGather *repop, unsigned max); ///< discard old HitSets
   void hit_set_in_memory_trim();                     ///< discard old in memory HitSets
-  void hit_set_remove_all();
 
   hobject_t get_hit_set_current_object(utime_t stamp);
-  hobject_t get_hit_set_archive_object(utime_t start,
-                                      utime_t end,
-                                      bool using_gmt);
+  hobject_t get_hit_set_archive_object(utime_t start, utime_t end);
 
   // agent
   boost::scoped_ptr<TierAgentState> agent_state;
index b301b72c491eaf382a8e074e859c47f298fe3551..94ca0832313efc2f288b77b93c6cbf09174bf27b 100644 (file)
@@ -926,7 +926,6 @@ void pg_pool_t::dump(Formatter *f) const
   f->close_section(); // hit_set_params
   f->dump_unsigned("hit_set_period", hit_set_period);
   f->dump_unsigned("hit_set_count", hit_set_count);
-  f->dump_bool("use_gmt_hitset", use_gmt_hitset);
   f->dump_unsigned("min_read_recency_for_promote", min_read_recency_for_promote);
   f->dump_unsigned("stripe_width", get_stripe_width());
   f->dump_unsigned("expected_num_objects", expected_num_objects);
@@ -1239,7 +1238,7 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
     return;
   }
 
-  ENCODE_START(21, 5, bl);
+  ENCODE_START(17, 5, bl);
   ::encode(type, bl);
   ::encode(size, bl);
   ::encode(crush_ruleset, bl);
@@ -1281,15 +1280,12 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
   ::encode(last_force_op_resend, bl);
   ::encode(min_read_recency_for_promote, bl);
   ::encode(expected_num_objects, bl);
-  ::encode(uint32_t(.6 * 1e6), bl);
-  ::encode(uint32_t(1), bl);
-  ::encode(use_gmt_hitset, bl);
   ENCODE_FINISH(bl);
 }
 
 void pg_pool_t::decode(bufferlist::iterator& bl)
 {
-  DECODE_START_LEGACY_COMPAT_LEN(21, 5, 5, bl);
+  DECODE_START_LEGACY_COMPAT_LEN(17, 5, 5, bl);
   ::decode(type, bl);
   ::decode(size, bl);
   ::decode(crush_ruleset, bl);
@@ -1401,19 +1397,6 @@ void pg_pool_t::decode(bufferlist::iterator& bl)
   } else {
     expected_num_objects = 0;
   }
-  if (struct_v >= 19) {
-    uint32_t dummy;        
-    ::decode(dummy, bl);
-  }
-  if (struct_v >= 20) {
-    uint32_t dummy;
-    ::decode(dummy, bl);
-  }
-  if (struct_v >= 21) {
-    ::decode(use_gmt_hitset, bl);
-  } else {
-    use_gmt_hitset = false;
-  }
   DECODE_FINISH(bl);
   calc_pg_masks();
 }
@@ -3806,25 +3789,19 @@ void pg_create_t::generate_test_instances(list<pg_create_t*>& o)
 
 void pg_hit_set_info_t::encode(bufferlist& bl) const
 {
-  ENCODE_START(2, 1, bl);
+  ENCODE_START(1, 1, bl);
   ::encode(begin, bl);
   ::encode(end, bl);
   ::encode(version, bl);
-  ::encode(using_gmt, bl);
   ENCODE_FINISH(bl);
 }
 
 void pg_hit_set_info_t::decode(bufferlist::iterator& p)
 {
-  DECODE_START(2, p);
+  DECODE_START(1, p);
   ::decode(begin, p);
   ::decode(end, p);
   ::decode(version, p);
-  if (struct_v >= 2) {
-    ::decode(using_gmt, p);
-  } else {
-    using_gmt = false;
-  }
   DECODE_FINISH(p);
 }
 
@@ -3833,7 +3810,6 @@ void pg_hit_set_info_t::dump(Formatter *f) const
   f->dump_stream("begin") << begin;
   f->dump_stream("end") << end;
   f->dump_stream("version") << version;
-  f->dump_stream("using_gmt") << using_gmt;
 }
 
 void pg_hit_set_info_t::generate_test_instances(list<pg_hit_set_info_t*>& ls)
index 6477180a3abacd13745c12c57624a04bef7d7b7c..52d45d293a1e74c5aab438dd0ea89e4dbca5782c 100644 (file)
@@ -1035,7 +1035,6 @@ public:
   HitSet::Params hit_set_params; ///< The HitSet params to use on this pool
   uint32_t hit_set_period;      ///< periodicity of HitSet segments (seconds)
   uint32_t hit_set_count;       ///< number of periods to retain
-  bool use_gmt_hitset;         ///< use gmt to name the hitset archive object
   uint32_t min_read_recency_for_promote;   ///< minimum number of HitSet to check before promote
 
   uint32_t stripe_width;        ///< erasure coded stripe size in bytes
@@ -1064,7 +1063,6 @@ public:
       hit_set_params(),
       hit_set_period(0),
       hit_set_count(0),
-      use_gmt_hitset(true),
       min_read_recency_for_promote(0),
       stripe_width(0),
       expected_num_objects(0)
@@ -1602,11 +1600,10 @@ WRITE_CLASS_ENCODER_FEATURES(pool_stat_t)
 struct pg_hit_set_info_t {
   utime_t begin, end;   ///< time interval
   eversion_t version;   ///< version this HitSet object was written
-  bool using_gmt;      ///< use gmt for creating the hit_set archive object name
-  pg_hit_set_info_t(bool using_gmt = true)
-    : using_gmt(using_gmt) {}
-  pg_hit_set_info_t(utime_t b, bool using_gmt)
-    : begin(b), using_gmt(using_gmt) {}
+
+  pg_hit_set_info_t() {}
+  pg_hit_set_info_t(utime_t b)
+    : begin(b) {}
 
   void encode(bufferlist &bl) const;
   void decode(bufferlist::iterator &bl);