git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: use -1 for deep scrub digest seed on new OSDs
author Sage Weil <sage@redhat.com>
Wed, 10 Dec 2014 00:00:56 +0000 (16:00 -0800)
committer Sage Weil <sage@redhat.com>
Sat, 20 Dec 2014 15:28:52 +0000 (07:28 -0800)
0 is a weak initial value for a CRC since it doesn't change with a sequence
of 0 bytes (which are relatively common).  -1 is better.  Use -1 when
everyone in the acting set supports it.

Signed-off-by: Sage Weil <sage@redhat.com>
src/include/ceph_features.h
src/messages/MOSDRepScrub.h
src/osd/ECBackend.cc
src/osd/ECBackend.h
src/osd/PG.cc
src/osd/PG.h
src/osd/PGBackend.cc
src/osd/PGBackend.h
src/osd/ReplicatedBackend.cc
src/osd/ReplicatedBackend.h

index 006a88adc155ba6ed2eafb373eed3f68b3c8cc3c..151e547780ebd5332119f773f6072dc07f6e07ab 100644 (file)
@@ -55,6 +55,7 @@
 #define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44)
 #define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45)
 #define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46)
+#define CEPH_FEATURE_OSD_OBJECT_DIGEST  (1ULL<<46)  /* overlap with fadvise */
 #define CEPH_FEATURE_MDS_QUOTA      (1ULL<<47)
 
 #define CEPH_FEATURE_RESERVED2 (1ULL<<61)  /* slow down, we are almost out... */
@@ -134,7 +135,8 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) {
         CEPH_FEATURE_OSD_POOLRESEND |  \
          CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 |   \
          CEPH_FEATURE_OSD_SET_ALLOC_HINT |   \
-        CEPH_FEATURE_OSD_FADVISE_FLAGS | \
+        CEPH_FEATURE_OSD_FADVISE_FLAGS |     \
+        CEPH_FEATURE_OSD_OBJECT_DIGEST |    \
         CEPH_FEATURE_MDS_QUOTA | \
         0ULL)
 
index 52a03b8491097b0da6d89dbd6fa781c39aaa23c3..5d0a6041c2fc38bb3ba968894f4958746a52128e 100644 (file)
@@ -24,7 +24,7 @@
 
 struct MOSDRepScrub : public Message {
 
-  static const int HEAD_VERSION = 5;
+  static const int HEAD_VERSION = 6;
   static const int COMPAT_VERSION = 2;
 
   spg_t pgid;             // PG to scrub
@@ -35,10 +35,13 @@ struct MOSDRepScrub : public Message {
   hobject_t start;       // lower bound of scrub, inclusive
   hobject_t end;         // upper bound of scrub, exclusive
   bool deep;             // true if scrub should be deep
+  uint32_t seed;         // seed value for digest calculation
 
-  MOSDRepScrub() : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION),
+  MOSDRepScrub()
+    : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION),
       chunky(false),
-      deep(false) { }
+      deep(false),
+      seed(0) { }
 
   MOSDRepScrub(spg_t pgid, eversion_t scrub_from, eversion_t scrub_to,
               epoch_t map_epoch)
@@ -48,10 +51,11 @@ struct MOSDRepScrub : public Message {
       scrub_to(scrub_to),
       map_epoch(map_epoch),
       chunky(false),
-      deep(false) { }
+      deep(false),
+      seed(0) { }
 
   MOSDRepScrub(spg_t pgid, eversion_t scrub_to, epoch_t map_epoch,
-               hobject_t start, hobject_t end, bool deep)
+               hobject_t start, hobject_t end, bool deep, uint32_t seed)
     : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION),
       pgid(pgid),
       scrub_to(scrub_to),
@@ -59,7 +63,8 @@ struct MOSDRepScrub : public Message {
       chunky(true),
       start(start),
       end(end),
-      deep(deep) { }
+      deep(deep),
+      seed(seed) { }
 
 
 private:
@@ -73,6 +78,7 @@ public:
         << ",epoch:" << map_epoch << ",start:" << start << ",end:" << end
         << ",chunky:" << chunky
         << ",deep:" << deep
+       << ",seed:" << seed
         << ",version:" << header.version;
     out << ")";
   }
@@ -87,6 +93,7 @@ public:
     ::encode(end, payload);
     ::encode(deep, payload);
     ::encode(pgid.shard, payload);
+    ::encode(seed, payload);
   }
   void decode_payload() {
     bufferlist::iterator p = payload.begin();
@@ -114,6 +121,11 @@ public:
     } else {
       pgid.shard = shard_id_t::NO_SHARD;
     }
+    if (header.version >= 6) {
+      ::decode(seed, p);
+    } else {
+      seed = 0;
+    }
   }
 };
 
index 4031b4c671edbaf9440cd9526b9eca699c00320f..9a5a6a1a0e3b29d79afdf18533e083d8d1117866 100644 (file)
@@ -1746,9 +1746,10 @@ void ECBackend::rollback_append(
 
 void ECBackend::be_deep_scrub(
   const hobject_t &poid,
+  uint32_t seed,
   ScrubMap::object &o,
   ThreadPool::TPHandle &handle) {
-  bufferhash h(-1);
+  bufferhash h(-1); // we always used -1
   int r;
   uint64_t stride = cct->_conf->osd_deep_scrub_stride;
   if (stride % sinfo.get_chunk_size())
index 147e3e85d1db8e04f785b650ed926822b2249d0c..d13d8bb4857556c0eafb3747c47e4e45f3ba9d28 100644 (file)
@@ -469,6 +469,7 @@ public:
 
   void be_deep_scrub(
     const hobject_t &obj,
+    uint32_t seed,
     ScrubMap::object &o,
     ThreadPool::TPHandle &handle);
   uint64_t be_get_ondisk_size(uint64_t logical_size) {
index eea7ffd323df41068b68b88574ba4328982a392d..71be9c8a3ed0643a7335dec6f68b4663d537d242 100644 (file)
@@ -3288,14 +3288,15 @@ void PG::_request_scrub_map_classic(pg_shard_t replica, eversion_t version)
 void PG::_request_scrub_map(
   pg_shard_t replica, eversion_t version,
   hobject_t start, hobject_t end,
-  bool deep)
+  bool deep, uint32_t seed)
 {
   assert(replica != pg_whoami);
-  dout(10) << "scrub  requesting scrubmap from osd." << replica << dendl;
+  dout(10) << "scrub  requesting scrubmap from osd." << replica
+          << " deep " << (int)deep << " seed " << seed << dendl;
   MOSDRepScrub *repscrubop = new MOSDRepScrub(
     spg_t(info.pgid.pgid, replica.shard), version,
     get_osdmap()->get_epoch(),
-    start, end, deep);
+    start, end, deep, seed);
   osd->send_message_osd_cluster(
     replica.osd, repscrubop, get_osdmap()->get_epoch());
 }
@@ -3561,10 +3562,11 @@ void PG::_scan_snaps(ScrubMap &smap)
  */
 int PG::build_scrub_map_chunk(
   ScrubMap &map,
-  hobject_t start, hobject_t end, bool deep,
+  hobject_t start, hobject_t end, bool deep, uint32_t seed,
   ThreadPool::TPHandle &handle)
 {
-  dout(10) << __func__ << " [" << start << "," << end << ")" << dendl;
+  dout(10) << __func__ << " [" << start << "," << end << ") "
+          << " seed " << seed << dendl;
 
   map.valid_through = info.last_update;
 
@@ -3583,7 +3585,7 @@ int PG::build_scrub_map_chunk(
   }
 
 
-  get_pgbackend()->be_scan_list(map, ls, deep, handle);
+  get_pgbackend()->be_scan_list(map, ls, deep, seed, handle);
   _scan_rollback_obs(rollback_obs, handle);
   _scan_snaps(map);
 
@@ -3612,7 +3614,7 @@ void PG::build_scrub_map(ScrubMap &map, ThreadPool::TPHandle &handle)
   vector<hobject_t> ls;
   osd->store->collection_list(coll, ls);
 
-  get_pgbackend()->be_scan_list(map, ls, false, handle);
+  get_pgbackend()->be_scan_list(map, ls, false, 0, handle);
   lock();
   _scan_snaps(map);
 
@@ -3658,7 +3660,7 @@ void PG::build_inc_scrub_map(
     }
   }
 
-  get_pgbackend()->be_scan_list(map, ls, false, handle);
+  get_pgbackend()->be_scan_list(map, ls, false, 0, handle);
 }
 
 void PG::repair_object(
@@ -3723,7 +3725,7 @@ void PG::replica_scrub(
   }
 
   build_scrub_map_chunk(
-    map, msg->start, msg->end, msg->deep,
+    map, msg->start, msg->end, msg->deep, msg->seed,
     handle);
 
   vector<OSDOp> scrub(1);
@@ -3926,6 +3928,12 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
          oss << info.pgid.pgid << " " << mode << " starts" << std::endl;
          osd->clog->info(oss);
        }
+
+       if (peer_features & CEPH_FEATURE_OSD_OBJECT_DIGEST)
+         scrubber.seed = -1; // better, and enables oi digest checks
+       else
+         scrubber.seed = 0;  // compat
+
         break;
 
       case PG::Scrubber::NEW_CHUNK:
@@ -4007,7 +4015,8 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
             ++i) {
          if (*i == pg_whoami) continue;
           _request_scrub_map(*i, scrubber.subset_last_update,
-                             scrubber.start, scrubber.end, scrubber.deep);
+                             scrubber.start, scrubber.end, scrubber.deep,
+                            scrubber.seed);
           scrubber.waiting_on_whom.insert(*i);
           ++scrubber.waiting_on;
         }
@@ -4041,7 +4050,7 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
         // build my own scrub map
         ret = build_scrub_map_chunk(scrubber.primary_scrubmap,
                                     scrubber.start, scrubber.end,
-                                    scrubber.deep,
+                                    scrubber.deep, scrubber.seed,
                                    handle);
         if (ret < 0) {
           dout(5) << "error building scrub map: " << ret << ", aborting" << dendl;
index 21fed9c2f7856f5dbbf000ca614517112421c8e3..25ee5cd6c977011ca18460c88ddae096b8014784 100644 (file)
@@ -1030,7 +1030,8 @@ public:
       active_rep_scrub(0),
       must_scrub(false), must_deep_scrub(false), must_repair(false),
       state(INACTIVE),
-      deep(false)
+      deep(false),
+      seed(0)
     {
     }
 
@@ -1081,6 +1082,7 @@ public:
 
     // deep scrub
     bool deep;
+    uint32_t seed;
 
     list<Context*> callbacks;
     void add_callback(Context *context) {
@@ -1151,6 +1153,7 @@ public:
       deep_errors = 0;
       fixed = 0;
       deep = false;
+      seed = 0;
       run_callbacks();
       inconsistent.clear();
       missing.clear();
@@ -1183,10 +1186,11 @@ public:
     ThreadPool::TPHandle &handle);
   void _request_scrub_map_classic(pg_shard_t replica, eversion_t version);
   void _request_scrub_map(pg_shard_t replica, eversion_t version,
-                          hobject_t start, hobject_t end, bool deep);
+                          hobject_t start, hobject_t end, bool deep,
+                         uint32_t seed);
   int build_scrub_map_chunk(
     ScrubMap &map,
-    hobject_t start, hobject_t end, bool deep,
+    hobject_t start, hobject_t end, bool deep, uint32_t seed,
     ThreadPool::TPHandle &handle);
   void build_scrub_map(ScrubMap &map, ThreadPool::TPHandle &handle);
   void build_inc_scrub_map(
index 4c4787cc5058153b7b031df3ba582a5dac16dbb4..60cc40c9afb1f080a62e2a39fc375b45319826b0 100644 (file)
@@ -320,7 +320,7 @@ PGBackend *PGBackend::build_pg_backend(
  * pg lock may or may not be held
  */
 void PGBackend::be_scan_list(
-  ScrubMap &map, const vector<hobject_t> &ls, bool deep,
+  ScrubMap &map, const vector<hobject_t> &ls, bool deep, uint32_t seed,
   ThreadPool::TPHandle &handle)
 {
   dout(10) << __func__ << " scanning " << ls.size() << " objects"
@@ -351,7 +351,7 @@ void PGBackend::be_scan_list(
 
       // calculate the CRC32 on deep scrubs
       if (deep) {
-       be_deep_scrub(*p, o, handle);
+       be_deep_scrub(*p, seed, o, handle);
       }
 
       dout(25) << __func__ << "  " << poid << dendl;
index 3b9f2532356039107743584da0eb9b2475244d3c..c829b9468a619c157241f097c5e14ea9bc6dd532 100644 (file)
 
    virtual bool scrub_supported() { return false; }
    void be_scan_list(
-     ScrubMap &map, const vector<hobject_t> &ls, bool deep,
+     ScrubMap &map, const vector<hobject_t> &ls, bool deep, uint32_t seed,
      ThreadPool::TPHandle &handle);
    enum scrub_error_type be_compare_scrub_objects(
      const ScrubMap::object &auth,
      uint64_t logical_size) { assert(0); return 0; }
    virtual void be_deep_scrub(
      const hobject_t &poid,
+     uint32_t seed,
      ScrubMap::object &o,
      ThreadPool::TPHandle &handle) { assert(0); }
 
index e19ad4b0465ee21fe004c80e35a7ac5eab88ceaa..e646b2e4ff153b86f532d90768621e190f1e7880 100644 (file)
@@ -693,10 +693,12 @@ void ReplicatedBackend::sub_op_modify_reply(OpRequestRef op)
 
 void ReplicatedBackend::be_deep_scrub(
   const hobject_t &poid,
+  uint32_t seed,
   ScrubMap::object &o,
   ThreadPool::TPHandle &handle)
 {
-  bufferhash h, oh;
+  dout(10) << __func__ << " " << poid << " seed " << seed << dendl;
+  bufferhash h(seed), oh(seed);
   bufferlist bl, hdrbl;
   int r;
   __u64 pos = 0;
@@ -726,12 +728,19 @@ void ReplicatedBackend::be_deep_scrub(
     ghobject_t(
       poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
     &hdrbl, true);
-  if (r == 0) {
+  // NOTE: bobtail to giant, we would crc the head as (len, head).
+  // that changes at the same time we start using a non-zero seed.
+  if (r == 0 && hdrbl.length()) {
     dout(25) << "CRC header " << string(hdrbl.c_str(), hdrbl.length())
              << dendl;
-    ::encode(hdrbl, bl);
-    oh << bl;
-    bl.clear();
+    if (seed == 0) {
+      // legacy
+      bufferlist bl;
+      ::encode(hdrbl, bl);
+      oh << bl;
+    } else {
+      oh << hdrbl;
+    }
   } else if (r == -EIO) {
     dout(25) << __func__ << "  " << poid << " got "
             << r << " on omap header read, read_error" << dendl;
index 67a4a1f7ffbf735df4a96e450ad0cbde0117d57f..927ebb87a270e1b11af641a544d029bad26838e5 100644 (file)
@@ -413,6 +413,7 @@ private:
 
   void be_deep_scrub(
     const hobject_t &obj,
+    uint32_t seed,
     ScrubMap::object &o,
     ThreadPool::TPHandle &handle);
   uint64_t be_get_ondisk_size(uint64_t logical_size) { return logical_size; }