git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: deep scrub, read file contents from disk and compare digest
author Mike Ryan <mike.ryan@inktank.com>
Mon, 27 Aug 2012 18:16:17 +0000 (11:16 -0700)
committer Mike Ryan <mike.ryan@inktank.com>
Wed, 5 Sep 2012 21:19:19 +0000 (14:19 -0700)
Deep scrub reads the contents of every file from the store and computes
a crc32 digest. The primary compares the digest of all replicas and will
mark the PG inconsistent if any don't match.

OSDs that do not support deep scrub simply perform an ordinary chunky
scrub. Any subset of OSDs that do support deep scrub will have their
digests compared.

Signed-off-by: Mike Ryan <mike.ryan@inktank.com>
16 files changed:
doc/control.rst
src/bash_completion/ceph
src/common/config_opts.h
src/messages/MOSDRepScrub.h
src/messages/MOSDScrub.h
src/mon/OSDMonitor.cc
src/mon/PGMap.cc
src/mon/PGMonitor.cc
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PG.h
src/osd/ReplicatedPG.cc
src/osd/osd_types.cc
src/osd/osd_types.h
src/test/cli/ceph/help.t
src/tools/ceph.cc

index 19c976e995a6511607d6e945419ec6a0f28c7f26..0e4cbdeacbfd25012e95cad50df8a76ea104670f 100644 (file)
@@ -260,6 +260,11 @@ Get the value of a pool setting. Valid fields are:
 Sends a scrub command to osdN. To send the command to all osds, use ``*``.
 TODO: what does this actually do ::
 
+       $ ceph osd deep-scrub N
+
+Sends a deep scrub command to osdN. A deep scrub compares both the
+metadata and the contents of objects between replicas.
+
        $ ceph osd repair N
 
 Sends a repair command to osdN. To send the command to all osds, use ``*``.
index cec2b852e5b945267e98bc2d8b944f3cc0c9c562..2ea53c603072ebe67bfbad637837b7ae29c1e991 100644 (file)
@@ -36,7 +36,7 @@ _ceph()
                 return 0
                 ;;
             pg)
-                COMPREPLY=( $(compgen -W "stat dump getmap map send_pg_creates scrub repair" -- ${cur}) )
+                COMPREPLY=( $(compgen -W "stat dump getmap map send_pg_creates scrub deep-scrub repair" -- ${cur}) )
                 return 0
                 ;;
             osd)
index 2ad3b0a23cadbae9021c4739f3f494c246f3a29b..8e959f775ca061fb8a1fb20cf5bc8db81ca346c6 100644 (file)
@@ -322,6 +322,8 @@ OPTION(osd_max_scrubs, OPT_INT, 1)
 OPTION(osd_scrub_load_threshold, OPT_FLOAT, 0.5)
 OPTION(osd_scrub_min_interval, OPT_FLOAT, 300)
 OPTION(osd_scrub_max_interval, OPT_FLOAT, 60*60*24)   // once a day
+OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week
+OPTION(osd_deep_scrub_stride, OPT_INT, 524288)
 OPTION(osd_auto_weight, OPT_BOOL, false)
 OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored
 OPTION(osd_check_for_log_corruption, OPT_BOOL, false)
index 184d153bcc99fc966aa24c52e6479adfc4bb62d5..2d3a66d96af17cb993d7104db1040f9a1643a132 100644 (file)
@@ -24,7 +24,7 @@
 
 struct MOSDRepScrub : public Message {
 
-  static const int HEAD_VERSION = 3;
+  static const int HEAD_VERSION = 4;
   static const int COMPAT_VERSION = 2;
 
   pg_t pgid;             // PG to scrub
@@ -34,6 +34,7 @@ struct MOSDRepScrub : public Message {
   bool chunky;           // true for chunky scrubs
   hobject_t start;       // lower bound of scrub, inclusive
   hobject_t end;         // upper bound of scrub, exclusive
+  bool deep;             // true if scrub should be deep
 
   MOSDRepScrub() : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION) { }
   MOSDRepScrub(pg_t pgid, eversion_t scrub_from, eversion_t scrub_to,
@@ -43,17 +44,19 @@ struct MOSDRepScrub : public Message {
       scrub_from(scrub_from),
       scrub_to(scrub_to),
       map_epoch(map_epoch),
-      chunky(false) { }
+      chunky(false),
+      deep(false) { }
 
   MOSDRepScrub(pg_t pgid, eversion_t scrub_to, epoch_t map_epoch,
-               hobject_t start, hobject_t end)
+               hobject_t start, hobject_t end, bool deep)
     : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION),
       pgid(pgid),
       scrub_to(scrub_to),
       map_epoch(map_epoch),
       chunky(true),
       start(start),
-      end(end) { }
+      end(end),
+      deep(deep) { }
 
 
 private:
@@ -66,6 +69,7 @@ public:
     out << pgid << ",from:" << scrub_from << ",to:" << scrub_to
         << ",epoch:" << map_epoch << ",start:" << start << ",end:" << end
         << ",chunky:" << chunky
+        << ",deep:" << deep
         << ",version:" << header.version;
     out << ")";
   }
@@ -78,6 +82,7 @@ public:
     ::encode(chunky, payload);
     ::encode(start, payload);
     ::encode(end, payload);
+    ::encode(deep, payload);
   }
   void decode_payload() {
     bufferlist::iterator p = payload.begin();
@@ -90,8 +95,14 @@ public:
       ::decode(chunky, p);
       ::decode(start, p);
       ::decode(end, p);
+      if (header.version >= 4) {
+        ::decode(deep, p);
+      } else {
+        deep = false;
+      }
     } else { // v2 scrub: non-chunky
       chunky = false;
+      deep = false;
     }
   }
 };
index e4c9bd158688bc938329199e023d2a9750043d9d..72661f89598034e767f3d08aa97f9e9a57838fe1 100644 (file)
  */
 
 struct MOSDScrub : public Message {
+
+  static const int HEAD_VERSION = 2;
+  static const int COMPAT_VERSION = 1;
+
   uuid_d fsid;
   vector<pg_t> scrub_pgs;
   bool repair;
+  bool deep;
 
-  MOSDScrub() : Message(MSG_OSD_SCRUB) {}
-  MOSDScrub(const uuid_d& f, bool r) :
-    Message(MSG_OSD_SCRUB),
-    fsid(f), repair(r) {}
-  MOSDScrub(const uuid_d& f, vector<pg_t>& pgs, bool r) :
-    Message(MSG_OSD_SCRUB),
-    fsid(f), scrub_pgs(pgs), repair(r) {}
+  MOSDScrub() : Message(MSG_OSD_SCRUB, HEAD_VERSION, COMPAT_VERSION) {}
+  MOSDScrub(const uuid_d& f, bool r, bool d) :
+    Message(MSG_OSD_SCRUB, HEAD_VERSION, COMPAT_VERSION),
+    fsid(f), repair(r), deep(d) {}
+  MOSDScrub(const uuid_d& f, vector<pg_t>& pgs, bool r, bool d) :
+    Message(MSG_OSD_SCRUB, HEAD_VERSION, COMPAT_VERSION),
+    fsid(f), scrub_pgs(pgs), repair(r), deep(d) {}
 private:
   ~MOSDScrub() {}
 
@@ -47,6 +52,8 @@ public:
       out << scrub_pgs;
     if (repair)
       out << " repair";
+    if (deep)
+      out << " deep";
     out << ")";
   }
 
@@ -54,12 +61,18 @@ public:
     ::encode(fsid, payload);
     ::encode(scrub_pgs, payload);
     ::encode(repair, payload);
+    ::encode(deep, payload);
   }
   void decode_payload() {
     bufferlist::iterator p = payload.begin();
     ::decode(fsid, p);
     ::decode(scrub_pgs, p);
     ::decode(repair, p);
+    if (header.version >= 2) {
+      ::decode(deep, p);
+    } else {
+      deep = false;
+    }
   }
 };
 
index 3e7604f9ef3ebfafda178a462a487cc8c987a624..adfe86a0e35f28c6049ce96e25cdfd25b7b7e39f 100644 (file)
@@ -1649,10 +1649,12 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
        r = 0;
       }
     }
-    else if ((m->cmd[1] == "scrub" || m->cmd[1] == "repair")) {
+    else if ((m->cmd[1] == "scrub" ||
+             m->cmd[1] == "deep-scrub" ||
+             m->cmd[1] == "repair")) {
       if (m->cmd.size() <= 2) {
        r = -EINVAL;
-       ss << "usage: osd [scrub|repair] <who>";
+       ss << "usage: osd [scrub|deep-scrub|repair] <who>";
        goto out;
       }
       if (m->cmd[2] == "*") {
@@ -1662,7 +1664,8 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
          if (osdmap.is_up(i)) {
            ss << (c++ ? ",":"") << i;
            mon->try_send_message(new MOSDScrub(osdmap.get_fsid(),
-                                               m->cmd[1] == "repair"),
+                                               m->cmd[1] == "repair",
+                                               m->cmd[1] == "deep-scrub"),
                                  osdmap.get_inst(i));
          }         
        r = 0;
@@ -1671,7 +1674,8 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
        long osd = strtol(m->cmd[2].c_str(), 0, 10);
        if (osdmap.is_up(osd)) {
          mon->try_send_message(new MOSDScrub(osdmap.get_fsid(),
-                                             m->cmd[1] == "repair"),
+                                             m->cmd[1] == "repair",
+                                             m->cmd[1] == "deep-scrub"),
                                osdmap.get_inst(osd));
          r = 0;
          ss << "osd." << osd << " instructed to " << m->cmd[1];
index accc1b73a200f83664b22f2e84b3c8ae3d76a72a..f056271e2274b290f9ec947730c39bf40de6407d 100644 (file)
@@ -464,7 +464,7 @@ void PGMap::dump_osd_stats(Formatter *f) const
 void PGMap::dump_pg_stats_plain(ostream& ss,
                                const hash_map<pg_t, pg_stat_t>& pg_stats) const
 {
-  ss << "pg_stat\tobjects\tmip\tdegr\tunf\tbytes\tlog\tdisklog\tstate\tstate_stamp\tv\treported\tup\tacting\tlast_scrub\tscrub_stamp" << std::endl;
+  ss << "pg_stat\tobjects\tmip\tdegr\tunf\tbytes\tlog\tdisklog\tstate\tstate_stamp\tv\treported\tup\tacting\tlast_scrub\tscrub_stamp\tlast_deep_scrub\tdeep_scrub_stamp" << std::endl;
   for (hash_map<pg_t, pg_stat_t>::const_iterator i = pg_stats.begin();
        i != pg_stats.end(); ++i) {
     const pg_stat_t &st(i->second);
@@ -484,6 +484,7 @@ void PGMap::dump_pg_stats_plain(ostream& ss,
        << "\t" << st.up
        << "\t" << st.acting
        << "\t" << st.last_scrub << "\t" << st.last_scrub_stamp
+       << "\t" << st.last_deep_scrub << "\t" << st.last_deep_scrub_stamp
        << std::endl;
   }
 }
index d18aeaa226bafcfe257f75235e0fa3508cdf850f..a052bb85bd43b295873725c19aed4a51ac09837a 100644 (file)
@@ -997,7 +997,9 @@ bool PGMonitor::preprocess_command(MMonCommand *m)
       } else
        ss << "invalid pgid '" << m->cmd[2] << "'";
     }
-    else if ((m->cmd[1] == "scrub" || m->cmd[1] == "repair") && m->cmd.size() == 3) {
+    else if ((m->cmd[1] == "scrub" ||
+             m->cmd[1] == "deep-scrub" ||
+             m->cmd[1] == "repair") && m->cmd.size() == 3) {
       pg_t pgid;
       r = -EINVAL;
       if (pgid.parse(m->cmd[2].c_str())) {
@@ -1008,7 +1010,8 @@ bool PGMonitor::preprocess_command(MMonCommand *m)
              vector<pg_t> pgs(1);
              pgs[0] = pgid;
              mon->try_send_message(new MOSDScrub(mon->monmap->fsid, pgs,
-                                                 m->cmd[1] == "repair"),
+                                                 m->cmd[1] == "repair",
+                                                 m->cmd[1] == "deep-scrub"),
                                    mon->osdmon()->osdmap.get_inst(osd));
              ss << "instructing pg " << pgid << " on osd." << osd << " to " << m->cmd[1];
              r = 0;
index 271ec8f2d938f20a78ca6c923a9a6422a43b9aea..5add67505589267ec5056900abf4c9eedc9d05fc 100644 (file)
@@ -3257,6 +3257,8 @@ void OSD::handle_scrub(MOSDScrub *m)
       if (pg->is_primary()) {
        if (m->repair)
          pg->state_set(PG_STATE_REPAIR);
+       if (m->deep)
+         pg->state_set(PG_STATE_DEEP_SCRUB);
        if (pg->queue_scrub()) {
          dout(10) << "queueing " << *pg << " for scrub" << dendl;
        }
@@ -3273,6 +3275,8 @@ void OSD::handle_scrub(MOSDScrub *m)
        if (pg->is_primary()) {
          if (m->repair)
            pg->state_set(PG_STATE_REPAIR);
+         if (m->deep)
+           pg->state_set(PG_STATE_DEEP_SCRUB);
          if (pg->queue_scrub()) {
            dout(10) << "queueing " << *pg << " for scrub" << dendl;
          }
index 3e8135b47507ce7b45c65255f576e523af095d78..ff8555efc94158e5ee89184bd5f46960cd0a52c1 100644 (file)
@@ -1926,6 +1926,8 @@ void PG::update_stats()
     info.stats.created = info.history.epoch_created;
     info.stats.last_scrub = info.history.last_scrub;
     info.stats.last_scrub_stamp = info.history.last_scrub_stamp;
+    info.stats.last_deep_scrub = info.history.last_deep_scrub;
+    info.stats.last_deep_scrub_stamp = info.history.last_deep_scrub_stamp;
     info.stats.last_epoch_clean = info.history.last_epoch_clean;
 
     utime_t now = ceph_clock_now(g_ceph_context);
@@ -2712,6 +2714,11 @@ bool PG::sched_scrub()
     return true;
   }
 
+  if (ceph_clock_now(g_ceph_context) > info.history.last_deep_scrub_stamp + g_conf->osd_deep_scrub_interval) {
+    dout(10) << "sched_scrub: scrub will be deep" << dendl;
+    scrubber.deep = true;
+  }
+
   bool ret = false;
   if (!scrubber.reserved) {
     assert(scrubber.reserved_peers.empty());
@@ -2804,9 +2811,10 @@ void PG::sub_op_scrub_map(OpRequestRef op)
 /* 
  * pg lock may or may not be held
  */
-void PG::_scan_list(ScrubMap &map, vector<hobject_t> &ls)
+void PG::_scan_list(ScrubMap &map, vector<hobject_t> &ls, bool deep)
 {
-  dout(10) << "_scan_list scanning " << ls.size() << " objects" << dendl;
+  dout(10) << "_scan_list scanning " << ls.size() << " objects"
+           << (deep ? " deeply" : "") << dendl;
   int i = 0;
   for (vector<hobject_t>::iterator p = ls.begin(); 
        p != ls.end(); 
@@ -2820,6 +2828,23 @@ void PG::_scan_list(ScrubMap &map, vector<hobject_t> &ls)
       o.size = st.st_size;
       assert(!o.negative);
       osd->store->getattrs(coll, poid, o.attrs);
+
+      // calculate the CRC32 on deep scrubs
+      if (deep) {
+        bufferhash h;
+        bufferlist bl;
+        int r;
+        __u64 pos = 0;
+        while ( (r = osd->store->read(coll, poid, pos,
+                                      g_conf->osd_deep_scrub_stride, bl)) > 0) {
+          h << bl;
+          pos += bl.length();
+          bl.clear();
+        }
+        o.digest = h.digest();
+        o.digest_present = true;
+      }
+
       dout(25) << "_scan_list  " << poid << dendl;
     } else {
       dout(25) << "_scan_list  " << poid << " got " << r << ", skipping" << dendl;
@@ -2840,13 +2865,15 @@ void PG::_request_scrub_map_classic(int replica, eversion_t version)
 }
 
 // send scrub v3 messages (chunky scrub)
-void PG::_request_scrub_map(int replica, eversion_t version, hobject_t start, hobject_t end)
+void PG::_request_scrub_map(int replica, eversion_t version,
+                            hobject_t start, hobject_t end,
+                            bool deep)
 {
   assert(replica != osd->whoami);
   dout(10) << "scrub  requesting scrubmap from osd." << replica << dendl;
   MOSDRepScrub *repscrubop = new MOSDRepScrub(info.pgid, version,
                                               get_osdmap()->get_epoch(),
-                                              start, end);
+                                              start, end, deep);
   osd->cluster_messenger->send_message(repscrubop,
                                        get_osdmap()->get_cluster_inst(replica));
 }
@@ -2977,7 +3004,8 @@ void PG::scrub_unreserve_replicas()
  * build a scrub map over a chunk without releasing the lock
  * only used by chunky scrub
  */
-int PG::build_scrub_map_chunk(ScrubMap &map, hobject_t start, hobject_t end)
+int PG::build_scrub_map_chunk(ScrubMap &map,
+                              hobject_t start, hobject_t end, bool deep)
 {
   dout(10) << "build_scrub_map" << dendl;
   dout(20) << "scrub_map_chunk [" << start << "," << end << ")" << dendl;
@@ -2992,7 +3020,7 @@ int PG::build_scrub_map_chunk(ScrubMap &map, hobject_t start, hobject_t end)
     return ret;
   }
 
-  _scan_list(map, ls);
+  _scan_list(map, ls, deep);
 
   // pg attrs
   osd->store->collection_getattrs(coll, map.attrs);
@@ -3025,7 +3053,7 @@ void PG::build_scrub_map(ScrubMap &map)
   vector<hobject_t> ls;
   osd->store->collection_list(coll, ls);
 
-  _scan_list(map, ls);
+  _scan_list(map, ls, false);
   lock();
 
   if (epoch != info.history.same_interval_since) {
@@ -3073,7 +3101,7 @@ void PG::build_inc_scrub_map(ScrubMap &map, eversion_t v)
     }
   }
 
-  _scan_list(map, ls);
+  _scan_list(map, ls, false);
   // pg attrs
   osd->store->collection_getattrs(coll, map.attrs);
 
@@ -3155,7 +3183,7 @@ void PG::replica_scrub(MOSDRepScrub *msg)
       return;
     }
 
-    build_scrub_map_chunk(map, msg->start, msg->end);
+    build_scrub_map_chunk(map, msg->start, msg->end, msg->deep);
 
   } else {
     if (msg->scrub_from > eversion_t()) {
@@ -3216,7 +3244,7 @@ void PG::scrub()
     return;
   }
 
-  // when the scrub is not active, we need to determine which type of scrub to do
+  // when we're starting a scrub, we need to determine which type of scrub to do
   if (!scrubber.active) {
     OSDMapRef curmap = osd->get_osdmap();
     scrubber.is_chunky = true;
@@ -3231,6 +3259,12 @@ void PG::scrub()
       }
     }
 
+    if (scrubber.is_chunky) {
+      scrubber.deep = state_test(PG_STATE_DEEP_SCRUB);
+    } else {
+      state_clear(PG_STATE_DEEP_SCRUB);
+    }
+
     dout(10) << "starting a new " << (scrubber.is_chunky ? "chunky" : "classic") << " scrub" << dendl;
   }
 
@@ -3548,7 +3582,7 @@ void PG::chunky_scrub() {
         // request maps from replicas
         for (unsigned i=1; i<acting.size(); i++) {
           _request_scrub_map(acting[i], scrubber.subset_last_update,
-                             scrubber.start, scrubber.end);
+                             scrubber.start, scrubber.end, scrubber.deep);
           scrubber.waiting_on_whom.insert(acting[i]);
           ++scrubber.waiting_on;
         }
@@ -3580,7 +3614,9 @@ void PG::chunky_scrub() {
         assert(last_update_applied >= scrubber.subset_last_update);
 
         // build my own scrub map
-        ret = build_scrub_map_chunk(scrubber.primary_scrubmap, scrubber.start, scrubber.end);
+        ret = build_scrub_map_chunk(scrubber.primary_scrubmap,
+                                    scrubber.start, scrubber.end,
+                                    scrubber.deep);
         if (ret < 0) {
           dout(5) << "error building scrub map: " << ret << ", aborting" << dendl;
           scrub_clear_state();
@@ -3645,6 +3681,7 @@ void PG::scrub_clear_state()
   assert(_lock.is_locked());
   state_clear(PG_STATE_SCRUBBING);
   state_clear(PG_STATE_REPAIR);
+  state_clear(PG_STATE_DEEP_SCRUB);
   update_stats();
 
   // active -> nothing.
@@ -3691,6 +3728,16 @@ bool PG::_compare_scrub_objects(ScrubMap::object &auth,
     errorstream << "size " << candidate.size 
                << " != known size " << auth.size;
   }
+  if (auth.digest_present && candidate.digest_present) {
+    if (auth.digest != candidate.digest) {
+      if (!ok)
+        errorstream << ", ";
+      ok = false;
+
+      errorstream << "digest " << candidate.digest
+                  << " != known digest " << auth.digest;
+    }
+  }
   for (map<string,bufferptr>::const_iterator i = auth.attrs.begin();
        i != auth.attrs.end();
        i++) {
@@ -3755,6 +3802,7 @@ void PG::_compare_scrubmaps(const map<int,ScrubMap*> &maps,
                                      j->second->objects[*k],
                                      ss)) {
            cur_inconsistent.insert(j->first);
+            ++scrubber.errors;
            errorstream << info.pgid << " osd." << acting[j->first]
                        << ": soid " << *k << " " << ss.str() << std::endl;
          }
@@ -3781,7 +3829,8 @@ void PG::_compare_scrubmaps(const map<int,ScrubMap*> &maps,
 void PG::scrub_compare_maps() {
   dout(10) << "scrub_compare_maps has maps, analyzing" << dendl;
   bool repair = state_test(PG_STATE_REPAIR);
-  const char *mode = repair ? "repair":"scrub";
+  bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
+  const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub"));
   if (acting.size() > 1) {
     dout(10) << "scrub  comparing replica scrub maps" << dendl;
 
@@ -3837,6 +3886,7 @@ void PG::scrub_compare_maps() {
                            &maps[i->second]->objects[i->first],
                            acting[*j],
                            acting[i->second]);
+              ++scrubber.fixed;
            }
          }
 
@@ -3883,7 +3933,8 @@ void PG::scrub_finalize() {
 // the part that actually finalizes a scrub
 void PG::scrub_finish() {
   bool repair = state_test(PG_STATE_REPAIR);
-  const char *mode = repair ? "repair":"scrub";
+  bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
+  const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub"));
 
   // type-specific finish (can tally more errors)
   _scrub_finish();
@@ -3911,6 +3962,10 @@ void PG::scrub_finish() {
   osd->unreg_last_pg_scrub(info.pgid, info.history.last_scrub_stamp);
   info.history.last_scrub = info.last_update;
   info.history.last_scrub_stamp = ceph_clock_now(g_ceph_context);
+  if (scrubber.deep) {
+    info.history.last_deep_scrub = info.last_update;
+    info.history.last_deep_scrub_stamp = ceph_clock_now(g_ceph_context);
+  }
   osd->reg_last_pg_scrub(info.pgid, info.history.last_scrub_stamp);
 
   {
index 5c39cd89f101c96402547829ab96ed577458b4f0..0ad34514ea8a7684353de68d0616ec84ab953591 100644 (file)
@@ -778,7 +778,8 @@ public:
       reserved(false), reserve_failed(false),
       block_writes(false), active(false), waiting_on(0),
       errors(0), fixed(0), active_rep_scrub(0),
-      finalizing(false), is_chunky(false), state(INACTIVE)
+      finalizing(false), is_chunky(false), state(INACTIVE),
+      deep(false)
     {
     }
 
@@ -818,6 +819,9 @@ public:
       FINISH,
     } state;
 
+    // deep scrub
+    bool deep;
+
     static const char *state_string(const PG::Scrubber::State& state) {
       const char *ret = NULL;
       switch( state )
@@ -855,6 +859,7 @@ public:
       subset_last_update = eversion_t();
       errors = 0;
       fixed = 0;
+      deep = false;
     }
 
   } scrubber;
@@ -878,10 +883,12 @@ public:
   void scrub_finish();
   void scrub_clear_state();
   bool scrub_gather_replica_maps();
-  void _scan_list(ScrubMap &map, vector<hobject_t> &ls);
+  void _scan_list(ScrubMap &map, vector<hobject_t> &ls, bool deep);
   void _request_scrub_map_classic(int replica, eversion_t version);
-  void _request_scrub_map(int replica, eversion_t version, hobject_t start, hobject_t end);
-  int build_scrub_map_chunk(ScrubMap &map, hobject_t start, hobject_t end);
+  void _request_scrub_map(int replica, eversion_t version,
+                          hobject_t start, hobject_t end, bool deep);
+  int build_scrub_map_chunk(ScrubMap &map,
+                            hobject_t start, hobject_t end, bool deep);
   void build_scrub_map(ScrubMap &map);
   void build_inc_scrub_map(ScrubMap &map, eversion_t v);
   virtual void _scrub(ScrubMap &map) { }
index 01cab81e52b912fbb626c7767488d9331f23f252..332e0501b9e5d57513f93a7df1a1af159e366d87 100644 (file)
@@ -6596,7 +6596,8 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap)
 
   coll_t c(info.pgid);
   bool repair = state_test(PG_STATE_REPAIR);
-  const char *mode = repair ? "repair":"scrub";
+  bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
+  const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub"));
 
   // traverse in reverse order.
   hobject_t head;
@@ -6685,6 +6686,7 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap)
     //bufferlist data;
     //osd->store->read(c, poid, 0, 0, data);
     //assert(data.length() == p->size);
+    //
 
     if (soid.snap == CEPH_NOSNAP) {
       if (!snapset.head_exists) {
@@ -6737,7 +6739,8 @@ void ReplicatedPG::_scrub_clear_state()
 void ReplicatedPG::_scrub_finish()
 {
   bool repair = state_test(PG_STATE_REPAIR);
-  const char *mode = repair ? "repair":"scrub";
+  bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
+  const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub"));
 
   dout(10) << mode << " got "
           << scrub_cstat.sum.num_objects << "/" << info.stats.stats.sum.num_objects << " objects, "
index cdbdfa957048cbee6eb8674dad85e541eb23dd57..a981d02f9006f642c4ba423998b98fcd5fee5e29 100644 (file)
@@ -406,6 +406,8 @@ std::string pg_state_string(int state)
     oss << "remapped+";
   if (state & PG_STATE_SCRUBBING)
     oss << "scrubbing+";
+  if (state & PG_STATE_DEEP_SCRUB)
+    oss << "deep+";
   if (state & PG_STATE_SCRUBQ)
     oss << "scrubq+";
   if (state & PG_STATE_INCONSISTENT)
@@ -971,6 +973,8 @@ void pg_stat_t::dump(Formatter *f) const
   f->dump_unsigned("parent_split_bits", parent_split_bits);
   f->dump_stream("last_scrub") << last_scrub;
   f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
+  f->dump_stream("last_deep_scrub") << last_deep_scrub;
+  f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
   f->dump_unsigned("log_size", log_size);
   f->dump_unsigned("ondisk_log_size", ondisk_log_size);
   stats.dump(f);
@@ -986,7 +990,7 @@ void pg_stat_t::dump(Formatter *f) const
 
 void pg_stat_t::encode(bufferlist &bl) const
 {
-  ENCODE_START(9, 8, bl);
+  ENCODE_START(10, 8, bl);
   ::encode(version, bl);
   ::encode(reported, bl);
   ::encode(state, bl);
@@ -1009,12 +1013,14 @@ void pg_stat_t::encode(bufferlist &bl) const
   ::encode(last_clean, bl);
   ::encode(last_unstale, bl);
   ::encode(mapping_epoch, bl);
+  ::encode(last_deep_scrub, bl);
+  ::encode(last_deep_scrub_stamp, bl);
   ENCODE_FINISH(bl);
 }
 
 void pg_stat_t::decode(bufferlist::iterator &bl)
 {
-  DECODE_START_LEGACY_COMPAT_LEN(9, 8, 8, bl);
+  DECODE_START_LEGACY_COMPAT_LEN(10, 8, 8, bl);
   ::decode(version, bl);
   ::decode(reported, bl);
   ::decode(state, bl);
@@ -1072,6 +1078,10 @@ void pg_stat_t::decode(bufferlist::iterator &bl)
       ::decode(last_clean, bl);
       ::decode(last_unstale, bl);
       ::decode(mapping_epoch, bl);
+      if (struct_v >= 10) {
+        ::decode(last_deep_scrub, bl);
+        ::decode(last_deep_scrub_stamp, bl);
+      }
     }
   }
   DECODE_FINISH(bl);
@@ -1099,6 +1109,8 @@ void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o)
   a.parent_split_bits = 12;
   a.last_scrub = eversion_t(9, 10);
   a.last_scrub_stamp = utime_t(11, 12);
+  a.last_deep_scrub = eversion_t(13, 14);
+  a.last_deep_scrub_stamp = utime_t(15, 16);
   list<object_stat_collection_t*> l;
   object_stat_collection_t::generate_test_instances(l);
   a.stats = *l.back();
@@ -1177,7 +1189,7 @@ void pool_stat_t::generate_test_instances(list<pool_stat_t*>& o)
 
 void pg_history_t::encode(bufferlist &bl) const
 {
-  ENCODE_START(4, 4, bl);
+  ENCODE_START(5, 4, bl);
   ::encode(epoch_created, bl);
   ::encode(last_epoch_started, bl);
   ::encode(last_epoch_clean, bl);
@@ -1187,12 +1199,14 @@ void pg_history_t::encode(bufferlist &bl) const
   ::encode(same_primary_since, bl);
   ::encode(last_scrub, bl);
   ::encode(last_scrub_stamp, bl);
+  ::encode(last_deep_scrub, bl);
+  ::encode(last_deep_scrub_stamp, bl);
   ENCODE_FINISH(bl);
 }
 
 void pg_history_t::decode(bufferlist::iterator &bl)
 {
-  DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, bl);
+  DECODE_START_LEGACY_COMPAT_LEN(5, 4, 4, bl);
   ::decode(epoch_created, bl);
   ::decode(last_epoch_started, bl);
   if (struct_v >= 3)
@@ -1206,6 +1220,10 @@ void pg_history_t::decode(bufferlist::iterator &bl)
   if (struct_v >= 2) {
     ::decode(last_scrub, bl);
     ::decode(last_scrub_stamp, bl);
+    if (struct_v >= 5) {
+      ::decode(last_deep_scrub, bl);
+      ::decode(last_deep_scrub_stamp, bl);
+    }
   }
   DECODE_FINISH(bl);
 }
@@ -1221,6 +1239,8 @@ void pg_history_t::dump(Formatter *f) const
   f->dump_int("same_primary_since", same_primary_since);
   f->dump_stream("last_scrub") << last_scrub;
   f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
+  f->dump_stream("last_deep_scrub") << last_deep_scrub;
+  f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp;
 }
 
 void pg_history_t::generate_test_instances(list<pg_history_t*>& o)
@@ -1235,7 +1255,9 @@ void pg_history_t::generate_test_instances(list<pg_history_t*>& o)
   o.back()->same_interval_since = 6;
   o.back()->same_primary_since = 7;
   o.back()->last_scrub = eversion_t(8, 9);
-  o.back()->last_scrub_stamp = utime_t(10, 11);  
+  o.back()->last_scrub_stamp = utime_t(10, 11);
+  o.back()->last_deep_scrub = eversion_t(12, 13);
+  o.back()->last_deep_scrub_stamp = utime_t(14, 15);
 }
 
 
@@ -2591,19 +2613,29 @@ void ScrubMap::generate_test_instances(list<ScrubMap*>& o)
 
 void ScrubMap::object::encode(bufferlist& bl) const
 {
-  ENCODE_START(2, 2, bl);
+  ENCODE_START(3, 2, bl);
   ::encode(size, bl);
   ::encode(negative, bl);
   ::encode(attrs, bl);
+  ::encode(digest, bl);
+  ::encode(digest_present, bl);
   ENCODE_FINISH(bl);
 }
 
 void ScrubMap::object::decode(bufferlist::iterator& bl)
 {
-  DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+  DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
   ::decode(size, bl);
   ::decode(negative, bl);
   ::decode(attrs, bl);
+  if (struct_v >= 3) {
+    ::decode(digest, bl);
+    ::decode(digest_present, bl);
+  }
+  else {
+    digest = 0;
+    digest_present = false;
+  }
   DECODE_FINISH(bl);
 }
 
index 7cb590bdea68002416215ef1abb3e1f6e2e8e595..b2b59b33f181d723adc7ecb930171859d973ca47 100644 (file)
@@ -564,6 +564,7 @@ inline ostream& operator<<(ostream& out, const osd_stat_t& s) {
 #define PG_STATE_INCOMPLETE   (1<<16) // incomplete content, peering failed.
 #define PG_STATE_STALE        (1<<17) // our state for this pg is stale, unknown.
 #define PG_STATE_REMAPPED     (1<<18) // pg is explicitly remapped to different OSDs than CRUSH
+#define PG_STATE_DEEP_SCRUB   (1<<19) // deep scrub: check CRC32 on files
 
 std::string pg_state_string(int state);
 
@@ -862,7 +863,9 @@ struct pg_stat_t {
   __u32 parent_split_bits;
 
   eversion_t last_scrub;
+  eversion_t last_deep_scrub;
   utime_t last_scrub_stamp;
+  utime_t last_deep_scrub_stamp;
 
   object_stat_collection_t stats;
 
@@ -951,7 +954,9 @@ struct pg_history_t {
   epoch_t same_primary_since;  // same primary at least back through this epoch.
 
   eversion_t last_scrub;
+  eversion_t last_deep_scrub;
   utime_t last_scrub_stamp;
+  utime_t last_deep_scrub_stamp;
 
   pg_history_t()
     : epoch_created(0),
@@ -985,6 +990,14 @@ struct pg_history_t {
       last_scrub_stamp = other.last_scrub_stamp;
       modified = true;
     }
+    if (other.last_deep_scrub > last_deep_scrub) {
+      last_deep_scrub = other.last_deep_scrub;
+      modified = true;
+    }
+    if (other.last_deep_scrub_stamp > last_deep_scrub_stamp) {
+      last_deep_scrub_stamp = other.last_deep_scrub_stamp;
+      modified = true;
+    }
     return modified;
   }
 
@@ -1777,8 +1790,10 @@ struct ScrubMap {
     uint64_t size;
     bool negative;
     map<string,bufferptr> attrs;
+    __u32 digest;
+    bool digest_present;
 
-    object(): size(0), negative(false) {}
+    object(): size(0), negative(false), digest(0), digest_present(false) {}
 
     void encode(bufferlist& bl) const;
     void decode(bufferlist::iterator& bl);
index 47e5b3850089b5a94c70bdb9a26fc4f32ef886d0..bbd7ff76d9a1d33c7dfec1a53999b2a211fc72bf 100644 (file)
@@ -56,6 +56,7 @@
     ceph osd pool rename <pool> <new pool name>
     ceph osd pool set <pool> <field> <value>
     ceph osd scrub <osd-id>
+    ceph osd deep-scrub <osd-id>
     ceph osd repair <osd-id>
     ceph osd tell N bench [bytes per write] [total bytes]
   
index b82be10b9bf78d5c2fbe5ba233733fb9f7790019..278033c46cbeec8a39230dd7e4fc01cd130d32e3 100644 (file)
@@ -99,6 +99,7 @@ static void usage()
   cout << "  ceph osd pool rename <pool> <new pool name>\n";
   cout << "  ceph osd pool set <pool> <field> <value>\n";
   cout << "  ceph osd scrub <osd-id>\n";
+  cout << "  ceph osd deep-scrub <osd-id>\n";
   cout << "  ceph osd repair <osd-id>\n";
   cout << "  ceph osd tell N bench [bytes per write] [total bytes]\n";
   cout << "\n";