]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: persist inconsistent snapsets using omap
authorKefu Chai <kchai@redhat.com>
Sat, 30 Jan 2016 07:22:49 +0000 (15:22 +0800)
committerKefu Chai <kchai@redhat.com>
Thu, 25 Feb 2016 04:41:55 +0000 (12:41 +0800)
The inconsistent snapsets are identified in ReplicatedPG::_scrub()
after we have compared the authoritative objects with their replicas/shards.
This inconsistency information is stored in the omap of objects,
under keys with the prefix "SCRUB_SS_".

Signed-off-by: Kefu Chai <kchai@redhat.com>
src/common/scrub_types.cc
src/common/scrub_types.h
src/osd/ReplicatedPG.cc
src/osd/ReplicatedPG.h
src/osd/ScrubStore.cc
src/osd/ScrubStore.h

index 04a5a3f855b3c248a1479056a1936d6877091cfa..91ac141b847a36d01d1968bfab14645f18f435fa 100644 (file)
@@ -141,3 +141,69 @@ void inconsistent_obj_wrapper::decode(bufferlist::iterator& bp)
   ::decode(shards, bp);
   DECODE_FINISH(bp);
 }
+
+inconsistent_snapset_wrapper::inconsistent_snapset_wrapper(const hobject_t& hoid)  // build the wrapper from an hobject_t
+  : inconsistent_snapset_t{object_id_t{hoid.oid.name,   // object name
+                                       hoid.nspace,     // namespace
+                                       hoid.get_key(),  // value of hobject_t::get_key(); presumably the locator - confirm
+                                       hoid.snap}}      // snap id
+{}  // empty body: the base is constructed from the object id alone
+
+using inc_snapset_t = inconsistent_snapset_t;
+
+// Sets the HEADLESS_CLONE bit in `errors`.
+void inconsistent_snapset_wrapper::set_headless()
+{
+  errors |= inconsistent_snapset_t::HEADLESS_CLONE;
+}
+
+// Sets the ATTR_MISSING bit in `errors`.
+void inconsistent_snapset_wrapper::set_ss_attr_missing()
+{
+  errors |= inconsistent_snapset_t::ATTR_MISSING;
+}
+
+// Sets the ATTR_CORRUPTED bit in `errors`.
+void inconsistent_snapset_wrapper::set_ss_attr_corrupted()
+{
+  errors |= inconsistent_snapset_t::ATTR_CORRUPTED;
+}
+
+// Sets the CLONE_MISSING bit in `errors` and appends `snap` to `missing`.
+void inconsistent_snapset_wrapper::set_clone_missing(snapid_t snap)
+{
+  errors |= inconsistent_snapset_t::CLONE_MISSING;
+  missing.push_back(snap);
+}
+
+// Sets the SNAP_MISMATCH bit in `errors`.
+void inconsistent_snapset_wrapper::set_snapset_mismatch()
+{
+  errors |= inconsistent_snapset_t::SNAP_MISMATCH;
+}
+
+// Sets the HEAD_MISMATCH bit in `errors`.
+void inconsistent_snapset_wrapper::set_head_mismatch()
+{
+  errors |= inconsistent_snapset_t::HEAD_MISMATCH;
+}
+
+// Sets the SIZE_MISMATCH bit in `errors`.
+void inconsistent_snapset_wrapper::set_size_mismatch()
+{
+  errors |= inconsistent_snapset_t::SIZE_MISMATCH;
+}
+
+void inconsistent_snapset_wrapper::encode(bufferlist& bl) const  // serialise this record into bl
+{
+  ENCODE_START(1, 1, bl);  // arguments 1, 1: presumably struct version and compat version - confirm
+  ::encode(errors, bl);    // field order here matches the read order in decode()
+  ::encode(object, bl);
+  ::encode(clones, bl);
+  ::encode(missing, bl);
+  ENCODE_FINISH(bl);
+}
+
+void inconsistent_snapset_wrapper::decode(bufferlist::iterator& bp)  // counterpart of encode()
+{
+  DECODE_START(1, bp);   // argument 1: presumably the newest struct version understood - confirm
+  ::decode(errors, bp);  // fields are read back in the order encode() wrote them
+  ::decode(object, bp);
+  ::decode(clones, bp);
+  ::decode(missing, bp);
+  DECODE_FINISH(bp);
+}
+
index c4012f7a536fb37de401f5f68270eeff27769fab..df219e523ce3e88100b346889e831ff49939aeec 100644 (file)
@@ -89,4 +89,33 @@ inline void decode(librados::inconsistent_obj_t& obj,
   reinterpret_cast<inconsistent_obj_wrapper&>(obj).decode(bp);
 }
 
+struct inconsistent_snapset_wrapper : public librados::inconsistent_snapset_t {  // adds helpers only, no data members
+  inconsistent_snapset_wrapper() = default;
+  inconsistent_snapset_wrapper(const hobject_t& head);  // fills in the object id from the given hobject_t
+  void set_headless();  // sets HEADLESS_CLONE
+  // soid claims that it is a head or a snapdir, but its SS_ATTR
+  // is missing (sets ATTR_MISSING).
+  void set_ss_attr_missing();
+  void set_ss_attr_corrupted();  // sets ATTR_CORRUPTED
+  // the snapset is missing a clone: sets CLONE_MISSING and records the snap id in `missing`
+  void set_clone_missing(snapid_t);
+  // the snapset is not consistent with itself (sets SNAP_MISMATCH)
+  void set_snapset_mismatch();
+  // soid.snap is inconsistent with the snapset (sets HEAD_MISMATCH)
+  void set_head_mismatch();
+  void set_size_mismatch();  // sets SIZE_MISMATCH
+
+  void encode(bufferlist& bl) const;  // writes errors, object, clones, missing (in that order)
+  void decode(bufferlist::iterator& bp);  // reads the same four fields back in the same order
+};
+
+WRITE_CLASS_ENCODER(inconsistent_snapset_wrapper)
+
+namespace librados {
+  inline void decode(librados::inconsistent_snapset_t& snapset,  // decode into a plain librados snapset record
+                    bufferlist::iterator& bp) {
+    reinterpret_cast<inconsistent_snapset_wrapper&>(snapset).decode(bp);  // reuses the wrapper's decode(); the wrapper declares no data members of its own
+  }
+}
+
 #endif
index 3b8fe54fd6d9b15bac9349ded96c83f776448fbb..3ef2cde4ad3e870abcf2b5e17e032f7cbf524d8d 100644 (file)
 #include "ReplicatedPG.h"
 #include "OSD.h"
 #include "OpRequest.h"
+#include "ScrubStore.h"
 #include "objclass/objclass.h"
 
 #include "common/errno.h"
+#include "common/scrub_types.h"
 #include "common/perf_counters.h"
 
 #include "messages/MOSDOp.h"
@@ -12194,7 +12196,8 @@ unsigned ReplicatedPG::process_clones_to(const boost::optional<hobject_t> &head,
   const char *mode,
   bool allow_incomplete_clones,
   boost::optional<snapid_t> target,
-  vector<snapid_t>::reverse_iterator *curclone)
+  vector<snapid_t>::reverse_iterator *curclone,
+  inconsistent_snapset_wrapper &e)
 {
   assert(head);
   assert(snapset);
@@ -12211,6 +12214,7 @@ unsigned ReplicatedPG::process_clones_to(const boost::optional<hobject_t> &head,
       clog->error() << mode << " " << pgid << " " << head.get()
                         << " expected clone " << next_clone;
       ++scrubber.shallow_errors;
+      e.set_clone_missing(next_clone.snap);
     }
     // Clones are descending
     ++(*curclone);
@@ -12260,12 +12264,14 @@ void ReplicatedPG::_scrub(
   boost::optional<SnapSet> snapset; // If initialized so will head (above)
   vector<snapid_t>::reverse_iterator curclone; // Defined only if snapset initialized
   unsigned missing = 0;
+  inconsistent_snapset_wrapper snap_error;
 
   bufferlist last_data;
 
   for (map<hobject_t,ScrubMap::object, hobject_t::BitwiseComparator>::reverse_iterator
        p = scrubmap.objects.rbegin(); p != scrubmap.objects.rend(); ++p) {
     const hobject_t& soid = p->first;
+    snap_error = inconsistent_snapset_wrapper{soid};
     object_stat_sum_t stat;
     boost::optional<object_info_t> oi;
 
@@ -12286,6 +12292,7 @@ void ReplicatedPG::_scrub(
       osd->clog->error() << mode << " " << info.pgid << " " << soid
                        << " no '" << OI_ATTR << "' attr";
       ++scrubber.shallow_errors;
+      snap_error.set_ss_attr_missing();
     } else {
       bufferlist bv;
       bv.push_back(p->second.attrs[OI_ATTR]);
@@ -12297,6 +12304,7 @@ void ReplicatedPG::_scrub(
        osd->clog->error() << mode << " " << info.pgid << " " << soid
                << " can't decode '" << OI_ATTR << "' attr " << e.what();
        ++scrubber.shallow_errors;
+       snap_error.set_ss_attr_corrupted();
       }
     }
 
@@ -12308,6 +12316,7 @@ void ReplicatedPG::_scrub(
                           << oi->size << ") adjusted for ondisk to ("
                           << pgbackend->be_get_ondisk_size(oi->size)
                           << ")";
+       snap_error.set_size_mismatch();
        ++scrubber.shallow_errors;
       }
 
@@ -12350,7 +12359,8 @@ void ReplicatedPG::_scrub(
       // Log any clones we were expecting to be there up to target
       // This will set missing, but will be a no-op if snap.soid == *curclone.
       missing += process_clones_to(head, snapset, osd->clog, info.pgid, mode,
-                       pool.info.allow_incomplete_clones(), target, &curclone);
+                       pool.info.allow_incomplete_clones(), target, &curclone,
+                       snap_error);
     }
     bool expected;
     // Check doing_clones() again in case we ran process_clones_to()
@@ -12371,11 +12381,14 @@ void ReplicatedPG::_scrub(
       if (head && !snapset) {
        osd->clog->info() << mode << " " << info.pgid << " " << soid
                          << " clone ignored due to missing snapset";
+       scrubber.store->add_snap_error(pool.id, snap_error);
        continue;
       }
       osd->clog->error() << mode << " " << info.pgid << " " << soid
                           << " is an unexpected clone";
       ++scrubber.shallow_errors;
+      snap_error.set_headless();
+      scrubber.store->add_snap_error(pool.id, snap_error);
       continue;
     }
 
@@ -12385,11 +12398,13 @@ void ReplicatedPG::_scrub(
       if (missing) {
        log_missing(missing, head, osd->clog, info.pgid, __func__, mode,
                    pool.info.allow_incomplete_clones());
+       scrubber.store->add_snap_error(pool.id, snap_error);
       }
 
       // Set this as a new head object
       head = soid;
       missing = 0;
+      snap_error = inconsistent_snapset_wrapper{head.get()};
 
       dout(20) << __func__ << " " << mode << " new head " << head << dendl;
 
@@ -12398,6 +12413,7 @@ void ReplicatedPG::_scrub(
                          << " no '" << SS_ATTR << "' attr";
         ++scrubber.shallow_errors;
        snapset = boost::none;
+       snap_error.set_ss_attr_missing();
       } else {
        bufferlist bl;
        bl.push_back(p->second.attrs[SS_ATTR]);
@@ -12410,6 +12426,7 @@ void ReplicatedPG::_scrub(
           osd->clog->error() << mode << " " << info.pgid << " " << soid
                << " can't decode '" << SS_ATTR << "' attr " << e.what();
          ++scrubber.shallow_errors;
+         snap_error.set_ss_attr_corrupted();
         }
       }
 
@@ -12423,6 +12440,7 @@ void ReplicatedPG::_scrub(
            osd->clog->error() << mode << " " << info.pgid << " " << soid
                               << " snaps.seq not set";
            ++scrubber.shallow_errors;
+           snap_error.set_snapset_mismatch();
           }
        }
 
@@ -12430,11 +12448,13 @@ void ReplicatedPG::_scrub(
          osd->clog->error() << mode << " " << info.pgid << " " << soid
                          << " snapset.head_exists=false, but head exists";
          ++scrubber.shallow_errors;
+         snap_error.set_head_mismatch();
        }
        if (soid.is_snapdir() && snapset->head_exists) {
          osd->clog->error() << mode << " " << info.pgid << " " << soid
                          << " snapset.head_exists=true, but snapdir exists";
          ++scrubber.shallow_errors;
+         snap_error.set_head_mismatch();
        }
       }
     } else {
@@ -12449,19 +12469,22 @@ void ReplicatedPG::_scrub(
        osd->clog->error() << mode << " " << info.pgid << " " << soid
                           << " is missing in clone_size";
        ++scrubber.shallow_errors;
+       snap_error.set_size_mismatch();
       } else {
         if (oi && oi->size != snapset->clone_size[soid.snap]) {
          osd->clog->error() << mode << " " << info.pgid << " " << soid
                             << " size " << oi->size << " != clone_size "
                             << snapset->clone_size[*curclone];
          ++scrubber.shallow_errors;
+         snap_error.set_size_mismatch();
         }
 
         if (snapset->clone_overlap.count(soid.snap) == 0) {
          osd->clog->error() << mode << " " << info.pgid << " " << soid
                             << " is missing in clone_overlap";
          ++scrubber.shallow_errors;
-        } else {
+         snap_error.set_size_mismatch();
+       } else {
          // This checking is based on get_clone_bytes().  The first 2 asserts
          // can't happen because we know we have a clone_size and
          // a clone_overlap.  Now we check that the interval_set won't
@@ -12483,6 +12506,7 @@ void ReplicatedPG::_scrub(
            osd->clog->error() << mode << " " << info.pgid << " " << soid
                               << " bad interval_set in clone_overlap";
            ++scrubber.shallow_errors;
+           snap_error.set_size_mismatch();
          } else {
             stat.num_bytes += snapset->get_clone_bytes(soid.snap);
          }
@@ -12501,14 +12525,15 @@ void ReplicatedPG::_scrub(
             << " No more objects while processing " << head.get() << dendl;
 
     missing += process_clones_to(head, snapset, osd->clog, info.pgid, mode,
-                     pool.info.allow_incomplete_clones(), all_clones, &curclone);
-
+                     pool.info.allow_incomplete_clones(), all_clones, &curclone,
+                     snap_error);
   }
   // There could be missing found by the test above or even
   // before dropping out of the loop for the last head.
   if (missing) {
     log_missing(missing, head, osd->clog, info.pgid, __func__,
                mode, pool.info.allow_incomplete_clones());
+    scrubber.store->add_snap_error(pool.id, snap_error);
   }
 
   for (map<hobject_t,pair<uint32_t,uint32_t>, hobject_t::BitwiseComparator>::const_iterator p =
index 0ee87a7f5e6603b5826a662968f02cc5ee39872c..bf95df0c46bec766baeea986f76ef99bea25ed4b 100644 (file)
@@ -59,6 +59,8 @@ void put_with_id(ReplicatedPG *pg, uint64_t id);
   typedef boost::intrusive_ptr<ReplicatedPG> ReplicatedPGRef;
 #endif
 
+struct inconsistent_snapset_wrapper;
+
 class ReplicatedPG : public PG, public PGBackend::Listener {
   friend class OSD;
   friend class Watch;
@@ -1509,7 +1511,8 @@ private:
     const char *mode,
     bool allow_incomplete_clones,
     boost::optional<snapid_t> target,
-    vector<snapid_t>::reverse_iterator *curclone);
+    vector<snapid_t>::reverse_iterator *curclone,
+    inconsistent_snapset_wrapper &snap_error);
 
 public:
   coll_t get_coll() {
index 32a7b40d7d7b20d81ced248f9cbac4dcaf456e5c..774569e0d702b8305390bba5bb9cfb241d2fd2ae 100644 (file)
@@ -50,6 +50,45 @@ string last_object_key(int64_t pool)
   hoid.build_hash_cache();
   return "SCRUB_OBJ_" + hoid.to_str();
 }
+
+string first_snap_key(int64_t pool)
+{
+  // scrub object is per spg_t object, so we can misuse the hash (pg.seed) for
+  // the representing the minimal and maximum keys. and this relies on how
+  // hobject_t::to_str() works: hex(pool).hex(revhash).
+  auto hoid = hobject_t(object_t(),
+                       "",
+                       0,
+                       0x00000000,
+                       pool,
+                       "");
+  hoid.build_hash_cache();
+  return "SCRUB_SS_" + hoid.to_str();
+}
+
+string to_snap_key(int64_t pool, const librados::object_id_t& oid)
+{
+  auto hoid = hobject_t(object_t(oid.name),
+                       oid.locator, // key
+                       oid.snap,
+                       0x77777777, // hash
+                       pool,
+                       oid.nspace);
+  hoid.build_hash_cache();
+  return "SCRUB_SS_" + hoid.to_str();
+}
+
+string last_snap_key(int64_t pool)
+{
+  auto hoid = hobject_t(object_t(),
+                       "",
+                       0,
+                       0xffffffff,
+                       pool,
+                       "");
+  hoid.build_hash_cache();
+  return "SCRUB_SS_" + hoid.to_str();
+}
 }
 
 namespace Scrub {
@@ -87,6 +126,13 @@ void Store::add_object_error(int64_t pool, const inconsistent_obj_wrapper& e)
   results[to_object_key(pool, e.object)] = bl;
 }
 
+// Encodes `e` and stores it in `results` under the snapset key of e.object.
+void Store::add_snap_error(int64_t pool, const inconsistent_snapset_wrapper& e)
+{
+  bufferlist encoded;
+  e.encode(encoded);
+  const string key = to_snap_key(pool, e.object);
+  results[key] = encoded;
+}
+
 bool Store::empty() const
 {
   return results.empty();
@@ -104,6 +150,18 @@ void Store::cleanup(ObjectStore::Transaction* t)
   t->remove(coll, hoid);
 }
 
+std::vector<bufferlist>
+Store::get_snap_errors(ObjectStore* store,
+                      int64_t pool,
+                      const librados::object_id_t& start,
+                      uint64_t max_return)
+{
+  const string begin = (start.name.empty() ?
+                       first_snap_key(pool) : to_snap_key(pool, start));
+  const string end = last_snap_key(pool);
+  return get_errors(store, begin, end, max_return);     
+}
+
 std::vector<bufferlist>
 Store::get_object_errors(ObjectStore* store,
                         int64_t pool,
@@ -132,4 +190,10 @@ Store::get_errors(ObjectStore* store,
   }
   return errors;
 }
+string to_snap_key(int64_t pool, const librados::object_id_t& oid)
+{
+  return "SCRUB_SS_" + std::to_string(pool) + "." + oid.name + oid.nspace;
+}
+
+
 } // namespace Scrub
index f14e15f56e3d703b2e558f90e535fcafabd0c56d..59c5d1f5f7b9ad635ff873380cfd6339fae1c240 100644 (file)
@@ -12,6 +12,7 @@ namespace librados {
 }
 
 struct inconsistent_obj_wrapper;
+struct inconsistent_snapset_wrapper;
 
 namespace Scrub {
 
@@ -23,9 +24,14 @@ public:
                       const spg_t& pgid,
                       const coll_t& coll);
   void add_object_error(int64_t pool, const inconsistent_obj_wrapper& e);
+  void add_snap_error(int64_t pool, const inconsistent_snapset_wrapper& e);
   bool empty() const;
   void flush(ObjectStore::Transaction *);
   void cleanup(ObjectStore::Transaction *);
+  std::vector<bufferlist> get_snap_errors(ObjectStore* store,
+                                         int64_t pool,
+                                         const librados::object_id_t& start,
+                                         uint64_t max_return);
   std::vector<bufferlist> get_object_errors(ObjectStore* store,
                                            int64_t pool,
                                            const librados::object_id_t& start,