git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd, rados: Fixes for list-inconsistent-snapset
author David Zafman <dzafman@redhat.com>
Sat, 30 Apr 2016 00:09:13 +0000 (17:09 -0700)
committer David Zafman <dzafman@redhat.com>
Tue, 3 May 2016 19:11:56 +0000 (12:11 -0700)
Signed-off-by: David Zafman <dzafman@redhat.com>
src/common/scrub_types.cc
src/common/scrub_types.h
src/include/rados/rados_types.hpp
src/osd/ReplicatedPG.cc
src/test/osd/osd-scrub-snaps.sh
src/tools/rados/rados.cc

diff --git a/src/common/scrub_types.cc b/src/common/scrub_types.cc
index 3342a49d2ed741c0fd36d58bbd6d76cc0899dbfc..336965c9015a667f47105cca84c28edb794a42e6 100644 (file)
@@ -158,12 +158,22 @@ void inconsistent_snapset_wrapper::set_headless()
 
 void inconsistent_snapset_wrapper::set_ss_attr_missing()
 {
-  errors |= inc_snapset_t::ATTR_MISSING;
+  errors |= inc_snapset_t::SNAPSET_MISSING;
+}
+
+void inconsistent_snapset_wrapper::set_oi_attr_missing()
+{
+  errors |= inc_snapset_t::OI_MISSING;
 }
 
 void inconsistent_snapset_wrapper::set_ss_attr_corrupted()
 {
-  errors |= inc_snapset_t::ATTR_CORRUPTED;
+  errors |= inc_snapset_t::SNAPSET_CORRUPTED;
+}
+
+void inconsistent_snapset_wrapper::set_oi_attr_corrupted()
+{
+  errors |= inc_snapset_t::OI_CORRUPTED;
 }
 
 void inconsistent_snapset_wrapper::set_clone_missing(snapid_t snap)
@@ -172,6 +182,12 @@ void inconsistent_snapset_wrapper::set_clone_missing(snapid_t snap)
   missing.push_back(snap);
 }
 
+void inconsistent_snapset_wrapper::set_clone(snapid_t snap)
+{
+  errors |= inc_snapset_t::EXTRA_CLONES;
+  clones.push_back(snap);
+}
+
 void inconsistent_snapset_wrapper::set_snapset_mismatch()
 {
   errors |= inc_snapset_t::SNAP_MISMATCH;
diff --git a/src/common/scrub_types.h b/src/common/scrub_types.h
index ed45e0872b808bc9ad842ab35b88dd1a66f40427..dc93c88357d98f2ffee32a413a19c77891c62118 100644 (file)
@@ -96,9 +96,13 @@ struct inconsistent_snapset_wrapper : public librados::inconsistent_snapset_t {
   // soid claims that it is a head or a snapdir, but its SS_ATTR
   // is missing.
   void set_ss_attr_missing();
+  void set_oi_attr_missing();
   void set_ss_attr_corrupted();
+  void set_oi_attr_corrupted();
   // snapset with missing clone
   void set_clone_missing(snapid_t);
+  // Clones that are there
+  void set_clone(snapid_t);
   // the snapset is not consistent with itself
   void set_snapset_mismatch();
   // soid.snap inconsistent with snapset
diff --git a/src/include/rados/rados_types.hpp b/src/include/rados/rados_types.hpp
index c4f268d49889f977af4e248099b63adb867da3a0..ca7a49018be19f0646f9b4183abdcf8a7c727c71 100644 (file)
@@ -131,24 +131,28 @@ struct inconsistent_snapset_t {
     : object{head}
   {}
   enum {
-    ATTR_MISSING   = 1 << 0,
-    ATTR_CORRUPTED = 1 << 1,
+    SNAPSET_MISSING = 1 << 0,
+    SNAPSET_CORRUPTED = 1 << 1,
     CLONE_MISSING  = 1 << 2,
     SNAP_MISMATCH  = 1 << 3,
     HEAD_MISMATCH  = 1 << 4,
     HEADLESS_CLONE = 1 << 5,
     SIZE_MISMATCH  = 1 << 6,
+    OI_MISSING   = 1 << 7,
+    OI_CORRUPTED = 1 << 8,
+    EXTRA_CLONES = 1 << 9,
   };
   uint64_t errors = 0;
   object_id_t object;
+  // Extra clones
   std::vector<snap_t> clones;
   std::vector<snap_t> missing;
 
   bool ss_attr_missing() const {
-    return errors & ATTR_MISSING;
+    return errors & SNAPSET_MISSING;
   }
   bool ss_attr_corrupted() const {
-    return errors & ATTR_CORRUPTED;
+    return errors & SNAPSET_CORRUPTED;
   }
   bool clone_missing() const  {
     return errors & CLONE_MISSING;
@@ -165,6 +169,15 @@ struct inconsistent_snapset_t {
   bool size_mismatch() const {
     return errors & SIZE_MISMATCH;
   }
+  bool oi_attr_missing() const {
+    return errors & OI_MISSING;
+  }
+  bool oi_attr_corrupted() const {
+    return errors & OI_CORRUPTED;
+  }
+  bool extra_clones() const {
+    return errors & EXTRA_CLONES;
+  }
 };
 
 /**
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index d888dd02a1ef896a592a7f5385a8c2f4cafa9a0f..fae449a97dfda67cf582ebd8de5648be96131c82 100644 (file)
@@ -12636,14 +12636,14 @@ void ReplicatedPG::_scrub(
   boost::optional<SnapSet> snapset; // If initialized so will head (above)
   vector<snapid_t>::reverse_iterator curclone; // Defined only if snapset initialized
   unsigned missing = 0;
-  inconsistent_snapset_wrapper snap_error;
+  inconsistent_snapset_wrapper soid_error, head_error;
 
   bufferlist last_data;
 
   for (map<hobject_t,ScrubMap::object, hobject_t::BitwiseComparator>::reverse_iterator
        p = scrubmap.objects.rbegin(); p != scrubmap.objects.rend(); ++p) {
     const hobject_t& soid = p->first;
-    snap_error = inconsistent_snapset_wrapper{soid};
+    soid_error = inconsistent_snapset_wrapper{soid};
     object_stat_sum_t stat;
     boost::optional<object_info_t> oi;
 
@@ -12664,7 +12664,7 @@ void ReplicatedPG::_scrub(
       osd->clog->error() << mode << " " << info.pgid << " " << soid
                        << " no '" << OI_ATTR << "' attr";
       ++scrubber.shallow_errors;
-      snap_error.set_ss_attr_missing();
+      soid_error.set_oi_attr_missing();
     } else {
       bufferlist bv;
       bv.push_back(p->second.attrs[OI_ATTR]);
@@ -12676,7 +12676,8 @@ void ReplicatedPG::_scrub(
        osd->clog->error() << mode << " " << info.pgid << " " << soid
                << " can't decode '" << OI_ATTR << "' attr " << e.what();
        ++scrubber.shallow_errors;
-       snap_error.set_ss_attr_corrupted();
+       soid_error.set_oi_attr_corrupted();
+        soid_error.set_oi_attr_missing(); // Not available too
       }
     }
 
@@ -12688,7 +12689,7 @@ void ReplicatedPG::_scrub(
                           << oi->size << ") adjusted for ondisk to ("
                           << pgbackend->be_get_ondisk_size(oi->size)
                           << ")";
-       snap_error.set_size_mismatch();
+       soid_error.set_size_mismatch();
        ++scrubber.shallow_errors;
       }
 
@@ -12732,7 +12733,7 @@ void ReplicatedPG::_scrub(
       // This will set missing, but will be a no-op if snap.soid == *curclone.
       missing += process_clones_to(head, snapset, osd->clog, info.pgid, mode,
                        pool.info.allow_incomplete_clones(), target, &curclone,
-                       snap_error);
+                       head_error);
     }
     bool expected;
     // Check doing_clones() again in case we ran process_clones_to()
@@ -12748,19 +12749,18 @@ void ReplicatedPG::_scrub(
       expected = soid.has_snapset();
     }
     if (!expected) {
-      // If we couldn't read the head's snapset, then just ignore clones and
-      // don't count as an error.
+      // If we couldn't read the head's snapset, just ignore clones
       if (head && !snapset) {
-       osd->clog->info() << mode << " " << info.pgid << " " << soid
+       osd->clog->error() << mode << " " << info.pgid << " " << soid
                          << " clone ignored due to missing snapset";
-       scrubber.store->add_snap_error(pool.id, snap_error);
-       continue;
-      }
-      osd->clog->error() << mode << " " << info.pgid << " " << soid
+      } else {
+       osd->clog->error() << mode << " " << info.pgid << " " << soid
                           << " is an unexpected clone";
+      }
       ++scrubber.shallow_errors;
-      snap_error.set_headless();
-      scrubber.store->add_snap_error(pool.id, snap_error);
+      soid_error.set_headless();
+      scrubber.store->add_snap_error(pool.id, soid_error);
+      head_error.set_clone(soid.snap);
       continue;
     }
 
@@ -12770,13 +12770,15 @@ void ReplicatedPG::_scrub(
       if (missing) {
        log_missing(missing, head, osd->clog, info.pgid, __func__, mode,
                    pool.info.allow_incomplete_clones());
-       scrubber.store->add_snap_error(pool.id, snap_error);
       }
 
+      // Save previous head error information
+      if (head && head_error.errors)
+       scrubber.store->add_snap_error(pool.id, head_error);
       // Set this as a new head object
       head = soid;
       missing = 0;
-      snap_error = inconsistent_snapset_wrapper{head.get()};
+      head_error = soid_error;
 
       dout(20) << __func__ << " " << mode << " new head " << head << dendl;
 
@@ -12785,7 +12787,7 @@ void ReplicatedPG::_scrub(
                          << " no '" << SS_ATTR << "' attr";
         ++scrubber.shallow_errors;
        snapset = boost::none;
-       snap_error.set_ss_attr_missing();
+       head_error.set_ss_attr_missing();
       } else {
        bufferlist bl;
        bl.push_back(p->second.attrs[SS_ATTR]);
@@ -12798,7 +12800,8 @@ void ReplicatedPG::_scrub(
           osd->clog->error() << mode << " " << info.pgid << " " << soid
                << " can't decode '" << SS_ATTR << "' attr " << e.what();
          ++scrubber.shallow_errors;
-         snap_error.set_ss_attr_corrupted();
+         head_error.set_ss_attr_corrupted();
+         head_error.set_ss_attr_missing(); // Not available too
         }
       }
 
@@ -12812,7 +12815,7 @@ void ReplicatedPG::_scrub(
            osd->clog->error() << mode << " " << info.pgid << " " << soid
                               << " snaps.seq not set";
            ++scrubber.shallow_errors;
-           snap_error.set_snapset_mismatch();
+           head_error.set_snapset_mismatch();
           }
        }
 
@@ -12820,13 +12823,13 @@ void ReplicatedPG::_scrub(
          osd->clog->error() << mode << " " << info.pgid << " " << soid
                          << " snapset.head_exists=false, but head exists";
          ++scrubber.shallow_errors;
-         snap_error.set_head_mismatch();
+         head_error.set_head_mismatch();
        }
        if (soid.is_snapdir() && snapset->head_exists) {
          osd->clog->error() << mode << " " << info.pgid << " " << soid
                          << " snapset.head_exists=true, but snapdir exists";
          ++scrubber.shallow_errors;
-         snap_error.set_head_mismatch();
+         head_error.set_head_mismatch();
        }
       }
     } else {
@@ -12841,21 +12844,21 @@ void ReplicatedPG::_scrub(
        osd->clog->error() << mode << " " << info.pgid << " " << soid
                           << " is missing in clone_size";
        ++scrubber.shallow_errors;
-       snap_error.set_size_mismatch();
+       soid_error.set_size_mismatch();
       } else {
         if (oi && oi->size != snapset->clone_size[soid.snap]) {
          osd->clog->error() << mode << " " << info.pgid << " " << soid
                             << " size " << oi->size << " != clone_size "
                             << snapset->clone_size[*curclone];
          ++scrubber.shallow_errors;
-         snap_error.set_size_mismatch();
+         soid_error.set_size_mismatch();
         }
 
         if (snapset->clone_overlap.count(soid.snap) == 0) {
          osd->clog->error() << mode << " " << info.pgid << " " << soid
                             << " is missing in clone_overlap";
          ++scrubber.shallow_errors;
-         snap_error.set_size_mismatch();
+         soid_error.set_size_mismatch();
        } else {
          // This checking is based on get_clone_bytes().  The first 2 asserts
          // can't happen because we know we have a clone_size and
@@ -12878,7 +12881,7 @@ void ReplicatedPG::_scrub(
            osd->clog->error() << mode << " " << info.pgid << " " << soid
                               << " bad interval_set in clone_overlap";
            ++scrubber.shallow_errors;
-           snap_error.set_size_mismatch();
+           soid_error.set_size_mismatch();
          } else {
             stat.num_bytes += snapset->get_clone_bytes(soid.snap);
          }
@@ -12887,6 +12890,8 @@ void ReplicatedPG::_scrub(
 
       // what's next?
       ++curclone;
+      if (soid_error.errors)
+        scrubber.store->add_snap_error(pool.id, soid_error);
     }
 
     scrub_cstat.add(stat);
@@ -12898,15 +12903,16 @@ void ReplicatedPG::_scrub(
 
     missing += process_clones_to(head, snapset, osd->clog, info.pgid, mode,
                      pool.info.allow_incomplete_clones(), all_clones, &curclone,
-                     snap_error);
+                     head_error);
   }
   // There could be missing found by the test above or even
   // before dropping out of the loop for the last head.
   if (missing) {
     log_missing(missing, head, osd->clog, info.pgid, __func__,
                mode, pool.info.allow_incomplete_clones());
-    scrubber.store->add_snap_error(pool.id, snap_error);
   }
+  if (head && head_error.errors)
+    scrubber.store->add_snap_error(pool.id, head_error);
 
   for (map<hobject_t,pair<uint32_t,uint32_t>, hobject_t::BitwiseComparator>::const_iterator p =
         missing_digest.begin();
diff --git a/src/test/osd/osd-scrub-snaps.sh b/src/test/osd/osd-scrub-snaps.sh
index 982cc7afdf45ee3658b7870432727ae900771960..42b7d9c6033c70a718334a10cfff186d12fc0d48 100755 (executable)
@@ -190,15 +190,15 @@ function TEST_scrub_snaps() {
     err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj3:head on disk size [(]3840[)] does not match object info size [(]768[)] adjusted for ondisk to [(]768[)]"
     err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj6:1 is an unexpected clone"
     err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:snapdir no 'snapset' attr"
-    err_strings[14]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj2:7 clone ignored due to missing snapset"
-    err_strings[15]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj2:4 clone ignored due to missing snapset"
+    err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:7 clone ignored due to missing snapset"
+    err_strings[15]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:4 clone ignored due to missing snapset"
     err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj4:snapdir expected clone .*:::obj4:7"
     err_strings[17]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj4:snapdir 1 missing clone[(]s[)]"
     err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj1:1 is an unexpected clone"
     err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj9:1 is missing in clone_size"
     err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj11:1 is an unexpected clone"
     err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj14:1 size 1032 != clone_size 1033"
-    err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 19 errors"
+    err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 21 errors"
 
     for i in `seq 0 ${#err_strings[@]}`
     do
diff --git a/src/tools/rados/rados.cc b/src/tools/rados/rados.cc
index c366d2b11b5e72104a1ac9847b6a89d14d2a7daf..945c3611163dcf258f954adc41f8e0275d4855c2 100644 (file)
@@ -1331,19 +1331,24 @@ static void dump_inconsistent(const inconsistent_snapset_t& inc,
   dump_object_id(inc.object, f);
   f.dump_bool("ss_attr_missing", inc.ss_attr_missing());
   f.dump_bool("ss_attr_corrupted", inc.ss_attr_corrupted());
-  f.dump_bool("clone_missing", inc.clone_missing());
+  f.dump_bool("oi_attr_missing", inc.oi_attr_missing());
+  f.dump_bool("oi_attr_corrupted", inc.oi_attr_corrupted());
   f.dump_bool("snapset_mismatch", inc.snapset_mismatch());
   f.dump_bool("head_mismatch", inc.head_mismatch());
   f.dump_bool("headless", inc.headless());
   f.dump_bool("size_mismatch", inc.size_mismatch());
 
-  if (inc.clone_missing()) {
-    f.open_array_section("clones");
+  f.dump_bool("extra_clones", inc.extra_clones());
+  if (inc.extra_clones()) {
+    f.open_array_section("extra clones");
     for (auto snap : inc.clones) {
       f.dump_unsigned("snap", snap);
     }
     f.close_section();
+  }
 
+  f.dump_bool("clone_missing", inc.clone_missing());
+  if (inc.clone_missing()) {
     f.open_array_section("missing");
     for (auto snap : inc.missing) {
       f.dump_unsigned("snap", snap);