git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Log shard errors in be_select_auth_object() as other errors
author David Zafman <dzafman@redhat.com>
Wed, 1 Aug 2018 22:13:40 +0000 (15:13 -0700)
committer David Zafman <dzafman@redhat.com>
Thu, 23 Aug 2018 18:09:22 +0000 (11:09 -0700)
Signed-off-by: David Zafman <dzafman@redhat.com>
src/osd/PGBackend.cc
src/osd/PGBackend.h

index 30db0842cbdd02450bfa0bd36441b8fe731d1f35..bc5a6ea14d1a92ac536c782c2a83b91c0216b7f9 100644 (file)
@@ -628,15 +628,6 @@ bool PGBackend::be_compare_scrub_objects(
   bool has_snapset)
 {
   enum { CLEAN, FOUND_ERROR } error = CLEAN;
-  if (candidate.stat_error) {
-    assert(shard_result.has_stat_error());
-    error = FOUND_ERROR;
-    errorstream << "candidate had a stat error";
-  }
-  if (candidate.read_error || candidate.ec_hash_mismatch || candidate.ec_size_mismatch) {
-    error = FOUND_ERROR;
-    errorstream << "candidate had a read error";
-  }
   if (auth.digest_present && candidate.digest_present) {
     if (auth.digest != candidate.digest) {
       if (error != CLEAN)
@@ -818,7 +809,9 @@ map<pg_shard_t, ScrubMap *>::const_iterator
   const map<pg_shard_t,ScrubMap*> &maps,
   object_info_t *auth_oi,
   map<pg_shard_t, shard_info_wrapper> &shard_map,
-  bool &digest_match)
+  bool &digest_match,
+  spg_t pgid,
+  ostream &errorstream)
 {
   eversion_t auth_version;
 
@@ -837,27 +830,37 @@ map<pg_shard_t, ScrubMap *>::const_iterator
   map<pg_shard_t, ScrubMap *>::const_iterator auth = maps.end();
   digest_match = true;
   for (auto &l : shards) {
+    ostringstream shard_errorstream;
+    bool error = false;
     map<pg_shard_t, ScrubMap *>::const_iterator j = maps.find(l);
     map<hobject_t, ScrubMap::object>::iterator i =
       j->second->objects.find(obj);
     if (i == j->second->objects.end()) {
       continue;
     }
-    string error_string;
     auto& shard_info = shard_map[j->first];
     if (j->first == get_parent()->whoami_shard())
       shard_info.primary = true;
     if (i->second.read_error) {
       shard_info.set_read_error();
-      error_string += " read_error";
+      if (error)
+        shard_errorstream << ", ";
+      error = true;
+      shard_errorstream << "candidate had a read error";
     }
     if (i->second.ec_hash_mismatch) {
       shard_info.set_ec_hash_mismatch();
-      error_string += " ec_hash_mismatch";
+      if (error)
+        shard_errorstream << ", ";
+      error = true;
+      shard_errorstream << "candidate had an ec hash mismatch";
     }
     if (i->second.ec_size_mismatch) {
       shard_info.set_ec_size_mismatch();
-      error_string += " ec_size_mismatch";
+      if (error)
+        shard_errorstream << ", ";
+      error = true;
+      shard_errorstream << "candidate had an ec size mismatch";
     }
 
     object_info_t oi;
@@ -868,7 +871,10 @@ map<pg_shard_t, ScrubMap *>::const_iterator
 
     if (i->second.stat_error) {
       shard_info.set_stat_error();
-      error_string += " stat_error";
+      if (error)
+        shard_errorstream << ", ";
+      error = true;
+      shard_errorstream << "candidate had a stat error";
       // With stat_error no further checking
       // We don't need to also see a missing_object_info_attr
       goto out;
@@ -880,7 +886,10 @@ map<pg_shard_t, ScrubMap *>::const_iterator
       k = i->second.attrs.find(SS_ATTR);
       if (k == i->second.attrs.end()) {
        shard_info.set_snapset_missing();
-       error_string += " snapset_missing";
+        if (error)
+          shard_errorstream << ", ";
+        error = true;
+        shard_errorstream << "candidate had a missing snapset key";
       } else {
         ss_bl.push_back(k->second);
         try {
@@ -889,7 +898,10 @@ map<pg_shard_t, ScrubMap *>::const_iterator
         } catch (...) {
          // invalid snapset, probably corrupt
          shard_info.set_snapset_corrupted();
-         error_string += " snapset_corrupted";
+          if (error)
+            shard_errorstream << ", ";
+          error = true;
+          shard_errorstream << "candidate had a corrupt snapset";
         }
       }
     }
@@ -899,7 +911,10 @@ map<pg_shard_t, ScrubMap *>::const_iterator
       k = i->second.attrs.find(ECUtil::get_hinfo_key());
       if (k == i->second.attrs.end()) {
        shard_info.set_hinfo_missing();
-       error_string += " hinfo_key_missing";
+        if (error)
+          shard_errorstream << ", ";
+        error = true;
+        shard_errorstream << "candidate had a missing hinfo key";
       } else {
        hk_bl.push_back(k->second);
         try {
@@ -908,7 +923,10 @@ map<pg_shard_t, ScrubMap *>::const_iterator
         } catch (...) {
          // invalid snapset, probably corrupt
          shard_info.set_hinfo_corrupted();
-         error_string += " hinfo_corrupted";
+          if (error)
+            shard_errorstream << ", ";
+          error = true;
+          shard_errorstream << "candidate had a corrupt hinfo";
         }
       }
     }
@@ -917,7 +935,10 @@ map<pg_shard_t, ScrubMap *>::const_iterator
     if (k == i->second.attrs.end()) {
       // no object info on object, probably corrupt
       shard_info.set_info_missing();
-      error_string += " info_missing";
+      if (error)
+        shard_errorstream << ", ";
+      error = true;
+      shard_errorstream << "candidate had a missing info key";
       goto out;
     }
     bl.push_back(k->second);
@@ -927,7 +948,10 @@ map<pg_shard_t, ScrubMap *>::const_iterator
     } catch (...) {
       // invalid object info, probably corrupt
       shard_info.set_info_corrupted();
-      error_string += " info_corrupted";
+      if (error)
+        shard_errorstream << ", ";
+      error = true;
+      shard_errorstream << "candidate had a corrupt info";
       goto out;
     }
 
@@ -935,9 +959,12 @@ map<pg_shard_t, ScrubMap *>::const_iterator
     assert(oi.soid == obj);
 
     if (i->second.size != be_get_ondisk_size(oi.size)) {
-      dout(5) << __func__ << " size " << i->second.size << " oi size " << oi.size << dendl;
       shard_info.set_obj_size_info_mismatch();
-      error_string += " obj_size_info_mismatch";
+      if (error)
+        shard_errorstream << ", ";
+      error = true;
+      shard_errorstream << "candidate size " << i->second.size << " info size "
+                       << oi.size << " mismatch";
     }
 
     // digest_match will only be true if computed digests are the same
@@ -964,13 +991,9 @@ map<pg_shard_t, ScrubMap *>::const_iterator
     }
 
 out:
-    // Check error_string because some errors already generated messages
-    if (error_string != "") {
-      dout(10) << __func__ << ": error(s) osd " << j->first
-              << " for obj " << obj
-              << "," << error_string
-              << dendl;
-    }
+    if (error)
+        errorstream << pgid.pgid << " shard " << l << ": soid " << obj
+                   << " " << shard_errorstream.str() << "\n";
     // Keep scanning other shards
   }
   dout(10) << __func__ << ": selecting osd " << auth->first
@@ -1008,7 +1031,8 @@ void PGBackend::be_compare_scrubmaps(
 
     bool digest_match;
     map<pg_shard_t, ScrubMap *>::const_iterator auth =
-      be_select_auth_object(*k, maps, &auth_oi, shard_map, digest_match);
+      be_select_auth_object(*k, maps, &auth_oi, shard_map, digest_match,
+                           pgid, errorstream);
 
     list<pg_shard_t> auth_list;
     set<pg_shard_t> object_errors;
index 3134540c5131d81c2169279747f950b58bfeece4..71bd6604b759c28cc3cef4d2427f441f0bcf9065 100644 (file)
@@ -577,7 +577,9 @@ typedef std::shared_ptr<const OSDMap> OSDMapRef;
      const map<pg_shard_t,ScrubMap*> &maps,
      object_info_t *auth_oi,
      map<pg_shard_t, shard_info_wrapper> &shard_map,
-     bool &digest_match);
+     bool &digest_match,
+     spg_t pgid,
+     ostream &errorstream);
    void be_compare_scrubmaps(
      const map<pg_shard_t,ScrubMap*> &maps,
      const set<hobject_t> &master_set,