]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: fix unintended changes to scrub (cluster)logs 44922/head
authorRonen Friedman <rfriedma@redhat.com>
Sun, 30 Jan 2022 15:23:39 +0000 (15:23 +0000)
committerRonen Friedman <rfriedma@redhat.com>
Tue, 8 Feb 2022 10:30:15 +0000 (10:30 +0000)
OSD logs and cluster logs are monitored by some scrub tests:
some specific strings are required to either appear or not appear in
the logs. The Scrubber backend PR has unintentionally modified some
of these logs, and here we restore the exact logs text.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/scrubber/scrub_backend.cc
src/osd/scrubber/scrub_backend.h

index 49c6f725bb0af32b0a4560c4560e859a246a076b..46bc57f6ba6e06b03b2c3b8f127b95ea02b49440 100644 (file)
@@ -296,11 +296,7 @@ void ScrubBackend::update_authoritative()
     return;
   }
 
-  auto maybe_err_msg = compare_smaps();
-  if (maybe_err_msg) {
-    dout(5) << __func__ << ": " << *maybe_err_msg << dendl;
-    clog->error() << *maybe_err_msg;
-  }
+  compare_smaps();  // note: might cluster-log errors
 
   /// \todo try replacing with algorithm-based code
 
@@ -395,7 +391,7 @@ int ScrubBackend::scrub_process_inconsistent()
                              m_inconsistent.size());
 
   dout(2) << err_msg << dendl;
-  clog->error() << fmt::to_string(err_msg);
+  clog->error() << err_msg;
 
   int fixed_cnt{0};
   if (m_repair) {
@@ -518,27 +514,44 @@ auth_selection_t ScrubBackend::select_auth_object(const hobject_t& ho,
         ret_auth.auth->second.objects[ho].digest != *shard_ret.digest) {
 
       ret_auth.digest_match = false;
-      dout(10) << __func__ << " digest_match = false, " << ho
-               << " data_digest 0x" << std::hex
-               << ret_auth.auth->second.objects[ho].digest
-               << " != data_digest 0x" << *shard_ret.digest << std::dec
+      dout(10) << fmt::format(
+                    "{}: digest_match = false, {} data_digest 0x{:x} != "
+                    "data_digest 0x{:x}",
+                    __func__,
+                    ho,
+                    ret_auth.auth->second.objects[ho].digest,
+                    *shard_ret.digest)
                << dendl;
     }
 
+    dout(20) << fmt::format(
+                  "{}: {} shard {} got:{:D}", __func__, ho, l, shard_ret)
+             << dendl;
+
     if (shard_ret.possible_auth == shard_as_auth_t::usable_t::not_usable) {
 
       // Don't use this particular shard due to previous errors
       // XXX: For now we can't pick one shard for repair and another's object
       // info or snapset
 
+      ceph_assert(shard_ret.error_text.length());
       errstream << m_pg_id.pgid << " shard " << l << " soid " << ho << " : "
                 << shard_ret.error_text << "\n";
 
+    } else if (shard_ret.possible_auth ==
+               shard_as_auth_t::usable_t::not_found) {
+
+      // do not emit the returned error message to the log
+      dout(15) << fmt::format("{}: {} not found on shard {}", __func__, ho, l)
+               << dendl;
     } else {
 
-      dout(30) << __func__ << " consider using " << l
-               << " srv: " << shard_ret.oi.version
-               << " oi soid: " << shard_ret.oi.soid << dendl;
+      dout(30) << fmt::format("{}: consider using {} srv: {} oi soid: {}",
+                              __func__,
+                              l,
+                              shard_ret.oi.version,
+                              shard_ret.oi.soid)
+               << dendl;
 
       // consider using this shard as authoritative. Is it more recent?
 
@@ -546,9 +559,12 @@ auth_selection_t ScrubBackend::select_auth_object(const hobject_t& ho,
           (shard_ret.oi.version == auth_version &&
            dcount(shard_ret.oi) > dcount(ret_auth.auth_oi))) {
 
-        dout(30) << __func__ << " using " << l << " moved auth oi " << std::hex
-                 << (uint64_t)(&ret_auth.auth_oi) << " <-> "
-                 << (uint64_t)(&shard_ret.oi) << std::dec << dendl;
+        dout(30) << fmt::format("{}: using {} moved auth oi {:p} <-> {:p}",
+                                      __func__,
+                                      l,
+                                      (void*)&ret_auth.auth_oi,
+                                      (void*)&shard_ret.oi)
+                     << dendl;
 
         ret_auth.auth = shard_ret.auth_iter;
         ret_auth.auth_shard = ret_auth.auth->first;
@@ -619,7 +635,7 @@ shard_as_auth_t ScrubBackend::possible_auth_shard(const hobject_t& obj,
 {
   //  'maps' (called with this_chunk->maps originaly): this_chunk->maps
   //  'auth_oi' (called with 'auth_oi', which wasn't initialized at call site)
-  //     - will probably need to create and return
+  //     - create and return
   //  'shard_map' - the one created in select_auth_object()
   //     - used to access the 'shard_info'
 
@@ -628,7 +644,7 @@ shard_as_auth_t ScrubBackend::possible_auth_shard(const hobject_t& obj,
   const auto& j_smap = j->second;
   auto i = j_smap.objects.find(obj);
   if (i == j_smap.objects.end()) {
-    return shard_as_auth_t{""s};
+    return shard_as_auth_t{};
   }
   const auto& smap_obj = i->second;
 
@@ -704,10 +720,13 @@ shard_as_auth_t ScrubBackend::possible_auth_shard(const hobject_t& obj,
       }
     } else {
       // debug@dev only
-      dout(30) << __func__ << " missing snap addr: " << std::hex
-               << (uint64_t)(&smap_obj)
-               << " shard_info: " << (uint64_t)(&shard_info)
-               << " er: " << shard_info.errors << std::dec << dendl;
+      dout(30)
+        << fmt::format("{} missing snap addr: {:p} shard_info: {:p} er: {:x}",
+                       __func__,
+                       (void*)&smap_obj,
+                       (void*)&shard_info,
+                       shard_info.errors)
+        << dendl;
     }
   }
 
@@ -794,38 +813,42 @@ shard_as_auth_t ScrubBackend::possible_auth_shard(const hobject_t& obj,
   }
 
   ceph_assert(!err);
+  // note that the error text is made available to the caller, even
+  // for a successful shard selection
   return shard_as_auth_t{oi, j, errstream.str(), digest};
 }
 
 // re-implementation of PGBackend::be_compare_scrubmaps()
-std::optional<std::string> ScrubBackend::compare_smaps()
+void ScrubBackend::compare_smaps()
 {
   dout(10) << __func__
            << ": authoritative-set #: " << this_chunk->authoritative_set.size()
            << dendl;
 
-  std::stringstream errstream;
-  std::for_each(
-    this_chunk->authoritative_set.begin(),
-    this_chunk->authoritative_set.end(),
-    [this, &errstream](const auto& ho) { compare_obj_in_maps(ho, errstream); });
-
-  if (errstream.str().empty()) {
-    return std::nullopt;
-  } else {
-    return errstream.str();
-  }
+  std::for_each(this_chunk->authoritative_set.begin(),
+                this_chunk->authoritative_set.end(),
+                [this](const auto& ho) {
+                  if (auto maybe_clust_err = compare_obj_in_maps(ho);
+                      maybe_clust_err) {
+                    clog->error() << *maybe_clust_err;
+                  }
+                });
 }
 
-void ScrubBackend::compare_obj_in_maps(const hobject_t& ho,
-                                       stringstream& errstream)
+std::optional<std::string> ScrubBackend::compare_obj_in_maps(const hobject_t& ho)
 {
   // clear per-object data:
   this_chunk->cur_inconsistent.clear();
   this_chunk->cur_missing.clear();
   this_chunk->fix_digest = false;
 
-  auto auth_res = select_auth_object(ho, errstream);
+  stringstream candidates_errors;
+  auto auth_res = select_auth_object(ho, candidates_errors);
+  if (candidates_errors.str().size()) {
+    // a collection of shard-specific errors detected while
+    // finding the best shard to serve as authoritative
+    clog->error() << candidates_errors.str();
+  }
 
   inconsistent_obj_wrapper object_error{ho};
   if (!auth_res.is_auth_available) {
@@ -845,11 +868,12 @@ void ScrubBackend::compare_obj_in_maps(const hobject_t& ho,
     }
 
     m_scrubber.m_store->add_object_error(ho.pool, object_error);
-    errstream << m_scrubber.m_pg_id.pgid << " soid " << ho
-              << " : failed to pick suitable object info\n";
-    return;
+    return fmt::format("{} soid {} : failed to pick suitable object info\n",
+                       m_scrubber.m_pg_id.pgid,
+                       ho);
   }
 
+  stringstream errstream;
   auto& auth = auth_res.auth;
 
   // an auth source was selected
@@ -872,16 +896,12 @@ void ScrubBackend::compare_obj_in_maps(const hobject_t& ho,
 
     // At this point auth_list is populated, so we add the object error
     // shards as inconsistent.
-    inconsistents(ho,
-                  auth_object,
-                  auth_res.auth_oi,
-                  std::move(*opt_ers),
-                  errstream);
+    inconsistents(
+      ho, auth_object, auth_res.auth_oi, std::move(*opt_ers), errstream);
   } else {
 
     // both the auth & errs containers are empty
-    errstream << m_scrubber.m_pg_id.pgid << " soid " << ho
-              << " : empty auth list\n";
+    errstream << m_pg_id << " soid " << ho << " : empty auth list\n";
   }
 
   if (object_error.has_deep_errors()) {
@@ -893,6 +913,12 @@ void ScrubBackend::compare_obj_in_maps(const hobject_t& ho,
   if (object_error.errors || object_error.union_shards.errors) {
     m_scrubber.m_store->add_object_error(ho.pool, object_error);
   }
+
+  if (errstream.str().empty()) {
+    return std::nullopt;
+  } else {
+    return errstream.str();
+  }
 }
 
 
@@ -1176,10 +1202,6 @@ ScrubBackend::auth_and_obj_errs_t ScrubBackend::match_in_shards(
              << dendl;
   }
 
-  dout(15) << __func__ << ": auth_list: " << auth_list
-           << " #: " << auth_list.size()
-           << "; obj-errs#: " << object_errors.size() << dendl;
-
   dout(15) << fmt::format("{}: auth_list: {} #: {}; obj-errs#: {}",
                           __func__,
                           auth_list,
index cb23ad09bff767cbd93842d32a14ce64534a6cc9..faecd6e3e7a8715243fd2c4e99b2eedeeb55aac0 100644 (file)
@@ -79,7 +79,9 @@ struct omap_stat_t {
  * Conveys the usability of a specific shard as an auth source.
  */
 struct shard_as_auth_t {
-  enum class usable_t : uint8_t { not_usable, usable };
+  // note: 'not_found' differs from 'not_usable' in that 'not_found'
+  // does not carry an error message to be cluster-logged.
+  enum class usable_t : uint8_t { not_usable, not_found, usable };
 
   // the ctor used when the shard should not be considered as auth
   explicit shard_as_auth_t(std::string err_msg)
@@ -90,6 +92,15 @@ struct shard_as_auth_t {
       , digest{std::nullopt}
   {}
 
+  // the object cannot be found on the shard
+  explicit shard_as_auth_t()
+      : possible_auth{usable_t::not_found}
+      , error_text{}
+      , oi{}
+      , auth_iter{}
+      , digest{std::nullopt}
+  {}
+
   shard_as_auth_t(std::string err_msg, std::optional<uint32_t> data_digest)
       : possible_auth{usable_t::not_usable}
       , error_text{err_msg}
@@ -120,6 +131,50 @@ struct shard_as_auth_t {
   // other in/out arguments) via this struct
 };
 
+// the format specifier {D} is used to request debug output
+template <>
+struct fmt::formatter<shard_as_auth_t> {
+  template <typename ParseContext>
+  constexpr auto parse(ParseContext& ctx)
+  {
+    auto it = ctx.begin();
+    if (it != ctx.end()) {
+      debug_log = (*it++) == 'D';
+    }
+    return it;
+  }
+  template <typename FormatContext>
+  auto format(shard_as_auth_t const& as_auth, FormatContext& ctx)
+  {
+    if (debug_log) {
+      // note: 'if' chain, as hard to consistently (on all compilers) avoid some
+      // warnings for a switch plus multiple return paths
+      if (as_auth.possible_auth == shard_as_auth_t::usable_t::not_usable) {
+        return format_to(
+          ctx.out(), "{{shard-not-usable:{}}}", as_auth.error_text);
+      }
+      if (as_auth.possible_auth == shard_as_auth_t::usable_t::not_found) {
+        return format_to(ctx.out(), "{{shard-not-found}}");
+      }
+      return format_to(ctx.out(),
+                       "{{shard-usable: soid:{} {{txt:{}}} }}",
+                       as_auth.oi.soid,
+                       as_auth.error_text);
+
+    } else {
+      return format_to(
+        ctx.out(),
+        "usable:{} soid:{} {{txt:{}}}",
+        (as_auth.possible_auth == shard_as_auth_t::usable_t::usable) ? "yes"
+                                                                     : "no",
+        as_auth.oi.soid,
+        as_auth.error_text);
+    }
+  }
+
+  bool debug_log{false};
+};
+
 struct auth_selection_t {
   shard_to_scrubmap_t::iterator auth;  ///< an iter into one of this_chunk->maps
   pg_shard_t auth_shard;               // set to auth->first
@@ -301,9 +356,10 @@ class ScrubBackend {
   // note: used by both Primary & replicas
   static ScrubMap clean_meta_map(ScrubMap& cleaned, bool max_reached);
 
-  std::optional<std::string> compare_smaps();
+  void compare_smaps();
 
-  void compare_obj_in_maps(const hobject_t& ho, std::stringstream& errstream);
+  /// might return error messages to be cluster-logged
+  std::optional<std::string> compare_obj_in_maps(const hobject_t& ho);
 
   void omap_checks();