git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: discard repair_oinfo_oid() 62569/head
author Ronen Friedman <rfriedma@redhat.com>
Thu, 30 Jan 2025 09:27:58 +0000 (03:27 -0600)
committer Ronen Friedman <rfriedma@redhat.com>
Wed, 2 Apr 2025 14:57:51 +0000 (09:57 -0500)
repair_oinfo_oid(), called on every scrub, has one very specific
function: fixing the object ID stored in the Object Info attribute
when it differs from the ID of the owning object.

This fix was added in 2017, as a response to a unique failure
scenario that was observed in Sepia - probably following a
filesystem bug. See https://tracker.ceph.com/issues/18409 &
https://tracker.ceph.com/issues/20471.

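The limited functionality of repair_oinfo_oid() -
only repairing this one specific issue, and only if the OI_ATTR
exists and is decodable - does not justify the overhead of
running it on every scrub. With the repair gone, possible_auth_shard()
no longer asserts that the two IDs match; a mismatch is instead
flagged as corrupted object info ("candidate info oid mismatch").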

(cherry picked from commit aa22f19831731185e3c115a2b4e5603e8ef2634f)
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/pg_scrubber.h
src/osd/scrubber/scrub_backend.cc

index 893e59e74d8cd52b7a945d4306221630e209d462..dfb24d3df1d20294b808ff442a7ce2ef1fab41eb 100644 (file)
@@ -1243,63 +1243,13 @@ int PgScrubber::build_scrub_map_chunk(ScrubMap& map,
   }
 
   // finish
-  dout(20) << __func__ << " finishing" << dendl;
   ceph_assert(pos.done());
-  repair_oinfo_oid(map);
-
-  dout(20) << __func__ << " done, got " << map.objects.size() << " items"
-          << dendl;
+  dout(20) << fmt::format("{}: done. {} objects in scrub-map", __func__,
+                          map.objects.size())
+           << dendl;
   return 0;
 }
 
-/// \todo consider moving repair_oinfo_oid() back to the backend
-void PgScrubber::repair_oinfo_oid(ScrubMap& smap)
-{
-  for (auto i = smap.objects.rbegin(); i != smap.objects.rend(); ++i) {
-
-    const hobject_t& hoid = i->first;
-    ScrubMap::object& o = i->second;
-
-    if (o.attrs.find(OI_ATTR) == o.attrs.end()) {
-      continue;
-    }
-    bufferlist bl;
-    bl.push_back(o.attrs[OI_ATTR]);
-    object_info_t oi;
-    try {
-      oi.decode(bl);
-    } catch (...) {
-      continue;
-    }
-
-    if (oi.soid != hoid) {
-      ObjectStore::Transaction t;
-      OSDriver::OSTransaction _t(m_pg->osdriver.get_transaction(&t));
-
-      m_osds->clog->error()
-        << "osd." << m_pg_whoami << " found object info error on pg " << m_pg_id
-        << " oid " << hoid << " oid in object info: " << oi.soid
-        << "...repaired";
-      // Fix object info
-      oi.soid = hoid;
-      bl.clear();
-      encode(oi,
-             bl,
-             m_pg->get_osdmap()->get_features(CEPH_ENTITY_TYPE_OSD, nullptr));
-
-      bufferptr bp(bl.c_str(), bl.length());
-      o.attrs[OI_ATTR] = bp;
-
-      t.setattr(m_pg->coll, ghobject_t(hoid), OI_ATTR, bl);
-      int r = m_pg->osd->store->queue_transaction(m_pg->ch, std::move(t));
-      if (r != 0) {
-        derr << __func__ << ": queue_transaction got " << cpp_strerror(r)
-             << dendl;
-      }
-    }
-  }
-}
-
 
 void PgScrubber::run_callbacks()
 {
index f172f5fe9bde5cee26429c3ec328e69624f8a842..e1641ec3109bd9f89704fc0743463f38b9f9b4a3 100644 (file)
@@ -774,8 +774,6 @@ class PgScrubber : public ScrubPgIF,
   epoch_t m_interval_start{0}; ///< interval's 'from' of when scrubbing was
                                ///< first scheduled
 
-  void repair_oinfo_oid(ScrubMap& smap);
-
   /*
    * the exact epoch when the scrubbing actually started (started here - cleared
    * checks for no-scrub conf). Incoming events are verified against this, with
index e25c5b99da09c7eeaad4adca6733cfd36b16cce6..c351723ee6b6a099ee0fc11de15d6045cf2a3f5c 100644 (file)
@@ -721,10 +721,17 @@ shard_as_auth_t ScrubBackend::possible_auth_shard(const hobject_t& obj,
         return shard_as_auth_t{errstream.str()};
       }
     }
-  }
 
-  // This is automatically corrected in repair_oinfo_oid()
-  ceph_assert(oi.soid == obj);
+    if (!dup_error_cond(err,
+                        false,
+                        (oi.soid != obj),
+                        shard_info,
+                        &shard_info_wrapper::set_info_corrupted,
+                        "candidate info oid mismatch"sv,
+                        errstream)) {
+      return shard_as_auth_t{errstream.str()};
+    }
+  }
 
   if (test_error_cond(smap_obj.size != logical_to_ondisk_size(oi.size),
                       shard_info,