git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/: condense missing mutations for recovery/repair/errors
authorSamuel Just <sjust@redhat.com>
Wed, 10 Apr 2019 02:29:05 +0000 (19:29 -0700)
committersjust@redhat.com <sjust@redhat.com>
Wed, 1 May 2019 18:22:28 +0000 (11:22 -0700)
At a high level, this patch attempts to unify the various
sites at which some combination of
- mark object missing in one or more pg_missing_t
- mark object needs_recovery in missing_loc
- manipulate the locations map based on external information
occur.  It seems to me that the pg_missing_t and missing_loc
should be in sync except for the mark_unfound_lost_revert
case and the case where we are about to do a backfill push.

This patch also cleans up repair_object.  It sort of worked by accident
for non-ec non-primary bad peers.  It didn't update missing_loc, so
needs_recovery() returned the wrong answer.  However, is_unfound() did
as well, so ReplicatedBackend was nevertheless happy, as the object would
be present on the primary.  This patch updates the behavior to be
uniform as in the other force_object_missing cases.

Signed-off-by: Samuel Just <sjust@redhat.com>
src/osd/MissingLoc.h
src/osd/PG.cc
src/osd/PG.h
src/osd/PeeringState.cc
src/osd/PeeringState.h
src/osd/PrimaryLogPG.cc
src/osd/PrimaryLogPG.h

index 73d45b505fd038d1c023ded021b969e71c44b3d4..04f1cf86ffe4597c58d8fb6ead8b8daefea54422 100644 (file)
@@ -284,7 +284,7 @@ class MissingLoc {
   void rebuild(
     const hobject_t &hoid,
     pg_shard_t self,
-    const set<pg_shard_t> to_recover,
+    const set<pg_shard_t> &to_recover,
     const pg_info_t &info,
     const pg_missing_t &missing,
     const map<pg_shard_t, pg_missing_t> &pmissing,
index 0252b5311114361b56564a3e105a5bfe35e89f02..5960d1bb5c95e4e2cc64e488056274d333558b3c 100644 (file)
@@ -2385,45 +2385,42 @@ void PG::Scrubber::cleanup_store(ObjectStore::Transaction *t) {
 }
 
 void PG::repair_object(
-  const hobject_t& soid, list<pair<ScrubMap::object, pg_shard_t> > *ok_peers,
-  pg_shard_t bad_peer)
+  const hobject_t &soid,
+  const list<pair<ScrubMap::object, pg_shard_t> > &ok_peers,
+  const set<pg_shard_t> &bad_peers)
 {
-  list<pg_shard_t> op_shards;
-  for (auto i : *ok_peers) {
-    op_shards.push_back(i.second);
-  }
-  dout(10) << "repair_object " << soid << " bad_peer osd."
-          << bad_peer << " ok_peers osd.{" << op_shards << "}" << dendl;
-  ScrubMap::object &po = ok_peers->back().first;
+  set<pg_shard_t> ok_shards;
+  for (auto &&peer: ok_peers) ok_shards.insert(peer.second);
+
+  dout(10) << "repair_object " << soid
+          << " bad_peers osd.{" << bad_peers << "},"
+          << " ok_peers osd.{" << ok_shards << "}" << dendl;
+
+  const ScrubMap::object &po = ok_peers.back().first;
   eversion_t v;
-  bufferlist bv;
-  bv.push_back(po.attrs[OI_ATTR]);
   object_info_t oi;
   try {
+    bufferlist bv;
+    if (po.attrs.count(OI_ATTR)) {
+      bv.push_back(po.attrs.find(OI_ATTR)->second);
+    }
     auto bliter = bv.cbegin();
     decode(oi, bliter);
   } catch (...) {
-    dout(0) << __func__ << ": Need version of replica, bad object_info_t: " << soid << dendl;
+    dout(0) << __func__ << ": Need version of replica, bad object_info_t: "
+           << soid << dendl;
     ceph_abort();
   }
 
-  if (bad_peer == get_primary()) {
+  if (bad_peers.count(get_primary())) {
     // We should only be scrubbing if the PG is clean.
     ceph_assert(waiting_for_unreadable_object.empty());
     dout(10) << __func__ << ": primary = " << get_primary() << dendl;
   }
-  recovery_state.force_object_missing(bad_peer, soid, oi.version);
 
-  if (is_ec_pg() || bad_peer == get_primary()) {
-    // we'd better collect all shard for EC pg, and prepare good peers as the
-    // source of pull in the case of replicated pg.
-    missing_loc.add_missing(soid, oi.version, eversion_t());
-    list<pair<ScrubMap::object, pg_shard_t> >::iterator i;
-    for (i = ok_peers->begin();
-       i != ok_peers->end();
-       ++i)
-      missing_loc.add_location(soid, i->second);
-  }
+  /* No need to pass ok_peers, they must not be missing the object, so
+   * force_object_missing will add them to missing_loc anyway */
+  recovery_state.force_object_missing(bad_peers, soid, oi.version);
 }
 
 /* replica_scrub
@@ -3174,29 +3171,20 @@ bool PG::scrub_process_inconsistent()
             scrubber.authoritative.begin();
           i != scrubber.authoritative.end();
           ++i) {
-       set<pg_shard_t>::iterator j;
-
        auto missing_entry = scrubber.missing.find(i->first);
        if (missing_entry != scrubber.missing.end()) {
-         for (j = missing_entry->second.begin();
-              j != missing_entry->second.end();
-              ++j) {
-           repair_object(
-             i->first,
-             &(i->second),
-             *j);
-           ++scrubber.fixed;
-         }
+          repair_object(
+            i->first,
+            i->second,
+           missing_entry->second);
+         scrubber.fixed += missing_entry->second.size();
        }
        if (scrubber.inconsistent.count(i->first)) {
-         for (j = scrubber.inconsistent[i->first].begin(); 
-              j != scrubber.inconsistent[i->first].end(); 
-              ++j) {
-           repair_object(i->first, 
-             &(i->second),
-             *j);
-           ++scrubber.fixed;
-         }
+          repair_object(
+            i->first,
+            i->second,
+           scrubber.inconsistent[i->first]);
+         scrubber.fixed += missing_entry->second.size();
        }
       }
     }
index b45de5876e30108b1614c6e869edb7d7fabae265..bbee7b13de7f7823afa6e448d8f9e6e68a16dd0d 100644 (file)
@@ -1286,8 +1286,9 @@ protected:
   bool range_intersects_scrub(const hobject_t &start, const hobject_t& end);
 
   void repair_object(
-    const hobject_t& soid, list<pair<ScrubMap::object, pg_shard_t> > *ok_peers,
-    pg_shard_t bad_peer);
+    const hobject_t &soid,
+    const list<pair<ScrubMap::object, pg_shard_t> > &ok_peers,
+    const set<pg_shard_t> &bad_peers);
 
   void chunky_scrub(ThreadPool::TPHandle &handle);
   void scrub_compare_maps();
index 51a2189426786fb616b05239e03e7e7513141c24..628532561856539dfcced05c6792fb7212b8deea 100644 (file)
@@ -3701,16 +3701,27 @@ void PeeringState::begin_peer_recover(
 }
 
 void PeeringState::force_object_missing(
-  pg_shard_t peer,
+  const set<pg_shard_t> &peers,
   const hobject_t &soid,
   eversion_t version)
 {
-  if (peer != primary) {
-    peer_missing[peer].add(soid, version, eversion_t(), false);
-  } else {
-    pg_log.missing_add(soid, version, eversion_t());
-    pg_log.set_last_requested(0);
+  for (auto &&peer : peers) {
+    if (peer != primary) {
+      peer_missing[peer].add(soid, version, eversion_t(), false);
+    } else {
+      pg_log.missing_add(soid, version, eversion_t());
+      pg_log.set_last_requested(0);
+    }
   }
+
+  missing_loc.rebuild(
+    soid,
+    pg_whoami,
+    acting_recovery_backfill,
+    info,
+    pg_log.get_missing(),
+    peer_missing,
+    peer_info);
 }
 
 void PeeringState::pre_submit_op(
@@ -3929,6 +3940,24 @@ void PeeringState::update_peer_last_backfill(
   }
 }
 
+void PeeringState::set_revert_with_targets(
+  const hobject_t &soid,
+  const set<pg_shard_t> &good_peers)
+{
+  for (auto &&peer: good_peers) {
+    missing_loc.add_location(soid, peer);
+  }
+}
+
+void PeeringState::prepare_backfill_for_missing(
+  const hobject_t &soid,
+  const eversion_t &version,
+  const vector<pg_shard_t> &targets) {
+  for (auto &&peer: targets) {
+    peer_missing[peer].add(soid, version, eversion_t(), false);
+  }
+}
+
 /*------------ Peering State Machine----------------*/
 #undef dout_prefix
 #define dout_prefix (context< PeeringMachine >().dpp->gen_prefix(*_dout) \
index 4cb56a5743a4533c3353d5b078028f2c318a2f8d..5fdb7a6d3891eacaea1dded0bd977ebbff32f7a4 100644 (file)
@@ -1586,10 +1586,25 @@ public:
     const object_stat_sum_t &delta_stats);
 
   void force_object_missing(
-    pg_shard_t peer,
+    const pg_shard_t &peer,
+    const hobject_t &oid,
+    eversion_t version) {
+    force_object_missing(set<pg_shard_t>{peer}, oid, version);
+  }
+  void force_object_missing(
+    const set<pg_shard_t> &peer,
     const hobject_t &oid,
     eversion_t version);
 
+  void prepare_backfill_for_missing(
+    const hobject_t &soid,
+    const eversion_t &version,
+    const vector<pg_shard_t> &targets);
+
+  void set_revert_with_targets(
+    const hobject_t &soid,
+    const set<pg_shard_t> &good_peers);
+
   void update_peer_last_complete_ondisk(
     pg_shard_t fromosd,
     eversion_t lcod) {
@@ -1761,6 +1776,10 @@ public:
     return acting_recovery_backfill;
   }
 
+  const PGLog &get_pg_log() const {
+    return pg_log;
+  }
+
   bool state_test(uint64_t m) const { return (state & m) != 0; }
   void state_set(uint64_t m) { state |= m; }
   void state_clear(uint64_t m) { state &= ~m; }
@@ -1871,6 +1890,10 @@ public:
     return pg_log.get_missing().num_missing() > 0;
   }
 
+  const MissingLoc &get_missing_loc() const {
+    return missing_loc;
+  }
+
   const MissingLoc::missing_by_count_t &get_missing_by_count() const {
     return missing_loc.get_missing_by_count();
   }
index a45a0f7dbfdb2ab3b0e839be02dfb72b5a2384b5..d988c69ef808a40aa118886f90a86a81071bca67 100644 (file)
@@ -505,15 +505,6 @@ void PrimaryLogPG::send_message_osd_cluster(
   osd->send_message_osd_cluster(m, con);
 }
 
-void PrimaryLogPG::backfill_add_missing(
-  const hobject_t &oid,
-  eversion_t v)
-{
-  dout(0) << __func__ << ": oid " << oid << " version " << v << dendl;
-  backfills_in_flight.erase(oid);
-  missing_loc.add_missing(oid, v, eversion_t());
-}
-
 bool PrimaryLogPG::should_send_op(
   pg_shard_t peer,
   const hobject_t &hoid) {
@@ -11428,24 +11419,22 @@ void PrimaryLogPG::on_failed_pull(
   }
   recovering.erase(soid);
   for (auto&& i : from) {
-    missing_loc.remove_location(soid, i);
-    if (need != eversion_t()) {
-      dout(0) << __func__ << " adding " << soid << " to shard " << i
-              << "'s missing set too" << dendl;
-      auto pm = peer_missing.find(i);
-      if (pm != peer_missing.end())
-        pm->second.add(soid, need, eversion_t(), false);
+    if (i != pg_whoami) { // we'll get it below in primary_error
+      recovery_state.force_object_missing(i, soid, v);
     }
   }
+
   dout(0) << __func__ << " " << soid << " from shard " << from
-         << ", reps on " << missing_loc.get_locations(soid)
-         << " unfound? " << missing_loc.is_unfound(soid) << dendl;
+         << ", reps on " << recovery_state.get_missing_loc().get_locations(soid)
+         << " unfound? " << recovery_state.get_missing_loc().is_unfound(soid)
+         << dendl;
   finish_recovery_op(soid);  // close out this attempt,
   finish_degraded_object(soid);
 
   if (from.count(pg_whoami)) {
+    dout(0) << " primary missing oid " << soid << " version " << v << dendl;
     primary_error(soid, v);
-    backfill_add_missing(soid, v);
+    backfills_in_flight.erase(soid);
   }
 }
 
@@ -12391,15 +12380,21 @@ uint64_t PrimaryLogPG::recover_primary(uint64_t max, ThreadPool::TPHandle &handl
            eversion_t alternate_need = latest->reverting_to;
            dout(10) << " need to pull prior_version " << alternate_need << " for revert " << item << dendl;
 
+           set<pg_shard_t> good_peers;
            for (auto p = recovery_state.get_peer_missing().begin();
                 p != recovery_state.get_peer_missing().end();
-                ++p)
+                ++p) {
              if (p->second.is_missing(soid, need) &&
                  p->second.get_items().at(soid).have == alternate_need) {
-               missing_loc.add_location(soid, p->first);
+               good_peers.insert(p->first);
              }
+           }
+           recovery_state.set_revert_with_targets(
+             soid,
+             good_peers);
            dout(10) << " will pull " << alternate_need << " or " << need
-                    << " from one of " << missing_loc.get_locations(soid)
+                    << " from one of "
+                    << recovery_state.get_missing_loc().get_locations(soid)
                     << dendl;
          }
        }
@@ -12442,27 +12437,14 @@ uint64_t PrimaryLogPG::recover_primary(uint64_t max, ThreadPool::TPHandle &handl
 bool PrimaryLogPG::primary_error(
   const hobject_t& soid, eversion_t v)
 {
-  pg_log.missing_add(soid, v, eversion_t());
-  pg_log.set_last_requested(0);
-  missing_loc.remove_location(soid, pg_whoami);
-  bool uhoh = true;
-  ceph_assert(!get_acting_recovery_backfill().empty());
-  for (set<pg_shard_t>::iterator i = get_acting_recovery_backfill().begin();
-       i != get_acting_recovery_backfill().end();
-       ++i) {
-    if (*i == get_primary()) continue;
-    pg_shard_t peer = *i;
-    if (!recovery_state.get_peer_missing(peer).is_missing(soid, v)) {
-      missing_loc.add_location(soid, peer);
-      dout(10) << info.pgid << " unexpectedly missing " << soid << " v" << v
-              << ", there should be a copy on shard " << peer << dendl;
-      uhoh = false;
-    }
-  }
+  recovery_state.force_object_missing(pg_whoami, soid, v);
+  bool uhoh = recovery_state.get_missing_loc().is_unfound(soid);
   if (uhoh)
-    osd->clog->error() << info.pgid << " missing primary copy of " << soid << ", unfound";
+    osd->clog->error() << info.pgid << " missing primary copy of "
+                      << soid << ", unfound";
   else
-    osd->clog->error() << info.pgid << " missing primary copy of " << soid
+    osd->clog->error() << info.pgid << " missing primary copy of "
+                      << soid
                       << ", will try copies on "
                       << recovery_state.get_missing_loc().get_locations(soid);
   return uhoh;
@@ -13082,9 +13064,7 @@ int PrimaryLogPG::prep_backfill_object_push(
   ceph_assert(!peers.empty());
 
   backfills_in_flight.insert(oid);
-  for (unsigned int i = 0 ; i < peers.size(); ++i) {
-    recovery_state.force_object_missing(peers[i], oid, eversion_t());
-  }
+  recovery_state.prepare_backfill_for_missing(oid, v, peers);
 
   ceph_assert(!recovering.count(oid));
 
@@ -14906,11 +14886,10 @@ int PrimaryLogPG::rep_repair_primary_object(const hobject_t& soid, OpContext *ct
     return -EAGAIN;
   }
 
-  ceph_assert(!pg_log.get_missing().is_missing(soid));
+  ceph_assert(!recovery_state.get_pg_log().get_missing().is_missing(soid));
   auto& oi = ctx->new_obs.oi;
   eversion_t v = oi.version;
 
-  missing_loc.add_missing(soid, v, eversion_t());
   if (primary_error(soid, v)) {
     dout(0) << __func__ << " No other replicas available for " << soid << dendl;
     // XXX: If we knew that there is no down osd which could include this
index 4b79394f0588593894a3148ce4f8de34a3991e0e..d9d5765fd4bcf703520c7ddc110ef389b05565b0 100644 (file)
@@ -305,7 +305,6 @@ public:
 
   bool primary_error(const hobject_t& soid, eversion_t v);
 
-  void backfill_add_missing(const hobject_t &oid, eversion_t v);
   void remove_missing_object(const hobject_t &oid,
                             eversion_t v,
                             Context *on_complete) override;