]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd/: move search_for_missing, discover_all_unfound, build_might_have_unfound
authorSamuel Just <sjust@redhat.com>
Fri, 12 Apr 2019 22:35:26 +0000 (15:35 -0700)
committersjust@redhat.com <sjust@redhat.com>
Wed, 1 May 2019 18:22:22 +0000 (11:22 -0700)
Signed-off-by: Samuel Just <sjust@redhat.com>
src/osd/PG.cc
src/osd/PG.h
src/osd/PeeringState.cc
src/osd/PeeringState.h
src/osd/PrimaryLogPG.cc

index 7bfd4bb27f92ac0647e0a8c95fc49e167cda41eb..ddf5cf20f21f0fe399ea68a6883261a40c3dc840 100644 (file)
@@ -424,100 +424,6 @@ void PG::rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead)
     newhead, info, &rollbacker, dirty_info, dirty_big_info);
 }
 
-/*
- * Process information from a replica to determine if it could have any
- * objects that i need.
- *
- * TODO: if the missing set becomes very large, this could get expensive.
- * Instead, we probably want to just iterate over our unfound set.
- */
-bool PG::search_for_missing(
-  const pg_info_t &oinfo, const pg_missing_t &omissing,
-  pg_shard_t from,
-  PeeringCtx *ctx)
-{
-  uint64_t num_unfound_before = missing_loc.num_unfound();
-  bool found_missing = missing_loc.add_source_info(
-    from, oinfo, omissing, ctx->handle);
-  if (found_missing && num_unfound_before != missing_loc.num_unfound())
-    publish_stats_to_osd();
-  // avoid doing this if the peer is empty.  This is abit of paranoia
-  // to avoid doing something rash if add_source_info() above
-  // incorrectly decided we found something new. (if the peer has
-  // last_update=0'0 that's impossible.)
-  if (found_missing &&
-      oinfo.last_update != eversion_t()) {
-    pg_info_t tinfo(oinfo);
-    tinfo.pgid.shard = pg_whoami.shard;
-    (*(ctx->info_map))[from.osd].emplace_back(
-       pg_notify_t(
-         from.shard, pg_whoami.shard,
-         get_osdmap_epoch(),
-         get_osdmap_epoch(),
-         tinfo),
-       past_intervals);
-  }
-  return found_missing;
-}
-
-void PG::discover_all_missing(map<int, map<spg_t,pg_query_t> > &query_map)
-{
-  auto &missing = pg_log.get_missing();
-  uint64_t unfound = get_num_unfound();
-
-  dout(10) << __func__ << " "
-          << missing.num_missing() << " missing, "
-          << unfound << " unfound"
-          << dendl;
-
-  std::set<pg_shard_t>::const_iterator m = might_have_unfound.begin();
-  std::set<pg_shard_t>::const_iterator mend = might_have_unfound.end();
-  for (; m != mend; ++m) {
-    pg_shard_t peer(*m);
-    
-    if (!get_osdmap()->is_up(peer.osd)) {
-      dout(20) << __func__ << " skipping down osd." << peer << dendl;
-      continue;
-    }
-
-    map<pg_shard_t, pg_info_t>::const_iterator iter = peer_info.find(peer);
-    if (iter != peer_info.end() &&
-        (iter->second.is_empty() || iter->second.dne())) {
-      // ignore empty peers
-      continue;
-    }
-
-    // If we've requested any of this stuff, the pg_missing_t information
-    // should be on its way.
-    // TODO: coalsce requested_* into a single data structure
-    if (peer_missing.find(peer) != peer_missing.end()) {
-      dout(20) << __func__ << ": osd." << peer
-              << ": we already have pg_missing_t" << dendl;
-      continue;
-    }
-    if (peer_log_requested.find(peer) != peer_log_requested.end()) {
-      dout(20) << __func__ << ": osd." << peer
-              << ": in peer_log_requested" << dendl;
-      continue;
-    }
-    if (peer_missing_requested.find(peer) != peer_missing_requested.end()) {
-      dout(20) << __func__ << ": osd." << peer
-              << ": in peer_missing_requested" << dendl;
-      continue;
-    }
-
-    // Request missing
-    dout(10) << __func__ << ": osd." << peer << ": requesting pg_missing_t"
-            << dendl;
-    peer_missing_requested.insert(peer);
-    query_map[peer.osd][spg_t(info.pgid.pgid, peer.shard)] =
-      pg_query_t(
-       pg_query_t::FULLLOG,
-       peer.shard, pg_whoami.shard,
-       info.history, get_osdmap_epoch());
-  }
-}
-
 /******* PG ***********/
 void PG::clear_primary_state()
 {
@@ -551,36 +457,6 @@ PG::Scrubber::Scrubber()
 
 PG::Scrubber::~Scrubber() {}
 
-/* Build the might_have_unfound set.
- *
- * This is used by the primary OSD during recovery.
- *
- * This set tracks the OSDs which might have unfound objects that the primary
- * OSD needs. As we receive pg_missing_t from each OSD in might_have_unfound, we
- * will remove the OSD from the set.
- */
-void PG::build_might_have_unfound()
-{
-  ceph_assert(might_have_unfound.empty());
-  ceph_assert(is_primary());
-
-  dout(10) << __func__ << dendl;
-
-  recovery_state.check_past_interval_bounds();
-
-  might_have_unfound = past_intervals.get_might_have_unfound(
-    pg_whoami,
-    pool.info.is_erasure());
-
-  // include any (stray) peers
-  for (map<pg_shard_t, pg_info_t>::iterator p = peer_info.begin();
-       p != peer_info.end();
-       ++p)
-    might_have_unfound.insert(p->first);
-
-  dout(15) << __func__ << ": built " << might_have_unfound << dendl;
-}
-
 bool PG::op_has_sufficient_caps(OpRequestRef& op)
 {
   // only check MOSDOp
@@ -2209,7 +2085,7 @@ void PG::read_state(ObjectStore *store)
     osd->clog->error() << oss.str();
 
   // log any weirdness
-  log_weirdness();
+  recovery_state.log_weirdness();
 
   if (info_struct_v < latest_struct_v) {
     upgrade(store);
@@ -2242,35 +2118,6 @@ void PG::read_state(ObjectStore *store)
   delete rctx.transaction;
 }
 
-void PG::log_weirdness()
-{
-  if (pg_log.get_tail() != info.log_tail)
-    osd->clog->error() << info.pgid
-                      << " info mismatch, log.tail " << pg_log.get_tail()
-                      << " != info.log_tail " << info.log_tail;
-  if (pg_log.get_head() != info.last_update)
-    osd->clog->error() << info.pgid
-                      << " info mismatch, log.head " << pg_log.get_head()
-                      << " != info.last_update " << info.last_update;
-
-  if (!pg_log.get_log().empty()) {
-    // sloppy check
-    if ((pg_log.get_log().log.begin()->version <= pg_log.get_tail()))
-      osd->clog->error() << info.pgid
-                       << " log bound mismatch, info (tail,head] ("
-                       << pg_log.get_tail() << "," << pg_log.get_head() << "]"
-                       << " actual ["
-                       << pg_log.get_log().log.begin()->version << ","
-                        << pg_log.get_log().log.rbegin()->version << "]";
-  }
-  
-  if (pg_log.get_log().caller_ops.size() > pg_log.get_log().log.size()) {
-    osd->clog->error() << info.pgid
-                     << " caller_ops.size " << pg_log.get_log().caller_ops.size()
-                      << " > log size " << pg_log.get_log().log.size();
-  }
-}
-
 void PG::update_snap_map(
   const vector<pg_log_entry_t> &log_entries,
   ObjectStore::Transaction &t)
@@ -4918,7 +4765,7 @@ void PG::find_unfound(epoch_t queued, PeeringCtx *rctx)
     * It may be that our initial locations were bad and we errored
     * out while trying to pull.
     */
-  discover_all_missing(*rctx->query_map);
+  recovery_state.discover_all_missing(*rctx->query_map);
   if (rctx->query_map->empty()) {
     string action;
     if (state_test(PG_STATE_BACKFILLING)) {
index 08367076ae6ebda8e4c233171c6ca029b3c03ac8..53e6f73521c1d5d249595c51675b25ede9ecba9b 100644 (file)
@@ -1057,22 +1057,14 @@ protected:
     ObjectStore::Transaction& t, pg_info_t &oinfo,
     pg_log_t &olog, pg_shard_t from);
   void rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead);
-  bool search_for_missing(
-    const pg_info_t &oinfo, const pg_missing_t &omissing,
-    pg_shard_t fromosd,
-    PeeringCtx*);
-
-  void discover_all_missing(std::map<int, map<spg_t,pg_query_t> > &query_map);
-  
-  void build_might_have_unfound();
 
   void proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &info);
 
   bool have_unfound() const { 
-    return missing_loc.have_unfound();
+    return recovery_state.have_unfound();
   }
   uint64_t get_num_unfound() const {
-    return missing_loc.num_unfound();
+    return recovery_state.get_num_unfound();
   }
 
   virtual void check_local() = 0;
@@ -1419,9 +1411,6 @@ protected:
 
   uint64_t get_min_acting_features() const { return acting_features; }
   uint64_t get_min_upacting_features() const { return upacting_features; }
-  bool perform_deletes_during_peering() const {
-    return !(get_osdmap()->test_flag(CEPH_OSDMAP_RECOVERY_DELETES));
-  }
 
   bool hard_limit_pglog() const {
     return (get_osdmap()->test_flag(CEPH_OSDMAP_PGLOG_HARDLIMIT));
@@ -1535,8 +1524,6 @@ protected:
 
   void filter_snapc(vector<snapid_t> &snaps);
 
-  void log_weirdness();
-
   virtual void kick_snap_trim() = 0;
   virtual void snap_trimmer_scrub_complete() = 0;
   bool requeue_scrub(bool high_priority = false);
index ad1df1979bff03cb55f917f60c169a7b496b6466..a6a04663a2af155ad43bf8e71dfee14a26e591de 100644 (file)
@@ -1839,6 +1839,162 @@ bool PeeringState::choose_acting(pg_shard_t &auth_log_shard_id,
   return true;
 }
 
+void PeeringState::log_weirdness()
+{
+  if (pg_log.get_tail() != info.log_tail)
+    pl->get_clog().error() << info.pgid
+                          << " info mismatch, log.tail " << pg_log.get_tail()
+                          << " != info.log_tail " << info.log_tail;
+  if (pg_log.get_head() != info.last_update)
+    pl->get_clog().error() << info.pgid
+                          << " info mismatch, log.head " << pg_log.get_head()
+                          << " != info.last_update " << info.last_update;
+
+  if (!pg_log.get_log().empty()) {
+    // sloppy check
+    if ((pg_log.get_log().log.begin()->version <= pg_log.get_tail()))
+      pl->get_clog().error() << info.pgid
+                            << " log bound mismatch, info (tail,head] ("
+                            << pg_log.get_tail() << ","
+                            << pg_log.get_head() << "]"
+                            << " actual ["
+                            << pg_log.get_log().log.begin()->version << ","
+                            << pg_log.get_log().log.rbegin()->version << "]";
+  }
+
+  if (pg_log.get_log().caller_ops.size() > pg_log.get_log().log.size()) {
+    pl->get_clog().error() << info.pgid
+                          << " caller_ops.size "
+                          << pg_log.get_log().caller_ops.size()
+                          << " > log size " << pg_log.get_log().log.size();
+  }
+}
+
+/*
+ * Process information from a replica to determine if it could have any
+ * objects that I need.
+ *
+ * TODO: if the missing set becomes very large, this could get expensive.
+ * Instead, we probably want to just iterate over our unfound set.
+ */
+bool PeeringState::search_for_missing(
+  const pg_info_t &oinfo, const pg_missing_t &omissing,
+  pg_shard_t from,
+  PeeringCtx *ctx)
+{
+  uint64_t num_unfound_before = missing_loc.num_unfound();
+  bool found_missing = missing_loc.add_source_info(
+    from, oinfo, omissing, ctx->handle);
+  if (found_missing && num_unfound_before != missing_loc.num_unfound())
+    pl->publish_stats_to_osd();
+  // avoid doing this if the peer is empty.  This is a bit of paranoia
+  // to avoid doing something rash if add_source_info() above
+  // incorrectly decided we found something new. (if the peer has
+  // last_update=0'0 that's impossible.)
+  if (found_missing &&
+      oinfo.last_update != eversion_t()) {
+    pg_info_t tinfo(oinfo);
+    tinfo.pgid.shard = pg_whoami.shard;
+    (*(ctx->info_map))[from.osd].emplace_back(
+      pg_notify_t(
+       from.shard, pg_whoami.shard,
+       get_osdmap_epoch(),
+       get_osdmap_epoch(),
+       tinfo),
+      past_intervals);
+  }
+  return found_missing;
+}
+
+void PeeringState::discover_all_missing(
+  map<int, map<spg_t,pg_query_t> > &query_map)
+{
+  auto &missing = pg_log.get_missing();
+  uint64_t unfound = get_num_unfound();
+
+  psdout(10) << __func__ << " "
+            << missing.num_missing() << " missing, "
+            << unfound << " unfound"
+            << dendl;
+
+  std::set<pg_shard_t>::const_iterator m = might_have_unfound.begin();
+  std::set<pg_shard_t>::const_iterator mend = might_have_unfound.end();
+  for (; m != mend; ++m) {
+    pg_shard_t peer(*m);
+
+    if (!get_osdmap()->is_up(peer.osd)) {
+      psdout(20) << __func__ << " skipping down osd." << peer << dendl;
+      continue;
+    }
+
+    map<pg_shard_t, pg_info_t>::const_iterator iter = peer_info.find(peer);
+    if (iter != peer_info.end() &&
+        (iter->second.is_empty() || iter->second.dne())) {
+      // ignore empty peers
+      continue;
+    }
+
+    // If we've requested any of this stuff, the pg_missing_t information
+    // should be on its way.
+    // TODO: coalesce requested_* into a single data structure
+    if (peer_missing.find(peer) != peer_missing.end()) {
+      psdout(20) << __func__ << ": osd." << peer
+                << ": we already have pg_missing_t" << dendl;
+      continue;
+    }
+    if (peer_log_requested.find(peer) != peer_log_requested.end()) {
+      psdout(20) << __func__ << ": osd." << peer
+                << ": in peer_log_requested" << dendl;
+      continue;
+    }
+    if (peer_missing_requested.find(peer) != peer_missing_requested.end()) {
+      psdout(20) << __func__ << ": osd." << peer
+                << ": in peer_missing_requested" << dendl;
+      continue;
+    }
+
+    // Request missing
+    psdout(10) << __func__ << ": osd." << peer << ": requesting pg_missing_t"
+              << dendl;
+    peer_missing_requested.insert(peer);
+    query_map[peer.osd][spg_t(info.pgid.pgid, peer.shard)] =
+      pg_query_t(
+       pg_query_t::FULLLOG,
+       peer.shard, pg_whoami.shard,
+       info.history, get_osdmap_epoch());
+  }
+}
+
+/* Build the might_have_unfound set.
+ *
+ * This is used by the primary OSD during recovery.
+ *
+ * This set tracks the OSDs which might have unfound objects that the primary
+ * OSD needs. As we receive pg_missing_t from each OSD in might_have_unfound, we
+ * will remove the OSD from the set.
+ */
+void PeeringState::build_might_have_unfound()
+{
+  ceph_assert(might_have_unfound.empty());
+  ceph_assert(is_primary());
+
+  psdout(10) << __func__ << dendl;
+
+  check_past_interval_bounds();
+
+  might_have_unfound = past_intervals.get_might_have_unfound(
+    pg_whoami,
+    pool.info.is_erasure());
+
+  // include any (stray) peers
+  for (map<pg_shard_t, pg_info_t>::iterator p = peer_info.begin();
+       p != peer_info.end();
+       ++p)
+    might_have_unfound.insert(p->first);
+
+  psdout(15) << __func__ << ": built " << might_have_unfound << dendl;
+}
+
 void PeeringState::activate(
   ObjectStore::Transaction& t,
   epoch_t activation_epoch,
@@ -1910,7 +2066,7 @@ void PeeringState::activate(
     pg_log.activate_not_complete(info);
   }
 
-  pg->log_weirdness();
+  log_weirdness();
 
   if (is_primary()) {
     // initialize snap_trimq
@@ -2065,7 +2221,7 @@ void PeeringState::activate(
              ++p) {
          if (p->soid <= pi.last_backfill &&
              !p->is_error()) {
-           if (pg->perform_deletes_during_peering() && p->is_delete()) {
+           if (perform_deletes_during_peering() && p->is_delete()) {
              pm.rm(p->soid, p->version);
            } else {
              pm.add_next_event(*p);
@@ -2149,17 +2305,17 @@ void PeeringState::activate(
        if (is_acting_recovery_backfill(i->first))
          continue;
        ceph_assert(peer_info.count(i->first));
-       pg->search_for_missing(
+       search_for_missing(
          peer_info[i->first],
          i->second,
          i->first,
          ctx);
       }
 
-      pg->build_might_have_unfound();
+      build_might_have_unfound();
 
       // Always call now so _update_calc_stats() will be accurate
-      pg->discover_all_missing(query_map);
+      discover_all_missing(query_map);
 
     }
 
@@ -3579,9 +3735,9 @@ boost::statechart::result PeeringState::Active::react(const ActMap&)
 
   pl->on_active_actmap();
 
-  if (pg->have_unfound()) {
+  if (ps->have_unfound()) {
     // object may have become unfound
-    pg->discover_all_missing(*context< PeeringMachine >().get_query_map());
+    ps->discover_all_missing(*context< PeeringMachine >().get_query_map());
   }
 
   uint64_t unfound = ps->missing_loc.num_unfound();
@@ -3619,8 +3775,8 @@ boost::statechart::result PeeringState::Active::react(const MNotifyRec& notevt)
                       << dendl;
     ps->proc_replica_info(
       notevt.from, notevt.notify.info, notevt.notify.epoch_sent);
-    if (pg->have_unfound() || (pg->is_degraded() && pg->might_have_unfound.count(notevt.from))) {
-      pg->discover_all_missing(*context< PeeringMachine >().get_query_map());
+    if (ps->have_unfound() || (ps->is_degraded() && ps->might_have_unfound.count(notevt.from))) {
+      ps->discover_all_missing(*context< PeeringMachine >().get_query_map());
     }
   }
   return discard_event();
@@ -3670,7 +3826,7 @@ boost::statechart::result PeeringState::Active::react(const MLogRec& logevt)
                     << " log for unfound items" << dendl;
   pg->proc_replica_log(
     logevt.msg->info, logevt.msg->log, logevt.msg->missing, logevt.from);
-  bool got_missing = pg->search_for_missing(
+  bool got_missing = ps->search_for_missing(
     ps->peer_info[logevt.from],
     ps->peer_missing[logevt.from],
     logevt.from,
@@ -4659,7 +4815,7 @@ PeeringState::GetMissing::GetMissing(my_context ctx)
     // has the correct semantics even if we don't need to get a
     // missing set from a shard. This way later additions due to
     // lost+unfound delete work properly.
-    ps->peer_missing[*i].may_include_deletes = !pg->perform_deletes_during_peering();
+    ps->peer_missing[*i].may_include_deletes = !ps->perform_deletes_during_peering();
 
     if (pi.is_empty())
       continue;                                // no pg data, nothing divergent
index 4fc8f15ee6d2b4955ddac7bfb1ebd2d278d798b8..6313136dffa30eca1c649439fcc18281a57b14fb 100644 (file)
@@ -1333,6 +1333,13 @@ public:
                     bool restrict_to_up_acting,
                     bool *history_les_bound);
 
+  bool search_for_missing(
+    const pg_info_t &oinfo, const pg_missing_t &omissing,
+    pg_shard_t fromosd,
+    PeeringCtx*);
+  void discover_all_missing(std::map<int, map<spg_t,pg_query_t> > &query_map);
+  void build_might_have_unfound();
+  void log_weirdness();
   void activate(
     ObjectStore::Transaction& t,
     epoch_t activation_epoch,
@@ -1558,6 +1565,18 @@ public:
     return true;
   }
 
+  bool perform_deletes_during_peering() const {
+    return !(get_osdmap()->test_flag(CEPH_OSDMAP_RECOVERY_DELETES));
+  }
+
+
+  bool have_unfound() const {
+    return missing_loc.have_unfound();
+  }
+  uint64_t get_num_unfound() const {
+    return missing_loc.num_unfound();
+  }
+
   // Flush control interface
 private:
   void start_flush(ObjectStore::Transaction *t) {
index 8cd0ab3ada1690c79645703c799fb0ac98f2d5f0..2a4ffec409b591ef2c3b51d598745c4ba230dfbd 100644 (file)
@@ -11940,7 +11940,7 @@ void PrimaryLogPG::mark_all_unfound_lost(
     std::move(manager),
     boost::optional<std::function<void(void)> >(
       [this, oids, con, num_unfound, tid]() {
-       if (perform_deletes_during_peering()) {
+       if (recovery_state.perform_deletes_during_peering()) {
          for (auto oid : oids) {
            // clear old locations - merge_new_log_entries will have
            // handled rebuilding missing_loc for each of these