]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd/: move Active purged_snaps handling back to PG
authorSamuel Just <sjust@redhat.com>
Fri, 29 Mar 2019 18:06:26 +0000 (11:06 -0700)
committersjust@redhat.com <sjust@redhat.com>
Wed, 1 May 2019 18:22:20 +0000 (11:22 -0700)
Note: this patch only sets dirty_info/dirty_big_info when
info.purged_snaps is actually changed.  I *think* that's
right, but that part deserves particular attention during
review.

Signed-off-by: Samuel Just <sjust@redhat.com>
src/osd/PG.cc
src/osd/PG.h
src/osd/PeeringState.cc
src/osd/PeeringState.h

index 87e2f364988d82480d3314e0a2a10a00e3e683c4..08ba2a7b2b0d6367fc97cf333575c239b842d740 100644 (file)
@@ -3911,6 +3911,108 @@ void PG::cancel_remote_recovery_reservation() {
     pg_id);
 }
 
+void PG::on_active_advmap(const OSDMapRef &osdmap)
+{
+  if (osdmap->require_osd_release >= CEPH_RELEASE_MIMIC) {
+    const auto& new_removed_snaps = osdmap->get_new_removed_snaps();
+    auto i = new_removed_snaps.find(get_pgid().pool());
+    if (i != new_removed_snaps.end()) {
+      bool bad = false;
+      for (auto j : i->second) {
+       if (snap_trimq.intersects(j.first, j.second)) {
+         decltype(snap_trimq) added, overlap;
+         added.insert(j.first, j.second);
+         overlap.intersection_of(snap_trimq, added);
+         if (recovery_state.get_last_require_osd_release() < CEPH_RELEASE_MIMIC) {
+           derr << __func__ << " removed_snaps already contains "
+                << overlap << ", but this is the first mimic+ osdmap,"
+                << " so it's expected" << dendl;
+         } else {
+           derr << __func__ << " removed_snaps already contains "
+                << overlap << dendl;
+           bad = true;
+         }
+         snap_trimq.union_of(added);
+       } else {
+         snap_trimq.insert(j.first, j.second);
+       }
+      }
+      if (recovery_state.get_last_require_osd_release() < CEPH_RELEASE_MIMIC) {
+       // at upgrade, we report *all* previously removed snaps as removed in
+       // the first mimic epoch.  remove the ones we previously divined were
+       // removed (and subsequently purged) from the trimq.
+       derr << __func__ << " first mimic map, filtering purged_snaps"
+            << " from new removed_snaps" << dendl;
+       snap_trimq.subtract(recovery_state.get_info().purged_snaps);
+      }
+      dout(10) << __func__ << " new removed_snaps " << i->second
+              << ", snap_trimq now " << snap_trimq << dendl;
+      ceph_assert(!bad || !cct->_conf->osd_debug_verify_cached_snaps);
+    }
+
+    const auto& new_purged_snaps = osdmap->get_new_purged_snaps();
+    auto j = new_purged_snaps.find(get_pgid().pgid.pool());
+    if (j != new_purged_snaps.end()) {
+      bool bad = false;
+      for (auto k : j->second) {
+       if (!recovery_state.get_info().purged_snaps.contains(k.first, k.second)) {
+         interval_set<snapid_t> rm, overlap;
+         rm.insert(k.first, k.second);
+         overlap.intersection_of(recovery_state.get_info().purged_snaps, rm);
+         derr << __func__ << " purged_snaps does not contain "
+              << rm << ", only " << overlap << dendl;
+         recovery_state.adjust_purged_snaps(
+           [&overlap](auto &purged_snaps) {
+             purged_snaps.subtract(overlap);
+           });
+         // This can currently happen in the normal (if unlikely) course of
+         // events.  Because adding snaps to purged_snaps does not increase
+         // the pg version or add a pg log entry, we don't reliably propagate
+         // purged_snaps additions to other OSDs.
+         // One example:
+         //  - purge S
+         //  - primary and replicas update purged_snaps
+         //  - no object updates
+         //  - pg mapping changes, new primary on different node
+         //  - new primary pg version == eversion_t(), so info is not
+         //    propagated.
+         //bad = true;
+       } else {
+         recovery_state.adjust_purged_snaps(
+           [&k](auto &purged_snaps) {
+             purged_snaps.erase(k.first, k.second);
+           });
+       }
+      }
+      dout(10) << __func__ << " new purged_snaps " << j->second
+              << ", now " << recovery_state.get_info().purged_snaps << dendl;
+      ceph_assert(!bad || !cct->_conf->osd_debug_verify_cached_snaps);
+    }
+  } else if (!pool.newly_removed_snaps.empty()) {
+    snap_trimq.union_of(pool.newly_removed_snaps);
+    dout(10) << " snap_trimq now " << snap_trimq << dendl;
+  }
+}
+
+void PG::on_active_actmap()
+{
+  if (cct->_conf->osd_check_for_log_corruption)
+    check_log_for_corruption(osd->store);
+
+
+  if (recovery_state.is_active()) {
+    dout(10) << "Active: kicking snap trim" << dendl;
+    kick_snap_trim();
+  }
+
+  if (recovery_state.is_peered() &&
+      !recovery_state.is_clean() &&
+      !recovery_state.get_osdmap()->test_flag(CEPH_OSDMAP_NOBACKFILL) &&
+      (!recovery_state.get_osdmap()->test_flag(CEPH_OSDMAP_NOREBALANCE) ||
+       recovery_state.is_degraded())) {
+    queue_recovery();
+  }
+}
 void PG::do_replica_scrub_map(OpRequestRef op)
 {
   const MOSDRepScrubMap *m = static_cast<const MOSDRepScrubMap*>(op->get_req());
index 1cf0b0d5b0218cd9ce3c00c303e5c62d1a5d1591..a795683755c56988f4c84b889e0d5a961e4fb459 100644 (file)
@@ -449,6 +449,9 @@ public:
   void cancel_remote_recovery_reservation() override;
 
 
+  void on_active_actmap() override;
+  void on_active_advmap(const OSDMapRef &osdmap) override;
+
   bool is_forced_recovery_or_backfill() const {
     return recovery_state.is_forced_recovery_or_backfill();
   }
index 35994967f95180edd2f5b483908197a773944fa7..a914b103208473119379c6c706b515f267cf7e39 100644 (file)
@@ -2442,91 +2442,13 @@ boost::statechart::result PeeringState::Active::react(const AdvMap& advmap)
   psdout(10) << "Active advmap" << dendl;
   bool need_publish = false;
 
-  if (advmap.osdmap->require_osd_release >= CEPH_RELEASE_MIMIC) {
-    const auto& new_removed_snaps = advmap.osdmap->get_new_removed_snaps();
-    auto i = new_removed_snaps.find(ps->info.pgid.pool());
-    if (i != new_removed_snaps.end()) {
-      bool bad = false;
-      for (auto j : i->second) {
-       if (pg->snap_trimq.intersects(j.first, j.second)) {
-         decltype(pg->snap_trimq) added, overlap;
-         added.insert(j.first, j.second);
-         overlap.intersection_of(pg->snap_trimq, added);
-         if (ps->last_require_osd_release < CEPH_RELEASE_MIMIC) {
-           lderr(ps->cct) << __func__ << " removed_snaps already contains "
-                          << overlap << ", but this is the first mimic+ osdmap,"
-                          << " so it's expected" << dendl;
-         } else {
-           lderr(ps->cct) << __func__ << " removed_snaps already contains "
-                          << overlap << dendl;
-           bad = true;
-         }
-         pg->snap_trimq.union_of(added);
-       } else {
-         pg->snap_trimq.insert(j.first, j.second);
-       }
-      }
-      if (ps->last_require_osd_release < CEPH_RELEASE_MIMIC) {
-       // at upgrade, we report *all* previously removed snaps as removed in
-       // the first mimic epoch.  remove the ones we previously divined were
-       // removed (and subsequently purged) from the trimq.
-       lderr(ps->cct) << __func__ << " first mimic map, filtering purged_snaps"
-                      << " from new removed_snaps" << dendl;
-       pg->snap_trimq.subtract(ps->info.purged_snaps);
-      }
-      ldout(ps->cct,10) << __func__ << " new removed_snaps " << i->second
-                       << ", snap_trimq now " << pg->snap_trimq << dendl;
-      ceph_assert(!bad || !ps->cct->_conf->osd_debug_verify_cached_snaps);
-      ps->dirty_info = true;
-      ps->dirty_big_info = true;
-    }
-
-    const auto& new_purged_snaps = advmap.osdmap->get_new_purged_snaps();
-    auto j = new_purged_snaps.find(ps->info.pgid.pool());
-    if (j != new_purged_snaps.end()) {
-      bool bad = false;
-      for (auto k : j->second) {
-       if (!ps->info.purged_snaps.contains(k.first, k.second)) {
-         decltype(ps->info.purged_snaps) rm, overlap;
-         rm.insert(k.first, k.second);
-         overlap.intersection_of(ps->info.purged_snaps, rm);
-         lderr(ps->cct) << __func__ << " purged_snaps does not contain "
-                        << rm << ", only " << overlap << dendl;
-         ps->info.purged_snaps.subtract(overlap);
-         // This can currently happen in the normal (if unlikely) course of
-         // events.  Because adding snaps to purged_snaps does not increase
-         // the pg version or add a pg log entry, we don't reliably propagate
-         // purged_snaps additions to other OSDs.
-         // One example:
-         //  - purge S
-         //  - primary and replicas update purged_snaps
-         //  - no object updates
-         //  - pg mapping changes, new primary on different node
-         //  - new primary pg version == eversion_t(), so info is not
-         //    propagated.
-         //bad = true;
-       } else {
-         ps->info.purged_snaps.erase(k.first, k.second);
-       }
-      }
-      ldout(ps->cct,10) << __func__ << " new purged_snaps " << j->second
-                       << ", now " << ps->info.purged_snaps << dendl;
-      ceph_assert(!bad || !ps->cct->_conf->osd_debug_verify_cached_snaps);
-      ps->dirty_info = true;
-      ps->dirty_big_info = true;
-    }
-    if (ps->dirty_big_info) {
-      // share updated purged_snaps to mgr/mon so that we (a) stop reporting
-      // purged snaps and (b) perhaps share more snaps that we have purged
-      // but didn't fit in pg_stat_t.
-      need_publish = true;
-      pg->share_pg_info();
-    }
-  } else if (!ps->pool.newly_removed_snaps.empty()) {
-    pg->snap_trimq.union_of(ps->pool.newly_removed_snaps);
-    psdout(10) << *pg << " snap_trimq now " << pg->snap_trimq << dendl;
-    ps->dirty_info = true;
-    ps->dirty_big_info = true;
+  pl->on_active_advmap(advmap.osdmap);
+  if (ps->dirty_big_info) {
+    // share updated purged_snaps to mgr/mon so that we (a) stop reporting
+    // purged snaps and (b) perhaps share more snaps that we have purged
+    // but didn't fit in pg_stat_t.
+    need_publish = true;
+    pg->share_pg_info();
   }
 
   for (size_t i = 0; i < ps->want_acting.size(); i++) {
@@ -2539,9 +2461,9 @@ boost::statechart::result PeeringState::Active::react(const AdvMap& advmap)
 
   /* Check for changes in pool size (if the acting set changed as a result,
    * this does not matter) */
-  if (advmap.lastmap->get_pg_size(context< PeeringMachine >().spgid.pgid) !=
-      ps->get_osdmap()->get_pg_size(context< PeeringMachine >().spgid.pgid)) {
-    if (ps->get_osdmap()->get_pg_size(context< PeeringMachine >().spgid.pgid) <=
+  if (advmap.lastmap->get_pg_size(ps->info.pgid.pgid) !=
+      ps->get_osdmap()->get_pg_size(ps->info.pgid.pgid)) {
+    if (ps->get_osdmap()->get_pg_size(ps->info.pgid.pgid) <=
        ps->actingset.size()) {
       ps->state_clear(PG_STATE_UNDERSIZED);
     } else {
@@ -2570,14 +2492,13 @@ boost::statechart::result PeeringState::Active::react(const ActMap&)
   psdout(10) << "Active: handling ActMap" << dendl;
   ceph_assert(ps->is_primary());
 
+  pl->on_active_actmap();
+
   if (pg->have_unfound()) {
     // object may have become unfound
     pg->discover_all_missing(*context< PeeringMachine >().get_query_map());
   }
 
-  if (ps->cct->_conf->osd_check_for_log_corruption)
-    pg->check_log_for_corruption(pg->osd->store);
-
   uint64_t unfound = ps->missing_loc.num_unfound();
   if (unfound > 0 &&
       pg->all_unfound_are_queried_or_lost(ps->get_osdmap())) {
@@ -2591,17 +2512,6 @@ boost::statechart::result PeeringState::Active::react(const ActMap&)
                              << unfound << " objects unfound and apparently lost";
   }
 
-  if (ps->is_active()) {
-    psdout(10) << "Active: kicking snap trim" << dendl;
-    pg->kick_snap_trim();
-  }
-
-  if (ps->is_peered() &&
-      !ps->is_clean() &&
-      !ps->get_osdmap()->test_flag(CEPH_OSDMAP_NOBACKFILL) &&
-      (!ps->get_osdmap()->test_flag(CEPH_OSDMAP_NOREBALANCE) || ps->is_degraded())) {
-    pg->queue_recovery();
-  }
   return forward_event();
 }
 
index 318ebe514813856c72c02943d75a73f7ef534cba..f12214bb29a5112292686a817d3b27ed0343907b 100644 (file)
@@ -119,6 +119,10 @@ public:
     virtual void on_activate() = 0;
     virtual void on_new_interval() = 0;
 
+
+    // active map notifications
+    virtual void on_active_actmap() = 0;
+    virtual void on_active_advmap(const OSDMapRef &osdmap) = 0;
     virtual epoch_t oldest_stored_osdmap() = 0;
     virtual LogChannel &get_clog() = 0;
 
@@ -1329,6 +1333,13 @@ public:
     last_persisted_osdmap = 0;
   }
 
+  template <typename Func>
+  void adjust_purged_snaps(Func f) {
+    f(info.purged_snaps);
+    dirty_info = true;
+    dirty_big_info = true;
+  }
+
   bool is_deleting() const {
     return deleting;
   }
@@ -1426,6 +1437,14 @@ public:
     return backfill_reserving;
   }
 
+  unsigned get_last_require_osd_release() const {
+    return last_require_osd_release;
+  }
+
+  const pg_info_t &get_info() const {
+    return info;
+  }
+
   // Flush control interface
 private:
   void start_flush(ObjectStore::Transaction *t) {