git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/PG: use new mimic osdmap structures for removed, pruned snaps
author Sage Weil <sage@redhat.com>
Fri, 3 Nov 2017 20:17:31 +0000 (15:17 -0500)
committer Sage Weil <sage@redhat.com>
Sat, 2 Dec 2017 03:26:49 +0000 (21:26 -0600)
- update snap_trimq and purged_snaps based on new mimic OSDMap fields
- improve debug output to include both trimq and purged

Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/PG.cc
src/osd/PG.h

index cfad8873086b16e1df42fcae7f94331a49aa7a92..5adcfcf4e27eb70569688bc56d2e92c888fbf27c 100644 (file)
@@ -231,61 +231,77 @@ void PGPool::update(CephContext *cct, OSDMapRef map)
   info = *pi;
   auid = pi->auid;
   name = map->get_pool_name(id);
+
   bool updated = false;
   if ((map->get_epoch() != cached_epoch + 1) ||
       (pi->get_snap_epoch() == map->get_epoch())) {
     updated = true;
-    if (pi->maybe_updated_removed_snaps(cached_removed_snaps)) {
-      pi->build_removed_snaps(newly_removed_snaps);
-      if (cached_removed_snaps.subset_of(newly_removed_snaps)) {
+  }
+
+  if (map->require_osd_release >= CEPH_RELEASE_MIMIC) {
+    // mimic tracks removed_snaps_queue in the OSDmap and purged_snaps
+    // in the pg_info_t, with deltas for both in each OSDMap.  we don't
+    // need to (and can't) track it here.
+    cached_removed_snaps.clear();
+    newly_removed_snaps.clear();
+  } else {
+    // legacy (<= luminous) removed_snaps tracking
+    if (updated) {
+      if (pi->maybe_updated_removed_snaps(cached_removed_snaps)) {
+       pi->build_removed_snaps(newly_removed_snaps);
+       if (cached_removed_snaps.subset_of(newly_removed_snaps)) {
           interval_set<snapid_t> removed_snaps = newly_removed_snaps;
           newly_removed_snaps.subtract(cached_removed_snaps);
           cached_removed_snaps.swap(removed_snaps);
-      } else {
+       } else {
           lgeneric_subdout(cct, osd, 0) << __func__
-            << " cached_removed_snaps shrank from " << cached_removed_snaps
-            << " to " << newly_removed_snaps << dendl;
+               << " cached_removed_snaps shrank from " << cached_removed_snaps
+               << " to " << newly_removed_snaps << dendl;
           cached_removed_snaps.swap(newly_removed_snaps);
           newly_removed_snaps.clear();
+       }
+      } else {
+       newly_removed_snaps.clear();
       }
-    } else
+    } else {
+      /* 1) map->get_epoch() == cached_epoch + 1 &&
+       * 2) pi->get_snap_epoch() != map->get_epoch()
+       *
+       * From the if branch, 1 && 2 must be true.  From 2, we know that
+       * this map didn't change the set of removed snaps.  From 1, we
+       * know that our cached_removed_snaps matches the previous map.
+       * Thus, from 1 && 2, cached_removed snaps matches the current
+       * set of removed snaps and all we have to do is clear
+       * newly_removed_snaps.
+       */
       newly_removed_snaps.clear();
+    }
+    lgeneric_subdout(cct, osd, 20)
+      << "PGPool::update cached_removed_snaps "
+      << cached_removed_snaps
+      << " newly_removed_snaps "
+      << newly_removed_snaps
+      << " snapc " << snapc
+      << (updated ? " (updated)":" (no change)")
+      << dendl;
+    if (cct->_conf->osd_debug_verify_cached_snaps) {
+      interval_set<snapid_t> actual_removed_snaps;
+      pi->build_removed_snaps(actual_removed_snaps);
+      if (!(actual_removed_snaps == cached_removed_snaps)) {
+       lgeneric_derr(cct) << __func__
+                  << ": mismatch between the actual removed snaps "
+                  << actual_removed_snaps
+                  << " and pool.cached_removed_snaps "
+                  << " pool.cached_removed_snaps " << cached_removed_snaps
+                  << dendl;
+      }
+      assert(actual_removed_snaps == cached_removed_snaps);
+    }
+  }
+  if (info.is_pool_snaps_mode() && updated) {
     snapc = pi->get_snap_context();
-  } else {
-    /* 1) map->get_epoch() == cached_epoch + 1 &&
-     * 2) pi->get_snap_epoch() != map->get_epoch()
-     *
-     * From the if branch, 1 && 2 must be true.  From 2, we know that
-     * this map didn't change the set of removed snaps.  From 1, we
-     * know that our cached_removed_snaps matches the previous map.
-     * Thus, from 1 && 2, cached_removed snaps matches the current
-     * set of removed snaps and all we have to do is clear
-     * newly_removed_snaps.
-     */
-    newly_removed_snaps.clear();
   }
   cached_epoch = map->get_epoch();
-  lgeneric_subdout(cct, osd, 20)
-    << "PGPool::update cached_removed_snaps "
-    << cached_removed_snaps
-    << " newly_removed_snaps "
-    << newly_removed_snaps
-    << " snapc " << snapc
-    << (updated ? " (updated)":" (no change)")
-    << dendl;
-  if (cct->_conf->osd_debug_verify_cached_snaps) {
-    interval_set<snapid_t> actual_removed_snaps;
-    pi->build_removed_snaps(actual_removed_snaps);
-    if (!(actual_removed_snaps == cached_removed_snaps)) {
-      lgeneric_derr(cct) << __func__
-                        << ": mismatch between the actual removed snaps "
-                        << actual_removed_snaps
-                        << " and pool.cached_removed_snaps "
-                        << " pool.cached_removed_snaps " << cached_removed_snaps
-                        << dendl;
-    }
-    assert(actual_removed_snaps == cached_removed_snaps);
-  }
 }
 
 PG::PG(OSDService *o, OSDMapRef curmap,
@@ -1614,20 +1630,36 @@ void PG::activate(ObjectStore::Transaction& t,
       get_osdmap()->get_epoch(),
       activation_epoch));
   
-  // initialize snap_trimq
   if (is_primary()) {
-    dout(20) << "activate - purged_snaps " << info.purged_snaps
-            << " cached_removed_snaps " << pool.cached_removed_snaps << dendl;
-    snap_trimq = pool.cached_removed_snaps;
-    interval_set<snapid_t> intersection;
-    intersection.intersection_of(snap_trimq, info.purged_snaps);
-    if (intersection == info.purged_snaps) {
-      snap_trimq.subtract(info.purged_snaps);
+    // initialize snap_trimq
+    if (get_osdmap()->require_osd_release < CEPH_RELEASE_MIMIC) {
+      dout(20) << "activate - purged_snaps " << info.purged_snaps
+              << " cached_removed_snaps " << pool.cached_removed_snaps
+              << dendl;
+      snap_trimq = pool.cached_removed_snaps;
     } else {
-      dout(0) << "warning: info.purged_snaps (" << info.purged_snaps
-             << ") is not a subset of removed_snaps" << dendl;
-      snap_trimq.subtract(intersection);
-      assert(!cct->_conf->osd_debug_verify_cached_snaps);
+      auto& removed_snaps_queue = get_osdmap()->get_removed_snaps_queue();
+      auto p = removed_snaps_queue.find(info.pgid.pgid.pool());
+      snap_trimq.clear();
+      if (p != removed_snaps_queue.end()) {
+       dout(20) << "activate - purged_snaps " << info.purged_snaps
+                << " removed_snaps " << p->second
+                << dendl;
+       for (auto q : p->second) {
+         snap_trimq.insert(q.first, q.second);
+       }
+      }
+    }
+    interval_set<snapid_t> purged;
+    purged.intersection_of(snap_trimq, info.purged_snaps);
+    snap_trimq.subtract(purged);
+
+    if (get_osdmap()->require_osd_release >= CEPH_RELEASE_MIMIC) {
+      // adjust purged_snaps: PG may have been inactive while snaps were pruned
+      // from the removed_snaps_queue in the osdmap.  update local purged_snaps
+      // reflect only those snaps that we thought were pruned and were still in
+      // the queue.
+      info.purged_snaps.swap(purged);
     }
   }
 
@@ -5621,8 +5653,23 @@ ostream& operator<<(ostream& out, const PG& pg)
        out << " u=" << unfound;
     }
   }
-  if (pg.snap_trimq.size())
-    out << " snaptrimq=" << pg.snap_trimq;
+  if (!pg.snap_trimq.empty()) {
+    out << " trimq=";
+    // only show a count if the set is large
+    if (pg.snap_trimq.num_intervals() > 16) {
+      out << pg.snap_trimq.size();
+    } else {
+      out << pg.snap_trimq;
+    }
+  }
+  if (!pg.info.purged_snaps.empty()) {
+    out << " ps="; // snap trim queue / purged snaps
+    if (pg.info.purged_snaps.num_intervals() > 16) {
+      out << pg.info.purged_snaps.size();
+    } else {
+      out << pg.info.purged_snaps;
+    }
+  }
 
   out << "]";
 
@@ -5897,7 +5944,9 @@ void PG::handle_advance_map(
           << " -- " << up_primary << "/" << acting_primary
           << dendl;
   update_osdmap_ref(osdmap);
+
   pool.update(cct, osdmap);
+
   AdvMap evt(
     osdmap, lastmap, newup, up_primary,
     newacting, acting_primary);
@@ -7257,7 +7306,63 @@ boost::statechart::result PG::RecoveryState::Active::react(const AdvMap& advmap)
     return forward_event();
   }
   ldout(pg->cct, 10) << "Active advmap" << dendl;
-  if (!pg->pool.newly_removed_snaps.empty()) {
+
+  if (advmap.osdmap->require_osd_release >= CEPH_RELEASE_MIMIC) {
+    const auto& new_removed_snaps = advmap.osdmap->get_new_removed_snaps();
+    auto i = new_removed_snaps.find(pg->info.pgid.pool());
+    if (i != new_removed_snaps.end()) {
+      bool bad = false;
+      for (auto j : i->second) {
+       if (pg->snap_trimq.intersects(j.first, j.second)) {
+         decltype(pg->snap_trimq) added, overlap;
+         added.insert(j.first, j.second);
+         overlap.intersection_of(pg->snap_trimq, added);
+         lderr(pg->cct) << __func__ << " removed_snaps already contains "
+                        << overlap << dendl;
+         bad = true;
+         pg->snap_trimq.union_of(added);
+       } else {
+         pg->snap_trimq.insert(j.first, j.second);
+       }
+      }
+      ldout(pg->cct,10) << __func__ << " new removed_snaps " << i->second
+                       << ", snap_trimq now " << pg->snap_trimq << dendl;
+      assert(!bad || !pg->cct->_conf->osd_debug_verify_cached_snaps);
+      pg->dirty_info = true;
+      pg->dirty_big_info = true;
+    }
+
+    const auto& new_purged_snaps = advmap.osdmap->get_new_purged_snaps();
+    auto j = new_purged_snaps.find(pg->info.pgid.pool());
+    if (j != new_purged_snaps.end()) {
+      bool bad = false;
+      for (auto k : j->second) {
+       if (!pg->info.purged_snaps.contains(k.first, k.second)) {
+         decltype(pg->info.purged_snaps) rm, overlap;
+         rm.insert(k.first, k.second);
+         overlap.intersection_of(pg->info.purged_snaps, rm);
+         lderr(pg->cct) << __func__ << " purged_snaps does not contain "
+                        << rm << ", only " << overlap << dendl;
+         pg->info.purged_snaps.subtract(overlap);
+         bad = true;
+       } else {
+         pg->info.purged_snaps.erase(k.first, k.second);
+       }
+      }
+      ldout(pg->cct,10) << __func__ << " new purged_snaps " << j->second
+                       << ", now " << pg->info.purged_snaps << dendl;
+      assert(!bad || !pg->cct->_conf->osd_debug_verify_cached_snaps);
+      pg->dirty_info = true;
+      pg->dirty_big_info = true;
+    }
+    if (pg->dirty_big_info) {
+      // share updated purged_snaps to mgr/mon so that we (a) stop reporting
+      // purged snaps and (b) perhaps share more snaps that we have purged
+      // but didn't fit in pg_stat_t.
+      pg->publish_stats_to_osd();
+      pg->share_pg_info();
+    }
+  } else if (!pg->pool.newly_removed_snaps.empty()) {
     pg->snap_trimq.union_of(pg->pool.newly_removed_snaps);
     ldout(pg->cct, 10) << *pg << " snap_trimq now " << pg->snap_trimq << dendl;
     pg->dirty_info = true;
index 1e495da50b30fa4398c85ae044ea22983dc816fe..73632fa9654b42c508c3bacc929e4a94c815d70b 100644 (file)
@@ -221,6 +221,7 @@ struct PGPool {
   pg_pool_t info;      
   SnapContext snapc;   // the default pool snapc, ready to go.
 
+  // these two sets are for < mimic only
   interval_set<snapid_t> cached_removed_snaps;      // current removed_snaps set
   interval_set<snapid_t> newly_removed_snaps;  // newly removed in the last epoch
 
@@ -234,7 +235,9 @@ struct PGPool {
     assert(pi);
     info = *pi;
     snapc = pi->get_snap_context();
-    pi->build_removed_snaps(cached_removed_snaps);
+    if (map->require_osd_release < CEPH_RELEASE_MIMIC) {
+      pi->build_removed_snaps(cached_removed_snaps);
+    }
   }
 
   void update(CephContext *cct, OSDMapRef map);