git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/OSDMap: track newly removed and purged snaps in each epoch
author: Sage Weil <sage@redhat.com>
Wed, 11 Oct 2017 19:17:39 +0000 (14:17 -0500)
committer: Sage Weil <sage@redhat.com>
Sat, 2 Dec 2017 03:16:23 +0000 (21:16 -0600)
Instead of maintaining a set of snapids that have been removed over
all time, note just the newly removed and newly purged snaps
in each OSDMap epoch.  This is easier to consume for both the Objecter
and OSD.

Also keep the interval of snaps that have been removed but not purged
in each OSDMap.  This is extremely convenient because it frees the OSDs
from having to maintain this information in parallel even when they may
not have PGs belonging to those pools.  These structures will be large
right when the upgrade happens and the pg_pool_t::removed_snaps gets copied
to the new fields, but in the steady state they will be relatively small,
reflecting only the set of snaps that are currently being removed.

This also provides convenient visibility into the "trimming snaps" set
that the cluster is working on.

Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/OSDMap.cc
src/osd/OSDMap.h

index 4588a34ab4418df51cc1bc95f183260b2717a177..c7b13a77d698145f3938d520e9e8b6ea033fef79 100644 (file)
@@ -472,10 +472,13 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const
   ENCODE_START(8, 7, bl);
 
   {
-    uint8_t v = 5;
+    uint8_t v = 6;
     if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
       v = 3;
     }
+    if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
+      v = 5;
+    }
     ENCODE_START(v, 1, bl); // client-usable data
     ::encode(fsid, bl);
     ::encode(epoch, bl);
@@ -512,6 +515,10 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const
       ::encode(new_pg_upmap_items, bl);
       ::encode(old_pg_upmap_items, bl);
     }
+    if (v >= 6) {
+      ::encode(new_removed_snaps, bl);
+      ::encode(new_purged_snaps, bl);
+    }
     ENCODE_FINISH(bl); // client-usable data
   }
 
@@ -693,7 +700,7 @@ void OSDMap::Incremental::decode(bufferlist::iterator& bl)
     return;
   }
   {
-    DECODE_START(5, bl); // client-usable data
+    DECODE_START(6, bl); // client-usable data
     ::decode(fsid, bl);
     ::decode(epoch, bl);
     ::decode(modified, bl);
@@ -736,6 +743,10 @@ void OSDMap::Incremental::decode(bufferlist::iterator& bl)
       ::decode(new_pg_upmap_items, bl);
       ::decode(old_pg_upmap_items, bl);
     }
+    if (struct_v >= 6) {
+      ::decode(new_removed_snaps, bl);
+      ::decode(new_purged_snaps, bl);
+    }
     DECODE_FINISH(bl); // client-usable data
   }
 
@@ -1053,6 +1064,37 @@ void OSDMap::Incremental::dump(Formatter *f) const
     f->dump_string("old", erasure_code_profile.c_str());
   }
   f->close_section();
+
+  f->open_array_section("new_removed_snaps");
+  for (auto& p : new_removed_snaps) {
+    f->open_object_section("pool");
+    f->dump_int("pool", p.first);
+    f->open_array_section("snaps");
+    for (auto q = p.second.begin(); q != p.second.end(); ++q) {
+      f->open_object_section("interval");
+      f->dump_unsigned("begin", q.get_start());
+      f->dump_unsigned("length", q.get_len());
+      f->close_section();
+    }
+    f->close_section();
+    f->close_section();
+  }
+  f->close_section();
+  f->open_array_section("new_purged_snaps");
+  for (auto& p : new_purged_snaps) {
+    f->open_object_section("pool");
+    f->dump_int("pool", p.first);
+    f->open_array_section("snaps");
+    for (auto q = p.second.begin(); q != p.second.end(); ++q) {
+      f->open_object_section("interval");
+      f->dump_unsigned("begin", q.get_start());
+      f->dump_unsigned("length", q.get_len());
+      f->close_section();
+    }
+    f->close_section();
+    f->close_section();
+  }
+  f->close_section();
 }
 
 void OSDMap::Incremental::generate_test_instances(list<Incremental*>& o)
@@ -1602,6 +1644,24 @@ int OSDMap::apply_incremental(const Incremental &inc)
     pools[pool.first].last_change = epoch;
   }
 
+  new_removed_snaps = inc.new_removed_snaps;
+  new_purged_snaps = inc.new_purged_snaps;
+  for (auto p = new_removed_snaps.begin();
+       p != new_removed_snaps.end();
+       ++p) {
+    removed_snaps_queue[p->first].union_of(p->second);
+  }
+  for (auto p = new_purged_snaps.begin();
+       p != new_purged_snaps.end();
+       ++p) {
+    auto q = removed_snaps_queue.find(p->first);
+    assert(q != removed_snaps_queue.end());
+    q->second.subtract(p->second);
+    if (q->second.empty()) {
+      removed_snaps_queue.erase(q);
+    }
+  }
+
   for (const auto &pname : inc.new_pool_names) {
     auto pool_name_entry = pool_name.find(pname.first);
     if (pool_name_entry != pool_name.end()) {
@@ -2306,10 +2366,13 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const
   ENCODE_START(8, 7, bl);
 
   {
-    uint8_t v = 6;
+    uint8_t v = 7;
     if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
       v = 3;
     }
+    if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
+      v = 6;
+    }
     ENCODE_START(v, 1, bl); // client-usable data
     // base
     ::encode(fsid, bl);
@@ -2372,14 +2435,21 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const
     if (v >= 6) {
       ::encode(crush_version, bl);
     }
+    if (v >= 7) {
+      ::encode(new_removed_snaps, bl);
+      ::encode(new_purged_snaps, bl);
+    }
     ENCODE_FINISH(bl); // client-usable data
   }
 
   {
-    uint8_t target_v = 5;
+    uint8_t target_v = 6;
     if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
       target_v = 1;
     }
+    if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
+      target_v = 5;
+    }
     ENCODE_START(target_v, 1, bl); // extended, osd-only data
     ::encode(osd_addrs->hb_back_addr, bl, features);
     ::encode(osd_info, bl);
@@ -2407,6 +2477,9 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const
       ::encode(require_min_compat_client, bl);
       ::encode(require_osd_release, bl);
     }
+    if (target_v >= 6) {
+      ::encode(removed_snaps_queue, bl);
+    }
     ENCODE_FINISH(bl); // osd-only data
   }
 
@@ -2582,7 +2655,7 @@ void OSDMap::decode(bufferlist::iterator& bl)
    * Since we made it past that hurdle, we can use our normal paths.
    */
   {
-    DECODE_START(6, bl); // client-usable data
+    DECODE_START(7, bl); // client-usable data
     // base
     ::decode(fsid, bl);
     ::decode(epoch, bl);
@@ -2640,11 +2713,15 @@ void OSDMap::decode(bufferlist::iterator& bl)
     if (struct_v >= 6) {
       ::decode(crush_version, bl);
     }
+    if (struct_v >= 7) {
+      ::decode(new_removed_snaps, bl);
+      ::decode(new_purged_snaps, bl);
+    }
     DECODE_FINISH(bl); // client-usable data
   }
 
   {
-    DECODE_START(5, bl); // extended, osd-only data
+    DECODE_START(6, bl); // extended, osd-only data
     ::decode(osd_addrs->hb_back_addr, bl);
     ::decode(osd_info, bl);
     ::decode(blacklist, bl);
@@ -2693,6 +2770,9 @@ void OSDMap::decode(bufferlist::iterator& bl)
        require_osd_release = 0;
       }
     }
+    if (struct_v >= 6) {
+      ::decode(removed_snaps_queue, bl);
+    }
     DECODE_FINISH(bl); // osd-only data
   }
 
@@ -2882,6 +2962,52 @@ void OSDMap::dump(Formatter *f) const
   f->close_section();
 
   dump_erasure_code_profiles(erasure_code_profiles, f);
+
+  f->open_array_section("removed_snaps_queue");
+  for (auto& p : removed_snaps_queue) {
+    f->open_object_section("pool");
+    f->dump_int("pool", p.first);
+    f->open_array_section("snaps");
+    for (auto q = p.second.begin(); q != p.second.end(); ++q) {
+      f->open_object_section("interval");
+      f->dump_unsigned("begin", q.get_start());
+      f->dump_unsigned("length", q.get_len());
+      f->close_section();
+    }
+    f->close_section();
+    f->close_section();
+  }
+  f->close_section();
+  f->open_array_section("new_removed_snaps");
+  for (auto& p : new_removed_snaps) {
+    f->open_object_section("pool");
+    f->dump_int("pool", p.first);
+    f->open_array_section("snaps");
+    for (auto q = p.second.begin(); q != p.second.end(); ++q) {
+      f->open_object_section("interval");
+      f->dump_unsigned("begin", q.get_start());
+      f->dump_unsigned("length", q.get_len());
+      f->close_section();
+    }
+    f->close_section();
+    f->close_section();
+  }
+  f->close_section();
+  f->open_array_section("new_purged_snaps");
+  for (auto& p : new_purged_snaps) {
+    f->open_object_section("pool");
+    f->dump_int("pool", p.first);
+    f->open_array_section("snaps");
+    for (auto q = p.second.begin(); q != p.second.end(); ++q) {
+      f->open_object_section("interval");
+      f->dump_unsigned("begin", q.get_start());
+      f->dump_unsigned("length", q.get_len());
+      f->close_section();
+    }
+    f->close_section();
+    f->close_section();
+  }
+  f->close_section();
 }
 
 void OSDMap::generate_test_instances(list<OSDMap*>& o)
@@ -2968,6 +3094,10 @@ void OSDMap::print_pools(ostream& out) const
 
     if (!pool.second.removed_snaps.empty())
       out << "\tremoved_snaps " << pool.second.removed_snaps << "\n";
+    auto p = removed_snaps_queue.find(pool.first);
+    if (p != removed_snaps_queue.end()) {
+      out << "\tremoved_snaps_queue " << p->second << "\n";
+    }
   }
   out << std::endl;
 }
index 0deaae4d7c0d28597bbff413eaf15b8932416787..d817a632a017ba5bddf3cc0c4c6a51a7a766b82f 100644 (file)
@@ -343,6 +343,10 @@ class OSDMap {
 public:
   MEMPOOL_CLASS_HELPERS();
 
+  typedef interval_set<
+    snapid_t,
+    mempool::osdmap::flat_map<snapid_t,snapid_t>> snap_interval_set_t;
+
   class Incremental {
   public:
     MEMPOOL_CLASS_HELPERS();
@@ -389,6 +393,8 @@ public:
     mempool::osdmap::map<pg_t,mempool::osdmap::vector<int32_t>> new_pg_upmap;
     mempool::osdmap::map<pg_t,mempool::osdmap::vector<pair<int32_t,int32_t>>> new_pg_upmap_items;
     mempool::osdmap::set<pg_t> old_pg_upmap, old_pg_upmap_items;
+    mempool::osdmap::map<int64_t, snap_interval_set_t> new_removed_snaps;
+    mempool::osdmap::map<int64_t, snap_interval_set_t> new_purged_snaps;
 
     string cluster_snapshot;
 
@@ -523,6 +529,15 @@ private:
 
   mempool::osdmap::unordered_map<entity_addr_t,utime_t> blacklist;
 
+  /// queue of snaps to remove
+  mempool::osdmap::map<int64_t, snap_interval_set_t> removed_snaps_queue;
+
+  /// removed_snaps additions this epoch
+  mempool::osdmap::map<int64_t, snap_interval_set_t> new_removed_snaps;
+
+  /// removed_snaps removals this epoch
+  mempool::osdmap::map<int64_t, snap_interval_set_t> new_purged_snaps;
+
   epoch_t cluster_snapshot_epoch;
   string cluster_snapshot;
   bool new_blacklist_entries;
@@ -1140,6 +1155,19 @@ public:
     return false;
   }
 
+  const mempool::osdmap::map<int64_t,snap_interval_set_t>&
+  get_removed_snaps_queue() const {
+    return removed_snaps_queue;
+  }
+  const mempool::osdmap::map<int64_t,snap_interval_set_t>&
+  get_new_removed_snaps() const {
+    return new_removed_snaps;
+  }
+  const mempool::osdmap::map<int64_t,snap_interval_set_t>&
+  get_new_purged_snaps() const {
+    return new_purged_snaps;
+  }
+
   int64_t lookup_pg_pool_name(const string& name) const {
     auto p = name_pool.find(name);
     if (p == name_pool.end())