git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: collect and record pg_num changes by pool
author Sage Weil <sage@redhat.com>
Thu, 31 May 2018 19:37:48 +0000 (14:37 -0500)
committer Sage Weil <sage@redhat.com>
Fri, 7 Sep 2018 17:08:41 +0000 (12:08 -0500)
This will simplify our identification of split and merge events.

Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/OSD.cc
src/osd/OSD.h
src/osd/osd_types.h
src/tools/ceph-dencoder/types.h

index cf9c457c60b02e8f0231aeff1daaed502a093472..360c1617c2914b6a2e86734697fd909156cecbd6 100644 (file)
@@ -3884,6 +3884,17 @@ void OSD::load_pgs()
   ceph_assert(osd_lock.is_locked());
   dout(0) << "load_pgs" << dendl;
 
+  {
+    auto pghist = make_pg_num_history_oid();
+    bufferlist bl;
+    int r = store->read(service.meta_ch, pghist, 0, 0, bl, 0);
+    if (r >= 0 && bl.length() > 0) {
+      auto p = bl.cbegin();
+      decode(pg_num_history, p);
+    }
+    dout(20) << __func__ << " pg_num_history " << pg_num_history << dendl;
+  }
+
   vector<coll_t> ls;
   int r = store->list_collections(ls);
   if (r < 0) {
@@ -7515,6 +7526,7 @@ void OSD::handle_osd_map(MOSDMap *m)
   if (superblock.oldest_map) {
     // make sure we at least keep pace with incoming maps
     trim_maps(m->oldest_map, last - first + 1, skip_maps);
+    pg_num_history.prune(superblock.oldest_map);
   }
 
   if (!superblock.oldest_map || skip_maps)
@@ -7529,7 +7541,7 @@ void OSD::handle_osd_map(MOSDMap *m)
     superblock.clean_thru = last;
   }
 
-  // check for deleted pools
+  // check for pg_num changes and deleted pools
   OSDMapRef lastmap;
   for (auto& i : added_maps) {
     if (!lastmap) {
@@ -7542,6 +7554,7 @@ void OSD::handle_osd_map(MOSDMap *m)
     ceph_assert(lastmap->get_epoch() + 1 == i.second->get_epoch());
     for (auto& j : lastmap->get_pools()) {
       if (!i.second->have_pg_pool(j.first)) {
+       pg_num_history.log_pool_delete(i.first, j.first);
        dout(10) << __func__ << " recording final pg_pool_t for pool "
                 << j.first << dendl;
        // this information is needed by _make_pg() if have to restart before
@@ -7559,10 +7572,30 @@ void OSD::handle_osd_map(MOSDMap *m)
        encode(profile, bl);
        t.write(coll_t::meta(), obj, 0, bl.length(), bl);
        service.store_deleted_pool_pg_num(j.first, j.second.get_pg_num());
+      } else if (unsigned new_pg_num = i.second->get_pg_num(j.first);
+                new_pg_num != j.second.get_pg_num()) {
+       dout(10) << __func__ << " recording pool " << j.first << " pg_num "
+                << j.second.get_pg_num() << " -> " << new_pg_num << dendl;
+       pg_num_history.log_pg_num_change(i.first, j.first, new_pg_num);
+      }
+    }
+    for (auto& j : i.second->get_pools()) {
+      if (!lastmap->have_pg_pool(j.first)) {
+       dout(10) << __func__ << " recording new pool " << j.first << " pg_num "
+                << j.second.get_pg_num() << dendl;
+       pg_num_history.log_pg_num_change(i.first, j.first,
+                                        j.second.get_pg_num());
       }
     }
     lastmap = i.second;
   }
+  pg_num_history.epoch = last;
+  {
+    bufferlist bl;
+    ::encode(pg_num_history, bl);
+    t.write(coll_t::meta(), make_pg_num_history_oid(), 0, bl.length(), bl);
+    dout(20) << __func__ << " pg_num_history " << pg_num_history << dendl;
+  }
 
   // superblock and commit
   write_superblock(t);
index 4b766f56ad2419f7fab8f318d87615265f6504e0..596b77d08dc412a493821faecf376892efc6a343 100644 (file)
@@ -1342,6 +1342,10 @@ public:
          CEPH_NOSNAP)));
   }
 
+  static ghobject_t make_pg_num_history_oid() {  // id of the object that load_pgs() reads and handle_osd_map() writes the pg_num history to
+    return ghobject_t(hobject_t(sobject_t("pg_num_history", CEPH_NOSNAP)));  // fixed object name, CEPH_NOSNAP as the snap id
+  }
+
   static void recursive_remove_collection(CephContext* cct,
                                          ObjectStore *store,
                                          spg_t pgid,
@@ -1803,6 +1807,8 @@ protected:
     return osdmap ? osdmap->get_epoch() : 0;
   }
 
+  pool_pg_num_history_t pg_num_history;
+
   utime_t         had_map_since;
   RWLock          map_lock;
   list<OpRequestRef>  waiting_for_osdmap;
index 637f5cca1c1e91600538bc8e5bccc62b4ff0b89d..d1b83591aca6f9bb9fd81e2dff03740467804458 100644 (file)
@@ -5454,4 +5454,92 @@ struct store_statfs_t
 };
 ostream &operator<<(ostream &lhs, const store_statfs_t &rhs);
 
+
+struct pool_pg_num_history_t {  // per-pool record of pg_num changes and pool deletions, keyed by epoch
+  /// last epoch this history was updated for; starts at 0 and is assigned by the owner (the log_* helpers do not touch it)
+  epoch_t epoch = 0;
+  /// pool id -> (epoch -> pg_num recorded for that pool at that epoch)
+  map<int64_t,map<epoch_t,uint32_t>> pg_nums;
+  /// (deletion epoch, pool id); epoch comes first so the set iterates oldest deletion first, which prune() relies on
+  set<pair<epoch_t,int64_t>> deleted_pools;
+
+  void log_pg_num_change(epoch_t epoch, int64_t pool, uint32_t pg_num) {
+    pg_nums[pool][epoch] = pg_num;  // creates the pool entry if needed; overwrites any value already stored for (pool, epoch)
+  }
+  void log_pool_delete(epoch_t epoch, int64_t pool) {
+    deleted_pools.insert(make_pair(epoch, pool));  // no effect if this exact (epoch, pool) pair is already present
+  }
+
+  /// prune history against oldest_epoch: forget pools deleted before it, then trim old pg_num changes of the remaining pools
+  void prune(epoch_t oldest_epoch) {
+    auto i = deleted_pools.begin();
+    while (i != deleted_pools.end()) {
+      if (i->first >= oldest_epoch) {  // entries are ordered by epoch, so every later one is at least this new
+       break;
+      }
+      pg_nums.erase(i->second);  // drop the deleted pool's entire pg_num history
+      i = deleted_pools.erase(i);  // erase returns the next iterator, keeping the walk valid
+    }
+    for (auto& j : pg_nums) {
+      auto k = j.second.lower_bound(oldest_epoch);  // first change at or after oldest_epoch (end() if none)
+      // keep this and the one entry before it (just to be paranoid); only entries older than that are dropped
+      if (k != j.second.begin()) {
+       --k;
+       j.second.erase(j.second.begin(), k);  // half-open range: k itself survives
+      }
+    }
+  }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);  // presumably (version, compat version, buffer) — confirm against the macro's definition
+    encode(epoch, bl);  // field order here must match the order decode() reads them in
+    encode(pg_nums, bl);
+    encode(deleted_pools, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::const_iterator& p) {
+    DECODE_START(1, p);
+    decode(epoch, p);  // same field order as encode()
+    decode(pg_nums, p);
+    decode(deleted_pools, p);
+    DECODE_FINISH(p);
+  }
+  void dump(Formatter *f) const {  // writes epoch, then a "pools" section, then a "deleted_pools" array
+    f->dump_unsigned("epoch", epoch);
+    f->open_object_section("pools");  // NOTE(review): object section, yet a section named "pool" is opened inside it once per pool — confirm an array section was not intended
+    for (auto& i : pg_nums) {
+      f->open_object_section("pool");
+      f->dump_unsigned("pool_id", i.first);  // key type is int64_t but it is dumped as unsigned
+      f->open_array_section("changes");
+      for (auto& j : i.second) {
+       f->open_object_section("change");
+       f->dump_unsigned("epoch", j.first);
+       f->dump_unsigned("pg_num", j.second);
+       f->close_section();
+      }
+      f->close_section();  // closes "changes"
+      f->close_section();  // closes "pool"
+    }
+    f->close_section();  // closes "pools"
+    f->open_array_section("deleted_pools");
+    for (auto& i : deleted_pools) {
+      f->open_object_section("deletion");
+      f->dump_unsigned("pool_id", i.second);  // the pair is stored as (epoch, pool id); pool_id is emitted first here
+      f->dump_unsigned("epoch", i.first);
+      f->close_section();
+    }
+    f->close_section();
+  }
+  static void generate_test_instances(list<pool_pg_num_history_t*>& ls) {
+    ls.push_back(new pool_pg_num_history_t);  // only a default-constructed (empty) instance; presumably the caller frees it — confirm
+  }
+  friend ostream& operator<<(ostream& out, const pool_pg_num_history_t& h) {  // one-line summary used in the debug log output
+    return out << "pg_num_history(e" << h.epoch
+              << " pg_nums " << h.pg_nums
+              << " deleted_pools " << h.deleted_pools
+              << ")";
+  }
+};
+WRITE_CLASS_ENCODER(pool_pg_num_history_t)
+
 #endif
index 255c23f00d6d9461fbe466c70e57fec586119b71..b3618c458287a4df83c119ec1e56a01438dd0843 100644 (file)
@@ -109,6 +109,7 @@ TYPE(ScrubMap)
 TYPE_FEATUREFUL(obj_list_watch_response_t)
 TYPE(clone_info)
 TYPE(obj_list_snap_response_t)
+TYPE(pool_pg_num_history_t)
 
 #include "osd/ECUtil.h"
 // TYPE(stripe_info_t) non-standard encoding/decoding functions