]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/OSDMap: fix upmap mis-killing for erasure-coded PGs 25418/head
authorxie xingguo <xie.xingguo@zte.com.cn>
Sat, 1 Dec 2018 09:42:01 +0000 (17:42 +0800)
committerxie xingguo <xie.xingguo@zte.com.cn>
Thu, 6 Dec 2018 03:12:22 +0000 (11:12 +0800)
The up-set of erasure-coded PGs may include CRUSH_ITEM_NONE,
which as a result causes mis-killing of valid upmap items.

Fixes: https://tracker.ceph.com/issues/37493
Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
Signed-off-by: ningtao <ningtao@sangfor.com.cn>
(cherry picked from commit f043dcc6d8663ed960a354b2441fd3f8dd66c62d)

Conflicts:
        - *maybe_remove_pg_upmaps* input changed, in master
          we now have a passed in *nextmap* parameter

src/osd/OSDMap.cc
src/test/osd/TestOSDMap.cc

index af00b1e24022aef0081a951214394b011f221c3e..ed7c5dd4b6bea89bca14e044cd2516170509f5af 100644 (file)
@@ -1672,6 +1672,9 @@ void OSDMap::maybe_remove_pg_upmaps(CephContext *cct,
     tmpmap.pg_to_raw_up(pg, &raw, &primary);
     set<int> parents;
     for (auto osd : raw) {
+      // skip non-existent/down osd for erasure-coded PGs
+      if (osd == CRUSH_ITEM_NONE)
+        continue;
       if (type > 0) {
         auto parent = tmpmap.crush->get_parent_of_type(osd, type, crush_rule);
         if (parent < 0) {
index 7a6293eaca049fc6adbde526a117a8cfc5c9fb53..5a5fcf11f073ff77958bbf0895e43c39838ec0e3 100644 (file)
@@ -633,6 +633,56 @@ TEST_F(OSDMapTest, CleanPGUpmaps) {
     ASSERT_TRUE(!nextmap.have_pg_upmaps(pgid));
   }
 
+  {
+    // https://tracker.ceph.com/issues/37493
+    pg_t ec_pg(0, my_ec_pool);
+    pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg);
+    OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map..
+    int from = -1;
+    int to = -1;
+    {
+      // insert a valid pg_upmap_item
+      vector<int> ec_up;
+      int ec_up_primary;
+      osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
+      ASSERT_TRUE(!ec_up.empty());
+      from = *(ec_up.begin());
+      ASSERT_TRUE(from >= 0);
+      for (int i = 0; i < (int)get_num_osds(); i++) {
+        if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
+          to = i;
+          break;
+        }
+      }
+      ASSERT_TRUE(to >= 0);
+      ASSERT_TRUE(from != to);
+      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
+      new_pg_upmap_items.push_back(make_pair(from, to));
+      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+      pending_inc.new_pg_upmap_items[ec_pgid] =
+      mempool::osdmap::vector<pair<int32_t,int32_t>>(
+        new_pg_upmap_items.begin(), new_pg_upmap_items.end());
+      tmpmap.deepish_copy_from(osdmap);
+      tmpmap.apply_incremental(pending_inc);
+      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
+    }
+    {
+      // mark one of the target OSDs of the above pg_upmap_item as down
+      OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
+      pending_inc.new_state[to] = CEPH_OSD_UP;
+      tmpmap.apply_incremental(pending_inc);
+      ASSERT_TRUE(!tmpmap.is_up(to));
+      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
+    }
+    {
+      // confirm *maybe_remove_pg_upmaps* won't do anything bad
+      OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
+      tmpmap.maybe_remove_pg_upmaps(g_ceph_context, tmpmap, &pending_inc);
+      tmpmap.apply_incremental(pending_inc);
+      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
+    }
+  }
+
   {
     // TEST pg_upmap
     {