]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/pg_map: allow multiple shards to create new pg mappings at the same time 57348/head
authorYingxin Cheng <yingxin.cheng@intel.com>
Wed, 20 Mar 2024 07:25:33 +0000 (15:25 +0800)
committerMatan Breizman <mbreizma@redhat.com>
Wed, 8 May 2024 07:10:04 +0000 (10:10 +0300)
Also:
* Better detections in case of inconsistent racings, such as:
  * The new mapping is creating towards different cores.
  * Mapping creation is racing with its eracing.
  * Multiple shards are erasing the same mapping at the same time.
* Add more logs to debug in case of unexpected issues.

Fixes: https://tracker.ceph.com/issues/64934
Fixes: https://tracker.ceph.com/issues/64009
Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
(cherry picked from commit 2422817b7578da44090ea5e2f540c505bbce1412)

src/crimson/osd/pg_map.cc

index c9cd09da0649d6476f4d6f656fa9af6adf7d14d8..2f5c0660cc38a69295a6cfb78e2e1d71150f2952 100644 (file)
@@ -19,75 +19,138 @@ seastar::future<core_id_t> PGShardMapping::get_or_create_pg_mapping(
   LOG_PREFIX(PGShardMapping::get_or_create_pg_mapping);
   auto find_iter = pg_to_core.find(pgid);
   if (find_iter != pg_to_core.end()) {
-    ceph_assert_always(find_iter->second != NULL_CORE);
-    if (core_expected != NULL_CORE) {
-      ceph_assert_always(find_iter->second == core_expected);
+    auto core_found = find_iter->second;
+    assert(core_found != NULL_CORE);
+    if (core_expected != NULL_CORE && core_expected != core_found) {
+      ERROR("the mapping is inconsistent for pg {}: core {}, expected {}",
+            pgid, core_found, core_expected);
+      ceph_abort("The pg mapping is inconsistent!");
     }
-    return seastar::make_ready_future<core_id_t>(find_iter->second);
+    return seastar::make_ready_future<core_id_t>(core_found);
   } else {
+    DEBUG("calling primary to add mapping for pg {} to the expected core {}",
+          pgid, core_expected);
     return container().invoke_on(
         0, [pgid, core_expected, FNAME](auto &primary_mapping) {
-      auto [insert_iter, inserted] =
-        primary_mapping.pg_to_core.emplace(pgid, core_expected);
-      ceph_assert_always(inserted);
-      ceph_assert_always(primary_mapping.core_to_num_pgs.size() > 0);
-      std::map<core_id_t, unsigned>::iterator core_iter;
-      if (core_expected == NULL_CORE) {
-        core_iter = std::min_element(
-          primary_mapping.core_to_num_pgs.begin(),
-          primary_mapping.core_to_num_pgs.end(),
-          [](const auto &left, const auto &right) {
-            return left.second < right.second;
+      auto core_to_update = core_expected;
+      auto find_iter = primary_mapping.pg_to_core.find(pgid);
+      if (find_iter != primary_mapping.pg_to_core.end()) {
+        // this pgid was already mapped within primary_mapping, assert that the
+        // mapping is consistent and avoid emplacing once again.
+        auto core_found = find_iter->second;
+        assert(core_found != NULL_CORE);
+        if (core_expected != NULL_CORE) {
+          if (core_expected != core_found) {
+            ERROR("the mapping is inconsistent for pg {} (primary): core {}, expected {}",
+                  pgid, core_found, core_expected);
+            ceph_abort("The pg mapping is inconsistent!");
           }
-        );
-        core_expected = core_iter->first;
-      } else {
-        core_iter = primary_mapping.core_to_num_pgs.find(core_expected);
+          // core_expected == core_found
+          DEBUG("mapping pg {} to core {} (primary): already mapped and expected",
+                pgid, core_to_update);
+        } else { // core_expected == NULL_CORE
+          core_to_update = core_found;
+          DEBUG("mapping pg {} to core {} (primary): already mapped",
+                pgid, core_to_update);
+        }
+        // proceed to broadcast core_to_update
+      } else { // find_iter == primary_mapping.pg_to_core.end()
+        // this pgid isn't mapped within primary_mapping,
+        // add the mapping and ajust core_to_num_pgs
+        ceph_assert_always(primary_mapping.core_to_num_pgs.size() > 0);
+        std::map<core_id_t, unsigned>::iterator count_iter;
+        if (core_expected == NULL_CORE) {
+          count_iter = std::min_element(
+            primary_mapping.core_to_num_pgs.begin(),
+            primary_mapping.core_to_num_pgs.end(),
+            [](const auto &left, const auto &right) {
+              return left.second < right.second;
+            }
+          );
+          core_to_update = count_iter->first;
+        } else { // core_expected != NULL_CORE
+          count_iter = primary_mapping.core_to_num_pgs.find(core_to_update);
+        }
+        ceph_assert_always(primary_mapping.core_to_num_pgs.end() != count_iter);
+        ++(count_iter->second);
+        auto [insert_iter, inserted] =
+          primary_mapping.pg_to_core.emplace(pgid, core_to_update);
+        assert(inserted);
+        DEBUG("mapping pg {} to core {} (primary): num_pgs {}",
+              pgid, core_to_update, count_iter->second);
       }
-      assert(core_expected != NULL_CORE);
-      ceph_assert_always(primary_mapping.core_to_num_pgs.end() != core_iter);
-      insert_iter->second = core_expected;
-      core_iter->second++;
-      DEBUG("mapping pg {} to core {} (primary) with num_pgs {}",
-            pgid, core_expected, core_iter->second);
+      assert(core_to_update != NULL_CORE);
       return primary_mapping.container().invoke_on_others(
-          [pgid, core_expected, FNAME](auto &other_mapping) {
-        auto [insert_iter, inserted] =
-          other_mapping.pg_to_core.emplace(pgid, core_expected);
-        ceph_assert_always(inserted);
-        DEBUG("mapping pg {} to core {} (others)",
-              pgid, core_expected);
+          [pgid, core_to_update, FNAME](auto &other_mapping) {
+        auto find_iter = other_mapping.pg_to_core.find(pgid);
+        if (find_iter == other_mapping.pg_to_core.end()) {
+          DEBUG("mapping pg {} to core {} (others)",
+                pgid, core_to_update);
+          auto [insert_iter, inserted] =
+            other_mapping.pg_to_core.emplace(pgid, core_to_update);
+          assert(inserted);
+        } else {
+          auto core_found = find_iter->second;
+          if (core_found != core_to_update) {
+            ERROR("the mapping is inconsistent for pg {} (others): core {}, expected {}",
+                  pgid, core_found, core_to_update);
+            ceph_abort("The pg mapping is inconsistent!");
+          }
+          DEBUG("mapping pg {} to core {} (others): already mapped",
+                pgid, core_to_update);
+        }
       });
-    }).then([this, pgid, FNAME] {
+    }).then([this, pgid, core_expected, FNAME] {
       auto find_iter = pg_to_core.find(pgid);
-      ceph_assert_always(find_iter != pg_to_core.end());
-      DEBUG("returning pg {} mapping to core {}",
-            pgid, find_iter->second);
-      return seastar::make_ready_future<core_id_t>(find_iter->second);
+      if (find_iter == pg_to_core.end()) {
+        ERROR("the mapping is inconsistent for pg {}: core not found, expected {}",
+              pgid, core_expected);
+        ceph_abort("The pg mapping is inconsistent!");
+      }
+      auto core_found = find_iter->second;
+      if (core_expected != NULL_CORE && core_found != core_expected) {
+        ERROR("the mapping is inconsistent for pg {}: core {}, expected {}",
+              pgid, core_found, core_expected);
+        ceph_abort("The pg mapping is inconsistent!");
+      }
+      DEBUG("returning pg {} mapping to core {} after broadcasted",
+            pgid, core_found);
+      return seastar::make_ready_future<core_id_t>(core_found);
     });
   }
 }
 
 seastar::future<> PGShardMapping::remove_pg_mapping(spg_t pgid) {
   LOG_PREFIX(PGShardMapping::remove_pg_mapping);
-  DEBUG("{}", pgid);
+  auto find_iter = pg_to_core.find(pgid);
+  if (find_iter == pg_to_core.end()) {
+    ERROR("trying to remove non-exist mapping for pg {}", pgid);
+    ceph_abort("The pg mapping is inconsistent!");
+  }
+  DEBUG("calling primary to remove mapping for pg {}", pgid);
   return container().invoke_on(
       0, [pgid, FNAME](auto &primary_mapping) {
-    auto iter = primary_mapping.pg_to_core.find(pgid);
-    ceph_assert_always(iter != primary_mapping.pg_to_core.end());
-    ceph_assert_always(iter->second != NULL_CORE);
-    auto count_iter = primary_mapping.core_to_num_pgs.find(iter->second);
-    ceph_assert_always(count_iter != primary_mapping.core_to_num_pgs.end());
-    ceph_assert_always(count_iter->second > 0);
+    auto find_iter = primary_mapping.pg_to_core.find(pgid);
+    if (find_iter == primary_mapping.pg_to_core.end()) {
+      ERROR("trying to remove non-exist mapping for pg {} (primary)", pgid);
+      ceph_abort("The pg mapping is inconsistent!");
+    }
+    assert(find_iter->second != NULL_CORE);
+    auto count_iter = primary_mapping.core_to_num_pgs.find(find_iter->second);
+    assert(count_iter != primary_mapping.core_to_num_pgs.end());
+    assert(count_iter->second > 0);
     --(count_iter->second);
-    primary_mapping.pg_to_core.erase(iter);
+    primary_mapping.pg_to_core.erase(find_iter);
     DEBUG("pg {} mapping erased (primary)", pgid);
     return primary_mapping.container().invoke_on_others(
       [pgid, FNAME](auto &other_mapping) {
-      auto iter = other_mapping.pg_to_core.find(pgid);
-      ceph_assert_always(iter != other_mapping.pg_to_core.end());
-      ceph_assert_always(iter->second != NULL_CORE);
-      other_mapping.pg_to_core.erase(iter);
+      auto find_iter = other_mapping.pg_to_core.find(pgid);
+      if (find_iter == other_mapping.pg_to_core.end()) {
+        ERROR("trying to remove non-exist mapping for pg {} (others)", pgid);
+        ceph_abort("The pg mapping is inconsistent!");
+      }
+      assert(find_iter->second != NULL_CORE);
+      other_mapping.pg_to_core.erase(find_iter);
       DEBUG("pg {} mapping erased (others)", pgid);
     });
   });