From: Yingxin Cheng Date: Wed, 20 Mar 2024 07:25:33 +0000 (+0800) Subject: crimson/os/pg_map: allow multiple shards to create new pg mappings at the same time X-Git-Tag: v19.1.1~383^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=f853abf3b0fec105cce3c88d6460e5bbd59ef3a2;p=ceph.git crimson/os/pg_map: allow multiple shards to create new pg mappings at the same time Also: * Better detections in case of inconsistent racings, such as: * The new mapping is creating towards different cores. * Mapping creation is racing with its eracing. * Multiple shards are erasing the same mapping at the same time. * Add more logs to debug in case of unexpected issues. Fixes: https://tracker.ceph.com/issues/64934 Fixes: https://tracker.ceph.com/issues/64009 Signed-off-by: Yingxin Cheng (cherry picked from commit 2422817b7578da44090ea5e2f540c505bbce1412) --- diff --git a/src/crimson/osd/pg_map.cc b/src/crimson/osd/pg_map.cc index c9cd09da0649d..2f5c0660cc38a 100644 --- a/src/crimson/osd/pg_map.cc +++ b/src/crimson/osd/pg_map.cc @@ -19,75 +19,138 @@ seastar::future PGShardMapping::get_or_create_pg_mapping( LOG_PREFIX(PGShardMapping::get_or_create_pg_mapping); auto find_iter = pg_to_core.find(pgid); if (find_iter != pg_to_core.end()) { - ceph_assert_always(find_iter->second != NULL_CORE); - if (core_expected != NULL_CORE) { - ceph_assert_always(find_iter->second == core_expected); + auto core_found = find_iter->second; + assert(core_found != NULL_CORE); + if (core_expected != NULL_CORE && core_expected != core_found) { + ERROR("the mapping is inconsistent for pg {}: core {}, expected {}", + pgid, core_found, core_expected); + ceph_abort("The pg mapping is inconsistent!"); } - return seastar::make_ready_future(find_iter->second); + return seastar::make_ready_future(core_found); } else { + DEBUG("calling primary to add mapping for pg {} to the expected core {}", + pgid, core_expected); return container().invoke_on( 0, [pgid, core_expected, FNAME](auto &primary_mapping) { - auto [insert_iter, inserted] = - primary_mapping.pg_to_core.emplace(pgid, core_expected); - ceph_assert_always(inserted); - ceph_assert_always(primary_mapping.core_to_num_pgs.size() > 0); - std::map::iterator core_iter; - if (core_expected == NULL_CORE) { - core_iter = std::min_element( - primary_mapping.core_to_num_pgs.begin(), - primary_mapping.core_to_num_pgs.end(), - [](const auto &left, const auto &right) { - return left.second < right.second; + auto core_to_update = core_expected; + auto find_iter = primary_mapping.pg_to_core.find(pgid); + if (find_iter != primary_mapping.pg_to_core.end()) { + // this pgid was already mapped within primary_mapping, assert that the + // mapping is consistent and avoid emplacing once again. + auto core_found = find_iter->second; + assert(core_found != NULL_CORE); + if (core_expected != NULL_CORE) { + if (core_expected != core_found) { + ERROR("the mapping is inconsistent for pg {} (primary): core {}, expected {}", + pgid, core_found, core_expected); + ceph_abort("The pg mapping is inconsistent!"); } - ); - core_expected = core_iter->first; - } else { - core_iter = primary_mapping.core_to_num_pgs.find(core_expected); + // core_expected == core_found + DEBUG("mapping pg {} to core {} (primary): already mapped and expected", + pgid, core_to_update); + } else { // core_expected == NULL_CORE + core_to_update = core_found; + DEBUG("mapping pg {} to core {} (primary): already mapped", + pgid, core_to_update); + } + // proceed to broadcast core_to_update + } else { // find_iter == primary_mapping.pg_to_core.end() + // this pgid isn't mapped within primary_mapping, + // add the mapping and ajust core_to_num_pgs + ceph_assert_always(primary_mapping.core_to_num_pgs.size() > 0); + std::map::iterator count_iter; + if (core_expected == NULL_CORE) { + count_iter = std::min_element( + primary_mapping.core_to_num_pgs.begin(), + primary_mapping.core_to_num_pgs.end(), + [](const auto &left, const auto &right) { + return left.second < right.second; + } + ); + core_to_update = count_iter->first; + } else { // core_expected != NULL_CORE + count_iter = primary_mapping.core_to_num_pgs.find(core_to_update); + } + ceph_assert_always(primary_mapping.core_to_num_pgs.end() != count_iter); + ++(count_iter->second); + auto [insert_iter, inserted] = + primary_mapping.pg_to_core.emplace(pgid, core_to_update); + assert(inserted); + DEBUG("mapping pg {} to core {} (primary): num_pgs {}", + pgid, core_to_update, count_iter->second); } - assert(core_expected != NULL_CORE); - ceph_assert_always(primary_mapping.core_to_num_pgs.end() != core_iter); - insert_iter->second = core_expected; - core_iter->second++; - DEBUG("mapping pg {} to core {} (primary) with num_pgs {}", - pgid, core_expected, core_iter->second); + assert(core_to_update != NULL_CORE); return primary_mapping.container().invoke_on_others( - [pgid, core_expected, FNAME](auto &other_mapping) { - auto [insert_iter, inserted] = - other_mapping.pg_to_core.emplace(pgid, core_expected); - ceph_assert_always(inserted); - DEBUG("mapping pg {} to core {} (others)", - pgid, core_expected); + [pgid, core_to_update, FNAME](auto &other_mapping) { + auto find_iter = other_mapping.pg_to_core.find(pgid); + if (find_iter == other_mapping.pg_to_core.end()) { + DEBUG("mapping pg {} to core {} (others)", + pgid, core_to_update); + auto [insert_iter, inserted] = + other_mapping.pg_to_core.emplace(pgid, core_to_update); + assert(inserted); + } else { + auto core_found = find_iter->second; + if (core_found != core_to_update) { + ERROR("the mapping is inconsistent for pg {} (others): core {}, expected {}", + pgid, core_found, core_to_update); + ceph_abort("The pg mapping is inconsistent!"); + } + DEBUG("mapping pg {} to core {} (others): already mapped", + pgid, core_to_update); + } }); - }).then([this, pgid, FNAME] { + }).then([this, pgid, core_expected, FNAME] { auto find_iter = pg_to_core.find(pgid); - ceph_assert_always(find_iter != pg_to_core.end()); - DEBUG("returning pg {} mapping to core {}", - pgid, find_iter->second); - return seastar::make_ready_future(find_iter->second); + if (find_iter == pg_to_core.end()) { + ERROR("the mapping is inconsistent for pg {}: core not found, expected {}", + pgid, core_expected); + ceph_abort("The pg mapping is inconsistent!"); + } + auto core_found = find_iter->second; + if (core_expected != NULL_CORE && core_found != core_expected) { + ERROR("the mapping is inconsistent for pg {}: core {}, expected {}", + pgid, core_found, core_expected); + ceph_abort("The pg mapping is inconsistent!"); + } + DEBUG("returning pg {} mapping to core {} after broadcasted", + pgid, core_found); + return seastar::make_ready_future(core_found); }); } } seastar::future<> PGShardMapping::remove_pg_mapping(spg_t pgid) { LOG_PREFIX(PGShardMapping::remove_pg_mapping); - DEBUG("{}", pgid); + auto find_iter = pg_to_core.find(pgid); + if (find_iter == pg_to_core.end()) { + ERROR("trying to remove non-exist mapping for pg {}", pgid); + ceph_abort("The pg mapping is inconsistent!"); + } + DEBUG("calling primary to remove mapping for pg {}", pgid); return container().invoke_on( 0, [pgid, FNAME](auto &primary_mapping) { - auto iter = primary_mapping.pg_to_core.find(pgid); - ceph_assert_always(iter != primary_mapping.pg_to_core.end()); - ceph_assert_always(iter->second != NULL_CORE); - auto count_iter = primary_mapping.core_to_num_pgs.find(iter->second); - ceph_assert_always(count_iter != primary_mapping.core_to_num_pgs.end()); - ceph_assert_always(count_iter->second > 0); + auto find_iter = primary_mapping.pg_to_core.find(pgid); + if (find_iter == primary_mapping.pg_to_core.end()) { + ERROR("trying to remove non-exist mapping for pg {} (primary)", pgid); + ceph_abort("The pg mapping is inconsistent!"); + } + assert(find_iter->second != NULL_CORE); + auto count_iter = primary_mapping.core_to_num_pgs.find(find_iter->second); + assert(count_iter != primary_mapping.core_to_num_pgs.end()); + assert(count_iter->second > 0); --(count_iter->second); - primary_mapping.pg_to_core.erase(iter); + primary_mapping.pg_to_core.erase(find_iter); DEBUG("pg {} mapping erased (primary)", pgid); return primary_mapping.container().invoke_on_others( [pgid, FNAME](auto &other_mapping) { - auto iter = other_mapping.pg_to_core.find(pgid); - ceph_assert_always(iter != other_mapping.pg_to_core.end()); - ceph_assert_always(iter->second != NULL_CORE); - other_mapping.pg_to_core.erase(iter); + auto find_iter = other_mapping.pg_to_core.find(pgid); + if (find_iter == other_mapping.pg_to_core.end()) { + ERROR("trying to remove non-exist mapping for pg {} (others)", pgid); + ceph_abort("The pg mapping is inconsistent!"); + } + assert(find_iter->second != NULL_CORE); + other_mapping.pg_to_core.erase(find_iter); DEBUG("pg {} mapping erased (others)", pgid); }); });