From: xie xingguo Date: Wed, 3 Aug 2016 08:18:16 +0000 (+0800) Subject: osd: fix rare race while looking for a pg X-Git-Tag: ses5-milestone5~181^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8af1031790b7b90809a4af7dccdfefd9a03d4ed6;p=ceph.git osd: fix rare race while looking for a pg When _have_pg() returns true, we drop the pg_map_lock and pg->lock() simultaneously. So theoretically the subsequent call to _lookup_lock_pg() can return NULL, although the chance is rare. Also, since we are going to call _lookup_lock_pg() anyway, we can call it directly, thus the _have_pg() method is totally unnecessary. Signed-off-by: xie xingguo --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index fff7bd5b459..0d87eef45b8 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3182,12 +3182,6 @@ PG *OSD::get_pg_or_queue_for_pg(const spg_t& pgid, OpRequestRef& op) return out; } -bool OSD::_have_pg(spg_t pgid) -{ - RWLock::RLocker l(pg_map_lock); - return pg_map.count(pgid); -} - PG *OSD::_lookup_lock_pg(spg_t pgid) { RWLock::RLocker l(pg_map_lock); @@ -3530,7 +3524,8 @@ void OSD::handle_pg_peering_evt( return; } - if (!_have_pg(pgid)) { + PG *pg = _lookup_lock_pg(pgid); + if (!pg) { // same primary? if (!osdmap->have_pg_pool(pgid.pool())) return; @@ -3574,7 +3569,7 @@ void OSD::handle_pg_peering_evt( if (!pp->is_replicated() && role != pgid.shard) role = -1; - PG *pg = _create_lock_pg( + pg = _create_lock_pg( get_map(epoch), pgid, false, false, role, @@ -3604,7 +3599,7 @@ void OSD::handle_pg_peering_evt( pg_history_t old_history = old_pg_state->info.history; pg_interval_map_t old_past_intervals = old_pg_state->past_intervals; old_pg_state->unlock(); - PG *pg = _create_lock_pg( + pg = _create_lock_pg( old_osd_map, resurrected, false, @@ -3672,7 +3667,6 @@ void OSD::handle_pg_peering_evt( } } else { // already had it. did the mapping change? - PG *pg = _lookup_lock_pg(pgid); if (epoch < pg->info.history.same_interval_since) { dout(10) << *pg << " get_or_create_pg acting changed in " << pg->info.history.same_interval_since @@ -5479,10 +5473,9 @@ void OSD::do_command(Connection *con, ceph_tid_t tid, vector& cmd, buffe r = -EINVAL; } else { spg_t pcand; + PG *pg = nullptr; if (osdmap->get_primary_shard(pgid, &pcand) && - _have_pg(pcand)) { - PG *pg = _lookup_lock_pg(pcand); - assert(pg); + (pg = _lookup_lock_pg(pcand))) { if (pg->is_primary()) { // simulate pg cmd= for pg->do-command if (prefix != "pg") diff --git a/src/osd/OSD.h b/src/osd/OSD.h index cba0b8f96fc..7678dd54ec9 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -2026,7 +2026,6 @@ protected: PGPool _get_pool(int id, OSDMapRef createmap); PG *get_pg_or_queue_for_pg(const spg_t& pgid, OpRequestRef& op); - bool _have_pg(spg_t pgid); PG *_lookup_lock_pg_with_map_lock_held(spg_t pgid); PG *_lookup_lock_pg(spg_t pgid); PG *_open_lock_pg(OSDMapRef createmap,