]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: fix rare race while looking for a pg
authorxie xingguo <xie.xingguo@zte.com.cn>
Wed, 3 Aug 2016 08:18:16 +0000 (16:18 +0800)
committerxie xingguo <xie.xingguo@zte.com.cn>
Wed, 3 Aug 2016 23:05:44 +0000 (07:05 +0800)
When _have_pg() returns true, we drop the pg_map_lock
and pg->lock() simultaneously. So theoretically the
subsequent call to _lookup_lock_pg() can return NULL,
although the chance is rare.

Also, since we are going to call _lookup_lock_pg() anyway,
we can call it directly, thus the _have_pg() method is
totally unnecessary.

Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
src/osd/OSD.cc
src/osd/OSD.h

index fff7bd5b4591fc3adced58254fc6e9befa35b464..0d87eef45b853a739d31020e0bedc8773325d725 100644 (file)
@@ -3182,12 +3182,6 @@ PG *OSD::get_pg_or_queue_for_pg(const spg_t& pgid, OpRequestRef& op)
   return out;
 }
 
-bool OSD::_have_pg(spg_t pgid)
-{
-  RWLock::RLocker l(pg_map_lock);
-  return pg_map.count(pgid);
-}
-
 PG *OSD::_lookup_lock_pg(spg_t pgid)
 {
   RWLock::RLocker l(pg_map_lock);
@@ -3530,7 +3524,8 @@ void OSD::handle_pg_peering_evt(
     return;
   }
 
-  if (!_have_pg(pgid)) {
+  PG *pg = _lookup_lock_pg(pgid);
+  if (!pg) {
     // same primary?
     if (!osdmap->have_pg_pool(pgid.pool()))
       return;
@@ -3574,7 +3569,7 @@ void OSD::handle_pg_peering_evt(
       if (!pp->is_replicated() && role != pgid.shard)
        role = -1;
 
-      PG *pg = _create_lock_pg(
+      pg = _create_lock_pg(
        get_map(epoch),
        pgid, false, false,
        role,
@@ -3604,7 +3599,7 @@ void OSD::handle_pg_peering_evt(
       pg_history_t old_history = old_pg_state->info.history;
       pg_interval_map_t old_past_intervals = old_pg_state->past_intervals;
       old_pg_state->unlock();
-      PG *pg = _create_lock_pg(
+      pg = _create_lock_pg(
        old_osd_map,
        resurrected,
        false,
@@ -3672,7 +3667,6 @@ void OSD::handle_pg_peering_evt(
     }
   } else {
     // already had it.  did the mapping change?
-    PG *pg = _lookup_lock_pg(pgid);
     if (epoch < pg->info.history.same_interval_since) {
       dout(10) << *pg << " get_or_create_pg acting changed in "
               << pg->info.history.same_interval_since
@@ -5479,10 +5473,9 @@ void OSD::do_command(Connection *con, ceph_tid_t tid, vector<string>& cmd, buffe
       r = -EINVAL;
     } else {
       spg_t pcand;
+      PG *pg = nullptr;
       if (osdmap->get_primary_shard(pgid, &pcand) &&
-         _have_pg(pcand)) {
-       PG *pg = _lookup_lock_pg(pcand);
-       assert(pg);
+         (pg = _lookup_lock_pg(pcand))) {
        if (pg->is_primary()) {
          // simulate pg <pgid> cmd= for pg->do-command
          if (prefix != "pg")
index cba0b8f96fccf1f63969642aa6b132c55bc19858..7678dd54ec9937f9769872fd53daf7e3983172d3 100644 (file)
@@ -2026,7 +2026,6 @@ protected:
   PGPool _get_pool(int id, OSDMapRef createmap);
 
   PG *get_pg_or_queue_for_pg(const spg_t& pgid, OpRequestRef& op);
-  bool  _have_pg(spg_t pgid);
   PG   *_lookup_lock_pg_with_map_lock_held(spg_t pgid);
   PG   *_lookup_lock_pg(spg_t pgid);
   PG   *_open_lock_pg(OSDMapRef createmap,