From: Sage Weil
Date: Fri, 3 Aug 2018 15:45:51 +0000 (-0500)
Subject: osd: wait for laggy pgs without osd_lock in handle_osd_map
X-Git-Tag: v14.0.1~371^2~8
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=62a208b42384be3a8710106d96b41b17e098af7c;p=ceph.git

osd: wait for laggy pgs without osd_lock in handle_osd_map

We can't hold osd_lock while blocking because other objectstore
completions need to take osd_lock (e.g., _committed_osd_maps), and those
objectstore completions need to complete in order to finish_splits.

Move the blocking to the top before we establish any local state in this
stack frame since both the public and cluster dispatchers may race in
handle_osd_map and we are dropping and retaking osd_lock.

Signed-off-by: Sage Weil
---

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 41f3b9699dd9..180b84b14213 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -7428,6 +7428,35 @@ void OSD::trim_maps(epoch_t oldest, int nreceived, bool skip_maps)
 
 void OSD::handle_osd_map(MOSDMap *m)
 {
+  // wait for pgs to catch up
+  {
+    // we extend the map cache pins to accomodate pgs slow to consume maps
+    // for some period, until we hit the max_lag_factor bound, at which point
+    // we block here to stop injesting more maps than they are able to keep
+    // up with.
+    epoch_t max_lag = cct->_conf->osd_map_cache_size *
+      m_osd_pg_epoch_max_lag_factor;
+    ceph_assert(max_lag > 0);
+    if (osdmap->get_epoch() > max_lag) {
+      epoch_t need = osdmap->get_epoch() - max_lag;
+      dout(10) << __func__ << " waiting for pgs to catch up (need " << need
+               << " max_lag " << max_lag << ")" << dendl;
+      osd_lock.Unlock();
+      for (auto shard : shards) {
+        epoch_t min = shard->get_min_pg_epoch();
+        if (need > min) {
+          dout(10) << __func__ << " waiting for pgs to consume " << need
+                   << " (shard " << shard->shard_id << " min " << min
+                   << ", map cache is " << cct->_conf->osd_map_cache_size
+                   << ", max_lag_factor " << m_osd_pg_epoch_max_lag_factor
+                   << ")" << dendl;
+          shard->wait_min_pg_epoch(need);
+        }
+      }
+      osd_lock.Lock();
+    }
+  }
+
   ceph_assert(osd_lock.is_locked());
   // Keep a ref in the list until we get the newly received map written
   // onto disk. This is important because as long as the refs are alive,
@@ -7510,31 +7539,6 @@ void OSD::handle_osd_map(MOSDMap *m)
     skip_maps = true;
   }
 
-  // wait for pgs to catch up
-  {
-    // we extend the map cache pins to accomodate pgs slow to consume maps
-    // for some period, until we hit the max_lag_factor bound, at which point
-    // we block here to stop injesting more maps than they are able to keep
-    // up with.
-    epoch_t max_lag = cct->_conf->osd_map_cache_size *
-      m_osd_pg_epoch_max_lag_factor;
-    ceph_assert(max_lag > 0);
-    if (osdmap->get_epoch() > max_lag) {
-      epoch_t need = osdmap->get_epoch() - max_lag;
-      for (auto shard : shards) {
-        epoch_t min = shard->get_min_pg_epoch();
-        if (need > min) {
-          dout(10) << __func__ << " waiting for pgs to consume " << need
-                   << " (shard " << shard->shard_id << " min " << min
-                   << ", map cache is " << cct->_conf->osd_map_cache_size
-                   << ", max_lag_factor " << m_osd_pg_epoch_max_lag_factor
-                   << ")" << dendl;
-          shard->wait_min_pg_epoch(need);
-        }
-      }
-    }
-  }
-
   ObjectStore::Transaction t;
   uint64_t txn_size = 0;
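
A minimal standalone sketch of the unlock/wait/relock shape the patch relies
on (not taken from the commit; Shard, OSDLike, handle_map, min_pg_epoch,
cur_epoch, and max_lag are hypothetical stand-ins for OSDShard, OSD,
handle_osd_map, and the per-shard pg epoch tracking, using std::mutex and
std::condition_variable in place of Ceph's Mutex and wait_min_pg_epoch
machinery): drop the coarse lock before blocking on progress that other
lock-taking completions must make, then retake it afterwards.

    #include <condition_variable>
    #include <cstdint>
    #include <mutex>

    // Stand-in for OSDShard: tracks how far pgs on this shard have consumed maps.
    struct Shard {
      std::mutex lock;
      std::condition_variable cond;
      uint64_t min_pg_epoch = 0;  // advanced (and cond notified) by the pg consume path

      // Block until pgs on this shard have consumed maps up to 'need'.
      void wait_min_pg_epoch(uint64_t need) {
        std::unique_lock<std::mutex> l(lock);
        cond.wait(l, [&] { return min_pg_epoch >= need; });
      }
    };

    // Stand-in for the OSD: holds the coarse lock analogous to osd_lock.
    struct OSDLike {
      std::mutex osd_lock;
      Shard shard;

      void handle_map(uint64_t cur_epoch, uint64_t max_lag) {
        std::unique_lock<std::mutex> l(osd_lock);
        if (cur_epoch > max_lag) {
          uint64_t need = cur_epoch - max_lag;
          // Drop the coarse lock before blocking: the completions that
          // advance min_pg_epoch may themselves need to take osd_lock.
          l.unlock();
          shard.wait_min_pg_epoch(need);
          // Retake it before touching any state protected by osd_lock.
          l.lock();
        }
        // ... continue map handling under osd_lock ...
      }
    };

As the commit message notes, the real patch also performs this wait before
any per-call state is set up in handle_osd_map, since two dispatch threads
can race into the function once osd_lock has been dropped and retaken.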