From: Sage Weil
Date: Fri, 3 Aug 2018 15:45:51 +0000 (-0500)
Subject: osd: wait for laggy pgs without osd_lock in handle_osd_map
X-Git-Tag: v14.0.1~371^2~8
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=62a208b42384be3a8710106d96b41b17e098af7c;p=ceph.git

osd: wait for laggy pgs without osd_lock in handle_osd_map

We can't hold osd_lock while blocking because other objectstore
completions need to take osd_lock (e.g., _committed_osd_maps), and those
objectstore completions need to complete in order to finish_splits.

Move the blocking to the top before we establish any local state in this
stack frame since both the public and cluster dispatchers may race in
handle_osd_map and we are dropping and retaking osd_lock.

Signed-off-by: Sage Weil
---

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 41f3b9699dd9..180b84b14213 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -7428,6 +7428,35 @@ void OSD::trim_maps(epoch_t oldest, int nreceived, bool skip_maps)
 
 void OSD::handle_osd_map(MOSDMap *m)
 {
+  // wait for pgs to catch up
+  {
+    // we extend the map cache pins to accomodate pgs slow to consume maps
+    // for some period, until we hit the max_lag_factor bound, at which point
+    // we block here to stop injesting more maps than they are able to keep
+    // up with.
+    epoch_t max_lag = cct->_conf->osd_map_cache_size *
+      m_osd_pg_epoch_max_lag_factor;
+    ceph_assert(max_lag > 0);
+    if (osdmap->get_epoch() > max_lag) {
+      epoch_t need = osdmap->get_epoch() - max_lag;
+      dout(10) << __func__ << " waiting for pgs to catch up (need " << need
+               << " max_lag " << max_lag << ")" << dendl;
+      osd_lock.Unlock();
+      for (auto shard : shards) {
+        epoch_t min = shard->get_min_pg_epoch();
+        if (need > min) {
+          dout(10) << __func__ << " waiting for pgs to consume " << need
+                   << " (shard " << shard->shard_id << " min " << min
+                   << ", map cache is " << cct->_conf->osd_map_cache_size
+                   << ", max_lag_factor " << m_osd_pg_epoch_max_lag_factor
+                   << ")" << dendl;
+          shard->wait_min_pg_epoch(need);
+        }
+      }
+      osd_lock.Lock();
+    }
+  }
+
   ceph_assert(osd_lock.is_locked());
   // Keep a ref in the list until we get the newly received map written
   // onto disk. This is important because as long as the refs are alive,
@@ -7510,31 +7539,6 @@ void OSD::handle_osd_map(MOSDMap *m)
     skip_maps = true;
   }
 
-  // wait for pgs to catch up
-  {
-    // we extend the map cache pins to accomodate pgs slow to consume maps
-    // for some period, until we hit the max_lag_factor bound, at which point
-    // we block here to stop injesting more maps than they are able to keep
-    // up with.
-    epoch_t max_lag = cct->_conf->osd_map_cache_size *
-      m_osd_pg_epoch_max_lag_factor;
-    ceph_assert(max_lag > 0);
-    if (osdmap->get_epoch() > max_lag) {
-      epoch_t need = osdmap->get_epoch() - max_lag;
-      for (auto shard : shards) {
-        epoch_t min = shard->get_min_pg_epoch();
-        if (need > min) {
-          dout(10) << __func__ << " waiting for pgs to consume " << need
-                   << " (shard " << shard->shard_id << " min " << min
-                   << ", map cache is " << cct->_conf->osd_map_cache_size
-                   << ", max_lag_factor " << m_osd_pg_epoch_max_lag_factor
-                   << ")" << dendl;
-          shard->wait_min_pg_epoch(need);
-        }
-      }
-    }
-  }
-
   ObjectStore::Transaction t;
   uint64_t txn_size = 0;
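
A minimal standalone sketch of the unlock/wait/relock shape the patch relies
on (not taken from the commit; Shard, OSDLike, handle_map, min_pg_epoch,
cur_epoch, and max_lag are hypothetical stand-ins for OSDShard, OSD,
handle_osd_map, and the per-shard pg epoch tracking, using std::mutex and
std::condition_variable in place of Ceph's Mutex and wait_min_pg_epoch
machinery): drop the coarse lock before blocking on progress that other
lock-taking completions must make, then retake it afterwards.

    #include <condition_variable>
    #include <cstdint>
    #include <mutex>

    // Stand-in for OSDShard: tracks how far pgs on this shard have consumed maps.
    struct Shard {
      std::mutex lock;
      std::condition_variable cond;
      uint64_t min_pg_epoch = 0;  // advanced (and cond notified) by the pg consume path

      // Block until pgs on this shard have consumed maps up to 'need'.
      void wait_min_pg_epoch(uint64_t need) {
        std::unique_lock<std::mutex> l(lock);
        cond.wait(l, [&] { return min_pg_epoch >= need; });
      }
    };

    // Stand-in for the OSD: holds the coarse lock analogous to osd_lock.
    struct OSDLike {
      std::mutex osd_lock;
      Shard shard;

      void handle_map(uint64_t cur_epoch, uint64_t max_lag) {
        std::unique_lock<std::mutex> l(osd_lock);
        if (cur_epoch > max_lag) {
          uint64_t need = cur_epoch - max_lag;
          // Drop the coarse lock before blocking: the completions that
          // advance min_pg_epoch may themselves need to take osd_lock.
          l.unlock();
          shard.wait_min_pg_epoch(need);
          // Retake it before touching any state protected by osd_lock.
          l.lock();
        }
        // ... continue map handling under osd_lock ...
      }
    };

As the commit message notes, the real patch also performs this wait before
any per-call state is set up in handle_osd_map, since two dispatch threads
can race into the function once osd_lock has been dropped and retaken.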