From 7dbcc772f6906f0ba720d0352d27b9dc84406161 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Mon, 30 Apr 2018 10:41:35 -0500
Subject: [PATCH] osd: fix _process handling for pg vs slot race

We could see the slot with a different PG than we expected if the old PG
was removed and a new one was instantiated in its place. We can't just
pick up the new PG pointer, however, since it isn't locked.

Fix by retrying with the slot's new pg (possibly null!). Move this check
below the other cases so that we know we are otherwise consistent with the
slot, since the next pass around we might get pg==null and skip the
to_process.empty() and requeue_seq checks entirely.

Signed-off-by: Sage Weil
---
 src/osd/OSD.cc | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 6e7e14480b6..204838bb028 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -9713,11 +9713,13 @@ void OSD::ShardedOpWQ::_process(uint32_t thread_index, heartbeat_handle_d *hb)
            << " waiting " << slot->waiting
            << " waiting_peering " << slot->waiting_peering
            << dendl;
-  PGRef pg = slot->pg;
   slot->to_process.push_back(std::move(item));
   dout(20) << __func__ << " " << slot->to_process.back()
            << " queued" << dendl;
 
+ retry_pg:
+  PGRef pg = slot->pg;
+
   // lock pg (if we have it)
   if (pg) {
     // note the requeue seq now...
@@ -9741,14 +9743,6 @@ void OSD::ShardedOpWQ::_process(uint32_t thread_index, heartbeat_handle_d *hb)
     slot = q->second.get();
     --slot->num_running;
 
-    if (slot->pg != pg) {
-      // this can happen if we race with pg removal.
-      dout(20) << __func__ << " slot " << token << " no longer attached to "
-               << pg << dendl;
-      pg->unlock();
-      pg = slot->pg;
-    }
-
     if (slot->to_process.empty()) {
       // raced with _wake_pg_slot or consume_map
       dout(20) << __func__ << " " << token
@@ -9770,6 +9764,13 @@ void OSD::ShardedOpWQ::_process(uint32_t thread_index, heartbeat_handle_d *hb)
       sdata->shard_lock.Unlock();
       return;
     }
+    if (slot->pg != pg) {
+      // this can happen if we race with pg removal.
+      dout(20) << __func__ << " slot " << token << " no longer attached to "
+               << pg << dendl;
+      pg->unlock();
+      goto retry_pg;
+    }
   }
 
   dout(20) << __func__ << " " << token
-- 
2.39.5