From: Sage Weil Date: Sun, 18 Feb 2018 02:27:30 +0000 (-0600) Subject: osd: fix pg removal vs _process race X-Git-Tag: v13.1.0~390^2~54 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=3a0b197cd1682f2fc68b1b962d577ec44cba5944;p=ceph.git osd: fix pg removal vs _process race Signed-off-by: Sage Weil --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 85464d29a1622..32cf018d25a94 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -9593,10 +9593,22 @@ void OSD::ShardedOpWQ::_process(uint32_t thread_index, heartbeat_handle_d *hb) sdata->sdata_op_ordering_lock.Lock(); auto q = sdata->pg_slots.find(token); - assert(q != sdata->pg_slots.end()); + if (q == sdata->pg_slots.end()) { + // this can happen if we race with pg removal. + dout(20) << __func__ << " slot " << token << " no longer there" << dendl; + pg->unlock(); + sdata->sdata_op_ordering_lock.Unlock(); + return; + } auto *slot = q->second.get(); --slot->num_running; + if (pg && !slot->pg) { + // this can happen if we race with pg removal. + dout(20) << __func__ << " slot " << token << " no longer attached" << dendl; + pg->unlock(); + pg = nullptr; + } if (slot->to_process.empty()) { // raced with wake_pg_waiters or consume_map dout(20) << __func__ << " " << token