From d294d736837f7d138dd0b5f5ac9c23e269eb7d47 Mon Sep 17 00:00:00 2001 From: Joao Eduardo Luis Date: Tue, 28 Jan 2014 15:54:33 +0000 Subject: [PATCH] osd: OSDMonitor: ignore pgtemps from removed pool There's a window in-between receiving an MOSDPGTemp message from an OSD and actually handling it that may lead to the pool the pg temps refer to no longer existing. This may happen if the MOSDPGTemp message is queued pending dispatching due to an on-going proposal (maybe even the pool removal). This patch fixes such behavior in two steps: 1. Check if the pool exists in the osdmap upon preprocessing - if pool does not exist in the osdmap, then the pool must have been removed prior to handling the message, but after the osd sent it. - safe to ignore the pg update 2. If all pg updates in the message have been ignored, ignore the whole message. Otherwise, let prepare handle the rest. 3. Recheck if pool exists in the osdmap upon prepare - We may have ignored this pg back in preprocess, but other pgs in the message may have led the message to be passed on to prepare; ignore pg update once more. 4. Check if pool is pending removal and ignore pg update if so. We delegate checking the pending value to prepare_pgtemp() because in this case we should only ignore the update IFF the pending value is in fact committed. Otherwise we should retry the message. prepare_pgtemp() is the appropriate place to do so. Fixes: 7116 Signed-off-by: Joao Eduardo Luis (cherry picked from commit f513f66f48383a07c70ca18a4dba6c2449ea9860) --- src/mon/OSDMonitor.cc | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 8a6016989b4a4..9d04da58e087c 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1376,6 +1376,7 @@ bool OSDMonitor::preprocess_pgtemp(MOSDPGTemp *m) dout(10) << "preprocess_pgtemp " << *m << dendl; vector empty; int from = m->get_orig_source().num(); + size_t ignore_cnt = 0; // check caps MonSession *session = m->get_session(); @@ -1396,7 +1397,25 @@ bool OSDMonitor::preprocess_pgtemp(MOSDPGTemp *m) for (map >::iterator p = m->pg_temp.begin(); p != m->pg_temp.end(); ++p) { dout(20) << " " << p->first << (osdmap.pg_temp->count(p->first) ? (*osdmap.pg_temp)[p->first] : empty) - << " -> " << p->second << dendl; + << " -> " << p->second << dendl; + + // does the pool exist? + if (!osdmap.have_pg_pool(p->first.pool())) { + /* + * 1. If the osdmap does not have the pool, it means the pool has been + * removed in-between the osd sending this message and us handling it. + * 2. If osdmap doesn't have the pool, it is safe to assume the pool does + * not exist in the pending either, as the osds would not send a + * message about a pool they know nothing about (yet). + * 3. However, if the pool does exist in the pending, then it must be a + * new pool, and not relevant to this message (see 1). + */ + dout(10) << __func__ << " ignore " << p->first << " -> " << p->second + << ": pool has been removed" << dendl; + ignore_cnt++; + continue; + } + // removal? if (p->second.empty() && osdmap.pg_temp->count(p->first)) return false; @@ -1406,6 +1425,10 @@ bool OSDMonitor::preprocess_pgtemp(MOSDPGTemp *m) return false; } + // should we ignore all the pgs? + if (ignore_cnt == m->pg_temp.size()) + goto ignore; + dout(7) << "preprocess_pgtemp e" << m->map_epoch << " no changes from " << m->get_orig_source_inst() << dendl; _reply_map(m, m->map_epoch); return true; @@ -1419,8 +1442,20 @@ bool OSDMonitor::prepare_pgtemp(MOSDPGTemp *m) { int from = m->get_orig_source().num(); dout(7) << "prepare_pgtemp e" << m->map_epoch << " from " << m->get_orig_source_inst() << dendl; - for (map >::iterator p = m->pg_temp.begin(); p != m->pg_temp.end(); ++p) + for (map >::iterator p = m->pg_temp.begin(); p != m->pg_temp.end(); ++p) { + uint64_t pool = p->first.pool(); + if (pending_inc.old_pools.count(pool)) { + dout(10) << __func__ << " ignore " << p->first << " -> " << p->second + << ": pool pending removal" << dendl; + continue; + } + if (!osdmap.have_pg_pool(pool)) { + dout(10) << __func__ << " ignore " << p->first << " -> " << p->second + << ": pool has been removed" << dendl; + continue; + } pending_inc.new_pg_temp[p->first] = p->second; + } pending_inc.new_up_thru[from] = m->map_epoch; // set up_thru too, so the osd doesn't have to ask again wait_for_finished_proposal(new C_ReplyMap(this, m, m->map_epoch)); return true; -- 2.39.5