From: Joao Eduardo Luis Date: Tue, 28 Jan 2014 15:54:33 +0000 (+0000) Subject: osd: OSDMonitor: ignore pgtemps from removed pool X-Git-Tag: v0.78~263^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F1153%2Fhead;p=ceph.git osd: OSDMonitor: ignore pgtemps from removed pool There's a window in-between receiving an MOSDPGTemp message from an OSD and actually handling it that may lead to the pool the pg temps refer to no longer existing. This may happen if the MOSDPGTemp message is queued pending dispatching due to an on-going proposal (maybe even the pool removal). This patch fixes such behavior in two steps: 1. Check if the pool exists in the osdmap upon preprocessing - if pool does not exist in the osdmap, then the pool must have been removed prior to handling the message, but after the osd sent it. - safe to ignore the pg update 2. If all pg updates in the message have been ignored, ignore the whole message. Otherwise, let prepare handle the rest. 3. Recheck if pool exists in the osdmap upon prepare - We may have ignored this pg back in preprocess, but other pgs in the message may have led the message to be passed on to prepare; ignore pg update once more. 4. Check if pool is pending removal and ignore pg update if so. We delegate checking the pending value to prepare_pgtemp() because in this case we should only ignore the update IFF the pending value is in fact committed. Otherwise we should retry the message. prepare_pgtemp() is the appropriate place to do so. Fixes: 7116 Signed-off-by: Joao Eduardo Luis --- diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index e93cd69cec5..f4143df02a7 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1443,6 +1443,7 @@ bool OSDMonitor::preprocess_pgtemp(MOSDPGTemp *m) dout(10) << "preprocess_pgtemp " << *m << dendl; vector empty; int from = m->get_orig_source().num(); + size_t ignore_cnt = 0; // check caps MonSession *session = m->get_session(); @@ -1463,7 +1464,25 @@ bool OSDMonitor::preprocess_pgtemp(MOSDPGTemp *m) for (map >::iterator p = m->pg_temp.begin(); p != m->pg_temp.end(); ++p) { dout(20) << " " << p->first << (osdmap.pg_temp->count(p->first) ? (*osdmap.pg_temp)[p->first] : empty) - << " -> " << p->second << dendl; + << " -> " << p->second << dendl; + + // does the pool exist? + if (!osdmap.have_pg_pool(p->first.pool())) { + /* + * 1. If the osdmap does not have the pool, it means the pool has been + * removed in-between the osd sending this message and us handling it. + * 2. If osdmap doesn't have the pool, it is safe to assume the pool does + * not exist in the pending either, as the osds would not send a + * message about a pool they know nothing about (yet). + * 3. However, if the pool does exist in the pending, then it must be a + * new pool, and not relevant to this message (see 1). + */ + dout(10) << __func__ << " ignore " << p->first << " -> " << p->second + << ": pool has been removed" << dendl; + ignore_cnt++; + continue; + } + // removal? if (p->second.empty() && osdmap.pg_temp->count(p->first)) return false; @@ -1473,6 +1492,10 @@ bool OSDMonitor::preprocess_pgtemp(MOSDPGTemp *m) return false; } + // should we ignore all the pgs? + if (ignore_cnt == m->pg_temp.size()) + goto ignore; + dout(7) << "preprocess_pgtemp e" << m->map_epoch << " no changes from " << m->get_orig_source_inst() << dendl; _reply_map(m, m->map_epoch); return true; @@ -1486,8 +1509,20 @@ bool OSDMonitor::prepare_pgtemp(MOSDPGTemp *m) { int from = m->get_orig_source().num(); dout(7) << "prepare_pgtemp e" << m->map_epoch << " from " << m->get_orig_source_inst() << dendl; - for (map >::iterator p = m->pg_temp.begin(); p != m->pg_temp.end(); ++p) + for (map >::iterator p = m->pg_temp.begin(); p != m->pg_temp.end(); ++p) { + uint64_t pool = p->first.pool(); + if (pending_inc.old_pools.count(pool)) { + dout(10) << __func__ << " ignore " << p->first << " -> " << p->second + << ": pool pending removal" << dendl; + continue; + } + if (!osdmap.have_pg_pool(pool)) { + dout(10) << __func__ << " ignore " << p->first << " -> " << p->second + << ": pool has been removed" << dendl; + continue; + } pending_inc.new_pg_temp[p->first] = p->second; + } pending_inc.new_up_thru[from] = m->map_epoch; // set up_thru too, so the osd doesn't have to ask again wait_for_finished_proposal(new C_ReplyMap(this, m, m->map_epoch)); return true;