From 2adc534a72cc199c8b11dbdf436258cbe147101b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 5 Mar 2014 15:58:52 -0800 Subject: [PATCH] mon/OSDMonitor: fix pool deletion checks, races Unify the pool deletion safety checks into a single set of functions. Make sure we check the committed state and error out if there is a problem. Also check the pending state, if any, and delay+retry if there is a problem there. This ensures that we correctly verify that a pool is not in use when it is deleted (by another tier or by cephfs). These checks are also now applied to librados calls. Fixes: #7590 Signed-off-by: Sage Weil --- src/mon/OSDMonitor.cc | 89 ++++++++++++++++++++++++++++++++----------- src/mon/OSDMonitor.h | 3 +- 2 files changed, 69 insertions(+), 23 deletions(-) diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 1cee7d5935a2..e679a12da5e6 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -4388,14 +4388,6 @@ done: goto reply; } - // If the Pool is in use by CephFS, refuse to delete it - MDSMap const &pending_mdsmap = mon->mdsmon()->pending_mdsmap; - if (pending_mdsmap.is_data_pool(pool) || pending_mdsmap.get_metadata_pool() == pool) { - ss << "Cannot delete pool '" << poolstr << "' because it is in use by CephFS"; - err = -EBUSY; - goto reply; - } - if (poolstr2 != poolstr || sure != "--yes-i-really-really-mean-it") { ss << "WARNING: this will *PERMANENTLY DESTROY* all data stored in pool " << poolstr << ". If you are *ABSOLUTELY CERTAIN* that is what you want, pass the pool name *twice*, " @@ -4403,13 +4395,14 @@ done: err = -EPERM; goto reply; } - int ret = _prepare_remove_pool(pool); - if (ret == 0) - ss << "pool '" << poolstr << "' deleted"; - getline(ss, rs); - wait_for_finished_proposal(new Monitor::C_Command(mon, m, ret, rs, - get_last_committed() + 1)); - return true; + err = _prepare_remove_pool(pool, &ss); + if (err == -EAGAIN) { + wait_for_finished_proposal(new C_RetryMessage(this, m)); + return true; + } + if (err < 0) + goto reply; + goto update; } else if (prefix == "osd pool rename") { string srcpoolstr, destpoolstr; cmd_getval(g_ceph_context, cmdmap, "srcpool", srcpoolstr); @@ -4956,20 +4949,64 @@ bool OSDMonitor::prepare_pool_op_create(MPoolOp *m) return true; } -int OSDMonitor::_prepare_remove_pool(uint64_t pool) +int OSDMonitor::_check_remove_pool(int64_t pool, const pg_pool_t *p, + ostream *ss) +{ + string poolstr = osdmap.get_pool_name(pool); + + // If the Pool is in use by CephFS, refuse to delete it + MDSMap const &pending_mdsmap = mon->mdsmon()->pending_mdsmap; + if (pending_mdsmap.is_data_pool(pool) || + pending_mdsmap.get_metadata_pool() == pool) { + *ss << "pool '" << poolstr << "' is in use by CephFS"; + return -EBUSY; + } + + if (p->tier_of >= 0) { + *ss << "pool '" << poolstr << "' is a tier of '" + << osdmap.get_pool_name(p->tier_of) << "'"; + return -EBUSY; + } + if (!p->tiers.empty()) { + *ss << "pool '" << poolstr << "' includes tiers " + << p->tiers; + return -EBUSY; + } + *ss << "pool '" << poolstr << "' removed"; + return 0; +} + +int OSDMonitor::_prepare_remove_pool(int64_t pool, ostream *ss) { dout(10) << "_prepare_remove_pool " << pool << dendl; + const pg_pool_t *p = osdmap.get_pg_pool(pool); + int r = _check_remove_pool(pool, p, ss); + if (r < 0) + return r; + + if (pending_inc.new_pools.count(pool)) { + // if there is a problem with the pending info, wait and retry + // this op. + pg_pool_t *p = &pending_inc.new_pools[pool]; + int r = _check_remove_pool(pool, p, ss); + if (r < 0) + return -EAGAIN; + } + if (pending_inc.old_pools.count(pool)) { - dout(10) << "_prepare_remove_pool " << pool << " pending removal" << dendl; - return 0; // already removed + dout(10) << "_prepare_remove_pool " << pool << " already pending removal" + << dendl; + return 0; } + + // remove pending_inc.old_pools.insert(pool); // remove any pg_temp mappings for this pool too for (map >::iterator p = osdmap.pg_temp->begin(); p != osdmap.pg_temp->end(); ++p) { - if (p->first.pool() == pool) { + if (p->first.pool() == (uint64_t)pool) { dout(10) << "_prepare_remove_pool " << pool << " removing obsolete pg_temp " << p->first << dendl; pending_inc.new_pg_temp[p->first].clear(); @@ -4978,7 +5015,7 @@ int OSDMonitor::_prepare_remove_pool(uint64_t pool) for (map::iterator p = osdmap.primary_temp->begin(); p != osdmap.primary_temp->end(); ++p) { - if (p->first.pool() == pool) { + if (p->first.pool() == (uint64_t)pool) { dout(10) << "_prepare_remove_pool " << pool << " removing obsolete primary_temp" << p->first << dendl; pending_inc.new_primary_temp[p->first] = -1; @@ -5008,8 +5045,16 @@ int OSDMonitor::_prepare_rename_pool(int64_t pool, string newname) bool OSDMonitor::prepare_pool_op_delete(MPoolOp *m) { - int ret = _prepare_remove_pool(m->pool); - wait_for_finished_proposal(new OSDMonitor::C_PoolOp(this, m, ret, pending_inc.epoch)); + ostringstream ss; + int ret = _prepare_remove_pool(m->pool, &ss); + if (ret == -EAGAIN) { + wait_for_finished_proposal(new C_RetryMessage(this, m)); + return true; + } + if (ret < 0) + dout(10) << __func__ << " got " << ret << " " << ss.str() << dendl; + wait_for_finished_proposal(new OSDMonitor::C_PoolOp(this, m, ret, + pending_inc.epoch)); return true; } diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 73f8a9a26300..8a87374739b0 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -231,7 +231,8 @@ private: bool preprocess_pgtemp(class MOSDPGTemp *m); bool prepare_pgtemp(class MOSDPGTemp *m); - int _prepare_remove_pool(uint64_t pool); + int _check_remove_pool(int64_t pool, const pg_pool_t *pi, ostream *ss); + int _prepare_remove_pool(int64_t pool, ostream *ss); int _prepare_rename_pool(int64_t pool, string newname); bool preprocess_pool_op ( class MPoolOp *m); -- 2.47.3