Older versions of Ceph weren't strict about preventing pool deletion when the
MDSMap still referred to the to-be-deleted pool. When dealing with a cluster
upgrade, we should handle that gracefully by cleaning out data pools
that have been removed.
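As a rough standalone sketch of that cleanup (illustration only, not the patch
itself; sanitize_pools, the pool-ID vector, and the pool_exists lambda are
stand-ins for MDSMap's data pool list and the OSDMap existence check), the idea
is simply to walk the data pool list and erase any pool the cluster no longer
knows about:
// Illustration only -- not Ceph code. The vector stands in for
// MDSMap::data_pools and pool_exists for the OSDMap lookup.
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

static void sanitize_pools(std::vector<int64_t>& data_pools,
                           const std::function<bool(int64_t)>& pool_exists)
{
  for (auto it = data_pools.begin(); it != data_pools.end();) {
    if (!pool_exists(*it)) {
      std::cout << "dropping non-existent data pool " << *it << std::endl;
      it = data_pools.erase(it);   // erase() returns the next valid iterator
    } else {
      ++it;
    }
  }
}

int main()
{
  std::vector<int64_t> data_pools{1, 3, 7};
  // Pretend pool 3 was deleted out from under the MDSMap.
  auto pool_exists = [](int64_t pool) { return pool != 3; };
  sanitize_pools(data_pools, pool_exists);
  for (auto p : data_pools)
    std::cout << "kept pool " << p << std::endl;   // prints 1 and 7
  return 0;
}
The real change below does the same thing inside MDSMap::sanitize, and
FSMap::sanitize applies it to every filesystem's MDSMap.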
Reproduced this by allowing CephFS pools to be deleted:
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 85c47c13da6..694b240cb9f 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -10962,7 +10962,7 @@ int OSDMonitor::_check_remove_pool(int64_t pool_id, const pg_pool_t& pool,
FSMap const &pending_fsmap = mon->mdsmon()->get_pending();
if (pending_fsmap.pool_in_use(pool_id)) {
*ss << "pool '" << poolstr << "' is in use by CephFS";
- return -EBUSY;
+ //return -EBUSY;
}
if (pool.tier_of >= 0) {
pdonnell@icewind ~/ceph/build$ bin/ceph osd pool create derp 4 4
pool 'derp' created
pdonnell@icewind ~/ceph/build$ bin/ceph fs add_data_pool cephfs_a derp
added data pool 3 to fsmap
pdonnell@icewind ~/ceph/build$ bin/ceph osd pool rm derp derp --yes-i-really-really-mean-it
pool 'derp' is in use by CephFSpool 'derp' removed
pdonnell@icewind ~/ceph/build$ bin/ceph fs ls
...
2017-10-03 12:50:48.409561 7f9e2e05b700 -1 /home/pdonnell/ceph/src/osd/OSDMap.h: In function 'const string& OSDMap::get_pool_name(int64_t) const' thread 7f9e2e05b700 time 2017-10-03 12:50:48.407897
/home/pdonnell/ceph/src/osd/OSDMap.h: 1184: FAILED assert(i != pool_name.end())
ceph version 12.1.2-2624-g37884a41964 (37884a419640b446fffc1fa4d6074c97339fdd96) mimic (dev)
1: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0xf5) [0x564ebb5420f5]
2: (()+0x41dade) [0x564ebb3cbade]
3: (MDSMonitor::preprocess_command(boost::intrusive_ptr<MonOpRequest>)+0x1fb9) [0x564ebb4cd119]
Note: when testing this fix, run something like the following after removing the data pool:
pdonnell@icewind ~/ceph/build$ bin/ceph fs set cephfs_a max_mds 2
Setting max_mds will cause a new FSMap to be created in which MDSMap::sanitize
is called; this simulates the mons' initial load+sanitize of a legacy Hammer
MDSMap.
Fixes: http://tracker.ceph.com/issues/21568
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
DECODE_FINISH(p);
}
+void FSMap::sanitize(std::function<bool(int64_t pool)> pool_exists)
+{
+ for (auto &fs : filesystems) {
+ fs.second->mds_map.sanitize(pool_exists);
+ }
+}
void Filesystem::encode(bufferlist& bl, uint64_t features) const
{
bufferlist::iterator p = bl.begin();
decode(p);
}
+ void sanitize(std::function<bool(int64_t pool)> pool_exists);
void print(ostream& out) const;
void print_summary(Formatter *f, ostream *out) const;
*
*/
+#include "common/debug.h"
+#include "mon/health_check.h"
#include "MDSMap.h"
#include <sstream>
using std::stringstream;
-#include "mon/health_check.h"
-
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_
// features
CompatSet get_mdsmap_compat_set_all() {
ENCODE_FINISH(bl);
}
+void MDSMap::sanitize(std::function<bool(int64_t pool)> pool_exists)
+{
+ /* Before we did stricter checking, it was possible to remove a data pool
+ * without also deleting it from the MDSMap. Check for that here after
+ * decoding the data pools.
+ */
+
+ for (auto it = data_pools.begin(); it != data_pools.end();) {
+ if (!pool_exists(*it)) {
+      dout(0) << "removed non-existent data pool " << *it << " from MDSMap" << dendl;
+ it = data_pools.erase(it);
+ } else {
+ it++;
+ }
+ }
+}
+
void MDSMap::decode(bufferlist::iterator& p)
{
std::map<mds_rank_t,int32_t> inc; // Legacy field, parse and drop
bufferlist::iterator p = bl.begin();
decode(p);
}
-
+ void sanitize(std::function<bool(int64_t pool)> pool_exists);
void print(ostream& out) const;
void print_summary(Formatter *f, ostream *out) const;
assert(fsmap_bl.length() > 0);
dout(10) << __func__ << " got " << version << dendl;
fsmap.decode(fsmap_bl);
+ auto &osdmap = mon->osdmon()->osdmap;
+ fsmap.sanitize([&osdmap](int64_t pool){return osdmap.have_pg_pool(pool);});
// new map
dout(4) << "new map" << dendl;