From f1a360428c0ad19847899617f581f6bfe5da5356 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 15 Apr 2020 18:49:36 +0800 Subject: [PATCH] mds: don't shallow copy when decoding xattr map Otherwise inodes' xattr maps may reference a large shared buffer (from omap fetch or journal read). If mds trims and loads inode repeatly, each inode can reference different large buffer in the worst case. Fixes: https://tracker.ceph.com/issues/45090 Signed-off-by: "Yan, Zheng" (cherry picked from commit deff94c8f43bb3734b688ccb828d942b8f150638) --- src/mds/CInode.cc | 6 +++--- src/mds/CInode.h | 6 ++++++ src/mds/Locker.cc | 2 +- src/mds/Server.cc | 2 +- src/mds/journal.cc | 2 +- src/mds/mdstypes.h | 16 +++++++++++++++- 6 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 57908d996518d..e11e1e4d75fef 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1541,7 +1541,7 @@ void InodeStoreBase::decode_bare(bufferlist::const_iterator &bl, symlink = std::string_view(tmp); } decode(dirfragtree, bl); - decode(xattrs, bl); + decode_noshare(xattrs, bl); decode(snap_blob, bl); decode(old_inodes, bl); @@ -1965,7 +1965,7 @@ void CInode::decode_lock_ixattr(bufferlist::const_iterator& p) utime_t tm; decode(tm, p); if (inode.ctime < tm) inode.ctime = tm; - decode(xattrs, p); + decode_noshare(xattrs, p); DECODE_FINISH(p); } @@ -4066,7 +4066,7 @@ void CInode::_decode_base(bufferlist::const_iterator& p) symlink = std::string_view(tmp); } decode(dirfragtree, p); - decode(xattrs, p); + decode_noshare(xattrs, p); decode(old_inodes, p); decode(damage_flags, p); decode_snap(p); diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 5f0da6415c7c1..c50815e94fe0d 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -104,6 +104,12 @@ public: damage_flags_t damage_flags = 0; }; +inline void decode_noshare(InodeStoreBase::mempool_xattr_map& xattrs, + ceph::buffer::list::const_iterator &p) +{ + decode_noshare(xattrs, p); +} + class InodeStore : public InodeStoreBase { public: void encode(bufferlist &bl, uint64_t features) const { diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index b811db4870cf3..4594d97dd0d6a 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -3833,7 +3833,7 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, dout(7) << " xattrs v" << pi.inode.xattr_version << " -> " << m->head.xattr_version << dendl; pi.inode.xattr_version = m->head.xattr_version; auto p = m->xattrbl.cbegin(); - decode(*pi.xattrs, p); + decode_noshare(*pi.xattrs, p); wrlock_force(&in->xattrlock, mut); } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 3aade7aca79d7..b16a869d223d9 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -3279,7 +3279,7 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino // xattrs on new inode? CInode::mempool_xattr_map xattrs; - decode(xattrs, p); + decode_noshare(xattrs, p); for (const auto &p : xattrs) { dout(10) << "prepare_new_inode setting xattr " << p.first << dendl; auto em = in->xattrs.emplace(std::piecewise_construct, std::forward_as_tuple(p.first), std::forward_as_tuple(p.second)); diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 14966f14ce153..aa783f5f0975b 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -428,7 +428,7 @@ void EMetaBlob::fullbit::decode(bufferlist::const_iterator &bl) { decode(dnlast, bl); decode(dnv, bl); decode(inode, bl); - decode(xattrs, bl); + decode_noshare(xattrs, bl); if (inode.is_symlink()) decode(symlink, bl); if (inode.is_dir()) { diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index ddaed5666d609..fa85574f135a4 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -925,6 +925,20 @@ using alloc_string = std::basic_string,Allocator class Allocator> using xattr_map = compact_map, bufferptr, std::less>, Allocator, bufferptr>>>; // FIXME bufferptr not in mempool +template class Allocator> +inline void decode_noshare(xattr_map& xattrs, ceph::buffer::list::const_iterator &p) +{ + __u32 n; + decode(n, p); + while (n-- > 0) { + alloc_string key; + decode(key, p); + __u32 len; + decode(len, p); + p.copy_deep(len, xattrs[key]); + } +} + template class Allocator = std::allocator> struct old_inode_t { snapid_t first; @@ -954,7 +968,7 @@ void old_inode_t::decode(bufferlist::const_iterator& bl) DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); decode(first, bl); decode(inode, bl); - decode(xattrs, bl); + decode_noshare(xattrs, bl); DECODE_FINISH(bl); } -- 2.39.5