From 16c09421b20c96610bbc7d653c265f021a9ac9d3 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 15 Apr 2020 18:49:36 +0800 Subject: [PATCH] mds: don't shallow copy when decoding xattr map Otherwise inodes' xattr maps may reference a large shared buffer (from omap fetch or journal read). If mds trims and loads inode repeatly, each inode can reference different large buffer in the worst case. Fixes: https://tracker.ceph.com/issues/45090 Signed-off-by: "Yan, Zheng" (cherry picked from commit deff94c8f43bb3734b688ccb828d942b8f150638) Conflicts: src/mds/CInode.cc - CInode::decode_lock_ixattr is missing in nautilus src/mds/mdstypes.h - nautilus has a code comment that is not in master --- src/mds/CInode.cc | 4 ++-- src/mds/CInode.h | 6 ++++++ src/mds/Locker.cc | 2 +- src/mds/Server.cc | 2 +- src/mds/journal.cc | 2 +- src/mds/mdstypes.h | 16 +++++++++++++++- 6 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 3209763b7b075..0c79d4f7ac3bf 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1479,7 +1479,7 @@ void InodeStoreBase::decode_bare(bufferlist::const_iterator &bl, symlink = std::string_view(tmp); } decode(dirfragtree, bl); - decode(xattrs, bl); + decode_noshare(xattrs, bl); decode(snap_blob, bl); decode(old_inodes, bl); @@ -3795,7 +3795,7 @@ void CInode::_decode_base(bufferlist::const_iterator& p) symlink = std::string_view(tmp); } decode(dirfragtree, p); - decode(xattrs, p); + decode_noshare(xattrs, p); decode(old_inodes, p); decode(damage_flags, p); decode_snap(p); diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 27c946b0f56d7..6809d52fc5e23 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -113,6 +113,12 @@ public: frag_t pick_dirfrag(std::string_view dn); }; +inline void decode_noshare(InodeStoreBase::mempool_xattr_map& xattrs, + ceph::buffer::list::const_iterator &p) +{ + decode_noshare(xattrs, p); +} + class InodeStore : public InodeStoreBase { public: // FIXME bufferlist not part of mempool diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 90c16715f84bf..cf3c7aa5e3870 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -3511,7 +3511,7 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, dout(7) << " xattrs v" << pi.inode.xattr_version << " -> " << m->head.xattr_version << dendl; pi.inode.xattr_version = m->head.xattr_version; auto p = m->xattrbl.cbegin(); - decode(*pi.xattrs, p); + decode_noshare(*pi.xattrs, p); wrlock_force(&in->xattrlock, mut); } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 5fc238cf98f72..522fd740fbce3 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -3218,7 +3218,7 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino // xattrs on new inode? CInode::mempool_xattr_map xattrs; - decode(xattrs, p); + decode_noshare(xattrs, p); for (const auto &p : xattrs) { dout(10) << "prepare_new_inode setting xattr " << p.first << dendl; auto em = in->xattrs.emplace(std::piecewise_construct, std::forward_as_tuple(p.first), std::forward_as_tuple(p.second)); diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 0d3f41a7343b4..c117b024401bf 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -424,7 +424,7 @@ void EMetaBlob::fullbit::decode(bufferlist::const_iterator &bl) { decode(dnlast, bl); decode(dnv, bl); decode(inode, bl); - decode(xattrs, bl); + decode_noshare(xattrs, bl); if (inode.is_symlink()) decode(symlink, bl); if (inode.is_dir()) { diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 1d9688812ef39..0d6325a06821f 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -946,6 +946,20 @@ using alloc_string = std::basic_string,Allocator class Allocator> using xattr_map = compact_map, bufferptr, std::less>, Allocator, bufferptr>>>; // FIXME bufferptr not in mempool +template class Allocator> +inline void decode_noshare(xattr_map& xattrs, ceph::buffer::list::const_iterator &p) +{ + __u32 n; + decode(n, p); + while (n-- > 0) { + alloc_string key; + decode(key, p); + __u32 len; + decode(len, p); + p.copy_deep(len, xattrs[key]); + } +} + /* * old_inode_t */ @@ -978,7 +992,7 @@ void old_inode_t::decode(bufferlist::const_iterator& bl) DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); decode(first, bl); decode(inode, bl); - decode(xattrs, bl); + decode_noshare(xattrs, bl); DECODE_FINISH(bl); } -- 2.39.5