From: Kotresh HR Date: Fri, 21 Feb 2025 12:34:03 +0000 (+0530) Subject: mds/rejoin: Handle referent inode on MDSCache rejoin X-Git-Tag: v20.3.0~377^2~36 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2ec2c1d63b994f57cf0b50744ba074026ddc39fd;p=ceph.git mds/rejoin: Handle referent inode on MDSCache rejoin This broadly involves the following changes. 1. Add 'referent_ino' in the struct 'dn_strong' and the required encoding/decoding of the same. Noticed that the MMDSCacheRejoin message actually isn't versioned yet. There is a tracker [1] open for it. For now, CEPH_MDS_PROTOCOL is bumped up as usual. 2. The following functions need a change to construct the in-memory referent inode from the inode number. MDCache::rejoin_walk - add_strong_dentry, pass referent inode number to build dn_strong MDCache::handle_cache_rejoin_strong - Construct referent inode from inode number if not found in memory and add_remote_dentry MDCache::handle_cache_rejoin_ack - Bad linkage check; construct referent inode MDCache::rejoin_send_acks - add_strong_dentry, pass referent inode number to build dn_strong MDCache::handle_cache_rejoin_weak - add_strong_dentry, pass referent inode number as 0 to build dn_strong as it's a weak rejoin [1] https://tracker.ceph.com/issues/48886 Fixes: https://tracker.ceph.com/issues/54205 Signed-off-by: Kotresh HR --- diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index dbf582cee5ee..e2a83fcc6bfd 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -4344,8 +4344,9 @@ void MDCache::rejoin_walk(CDir *dir, const ref_t &rejoin) rejoin->add_strong_dentry(dir->dirfrag(), dn->get_name(), dn->get_alternate_name(), dn->first, dn->last, dnl->is_primary() ? dnl->get_inode()->ino():inodeno_t(0), - dnl->is_remote() ? dnl->get_remote_ino():inodeno_t(0), - dnl->is_remote() ? dnl->get_remote_d_type():0, + (dnl->is_remote() || dnl->is_referent_remote())? dnl->get_remote_ino():inodeno_t(0), + dnl->is_referent_remote() ? 
dnl->get_referent_inode()->ino():inodeno_t(0), + (dnl->is_remote() || dnl->is_referent_remote())? dnl->get_remote_d_type():0, dn->get_replica_nonce(), dn->lock.get_state()); dn->state_set(CDentry::STATE_REJOINING); @@ -4566,7 +4567,7 @@ void MDCache::handle_cache_rejoin_weak(const cref_t &weak) if (ack) ack->add_strong_dentry(dir->dirfrag(), dn->get_name(), dn->get_alternate_name(), dn->first, dn->last, - dnl->get_inode()->ino(), inodeno_t(0), 0, + dnl->get_inode()->ino(), inodeno_t(0), inodeno_t(0), 0, dnonce, dn->lock.get_replica_state()); // inode @@ -4815,9 +4816,22 @@ void MDCache::handle_cache_rejoin_strong(const cref_t &strong) dn = dir->lookup(ss.name, ss.snapid); } if (!dn) { - if (d.is_remote()) { - //TODO: Fix for referent remote - dn = dir->add_remote_dentry(ss.name, nullptr, d.remote_ino, d.remote_d_type, mempool::mds_co::string(d.alternate_name), d.first, ss.snapid); + if (d.is_remote() || d.is_referent_remote()) { + CInode *ref_in = nullptr; + if (d.is_referent_remote()) { + // TODO: ss.snapid for referent inode ? Since it's not snapped, always use + // default CEPH_NOSNAP. Validate this by testing. 
+ ref_in = get_inode(d.referent_ino); + if (!ref_in) { + dout(20) << __func__ << " rejoin: no dentry, referent inode not found in memory inventing " << dendl; + ref_in = rejoin_invent_inode(d.referent_ino, CEPH_NOSNAP); + ref_in->set_remote_ino(d.remote_ino); + } + dout(20) << __func__ << " rejoin: no dentry, referent inode invented " << *ref_in << dendl; + } else { + dout(20) << __func__ << " rejoin: no dentry, add remote inode " << dendl; + } + dn = dir->add_remote_dentry(ss.name, ref_in, d.remote_ino, d.remote_d_type, mempool::mds_co::string(d.alternate_name), d.first, ss.snapid); } else if (d.is_null()) { dn = dir->add_null_dentry(ss.name, d.first, ss.snapid); } else { @@ -5086,6 +5100,14 @@ void MDCache::handle_cache_rejoin_ack(const cref_t &ack) dout(10) << " had bad linkage for " << *dn << dendl; dir->unlink_inode(dn); } + } else if (dnl->is_referent_remote()) { + if (!q.second.is_referent_remote() || + q.second.remote_ino != dnl->get_remote_ino() || + q.second.remote_d_type != dnl->get_remote_d_type() || + q.second.referent_ino != dnl->get_referent_ino()) { + dout(10) << __func__ << " had bad referent remote linkage for " << *dn << dendl; + dir->unlink_inode(dn); + } } else { if (!q.second.is_null()) dout(10) << " had bad linkage for " << *dn << dendl; @@ -5095,6 +5117,24 @@ void MDCache::handle_cache_rejoin_ack(const cref_t &ack) if (dnl->is_null() && !q.second.is_null()) { if (q.second.is_remote()) { dn->dir->link_remote_inode(dn, q.second.remote_ino, q.second.remote_d_type); + } else if (q.second.is_referent_remote()) { + CInode *ref_in = get_inode(q.second.referent_ino, CEPH_NOSNAP); + if (!ref_in) { + // barebones inode; + ref_in = new CInode(this, false, 2, CEPH_NOSNAP); + auto _inode = ref_in->_get_inode(); + _inode->ino = q.second.referent_ino; + _inode->mode = S_IFREG; + _inode->layout = default_file_layout; + add_inode(ref_in); + dout(10) << __func__ << " add inode " << *ref_in << dendl; + } else if (ref_in->get_parent_dn()) { + dout(10) << 
__func__ << " had bad referent linkage for " << *(ref_in->get_parent_dn()) + << ", unlinking referent inode" << *ref_in << dendl; + ref_in->get_parent_dir()->unlink_inode(ref_in->get_parent_dn()); + } + dn->dir->link_referent_inode(dn, ref_in, q.second.remote_ino, q.second.remote_d_type); + isolated_inodes.erase(ref_in); } else { CInode *in = get_inode(q.second.ino, q.first.snapid); if (!in) { @@ -6192,8 +6232,9 @@ void MDCache::rejoin_send_acks() it->second->add_strong_dentry(dir->dirfrag(), dn->get_name(), dn->get_alternate_name(), dn->first, dn->last, dnl->is_primary() ? dnl->get_inode()->ino():inodeno_t(0), - dnl->is_remote() ? dnl->get_remote_ino():inodeno_t(0), - dnl->is_remote() ? dnl->get_remote_d_type():0, + (dnl->is_remote() || dnl->is_referent_remote()) ? dnl->get_remote_ino():inodeno_t(0), + dnl->is_referent_remote() ? dnl->get_referent_inode()->ino():inodeno_t(0), + (dnl->is_remote() || dnl->is_referent_remote()) ? dnl->get_remote_d_type():0, ++r.second, dn->lock.get_replica_state()); // peer missed MDentrylink message ? 
diff --git a/src/mds/MDSDaemon.h b/src/mds/MDSDaemon.h index e7cb3151bfa1..7fd466603cd0 100644 --- a/src/mds/MDSDaemon.h +++ b/src/mds/MDSDaemon.h @@ -36,7 +36,7 @@ #include "MDSMap.h" #include "MDSRank.h" -#define CEPH_MDS_PROTOCOL 36 /* cluster internal */ +#define CEPH_MDS_PROTOCOL 37 /* cluster internal */ class Messenger; class MonClient; diff --git a/src/messages/MMDSCacheRejoin.h b/src/messages/MMDSCacheRejoin.h index 9237c79b6e77..08872c5c927c 100644 --- a/src/messages/MMDSCacheRejoin.h +++ b/src/messages/MMDSCacheRejoin.h @@ -108,14 +108,16 @@ public: std::string alternate_name; inodeno_t ino = 0; inodeno_t remote_ino = 0; + inodeno_t referent_ino = 0; unsigned char remote_d_type = 0; uint32_t nonce = 0; int32_t lock = 0; dn_strong() = default; - dn_strong(snapid_t f, std::string_view altn, inodeno_t pi, inodeno_t ri, unsigned char rdt, int n, int l) : - first(f), alternate_name(altn), ino(pi), remote_ino(ri), remote_d_type(rdt), nonce(n), lock(l) {} + dn_strong(snapid_t f, std::string_view altn, inodeno_t pi, inodeno_t ri, inodeno_t ref_ino, unsigned char rdt, int n, int l) : + first(f), alternate_name(altn), ino(pi), remote_ino(ri), referent_ino(ref_ino), remote_d_type(rdt), nonce(n), lock(l) {} bool is_primary() const { return ino > 0; } bool is_remote() const { return remote_ino > 0; } + bool is_referent_remote() const { return remote_ino > 0 && referent_ino > 0; } bool is_null() const { return ino == 0 && remote_ino == 0; } void encode(ceph::buffer::list &bl) const { using ceph::encode; @@ -126,6 +128,7 @@ public: encode(nonce, bl); encode(lock, bl); encode(alternate_name, bl); + encode(referent_ino, bl); } void decode(ceph::buffer::list::const_iterator &bl) { using ceph::decode; @@ -136,15 +139,17 @@ public: decode(nonce, bl); decode(lock, bl); decode(alternate_name, bl); + decode(referent_ino, bl); } static void generate_test_instances(std::list& ls) { ls.push_back(new dn_strong); - ls.push_back(new dn_strong(1, "alternate_name", 2, 3, 4, 5, 6)); + 
ls.push_back(new dn_strong(1, "alternate_name", 2, 3, 4, 5, 6, 7)); } void dump(ceph::Formatter *f) const { f->dump_unsigned("first", first); f->dump_string("alternate_name", alternate_name); f->dump_unsigned("ino", ino); + f->dump_unsigned("referent_ino", referent_ino); f->dump_unsigned("remote_ino", remote_ino); f->dump_unsigned("remote_d_type", remote_d_type); f->dump_unsigned("nonce", nonce); @@ -275,9 +280,9 @@ public: void add_weak_primary_dentry(inodeno_t dirino, std::string_view dname, snapid_t first, snapid_t last, inodeno_t ino) { weak[dirino][string_snap_t(dname, last)] = dn_weak(first, ino); } - void add_strong_dentry(dirfrag_t df, std::string_view dname, std::string_view altn, snapid_t first, snapid_t last, inodeno_t pi, inodeno_t ri, unsigned char rdt, int n, int ls) { + void add_strong_dentry(dirfrag_t df, std::string_view dname, std::string_view altn, snapid_t first, snapid_t last, inodeno_t pi, inodeno_t ri, inodeno_t ref_ino, unsigned char rdt, int n, int ls) { auto& m = strong_dentries[df]; - m.insert_or_assign(string_snap_t(dname, last), dn_strong(first, altn, pi, ri, rdt, n, ls)); + m.insert_or_assign(string_snap_t(dname, last), dn_strong(first, altn, pi, ri, ref_ino, rdt, n, ls)); } void add_dentry_authpin(dirfrag_t df, std::string_view dname, snapid_t last, const metareqid_t& ri, __u32 attempt) {