From: Kotresh HR Date: Wed, 19 Feb 2025 18:48:12 +0000 (+0530) Subject: mds/journal: Journal referent remote dentry X-Git-Tag: v20.3.0~377^2~43 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1cce52fd0d88586d0b6368000c6e093ea2b3feb5;p=ceph.git mds/journal: Journal referent remote dentry Add machinery to journal the referent remote dentry. The call to metablob's add_remote_dentry currently adds only a plain remote dentry, not a referent remote dentry. This will be fixed as part of operations like link, unlink, and rename, which make use of referent inodes. Fixes: https://tracker.ceph.com/issues/54205 Signed-off-by: Kotresh HR --- diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 51926b46ccd1..65f4b83f8889 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -577,8 +577,10 @@ void MDCache::_create_system_file(CDir *dir, std::string_view name, CInode *in, } else { predirty_journal_parents(mut, &le->metablob, in, dir, PREDIRTY_DIR, 1); journal_dirty_inode(mut.get(), &le->metablob, in); + //TODO: A referent inode for system file ?? 
dn->push_projected_linkage(in->ino(), in->d_type()); - le->metablob.add_remote_dentry(dn, true, in->ino(), in->d_type()); + dout(10) << __func__ << " add remote dentry " << *dn << dendl; + le->metablob.add_remote_dentry(dn, true, in->ino(), in->d_type(), 0, nullptr); le->metablob.add_root(true, in); } if (mdir) diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 23772c8f8b13..a77ca844afb0 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -7749,7 +7749,8 @@ void Server::_link_local(const MDRequestRef& mdr, CDentry *dn, CInode *targeti, le->metablob.add_client_req(mdr->reqid, mdr->client_request->get_oldest_client_tid()); mdcache->predirty_journal_parents(mdr, &le->metablob, targeti, dn->get_dir(), PREDIRTY_DIR, 1); // new dn mdcache->predirty_journal_parents(mdr, &le->metablob, targeti, 0, PREDIRTY_PRIMARY); // targeti - le->metablob.add_remote_dentry(dn, true, targeti->ino(), targeti->d_type()); // new remote + // TODO: Pass referent inode upon creation. It's adding just remote dentry now + le->metablob.add_remote_dentry(dn, true, targeti->ino(), targeti->d_type(), 0, nullptr); // new remote mdcache->journal_dirty_inode(mdr.get(), &le->metablob, targeti); // do this after predirty_*, to avoid funky extra dnl arg @@ -7867,7 +7868,8 @@ void Server::_link_remote(const MDRequestRef& mdr, bool inc, CDentry *dn, CInode if (inc) { dn->pre_dirty(); mdcache->predirty_journal_parents(mdr, &le->metablob, targeti, dn->get_dir(), PREDIRTY_DIR, 1); - le->metablob.add_remote_dentry(dn, true, targeti->ino(), targeti->d_type()); // new remote + // TODO: Pass referent inode upon creation. 
It's adding just remote dentry now + le->metablob.add_remote_dentry(dn, true, targeti->ino(), targeti->d_type(), 0, nullptr); // new remote dn->push_projected_linkage(targeti->ino(), targeti->d_type()); } else { dn->pre_dirty(); @@ -9879,7 +9881,8 @@ void Server::_rename_prepare(const MDRequestRef& mdr, destdn->first = mdcache->get_global_snaprealm()->get_newest_seq() + 1; if (destdn->is_auth()) - metablob->add_remote_dentry(destdn, true, srcdnl->get_remote_ino(), srcdnl->get_remote_d_type()); + // TODO: Pass referent inode upon creation. It's adding just remote dentry now + metablob->add_remote_dentry(destdn, true, srcdnl->get_remote_ino(), srcdnl->get_remote_d_type(), 0, nullptr); if (srci->is_auth() ) { // it's remote if (mdr->peer_request) { diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index 02607ce8ba13..e9470d74e3bd 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -147,18 +147,24 @@ public: version_t dnv = 0; inodeno_t ino = 0; unsigned char d_type = '\0'; + inodeno_t referent_ino = 0; + CInode::inode_const_ptr referent_inode; // if it's not XXX should not be part of mempool; wait for std::pmr to simplify bool dirty = false; remotebit(std::string_view d, std::string_view an, snapid_t df, snapid_t dl, version_t v, inodeno_t i, unsigned char dt, bool dr) : dn(d), alternate_name(an), dnfirst(df), dnlast(dl), dnv(v), ino(i), d_type(dt), dirty(dr) { } + remotebit(std::string_view d, std::string_view an, snapid_t df, snapid_t dl, version_t v, inodeno_t i, unsigned char dt, inodeno_t ref_ino, const CInode::inode_const_ptr& ref_inode, bool dr) : + dn(d), alternate_name(an), dnfirst(df), dnlast(dl), dnv(v), ino(i), d_type(dt), referent_ino(ref_ino), referent_inode(ref_inode), dirty(dr) {} explicit remotebit(bufferlist::const_iterator &p) { decode(p); } remotebit() = default; - void encode(bufferlist& bl) const; + void encode(bufferlist& bl, uint64_t features) const; void decode(bufferlist::const_iterator &bl); void 
print(std::ostream& out) const { out << " remotebit dn " << dn << " [" << dnfirst << "," << dnlast << "] dnv " << dnv << " ino " << ino + << " referent ino " << referent_ino + << " referent inode " << referent_inode << " dirty=" << dirty; if (!alternate_name.empty()) { out << " altn " << binstrprint(alternate_name, 8); @@ -168,7 +174,7 @@ public: void dump(Formatter *f) const; static void generate_test_instances(std::list& ls); }; - WRITE_CLASS_ENCODER(remotebit) + WRITE_CLASS_ENCODER_FEATURES(remotebit) /* * nullbit - a null dentry @@ -288,7 +294,7 @@ public: using ceph::encode; if (!dn_decoded) return; encode(dfull, dnbl, features); - encode(dremote, dnbl); + encode(dremote, dnbl, features); encode(dnull, dnbl); } void _decode_bits() const { @@ -425,21 +431,37 @@ private: } void add_remote_dentry(CDentry *dn, bool dirty) { - add_remote_dentry(add_dir(dn->get_dir(), false), dn, dirty, 0, 0); + add_remote_dentry(add_dir(dn->get_dir(), false), dn, dirty, 0, 0, 0, nullptr); } - void add_remote_dentry(CDentry *dn, bool dirty, inodeno_t rino, int rdt) { - add_remote_dentry(add_dir(dn->get_dir(), false), dn, dirty, rino, rdt); + void add_remote_dentry(CDentry *dn, bool dirty, inodeno_t rino, int rdt, inodeno_t referent_ino, CInode *ref_in) { + add_remote_dentry(add_dir(dn->get_dir(), false), dn, dirty, rino, rdt, referent_ino, ref_in); } void add_remote_dentry(dirlump& lump, CDentry *dn, bool dirty, - inodeno_t rino=0, unsigned char rdt=0) { + inodeno_t rino=0, unsigned char rdt=0, inodeno_t referent_ino=0, CInode *ref_in=nullptr) { dn->check_corruption(false); + /* In multi-version inode, i.e., a file has hardlinks and the primary link is being deleted, + * the primary inode is added as remote in the journal. In this case, it will not have a + * referent inode. So referent_ino=0 and ref_in=nullptr. 
+ */ if (!rino) { rino = dn->get_projected_linkage()->get_remote_ino(); rdt = dn->get_projected_linkage()->get_remote_d_type(); + referent_ino = dn->get_projected_linkage()->get_referent_ino(); + ref_in = dn->get_projected_linkage()->get_referent_inode(); } + lump.nremote++; - lump.add_dremote(dn->get_name(), dn->get_alternate_name(), dn->first, dn->last, - dn->get_projected_version(), rino, rdt, dirty); + if (ref_in) { + ceph_assert(referent_ino > 0); + const auto& ref_pi = ref_in->get_projected_inode(); + ceph_assert(ref_pi->version > 0); + lump.add_dremote(dn->get_name(), dn->get_alternate_name(), dn->first, dn->last, + dn->get_projected_version(), rino, rdt, referent_ino, ref_pi, dirty); + } else { + ceph_assert(referent_ino == 0); + lump.add_dremote(dn->get_name(), dn->get_alternate_name(), dn->first, dn->last, + dn->get_projected_version(), rino, rdt, dirty); + } } // return remote pointer to to-be-journaled inode @@ -502,8 +524,8 @@ private: add_dentry(lump, dn, dn->is_dirty(), dirty_parent, dirty_pool); } void add_dentry(dirlump& lump, CDentry *dn, bool dirty, bool dirty_parent, bool dirty_pool) { - // primary or remote - if (dn->get_projected_linkage()->is_remote()) { + // primary or remote or referent_remote + if (dn->get_projected_linkage()->is_remote() || dn->get_projected_linkage()->is_referent_remote()) { add_remote_dentry(dn, dirty); } else if (dn->get_projected_linkage()->is_null()) { add_null_dentry(dn, dirty); @@ -607,7 +629,7 @@ private: }; WRITE_CLASS_ENCODER_FEATURES(EMetaBlob) WRITE_CLASS_ENCODER_FEATURES(EMetaBlob::fullbit) -WRITE_CLASS_ENCODER(EMetaBlob::remotebit) +WRITE_CLASS_ENCODER_FEATURES(EMetaBlob::remotebit) WRITE_CLASS_ENCODER(EMetaBlob::nullbit) WRITE_CLASS_ENCODER_FEATURES(EMetaBlob::dirlump) diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 4bbf02a7413c..53f3bb7e5886 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -683,10 +683,9 @@ void EMetaBlob::fullbit::update_inode(MDSRank *mds, CInode *in) } // 
EMetaBlob::remotebit - -void EMetaBlob::remotebit::encode(bufferlist& bl) const +void EMetaBlob::remotebit::encode(bufferlist& bl, uint64_t features) const { - ENCODE_START(3, 2, bl); + ENCODE_START(4, 2, bl); encode(dn, bl); encode(std::tuple{ dnfirst, @@ -697,12 +696,15 @@ void EMetaBlob::remotebit::encode(bufferlist& bl) const dirty, }, bl, 0); encode(alternate_name, bl); + encode(referent_ino, bl); + if (referent_ino) + encode(*referent_inode, bl, features); ENCODE_FINISH(bl); } void EMetaBlob::remotebit::decode(bufferlist::const_iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl); + DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl); decode(dn, bl); decode(dnfirst, bl); decode(dnlast, bl); @@ -712,6 +714,16 @@ void EMetaBlob::remotebit::decode(bufferlist::const_iterator &bl) decode(dirty, bl); if (struct_v >= 3) decode(alternate_name, bl); + if (struct_v >= 4) { + decode(referent_ino, bl); + if (referent_ino) { + auto _inode = CInode::allocate_inode(); + decode(*_inode, bl); + referent_inode = std::move(_inode); + } else { + referent_inode = NULL; + } + } DECODE_FINISH(bl); } @@ -745,14 +757,16 @@ void EMetaBlob::remotebit::dump(Formatter *f) const f->dump_string("d_type", type_string); f->dump_string("dirty", dirty ? 
"true" : "false"); f->dump_string("alternate_name", alternate_name); + f->dump_int("referentino", referent_ino); } void EMetaBlob::remotebit:: generate_test_instances(std::list& ls) { - remotebit *remote = new remotebit("/test/dn", "", 0, 10, 15, 1, IFTODT(S_IFREG), false); + auto _inode = CInode::allocate_inode(); + remotebit *remote = new remotebit("/test/dn", "", 0, 10, 15, 1, IFTODT(S_IFREG), 2, _inode, false); ls.push_back(remote); - remote = new remotebit("/test/dn2", "foo", 0, 10, 15, 1, IFTODT(S_IFREG), false); + remote = new remotebit("/test/dn2", "foo", 0, 10, 15, 1, IFTODT(S_IFREG), 2, _inode, false); ls.push_back(remote); } diff --git a/src/tools/ceph-dencoder/mds_types.h b/src/tools/ceph-dencoder/mds_types.h index 1272017c438e..8cc39f34df99 100644 --- a/src/tools/ceph-dencoder/mds_types.h +++ b/src/tools/ceph-dencoder/mds_types.h @@ -96,7 +96,7 @@ TYPE_FEATUREFUL_NOCOPY(EImportStart) #include "mds/events/EMetaBlob.h" TYPE_FEATUREFUL_NOCOPY(EMetaBlob::fullbit) -TYPE(EMetaBlob::remotebit) +TYPE_FEATUREFUL_NOCOPY(EMetaBlob::remotebit) TYPE(EMetaBlob::nullbit) TYPE_FEATUREFUL_NOCOPY(EMetaBlob::dirlump) TYPE_FEATUREFUL_NOCOPY(EMetaBlob)