From 9e250226a83a7075bcfef024f3e60dbb6264c942 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 21 Sep 2020 12:55:31 +0800 Subject: [PATCH] mds: add alternate_name feature support for dentries This adds support for the "alternate_name" feature, which saves an alternate name for each dentry. This alternate name is not used in path lookup or for any other usual file system purpose. The name is simply an added blob of metadata on the dentry that is distributed to clients so that "long" file names may be supported for clients which require them. In the case of an fscrypt-enhanced kernel mount driver, the long name may be the ciphertext (exceeding FILENAME_MAX) of a long file name. Because this affects only files with long file names, the use of this feature should be rare but could be common for some unusual applications. The client mount should first check the CEPHFS_FEATURE_ALTERNATE_NAME feature bit to determine whether the MDS supports this feature. The alternate_name is transmitted as part of the message payload in MClientRequest when setting the alternate_name. The LeaseStat structure in MClientReply contains the alternate_name. When executing a metadata mutation RPC, the client will set the alternate_name (if it exists) as part of the operation. The MDS will pick that up and set it on the new or mutated dentry. 
Fixes: https://tracker.ceph.com/issues/47162 Signed-off-by: Xiubo Li Signed-off-by: Patrick Donnelly --- src/client/Client.cc | 2 ++ src/client/Dentry.h | 1 + src/mds/CDentry.cc | 20 ++++++++++++++++---- src/mds/CDentry.h | 9 ++++++++- src/mds/CDir.cc | 20 ++++++++++++++++---- src/mds/CDir.h | 1 + src/mds/Locker.cc | 5 ++++- src/mds/Migrator.cc | 17 +++++++++++++---- src/mds/Server.cc | 11 +++++++++++ src/mds/cephfs_features.cc | 1 + src/mds/cephfs_features.h | 4 +++- src/messages/MClientReply.h | 6 +++++- src/messages/MClientRequest.h | 14 +++++++++++++- src/tools/cephfs/DataScan.cc | 14 +++++++++++--- src/tools/cephfs/JournalTool.cc | 6 +++++- src/tools/cephfs/MetaTool.cc | 9 +++++++-- 16 files changed, 117 insertions(+), 23 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index a21e65ec6dd97..01e130037a256 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -1061,6 +1061,7 @@ Dentry *Client::insert_dentry_inode(Dir *dir, const string& dname, LeaseStat *dl } update_dentry_lease(dn, dlease, from, session); + dn->alternate_name = std::move(dlease->alternate_name); return dn; } @@ -1247,6 +1248,7 @@ void Client::insert_readdir_results(MetaRequest *request, MetaSession *session, // new dn dn = link(dir, dname, in, NULL); } + dn->alternate_name = std::move(dlease.alternate_name); update_dentry_lease(dn, &dlease, request->sent_stamp, session); if (hash_order) { diff --git a/src/client/Dentry.h b/src/client/Dentry.h index 7559c45f49b59..483a31eccb324 100644 --- a/src/client/Dentry.h +++ b/src/client/Dentry.h @@ -90,6 +90,7 @@ public: uint64_t lease_gen = 0; ceph_seq_t lease_seq = 0; int cap_shared_gen = 0; + std::string alternate_name; private: xlist::item inode_xlist_link; diff --git a/src/mds/CDentry.cc b/src/mds/CDentry.cc index b674f8ba0d428..9831b3dc9a488 100644 --- a/src/mds/CDentry.cc +++ b/src/mds/CDentry.cc @@ -235,6 +235,12 @@ void CDentry::make_path(filepath& fp, bool projected) const fp.push_dentry(get_name()); } +void 
CDentry::set_alternate_name(std::string_view _alternate_name) +{ + dout(10) << "setting alternate_name on " << *this << dendl; + alternate_name = _alternate_name; +} + /* * we only add ourselves to remote_parents when the linkage is * active (no longer projected). if the passed dnl is projected, @@ -530,24 +536,30 @@ void CDentry::_put() } } -void CDentry::encode_remote(inodeno_t& ino, unsigned char d_type, bufferlist &bl) +void CDentry::encode_remote(inodeno_t& ino, unsigned char d_type, + std::string_view alternate_name, + bufferlist &bl) { - bl.append("l", 1); // remote link + bl.append('l'); // remote link // marker, name, ino - ENCODE_START(1, 1, bl); + ENCODE_START(2, 1, bl); encode(ino, bl); encode(d_type, bl); + encode(alternate_name, bl); ENCODE_FINISH(bl); } void CDentry::decode_remote(char icode, inodeno_t& ino, unsigned char& d_type, + mempool::mds_co::string& alternate_name, ceph::buffer::list::const_iterator& bl) { if (icode == 'l') { - DECODE_START(1, bl); + DECODE_START(2, bl); decode(ino, bl); decode(d_type, bl); + if (struct_v >= 2) + decode(alternate_name, bl); DECODE_FINISH(bl); } else if (icode == 'L') { decode(ino, bl); diff --git a/src/mds/CDentry.h b/src/mds/CDentry.h index a192527dd348f..5b60b838155cd 100644 --- a/src/mds/CDentry.h +++ b/src/mds/CDentry.h @@ -151,6 +151,9 @@ public: const CDir *get_dir() const { return dir; } CDir *get_dir() { return dir; } std::string_view get_name() const { return std::string_view(name); } + void set_alternate_name(std::string_view _alternate_name); + std::string_view get_alternate_name() const { return std::string_view(alternate_name); } + void decode_alternate_name(bufferlist::const_iterator &bl) { decode(alternate_name, bl); } __u32 get_hash() const { return hash; } @@ -332,8 +335,11 @@ public: void print(std::ostream& out) override; void dump(ceph::Formatter *f) const; - static void encode_remote(inodeno_t& ino, unsigned char d_type, bufferlist &bl); + static void encode_remote(inodeno_t& ino, 
unsigned char d_type, + std::string_view alternate_name, + bufferlist &bl); static void decode_remote(char icode, inodeno_t& ino, unsigned char& d_type, + mempool::mds_co::string& alternate_name, ceph::buffer::list::const_iterator& bl); __u32 hash; @@ -370,6 +376,7 @@ protected: private: mempool::mds_co::string name; + mempool::mds_co::string alternate_name; }; std::ostream& operator<<(std::ostream& out, const CDentry& dn); diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 47834b116282e..5458dadcf94f7 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -1766,8 +1766,11 @@ CDentry *CDir::_load_dentry( // hard link inodeno_t ino; unsigned char d_type; + mempool::mds_co::string alternate_name; - CDentry::decode_remote(type, ino, d_type, q); + CDentry::decode_remote(type, ino, d_type, alternate_name, q); + if (alternate_name.length()) + dn->set_alternate_name(std::move(alternate_name)); if (stale) { if (!dn) { @@ -1808,7 +1811,9 @@ CDentry *CDir::_load_dentry( // inode // Load inode data before looking up or constructing CInode if (type == 'i') { - DECODE_START(1, q); + DECODE_START(2, q); + if (struct_v >= 2) + dn->decode_alternate_name(q); inode_data.decode(q); DECODE_FINISH(q); } else { @@ -2228,6 +2233,7 @@ void CDir::_encode_primary_inode_base(dentry_commit_item &item, bufferlist &dfts encode(item.oldest_snap, bl); encode(item.damage_flags, bl); + encode(item.alternate_name, bl); ENCODE_FINISH(bl); } @@ -2317,12 +2323,13 @@ void CDir::_omap_commit_ops(int r, int op_prio, int64_t metapool, version_t vers for (auto &item : to_set) { encode(item.first, bl); if (item.is_remote) { - CDentry::encode_remote(item.ino, item.d_type, bl); // remote link + // remote link + CDentry::encode_remote(item.ino, item.d_type, item.alternate_name, bl); } else { // marker, name, inode, [symlink string] bl.append('i'); // inode - ENCODE_START(1, 1, bl); + ENCODE_START(2, 1, bl); _encode_primary_inode_base(item, dfts, bl); ENCODE_FINISH(bl); } @@ -2378,6 +2385,7 @@ void 
CDir::_omap_commit(int op_prio) // reverve enough memories, which maybe larger than the actually needed to_set.reserve(count); + // for dir fragtrees bufferlist dfts(CEPH_PAGE_SIZE); auto write_one = [&](CDentry *dn) { @@ -2442,6 +2450,10 @@ void CDir::_parse_dentry(CDentry *dn, dentry_commit_item &item, item.first = dn->first; + // Only in very rare case the dn->alternate_name not empty, + // so it won't cost much to copy it here + item.alternate_name = dn->get_alternate_name(); + // primary or remote? if (dn->linkage.is_remote()) { item.is_remote = true; diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 030d9c949cbdd..985f94cb06296 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -64,6 +64,7 @@ public: inodeno_t ino; unsigned char d_type; + mempool::mds_co::string alternate_name; bool snaprealm = false; sr_t srnode; diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 8d2740ad7eead..dc112871d8926 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -4252,6 +4252,7 @@ void Locker::issue_client_lease(CDentry *dn, MDRequestRef &mdr, int mask, lstat.mask = CEPH_LEASE_VALID | mask; lstat.duration_ms = (uint32_t)(1000 * mdcache->client_lease_durations[pool]); lstat.seq = ++l->seq; + lstat.alternate_name = mempool::mds_co::string(dn->get_alternate_name()); encode_lease(bl, session->info, lstat); dout(20) << "issue_client_lease seq " << lstat.seq << " dur " << lstat.duration_ms << "ms " << " on " << *dn << dendl; @@ -4259,6 +4260,7 @@ void Locker::issue_client_lease(CDentry *dn, MDRequestRef &mdr, int mask, // null lease LeaseStat lstat; lstat.mask = mask; + lstat.alternate_name = mempool::mds_co::string(dn->get_alternate_name()); encode_lease(bl, session->info, lstat); dout(20) << "issue_client_lease no/null lease on " << *dn << dendl; } @@ -4291,10 +4293,11 @@ void Locker::encode_lease(bufferlist& bl, const session_info_t& info, const LeaseStat& ls) { if (info.has_feature(CEPHFS_FEATURE_REPLY_ENCODING)) { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 1, bl); 
encode(ls.mask, bl); encode(ls.duration_ms, bl); encode(ls.seq, bl); + encode(ls.alternate_name, bl); ENCODE_FINISH(bl); } else { diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 36a8dc0712fee..f3b653dd47b4f 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -1793,7 +1793,8 @@ void Migrator::encode_export_dir(bufferlist& exportbl, if (dn->get_linkage()->is_remote()) { inodeno_t ino = dn->get_linkage()->get_remote_ino(); unsigned char d_type = dn->get_linkage()->get_remote_d_type(); - CDentry::encode_remote(ino, d_type, exportbl); // remote link + // remote link + CDentry::encode_remote(ino, d_type, dn->get_alternate_name(), exportbl); continue; } @@ -1801,8 +1802,9 @@ void Migrator::encode_export_dir(bufferlist& exportbl, // -- inode exportbl.append("i", 1); // inode dentry - ENCODE_START(1, 1, exportbl); + ENCODE_START(2, 1, exportbl); encode_export_inode(in, exportbl, exported_client_map, exported_client_metadata_map); // encode, and (update state for) export + encode(dn->get_alternate_name(), exportbl); ENCODE_FINISH(exportbl); // directory? 
@@ -3445,7 +3447,12 @@ void Migrator::decode_import_dir(bufferlist::const_iterator& blp, // remote link inodeno_t ino; unsigned char d_type; - CDentry::decode_remote(icode, ino, d_type, blp); + mempool::mds_co::string alternate_name; + + CDentry::decode_remote(icode, ino, d_type, alternate_name, blp); + if (alternate_name.length()) + dn->set_alternate_name(std::move(alternate_name)); + if (dn->get_linkage()->is_remote()) { ceph_assert(dn->get_linkage()->get_remote_ino() == ino); } else { @@ -3456,9 +3463,11 @@ void Migrator::decode_import_dir(bufferlist::const_iterator& blp, // inode ceph_assert(le); if (icode == 'i') { - DECODE_START(1, blp); + DECODE_START(2, blp); decode_import_inode(dn, blp, oldauth, ls, peer_exports, updated_scatterlocks); + if (struct_v >= 2) + dn->decode_alternate_name(blp); DECODE_FINISH(blp); } else { decode_import_inode(dn, blp, oldauth, ls, diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 4fc87db91242c..fb7c74f8ed345 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -4390,6 +4390,8 @@ void Server::handle_client_openc(MDRequestRef& mdr) req->head.args.open.mode | S_IFREG, &layout); ceph_assert(newi); + dn->set_alternate_name(req->get_alternate_name()); + // it's a file. dn->push_projected_linkage(newi); @@ -6269,6 +6271,7 @@ void Server::handle_client_mknod(MDRequestRef& mdr) CInode *newi = prepare_new_inode(mdr, dn->get_dir(), inodeno_t(req->head.ino), mode, &layout); ceph_assert(newi); + dn->set_alternate_name(req->get_alternate_name()); dn->push_projected_linkage(newi); auto _inode = newi->_get_inode(); @@ -6357,6 +6360,8 @@ void Server::handle_client_mkdir(MDRequestRef& mdr) CInode *newi = prepare_new_inode(mdr, dir, inodeno_t(req->head.ino), mode); ceph_assert(newi); + dn->set_alternate_name(req->get_alternate_name()); + // it's a directory. 
dn->push_projected_linkage(newi); @@ -6433,6 +6438,8 @@ void Server::handle_client_symlink(MDRequestRef& mdr) const cref_t &req = mdr->client_request; + dn->set_alternate_name(req->get_alternate_name()); + unsigned mode = S_IFLNK | 0777; CInode *newi = prepare_new_inode(mdr, dir, inodeno_t(req->head.ino), mode); ceph_assert(newi); @@ -6519,6 +6526,8 @@ void Server::handle_client_link(MDRequestRef& mdr) return; } + destdn->set_alternate_name(req->get_alternate_name()); + targeti = ret.second->get_projected_linkage()->get_inode(); } @@ -7946,6 +7955,8 @@ void Server::handle_client_rename(MDRequestRef& mdr) return; } + destdn->set_alternate_name(req->get_alternate_name()); + // is this a stray migration, reintegration or merge? (sanity checks!) if (mdr->reqid.name.is_mds() && !(MDS_INO_IS_STRAY(srcpath.get_ino()) && diff --git a/src/mds/cephfs_features.cc b/src/mds/cephfs_features.cc index 71554d60fd8fa..deec59a286045 100644 --- a/src/mds/cephfs_features.cc +++ b/src/mds/cephfs_features.cc @@ -22,6 +22,7 @@ static const std::array feature_names "multi_reconnect", "deleg_ino", "metric_collect", + "alternate_name", }; static_assert(feature_names.size() == CEPHFS_FEATURE_MAX + 1); diff --git a/src/mds/cephfs_features.h b/src/mds/cephfs_features.h index c959e281f8337..ca27c12132c9a 100644 --- a/src/mds/cephfs_features.h +++ b/src/mds/cephfs_features.h @@ -42,7 +42,8 @@ namespace ceph { #define CEPHFS_FEATURE_DELEG_INO 13 #define CEPHFS_FEATURE_OCTOPUS 13 #define CEPHFS_FEATURE_METRIC_COLLECT 14 -#define CEPHFS_FEATURE_MAX 14 +#define CEPHFS_FEATURE_ALTERNATE_NAME 15 +#define CEPHFS_FEATURE_MAX 15 #define CEPHFS_FEATURES_ALL { \ 0, 1, 2, 3, 4, \ @@ -58,6 +59,7 @@ namespace ceph { CEPHFS_FEATURE_DELEG_INO, \ CEPHFS_FEATURE_OCTOPUS, \ CEPHFS_FEATURE_METRIC_COLLECT, \ + CEPHFS_FEATURE_ALTERNATE_NAME, \ } #define CEPHFS_METRIC_FEATURES_ALL { \ diff --git a/src/messages/MClientReply.h b/src/messages/MClientReply.h index dca000a59bff4..bb20a83b779a7 100644 --- 
a/src/messages/MClientReply.h +++ b/src/messages/MClientReply.h @@ -18,6 +18,7 @@ #include "include/types.h" #include "include/fs_types.h" +#include "include/mempool.h" #include "MClientRequest.h" #include "msg/Message.h" @@ -50,6 +51,7 @@ struct LeaseStat { __u16 mask; __u32 duration_ms; __u32 seq; + std::string alternate_name; LeaseStat() : mask(0), duration_ms(0), seq(0) {} LeaseStat(__u16 msk, __u32 dur, __u32 sq) : mask{msk}, duration_ms{dur}, seq{sq} {} @@ -57,10 +59,12 @@ struct LeaseStat { void decode(ceph::buffer::list::const_iterator &bl, const uint64_t features) { using ceph::decode; if (features == (uint64_t)-1) { - DECODE_START(1, bl); + DECODE_START(2, bl); decode(mask, bl); decode(duration_ms, bl); decode(seq, bl); + if (struct_v >= 2) + decode(alternate_name, bl); DECODE_FINISH(bl); } else { diff --git a/src/messages/MClientRequest.h b/src/messages/MClientRequest.h index 9517126fc6893..1463e26194feb 100644 --- a/src/messages/MClientRequest.h +++ b/src/messages/MClientRequest.h @@ -67,7 +67,7 @@ WRITE_CLASS_ENCODER(SnapPayload) class MClientRequest final : public MMDSOp { private: - static constexpr int HEAD_VERSION = 4; + static constexpr int HEAD_VERSION = 5; static constexpr int COMPAT_VERSION = 1; public: @@ -98,6 +98,7 @@ public: // path arguments filepath path, path2; + std::string alternate_name; std::vector gid_list; /* XXX HACK */ @@ -179,6 +180,13 @@ public: head.flags = head.flags | CEPH_MDS_FLAG_ASYNC; } + void set_alternate_name(std::string&& _alternate_name) { + alternate_name = std::move(_alternate_name); + } + void set_alternate_name(bufferptr&& cipher) { + alternate_name.assign(cipher.c_str(), cipher.length()); /* copy by explicit length: the bytes are an opaque blob that may embed NULs and need not be NUL-terminated */ + } + utime_t get_stamp() const { return stamp; } ceph_tid_t get_oldest_client_tid() const { return head.oldest_client_tid; } int get_num_fwd() const { return head.num_fwd; } @@ -192,6 +200,7 @@ public: const filepath& get_filepath() const { return path; } const std::string& get_path2() const { return path2.get_path(); } const filepath& 
get_filepath2() const { return path2; } + std::string_view get_alternate_name() const { return std::string_view(alternate_name); } int get_dentry_wanted() const { return get_flags() & CEPH_MDS_FLAG_WANT_DENTRY; } @@ -229,6 +238,8 @@ public: decode(stamp, p); if (header.version >= 4) // epoch 3 was for a ceph_mds_request_args change decode(gid_list, p); + if (header.version >= 5) + decode(alternate_name, p); } void encode_payload(uint64_t features) override { @@ -250,6 +261,7 @@ public: ceph::encode_nohead(releases, payload); encode(stamp, payload); encode(gid_list, payload); + encode(alternate_name, payload); } std::string_view get_type_name() const override { return "creq"; } diff --git a/src/tools/cephfs/DataScan.cc b/src/tools/cephfs/DataScan.cc index 52ff67a460195..34504fd4dca9e 100644 --- a/src/tools/cephfs/DataScan.cc +++ b/src/tools/cephfs/DataScan.cc @@ -20,6 +20,7 @@ #include "mds/CDentry.h" #include "mds/CInode.h" +#include "mds/CDentry.h" #include "mds/InoTable.h" #include "mds/SnapServer.h" #include "cls/cephfs/cls_cephfs_client.h" @@ -990,10 +991,13 @@ int DataScan::scan_links() } char dentry_type; decode(dentry_type, q); + mempool::mds_co::string alternate_name; if (dentry_type == 'I' || dentry_type == 'i') { InodeStore inode; if (dentry_type == 'i') { - DECODE_START(1, q); + DECODE_START(2, q); + if (struct_v >= 2) + decode(alternate_name, q); inode.decode(q); DECODE_FINISH(q); } else { @@ -1059,7 +1063,7 @@ int DataScan::scan_links() } else if (dentry_type == 'L' || dentry_type == 'l') { inodeno_t ino; unsigned char d_type; - CDentry::decode_remote(dentry_type, ino, d_type, q); + CDentry::decode_remote(dentry_type, ino, d_type, alternate_name, q); if (step == SCAN_INOS) { remote_links[ino]++; @@ -1481,7 +1485,11 @@ int MetadataTool::read_dentry(inodeno_t parent_ino, frag_t frag, decode(dentry_type, q); if (dentry_type == 'I' || dentry_type == 'i') { if (dentry_type == 'i') { - DECODE_START(1, q); + mempool::mds_co::string alternate_name; + + 
DECODE_START(2, q); + if (struct_v >= 2) + decode(alternate_name, q); inode->decode(q); DECODE_FINISH(q); } else { diff --git a/src/tools/cephfs/JournalTool.cc b/src/tools/cephfs/JournalTool.cc index f4598a4402950..ec98609803a46 100644 --- a/src/tools/cephfs/JournalTool.cc +++ b/src/tools/cephfs/JournalTool.cc @@ -837,7 +837,11 @@ int JournalTool::recover_dentries( // Read out inode version to compare with backing store InodeStore inode; if (dentry_type == 'i') { - DECODE_START(1, q); + mempool::mds_co::string alternate_name; + + DECODE_START(2, q); + if (struct_v >= 2) + decode(alternate_name, q); inode.decode(q); DECODE_FINISH(q); } else { diff --git a/src/tools/cephfs/MetaTool.cc b/src/tools/cephfs/MetaTool.cc index fc45dc85ae8cd..527ae636fc0ff 100644 --- a/src/tools/cephfs/MetaTool.cc +++ b/src/tools/cephfs/MetaTool.cc @@ -926,8 +926,9 @@ int MetaTool::show_child(std::string_view key, // hard link inodeno_t ino; unsigned char d_type; + mempool::mds_co::string alternate_name; - CDentry::decode_remote(type, ino, d_type, q); + CDentry::decode_remote(type, ino, d_type, alternate_name, q); if (sp_ino > 0) { if (sp_ino == ino) { @@ -942,7 +943,11 @@ int MetaTool::show_child(std::string_view key, // load inode data before lookuping up or constructing CInode InodeStore& inode_data = *(new InodeStore); if (type == 'i') { - DECODE_START(1, q); + mempool::mds_co::string alternate_name; + + DECODE_START(2, q); + if (struct_v >= 2) + decode(alternate_name, q); inode_data.decode(q); DECODE_FINISH(q); } else { -- 2.39.5