From: Xiubo Li Date: Mon, 21 Sep 2020 04:55:31 +0000 (+0800) Subject: mds: add alternate_name feature support for dentries X-Git-Tag: v16.1.0~9^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9e250226a83a7075bcfef024f3e60dbb6264c942;p=ceph.git mds: add alternate_name feature support for dentries This adds support for the "alternate_name" feature, which saves an alternate name for each dentry. This alternate name is not used in path lookup or for any other usual file system purpose. The name is simply an added blob of metadata on the dentry that is distributed to clients so that "long" file names may be supported for clients which require them. In the case of an fscrypt-enhanced kernel mount driver, the long name may be the ciphertext (exceeding FILENAME_MAX) of a long file name. Because this affects only files with long file names, the use of this feature should be rare but could be common for some unusual applications. The client mount should first check the CEPHFS_FEATURE_ALTERNATE_NAME feature bit to determine whether the MDS supports this feature. The alternate_name is transmitted as part of the message payload in MClientRequest when setting the alternate_name. The LeaseStat structure in MClientReply contains the alternate_name. When executing a metadata mutation RPC, the client will set the alternate_name (if it exists) as part of the operation. The MDS will pick that up and set it on the new or mutated dentry. 
Fixes: https://tracker.ceph.com/issues/47162 Signed-off-by: Xiubo Li Signed-off-by: Patrick Donnelly --- diff --git a/src/client/Client.cc b/src/client/Client.cc index a21e65ec6dd9..01e130037a25 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -1061,6 +1061,7 @@ Dentry *Client::insert_dentry_inode(Dir *dir, const string& dname, LeaseStat *dl } update_dentry_lease(dn, dlease, from, session); + dn->alternate_name = std::move(dlease->alternate_name); return dn; } @@ -1247,6 +1248,7 @@ void Client::insert_readdir_results(MetaRequest *request, MetaSession *session, // new dn dn = link(dir, dname, in, NULL); } + dn->alternate_name = std::move(dlease.alternate_name); update_dentry_lease(dn, &dlease, request->sent_stamp, session); if (hash_order) { diff --git a/src/client/Dentry.h b/src/client/Dentry.h index 7559c45f49b5..483a31eccb32 100644 --- a/src/client/Dentry.h +++ b/src/client/Dentry.h @@ -90,6 +90,7 @@ public: uint64_t lease_gen = 0; ceph_seq_t lease_seq = 0; int cap_shared_gen = 0; + std::string alternate_name; private: xlist::item inode_xlist_link; diff --git a/src/mds/CDentry.cc b/src/mds/CDentry.cc index b674f8ba0d42..9831b3dc9a48 100644 --- a/src/mds/CDentry.cc +++ b/src/mds/CDentry.cc @@ -235,6 +235,12 @@ void CDentry::make_path(filepath& fp, bool projected) const fp.push_dentry(get_name()); } +void CDentry::set_alternate_name(std::string_view _alternate_name) +{ + dout(10) << "setting alternate_name on " << *this << dendl; + alternate_name = _alternate_name; +} + /* * we only add ourselves to remote_parents when the linkage is * active (no longer projected). 
if the passed dnl is projected, @@ -530,24 +536,30 @@ void CDentry::_put() } } -void CDentry::encode_remote(inodeno_t& ino, unsigned char d_type, bufferlist &bl) +void CDentry::encode_remote(inodeno_t& ino, unsigned char d_type, + std::string_view alternate_name, + bufferlist &bl) { - bl.append("l", 1); // remote link + bl.append('l'); // remote link // marker, name, ino - ENCODE_START(1, 1, bl); + ENCODE_START(2, 1, bl); encode(ino, bl); encode(d_type, bl); + encode(alternate_name, bl); ENCODE_FINISH(bl); } void CDentry::decode_remote(char icode, inodeno_t& ino, unsigned char& d_type, + mempool::mds_co::string& alternate_name, ceph::buffer::list::const_iterator& bl) { if (icode == 'l') { - DECODE_START(1, bl); + DECODE_START(2, bl); decode(ino, bl); decode(d_type, bl); + if (struct_v >= 2) + decode(alternate_name, bl); DECODE_FINISH(bl); } else if (icode == 'L') { decode(ino, bl); diff --git a/src/mds/CDentry.h b/src/mds/CDentry.h index a192527dd348..5b60b838155c 100644 --- a/src/mds/CDentry.h +++ b/src/mds/CDentry.h @@ -151,6 +151,9 @@ public: const CDir *get_dir() const { return dir; } CDir *get_dir() { return dir; } std::string_view get_name() const { return std::string_view(name); } + void set_alternate_name(std::string_view _alternate_name); + std::string_view get_alternate_name() const { return std::string_view(alternate_name); } + void decode_alternate_name(bufferlist::const_iterator &bl) { decode(alternate_name, bl); } __u32 get_hash() const { return hash; } @@ -332,8 +335,11 @@ public: void print(std::ostream& out) override; void dump(ceph::Formatter *f) const; - static void encode_remote(inodeno_t& ino, unsigned char d_type, bufferlist &bl); + static void encode_remote(inodeno_t& ino, unsigned char d_type, + std::string_view alternate_name, + bufferlist &bl); static void decode_remote(char icode, inodeno_t& ino, unsigned char& d_type, + mempool::mds_co::string& alternate_name, ceph::buffer::list::const_iterator& bl); __u32 hash; @@ -370,6 +376,7 @@ 
protected: private: mempool::mds_co::string name; + mempool::mds_co::string alternate_name; }; std::ostream& operator<<(std::ostream& out, const CDentry& dn); diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 47834b116282..5458dadcf94f 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -1766,8 +1766,11 @@ CDentry *CDir::_load_dentry( // hard link inodeno_t ino; unsigned char d_type; + mempool::mds_co::string alternate_name; - CDentry::decode_remote(type, ino, d_type, q); + CDentry::decode_remote(type, ino, d_type, alternate_name, q); + if (alternate_name.length()) + dn->set_alternate_name(std::move(alternate_name)); if (stale) { if (!dn) { @@ -1808,7 +1811,9 @@ CDentry *CDir::_load_dentry( // inode // Load inode data before looking up or constructing CInode if (type == 'i') { - DECODE_START(1, q); + DECODE_START(2, q); + if (struct_v >= 2) + dn->decode_alternate_name(q); inode_data.decode(q); DECODE_FINISH(q); } else { @@ -2228,6 +2233,7 @@ void CDir::_encode_primary_inode_base(dentry_commit_item &item, bufferlist &dfts encode(item.oldest_snap, bl); encode(item.damage_flags, bl); + encode(item.alternate_name, bl); ENCODE_FINISH(bl); } @@ -2317,12 +2323,13 @@ void CDir::_omap_commit_ops(int r, int op_prio, int64_t metapool, version_t vers for (auto &item : to_set) { encode(item.first, bl); if (item.is_remote) { - CDentry::encode_remote(item.ino, item.d_type, bl); // remote link + // remote link + CDentry::encode_remote(item.ino, item.d_type, item.alternate_name, bl); } else { // marker, name, inode, [symlink string] bl.append('i'); // inode - ENCODE_START(1, 1, bl); + ENCODE_START(2, 1, bl); _encode_primary_inode_base(item, dfts, bl); ENCODE_FINISH(bl); } @@ -2378,6 +2385,7 @@ void CDir::_omap_commit(int op_prio) // reverve enough memories, which maybe larger than the actually needed to_set.reserve(count); + // for dir fragtrees bufferlist dfts(CEPH_PAGE_SIZE); auto write_one = [&](CDentry *dn) { @@ -2442,6 +2450,10 @@ void CDir::_parse_dentry(CDentry *dn, 
dentry_commit_item &item, item.first = dn->first; + // Only in very rare case the dn->alternate_name not empty, + // so it won't cost much to copy it here + item.alternate_name = dn->get_alternate_name(); + // primary or remote? if (dn->linkage.is_remote()) { item.is_remote = true; diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 030d9c949cbd..985f94cb0629 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -64,6 +64,7 @@ public: inodeno_t ino; unsigned char d_type; + mempool::mds_co::string alternate_name; bool snaprealm = false; sr_t srnode; diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 8d2740ad7eea..dc112871d892 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -4252,6 +4252,7 @@ void Locker::issue_client_lease(CDentry *dn, MDRequestRef &mdr, int mask, lstat.mask = CEPH_LEASE_VALID | mask; lstat.duration_ms = (uint32_t)(1000 * mdcache->client_lease_durations[pool]); lstat.seq = ++l->seq; + lstat.alternate_name = mempool::mds_co::string(dn->get_alternate_name()); encode_lease(bl, session->info, lstat); dout(20) << "issue_client_lease seq " << lstat.seq << " dur " << lstat.duration_ms << "ms " << " on " << *dn << dendl; @@ -4259,6 +4260,7 @@ void Locker::issue_client_lease(CDentry *dn, MDRequestRef &mdr, int mask, // null lease LeaseStat lstat; lstat.mask = mask; + lstat.alternate_name = mempool::mds_co::string(dn->get_alternate_name()); encode_lease(bl, session->info, lstat); dout(20) << "issue_client_lease no/null lease on " << *dn << dendl; } @@ -4291,10 +4293,11 @@ void Locker::encode_lease(bufferlist& bl, const session_info_t& info, const LeaseStat& ls) { if (info.has_feature(CEPHFS_FEATURE_REPLY_ENCODING)) { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 1, bl); encode(ls.mask, bl); encode(ls.duration_ms, bl); encode(ls.seq, bl); + encode(ls.alternate_name, bl); ENCODE_FINISH(bl); } else { diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 36a8dc0712fe..f3b653dd47b4 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ 
-1793,7 +1793,8 @@ void Migrator::encode_export_dir(bufferlist& exportbl, if (dn->get_linkage()->is_remote()) { inodeno_t ino = dn->get_linkage()->get_remote_ino(); unsigned char d_type = dn->get_linkage()->get_remote_d_type(); - CDentry::encode_remote(ino, d_type, exportbl); // remote link + // remote link + CDentry::encode_remote(ino, d_type, dn->get_alternate_name(), exportbl); continue; } @@ -1801,8 +1802,9 @@ void Migrator::encode_export_dir(bufferlist& exportbl, // -- inode exportbl.append("i", 1); // inode dentry - ENCODE_START(1, 1, exportbl); + ENCODE_START(2, 1, exportbl); encode_export_inode(in, exportbl, exported_client_map, exported_client_metadata_map); // encode, and (update state for) export + encode(dn->get_alternate_name(), exportbl); ENCODE_FINISH(exportbl); // directory? @@ -3445,7 +3447,12 @@ void Migrator::decode_import_dir(bufferlist::const_iterator& blp, // remote link inodeno_t ino; unsigned char d_type; - CDentry::decode_remote(icode, ino, d_type, blp); + mempool::mds_co::string alternate_name; + + CDentry::decode_remote(icode, ino, d_type, alternate_name, blp); + if (alternate_name.length()) + dn->set_alternate_name(std::move(alternate_name)); + if (dn->get_linkage()->is_remote()) { ceph_assert(dn->get_linkage()->get_remote_ino() == ino); } else { @@ -3456,9 +3463,11 @@ void Migrator::decode_import_dir(bufferlist::const_iterator& blp, // inode ceph_assert(le); if (icode == 'i') { - DECODE_START(1, blp); + DECODE_START(2, blp); decode_import_inode(dn, blp, oldauth, ls, peer_exports, updated_scatterlocks); + if (struct_v >= 2) + dn->decode_alternate_name(blp); DECODE_FINISH(blp); } else { decode_import_inode(dn, blp, oldauth, ls, diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 4fc87db91242..fb7c74f8ed34 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -4390,6 +4390,8 @@ void Server::handle_client_openc(MDRequestRef& mdr) req->head.args.open.mode | S_IFREG, &layout); ceph_assert(newi); + 
dn->set_alternate_name(req->get_alternate_name()); + // it's a file. dn->push_projected_linkage(newi); @@ -6269,6 +6271,7 @@ void Server::handle_client_mknod(MDRequestRef& mdr) CInode *newi = prepare_new_inode(mdr, dn->get_dir(), inodeno_t(req->head.ino), mode, &layout); ceph_assert(newi); + dn->set_alternate_name(req->get_alternate_name()); dn->push_projected_linkage(newi); auto _inode = newi->_get_inode(); @@ -6357,6 +6360,8 @@ void Server::handle_client_mkdir(MDRequestRef& mdr) CInode *newi = prepare_new_inode(mdr, dir, inodeno_t(req->head.ino), mode); ceph_assert(newi); + dn->set_alternate_name(req->get_alternate_name()); + // it's a directory. dn->push_projected_linkage(newi); @@ -6433,6 +6438,8 @@ void Server::handle_client_symlink(MDRequestRef& mdr) const cref_t &req = mdr->client_request; + dn->set_alternate_name(req->get_alternate_name()); + unsigned mode = S_IFLNK | 0777; CInode *newi = prepare_new_inode(mdr, dir, inodeno_t(req->head.ino), mode); ceph_assert(newi); @@ -6519,6 +6526,8 @@ void Server::handle_client_link(MDRequestRef& mdr) return; } + destdn->set_alternate_name(req->get_alternate_name()); + targeti = ret.second->get_projected_linkage()->get_inode(); } @@ -7946,6 +7955,8 @@ void Server::handle_client_rename(MDRequestRef& mdr) return; } + destdn->set_alternate_name(req->get_alternate_name()); + // is this a stray migration, reintegration or merge? (sanity checks!) 
if (mdr->reqid.name.is_mds() && !(MDS_INO_IS_STRAY(srcpath.get_ino()) && diff --git a/src/mds/cephfs_features.cc b/src/mds/cephfs_features.cc index 71554d60fd8f..deec59a28604 100644 --- a/src/mds/cephfs_features.cc +++ b/src/mds/cephfs_features.cc @@ -22,6 +22,7 @@ static const std::array feature_names "multi_reconnect", "deleg_ino", "metric_collect", + "alternate_name", }; static_assert(feature_names.size() == CEPHFS_FEATURE_MAX + 1); diff --git a/src/mds/cephfs_features.h b/src/mds/cephfs_features.h index c959e281f833..ca27c12132c9 100644 --- a/src/mds/cephfs_features.h +++ b/src/mds/cephfs_features.h @@ -42,7 +42,8 @@ namespace ceph { #define CEPHFS_FEATURE_DELEG_INO 13 #define CEPHFS_FEATURE_OCTOPUS 13 #define CEPHFS_FEATURE_METRIC_COLLECT 14 -#define CEPHFS_FEATURE_MAX 14 +#define CEPHFS_FEATURE_ALTERNATE_NAME 15 +#define CEPHFS_FEATURE_MAX 15 #define CEPHFS_FEATURES_ALL { \ 0, 1, 2, 3, 4, \ @@ -58,6 +59,7 @@ namespace ceph { CEPHFS_FEATURE_DELEG_INO, \ CEPHFS_FEATURE_OCTOPUS, \ CEPHFS_FEATURE_METRIC_COLLECT, \ + CEPHFS_FEATURE_ALTERNATE_NAME, \ } #define CEPHFS_METRIC_FEATURES_ALL { \ diff --git a/src/messages/MClientReply.h b/src/messages/MClientReply.h index dca000a59bff..bb20a83b779a 100644 --- a/src/messages/MClientReply.h +++ b/src/messages/MClientReply.h @@ -18,6 +18,7 @@ #include "include/types.h" #include "include/fs_types.h" +#include "include/mempool.h" #include "MClientRequest.h" #include "msg/Message.h" @@ -50,6 +51,7 @@ struct LeaseStat { __u16 mask; __u32 duration_ms; __u32 seq; + std::string alternate_name; LeaseStat() : mask(0), duration_ms(0), seq(0) {} LeaseStat(__u16 msk, __u32 dur, __u32 sq) : mask{msk}, duration_ms{dur}, seq{sq} {} @@ -57,10 +59,12 @@ struct LeaseStat { void decode(ceph::buffer::list::const_iterator &bl, const uint64_t features) { using ceph::decode; if (features == (uint64_t)-1) { - DECODE_START(1, bl); + DECODE_START(2, bl); decode(mask, bl); decode(duration_ms, bl); decode(seq, bl); + if (struct_v >= 2) + 
decode(alternate_name, bl); DECODE_FINISH(bl); } else { diff --git a/src/messages/MClientRequest.h b/src/messages/MClientRequest.h index 9517126fc689..1463e26194fe 100644 --- a/src/messages/MClientRequest.h +++ b/src/messages/MClientRequest.h @@ -67,7 +67,7 @@ WRITE_CLASS_ENCODER(SnapPayload) class MClientRequest final : public MMDSOp { private: - static constexpr int HEAD_VERSION = 4; + static constexpr int HEAD_VERSION = 5; static constexpr int COMPAT_VERSION = 1; public: @@ -98,6 +98,7 @@ public: // path arguments filepath path, path2; + std::string alternate_name; std::vector gid_list; /* XXX HACK */ @@ -179,6 +180,13 @@ public: head.flags = head.flags | CEPH_MDS_FLAG_ASYNC; } + void set_alternate_name(std::string&& _alternate_name) { + alternate_name = std::move(_alternate_name); + } + void set_alternate_name(bufferptr&& cipher) { + alternate_name = std::move(cipher.c_str()); + } + utime_t get_stamp() const { return stamp; } ceph_tid_t get_oldest_client_tid() const { return head.oldest_client_tid; } int get_num_fwd() const { return head.num_fwd; } @@ -192,6 +200,7 @@ public: const filepath& get_filepath() const { return path; } const std::string& get_path2() const { return path2.get_path(); } const filepath& get_filepath2() const { return path2; } + std::string_view get_alternate_name() const { return std::string_view(alternate_name); } int get_dentry_wanted() const { return get_flags() & CEPH_MDS_FLAG_WANT_DENTRY; } @@ -229,6 +238,8 @@ public: decode(stamp, p); if (header.version >= 4) // epoch 3 was for a ceph_mds_request_args change decode(gid_list, p); + if (header.version >= 5) + decode(alternate_name, p); } void encode_payload(uint64_t features) override { @@ -250,6 +261,7 @@ public: ceph::encode_nohead(releases, payload); encode(stamp, payload); encode(gid_list, payload); + encode(alternate_name, payload); } std::string_view get_type_name() const override { return "creq"; } diff --git a/src/tools/cephfs/DataScan.cc b/src/tools/cephfs/DataScan.cc index 
52ff67a46019..34504fd4dca9 100644 --- a/src/tools/cephfs/DataScan.cc +++ b/src/tools/cephfs/DataScan.cc @@ -20,6 +20,7 @@ #include "mds/CDentry.h" #include "mds/CInode.h" +#include "mds/CDentry.h" #include "mds/InoTable.h" #include "mds/SnapServer.h" #include "cls/cephfs/cls_cephfs_client.h" @@ -990,10 +991,13 @@ int DataScan::scan_links() } char dentry_type; decode(dentry_type, q); + mempool::mds_co::string alternate_name; if (dentry_type == 'I' || dentry_type == 'i') { InodeStore inode; if (dentry_type == 'i') { - DECODE_START(1, q); + DECODE_START(2, q); + if (struct_v >= 2) + decode(alternate_name, q); inode.decode(q); DECODE_FINISH(q); } else { @@ -1059,7 +1063,7 @@ int DataScan::scan_links() } else if (dentry_type == 'L' || dentry_type == 'l') { inodeno_t ino; unsigned char d_type; - CDentry::decode_remote(dentry_type, ino, d_type, q); + CDentry::decode_remote(dentry_type, ino, d_type, alternate_name, q); if (step == SCAN_INOS) { remote_links[ino]++; @@ -1481,7 +1485,11 @@ int MetadataTool::read_dentry(inodeno_t parent_ino, frag_t frag, decode(dentry_type, q); if (dentry_type == 'I' || dentry_type == 'i') { if (dentry_type == 'i') { - DECODE_START(1, q); + mempool::mds_co::string alternate_name; + + DECODE_START(2, q); + if (struct_v >= 2) + decode(alternate_name, q); inode->decode(q); DECODE_FINISH(q); } else { diff --git a/src/tools/cephfs/JournalTool.cc b/src/tools/cephfs/JournalTool.cc index f4598a440295..ec98609803a4 100644 --- a/src/tools/cephfs/JournalTool.cc +++ b/src/tools/cephfs/JournalTool.cc @@ -837,7 +837,11 @@ int JournalTool::recover_dentries( // Read out inode version to compare with backing store InodeStore inode; if (dentry_type == 'i') { - DECODE_START(1, q); + mempool::mds_co::string alternate_name; + + DECODE_START(2, q); + if (struct_v >= 2) + decode(alternate_name, q); inode.decode(q); DECODE_FINISH(q); } else { diff --git a/src/tools/cephfs/MetaTool.cc b/src/tools/cephfs/MetaTool.cc index fc45dc85ae8c..527ae636fc0f 100644 --- 
a/src/tools/cephfs/MetaTool.cc +++ b/src/tools/cephfs/MetaTool.cc @@ -926,8 +926,9 @@ int MetaTool::show_child(std::string_view key, // hard link inodeno_t ino; unsigned char d_type; + mempool::mds_co::string alternate_name; - CDentry::decode_remote(type, ino, d_type, q); + CDentry::decode_remote(type, ino, d_type, alternate_name, q); if (sp_ino > 0) { if (sp_ino == ino) { @@ -942,7 +943,11 @@ int MetaTool::show_child(std::string_view key, // load inode data before lookuping up or constructing CInode InodeStore& inode_data = *(new InodeStore); if (type == 'i') { - DECODE_START(1, q); + mempool::mds_co::string alternate_name; + + DECODE_START(2, q); + if (struct_v >= 2) + decode(alternate_name, q); inode_data.decode(q); DECODE_FINISH(q); } else {