From: Patrick Donnelly Date: Wed, 19 Feb 2025 16:17:00 +0000 (-0500) Subject: client: add wrappings for charmap manipulation of dentry names X-Git-Tag: testing/wip-pdonnell-testing-20250227.191159-debug^2~19 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=751cf313dd5ca822c7c18ca8896ffde214605325;p=ceph-ci.git client: add wrappings for charmap manipulation of dentry names Signed-off-by: Patrick Donnelly Fixes: https://tracker.ceph.com/issues/66373 --- diff --git a/src/client/Client.cc b/src/client/Client.cc index b33b13d40fd..28d1a0f3afd 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -34,7 +34,12 @@ using namespace std::literals::string_view_literals; #endif #include +#include +#include + #include +#include +#include #include #include "common/async/waiter.h" @@ -164,6 +169,7 @@ using std::oct; using std::pair; using std::string; using std::vector; +using namespace std::literals; using namespace TOPNSPC::common; @@ -394,6 +400,12 @@ Client::Client(Messenger *m, MonClient *mc, Objecter *objecter_) m_command_hook(this), fscid(0) { + /* We only use the locale for normalization/case folding. That is unaffected + * by the locale but required by the API. + */ + auto generator = boost::locale::generator(); + m_locale = generator("en_US.UTF-8"); + _reset_faked_inos(); user_id = cct->_conf->client_mount_uid; @@ -1068,6 +1080,8 @@ Inode * Client::add_update_inode(InodeStat *st, utime_t from, if (in->is_symlink()) in->symlink = st->symlink; + in->optmetadata = st->optmetadata; // TODO maybe prune unknown_md_t + // only update inode if mds info is strictly newer, or it is the same and projected (odd). 
bool new_version = false; if (in->version == 0 || @@ -1256,6 +1270,108 @@ Dentry *Client::insert_dentry_inode(Dir *dir, const string& dname, LeaseStat *dl return dn; } +bool Client::_wrap_name(const Inode& diri, std::string& dname, std::string& alternate_name) +{ + ldout(cct, 20) << __func__ << ": (" << dname << " len=" << dname.size() << ", " << alternate_name << ") on " << diri << dendl; + ceph_assert(dname.size() > 0); + if (diri.has_charmap()) { + auto& cs = diri.get_charmap(); + ldout(cct, 25) << __func__ << ": " << cs << dendl; + const auto&& encoding = cs.get_encoding(); + const auto&& normalization = cs.get_normalization(); + const bool is_insensitive = !cs.is_casesensitive(); + + std::string encoded; + if (encoding == "utf8"sv) { + try { + /* confirm valid utf-8 name */ + encoded = boost::locale::conv::to_utf(dname, "UTF-8", boost::locale::conv::stop); + } catch (const boost::locale::conv::conversion_error& e) { + ldout(cct, 2) << "`" << dname << "' is not valid utf-8: " << e.what() << dendl; + return false; + } + } else if (!encoding.empty()) { + ldout(cct, 2) << "unknown encoding: " << encoding << dendl; + return false; + } + + std::string normalized; + if (normalization.size()) { + if (encoded.empty()) { + ldout(cct, 2) << "unknown encoding: " << encoding << dendl; + return false; + } + boost::locale::norm_type norm_type; + if (normalization == "nfd"sv) { + norm_type = boost::locale::norm_type::norm_nfd; + } else if (normalization == "nfc") { + norm_type = boost::locale::norm_type::norm_nfc; + } else if (normalization == "nfkd") { + norm_type = boost::locale::norm_type::norm_nfkd; + } else if (normalization == "nfkc") { + norm_type = boost::locale::norm_type::norm_nfkc; + } else { + ldout(cct, 2) << "unknown normalization: " << normalization << dendl; + return false; + } + try { + normalized = boost::locale::normalize(encoded, norm_type, m_locale); + } catch (const std::bad_cast& e) { + ldout(cct, -1) << __func__ << ": linking issue detected: multiple 
copies of boost::locale present in this binary! Link Boost / Boost::Python shared." << dendl; + return false; + } + ldout(cct, 25) << __func__ << " normalized: " << normalized << " len=" << normalized.size() << dendl; + } + + std::string folded; + if (is_insensitive) { + if (normalized.empty()) { + ldout(cct, 2) << __func__ << " normalization is required before case folding: " << dname << dendl; + return false; + } + try { + folded = boost::locale::fold_case(normalized, m_locale); + } catch (const std::bad_cast& e) { + ldout(cct, -1) << __func__ << ": linking issue detected: multiple copies of boost::locale present in this binary! Link Boost / Boost::Python shared." << dendl; + return false; + } + ldout(cct, 25) << __func__ << " folded: " << folded << " len=" << folded.size() << dendl; + } + + if (folded.size()) { + alternate_name = dname; + dname = folded; + } else if (normalized.size()) { + alternate_name = dname; + dname = normalized; + } /* else: no normalization / folding / encoding */ + } + return true; +} + +std::string Client::_unwrap_name(const Inode& diri, const std::string& dname, const std::string& alternate_name) +{ + ldout(cct, 20) << __func__ << ": (" << dname << ", " << alternate_name << ") on " << diri << dendl; + std::string newdname = dname; + + /* TODO: annotate alternate_name with metadata for multiple wrappings? 
*/ + if (diri.has_charmap()) { + auto& cs = diri.get_charmap(); + ldout(cct, 25) << __func__ << ": " << cs << dendl; + const bool is_insensitive = !cs.is_casesensitive(); + + /* no reverse of normalization / encoding */ + + if (is_insensitive) { + ldout(cct, 25) << __func__ << ": = " << alternate_name << dendl; + newdname = alternate_name; + } + } + + return newdname; +} + + void Client::update_dentry_lease(Dentry *dn, LeaseStat *dlease, utime_t from, MetaSession *session) { utime_t dttl = from; @@ -7524,7 +7640,7 @@ relookup: return r; } -Dentry *Client::get_or_create(Inode *dir, const char* name) +Dentry *Client::get_or_create(Inode *dir, const std::string& name) { // lookup ldout(cct, 20) << __func__ << " " << *dir << " name " << name << dendl; @@ -7605,6 +7721,13 @@ int Client::path_walk(InodeRef dirinode, const filepath& origpath, walk_dentry_r caps = CEPH_CAP_AUTH_SHARED; } + // N.B.: we don't validate alternate_name we generate during wrapping + // matches the dentry. We probably should! 
+ if (!_wrap_name(*diri, dname, alternate_name)) { + rc = -EACCES; + goto out; + } + if (dname.size() > NAME_MAX) { rc = -ENAMETOOLONG; goto out; @@ -9307,7 +9430,8 @@ int Client::_readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p, << " last_name " << dirp->last_name << " offset " << hex << dirp->offset << dec << dendl; - Dir *dir = dirp->inode->dir; + auto& diri = dirp->inode; + Dir *dir = diri->dir; if (!dir) { ldout(cct, 10) << " dir is empty" << dendl; @@ -9362,7 +9486,8 @@ int Client::_readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p, fill_statx(dn->inode, caps, &stx); uint64_t next_off = dn->offset + 1; - fill_dirent(&de, dn->name.c_str(), stx.stx_mode, stx.stx_ino, next_off); + auto dname = _unwrap_name(*diri, dn->name, dn->alternate_name); + fill_dirent(&de, dname.c_str(), stx.stx_mode, stx.stx_ino, next_off); ++pd; if (pd == dir->readdir_cache.end()) next_off = dir_result_t::END; @@ -9581,6 +9706,8 @@ int Client::_readdir_r_cb(int op, ++it) { dir_result_t::dentry &entry = *it; + ldout(cct, 25) << __func__ << ": " << entry << dendl; + uint64_t next_off = entry.offset + 1; int r; @@ -9595,7 +9722,8 @@ int Client::_readdir_r_cb(int op, } fill_statx(entry.inode, caps, &stx); - fill_dirent(&de, entry.name.c_str(), stx.stx_mode, stx.stx_ino, next_off); + auto dname = _unwrap_name(*diri, entry.name, entry.alternate_name); + fill_dirent(&de, dname.c_str(), stx.stx_mode, stx.stx_ino, next_off); Inode *inode = NULL; if (getref) { @@ -15116,6 +15244,15 @@ int Client::_rename(Inode *fromdir, const char *fromname, Inode *todir, const ch << " uid " << perm.uid() << " gid " << perm.gid() << ")" << dendl; + /* N.B.: when toname/fromname (wrapped) refer to the same file, then we + * expect the MDS to succeed since both names "exist" and refer to the same + * hard link. (From the MDS side, the wrapped names are equal too!) + * + * If an application wants to do a case-sensitive rename, they must use an + * intermediate file name. 
+ * + */ + walk_dentry_result wdr_from; if (int rc = path_walk(fromdir, filepath(fromname), &wdr_from, perm, {.followsym = false, .is_rename = true}); rc < 0) { return rc; diff --git a/src/client/Client.h b/src/client/Client.h index 9017ee780c8..eb3020fc0e5 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -45,6 +45,7 @@ #include "UserPerm.h" #include +#include #include #include #include @@ -977,6 +978,9 @@ protected: void check_caps(const InodeRef& in, unsigned flags); + bool _wrap_name(const Inode& diri, std::string& dname, std::string& alternate_name); + std::string _unwrap_name(const Inode& diri, const std::string& dname, const std::string& alternate_name); + void set_cap_epoch_barrier(epoch_t e); void handle_command_reply(const MConstRef& m); @@ -1772,7 +1776,7 @@ private: int _flock(Fh *fh, int cmd, uint64_t owner); int _lazyio(Fh *fh, int enable); - Dentry *get_or_create(Inode *dir, const char* name); + Dentry *get_or_create(Inode *dir, const std::string& name); int xattr_permission(Inode *in, const char *name, unsigned want, const UserPerm& perms); @@ -1994,6 +1998,8 @@ private: bool is_fuse = false; bool client_permissions; bool fuse_default_permissions; + + std::locale m_locale; }; /** diff --git a/src/client/Inode.cc b/src/client/Inode.cc index be7749f211f..60932d606b5 100644 --- a/src/client/Inode.cc +++ b/src/client/Inode.cc @@ -89,6 +89,9 @@ void Inode::print(std::ostream& out) const if (quota.is_enabled()) out << " " << quota; + if (optmetadata.size() > 0) { + out << " " << optmetadata; + } out << ' ' << this << ")"; } diff --git a/src/client/Inode.h b/src/client/Inode.h index 88b7c81201d..5a92df5bea0 100644 --- a/src/client/Inode.h +++ b/src/client/Inode.h @@ -15,6 +15,8 @@ #include "mds/mdstypes.h" // hrm #include "include/cephfs/types.h" +#include "messages/MClientReply.h" + #include "osdc/ObjectCacher.h" #include "InodeRef.h" @@ -164,6 +166,10 @@ struct Inode : RefCountedObject { std::vector fscrypt_auth; std::vector fscrypt_file; + + 
decltype(InodeStat::optmetadata) optmetadata; + using optkind_t = decltype(InodeStat::optmetadata)::optkind_t; + bool is_fscrypt_enabled() { return !!fscrypt_auth.size(); } @@ -331,6 +337,14 @@ struct Inode : RefCountedObject { void dump(Formatter *f) const; void print(std::ostream&) const; + bool has_charmap() const { + return optmetadata.has_opt(optkind_t::CHARMAP); + } + auto& get_charmap() const { + auto& opt = optmetadata.get_opt(optkind_t::CHARMAP); + return opt.template get_meta< charmap_md_t >(); + } + void break_all_delegs() { break_deleg(false); }; void recall_deleg(bool skip_read);