From 353fcee10a6d836566a63c3c509c1263a74049a5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 9 May 2018 09:05:31 +0800 Subject: [PATCH] mds: introduce cephfs' own feature bits Signed-off-by: "Yan, Zheng" Fixes: http://tracker.ceph.com/issues/14456 --- src/client/Client.cc | 38 ++++++++++---- src/mds/MDSRank.cc | 4 +- src/mds/Server.cc | 37 ++++++++++--- src/mds/Server.h | 2 + src/mds/cephfs_features.h | 30 +++++++++++ src/mds/journal.cc | 4 +- src/mds/mdstypes.cc | 97 +++++++++++++++++++++++++++++++---- src/mds/mdstypes.h | 65 +++++++++++++++++++++-- src/messages/MClientSession.h | 19 ++++--- 9 files changed, 250 insertions(+), 46 deletions(-) create mode 100644 src/mds/cephfs_features.h diff --git a/src/client/Client.cc b/src/client/Client.cc index 1dc85f056d7..b763b36795f 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -64,6 +64,7 @@ #include "mon/MonClient.h" #include "mds/flock.h" +#include "mds/cephfs_features.h" #include "osd/OSDMap.h" #include "osdc/Filer.h" @@ -1979,9 +1980,10 @@ void Client::update_metadata(std::string const &k, std::string const &v) Mutex::Locker l(client_lock); assert(initialized); - if (metadata.count(k)) { + auto it = metadata.find(k); + if (it != metadata.end()) { ldout(cct, 1) << __func__ << " warning, overriding metadata field '" << k - << "' from '" << metadata[k] << "' to '" << v << "'" << dendl; + << "' from '" << it->second << "' to '" << v << "'" << dendl; } metadata[k] = v; @@ -2012,7 +2014,8 @@ MetaSession *Client::_open_mds_session(mds_rank_t mds) } MClientSession *m = new MClientSession(CEPH_SESSION_REQUEST_OPEN); - m->client_meta = metadata; + m->metadata = metadata; + m->supported_features = feature_bitset_t(CEPHFS_FEATURES_CLIENT_SUPPORTED); session->con->send_message(m); return session; } @@ -2050,14 +2053,27 @@ void Client::handle_client_session(MClientSession *m) switch (m->get_op()) { case CEPH_SESSION_OPEN: - renew_caps(session); - session->state = MetaSession::STATE_OPEN; - if (unmounting) - mount_cond.Signal(); - else - connect_mds_targets(from); - signal_context_list(session->waiting_for_open); - break; + { + feature_bitset_t missing_features(CEPHFS_FEATURES_CLIENT_REQUIRED); + missing_features -= m->supported_features; + if (!missing_features.empty()) { + lderr(cct) << "mds." << from << " lacks required features '" + << missing_features << "', closing session " << dendl; + rejected_by_mds[session->mds_num] = session->addrs; + _close_mds_session(session); + _closed_mds_session(session); + break; + } + + renew_caps(session); + session->state = MetaSession::STATE_OPEN; + if (unmounting) + mount_cond.Signal(); + else + connect_mds_targets(from); + signal_context_list(session->waiting_for_open); + break; + } case CEPH_SESSION_CLOSE: _closed_mds_session(session); diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 3abc9a1cd76..a60f8ce4bfe 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -2229,9 +2229,7 @@ void MDSRankDispatcher::dump_sessions(const SessionFilter &filter, Formatter *f) f->dump_bool("reconnecting", server->waiting_for_reconnect(p.first.num())); f->dump_stream("inst") << s->info.inst; f->open_object_section("client_metadata"); - for (auto& q : s->info.client_metadata) { - f->dump_string(q.first.c_str(), q.second); - } + s->info.client_metadata.dump(f); f->close_section(); // client_metadata f->close_section(); //session } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index df01ca5005a..e249a2872aa 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -28,6 +28,7 @@ #include "InoTable.h" #include "SnapClient.h" #include "Mutation.h" +#include "cephfs_features.h" #include "msg/Messenger.h" @@ -187,6 +188,7 @@ Server::Server(MDSRank *m) : reconnect_evicting(false), terminating_sessions(false) { + supported_features = feature_bitset_t(CEPHFS_FEATURES_MDS_SUPPORTED); } @@ -363,12 +365,24 @@ void Server::handle_client_session(MClientSession *m) } { - client_metadata_t client_metadata(std::move(m->client_meta)); + client_metadata_t client_metadata(std::move(m->metadata), + std::move(m->supported_features)); dout(20) << __func__ << " CEPH_SESSION_REQUEST_OPEN metadata entries:" << dendl; + dout(20) << " features: '" << client_metadata.features << dendl; for (auto& p : client_metadata) { dout(20) << " " << p.first << ": " << p.second << dendl; } + feature_bitset_t missing_features(CEPHFS_FEATURES_MDS_REQUIRED); + missing_features -= client_metadata.features; + if (!missing_features.empty()) { + mds->send_message_client(new MClientSession(CEPH_SESSION_REJECT), session); + mds->clog->warn() << "client session lacks required features '" + << missing_features << "' denied (" << session->info.inst << ")"; + session->clear(); + break; + } + client_metadata_t::iterator it; // Special case for the 'root' metadata path; validate that the claimed // root is actually within the caps of the session @@ -379,14 +393,11 @@ void Server::handle_client_session(MClientSession *m) // into caps check if (claimed_root.empty() || claimed_root[0] != '/' || !session->auth_caps.path_capable(claimed_root.substr(1))) { - derr << __func__ << " forbidden path claimed as mount root: " - << claimed_root << " by " << m->get_source() << dendl; // Tell the client we're rejecting their open mds->send_message_client(new MClientSession(CEPH_SESSION_REJECT), session); mds->clog->warn() << "client session with invalid root '" << claimed_root << "' denied (" << session->info.inst << ")"; session->clear(); - // Drop out; don't record this session in SessionMap or journal it. break; } } @@ -521,7 +532,10 @@ void Server::_session_logged(Session *session, uint64_t state_seq, bool open, ve mds->sessionmap.set_state(session, Session::STATE_OPEN); mds->sessionmap.touch_session(session); assert(session->connection != NULL); - session->connection->send_message(new MClientSession(CEPH_SESSION_OPEN)); + MClientSession *reply = new MClientSession(CEPH_SESSION_OPEN); + if (session->info.has_feature(CEPHFS_FEATURE_MIMIC)) + reply->supported_features = supported_features; + session->connection->send_message(reply); if (mdcache->is_readonly()) session->connection->send_message(new MClientSession(CEPH_SESSION_FORCE_RO)); } else if (session->is_closing() || @@ -659,7 +673,12 @@ void Server::finish_force_open_sessions(const mapinfo.inst << dendl; mds->sessionmap.set_state(session, Session::STATE_OPEN); mds->sessionmap.touch_session(session); - mds->send_message_client(new MClientSession(CEPH_SESSION_OPEN), session); + + MClientSession *reply = new MClientSession(CEPH_SESSION_OPEN); + if (session->info.has_feature(CEPHFS_FEATURE_MIMIC)) + reply->supported_features = supported_features; + mds->send_message_client(reply, session); + if (mdcache->is_readonly()) mds->send_message_client(new MClientSession(CEPH_SESSION_FORCE_RO), session); } @@ -960,7 +979,11 @@ void Server::handle_client_reconnect(MClientReconnect *m) } // notify client of success with an OPEN - m->get_connection()->send_message(new MClientSession(CEPH_SESSION_OPEN)); + MClientSession *reply = new MClientSession(CEPH_SESSION_OPEN); + if (session->info.has_feature(CEPHFS_FEATURE_MIMIC)) + reply->supported_features = supported_features; + m->get_connection()->send_message(reply); + session->last_cap_renew = ceph_clock_now(); mds->clog->debug() << "reconnect by " << session->info.inst << " after " << delay; diff --git a/src/mds/Server.h b/src/mds/Server.h index e21beb8bfc2..b468eb674aa 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -85,6 +85,8 @@ private: bool reconnect_evicting; // true if I am waiting for evictions to complete // before proceeding to reconnect_gather_finish + feature_bitset_t supported_features; + friend class MDSContinuation; friend class ServerContext; friend class ServerLogContext; diff --git a/src/mds/cephfs_features.h b/src/mds/cephfs_features.h new file mode 100644 index 00000000000..f1ad8a68b1f --- /dev/null +++ b/src/mds/cephfs_features.h @@ -0,0 +1,30 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPHFS_FEATURES_H +#define CEPHFS_FEATURES_H + +#define CEPHFS_FEATURE_MIMIC 0 + +#define CEPHFS_FEATURES_ALL { \ + CEPHFS_FEATURE_MIMIC, \ +} + +#define CEPHFS_FEATURES_MDS_SUPPORTED CEPHFS_FEATURES_ALL +#define CEPHFS_FEATURES_MDS_REQUIRED {} + +#define CEPHFS_FEATURES_CLIENT_SUPPORTED CEPHFS_FEATURES_ALL +#define CEPHFS_FEATURES_CLIENT_REQUIRED {} + +#endif diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 741a58f345c..12b0539c2e8 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -1758,9 +1758,7 @@ void ESession::dump(Formatter *f) const f->dump_stream("inos") << inos; f->dump_int("inotable version", inotablev); f->open_object_section("client_metadata"); - for (auto& p : client_metadata) { - f->dump_string(p.first.c_str(), p.second); - } + client_metadata.dump(f); f->close_section(); // client_metadata } diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc index c0fcb85e31f..499be4135e6 100644 --- a/src/mds/mdstypes.cc +++ b/src/mds/mdstypes.cc @@ -365,42 +365,118 @@ void old_rstat_t::generate_test_instances(list& ls) ls.back()->accounted_rstat = *nls.front(); } +/* + * feature_bitset_t + */ +feature_bitset_t::feature_bitset_t(const vector& array) +{ + if (!array.empty()) { + size_t n = array.back(); + n += bits_per_block; + n /= bits_per_block; + _vec.resize(n, 0); + + size_t last = 0; + for (auto& bit : array) { + if (bit > last) + last = bit; + else + assert(bit == last); + _vec[bit / bits_per_block] |= (block_type)1 << (bit % bits_per_block); + } + } +} + +feature_bitset_t& feature_bitset_t::operator-=(const feature_bitset_t& other) +{ + for (size_t i = 0; i < _vec.size(); ++i) { + if (i >= other._vec.size()) + break; + _vec[i] &= ~other._vec[i]; + } + return *this; +} + +void feature_bitset_t::encode(bufferlist& bl) const { + using ceph::encode; + using ceph::encode_nohead; + uint32_t len = _vec.size() * sizeof(block_type); + encode(len, bl); + encode_nohead(_vec, bl); +} + +void feature_bitset_t::decode(bufferlist::const_iterator &p) { + using ceph::decode; + using ceph::decode_nohead; + uint32_t len; + decode(len, p); + + _vec.clear(); + if (len >= sizeof(block_type)) + decode_nohead(len / sizeof(block_type), _vec, p); + + if (len % sizeof(block_type)) { + ceph_le64 buf{}; + p.copy(len % sizeof(block_type), (char*)&buf); + _vec.push_back((block_type)buf); + } +} + +void feature_bitset_t::print(ostream& out) const +{ + std::ios_base::fmtflags f(out.flags()); + for (int i = _vec.size() - 1; i >= 0; --i) + out << std::setfill('0') << std::setw(sizeof(block_type) * 2) + << std::hex << _vec[i]; + out.flags(f); +} + /* * client_metadata_t */ void client_metadata_t::encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 1, bl); encode(kv_map, bl); + encode(features, bl); ENCODE_FINISH(bl); } void client_metadata_t::decode(bufferlist::const_iterator& p) { - DECODE_START(1, p); + DECODE_START(2, p); decode(kv_map, p); + if (struct_v >= 2) + decode(features, p); DECODE_FINISH(p); } +void client_metadata_t::dump(Formatter *f) const +{ + f->dump_stream("features") << features; + for (const auto& p : kv_map) + f->dump_string(p.first.c_str(), p.second); +} + /* * session_info_t */ void session_info_t::encode(bufferlist& bl, uint64_t features) const { - ENCODE_START(6, 3, bl); + ENCODE_START(7, 7, bl); encode(inst, bl, features); encode(completed_requests, bl); encode(prealloc_inos, bl); // hacky, see below. encode(used_inos, bl); - encode(client_metadata.kv_map, bl); encode(completed_flushes, bl); encode(auth_name, bl); + encode(client_metadata, bl); ENCODE_FINISH(bl); } void session_info_t::decode(bufferlist::const_iterator& p) { - DECODE_START_LEGACY_COMPAT_LEN(6, 2, 2, p); + DECODE_START_LEGACY_COMPAT_LEN(7, 2, 2, p); decode(inst, p); if (struct_v <= 2) { set s; @@ -416,7 +492,7 @@ void session_info_t::decode(bufferlist::const_iterator& p) decode(used_inos, p); prealloc_inos.insert(used_inos); used_inos.clear(); - if (struct_v >= 4) { + if (struct_v >= 4 && struct_v < 7) { decode(client_metadata.kv_map, p); } if (struct_v >= 5) { @@ -425,6 +501,9 @@ void session_info_t::decode(bufferlist::const_iterator& p) if (struct_v >= 6) { decode(auth_name, p); } + if (struct_v >= 7) { + decode(client_metadata, p); + } DECODE_FINISH(p); } @@ -465,9 +544,9 @@ void session_info_t::dump(Formatter *f) const } f->close_section(); - for (auto& p : client_metadata) { - f->dump_string(p.first.c_str(), p.second); - } + f->open_array_section("client_metadata"); + client_metadata.dump(f); + f->close_section(); } void session_info_t::generate_test_instances(list& ls) diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index b00bb862d5d..6569dfe9274 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -1059,6 +1059,51 @@ inline std::ostream& operator<<(std::ostream& out, const old_rstat_t& o) { return out << "old_rstat(first " << o.first << " " << o.rstat << " " << o.accounted_rstat << ")"; } +/* + * feature_bitset_t + */ +class feature_bitset_t { +public: + typedef uint64_t block_type; + static const size_t bits_per_block = sizeof(block_type) * 8; + + feature_bitset_t() {} + feature_bitset_t(const feature_bitset_t& other) : _vec(other._vec) {} + feature_bitset_t(feature_bitset_t&& other) : _vec(std::move(other._vec)) {} + feature_bitset_t(const vector& array); + feature_bitset_t& operator=(const feature_bitset_t& other) { + _vec = other._vec; + return *this; + } + bool empty() const { + for (auto& v : _vec) { + if (v) + return false; + } + return true; + } + bool test(size_t bit) const { + if (bit >= bits_per_block * _vec.size()) + return false; + return _vec[bit / bits_per_block] & ((block_type)1 << (bit % bits_per_block)); + } + void clear() { + _vec.clear(); + } + feature_bitset_t& operator-=(const feature_bitset_t& other); + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator &p); + void print(ostream& out) const; +private: + vector _vec; +}; +WRITE_CLASS_ENCODER(feature_bitset_t) + +inline std::ostream& operator<<(std::ostream& out, const feature_bitset_t& s) { + s.print(out); + return out; +} + /* * client_metadata_t */ @@ -1067,30 +1112,38 @@ struct client_metadata_t { using iterator = kv_map_t::const_iterator; kv_map_t kv_map; + feature_bitset_t features; client_metadata_t() {} client_metadata_t(const client_metadata_t& other) : - kv_map(other.kv_map) {} + kv_map(other.kv_map), features(other.features) {} client_metadata_t(client_metadata_t&& other) : - kv_map(std::move(other.kv_map)) {} - client_metadata_t(kv_map_t&& kv) : - kv_map(std::move(kv)) {} + kv_map(std::move(other.kv_map)), features(std::move(other.features)) {} + client_metadata_t(kv_map_t&& kv, feature_bitset_t &&f) : + kv_map(std::move(kv)), features(std::move(f)) {} client_metadata_t& operator=(const client_metadata_t& other) { kv_map = other.kv_map; + features = other.features; return *this; } - bool empty() const { return kv_map.empty(); } + bool empty() const { return kv_map.empty() && features.empty(); } iterator find(const std::string& key) const { return kv_map.find(key); } iterator begin() const { return kv_map.begin(); } iterator end() const { return kv_map.end(); } std::string& operator[](const std::string& key) { return kv_map[key]; } void merge(const client_metadata_t& other) { kv_map.insert(other.kv_map.begin(), other.kv_map.end()); + features = other.features; + } + void clear() { + kv_map.clear(); + features.clear(); } void encode(bufferlist& bl) const; void decode(bufferlist::const_iterator& p); + void dump(Formatter *f) const; }; WRITE_CLASS_ENCODER(client_metadata_t) @@ -1107,6 +1160,7 @@ struct session_info_t { EntityName auth_name; client_t get_client() const { return client_t(inst.name.num()); } + bool has_feature(size_t bit) { return client_metadata.features.test(bit); } const entity_name_t& get_source() const { return inst.name; } void clear_meta() { @@ -1114,6 +1168,7 @@ struct session_info_t { used_inos.clear(); completed_requests.clear(); completed_flushes.clear(); + client_metadata.clear(); } void encode(bufferlist& bl, uint64_t features) const; diff --git a/src/messages/MClientSession.h b/src/messages/MClientSession.h index f4ce68334e4..31d943c89fb 100644 --- a/src/messages/MClientSession.h +++ b/src/messages/MClientSession.h @@ -16,15 +16,17 @@ #define CEPH_MCLIENTSESSION_H #include "msg/Message.h" +#include "mds/mdstypes.h" class MClientSession : public Message { - static const int HEAD_VERSION = 2; + static const int HEAD_VERSION = 3; static const int COMPAT_VERSION = 1; public: ceph_mds_session_head head; - std::map client_meta; + std::map metadata; + feature_bitset_t supported_features; int get_op() const { return head.op; } version_t get_seq() const { return head.seq; } @@ -63,23 +65,24 @@ public: void decode_payload() override { auto p = payload.cbegin(); decode(head, p); - if (header.version >= 2) { - decode(client_meta, p); - } + if (header.version >= 2) + decode(metadata, p); + if (header.version >= 3) + decode(supported_features, p); } void encode_payload(uint64_t features) override { using ceph::encode; encode(head, payload); - if (client_meta.empty()) { + if (metadata.empty() && supported_features.empty()) { // If we're not trying to send any metadata (always the case if // we are a server) then send older-format message to avoid upsetting // old kernel clients. header.version = 1; } else { - encode(client_meta, payload); header.version = HEAD_VERSION; + encode(metadata, payload); + encode(supported_features, payload); } - } }; -- 2.39.5