From f8c8bbd1ee451f15d74b71073ec1abd6a42743b0 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 18 Dec 2018 16:22:21 +0800 Subject: [PATCH] mds: update MClientReconnect encoding The old encoding assumes that snaprealms are encoded at the tail of the message payload, so it does not allow adding new fields to the message. The patch introduces a new encoding for MClientReconnect; the new encoding allows us to extend MClientReconnect. The new encoding is not compatible with the old encoding. If the mds does not understand the new encoding, the client needs to use the old encoding to encode MClientReconnect. Signed-off-by: "Yan, Zheng" --- src/client/Client.cc | 1 + src/mds/Server.cc | 10 +-- src/mds/mdstypes.cc | 43 +++++++++++- src/mds/mdstypes.h | 20 ++++++ src/messages/MClientReconnect.h | 114 ++++++++++++++++++++------------ 5 files changed, 138 insertions(+), 50 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 58ffd36d66f..713af0fa7f1 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -2836,6 +2836,7 @@ void Client::send_reconnect(MetaSession *session) early_kick_flushing_caps(session); + m->set_encoding_version(0); // use connection features to choose encoding session->con->send_message2(std::move(m)); mount_cond.Signal(); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 455a6d0ef48..dad962be33e 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -1267,7 +1267,7 @@ void Server::handle_client_reconnect(const MClientReconnect::const_ref &m) // snaprealms for (const auto &r : m->realms) { - CInode *in = mdcache->get_inode(inodeno_t(r.ino)); + CInode *in = mdcache->get_inode(inodeno_t(r.realm.ino)); if (in && in->state_test(CInode::STATE_PURGING)) continue; if (in) { @@ -1277,11 +1277,11 @@ void Server::handle_client_reconnect(const MClientReconnect::const_ref &m) // this can happen if we are non-auth or we rollback snaprealm dout(15) << "open snaprealm (null snaprealm) on " << *in << dendl; } - 
mdcache->add_reconnected_snaprealm(from, inodeno_t(r.ino), snapid_t(r.seq)); + mdcache->add_reconnected_snaprealm(from, inodeno_t(r.realm.ino), snapid_t(r.realm.seq)); } else { - dout(15) << "open snaprealm (w/o inode) on " << inodeno_t(r.ino) - << " seq " << r.seq << dendl; - mdcache->add_reconnected_snaprealm(from, inodeno_t(r.ino), snapid_t(r.seq)); + dout(15) << "open snaprealm (w/o inode) on " << inodeno_t(r.realm.ino) + << " seq " << r.realm.seq << dendl; + mdcache->add_reconnected_snaprealm(from, inodeno_t(r.realm.ino), snapid_t(r.realm.seq)); } } diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc index e98ca15eeb7..2c671b5d0ae 100644 --- a/src/mds/mdstypes.cc +++ b/src/mds/mdstypes.cc @@ -822,7 +822,7 @@ void cap_reconnect_t::encode_old(bufferlist& bl) const { } void cap_reconnect_t::decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); + DECODE_START(2, bl); decode_old(bl); // extract out when something changes if (struct_v >= 2) decode(snap_follows, bl); @@ -854,6 +854,47 @@ void cap_reconnect_t::generate_test_instances(list& ls) ls.back()->capinfo.cap_id = 1; } +/* + * snaprealm_reconnect_t + */ +void snaprealm_reconnect_t::encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode_old(bl); // extract out when something changes + ENCODE_FINISH(bl); +} + +void snaprealm_reconnect_t::encode_old(bufferlist& bl) const { + using ceph::encode; + encode(realm, bl); +} + +void snaprealm_reconnect_t::decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode_old(bl); // extract out when something changes + DECODE_FINISH(bl); +} + +void snaprealm_reconnect_t::decode_old(bufferlist::const_iterator& bl) { + using ceph::decode; + decode(realm, bl); +} + +void snaprealm_reconnect_t::dump(Formatter *f) const +{ + f->dump_int("ino", realm.ino); + f->dump_int("seq", realm.seq); + f->dump_int("parent", realm.parent); +} + +void snaprealm_reconnect_t::generate_test_instances(list& ls) +{ + ls.push_back(new snaprealm_reconnect_t); + 
ls.back()->realm.ino = 0x10000000001ULL; + ls.back()->realm.seq = 2; + ls.back()->realm.parent = 1; +} + + ostream& operator<<(ostream &out, const mds_role_t &role) { out << role.fscid << ":" << role.rank; diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 20387fa6ab7..2130ace30b3 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -1390,6 +1390,26 @@ struct cap_reconnect_t { }; WRITE_CLASS_ENCODER(cap_reconnect_t) +struct snaprealm_reconnect_t { + mutable ceph_mds_snaprealm_reconnect realm; + + snaprealm_reconnect_t() { + memset(&realm, 0, sizeof(realm)); + } + snaprealm_reconnect_t(inodeno_t ino, snapid_t seq, inodeno_t parent) { + realm.ino = ino; + realm.seq = seq; + realm.parent = parent; + } + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& bl); + void encode_old(bufferlist& bl) const; + void decode_old(bufferlist::const_iterator& bl); + + void dump(Formatter *f) const; + static void generate_test_instances(list& ls); +}; +WRITE_CLASS_ENCODER(snaprealm_reconnect_t) // compat for pre-FLOCK feature struct old_ceph_mds_cap_reconnect { diff --git a/src/messages/MClientReconnect.h b/src/messages/MClientReconnect.h index 96c02d7bbe9..0e100c1e9b6 100644 --- a/src/messages/MClientReconnect.h +++ b/src/messages/MClientReconnect.h @@ -24,13 +24,15 @@ class MClientReconnect : public MessageInstance { public: friend factory; private: - static constexpr int HEAD_VERSION = 3; + static constexpr int HEAD_VERSION = 4; + static constexpr int COMPAT_VERSION = 4; public: map caps; // only head inodes - vector realms; + vector realms; - MClientReconnect() : MessageInstance(CEPH_MSG_CLIENT_RECONNECT, HEAD_VERSION) { } + MClientReconnect() : + MessageInstance(CEPH_MSG_CLIENT_RECONNECT, HEAD_VERSION, COMPAT_VERSION) { } private: ~MClientReconnect() override {} @@ -41,70 +43,94 @@ public: << caps.size() << " caps)"; } + // Force to use old encoding. + // Use connection's features to choose encoding if version is set to 0. 
+ void set_encoding_version(int v) { + header.version = v; + if (v <= 3) + header.compat_version = 0; + } + void add_cap(inodeno_t ino, uint64_t cap_id, inodeno_t pathbase, const string& path, int wanted, int issued, inodeno_t sr, snapid_t sf, bufferlist& lb) { caps[ino] = cap_reconnect_t(cap_id, pathbase, path, wanted, issued, sr, sf, lb); } void add_snaprealm(inodeno_t ino, snapid_t seq, inodeno_t parent) { - ceph_mds_snaprealm_reconnect r; - r.ino = ino; - r.seq = seq; - r.parent = parent; + snaprealm_reconnect_t r; + r.realm.ino = ino; + r.realm.seq = seq; + r.realm.parent = parent; realms.push_back(r); } void encode_payload(uint64_t features) override { + if (header.version == 0) { + if (features & CEPH_FEATURE_MDSENC) + header.version = 3; + else if (features & CEPH_FEATURE_FLOCK) + header.version = 2; + else + header.version = 1; + } + using ceph::encode; data.clear(); - if (features & CEPH_FEATURE_MDSENC) { - encode(caps, data); - header.version = HEAD_VERSION; - } else if (features & CEPH_FEATURE_FLOCK) { - // encode with old cap_reconnect_t encoding - __u32 n = caps.size(); - encode(n, data); - for (map::iterator p = caps.begin(); p != caps.end(); ++p) { - encode(p->first, data); - p->second.encode_old(data); - } - header.version = 2; + + if (header.version >= 4) { + encode(caps, data); + encode(realms, data); } else { // compat crap - header.version = 1; - map ocaps; - for (map::iterator p = caps.begin(); p != caps.end(); p++) - ocaps[p->first] = p->second; - encode(ocaps, data); + if (header.version == 3) { + encode(caps, data); + } else if (header.version == 2) { + __u32 n = caps.size(); + encode(n, data); + for (auto& p : caps) { + encode(p.first, data); + p.second.encode_old(data); + } + } else { + map ocaps; + for (auto& p : caps) { + ocaps[p.first] = p.second; + encode(ocaps, data); + } + for (auto& r : realms) + r.encode_old(data); + } } - encode_nohead(realms, data); } void decode_payload() override { auto p = data.cbegin(); - if (header.version 
>= 3) { - // new protocol + if (header.version >= 4) { decode(caps, p); - } else if (header.version == 2) { - __u32 n; - decode(n, p); - inodeno_t ino; - while (n--) { - decode(ino, p); - caps[ino].decode_old(p); - } + decode(realms, p); } else { // compat crap - map ocaps; - decode(ocaps, p); - for (map::iterator q = ocaps.begin(); q != ocaps.end(); q++) - caps[q->first] = q->second; - } - while (!p.end()) { - realms.push_back(ceph_mds_snaprealm_reconnect()); - decode(realms.back(), p); + if (header.version == 3) { + decode(caps, p); + } else if (header.version == 2) { + __u32 n; + decode(n, p); + inodeno_t ino; + while (n--) { + decode(ino, p); + caps[ino].decode_old(p); + } + } else { + map ocaps; + decode(ocaps, p); + for (auto &q : ocaps) + caps[q.first] = q.second; + } + while (!p.end()) { + realms.push_back(snaprealm_reconnect_t()); + realms.back().decode_old(p); + } } } - }; -- 2.39.5