From: Yan, Zheng Date: Tue, 4 Jul 2017 08:45:36 +0000 (+0800) Subject: mds: record snaps in old snaprealm when moving inode into new snaprealm X-Git-Tag: v13.1.0~413^2~37 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a810e9f1aa37dc16ecfc144afcaf65eae8e3560e;p=ceph.git mds: record snaps in old snaprealm when moving inode into new snaprealm To get effective snaps in past snaprealms, we just need to filter out deleted snaps by using global snap infos. This avoids the complexity of opening 'past parents' Signed-off-by: "Yan, Zheng" --- diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 8c1719dc32ed..9ca121e9f139 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -446,15 +446,34 @@ void CInode::project_past_snaprealm_parent(SnapRealm *newparent) if (!snaprealm) { oldparent = find_snaprealm(); new_snap.seq = oldparent->get_newest_seq(); - } - else + } else { oldparent = snaprealm->parent; + } if (newparent != oldparent) { + // convert past_parents to past_parent_snaps + if (!new_snap.past_parents.empty()) { + assert(snaprealm); + const set& snaps = snaprealm->get_snaps(); + for (auto p = snaps.begin(); + p != snaps.end() && *p < new_snap.current_parent_since; + ++p) { + if (!new_snap.snaps.count(*p)) + new_snap.past_parent_snaps.insert(*p); + } + new_snap.seq = snaprealm->get_newest_seq(); + new_snap.past_parents.clear(); + } + snapid_t oldparentseq = oldparent->get_newest_seq(); if (oldparentseq + 1 > new_snap.current_parent_since) { - new_snap.past_parents[oldparentseq].ino = oldparent->inode->ino(); - new_snap.past_parents[oldparentseq].first = new_snap.current_parent_since; + // copy old parent's snaps + const set& snaps = oldparent->get_snaps(); + auto p = snaps.lower_bound(new_snap.current_parent_since); + if (p != snaps.end()) + new_snap.past_parent_snaps.insert(p, snaps.end()); + if (oldparentseq > new_snap.seq) + new_snap.seq = oldparentseq; } new_snap.current_parent_since = std::max(oldparentseq, 
newparent->get_last_created()) + 1; } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 1a94ca4e5786..c44bdc92aeec 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -8522,7 +8522,7 @@ void Server::handle_client_lssnap(MDRequestRef& mdr) return; SnapRealm *realm = diri->find_snaprealm(); - map infomap; + map infomap; realm->get_snap_info(infomap, diri->get_oldest_snap()); unsigned max_entries = req->head.args.readdir.max_entries; @@ -8545,7 +8545,7 @@ void Server::handle_client_lssnap(MDRequestRef& mdr) __u32 num = 0; bufferlist dnbl; - map::iterator p = infomap.upper_bound(last_snapid); + auto p = infomap.upper_bound(last_snapid); for (; p != infomap.end() && num < max_entries; ++p) { dout(10) << p->first << " -> " << *p->second << dendl; diff --git a/src/mds/SnapRealm.cc b/src/mds/SnapRealm.cc index 4307fdd78bff..bfef001dd267 100644 --- a/src/mds/SnapRealm.cc +++ b/src/mds/SnapRealm.cc @@ -15,6 +15,7 @@ #include "SnapRealm.h" #include "MDCache.h" #include "MDSRank.h" +#include "SnapClient.h" #include @@ -45,6 +46,8 @@ ostream& operator<<(ostream& out, const SnapRealm& realm) if (realm.srnode.created != realm.srnode.current_parent_since) out << " cps " << realm.srnode.current_parent_since; out << " snaps=" << realm.srnode.snaps; + out << " past_parent_snaps=" << realm.srnode.past_parent_snaps; + if (realm.srnode.past_parents.size()) { out << " past_parents=("; for (map::const_iterator p = realm.srnode.past_parents.begin(); @@ -139,6 +142,9 @@ bool SnapRealm::_open_parents(MDSInternalContextBase *finish, snapid_t first, sn return false; } + if (!srnode.past_parent_snaps.empty()) + assert(mdcache->mds->snapclient->get_cached_version() > 0); + // and my past parents too! 
assert(srnode.past_parents.size() >= num_open_past_parents); if (srnode.past_parents.size() > num_open_past_parents) { @@ -187,6 +193,9 @@ bool SnapRealm::have_past_parents_open(snapid_t first, snapid_t last) if (open) return true; + if (!srnode.past_parent_snaps.empty()) + assert(mdcache->mds->snapclient->get_cached_version() > 0); + for (map::iterator p = srnode.past_parents.lower_bound(first); p != srnode.past_parents.end(); ++p) { @@ -237,22 +246,34 @@ void SnapRealm::build_snap_set(set &s, max_last_destroyed = srnode.last_destroyed; // include my snaps within interval [first,last] - for (map::const_iterator p = srnode.snaps.lower_bound(first); // first element >= first + for (auto p = srnode.snaps.lower_bound(first); // first element >= first p != srnode.snaps.end() && p->first <= last; ++p) s.insert(p->first); - // include snaps for parents during intervals that intersect [first,last] - for (map::const_iterator p = srnode.past_parents.lower_bound(first); - p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; - ++p) { - const CInode *oldparent = mdcache->get_inode(p->second.ino); - assert(oldparent); // call open_parents first! 
- assert(oldparent->snaprealm); - oldparent->snaprealm->build_snap_set(s, max_seq, max_last_created, max_last_destroyed, - std::max(first, p->second.first), - std::min(last, p->first)); + if (!srnode.past_parent_snaps.empty()) { + set snaps; + for (auto p = srnode.past_parent_snaps.lower_bound(first); // first element >= first + p != srnode.past_parent_snaps.end() && *p <= last; + ++p) { + snaps.insert(*p); + } + snaps = mdcache->mds->snapclient->filter(snaps); + s.insert(snaps.begin(), snaps.end()); + } else { + // include snaps for parents during intervals that intersect [first,last] + for (map::const_iterator p = srnode.past_parents.lower_bound(first); + p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; + ++p) { + const CInode *oldparent = mdcache->get_inode(p->second.ino); + assert(oldparent); // call open_parents first! + assert(oldparent->snaprealm); + oldparent->snaprealm->build_snap_set(s, max_seq, max_last_created, max_last_destroyed, + std::max(first, p->second.first), + std::min(last, p->first)); + } } + if (srnode.current_parent_since <= last && parent) parent->build_snap_set(s, max_seq, max_last_created, max_last_destroyed, std::max(first, srnode.current_parent_since), last); @@ -262,7 +283,9 @@ void SnapRealm::build_snap_set(set &s, void SnapRealm::check_cache() const { assert(open); - if (cached_seq >= srnode.seq) + uint64_t destroy_seq = mdcache->mds->snapclient->get_destroy_seq(); + if (cached_seq >= srnode.seq && + cached_destroy_seq == destroy_seq) return; cached_snaps.clear(); @@ -271,6 +294,7 @@ void SnapRealm::check_cache() const cached_last_created = srnode.last_created; cached_last_destroyed = srnode.last_destroyed; cached_seq = srnode.seq; + cached_destroy_seq = destroy_seq; build_snap_set(cached_snaps, cached_seq, cached_last_created, cached_last_destroyed, 0, CEPH_NOSNAP); @@ -280,6 +304,7 @@ void SnapRealm::check_cache() const dout(10) << "check_cache rebuilt " << cached_snaps << " seq " << srnode.seq << " 
cached_seq " << cached_seq + << " cached_destroy_seq " << cached_destroy_seq << " cached_last_created " << cached_last_created << " cached_last_destroyed " << cached_last_destroyed << ")" << dendl; @@ -314,28 +339,42 @@ const SnapContext& SnapRealm::get_snap_context() const return cached_snap_context; } -void SnapRealm::get_snap_info(map& infomap, snapid_t first, snapid_t last) +void SnapRealm::get_snap_info(map& infomap, snapid_t first, snapid_t last) { const set& snaps = get_snaps(); dout(10) << "get_snap_info snaps " << snaps << dendl; // include my snaps within interval [first,last] - for (map::iterator p = srnode.snaps.lower_bound(first); // first element >= first + for (auto p = srnode.snaps.lower_bound(first); // first element >= first p != srnode.snaps.end() && p->first <= last; ++p) infomap[p->first] = &p->second; - // include snaps for parents during intervals that intersect [first,last] - for (map::iterator p = srnode.past_parents.lower_bound(first); - p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; - ++p) { - CInode *oldparent = mdcache->get_inode(p->second.ino); - assert(oldparent); // call open_parents first! 
- assert(oldparent->snaprealm); - oldparent->snaprealm->get_snap_info(infomap, - std::max(first, p->second.first), - std::min(last, p->first)); + if (!srnode.past_parent_snaps.empty()) { + set snaps; + for (auto p = srnode.past_parent_snaps.lower_bound(first); // first element >= first + p != srnode.past_parent_snaps.end() && *p <= last; + ++p) { + snaps.insert(*p); + } + + map _infomap; + mdcache->mds->snapclient->get_snap_infos(_infomap, snaps); + infomap.insert(_infomap.begin(), _infomap.end()); + } else { + // include snaps for parents during intervals that intersect [first,last] + for (map::iterator p = srnode.past_parents.lower_bound(first); + p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; + ++p) { + CInode *oldparent = mdcache->get_inode(p->second.ino); + assert(oldparent); // call open_parents first! + assert(oldparent->snaprealm); + oldparent->snaprealm->get_snap_info(infomap, + std::max(first, p->second.first), + std::min(last, p->first)); + } } + if (srnode.current_parent_since <= last && parent) parent->get_snap_info(infomap, std::max(first, srnode.current_parent_since), last); } @@ -350,12 +389,24 @@ std::string_view SnapRealm::get_snapname(snapid_t snapid, inodeno_t atino) return srnode_snaps_entry->second.get_long_name(); } - map::iterator p = srnode.past_parents.lower_bound(snapid); - if (p != srnode.past_parents.end() && p->second.first <= snapid) { - CInode *oldparent = mdcache->get_inode(p->second.ino); - assert(oldparent); // call open_parents first! 
- assert(oldparent->snaprealm); - return oldparent->snaprealm->get_snapname(snapid, atino); + if (!srnode.past_parent_snaps.empty()) { + if (srnode.past_parent_snaps.count(snapid)) { + const SnapInfo *sinfo = mdcache->mds->snapclient->get_snap_info(snapid); + if (sinfo) { + if (atino == sinfo->ino) + return sinfo->name; + else + return sinfo->get_long_name(); + } + } + } else { + map::iterator p = srnode.past_parents.lower_bound(snapid); + if (p != srnode.past_parents.end() && p->second.first <= snapid) { + CInode *oldparent = mdcache->get_inode(p->second.ino); + assert(oldparent); // call open_parents first! + assert(oldparent->snaprealm); + return oldparent->snaprealm->get_snapname(snapid, atino); + } } assert(srnode.current_parent_since <= snapid); @@ -374,17 +425,16 @@ snapid_t SnapRealm::resolve_snapname(std::string_view n, inodeno_t atino, snapid bool actual = (atino == inode->ino()); string pname; inodeno_t pino; - if (!actual) { - if (!n.length() || - n[0] != '_') return 0; + if (n.length() && n[0] == '_') { int next_ = n.find('_', 1); - if (next_ < 0) return 0; - pname = n.substr(1, next_ - 1); - pino = atoll(n.data() + next_ + 1); - dout(10) << " " << n << " parses to name '" << pname << "' dirino " << pino << dendl; + if (next_ > 1) { + pname = n.substr(1, next_ - 1); + pino = atoll(n.data() + next_ + 1); + dout(10) << " " << n << " parses to name '" << pname << "' dirino " << pino << dendl; + } } - for (map::iterator p = srnode.snaps.lower_bound(first); // first element >= first + for (auto p = srnode.snaps.lower_bound(first); // first element >= first p != srnode.snaps.end() && p->first <= last; ++p) { dout(15) << " ? 
" << p->second << dendl; @@ -396,19 +446,40 @@ snapid_t SnapRealm::resolve_snapname(std::string_view n, inodeno_t atino, snapid return p->first; } + if (!srnode.past_parent_snaps.empty()) { + set snaps; + for (auto p = srnode.past_parent_snaps.lower_bound(first); // first element >= first + p != srnode.past_parent_snaps.end() && *p <= last; + ++p) + snaps.insert(*p); + + map _infomap; + mdcache->mds->snapclient->get_snap_infos(_infomap, snaps); + + for (auto& it : _infomap) { + dout(15) << " ? " << *it.second << dendl; + actual = (it.second->ino == atino); + if (actual && it.second->name == n) + return it.first; + if (!actual && it.second->name == pname && it.second->ino == pino) + return it.first; + } + } else { // include snaps for parents during intervals that intersect [first,last] - for (map::iterator p = srnode.past_parents.lower_bound(first); - p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; - ++p) { - CInode *oldparent = mdcache->get_inode(p->second.ino); - assert(oldparent); // call open_parents first! - assert(oldparent->snaprealm); - snapid_t r = oldparent->snaprealm->resolve_snapname(n, atino, - std::max(first, p->second.first), - std::min(last, p->first)); - if (r) - return r; + for (map::iterator p = srnode.past_parents.lower_bound(first); + p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; + ++p) { + CInode *oldparent = mdcache->get_inode(p->second.ino); + assert(oldparent); // call open_parents first! 
+ assert(oldparent->snaprealm); + snapid_t r = oldparent->snaprealm->resolve_snapname(n, atino, + std::max(first, p->second.first), + std::min(last, p->first)); + if (r) + return r; + } } + if (parent && srnode.current_parent_since <= last) return parent->resolve_snapname(n, atino, std::max(first, srnode.current_parent_since), last); return 0; @@ -497,22 +568,28 @@ void SnapRealm::build_snap_trace(bufferlist& snapbl) const if (parent) { info.h.parent = parent->inode->ino(); - if (!srnode.past_parents.empty()) { + + set past; + if (!srnode.past_parent_snaps.empty()) { + past = mdcache->mds->snapclient->filter(srnode.past_parent_snaps); + } else if (!srnode.past_parents.empty()) { snapid_t last = srnode.past_parents.rbegin()->first; - set past; snapid_t max_seq, max_last_created, max_last_destroyed; build_snap_set(past, max_seq, max_last_created, max_last_destroyed, 0, last); + } + + if (!past.empty()) { info.prior_parent_snaps.reserve(past.size()); for (set::reverse_iterator p = past.rbegin(); p != past.rend(); ++p) info.prior_parent_snaps.push_back(*p); - dout(10) << "build_snap_trace prior_parent_snaps from [1," << last << "] " + dout(10) << "build_snap_trace prior_parent_snaps from [1," << *past.rbegin() << "] " << info.prior_parent_snaps << dendl; } } else info.h.parent = 0; info.my_snaps.reserve(srnode.snaps.size()); - for (map::const_reverse_iterator p = srnode.snaps.rbegin(); + for (auto p = srnode.snaps.rbegin(); p != srnode.snaps.rend(); ++p) info.my_snaps.push_back(p->first); @@ -524,26 +601,31 @@ void SnapRealm::build_snap_trace(bufferlist& snapbl) const parent->build_snap_trace(snapbl); } - - void SnapRealm::prune_past_parents() { dout(10) << "prune_past_parents" << dendl; check_cache(); assert(open); - map::iterator p = srnode.past_parents.begin(); - while (p != srnode.past_parents.end()) { - set::iterator q = cached_snaps.lower_bound(p->second.first); - if (q == cached_snaps.end() || - *q > p->first) { - dout(10) << "prune_past_parents pruning [" << 
p->second.first << "," << p->first - << "] " << p->second.ino << dendl; - remove_open_past_parent(p->second.ino, p->first); - srnode.past_parents.erase(p++); + // convert past_parents to past_parent_snaps + if (!srnode.past_parents.empty()) { + for (auto p = cached_snaps.begin(); + p != cached_snaps.end() && *p < srnode.current_parent_since; + ++p) { + if (!srnode.snaps.count(*p)) + srnode.past_parent_snaps.insert(*p); + } + srnode.past_parents.clear(); + } + + for (auto p = srnode.past_parent_snaps.begin(); + p != srnode.past_parent_snaps.end(); ) { + auto q = cached_snaps.find(*p); + if (q == cached_snaps.end()) { + dout(10) << "prune_past_parents pruning " << *p << dendl; + srnode.past_parent_snaps.erase(p++); } else { - dout(10) << "prune_past_parents keeping [" << p->second.first << "," << p->first - << "] " << p->second.ino << dendl; + dout(10) << "prune_past_parents keeping " << *p << dendl; ++p; } } diff --git a/src/mds/SnapRealm.h b/src/mds/SnapRealm.h index c0723882996e..7abe656bae78 100644 --- a/src/mds/SnapRealm.h +++ b/src/mds/SnapRealm.h @@ -29,6 +29,7 @@ struct SnapRealm { protected: // cache mutable snapid_t cached_seq; // max seq over self and all past+present parents. 
+ mutable uint64_t cached_destroy_seq; mutable snapid_t cached_last_created; // max last_created over all past+present parents mutable snapid_t cached_last_destroyed; mutable set<snapid_t> cached_snaps; @@ -58,6 +59,7 @@ public: map<client_t, xlist<Capability*>* > client_caps; // to identify clients who need snap notifications SnapRealm(MDCache *c, CInode *in) : + cached_destroy_seq(0), srnode(), mdcache(c), inode(in), open(false), parent(0), @@ -86,12 +88,15 @@ public: void close_parents(); void prune_past_parents(); - bool has_past_parents() const { return !srnode.past_parents.empty(); } + bool has_past_parents() const { + return !srnode.past_parent_snaps.empty() || + !srnode.past_parents.empty(); + } void build_snap_set(set<snapid_t>& s, snapid_t& max_seq, snapid_t& max_last_created, snapid_t& max_last_destroyed, snapid_t first, snapid_t last) const; - void get_snap_info(map<snapid_t, SnapInfo*>& infomap, snapid_t first=0, snapid_t last=CEPH_NOSNAP); + void get_snap_info(map<snapid_t, const SnapInfo*>& infomap, snapid_t first=0, snapid_t last=CEPH_NOSNAP); const bufferlist& get_snap_trace(); void build_snap_trace(bufferlist& snapbl) const; diff --git a/src/mds/StrayManager.cc b/src/mds/StrayManager.cc index b7f94fa5ef01..a310fb34f27e 100644 --- a/src/mds/StrayManager.cc +++ b/src/mds/StrayManager.cc @@ -452,7 +452,7 @@ bool StrayManager::_eval_stray(CDentry *dn, bool delay) if (in->is_dir()) { if (in->snaprealm && in->snaprealm->has_past_parents()) { dout(20) << " directory has past parents " - << in->snaprealm->srnode.past_parents << dendl; + << in->snaprealm << dendl; if (in->state_test(CInode::STATE_MISSINGOBJS)) { mds->clog->error() << "previous attempt at committing dirfrag of ino " << in->ino() << " has failed, missing object"; @@ -507,7 +507,7 @@ bool StrayManager::_eval_stray(CDentry *dn, bool delay) // but leave the metadata intact. 
assert(!in->is_dir()); dout(20) << " file has past parents " - << in->snaprealm->srnode.past_parents << dendl; + << in->snaprealm << dendl; if (in->is_file() && in->get_projected_inode()->size > 0) { enqueue(dn, true); // truncate head objects } diff --git a/src/mds/snap.cc b/src/mds/snap.cc index 525bccf5b859..234d8d81fdda 100644 --- a/src/mds/snap.cc +++ b/src/mds/snap.cc @@ -68,7 +68,7 @@ ostream& operator<<(ostream& out, const SnapInfo &sn) << "' " << sn.stamp << ")"; } -std::string_view SnapInfo::get_long_name() +std::string_view SnapInfo::get_long_name() const { if (long_name.length() == 0) { char nm[80]; @@ -123,7 +123,7 @@ ostream& operator<<(ostream& out, const snaplink_t &l) void sr_t::encode(bufferlist& bl) const { - ENCODE_START(4, 4, bl); + ENCODE_START(5, 4, bl); encode(seq, bl); encode(created, bl); encode(last_created, bl); @@ -131,12 +131,13 @@ void sr_t::encode(bufferlist& bl) const encode(current_parent_since, bl); encode(snaps, bl); encode(past_parents, bl); + encode(past_parent_snaps, bl); ENCODE_FINISH(bl); } void sr_t::decode(bufferlist::iterator& p) { - DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, p); + DECODE_START_LEGACY_COMPAT_LEN(5, 4, 4, p); if (struct_v == 2) { __u8 struct_v; decode(struct_v, p); // yes, really: extra byte for v2 encoding only, see 6ee52e7d. 
@@ -148,6 +149,8 @@ void sr_t::decode(bufferlist::iterator& p) decode(current_parent_since, p); decode(snaps, p); decode(past_parents, p); + if (struct_v >= 5) + decode(past_parent_snaps, p); DECODE_FINISH(p); } @@ -176,6 +179,14 @@ void sr_t::dump(Formatter *f) const f->close_section(); } f->close_section(); + + f->open_array_section("past_parent_snaps"); + for (auto p = past_parent_snaps.begin(); p != past_parent_snaps.end(); ++p) { + f->open_object_section("snapinfo"); + f->dump_unsigned("snapid", *p); + f->close_section(); + } + f->close_section(); } void sr_t::generate_test_instances(list<sr_t*>& ls) @@ -193,5 +204,8 @@ void sr_t::generate_test_instances(list<sr_t*>& ls) ls.back()->snaps[123].name = "name1"; ls.back()->past_parents[12].ino = 12; ls.back()->past_parents[12].first = 3; + + ls.back()->past_parent_snaps.insert(5); + ls.back()->past_parent_snaps.insert(6); } diff --git a/src/mds/snap.h b/src/mds/snap.h index 0c44c83cfc6b..bcdeac52980f 100644 --- a/src/mds/snap.h +++ b/src/mds/snap.h @@ -29,14 +29,14 @@ struct SnapInfo { utime_t stamp; string name; - string long_name; ///< cached _$ino_$name + mutable string long_name; ///< cached _$ino_$name void encode(bufferlist &bl) const; void decode(bufferlist::iterator &bl); void dump(Formatter *f) const; static void generate_test_instances(list<SnapInfo*>& ls); - std::string_view get_long_name(); + std::string_view get_long_name() const; }; WRITE_CLASS_ENCODER(SnapInfo) @@ -77,6 +77,7 @@ struct sr_t { snapid_t current_parent_since; map<snapid_t, SnapInfo> snaps; map<snapid_t, snaplink_t> past_parents; // key is "last" (or NOSNAP) + set<snapid_t> past_parent_snaps; sr_t() : seq(0), created(0),