From f1baa79cd09a4a33bdec0a1ab7ab13c933ff8d72 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 18 Sep 2012 18:35:53 -0700 Subject: [PATCH] mds: move SnapRealm into its own h/cc files Signed-off-by: Sage Weil --- src/Makefile.am | 2 + src/mds/CInode.h | 2 +- src/mds/SnapRealm.cc | 488 ++++++++++++++++++++++++++++++++++++++++++ src/mds/SnapRealm.h | 148 +++++++++++++ src/mds/snap.cc | 492 +------------------------------------------ src/mds/snap.h | 130 ------------ 6 files changed, 650 insertions(+), 612 deletions(-) create mode 100644 src/mds/SnapRealm.cc create mode 100644 src/mds/SnapRealm.h diff --git a/src/Makefile.am b/src/Makefile.am index b0b7179dda448..6e5054822acdf 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1364,6 +1364,7 @@ libmds_a_SOURCES = \ mds/MDSTableServer.cc \ mds/AnchorServer.cc \ mds/AnchorClient.cc \ + mds/SnapRealm.cc \ mds/SnapServer.cc \ mds/snap.cc \ mds/SessionMap.cc \ @@ -1690,6 +1691,7 @@ noinst_HEADERS = \ mds/SessionMap.h\ mds/SimpleLock.h\ mds/SnapClient.h\ + mds/SnapRealm.h\ mds/SnapServer.h\ mds/events/ECommitted.h\ mds/events/EExport.h\ diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 8b18ce72f1e9d..51e1eea5d5829 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -31,7 +31,7 @@ #include "ScatterLock.h" #include "LocalLock.h" #include "Capability.h" -#include "snap.h" +#include "SnapRealm.h" #include #include diff --git a/src/mds/SnapRealm.cc b/src/mds/SnapRealm.cc new file mode 100644 index 0000000000000..cc9fda7613809 --- /dev/null +++ b/src/mds/SnapRealm.cc @@ -0,0 +1,488 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "SnapRealm.h" +#include "MDCache.h" +#include "MDS.h" + +#include "messages/MClientSnap.h" + + +/* + * SnapRealm + */ + +#define dout_subsys ceph_subsys_mds +#undef dout_prefix +#define dout_prefix _prefix(_dout, mdcache->mds->get_nodeid(), inode, srnode.seq, this) +static ostream& _prefix(std::ostream *_dout, int whoami, CInode *inode, + uint64_t seq, SnapRealm *realm) { + return *_dout << " mds." << whoami + << ".cache.snaprealm(" << inode->ino() + << " seq " << seq << " " << realm << ") "; +} + +ostream& operator<<(ostream& out, const SnapRealm& realm) +{ + out << "snaprealm(" << realm.inode->ino() + << " seq " << realm.srnode.seq + << " lc " << realm.srnode.last_created + << " cr " << realm.srnode.created; + if (realm.srnode.created != realm.srnode.current_parent_since) + out << " cps " << realm.srnode.current_parent_since; + out << " snaps=" << realm.srnode.snaps; + if (realm.srnode.past_parents.size()) { + out << " past_parents=("; + for (map::const_iterator p = realm.srnode.past_parents.begin(); + p != realm.srnode.past_parents.end(); + p++) { + if (p != realm.srnode.past_parents.begin()) out << ","; + out << p->second.first << "-" << p->first + << "=" << p->second.ino; + } + out << ")"; + } + out << " " << &realm << ")"; + return out; +} + + + + +void SnapRealm::add_open_past_parent(SnapRealm *parent) +{ + open_past_parents[parent->inode->ino()] = parent; + parent->inode->get(CInode::PIN_PASTSNAPPARENT); +} + +bool SnapRealm::_open_parents(Context *finish, snapid_t first, snapid_t last) +{ + dout(10) << "open_parents [" << first << "," << last << "]" << dendl; + if (open) + return true; + + // make sure my current parents' parents are open... + if (parent) { + dout(10) << " current parent [" << srnode.current_parent_since << ",head] is " << *parent + << " on " << *parent->inode << dendl; + if (last >= srnode.current_parent_since && + !parent->_open_parents(finish, MAX(first, srnode.current_parent_since), last)) + return false; + } + + // and my past parents too! + assert(srnode.past_parents.size() >= open_past_parents.size()); + if (srnode.past_parents.size() > open_past_parents.size()) { + for (map::iterator p = srnode.past_parents.begin(); + p != srnode.past_parents.end(); + p++) { + dout(10) << " past_parent [" << p->second.first << "," << p->first << "] is " + << p->second.ino << dendl; + CInode *parent = mdcache->get_inode(p->second.ino); + if (!parent) { + mdcache->open_remote_ino(p->second.ino, finish); + return false; + } + assert(parent->snaprealm); // hmm! + if (!open_past_parents.count(p->second.ino)) { + add_open_past_parent(parent->snaprealm); + } + if (!parent->snaprealm->_open_parents(finish, p->second.first, p->first)) + return false; + } + } + + open = true; + return true; +} + +bool SnapRealm::have_past_parents_open(snapid_t first, snapid_t last) +{ + dout(10) << "have_past_parents_open [" << first << "," << last << "]" << dendl; + if (open) + return true; + + for (map::iterator p = srnode.past_parents.lower_bound(first); + p != srnode.past_parents.end(); + p++) { + if (p->second.first > last) + break; + dout(10) << " past parent [" << p->second.first << "," << p->first << "] was " + << p->second.ino << dendl; + if (open_past_parents.count(p->second.ino) == 0) { + dout(10) << " past parent " << p->second.ino << " is not open" << dendl; + return false; + } + if (!open_past_parents[p->second.ino]->have_past_parents_open(MAX(first, p->second.first), + MIN(last, p->first))) + return false; + } + + open = true; + return true; +} + +void SnapRealm::close_parents() +{ + for (map::iterator p = open_past_parents.begin(); + p != open_past_parents.end(); + p++) + p->second->inode->put(CInode::PIN_PASTSNAPPARENT); + open_past_parents.clear(); +} + + +/* + * get list of snaps for this realm. we must include parents' snaps + * for the intervals during which they were our parent. + */ +void SnapRealm::build_snap_set(set &s, + snapid_t& max_seq, snapid_t& max_last_created, snapid_t& max_last_destroyed, + snapid_t first, snapid_t last) +{ + dout(10) << "build_snap_set [" << first << "," << last << "] on " << *this << dendl; + + if (srnode.seq > max_seq) + max_seq = srnode.seq; + if (srnode.last_created > max_last_created) + max_last_created = srnode.last_created; + if (srnode.last_destroyed > max_last_destroyed) + max_last_destroyed = srnode.last_destroyed; + + // include my snaps within interval [first,last] + for (map::iterator p = srnode.snaps.lower_bound(first); // first element >= first + p != srnode.snaps.end() && p->first <= last; + p++) + s.insert(p->first); + + // include snaps for parents during intervals that intersect [first,last] + for (map::iterator p = srnode.past_parents.lower_bound(first); + p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; + p++) { + CInode *oldparent = mdcache->get_inode(p->second.ino); + assert(oldparent); // call open_parents first! + assert(oldparent->snaprealm); + oldparent->snaprealm->build_snap_set(s, max_seq, max_last_created, max_last_destroyed, + MAX(first, p->second.first), + MIN(last, p->first)); + } + if (srnode.current_parent_since <= last && parent) + parent->build_snap_set(s, max_seq, max_last_created, max_last_destroyed, + MAX(first, srnode.current_parent_since), last); +} + + +void SnapRealm::check_cache() +{ + if (cached_seq >= srnode.seq) + return; + + cached_snaps.clear(); + cached_snap_context.clear(); + + cached_last_created = srnode.last_created; + cached_last_destroyed = srnode.last_destroyed; + cached_seq = srnode.seq; + build_snap_set(cached_snaps, cached_seq, cached_last_created, cached_last_destroyed, + 0, CEPH_NOSNAP); + + cached_snap_trace.clear(); + build_snap_trace(cached_snap_trace); + + dout(10) << "check_cache rebuilt " << cached_snaps + << " seq " << srnode.seq + << " cached_seq " << cached_seq + << " cached_last_created " << cached_last_created + << " cached_last_destroyed " << cached_last_destroyed + << ")" << dendl; +} + +const set& SnapRealm::get_snaps() +{ + check_cache(); + dout(10) << "get_snaps " << cached_snaps + << " (seq " << srnode.seq << " cached_seq " << cached_seq << ")" + << dendl; + return cached_snaps; +} + +/* + * build vector in reverse sorted order + */ +const SnapContext& SnapRealm::get_snap_context() +{ + check_cache(); + + if (!cached_snap_context.seq) { + cached_snap_context.seq = cached_seq; + cached_snap_context.snaps.resize(cached_snaps.size()); + unsigned i = 0; + for (set::reverse_iterator p = cached_snaps.rbegin(); + p != cached_snaps.rend(); + p++) + cached_snap_context.snaps[i++] = *p; + } + + return cached_snap_context; +} + +void SnapRealm::get_snap_info(map& infomap, snapid_t first, snapid_t last) +{ + const set& snaps = get_snaps(); + dout(10) << "get_snap_info snaps " << snaps << dendl; + + // include my snaps within interval [first,last] + for (map::iterator p = srnode.snaps.lower_bound(first); // first element >= first + p != srnode.snaps.end() && p->first <= last; + p++) + infomap[p->first] = &p->second; + + // include snaps for parents during intervals that intersect [first,last] + for (map::iterator p = srnode.past_parents.lower_bound(first); + p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; + p++) { + CInode *oldparent = mdcache->get_inode(p->second.ino); + assert(oldparent); // call open_parents first! + assert(oldparent->snaprealm); + oldparent->snaprealm->get_snap_info(infomap, + MAX(first, p->second.first), + MIN(last, p->first)); + } + if (srnode.current_parent_since <= last && parent) + parent->get_snap_info(infomap, MAX(first, srnode.current_parent_since), last); +} + +const string& SnapRealm::get_snapname(snapid_t snapid, inodeno_t atino) +{ + if (srnode.snaps.count(snapid)) { + if (atino == inode->ino()) + return srnode.snaps[snapid].name; + else + return srnode.snaps[snapid].get_long_name(); + } + + map::iterator p = srnode.past_parents.lower_bound(snapid); + if (p != srnode.past_parents.end() && p->second.first <= snapid) { + CInode *oldparent = mdcache->get_inode(p->second.ino); + assert(oldparent); // call open_parents first! + assert(oldparent->snaprealm); + return oldparent->snaprealm->get_snapname(snapid, atino); + } + + assert(srnode.current_parent_since <= snapid); + assert(parent); + return parent->get_snapname(snapid, atino); +} + +snapid_t SnapRealm::resolve_snapname(const string& n, inodeno_t atino, snapid_t first, snapid_t last) +{ + // first try me + dout(10) << "resolve_snapname '" << n << "' in [" << first << "," << last << "]" << dendl; + + //snapid_t num; + //if (n[0] == '~') num = atoll(n.c_str()+1); + + bool actual = (atino == inode->ino()); + string pname; + inodeno_t pino; + if (!actual) { + if (!n.length() || + n[0] != '_') return 0; + int next_ = n.find('_', 1); + if (next_ < 0) return 0; + pname = n.substr(1, next_ - 1); + pino = atoll(n.c_str() + next_ + 1); + dout(10) << " " << n << " parses to name '" << pname << "' dirino " << pino << dendl; + } + + for (map::iterator p = srnode.snaps.lower_bound(first); // first element >= first + p != srnode.snaps.end() && p->first <= last; + p++) { + dout(15) << " ? " << p->second << dendl; + //if (num && p->second.snapid == num) + //return p->first; + if (actual && p->second.name == n) + return p->first; + if (!actual && p->second.name == pname && p->second.ino == pino) + return p->first; + } + + // include snaps for parents during intervals that intersect [first,last] + for (map::iterator p = srnode.past_parents.lower_bound(first); + p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; + p++) { + CInode *oldparent = mdcache->get_inode(p->second.ino); + assert(oldparent); // call open_parents first! + assert(oldparent->snaprealm); + snapid_t r = oldparent->snaprealm->resolve_snapname(n, atino, + MAX(first, p->second.first), + MIN(last, p->first)); + if (r) + return r; + } + if (parent && srnode.current_parent_since <= last) + return parent->resolve_snapname(n, atino, MAX(first, srnode.current_parent_since), last); + return 0; +} + + +void SnapRealm::adjust_parent() +{ + SnapRealm *newparent = inode->get_parent_dn()->get_dir()->get_inode()->find_snaprealm(); + if (newparent != parent) { + dout(10) << "adjust_parent " << parent << " -> " << newparent << dendl; + if (parent) + parent->open_children.erase(this); + parent = newparent; + if (parent) + parent->open_children.insert(this); + + invalidate_cached_snaps(); + } +} + +void SnapRealm::split_at(SnapRealm *child) +{ + dout(10) << "split_at " << *child + << " on " << *child->inode << dendl; + + if (!child->inode->is_dir()) { + // it's not a dir. + if (child->inode->containing_realm) { + // - no open children. + // - only need to move this child's inode's caps. + child->inode->move_to_realm(child); + } else { + // no caps, nothing to move/split. + dout(20) << " split no-op, no caps to move on file " << *child->inode << dendl; + assert(!child->inode->is_any_caps()); + } + return; + } + + // it's a dir. + + // split open_children + dout(10) << " open_children are " << open_children << dendl; + for (set::iterator p = open_children.begin(); + p != open_children.end(); ) { + SnapRealm *realm = *p; + if (realm != child && + child->inode->is_projected_ancestor_of(realm->inode)) { + dout(20) << " child gets child realm " << *realm << " on " << *realm->inode << dendl; + realm->parent = child; + child->open_children.insert(realm); + open_children.erase(p++); + } else { + dout(20) << " keeping child realm " << *realm << " on " << *realm->inode << dendl; + p++; + } + } + + // split inodes_with_caps + elist::iterator p = inodes_with_caps.begin(member_offset(CInode, item_caps)); + while (!p.end()) { + CInode *in = *p; + ++p; + + // does inode fall within the child realm? + bool under_child = false; + + if (in == child->inode) { + under_child = true; + } else { + CInode *t = in; + while (t->get_parent_dn()) { + t = t->get_parent_dn()->get_dir()->get_inode(); + if (t == child->inode) { + under_child = true; + break; + } + if (t == in) + break; + } + } + if (under_child) { + dout(20) << " child gets " << *in << dendl; + in->move_to_realm(child); + } else { + dout(20) << " keeping " << *in << dendl; + } + } + +} + +const bufferlist& SnapRealm::get_snap_trace() +{ + check_cache(); + return cached_snap_trace; +} + +void SnapRealm::build_snap_trace(bufferlist& snapbl) +{ + SnapRealmInfo info(inode->ino(), srnode.created, srnode.seq, srnode.current_parent_since); + + if (parent) { + info.h.parent = parent->inode->ino(); + if (!srnode.past_parents.empty()) { + snapid_t last = srnode.past_parents.rbegin()->first; + set past; + snapid_t max_seq, max_last_created, max_last_destroyed; + build_snap_set(past, max_seq, max_last_created, max_last_destroyed, 0, last); + info.prior_parent_snaps.reserve(past.size()); + for (set::reverse_iterator p = past.rbegin(); p != past.rend(); p++) + info.prior_parent_snaps.push_back(*p); + dout(10) << "build_snap_trace prior_parent_snaps from [1," << last << "] " + << info.prior_parent_snaps << dendl; + } + } else + info.h.parent = 0; + + info.my_snaps.reserve(srnode.snaps.size()); + for (map::reverse_iterator p = srnode.snaps.rbegin(); + p != srnode.snaps.rend(); + p++) + info.my_snaps.push_back(p->first); + dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl; + + ::encode(info, snapbl); + + if (parent) + parent->build_snap_trace(snapbl); +} + + + +void SnapRealm::prune_past_parents() +{ + dout(10) << "prune_past_parents" << dendl; + check_cache(); + assert(open); + + map::iterator p = srnode.past_parents.begin(); + while (p != srnode.past_parents.end()) { + set::iterator q = cached_snaps.lower_bound(p->second.first); + if (q == cached_snaps.end() || + *q > p->first) { + dout(10) << "prune_past_parents pruning [" << p->second.first << "," << p->first + << "] " << p->second.ino << dendl; + srnode.past_parents.erase(p++); + } else { + dout(10) << "prune_past_parents keeping [" << p->second.first << "," << p->first + << "] " << p->second.ino << dendl; + p++; + } + } +} + diff --git a/src/mds/SnapRealm.h b/src/mds/SnapRealm.h new file mode 100644 index 0000000000000..a676b18aa2230 --- /dev/null +++ b/src/mds/SnapRealm.h @@ -0,0 +1,148 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_MDS_SNAPREALM_H +#define CEPH_MDS_SNAPREALM_H + +#include "mdstypes.h" +#include "snap.h" +#include "include/xlist.h" +#include "include/elist.h" +#include "common/snap_types.h" + +struct SnapRealm { + // realm state + + sr_t srnode; + + // in-memory state + MDCache *mdcache; + CInode *inode; + + bool open; // set to true once all past_parents are opened + SnapRealm *parent; + set open_children; // active children that are currently open + map open_past_parents; // these are explicitly pinned. + + // cache + snapid_t cached_seq; // max seq over self and all past+present parents. + snapid_t cached_last_created; // max last_created over all past+present parents + snapid_t cached_last_destroyed; + set cached_snaps; + SnapContext cached_snap_context; + + bufferlist cached_snap_trace; + + elist inodes_with_caps; // for efficient realm splits + map* > client_caps; // to identify clients who need snap notifications + + SnapRealm(MDCache *c, CInode *in) : + srnode(), + mdcache(c), inode(in), + open(false), parent(0), + inodes_with_caps(0) + { } + + bool exists(const string &name) { + for (map::iterator p = srnode.snaps.begin(); + p != srnode.snaps.end(); + p++) + if (p->second.name == name) + return true; + return false; + } + + bool _open_parents(Context *retryorfinish, snapid_t first=1, snapid_t last=CEPH_NOSNAP); + bool open_parents(Context *retryorfinish) { + if (!_open_parents(retryorfinish)) + return false; + delete retryorfinish; + return true; + } + bool have_past_parents_open(snapid_t first=1, snapid_t last=CEPH_NOSNAP); + void add_open_past_parent(SnapRealm *parent); + void close_parents(); + + void prune_past_parents(); + bool has_past_parents() { return !srnode.past_parents.empty(); } + + void build_snap_set(set& s, + snapid_t& max_seq, snapid_t& max_last_created, snapid_t& max_last_destroyed, + snapid_t first, snapid_t last); + void get_snap_info(map& infomap, snapid_t first=0, snapid_t last=CEPH_NOSNAP); + + const bufferlist& get_snap_trace(); + void build_snap_trace(bufferlist& snapbl); + + const string& get_snapname(snapid_t snapid, inodeno_t atino); + snapid_t resolve_snapname(const string &name, inodeno_t atino, snapid_t first=0, snapid_t last=CEPH_NOSNAP); + + void check_cache(); + const set& get_snaps(); + const SnapContext& get_snap_context(); + void invalidate_cached_snaps() { + cached_seq = 0; + } + snapid_t get_last_created() { + check_cache(); + return cached_last_created; + } + snapid_t get_last_destroyed() { + check_cache(); + return cached_last_destroyed; + } + snapid_t get_newest_snap() { + check_cache(); + if (cached_snaps.empty()) + return 0; + else + return *cached_snaps.rbegin(); + } + snapid_t get_newest_seq() { + check_cache(); + return cached_seq; + } + + snapid_t get_snap_following(snapid_t follows) { + check_cache(); + set s = get_snaps(); + set::iterator p = s.upper_bound(follows); + if (p != s.end()) + return *p; + return CEPH_NOSNAP; + } + + void adjust_parent(); + + void split_at(SnapRealm *child); + void join(SnapRealm *child); + + void add_cap(client_t client, Capability *cap) { + if (client_caps.count(client) == 0) + client_caps[client] = new xlist; + client_caps[client]->push_back(&cap->item_snaprealm_caps); + } + void remove_cap(client_t client, Capability *cap) { + cap->item_snaprealm_caps.remove_myself(); + if (client_caps[client]->empty()) { + delete client_caps[client]; + client_caps.erase(client); + } + } + +}; + +ostream& operator<<(ostream& out, const SnapRealm &realm); + +#endif diff --git a/src/mds/snap.cc b/src/mds/snap.cc index df1b73f17537f..06dc95590c977 100644 --- a/src/mds/snap.cc +++ b/src/mds/snap.cc @@ -13,10 +13,8 @@ */ #include "snap.h" -#include "MDCache.h" -#include "MDS.h" -#include "messages/MClientSnap.h" +#include "common/Formatter.h" /* * SnapInfo @@ -68,6 +66,16 @@ ostream& operator<<(ostream& out, const SnapInfo &sn) << "' " << sn.stamp << ")"; } +const string& SnapInfo::get_long_name() +{ + if (long_name.length() == 0) { + char nm[80]; + snprintf(nm, sizeof(nm), "_%s_%llu", name.c_str(), (unsigned long long)ino); + long_name = nm; + } + return long_name; +} + /* * snaplink_t */ @@ -185,481 +193,3 @@ void sr_t::generate_test_instances(list& ls) ls.back()->past_parents[12].first = 3; } - -/* - * SnapRealm - */ - -#define dout_subsys ceph_subsys_mds -#undef dout_prefix -#define dout_prefix _prefix(_dout, mdcache->mds->get_nodeid(), inode, srnode.seq, this) -static ostream& _prefix(std::ostream *_dout, int whoami, CInode *inode, - uint64_t seq, SnapRealm *realm) { - return *_dout << " mds." << whoami - << ".cache.snaprealm(" << inode->ino() - << " seq " << seq << " " << realm << ") "; -} - -ostream& operator<<(ostream& out, const SnapRealm& realm) -{ - out << "snaprealm(" << realm.inode->ino() - << " seq " << realm.srnode.seq - << " lc " << realm.srnode.last_created - << " cr " << realm.srnode.created; - if (realm.srnode.created != realm.srnode.current_parent_since) - out << " cps " << realm.srnode.current_parent_since; - out << " snaps=" << realm.srnode.snaps; - if (realm.srnode.past_parents.size()) { - out << " past_parents=("; - for (map::const_iterator p = realm.srnode.past_parents.begin(); - p != realm.srnode.past_parents.end(); - p++) { - if (p != realm.srnode.past_parents.begin()) out << ","; - out << p->second.first << "-" << p->first - << "=" << p->second.ino; - } - out << ")"; - } - out << " " << &realm << ")"; - return out; -} - - - - -void SnapRealm::add_open_past_parent(SnapRealm *parent) -{ - open_past_parents[parent->inode->ino()] = parent; - parent->inode->get(CInode::PIN_PASTSNAPPARENT); -} - -bool SnapRealm::_open_parents(Context *finish, snapid_t first, snapid_t last) -{ - dout(10) << "open_parents [" << first << "," << last << "]" << dendl; - if (open) - return true; - - // make sure my current parents' parents are open... - if (parent) { - dout(10) << " current parent [" << srnode.current_parent_since << ",head] is " << *parent - << " on " << *parent->inode << dendl; - if (last >= srnode.current_parent_since && - !parent->_open_parents(finish, MAX(first, srnode.current_parent_since), last)) - return false; - } - - // and my past parents too! - assert(srnode.past_parents.size() >= open_past_parents.size()); - if (srnode.past_parents.size() > open_past_parents.size()) { - for (map::iterator p = srnode.past_parents.begin(); - p != srnode.past_parents.end(); - p++) { - dout(10) << " past_parent [" << p->second.first << "," << p->first << "] is " - << p->second.ino << dendl; - CInode *parent = mdcache->get_inode(p->second.ino); - if (!parent) { - mdcache->open_remote_ino(p->second.ino, finish); - return false; - } - assert(parent->snaprealm); // hmm! - if (!open_past_parents.count(p->second.ino)) { - add_open_past_parent(parent->snaprealm); - } - if (!parent->snaprealm->_open_parents(finish, p->second.first, p->first)) - return false; - } - } - - open = true; - return true; -} - -bool SnapRealm::have_past_parents_open(snapid_t first, snapid_t last) -{ - dout(10) << "have_past_parents_open [" << first << "," << last << "]" << dendl; - if (open) - return true; - - for (map::iterator p = srnode.past_parents.lower_bound(first); - p != srnode.past_parents.end(); - p++) { - if (p->second.first > last) - break; - dout(10) << " past parent [" << p->second.first << "," << p->first << "] was " - << p->second.ino << dendl; - if (open_past_parents.count(p->second.ino) == 0) { - dout(10) << " past parent " << p->second.ino << " is not open" << dendl; - return false; - } - if (!open_past_parents[p->second.ino]->have_past_parents_open(MAX(first, p->second.first), - MIN(last, p->first))) - return false; - } - - open = true; - return true; -} - -void SnapRealm::close_parents() -{ - for (map::iterator p = open_past_parents.begin(); - p != open_past_parents.end(); - p++) - p->second->inode->put(CInode::PIN_PASTSNAPPARENT); - open_past_parents.clear(); -} - - -/* - * get list of snaps for this realm. we must include parents' snaps - * for the intervals during which they were our parent. - */ -void SnapRealm::build_snap_set(set &s, - snapid_t& max_seq, snapid_t& max_last_created, snapid_t& max_last_destroyed, - snapid_t first, snapid_t last) -{ - dout(10) << "build_snap_set [" << first << "," << last << "] on " << *this << dendl; - - if (srnode.seq > max_seq) - max_seq = srnode.seq; - if (srnode.last_created > max_last_created) - max_last_created = srnode.last_created; - if (srnode.last_destroyed > max_last_destroyed) - max_last_destroyed = srnode.last_destroyed; - - // include my snaps within interval [first,last] - for (map::iterator p = srnode.snaps.lower_bound(first); // first element >= first - p != srnode.snaps.end() && p->first <= last; - p++) - s.insert(p->first); - - // include snaps for parents during intervals that intersect [first,last] - for (map::iterator p = srnode.past_parents.lower_bound(first); - p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; - p++) { - CInode *oldparent = mdcache->get_inode(p->second.ino); - assert(oldparent); // call open_parents first! - assert(oldparent->snaprealm); - oldparent->snaprealm->build_snap_set(s, max_seq, max_last_created, max_last_destroyed, - MAX(first, p->second.first), - MIN(last, p->first)); - } - if (srnode.current_parent_since <= last && parent) - parent->build_snap_set(s, max_seq, max_last_created, max_last_destroyed, - MAX(first, srnode.current_parent_since), last); -} - - -void SnapRealm::check_cache() -{ - if (cached_seq >= srnode.seq) - return; - - cached_snaps.clear(); - cached_snap_context.clear(); - - cached_last_created = srnode.last_created; - cached_last_destroyed = srnode.last_destroyed; - cached_seq = srnode.seq; - build_snap_set(cached_snaps, cached_seq, cached_last_created, cached_last_destroyed, - 0, CEPH_NOSNAP); - - cached_snap_trace.clear(); - build_snap_trace(cached_snap_trace); - - dout(10) << "check_cache rebuilt " << cached_snaps - << " seq " << srnode.seq - << " cached_seq " << cached_seq - << " cached_last_created " << cached_last_created - << " cached_last_destroyed " << cached_last_destroyed - << ")" << dendl; -} - -const set& SnapRealm::get_snaps() -{ - check_cache(); - dout(10) << "get_snaps " << cached_snaps - << " (seq " << srnode.seq << " cached_seq " << cached_seq << ")" - << dendl; - return cached_snaps; -} - -/* - * build vector in reverse sorted order - */ -const SnapContext& SnapRealm::get_snap_context() -{ - check_cache(); - - if (!cached_snap_context.seq) { - cached_snap_context.seq = cached_seq; - cached_snap_context.snaps.resize(cached_snaps.size()); - unsigned i = 0; - for (set::reverse_iterator p = cached_snaps.rbegin(); - p != cached_snaps.rend(); - p++) - cached_snap_context.snaps[i++] = *p; - } - - return cached_snap_context; -} - -void SnapRealm::get_snap_info(map& infomap, snapid_t first, snapid_t last) -{ - const set& snaps = get_snaps(); - dout(10) << "get_snap_info snaps " << snaps << dendl; - - // include my snaps within interval [first,last] - for (map::iterator p = srnode.snaps.lower_bound(first); // first element >= first - p != srnode.snaps.end() && p->first <= last; - p++) - infomap[p->first] = &p->second; - - // include snaps for parents during intervals that intersect [first,last] - for (map::iterator p = srnode.past_parents.lower_bound(first); - p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; - p++) { - CInode *oldparent = mdcache->get_inode(p->second.ino); - assert(oldparent); // call open_parents first! - assert(oldparent->snaprealm); - oldparent->snaprealm->get_snap_info(infomap, - MAX(first, p->second.first), - MIN(last, p->first)); - } - if (srnode.current_parent_since <= last && parent) - parent->get_snap_info(infomap, MAX(first, srnode.current_parent_since), last); -} - -const string& SnapInfo::get_long_name() -{ - if (long_name.length() == 0) { - char nm[80]; - snprintf(nm, sizeof(nm), "_%s_%llu", name.c_str(), (unsigned long long)ino); - long_name = nm; - } - return long_name; -} - -const string& SnapRealm::get_snapname(snapid_t snapid, inodeno_t atino) -{ - if (srnode.snaps.count(snapid)) { - if (atino == inode->ino()) - return srnode.snaps[snapid].name; - else - return srnode.snaps[snapid].get_long_name(); - } - - map::iterator p = srnode.past_parents.lower_bound(snapid); - if (p != srnode.past_parents.end() && p->second.first <= snapid) { - CInode *oldparent = mdcache->get_inode(p->second.ino); - assert(oldparent); // call open_parents first! - assert(oldparent->snaprealm); - return oldparent->snaprealm->get_snapname(snapid, atino); - } - - assert(srnode.current_parent_since <= snapid); - assert(parent); - return parent->get_snapname(snapid, atino); -} - -snapid_t SnapRealm::resolve_snapname(const string& n, inodeno_t atino, snapid_t first, snapid_t last) -{ - // first try me - dout(10) << "resolve_snapname '" << n << "' in [" << first << "," << last << "]" << dendl; - - //snapid_t num; - //if (n[0] == '~') num = atoll(n.c_str()+1); - - bool actual = (atino == inode->ino()); - string pname; - inodeno_t pino; - if (!actual) { - if (!n.length() || - n[0] != '_') return 0; - int next_ = n.find('_', 1); - if (next_ < 0) return 0; - pname = n.substr(1, next_ - 1); - pino = atoll(n.c_str() + next_ + 1); - dout(10) << " " << n << " parses to name '" << pname << "' dirino " << pino << dendl; - } - - for (map::iterator p = srnode.snaps.lower_bound(first); // first element >= first - p != srnode.snaps.end() && p->first <= last; - p++) { - dout(15) << " ? " << p->second << dendl; - //if (num && p->second.snapid == num) - //return p->first; - if (actual && p->second.name == n) - return p->first; - if (!actual && p->second.name == pname && p->second.ino == pino) - return p->first; - } - - // include snaps for parents during intervals that intersect [first,last] - for (map::iterator p = srnode.past_parents.lower_bound(first); - p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; - p++) { - CInode *oldparent = mdcache->get_inode(p->second.ino); - assert(oldparent); // call open_parents first! - assert(oldparent->snaprealm); - snapid_t r = oldparent->snaprealm->resolve_snapname(n, atino, - MAX(first, p->second.first), - MIN(last, p->first)); - if (r) - return r; - } - if (parent && srnode.current_parent_since <= last) - return parent->resolve_snapname(n, atino, MAX(first, srnode.current_parent_since), last); - return 0; -} - - -void SnapRealm::adjust_parent() -{ - SnapRealm *newparent = inode->get_parent_dn()->get_dir()->get_inode()->find_snaprealm(); - if (newparent != parent) { - dout(10) << "adjust_parent " << parent << " -> " << newparent << dendl; - if (parent) - parent->open_children.erase(this); - parent = newparent; - if (parent) - parent->open_children.insert(this); - - invalidate_cached_snaps(); - } -} - -void SnapRealm::split_at(SnapRealm *child) -{ - dout(10) << "split_at " << *child - << " on " << *child->inode << dendl; - - if (!child->inode->is_dir()) { - // it's not a dir. - if (child->inode->containing_realm) { - // - no open children. - // - only need to move this child's inode's caps. - child->inode->move_to_realm(child); - } else { - // no caps, nothing to move/split. - dout(20) << " split no-op, no caps to move on file " << *child->inode << dendl; - assert(!child->inode->is_any_caps()); - } - return; - } - - // it's a dir. - - // split open_children - dout(10) << " open_children are " << open_children << dendl; - for (set::iterator p = open_children.begin(); - p != open_children.end(); ) { - SnapRealm *realm = *p; - if (realm != child && - child->inode->is_projected_ancestor_of(realm->inode)) { - dout(20) << " child gets child realm " << *realm << " on " << *realm->inode << dendl; - realm->parent = child; - child->open_children.insert(realm); - open_children.erase(p++); - } else { - dout(20) << " keeping child realm " << *realm << " on " << *realm->inode << dendl; - p++; - } - } - - // split inodes_with_caps - elist::iterator p = inodes_with_caps.begin(member_offset(CInode, item_caps)); - while (!p.end()) { - CInode *in = *p; - ++p; - - // does inode fall within the child realm? - bool under_child = false; - - if (in == child->inode) { - under_child = true; - } else { - CInode *t = in; - while (t->get_parent_dn()) { - t = t->get_parent_dn()->get_dir()->get_inode(); - if (t == child->inode) { - under_child = true; - break; - } - if (t == in) - break; - } - } - if (under_child) { - dout(20) << " child gets " << *in << dendl; - in->move_to_realm(child); - } else { - dout(20) << " keeping " << *in << dendl; - } - } - -} - -const bufferlist& SnapRealm::get_snap_trace() -{ - check_cache(); - return cached_snap_trace; -} - -void SnapRealm::build_snap_trace(bufferlist& snapbl) -{ - SnapRealmInfo info(inode->ino(), srnode.created, srnode.seq, srnode.current_parent_since); - - if (parent) { - info.h.parent = parent->inode->ino(); - if (!srnode.past_parents.empty()) { - snapid_t last = srnode.past_parents.rbegin()->first; - set past; - snapid_t max_seq, max_last_created, max_last_destroyed; - build_snap_set(past, max_seq, max_last_created, max_last_destroyed, 0, last); - info.prior_parent_snaps.reserve(past.size()); - for (set::reverse_iterator p = past.rbegin(); p != past.rend(); p++) - info.prior_parent_snaps.push_back(*p); - dout(10) << "build_snap_trace prior_parent_snaps from [1," << last << "] " - << info.prior_parent_snaps << dendl; - } - } else - info.h.parent = 0; - - info.my_snaps.reserve(srnode.snaps.size()); - for (map::reverse_iterator p = srnode.snaps.rbegin(); - p != srnode.snaps.rend(); - p++) - info.my_snaps.push_back(p->first); - dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl; - - ::encode(info, snapbl); - - if (parent) - parent->build_snap_trace(snapbl); -} - - - -void SnapRealm::prune_past_parents() -{ - dout(10) << "prune_past_parents" << dendl; - check_cache(); - assert(open); - - map::iterator p = srnode.past_parents.begin(); - while (p != srnode.past_parents.end()) { - set::iterator q = cached_snaps.lower_bound(p->second.first); - if (q == cached_snaps.end() || - *q > p->first) { - dout(10) << "prune_past_parents pruning [" << p->second.first << "," << p->first - << "] " << p->second.ino << dendl; - srnode.past_parents.erase(p++); - } else { - dout(10) << "prune_past_parents keeping [" << p->second.first << "," << p->first - << "] " << p->second.ino << dendl; - p++; - } - } -} - diff --git a/src/mds/snap.h b/src/mds/snap.h index 45c2c036677fc..068b6f17073b9 100644 --- a/src/mds/snap.h +++ b/src/mds/snap.h @@ -16,8 +16,6 @@ #define CEPH_MDS_SNAP_H #include "mdstypes.h" -#include "include/xlist.h" -#include "include/elist.h" #include "common/snap_types.h" /* @@ -93,132 +91,4 @@ struct sr_t { }; WRITE_CLASS_ENCODER(sr_t); -struct SnapRealm { - // realm state - - sr_t srnode; - - // in-memory state - MDCache *mdcache; - CInode *inode; - - bool open; // set to true once all past_parents are opened - SnapRealm *parent; - set open_children; // active children that are currently open - map open_past_parents; // these are explicitly pinned. - - // cache - snapid_t cached_seq; // max seq over self and all past+present parents. - snapid_t cached_last_created; // max last_created over all past+present parents - snapid_t cached_last_destroyed; - set cached_snaps; - SnapContext cached_snap_context; - - bufferlist cached_snap_trace; - - elist inodes_with_caps; // for efficient realm splits - map* > client_caps; // to identify clients who need snap notifications - - SnapRealm(MDCache *c, CInode *in) : - srnode(), - mdcache(c), inode(in), - open(false), parent(0), - inodes_with_caps(0) - { } - - bool exists(const string &name) { - for (map::iterator p = srnode.snaps.begin(); - p != srnode.snaps.end(); - p++) - if (p->second.name == name) - return true; - return false; - } - - bool _open_parents(Context *retryorfinish, snapid_t first=1, snapid_t last=CEPH_NOSNAP); - bool open_parents(Context *retryorfinish) { - if (!_open_parents(retryorfinish)) - return false; - delete retryorfinish; - return true; - } - bool have_past_parents_open(snapid_t first=1, snapid_t last=CEPH_NOSNAP); - void add_open_past_parent(SnapRealm *parent); - void close_parents(); - - void prune_past_parents(); - bool has_past_parents() { return !srnode.past_parents.empty(); } - - void build_snap_set(set& s, - snapid_t& max_seq, snapid_t& max_last_created, snapid_t& max_last_destroyed, - snapid_t first, snapid_t last); - void get_snap_info(map& infomap, snapid_t first=0, snapid_t last=CEPH_NOSNAP); - - const bufferlist& get_snap_trace(); - void build_snap_trace(bufferlist& snapbl); - - const string& get_snapname(snapid_t snapid, inodeno_t atino); - snapid_t resolve_snapname(const string &name, inodeno_t atino, snapid_t first=0, snapid_t last=CEPH_NOSNAP); - - void check_cache(); - const set& get_snaps(); - const SnapContext& get_snap_context(); - void invalidate_cached_snaps() { - cached_seq = 0; - } - snapid_t get_last_created() { - check_cache(); - return cached_last_created; - } - snapid_t get_last_destroyed() { - check_cache(); - return cached_last_destroyed; - } - snapid_t get_newest_snap() { - check_cache(); - if (cached_snaps.empty()) - return 0; - else - return *cached_snaps.rbegin(); - } - snapid_t get_newest_seq() { - check_cache(); - return cached_seq; - } - - snapid_t get_snap_following(snapid_t follows) { - check_cache(); - set s = get_snaps(); - set::iterator p = s.upper_bound(follows); - if (p != s.end()) - return *p; - return CEPH_NOSNAP; - } - - void adjust_parent(); - - void split_at(SnapRealm *child); - void join(SnapRealm *child); - - void add_cap(client_t client, Capability *cap) { - if (client_caps.count(client) == 0) - client_caps[client] = new xlist; - client_caps[client]->push_back(&cap->item_snaprealm_caps); - } - void remove_cap(client_t client, Capability *cap) { - cap->item_snaprealm_caps.remove_myself(); - if (client_caps[client]->empty()) { - delete client_caps[client]; - client_caps.erase(client); - } - } - -}; - -ostream& operator<<(ostream& out, const SnapRealm &realm); - - - - - #endif -- 2.39.5