The dummy global snaprealm includes all snapshots in the filesystem.
For any later snapshot, the MDS will COW the inode and preserve its snap data.
This snap data covers any possible snapshot on remote linkages
of the inode.
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
if (cur_srnode) {
new_srnode = new sr_t(*cur_srnode);
+ if (!new_srnode->past_parents.empty()) {
+ // convert past_parents to past_parent_snaps
+ assert(snaprealm);
+ auto& snaps = snaprealm->get_snaps();
+ for (auto p : snaps) {
+ if (p >= new_srnode->current_parent_since)
+ break;
+ if (!new_srnode->snaps.count(p))
+ new_srnode->past_parent_snaps.insert(p);
+ }
+ new_srnode->seq = snaprealm->get_newest_seq();
+ new_srnode->past_parents.clear();
+ }
} else {
+ if (snapid == 0)
+ snapid = mdcache->get_global_snaprealm()->get_newest_seq();
new_srnode = new sr_t();
+ new_srnode->seq = snapid;
new_srnode->created = snapid;
new_srnode->current_parent_since = get_oldest_snap();
}
++num_projected_srnodes;
}
+void CInode::mark_snaprealm_global(sr_t *new_srnode)  // Flag the given (projected) srnode as having the global snaprealm as its parent.
+{
+ assert(!is_dir());  // only non-directory inodes may be marked this way
+ new_srnode->current_parent_since = get_oldest_snap();
+ new_srnode->mark_parent_global();  // sets sr_t::PARENT_GLOBAL in new_srnode->flags
+}
+
+bool CInode::is_projected_snaprealm_global() const  // True when the projected srnode exists and has PARENT_GLOBAL set.
+{
+ const sr_t *srnode = get_projected_srnode();
+ if (srnode && srnode->is_parent_global())  // a null projected srnode is reported as "not global"
+ return true;
+ return false;
+}
+
void CInode::project_snaprealm_past_parent(SnapRealm *newparent)
{
sr_t *new_snap = project_snaprealm();
SnapRealm *oldparent;
if (!snaprealm) {
oldparent = find_snaprealm();
- new_snap->seq = oldparent->get_newest_seq();
} else {
oldparent = snaprealm->parent;
}
if (newparent != oldparent) {
- // convert past_parents to past_parent_snaps
- if (!new_snap->past_parents.empty()) {
- assert(snaprealm);
- const set<snapid_t>& snaps = snaprealm->get_snaps();
- for (auto p = snaps.begin();
- p != snaps.end() && *p < new_snap->current_parent_since;
- ++p) {
- if (!new_snap->snaps.count(*p))
- new_snap->past_parent_snaps.insert(*p);
- }
- new_snap->seq = snaprealm->get_newest_seq();
- new_snap->past_parents.clear();
- }
-
snapid_t oldparentseq = oldparent->get_newest_seq();
if (oldparentseq + 1 > new_snap->current_parent_since) {
// copy old parent's snaps
if (oldparentseq > new_snap->seq)
new_snap->seq = oldparentseq;
}
- new_snap->current_parent_since = std::max(oldparentseq, newparent->get_last_created()) + 1;
+ new_snap->current_parent_since = mdcache->get_global_snaprealm()->get_newest_seq() + 1;
}
}
dout(10) << " realm " << *snaprealm << " past_parents " << snaprealm->srnode.past_parents
<< " -> " << next_snaprealm->past_parents << dendl;
}
+ auto old_flags = snaprealm->srnode.flags;
snaprealm->srnode = *next_snaprealm;
delete next_snaprealm;
+ if ((snaprealm->srnode.flags ^ old_flags) & sr_t::PARENT_GLOBAL) {
+ snaprealm->close_parents();
+ snaprealm->adjust_parent();
+ }
+
// we should be able to open these up (or have them already be open).
bool ok = snaprealm->_open_parents(NULL);
assert(ok);
using ceph::decode;
if (snapbl.length()) {
open_snaprealm();
+ auto old_flags = snaprealm->srnode.flags;
bufferlist::iterator p = snapbl.begin();
decode(snaprealm->srnode, p);
if (is_base()) {
bool ok = snaprealm->_open_parents(NULL);
assert(ok);
+ } else {
+ if ((snaprealm->srnode.flags ^ old_flags) & sr_t::PARENT_GLOBAL) {
+ snaprealm->close_parents();
+ snaprealm->adjust_parent();
+ }
}
dout(20) << __func__ << " " << *snaprealm << dendl;
} else if (snaprealm) {
else
return NULL;
}
+
+ void mark_snaprealm_global(sr_t *new_srnode);
+ bool is_projected_snaprealm_global() const;
+
void record_snaprealm_past_parent(sr_t *new_snap, SnapRealm *newparent);
void project_snaprealm_past_parent(SnapRealm *newparent);
void early_pop_projected_snaprealm();
return (mds_rank_t)MDS_INO_STRAY_OWNER(inode.ino);
}
bool is_mdsdir() const { return MDS_INO_IS_MDSDIR(inode.ino); }
- bool is_base() const { return is_root() || is_mdsdir(); }
+ bool is_base() const { return MDS_INO_IS_BASE(inode.ino); }
bool is_system() const { return inode.ino < MDS_INO_SYSTEM_BASE; }
bool is_normal() const { return !(is_base() || is_system() || is_stray()); }
decayrate.set_halflife(g_conf->mds_decay_halflife);
did_shutdown_log_cap = false;
+
+ global_snaprealm = NULL;
}
MDCache::~MDCache()
}
if (o->is_base())
base_inodes.erase(o);
- }
+ }
// delete it
assert(o->get_num_ref() == 0);
void MDCache::open_mydir_inode(MDSInternalContextBase *c)
{
- MDSGatherBuilder gather(g_ceph_context);
-
CInode *in = create_system_inode(MDS_INO_MDSDIR(mds->get_nodeid()), S_IFDIR|0755); // initially inaccurate!
- in->fetch(gather.new_sub());
-
- gather.set_finisher(c);
- gather.activate();
+ in->fetch(c);
}
void MDCache::open_root()
}
}
+ notify_global_snaprealm_update(CEPH_SNAP_OP_UPDATE);
+
if (gather.has_subs()) {
// for multimds, must succeed the first time
assert(recovery_set.empty());
// Other rank's base inodes (when I'm stopping)
if (mds->is_stopping()) {
for (set<CInode*>::iterator p = base_inodes.begin();
- p != base_inodes.end(); ++p) {
- if (MDS_INO_MDSDIR_OWNER((*p)->ino()) != mds->get_nodeid()) {
- dout(20) << __func__ << ": maybe trimming base: " << *(*p) << dendl;
- if ((*p)->get_num_ref() == 0) {
- trim_inode(NULL, *p, NULL, expiremap);
+ p != base_inodes.end();) {
+ CInode *base_in = *p;
+ ++p;
+ if (MDS_INO_IS_MDSDIR(base_in->ino()) &&
+ MDS_INO_MDSDIR_OWNER(base_in->ino()) != mds->get_nodeid()) {
+ dout(20) << __func__ << ": maybe trimming base: " << *base_in << dendl;
+ if (base_in->get_num_ref() == 0) {
+ trim_inode(NULL, base_in, NULL, expiremap);
}
}
}
if (myin)
remove_inode(myin);
+
+ if (global_snaprealm)
+ remove_inode(global_snaprealm->inode);
// done!
dout(2) << "shutdown done." << dendl;
// -------------------------------------------------------------------------------
// SNAPREALMS
+void MDCache::create_global_snaprealm()  // Create the dummy MDS_INO_GLOBAL_SNAPREALM inode and remember its snaprealm.
+{
+ CInode *in = new CInode(this); // dummy inode
+ create_unlinked_system_inode(in, MDS_INO_GLOBAL_SNAPREALM, S_IFDIR|0755);
+ add_inode(in);
+ global_snaprealm = in->snaprealm;  // NOTE(review): presumably create_unlinked_system_inode() has opened in->snaprealm by now -- confirm
+}
+
struct C_MDC_snaprealm_create_finish : public MDCacheLogContext {
MDRequestRef mdr;
MutationRef mut;
void MDCache::send_snap_update(CInode *in, version_t stid, int snap_op)
{
dout(10) << __func__ << " " << *in << " stid " << stid << dendl;
-
- bufferlist snap_blob;
- in->encode_snap(snap_blob);
+ assert(in->is_auth());  // callers must hold the authoritative copy of `in`
set<mds_rank_t> mds_set;
- mds->mdsmap->get_mds_set_lower_bound(mds_set, MDSMap::STATE_RESOLVE);
- mds_set.erase(mds->get_nodeid());
- for (auto p : mds_set) {
- MMDSSnapUpdate *m = new MMDSSnapUpdate(in->ino(), stid, snap_op);
- m->snap_blob = snap_blob;
- mds->send_message_mds(m, p);
+ if (stid > 0) {  // non-zero stid: target every MDS at or beyond STATE_RESOLVE, except ourselves
+ mds->mdsmap->get_mds_set_lower_bound(mds_set, MDSMap::STATE_RESOLVE);
+ mds_set.erase(mds->get_nodeid());
+ } else {  // stid == 0: target only the ranks that replicate `in`
+ in->list_replicas(mds_set);
+ }
+
+ if (!mds_set.empty()) {  // skip encoding the snap blob when there is nobody to send it to
+ bufferlist snap_blob;
+ in->encode_snap(snap_blob);
+
+ for (auto p : mds_set) {
+ MMDSSnapUpdate *m = new MMDSSnapUpdate(in->ino(), stid, snap_op);
+ m->snap_blob = snap_blob;
+ mds->send_message_mds(m, p);
+ }
}
+
+ if (stid > 0)  // clients get a global-snaprealm notification only for non-zero stid updates
+ notify_global_snaprealm_update(snap_op);
}
void MDCache::handle_snap_update(MMDSSnapUpdate *m)
return;
}
- mds->snapclient->notify_commit(m->get_tid());
+ // null rejoin_done means open_snaprealms() has already been called
+ bool notify_clients = mds->get_state() > MDSMap::STATE_REJOIN ||
+ (mds->is_rejoin() && !rejoin_done);
+
+ if (m->get_tid() > 0) {
+ mds->snapclient->notify_commit(m->get_tid());
+ if (notify_clients)
+ notify_global_snaprealm_update(m->get_snap_op());
+ }
CInode *in = get_inode(m->get_ino());
if (in) {
bufferlist::iterator p = m->snap_blob.begin();
in->decode_snap(p);
- bool notify_clients = true;
- if (mds->is_rejoin()) {
- if (rejoin_done) {
- // rejoin_done is non-null, it means open_snaprealms() hasn't been called
- if (!rejoin_pending_snaprealms.count(in)) {
- in->get(CInode::PIN_OPENINGSNAPPARENTS);
- rejoin_pending_snaprealms.insert(in);
- }
- notify_clients = false;
+ if (!notify_clients) {
+ if (!rejoin_pending_snaprealms.count(in)) {
+ in->get(CInode::PIN_OPENINGSNAPPARENTS);
+ rejoin_pending_snaprealms.insert(in);
}
}
do_realm_invalidate_and_update_notify(in, m->get_snap_op(), notify_clients);
m->put();
}
+void MDCache::notify_global_snaprealm_update(int snap_op)  // Send the global snaprealm's snap trace to every open or stale client session.
+{
+ if (snap_op != CEPH_SNAP_OP_DESTROY)  // every op other than DESTROY is reported to clients as UPDATE
+ snap_op = CEPH_SNAP_OP_UPDATE;
+ set<Session*> sessions;
+ mds->sessionmap.get_client_session_set(sessions);
+ for (auto &session : sessions) {
+ if (!session->is_open() && !session->is_stale())  // skip sessions that are neither open nor stale
+ continue;
+ MClientSnap *update = new MClientSnap(snap_op);
+ update->head.split = global_snaprealm->inode->ino();  // global_snaprealm is dereferenced unchecked: it must already have been created
+ update->bl = global_snaprealm->get_snap_trace();
+ mds->send_message_client_counted(update, session);  // NOTE(review): presumably takes ownership of `update` -- confirm
+ }
+}
+
// -------------------------------------------------------------------------------
// STRAYS
void kick_find_ino_peers(mds_rank_t who);
// -- snaprealms --
+private:
+ SnapRealm *global_snaprealm;
public:
+ SnapRealm *get_global_snaprealm() const { return global_snaprealm; }
+ void create_global_snaprealm();
void snaprealm_create(MDRequestRef& mdr, CInode *in);
void _snaprealm_create_finish(MDRequestRef& mdr, MutationRef& mut, CInode *in);
void do_realm_invalidate_and_update_notify(CInode *in, int snapop, bool notify_clients=true);
void send_snap_update(CInode *in, version_t stid, int snap_op);
void handle_snap_update(MMDSSnapUpdate *m);
+ void notify_global_snaprealm_update(int snap_op);
// -- stray --
public:
MDSGatherBuilder gather(g_ceph_context,
new C_MDS_BootStart(this, MDS_BOOT_PREPARE_LOG));
- mdcache->open_mydir_inode(gather.new_sub());
+ mdcache->open_mydir_inode(gather.new_sub());
+
+ mdcache->create_global_snaprealm();
if (is_starting() ||
whoami == mdsmap->get_root()) { // load root inode off disk if we are auth
dout(3) << "boot_create creating mydir hierarchy" << dendl;
mdcache->create_mydir_hierarchy(fin.get());
+ dout(3) << "boot_create creating global snaprealm" << dendl;
+ mdcache->create_global_snaprealm();
+
// fixme: fake out inotable (reset, pretend loaded)
dout(10) << "boot_create creating fresh inotable table" << dendl;
inotable->reset();
if (new_realm) {
int op = CEPH_SNAP_OP_SPLIT;
+ mds->mdcache->send_snap_update(in, 0, op);
mds->mdcache->do_realm_invalidate_and_update_notify(in, op);
}
}
xlocks.insert(&targeti->linklock);
+ xlocks.insert(&targeti->snaplock);
+ rdlocks.erase(&targeti->snaplock);
if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
return;
CInode *targeti;
version_t dnpv;
version_t tipv;
+ bool adjust_realm;
public:
C_MDS_link_local_finish(Server *s, MDRequestRef& r, CDentry *d, CInode *ti,
- version_t dnpv_, version_t tipv_) :
+ version_t dnpv_, version_t tipv_, bool ar) :
ServerLogContext(s, r), dn(d), targeti(ti),
- dnpv(dnpv_), tipv(tipv_) { }
+ dnpv(dnpv_), tipv(tipv_), adjust_realm(ar) { }
void finish(int r) override {
assert(r == 0);
- server->_link_local_finish(mdr, dn, targeti, dnpv, tipv);
+ server->_link_local_finish(mdr, dn, targeti, dnpv, tipv, adjust_realm);
}
};
pi.inode.change_attr++;
pi.inode.version = tipv;
+ bool adjust_realm = false;
+ if (!targeti->is_projected_snaprealm_global()) {
+ sr_t *newsnap = targeti->project_snaprealm();
+ targeti->mark_snaprealm_global(newsnap);
+ adjust_realm = true;
+ }
+
// log + wait
EUpdate *le = new EUpdate(mdlog, "link_local");
mdlog->start_entry(le);
// do this after predirty_*, to avoid funky extra dnl arg
dn->push_projected_linkage(targeti->ino(), targeti->d_type());
- journal_and_reply(mdr, targeti, dn, le, new C_MDS_link_local_finish(this, mdr, dn, targeti, dnpv, tipv));
+ journal_and_reply(mdr, targeti, dn, le,
+ new C_MDS_link_local_finish(this, mdr, dn, targeti, dnpv, tipv, adjust_realm));
}
void Server::_link_local_finish(MDRequestRef& mdr, CDentry *dn, CInode *targeti,
- version_t dnpv, version_t tipv)
+ version_t dnpv, version_t tipv, bool adjust_realm)
{
dout(10) << "_link_local_finish " << *dn << " to " << *targeti << dendl;
MDRequestRef null_ref;
mdcache->send_dentry_link(dn, null_ref);
+ if (adjust_realm) {
+ int op = CEPH_SNAP_OP_SPLIT;
+ mds->mdcache->send_snap_update(targeti, 0, op);
+ mds->mdcache->do_realm_invalidate_and_update_notify(targeti, op);
+ }
+
// bump target popularity
mds->balancer->hit_inode(mdr->get_mds_stamp(), targeti, META_POP_IWR);
mds->balancer->hit_dir(mdr->get_mds_stamp(), dn->get_dir(), META_POP_IWR);
class C_MDS_SlaveLinkPrep : public ServerLogContext {
CInode *targeti;
+ bool adjust_realm;
public:
- C_MDS_SlaveLinkPrep(Server *s, MDRequestRef& r, CInode *t) :
- ServerLogContext(s, r), targeti(t) { }
+ C_MDS_SlaveLinkPrep(Server *s, MDRequestRef& r, CInode *t, bool ar) :
+ ServerLogContext(s, r), targeti(t), adjust_realm(ar) { }
void finish(int r) override {
assert(r == 0);
- server->_logged_slave_link(mdr, targeti);
+ server->_logged_slave_link(mdr, targeti, adjust_realm);
}
};
// update journaled target inode
bool inc;
+ bool adjust_realm = false;
if (mdr->slave_request->get_op() == MMDSSlaveRequest::OP_LINKPREP) {
inc = true;
pi.inode.nlink++;
+ if (!targeti->is_projected_snaprealm_global()) {
+ sr_t *newsnap = targeti->project_snaprealm();
+ targeti->mark_snaprealm_global(newsnap);
+ adjust_realm = true;
+ }
} else {
inc = false;
pi.inode.nlink--;
rollback.old_dir_mtime = pf->fragstat.mtime;
rollback.old_dir_rctime = pf->rstat.rctime;
rollback.was_inc = inc;
+ if (adjust_realm) {
+ if (targeti->snaprealm) {
+ encode(true, rollback.snapbl);
+ targeti->encode_snap_blob(rollback.snapbl);
+ } else {
+ encode(false, rollback.snapbl);
+ }
+ }
encode(rollback, le->rollback);
mdr->more()->rollback_bl = le->rollback;
mdr->more()->slave_commit = new C_MDS_SlaveLinkCommit(this, mdr, targeti);
mdr->more()->slave_update_journaled = true;
- submit_mdlog_entry(le, new C_MDS_SlaveLinkPrep(this, mdr, targeti),
+ submit_mdlog_entry(le, new C_MDS_SlaveLinkPrep(this, mdr, targeti, adjust_realm),
mdr, __func__);
mdlog->flush();
}
-void Server::_logged_slave_link(MDRequestRef& mdr, CInode *targeti)
+void Server::_logged_slave_link(MDRequestRef& mdr, CInode *targeti, bool adjust_realm)
{
dout(10) << "_logged_slave_link " << *mdr
<< " " << *targeti << dendl;
mdr->slave_request->put();
mdr->slave_request = 0;
+ if (adjust_realm) {
+ int op = CEPH_SNAP_OP_SPLIT;
+ mds->mdcache->send_snap_update(targeti, 0, op);
+ mds->mdcache->do_realm_invalidate_and_update_notify(targeti, op);
+ }
+
// ack
if (!mdr->aborted) {
MMDSSlaveRequest *reply = new MMDSSlaveRequest(mdr->reqid, mdr->attempt,
struct C_MDS_LoggedLinkRollback : public ServerLogContext {
MutationRef mut;
- C_MDS_LoggedLinkRollback(Server *s, MutationRef& m, MDRequestRef& r) : ServerLogContext(s, r), mut(m) {}
+ map<client_t,MClientSnap*> splits;
+ C_MDS_LoggedLinkRollback(Server *s, MutationRef& m, MDRequestRef& r,
+ map<client_t,MClientSnap*>& _splits) :
+ ServerLogContext(s, r), mut(m) {
+ splits.swap(_splits);
+ }
void finish(int r) override {
- server->_link_rollback_finish(mut, mdr);
+ server->_link_rollback_finish(mut, mdr, splits);
}
};
else
pi.inode.nlink++;
+ map<client_t,MClientSnap*> splits;
+ if (rollback.snapbl.length() && in->snaprealm) {
+ bool hadrealm;
+ bufferlist::iterator p = rollback.snapbl.begin();
+ decode(hadrealm, p);
+ if (hadrealm) {
+ if (!mds->is_resolve()) {
+ sr_t *new_srnode = new sr_t();
+ decode(*new_srnode, p);
+ in->project_snaprealm(new_srnode);
+ } else {
+ decode(in->snaprealm->srnode, p);
+ }
+ } else {
+ SnapRealm *realm = parent->get_inode()->find_snaprealm();
+ if (!mds->is_resolve())
+ mdcache->prepare_realm_merge(in->snaprealm, realm, splits);
+ in->project_snaprealm(NULL);
+ }
+ }
+
// journal it
ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_link_rollback", rollback.reqid, master,
ESlaveUpdate::OP_ROLLBACK, ESlaveUpdate::LINK);
le->commit.add_dir(parent, true);
le->commit.add_primary_dentry(in->get_projected_parent_dn(), 0, true);
- submit_mdlog_entry(le, new C_MDS_LoggedLinkRollback(this, mut, mdr),
+ submit_mdlog_entry(le, new C_MDS_LoggedLinkRollback(this, mut, mdr, splits),
mdr, __func__);
mdlog->flush();
}
-void Server::_link_rollback_finish(MutationRef& mut, MDRequestRef& mdr)
+void Server::_link_rollback_finish(MutationRef& mut, MDRequestRef& mdr,
+ map<client_t,MClientSnap*>& splits)
{
dout(10) << "_link_rollback_finish" << dendl;
assert(g_conf->mds_kill_link_at != 10);
mut->apply();
+
+ if (!mds->is_resolve())
+ mdcache->send_snaps(splits);
+
if (mdr)
mdcache->request_finish(mdr);
rollback.stray.dname = straydn->get_name();
}
if (mdr->slave_request->desti_snapbl.length()) {
- assert(destdnl->is_primary());
CInode *oldin = destdnl->get_inode();
if (oldin->snaprealm) {
encode(true, rollback.desti_snapbl);
}
}
if (mdr->slave_request->srci_snapbl.length()) {
- assert(srcdnl->is_primary());
if (srci->snaprealm) {
encode(true, rollback.srci_snapbl);
srci->encode_snap_blob(rollback.srci_snapbl);
// link
void handle_client_link(MDRequestRef& mdr);
void _link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti);
- void _link_local_finish(MDRequestRef& mdr,
- CDentry *dn, CInode *targeti,
- version_t, version_t);
+ void _link_local_finish(MDRequestRef& mdr, CDentry *dn, CInode *targeti,
+ version_t, version_t, bool);
void _link_remote(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti);
void _link_remote_finish(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti,
version_t);
void handle_slave_link_prep(MDRequestRef& mdr);
- void _logged_slave_link(MDRequestRef& mdr, CInode *targeti);
+ void _logged_slave_link(MDRequestRef& mdr, CInode *targeti, bool adjust_realm);
void _commit_slave_link(MDRequestRef& mdr, int r, CInode *targeti);
void _committed_slave(MDRequestRef& mdr); // use for rename, too
void handle_slave_link_prep_ack(MDRequestRef& mdr, MMDSSlaveRequest *m);
void do_link_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr);
- void _link_rollback_finish(MutationRef& mut, MDRequestRef& mdr);
+ void _link_rollback_finish(MutationRef& mut, MDRequestRef& mdr,
+ map<client_t,MClientSnap*>& split);
// unlink
void handle_client_unlink(MDRequestRef& mdr);
}
out << ")";
}
+
+ if (realm.srnode.is_parent_global())
+ out << " global ";
out << " " << &realm << ")";
return out;
}
+SnapRealm::SnapRealm(MDCache *c, CInode *in) :  // Construct a not-yet-open realm with no parent for inode `in`.
+ mdcache(c), inode(in), open(false), parent(0),
+ num_open_past_parents(0), inodes_with_caps(0)
+{
+ global = (inode->ino() == MDS_INO_GLOBAL_SNAPREALM);  // true only for the realm of the reserved global-snaprealm inode
+}
void SnapRealm::add_open_past_parent(SnapRealm *parent, snapid_t last)
{
{
dout(10) << "build_snap_set on " << *this << dendl;
- cached_seq = srnode.seq;
- cached_last_created = srnode.last_created;
cached_snaps.clear();
+ if (global) {
+ mdcache->mds->snapclient->get_snaps(cached_snaps);
+ return;
+ }
+
// include my snaps
for (const auto& p : srnode.snaps)
cached_snaps.insert(p.first);
void SnapRealm::check_cache() const
{
assert(have_past_parents_open());
+ snapid_t seq;
+ snapid_t last_created;
snapid_t last_destroyed = mdcache->mds->snapclient->get_last_destroyed();
- if (cached_seq >= srnode.seq &&
+ if (global || srnode.is_parent_global()) {
+ last_created = mdcache->mds->snapclient->get_last_created();
+ seq = MAX(last_created, last_destroyed);
+ } else {
+ last_created = srnode.last_created;
+ seq = srnode.seq;
+ }
+ if (cached_seq >= seq &&
cached_last_destroyed == last_destroyed)
return;
cached_snap_context.clear();
+ cached_seq = seq;
+ cached_last_created = last_created;
cached_last_destroyed = last_destroyed;
build_snap_set();
build_snap_trace();
dout(10) << "check_cache rebuilt " << cached_snaps
- << " seq " << srnode.seq
+ << " seq " << seq
<< " cached_seq " << cached_seq
<< " cached_last_created " << cached_last_created
<< " cached_last_destroyed " << cached_last_destroyed
void SnapRealm::adjust_parent()
{
- CDentry *pdn = inode->get_parent_dn();
- SnapRealm *newparent = pdn ? pdn->get_dir()->get_inode()->find_snaprealm() : NULL;
+ SnapRealm *newparent;
+ if (srnode.is_parent_global()) {
+ newparent = mdcache->get_global_snaprealm();
+ } else {
+ CDentry *pdn = inode->get_parent_dn();
+ newparent = pdn ? pdn->get_dir()->get_inode()->find_snaprealm() : NULL;
+ }
if (newparent != parent) {
dout(10) << "adjust_parent " << parent << " -> " << newparent << dendl;
if (parent)
{
cached_snap_trace.clear();
+ if (global) {
+ SnapRealmInfo info(inode->ino(), 0, cached_seq, 0);
+ info.my_snaps.reserve(cached_snaps.size());
+ for (auto p = cached_snaps.rbegin(); p != cached_snaps.rend(); ++p)
+ info.my_snaps.push_back(*p);
+
+ dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl;
+ encode(info, cached_snap_trace);
+ return;
+ }
+
SnapRealmInfo info(inode->ino(), srnode.created, srnode.seq, srnode.current_parent_since);
if (parent) {
info.h.parent = parent->inode->ino();
dout(10) << "build_snap_trace prior_parent_snaps from [1," << *past.rbegin() << "] "
<< info.prior_parent_snaps << dendl;
}
- } else
- info.h.parent = 0;
+ }
info.my_snaps.reserve(srnode.snaps.size());
for (auto p = srnode.snaps.rbegin();
CInode *inode;
mutable bool open; // set to true once all past_parents are opened
+ bool global;
+
SnapRealm *parent;
set<SnapRealm*> open_children; // active children that are currently open
set<SnapRealm*> open_past_children; // past children who has pinned me
elist<CInode*> inodes_with_caps; // for efficient realm splits
map<client_t, xlist<Capability*>* > client_caps; // to identify clients who need snap notifications
- SnapRealm(MDCache *c, CInode *in) :
- srnode(),
- mdcache(c), inode(in),
- open(false), parent(0),
- num_open_past_parents(0),
- inodes_with_caps(0)
- { }
+ SnapRealm(MDCache *c, CInode *in);
bool exists(std::string_view name) const {
for (map<snapid_t,SnapInfo>::const_iterator p = srnode.snaps.begin();
utime_t old_ctime;
utime_t old_dir_mtime;
utime_t old_dir_rctime;
+ bufferlist snapbl;
link_rollback() : ino(0), was_inc(false) {}
void link_rollback::encode(bufferlist &bl) const
{
- ENCODE_START(2, 2, bl);
+ ENCODE_START(3, 2, bl);  // struct version bumped to 3 for snapbl; compat version stays 2
encode(reqid, bl);
encode(ino, bl);
encode(was_inc, bl);
encode(old_ctime, bl);
encode(old_dir_mtime, bl);
encode(old_dir_rctime, bl);
+ encode(snapbl, bl);  // new in v3; written last, matching the struct_v >= 3 read in decode()
ENCODE_FINISH(bl);
}
void link_rollback::decode(bufferlist::iterator &bl)
{
- DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);  // understands encodings up to struct version 3
decode(reqid, bl);
decode(ino, bl);
decode(was_inc, bl);
decode(old_ctime, bl);
decode(old_dir_mtime, bl);
decode(old_dir_rctime, bl);
+ if (struct_v >= 3)  // snapbl is present only in v3+ encodings; older ones leave it untouched
+ decode(snapbl, bl);
DECODE_FINISH(bl);
}
// so that we don't try to fragment it.
#define MDS_INO_CEPH 2
+#define MDS_INO_GLOBAL_SNAPREALM 3
+
#define MDS_INO_MDSDIR_OFFSET (1*MAX_MDS)
#define MDS_INO_STRAY_OFFSET (6*MAX_MDS)
#define MDS_INO_IS_STRAY(i) ((i) >= MDS_INO_STRAY_OFFSET && (i) < (MDS_INO_STRAY_OFFSET+(MAX_MDS*NUM_STRAY)))
#define MDS_INO_IS_MDSDIR(i) ((i) >= MDS_INO_MDSDIR_OFFSET && (i) < (MDS_INO_MDSDIR_OFFSET+MAX_MDS))
#define MDS_INO_MDSDIR_OWNER(i) (signed ((unsigned (i)) - MDS_INO_MDSDIR_OFFSET))
-#define MDS_INO_IS_BASE(i) (MDS_INO_ROOT == (i) || MDS_INO_IS_MDSDIR(i))
+#define MDS_INO_IS_BASE(i) ((i) < MDS_INO_MDSDIR_OFFSET || MDS_INO_IS_MDSDIR(i))
#define MDS_INO_STRAY_OWNER(i) (signed (((unsigned (i)) - MDS_INO_STRAY_OFFSET) / NUM_STRAY))
#define MDS_INO_STRAY_INDEX(i) (((unsigned (i)) - MDS_INO_STRAY_OFFSET) % NUM_STRAY)
void sr_t::encode(bufferlist& bl) const
{
- ENCODE_START(5, 4, bl);
+ ENCODE_START(6, 4, bl);  // struct version bumped to 6 for flags; compat version stays 4
encode(seq, bl);
encode(created, bl);
encode(last_created, bl);
encode(snaps, bl);
encode(past_parents, bl);
encode(past_parent_snaps, bl);
+ encode(flags, bl);  // new in v6; written last so older fields keep their positions
ENCODE_FINISH(bl);
}
void sr_t::decode(bufferlist::iterator& p)
{
- DECODE_START_LEGACY_COMPAT_LEN(5, 4, 4, p);
+ DECODE_START_LEGACY_COMPAT_LEN(6, 4, 4, p);
if (struct_v == 2) {
__u8 struct_v;
decode(struct_v, p); // yes, really: extra byte for v2 encoding only, see 6ee52e7d.
decode(past_parents, p);
if (struct_v >= 5)
decode(past_parent_snaps, p);
+ if (struct_v >= 6)
+ decode(flags, p);
+ else
+ flags = 0;
DECODE_FINISH(p);
}
map<snapid_t, snaplink_t> past_parents; // key is "last" (or NOSNAP)
set<snapid_t> past_parent_snaps;
+ __u32 flags;  // bitmask of the enum values below
+ enum {
+ PARENT_GLOBAL = 1 << 0,  // set by mark_parent_global(); makes SnapRealm::adjust_parent() pick the global snaprealm as parent
+ };
+
+ void mark_parent_global() { flags |= PARENT_GLOBAL; }  // set the bit, leaving other flags alone
+ void clear_parent_global() { flags &= ~PARENT_GLOBAL; }  // clear the bit, leaving other flags alone
+ bool is_parent_global() const { return flags & PARENT_GLOBAL; }  // non-zero mask result converts to true
+
sr_t()
: seq(0), created(0),
last_created(0), last_destroyed(0),
- current_parent_since(1)
+ current_parent_since(1), flags(0)
{}
void encode(bufferlist &bl) const;