From 853e00a64ed71e8b459371c2113300b4ba07bea4 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 1 Aug 2017 09:52:36 +0800 Subject: [PATCH] mds: send updated snaprealms along with slave requests rmdir and rename may create/update snaprealms. If snaprealms are created/updated, encode the updated snaprealms in slave requests and dentry unlink messages, so that when rmdir or rename finishes, the snaprealms on the different MDS ranks are in sync. Signed-off-by: "Yan, Zheng" --- src/mds/CInode.cc | 83 +++++++------ src/mds/CInode.h | 11 +- src/mds/MDCache.cc | 19 ++- src/mds/Mutation.h | 8 +- src/mds/Server.cc | 205 +++++++++++++++++++++++--------- src/messages/MDentryUnlink.h | 1 + src/messages/MMDSSlaveRequest.h | 12 +- 7 files changed, 238 insertions(+), 101 deletions(-) diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index fa02cf19ee2..cc24e3b5354 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -361,27 +361,6 @@ void CInode::clear_dirty_rstat() } } -/* Ideally this function would be subsumed by project_inode but it is also - * needed by CInode::project_past_snaprealm_parent so we keep it. - */ -sr_t &CInode::project_snaprealm(projected_inode &pi) -{ - const sr_t *cur_srnode = get_projected_srnode(); - - assert(!pi.snapnode); - if (cur_srnode) { - pi.snapnode.reset(new sr_t(*cur_srnode)); - } else { - pi.snapnode.reset(new sr_t()); - pi.snapnode->created = 0; - pi.snapnode->current_parent_since = get_oldest_snap(); - } - ++num_projected_srnodes; - - dout(10) << __func__ << " " << pi.snapnode.get() << dendl; - return *pi.snapnode.get(); -} - CInode::projected_inode &CInode::project_inode(bool xattr, bool snap) { auto &pi = projected_nodes.empty() ? 
@@ -401,7 +380,7 @@ CInode::projected_inode &CInode::project_inode(bool xattr, bool snap) } if (snap) { - project_snaprealm(pi); + project_snaprealm(); } dout(15) << __func__ << " " << pi.inode.ino << dendl; @@ -436,46 +415,74 @@ void CInode::pop_and_dirty_projected_inode(LogSegment *ls) projected_nodes.pop_front(); } +sr_t *CInode::prepare_new_srnode(snapid_t snapid) +{ + const sr_t *cur_srnode = get_projected_srnode(); + sr_t *new_srnode; + + if (cur_srnode) { + new_srnode = new sr_t(*cur_srnode); + } else { + new_srnode = new sr_t(); + new_srnode->created = snapid; + new_srnode->current_parent_since = get_oldest_snap(); + } + return new_srnode; +} + +void CInode::project_snaprealm(sr_t *new_srnode) +{ + dout(10) << __func__ << " " << new_srnode << dendl; + assert(!projected_nodes.back().snapnode); + projected_nodes.back().snapnode.reset(new_srnode); + ++num_projected_srnodes; +} + +void CInode::project_snaprealm_past_parent(SnapRealm *newparent) +{ + sr_t *new_snap = project_snaprealm(); + record_snaprealm_past_parent(new_snap, newparent); +} + + /* if newparent != parent, add parent to past_parents if parent DNE, we need to find what the parent actually is and fill that in */ -void CInode::project_past_snaprealm_parent(SnapRealm *newparent) +void CInode::record_snaprealm_past_parent(sr_t *new_snap, SnapRealm *newparent) { - assert(!projected_nodes.empty()); - sr_t &new_snap = project_snaprealm(projected_nodes.back()); SnapRealm *oldparent; if (!snaprealm) { oldparent = find_snaprealm(); - new_snap.seq = oldparent->get_newest_seq(); + new_snap->seq = oldparent->get_newest_seq(); } else { oldparent = snaprealm->parent; } if (newparent != oldparent) { // convert past_parents to past_parent_snaps - if (!new_snap.past_parents.empty()) { + if (!new_snap->past_parents.empty()) { assert(snaprealm); const set& snaps = snaprealm->get_snaps(); for (auto p = snaps.begin(); - p != snaps.end() && *p < new_snap.current_parent_since; + p != snaps.end() && *p < 
new_snap->current_parent_since; ++p) { - if (!new_snap.snaps.count(*p)) - new_snap.past_parent_snaps.insert(*p); + if (!new_snap->snaps.count(*p)) + new_snap->past_parent_snaps.insert(*p); } - new_snap.seq = snaprealm->get_newest_seq(); - new_snap.past_parents.clear(); + new_snap->seq = snaprealm->get_newest_seq(); + new_snap->past_parents.clear(); } snapid_t oldparentseq = oldparent->get_newest_seq(); - if (oldparentseq + 1 > new_snap.current_parent_since) { + if (oldparentseq + 1 > new_snap->current_parent_since) { // copy old parent's snaps const set& snaps = oldparent->get_snaps(); - auto p = snaps.lower_bound(new_snap.current_parent_since); + auto p = snaps.lower_bound(new_snap->current_parent_since); if (p != snaps.end()) - new_snap.past_parent_snaps.insert(p, snaps.end()); - if (oldparentseq > new_snap.seq) - new_snap.seq = oldparentseq; + new_snap->past_parent_snaps.insert(p, snaps.end()); + if (oldparentseq > new_snap->seq) + new_snap->seq = oldparentseq; } - new_snap.current_parent_since = std::max(oldparentseq, newparent->get_last_created()) + 1; + new_snap->current_parent_since = std::max(oldparentseq, newparent->get_last_created()) + 1; } } diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 152456e8252..f82873c47fc 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -430,7 +430,6 @@ private: size_t num_projected_xattrs = 0; size_t num_projected_srnodes = 0; - sr_t &project_snaprealm(projected_inode &pi); public: CInode::projected_inode &project_inode(bool xattr = false, bool snap = false); void pop_and_dirty_projected_inode(LogSegment *ls); @@ -491,6 +490,13 @@ public: return &xattrs; } + sr_t *prepare_new_srnode(snapid_t snapid); + void project_snaprealm(sr_t *new_srnode); + sr_t *project_snaprealm(snapid_t snapid=0) { + sr_t* new_srnode = prepare_new_srnode(snapid); + project_snaprealm(new_srnode); + return new_srnode; + } const sr_t *get_projected_srnode() const { if (num_projected_srnodes > 0) { for (auto it = projected_nodes.rbegin(); it 
!= projected_nodes.rend(); ++it) @@ -502,7 +508,8 @@ public: else return NULL; } - void project_past_snaprealm_parent(SnapRealm *newparent); + void record_snaprealm_past_parent(sr_t *new_snap, SnapRealm *newparent); + void project_snaprealm_past_parent(SnapRealm *newparent); private: void pop_projected_snaprealm(sr_t *next_snaprealm); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index d4ff9f90eec..67c4a002dfd 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -10628,8 +10628,12 @@ void MDCache::send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& md // share unlink news with replicas set replicas; dn->list_replicas(replicas); - if (straydn) + bufferlist snapbl; + if (straydn) { straydn->list_replicas(replicas); + CInode *strayin = straydn->get_linkage()->get_inode(); + strayin->encode_snap_blob(snapbl); + } for (set::iterator it = replicas.begin(); it != replicas.end(); ++it) { @@ -10643,8 +10647,10 @@ void MDCache::send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& md continue; MDentryUnlink *unlink = new MDentryUnlink(dn->get_dir()->dirfrag(), dn->get_name()); - if (straydn) + if (straydn) { replicate_stray(straydn, *it, unlink->straybl); + unlink->snapbl = snapbl; + } mds->send_message_mds(unlink, *it); } } @@ -10684,6 +10690,15 @@ void MDCache::handle_dentry_unlink(MDentryUnlink *m) if (in->is_dir()) adjust_subtree_after_rename(in, dir, false); + if (m->snapbl.length()) { + bool hadrealm = (in->snaprealm ? 
true : false); + in->decode_snap_blob(m->snapbl); + assert(in->snaprealm); + assert(in->snaprealm->have_past_parents_open()); + if (!hadrealm) + do_realm_invalidate_and_update_notify(in, CEPH_SNAP_OP_SPLIT, false); + } + // send caps to auth (if we're not already) if (in->is_any_caps() && !in->state_test(CInode::STATE_EXPORTINGCAPS)) diff --git a/src/mds/Mutation.h b/src/mds/Mutation.h index 2508ab1c092..4b116c472db 100644 --- a/src/mds/Mutation.h +++ b/src/mds/Mutation.h @@ -35,6 +35,7 @@ class Session; class ScatterLock; class MClientRequest; class MMDSSlaveRequest; +struct sr_t; struct MutationImpl : public TrackedOp { metareqid_t reqid; @@ -255,6 +256,9 @@ struct MDRequestImpl : public MutationImpl { version_t stid; bufferlist snapidbl; + sr_t *srci_srnode; + sr_t *desti_srnode; + // called when slave commits or aborts Context *slave_commit; bufferlist rollback_bl; @@ -276,7 +280,9 @@ struct MDRequestImpl : public MutationImpl { srcdn_auth_mds(-1), inode_import_v(0), rename_inode(0), is_freeze_authpin(false), is_ambiguous_auth(false), is_remote_frozen_authpin(false), is_inode_exporter(false), - flock_was_waiting(false), stid(0), slave_commit(0), export_dir(NULL) { } + flock_was_waiting(false), + stid(0), srci_srnode(NULL), desti_srnode(NULL), + slave_commit(0), export_dir(NULL) { } } *_more; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 29caeb08df4..db8cdd86d2c 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -1929,10 +1929,10 @@ void Server::handle_slave_request(MMDSSlaveRequest *m) } CDentry *straydn = NULL; - if (m->stray.length() > 0) { - straydn = mdcache->add_replica_stray(m->stray, from); + if (m->straybl.length() > 0) { + straydn = mdcache->add_replica_stray(m->straybl, from); assert(straydn); - m->stray.clear(); + m->straybl.clear(); } // am i a new slave? 
@@ -6012,6 +6012,15 @@ void Server::handle_client_unlink(MDRequestRef& mdr) if (straydn) straydn->first = std::max(in->first, follows + 1); + if (dnl->is_primary() && !mdr->more()->desti_srnode) { + // prepare snaprealm blob for slave request + if (in->snaprealm || follows + 1 > in->get_oldest_snap()) { + sr_t *new_srnode = in->prepare_new_srnode(follows); + in->record_snaprealm_past_parent(new_srnode, straydn->get_dir()->inode->find_snaprealm()); + mdr->more()->desti_srnode = new_srnode; + } + } + // yay! if (in->is_dir() && in->has_subtree_root_dirfrag()) { // subtree root auths need to be witnesses @@ -6107,8 +6116,13 @@ void Server::_unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn, sna mdcache->predirty_journal_parents(mdr, &le->metablob, in, straydn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); // project snaprealm, too - if (in->snaprealm || follows + 1 > in->get_oldest_snap()) - in->project_past_snaprealm_parent(straydn->get_dir()->inode->find_snaprealm()); + if (in->snaprealm || follows + 1 > in->get_oldest_snap()) { + assert(mdr->more()->desti_srnode); + in->project_snaprealm(mdr->more()->desti_srnode); + mdr->more()->desti_srnode = NULL; + } else { + assert(!mdr->more()->desti_srnode); + } pi.inode.update_backtrace(); le->metablob.add_primary_dentry(straydn, in, true, true); @@ -6159,27 +6173,30 @@ void Server::_unlink_local_finish(MDRequestRef& mdr, // relink as stray? (i.e. was primary link?) CInode *strayin = NULL; - bool snap_is_new = false; + bool hadrealm = false; if (straydn) { dout(20) << " straydn is " << *straydn << dendl; CDentry::linkage_t *straydnl = straydn->pop_projected_linkage(); strayin = straydnl->get_inode(); - snap_is_new = strayin->snaprealm ? true : false; + hadrealm = strayin->snaprealm ? 
true : false; + mdcache->touch_dentry_bottom(straydn); } dn->mark_dirty(dnpv, mdr->ls); mdr->apply(); - - if (snap_is_new) //only new if strayin exists - mdcache->do_realm_invalidate_and_update_notify(strayin, CEPH_SNAP_OP_SPLIT, false); mdcache->send_dentry_unlink(dn, straydn, mdr); - // update subtree map? - if (straydn && strayin->is_dir()) - mdcache->adjust_subtree_after_rename(strayin, dn->get_dir(), true); + if (straydn) { + // update subtree map? + if (strayin->is_dir()) + mdcache->adjust_subtree_after_rename(strayin, dn->get_dir(), true); + + if (strayin->snaprealm && !hadrealm) + mdcache->do_realm_invalidate_and_update_notify(strayin, CEPH_SNAP_OP_SPLIT, false); + } // bump pop mds->balancer->hit_dir(mdr->get_mds_stamp(), dn->get_dir(), META_POP_IWR); @@ -6215,7 +6232,9 @@ bool Server::_rmdir_prepare_witness(MDRequestRef& mdr, mds_rank_t who, vectorsrcdnpath = filepath(trace.front()->get_dir()->ino()); for (auto dn : trace) req->srcdnpath.push_dentry(dn->get_name()); - mdcache->replicate_stray(straydn, who, req->stray); + mdcache->replicate_stray(straydn, who, req->straybl); + if (mdr->more()->desti_srnode) + encode(*mdr->more()->desti_srnode, req->desti_snapbl); req->op_stamp = mdr->get_op_stamp(); mds->send_message_mds(req, who); @@ -6280,41 +6299,24 @@ void Server::handle_slave_rmdir_prep(MDRequestRef& mdr) rollback.dest_dir = straydn->get_dir()->dirfrag(); rollback.dest_dname = straydn->get_name(); encode(rollback, mdr->more()->rollback_bl); + // FIXME: rollback snaprealm dout(20) << " rollback is " << mdr->more()->rollback_bl.length() << " bytes" << dendl; // set up commit waiter mdr->more()->slave_commit = new C_MDS_SlaveRmdirCommit(this, mdr, straydn); - if (!in->has_subtree_root_dirfrag(mds->get_nodeid())) { - dout(10) << " no auth subtree in " << *in << ", skipping journal" << dendl; - dn->get_dir()->unlink_inode(dn); - straydn->get_dir()->link_primary_inode(straydn, in); - - assert(straydn->first >= in->first); - in->first = straydn->first; - - 
mdcache->adjust_subtree_after_rename(in, dn->get_dir(), false); - - MMDSSlaveRequest *reply = new MMDSSlaveRequest(mdr->reqid, mdr->attempt, - MMDSSlaveRequest::OP_RMDIRPREPACK); - reply->mark_not_journaled(); - mds->send_message_mds(reply, mdr->slave_to_mds); - - // send caps to auth (if we're not already) - if (in->is_any_caps() && !in->state_test(CInode::STATE_EXPORTINGCAPS)) - mdcache->migrator->export_caps(in); + straydn->push_projected_linkage(in); + dn->push_projected_linkage(); - mdcache->touch_dentry_bottom(straydn); // move stray to end of lru + assert(straydn->first >= in->first); + in->first = straydn->first; - mdr->slave_request->put(); - mdr->slave_request = 0; - mdr->straydn = 0; + if (!in->has_subtree_root_dirfrag(mds->get_nodeid())) { + dout(10) << " no auth subtree in " << *in << ", skipping journal" << dendl; + _logged_slave_rmdir(mdr, dn, straydn); return; } - straydn->push_projected_linkage(in); - dn->push_projected_linkage(); - ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_rmdir", mdr->reqid, mdr->slave_to_mds, ESlaveUpdate::OP_PREPARE, ESlaveUpdate::RMDIR); mdlog->start_entry(le); @@ -6345,7 +6347,17 @@ void Server::_logged_slave_rmdir(MDRequestRef& mdr, CDentry *dn, CDentry *strayd dn->get_dir()->unlink_inode(dn); straydn->pop_projected_linkage(); dn->pop_projected_linkage(); - mdcache->adjust_subtree_after_rename(in, dn->get_dir(), true); + + mdcache->adjust_subtree_after_rename(in, dn->get_dir(), mdr->more()->slave_update_journaled); + + if (mdr->slave_request->desti_snapbl.length()) { + bool hadrealm = (in->snaprealm ? true : false); + in->decode_snap_blob(mdr->slave_request->desti_snapbl); + assert(in->snaprealm); + assert(in->snaprealm->have_past_parents_open()); + if (!hadrealm) + mdcache->do_realm_invalidate_and_update_notify(in, CEPH_SNAP_OP_SPLIT, false); + } // done. 
mdr->slave_request->put(); @@ -6355,6 +6367,8 @@ void Server::_logged_slave_rmdir(MDRequestRef& mdr, CDentry *dn, CDentry *strayd if (!mdr->aborted) { MMDSSlaveRequest *reply = new MMDSSlaveRequest(mdr->reqid, mdr->attempt, MMDSSlaveRequest::OP_RMDIRPREPACK); + if (!mdr->more()->slave_update_journaled) + reply->mark_not_journaled(); mds->send_message_mds(reply, mdr->slave_to_mds); } else { dout(10) << " abort flag set, finishing" << dendl; @@ -6886,7 +6900,7 @@ void Server::handle_client_rename(MDRequestRef& mdr) return; } - /* project_past_snaprealm_parent() will do this job + /* project_snaprealm_past_parent() will do this job * // moving between snaprealms? if (srcdnl->is_primary() && srci->is_multiversion() && !srci->snaprealm) { @@ -6927,6 +6941,29 @@ void Server::handle_client_rename(MDRequestRef& mdr) } } + // -- prepare snaprealm --- + + if (destdnl->is_primary() && !mdr->more()->desti_srnode) { + SnapRealm *dest_realm = destdir->inode->find_snaprealm(); + snapid_t follows = dest_realm->get_newest_seq(); + if (oldin->snaprealm || follows + 1 > oldin->get_oldest_snap()) { + sr_t *new_srnode = oldin->prepare_new_srnode(follows); + oldin->record_snaprealm_past_parent(new_srnode, straydn->get_dir()->inode->find_snaprealm()); + mdr->more()->desti_srnode = new_srnode; + } + } + if (srcdn->is_auth() && srcdnl->is_primary() && !mdr->more()->srci_srnode) { + SnapRealm *dest_realm = destdir->inode->find_snaprealm(); + SnapRealm *src_realm = srci->find_snaprealm(); + snapid_t follows = src_realm->get_newest_seq(); + if (src_realm != dest_realm && + (srci->snaprealm || follows + 1 > srci->get_oldest_snap())) { + sr_t *new_srnode = srci->prepare_new_srnode(follows); + srci->record_snaprealm_past_parent(new_srnode, dest_realm); + mdr->more()->srci_srnode = new_srnode; + } + } + // -- prepare witnesses -- // do srcdn auth last @@ -7074,7 +7111,12 @@ bool Server::_rename_prepare_witness(MDRequestRef& mdr, mds_rank_t who, setdestdnpath.push_dentry(dn->get_name()); if 
(straydn) - mdcache->replicate_stray(straydn, who, req->stray); + mdcache->replicate_stray(straydn, who, req->straybl); + + if (mdr->more()->srci_srnode) + encode(*mdr->more()->srci_srnode, req->srci_snapbl); + if (mdr->more()->desti_srnode) + encode(*mdr->more()->desti_srnode, req->desti_snapbl); req->srcdn_auth = mdr->more()->srcdn_auth_mds; @@ -7352,8 +7394,13 @@ void Server::_rename_prepare(MDRequestRef& mdr, assert(straydn); if (destdn->is_auth()) { // project snaprealm, too - if (oldin->snaprealm || dest_realm->get_newest_seq() + 1 > oldin->get_oldest_snap()) - oldin->project_past_snaprealm_parent(straydn->get_dir()->inode->find_snaprealm()); + if (oldin->snaprealm || dest_realm->get_newest_seq() + 1 > oldin->get_oldest_snap()) { + assert(mdr->more()->desti_srnode); + oldin->project_snaprealm(mdr->more()->desti_srnode); + mdr->more()->desti_srnode = NULL; + } else { + assert(!mdr->more()->desti_srnode); + } straydn->first = std::max(oldin->first, dest_realm->get_newest_seq() + 1); metablob->add_primary_dentry(straydn, oldin, true, true); } else if (force_journal_stray) { @@ -7400,8 +7447,13 @@ void Server::_rename_prepare(MDRequestRef& mdr, } else if (srcdnl->is_primary()) { // project snap parent update? 
if (destdn->is_auth() && src_realm != dest_realm && - (srci->snaprealm || src_realm->get_newest_seq() + 1 > srci->get_oldest_snap())) - srci->project_past_snaprealm_parent(dest_realm); + (srci->snaprealm || src_realm->get_newest_seq() + 1 > srci->get_oldest_snap())) { + assert(mdr->more()->srci_srnode); + srci->project_snaprealm(mdr->more()->srci_srnode); + mdr->more()->srci_srnode = NULL; + } else { + assert(!mdr->more()->srci_srnode); + } if (destdn->is_auth() && !destdnl->is_null()) mdcache->journal_cow_dentry(mdr.get(), metablob, destdn, CEPH_NOSNAP, 0, destdnl); @@ -7475,6 +7527,9 @@ void Server::_rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, C bool linkmerge = (srcdnl->get_inode() == destdnl->get_inode() && (srcdnl->is_primary() || destdnl->is_primary())); + bool new_in_snaprealm = false; + bool new_oldin_snaprealm = false; + // target inode if (!linkmerge) { if (destdnl->is_primary()) { @@ -7492,11 +7547,15 @@ void Server::_rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, C if (destdn->is_auth()) { bool hadrealm = (oldin->snaprealm ? true : false); oldin->pop_and_dirty_projected_inode(mdr->ls); - if (oldin->snaprealm && !hadrealm) - mdcache->do_realm_invalidate_and_update_notify(oldin, CEPH_SNAP_OP_SPLIT, false); + new_oldin_snaprealm = (oldin->snaprealm && !hadrealm); } else { - // FIXME this snaprealm is not filled out correctly - //oldin->open_snaprealm(); might be sufficient.. + assert(mdr->slave_request); + if (mdr->slave_request->desti_snapbl.length()) { + new_oldin_snaprealm = !oldin->snaprealm; + oldin->decode_snap_blob(mdr->slave_request->desti_snapbl); + assert(oldin->snaprealm); + assert(oldin->snaprealm->have_past_parents_open()); + } } } else if (destdnl->is_remote()) { destdn->get_dir()->unlink_inode(destdn, false); @@ -7574,10 +7633,17 @@ void Server::_rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, C } if (destdn->is_auth()) { + bool hadrealm = (in->snaprealm ? 
true : false); in->pop_and_dirty_projected_inode(mdr->ls); - + new_in_snaprealm = (in->snaprealm && !hadrealm); } else { - // FIXME: fix up snaprealm! + assert(mdr->slave_request); + if (mdr->slave_request->srci_snapbl.length()) { + new_in_snaprealm = !in->snaprealm; + in->decode_snap_blob(mdr->slave_request->srci_snapbl); + assert(in->snaprealm); + assert(in->snaprealm->have_past_parents_open()); + } } } @@ -7598,6 +7664,11 @@ void Server::_rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, C if (straydn && oldin->is_dir()) mdcache->adjust_subtree_after_rename(oldin, destdn->get_dir(), true); + if (new_oldin_snaprealm) + mdcache->do_realm_invalidate_and_update_notify(oldin, CEPH_SNAP_OP_SPLIT, false); + if (new_in_snaprealm) + mdcache->do_realm_invalidate_and_update_notify(in, CEPH_SNAP_OP_SPLIT, true); + // removing a new dn? if (srcdn->is_auth()) srcdn->get_dir()->try_remove_unlinked_dn(srcdn); @@ -7781,11 +7852,25 @@ void Server::handle_slave_rename_prep(MDRequestRef& mdr) break; } + if (srcdnl->is_primary() && !mdr->slave_request->srci_snapbl.length()) { + SnapRealm *dest_realm = destdn->get_dir()->inode->find_snaprealm(); + SnapRealm *src_realm = srci->find_snaprealm(); + snapid_t follows = src_realm->get_newest_seq(); + if (src_realm != dest_realm && + (srci->snaprealm || follows + 1 > srci->get_oldest_snap())) { + sr_t *new_srnode = srci->prepare_new_srnode(follows); + srci->record_snaprealm_past_parent(new_srnode, dest_realm); + encode(*new_srnode, mdr->slave_request->srci_snapbl); + delete new_srnode; + } + } + if (reply_witness) { assert(!srcdnrep.empty()); MMDSSlaveRequest *reply = new MMDSSlaveRequest(mdr->reqid, mdr->attempt, MMDSSlaveRequest::OP_RENAMEPREPACK); reply->witnesses.swap(srcdnrep); + reply->srci_snapbl.swap(mdr->slave_request->srci_snapbl); mds->send_message_mds(reply, mdr->slave_to_mds); mdr->slave_request->put(); mdr->slave_request = 0; @@ -7837,6 +7922,7 @@ void Server::handle_slave_rename_prep(MDRequestRef& mdr) 
rollback.stray.dname = straydn->get_name(); } encode(rollback, mdr->more()->rollback_bl); + // FIXME: rollback snaprealm dout(20) << " rollback is " << mdr->more()->rollback_bl.length() << " bytes" << dendl; // journal. @@ -7876,7 +7962,6 @@ void Server::_logged_slave_rename(MDRequestRef& mdr, } CDentry::linkage_t *srcdnl = srcdn->get_linkage(); - CDentry::linkage_t *destdnl = NULL; //CDentry::linkage_t *straydnl = straydn ? straydn->get_linkage() : 0; // export srci? @@ -7915,8 +8000,11 @@ void Server::_logged_slave_rename(MDRequestRef& mdr, // apply _rename_apply(mdr, srcdn, destdn, straydn); - - destdnl = destdn->get_linkage(); + + CDentry::linkage_t *destdnl = destdn->get_linkage(); + + if (mdr->more()->is_inode_exporter) + reply->srci_snapbl.swap(mdr->slave_request->srci_snapbl); // bump popularity mds->balancer->hit_dir(mdr->get_mds_stamp(), srcdn->get_dir(), META_POP_IWR); @@ -8440,6 +8528,13 @@ void Server::handle_slave_rename_prep_ack(MDRequestRef& mdr, MMDSSlaveRequest *a mdr->more()->inode_import_v = ack->inode_export_v; } + if (ack->srci_snapbl.length() && !mdr->more()->srci_srnode) { + dout(10) << " got srci snapbl" << dendl; + mdr->more()->srci_srnode = new sr_t(); + bufferlist::iterator p = ack->srci_snapbl.begin(); + decode(*mdr->more()->srci_srnode, p); + } + // remove from waiting list assert(mdr->more()->waiting_on_slave.count(from)); mdr->more()->waiting_on_slave.erase(from); diff --git a/src/messages/MDentryUnlink.h b/src/messages/MDentryUnlink.h index c6f98369ce1..da580cae3fb 100644 --- a/src/messages/MDentryUnlink.h +++ b/src/messages/MDentryUnlink.h @@ -27,6 +27,7 @@ class MDentryUnlink : public Message { string& get_dn() { return dn; } bufferlist straybl; + bufferlist snapbl; MDentryUnlink() : Message(MSG_MDS_DENTRYUNLINK) { } diff --git a/src/messages/MMDSSlaveRequest.h b/src/messages/MMDSSlaveRequest.h index 0ef692506be..8fe873fb5ac 100644 --- a/src/messages/MMDSSlaveRequest.h +++ b/src/messages/MMDSSlaveRequest.h @@ -121,7 +121,9 @@ 
class MMDSSlaveRequest : public Message { mds_rank_t srcdn_auth; utime_t op_stamp; - bufferlist stray; // stray dir + dentry + bufferlist straybl; // stray dir + dentry + bufferlist srci_snapbl; + bufferlist desti_snapbl; public: metareqid_t get_reqid() { return reqid; } @@ -177,7 +179,9 @@ public: encode(inode_export_v, payload); encode(srcdn_auth, payload); encode(srci_replica, payload); - encode(stray, payload); + encode(straybl, payload); + encode(srci_snapbl, payload); + encode(desti_snapbl, payload); } void decode_payload() override { bufferlist::iterator p = payload.begin(); @@ -196,7 +200,9 @@ public: decode(inode_export_v, p); decode(srcdn_auth, p); decode(srci_replica, p); - decode(stray, p); + decode(straybl, p); + decode(srci_snapbl, p); + decode(desti_snapbl, p); } const char *get_type_name() const override { return "slave_request"; } -- 2.39.5