From: Yan, Zheng Date: Sat, 28 Oct 2017 03:16:57 +0000 (+0800) Subject: mds: record hardlink snaps in inode's snaprealm X-Git-Tag: v13.1.0~413^2~20 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=46bb0f448aa0025338aba67f7f8b693e18ede2c4;p=ceph.git mds: record hardlink snaps in inode's snaprealm Inode with multiple hardlinks is attached to global snaprealm. Before modifying a hardlink, record snaps that reference the hardlink. When all hardlinks are removed, stray inode gets moved into normal snaprealm. By checking the recorded snaps, mds knows whether there are still snaps that reference the stray inode. Signed-off-by: "Yan, Zheng" --- diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index ae7cd51a191d..45d90b464e4c 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -457,10 +457,21 @@ void CInode::project_snaprealm(sr_t *new_srnode) void CInode::mark_snaprealm_global(sr_t *new_srnode) { assert(!is_dir()); - new_srnode->current_parent_since = get_oldest_snap(); + // 'last_destroyed' is no longer used, use it to store origin 'current_parent_since' + new_srnode->last_destroyed = new_srnode->current_parent_since; + new_srnode->current_parent_since = mdcache->get_global_snaprealm()->get_newest_seq() + 1; new_srnode->mark_parent_global(); } +void CInode::clear_snaprealm_global(sr_t *new_srnode) +{ + // restore 'current_parent_since' + new_srnode->current_parent_since = new_srnode->last_destroyed; + new_srnode->last_destroyed = 0; + new_srnode->seq = mdcache->get_global_snaprealm()->get_newest_seq(); + new_srnode->clear_parent_global(); +} + bool CInode::is_projected_snaprealm_global() const { const sr_t *srnode = get_projected_srnode(); @@ -480,6 +491,7 @@ void CInode::project_snaprealm_past_parent(SnapRealm *newparent) if parent DNE, we need to find what the parent actually is and fill that in */ void CInode::record_snaprealm_past_parent(sr_t *new_snap, SnapRealm *newparent) { + assert(!new_snap->is_parent_global()); SnapRealm 
*oldparent; if (!snaprealm) { oldparent = find_snaprealm(); @@ -502,6 +514,26 @@ void CInode::record_snaprealm_past_parent(sr_t *new_snap, SnapRealm *newparent) } } +void CInode::record_snaprealm_parent_dentry(sr_t *new_snap, SnapRealm *newparent, + CDentry *dn, bool primary_dn) +{ + assert(new_snap->is_parent_global()); + SnapRealm *oldparent = dn->get_dir()->inode->find_snaprealm(); + auto& snaps = oldparent->get_snaps(); + + if (!primary_dn) { + auto p = snaps.lower_bound(dn->first); + if (p != snaps.end()) + new_snap->past_parent_snaps.insert(p, snaps.end()); + } else if (newparent != oldparent) { + // 'last_destroyed' is used as 'current_parent_since' + auto p = snaps.lower_bound(new_snap->last_destroyed); + if (p != snaps.end()) + new_snap->past_parent_snaps.insert(p, snaps.end()); + new_snap->last_destroyed = mdcache->get_global_snaprealm()->get_newest_seq() + 1; + } +} + void CInode::early_pop_projected_snaprealm() { assert(!projected_nodes.empty()); diff --git a/src/mds/CInode.h b/src/mds/CInode.h index ecd8d649b716..52512a2d44de 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -512,9 +512,12 @@ public: } void mark_snaprealm_global(sr_t *new_srnode); + void clear_snaprealm_global(sr_t *new_srnode); bool is_projected_snaprealm_global() const; void record_snaprealm_past_parent(sr_t *new_snap, SnapRealm *newparent); + void record_snaprealm_parent_dentry(sr_t *new_snap, SnapRealm *newparent, + CDentry *dn, bool primary_dn); void project_snaprealm_past_parent(SnapRealm *newparent); void early_pop_projected_snaprealm(); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 5025360e0988..10e511f7224e 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -5447,6 +5447,7 @@ void Server::_link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti) if (!targeti->is_projected_snaprealm_global()) { sr_t *newsnap = targeti->project_snaprealm(); targeti->mark_snaprealm_global(newsnap); + targeti->record_snaprealm_parent_dentry(newsnap, NULL, 
targeti->get_projected_parent_dn(), true); adjust_realm = true; } @@ -5543,6 +5544,8 @@ void Server::_link_remote(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targ MMDSSlaveRequest *req = new MMDSSlaveRequest(mdr->reqid, mdr->attempt, op); targeti->set_object_info(req->get_object_info()); req->op_stamp = mdr->get_op_stamp(); + if (auto& desti_srnode = mdr->more()->desti_srnode) + encode(*desti_srnode, req->desti_snapbl); mds->send_message_mds(req, linkauth); assert(mdr->more()->waiting_on_slave.count(linkauth) == 0); @@ -5553,6 +5556,11 @@ void Server::_link_remote(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targ assert(g_conf->mds_kill_link_at != 2); + if (auto& desti_srnode = mdr->more()->desti_srnode) { + delete desti_srnode; + desti_srnode = NULL; + } + mdr->set_mds_stamp(ceph_clock_now()); // add to event @@ -5689,17 +5697,34 @@ void Server::handle_slave_link_prep(MDRequestRef& mdr) // update journaled target inode bool inc; bool adjust_realm = false; + bool realm_projected = false; if (mdr->slave_request->get_op() == MMDSSlaveRequest::OP_LINKPREP) { inc = true; pi.inode.nlink++; if (!targeti->is_projected_snaprealm_global()) { sr_t *newsnap = targeti->project_snaprealm(); targeti->mark_snaprealm_global(newsnap); + targeti->record_snaprealm_parent_dentry(newsnap, NULL, targeti->get_projected_parent_dn(), true); adjust_realm = true; + realm_projected = true; } } else { inc = false; pi.inode.nlink--; + if (targeti->is_projected_snaprealm_global()) { + assert(mdr->slave_request->desti_snapbl.length()); + auto p = mdr->slave_request->desti_snapbl.begin(); + + sr_t *newsnap = targeti->project_snaprealm(); + decode(*newsnap, p); + + if (pi.inode.nlink == 0) + assert(!newsnap->is_parent_global()); + + realm_projected = true; + } else { + assert(mdr->slave_request->desti_snapbl.length() == 0); + } } link_rollback rollback; @@ -5710,7 +5735,7 @@ void Server::handle_slave_link_prep(MDRequestRef& mdr) rollback.old_dir_mtime = pf->fragstat.mtime; 
rollback.old_dir_rctime = pf->rstat.rctime; rollback.was_inc = inc; - if (adjust_realm) { + if (realm_projected) { if (targeti->snaprealm) { encode(true, rollback.snapbl); targeti->encode_snap_blob(rollback.snapbl); @@ -6066,12 +6091,11 @@ void Server::handle_client_unlink(MDRequestRef& mdr) wrlocks.insert(&straydn->get_dir()->inode->nestlock); xlocks.insert(&straydn->lock); } + mds->locker->include_snap_rdlocks(rdlocks, diri); - if (dnl->is_primary() && in->is_dir()) { + xlocks.insert(&in->snaplock); + if (in->is_dir()) rdlocks.insert(&in->filelock); // to verify it's empty - xlocks.insert(&in->snaplock); - } else - rdlocks.insert(&in->snaplock); if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks)) return; @@ -6092,12 +6116,21 @@ void Server::handle_client_unlink(MDRequestRef& mdr) if (straydn) straydn->first = std::max(in->first, follows + 1); - if (dnl->is_primary() && !mdr->more()->desti_srnode) { - // prepare snaprealm blob for slave request - if (in->snaprealm || follows + 1 > in->get_oldest_snap()) { - sr_t *new_srnode = in->prepare_new_srnode(follows); - in->record_snaprealm_past_parent(new_srnode, straydn->get_dir()->inode->find_snaprealm()); + if (!mdr->more()->desti_srnode) { + if (in->is_projected_snaprealm_global()) { + sr_t *new_srnode = in->prepare_new_srnode(0); + in->record_snaprealm_parent_dentry(new_srnode, NULL, dn, dnl->is_primary()); + // dropping the last linkage, detch the inode from global snaprealm + if (in->get_projected_inode()->nlink == 1) + in->clear_snaprealm_global(new_srnode); mdr->more()->desti_srnode = new_srnode; + } else if (dnl->is_primary()) { + // prepare snaprealm blob for slave request + if (in->snaprealm || follows + 1 > in->get_oldest_snap()) { + sr_t *new_srnode = in->prepare_new_srnode(follows); + in->record_snaprealm_past_parent(new_srnode, straydn->get_dir()->inode->find_snaprealm()); + mdr->more()->desti_srnode = new_srnode; + } } } @@ -6128,7 +6161,7 @@ void Server::handle_client_unlink(MDRequestRef& mdr) 
if (dnl->is_remote() && !dnl->get_inode()->is_auth()) _link_remote(mdr, false, dn, dnl->get_inode()); else - _unlink_local(mdr, dn, straydn, follows); + _unlink_local(mdr, dn, straydn); } class C_MDS_unlink_local_finish : public ServerLogContext { @@ -6145,7 +6178,7 @@ public: } }; -void Server::_unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn, snapid_t follows) +void Server::_unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn) { dout(10) << "_unlink_local " << *dn << dendl; @@ -6188,6 +6221,12 @@ void Server::_unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn, sna if (pi.inode.nlink == 0) in->state_set(CInode::STATE_ORPHAN); + if (mdr->more()->desti_srnode) { + auto& desti_srnode = mdr->more()->desti_srnode; + in->project_snaprealm(desti_srnode); + desti_srnode = NULL; + } + if (straydn) { // will manually pop projected inode @@ -6195,15 +6234,6 @@ void Server::_unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn, sna mdcache->predirty_journal_parents(mdr, &le->metablob, in, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, -1); mdcache->predirty_journal_parents(mdr, &le->metablob, in, straydn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); - // project snaprealm, too - if (in->snaprealm || follows + 1 > in->get_oldest_snap()) { - assert(mdr->more()->desti_srnode); - in->project_snaprealm(mdr->more()->desti_srnode); - mdr->more()->desti_srnode = NULL; - } else { - assert(!mdr->more()->desti_srnode); - } - pi.inode.update_backtrace(); le->metablob.add_primary_dentry(straydn, in, true, true); } else { @@ -6965,20 +6995,14 @@ void Server::handle_client_rename(MDRequestRef& mdr) // we need to update srci's ctime. xlock its least contended lock to do that... 
xlocks.insert(&srci->linklock); - if (srcdnl->is_primary()) - xlocks.insert(&srci->snaplock); - else - rdlocks.insert(&srci->snaplock); + xlocks.insert(&srci->snaplock); if (oldin) { // xlock oldin (for nlink--) xlocks.insert(&oldin->linklock); - if (destdnl->is_primary()) { - xlocks.insert(&oldin->snaplock); - if (oldin->is_dir()) + xlocks.insert(&oldin->snaplock); + if (oldin->is_dir()) rdlocks.insert(&oldin->filelock); // to verify it's empty - } else - rdlocks.insert(&oldin->snaplock); } CInode *auth_pin_freeze = !srcdn->is_auth() && srcdnl->is_primary() ? srci : NULL; @@ -7054,24 +7078,41 @@ void Server::handle_client_rename(MDRequestRef& mdr) // -- prepare snaprealm --- - if (destdnl->is_primary() && !mdr->more()->desti_srnode) { - SnapRealm *dest_realm = destdir->inode->find_snaprealm(); - snapid_t follows = dest_realm->get_newest_seq(); - if (oldin->snaprealm || follows + 1 > oldin->get_oldest_snap()) { - sr_t *new_srnode = oldin->prepare_new_srnode(follows); - oldin->record_snaprealm_past_parent(new_srnode, straydn->get_dir()->inode->find_snaprealm()); - mdr->more()->desti_srnode = new_srnode; + if (!linkmerge) { + if (oldin && !mdr->more()->desti_srnode) { + if (oldin->is_projected_snaprealm_global()) { + sr_t *new_srnode = oldin->prepare_new_srnode(0); + oldin->record_snaprealm_parent_dentry(new_srnode, NULL, destdn, destdnl->is_primary()); + // dropping the last linkage, detch the inode from global snaprealm + if (oldin->get_projected_inode()->nlink == 1) + oldin->clear_snaprealm_global(new_srnode); + mdr->more()->desti_srnode = new_srnode; + } else if (destdnl->is_primary()) { + SnapRealm *dest_realm = destdir->inode->find_snaprealm(); + snapid_t follows = dest_realm->get_newest_seq(); + if (oldin->snaprealm || follows + 1 > oldin->get_oldest_snap()) { + sr_t *new_srnode = oldin->prepare_new_srnode(follows); + oldin->record_snaprealm_past_parent(new_srnode, straydn->get_dir()->inode->find_snaprealm()); + mdr->more()->desti_srnode = new_srnode; + } + } 
} - } - if (srcdnl->is_primary() && !mdr->more()->srci_srnode) { - SnapRealm *dest_realm = destdir->inode->find_snaprealm(); - SnapRealm *src_realm = srci->find_snaprealm(); - snapid_t follows = src_realm->get_newest_seq(); - if (src_realm != dest_realm && - (srci->snaprealm || follows + 1 > srci->get_oldest_snap())) { - sr_t *new_srnode = srci->prepare_new_srnode(follows); - srci->record_snaprealm_past_parent(new_srnode, dest_realm); - mdr->more()->srci_srnode = new_srnode; + if (!mdr->more()->srci_srnode) { + SnapRealm *dest_realm = destdir->inode->find_snaprealm(); + if (srci->is_projected_snaprealm_global()) { + sr_t *new_srnode = srci->prepare_new_srnode(0); + srci->record_snaprealm_parent_dentry(new_srnode, dest_realm, srcdn, srcdnl->is_primary()); + mdr->more()->srci_srnode = new_srnode; + } else if (srcdnl->is_primary()) { + SnapRealm *src_realm = srcdir->inode->find_snaprealm(); + snapid_t follows = src_realm->get_newest_seq(); + if (src_realm != dest_realm && + (srci->snaprealm || follows + 1 > srci->get_oldest_snap())) { + sr_t *new_srnode = srci->prepare_new_srnode(follows); + srci->record_snaprealm_past_parent(new_srnode, dest_realm); + mdr->more()->srci_srnode = new_srnode; + } + } } } @@ -7506,12 +7547,11 @@ void Server::_rename_prepare(MDRequestRef& mdr, assert(straydn); if (destdn->is_auth()) { // project snaprealm, too - if (oldin->snaprealm || dest_realm->get_newest_seq() + 1 > oldin->get_oldest_snap()) { - assert(mdr->more()->desti_srnode); - oldin->project_snaprealm(mdr->more()->desti_srnode); - mdr->more()->desti_srnode = NULL; - } else { - assert(!mdr->more()->desti_srnode); + if (auto& desti_srnode = mdr->more()->desti_srnode) { + oldin->project_snaprealm(desti_srnode); + if (tpi->nlink == 0) + assert(!desti_srnode->is_parent_global()); + desti_srnode = NULL; } straydn->first = std::max(oldin->first, dest_realm->get_newest_seq() + 1); metablob->add_primary_dentry(straydn, oldin, true, true); @@ -7522,6 +7562,22 @@ void 
Server::_rename_prepare(MDRequestRef& mdr, } } else if (destdnl->is_remote()) { if (oldin->is_auth()) { + sr_t *new_srnode = NULL; + if (mdr->slave_request) { + if (mdr->slave_request->desti_snapbl.length() > 0) { + new_srnode = new sr_t(); + auto p = mdr->slave_request->desti_snapbl.begin(); + decode(*new_srnode, p); + } + } else if (auto& desti_srnode = mdr->more()->desti_srnode) { + new_srnode = desti_srnode; + desti_srnode = NULL; + } + if (new_srnode) { + oldin->project_snaprealm(new_srnode); + if (tpi->nlink == 0) + assert(!new_srnode->is_parent_global()); + } // auth for targeti metablob->add_dir_context(oldin->get_projected_parent_dir()); mdcache->journal_cow_dentry(mdr.get(), metablob, oldin->get_projected_parent_dn(), @@ -7541,20 +7597,32 @@ void Server::_rename_prepare(MDRequestRef& mdr, if (destdn->is_auth()) metablob->add_remote_dentry(destdn, true, srcdnl->get_remote_ino(), srcdnl->get_remote_d_type()); - if (srci->get_projected_parent_dn()->is_auth()) { // it's remote - metablob->add_dir_context(srci->get_projected_parent_dir()); - mdcache->journal_cow_dentry(mdr.get(), metablob, srci->get_projected_parent_dn(), CEPH_NOSNAP, 0, srcdnl); - metablob->add_primary_dentry(srci->get_projected_parent_dn(), srci, true); + + if (srci->is_auth() ) { // it's remote + if (mdr->slave_request) { + if (mdr->slave_request->srci_snapbl.length() > 0) { + sr_t *new_srnode = new sr_t(); + auto p = mdr->slave_request->srci_snapbl.begin(); + decode(*new_srnode, p); + srci->project_snaprealm(new_srnode); + } + } else if (auto& srci_srnode = mdr->more()->srci_srnode) { + srci->project_snaprealm(srci_srnode); + srci_srnode = NULL; + } + + CDentry *srci_pdn = srci->get_projected_parent_dn(); + metablob->add_dir_context(srci_pdn->get_dir()); + mdcache->journal_cow_dentry(mdr.get(), metablob, srci_pdn, CEPH_NOSNAP, 0, srcdnl); + metablob->add_primary_dentry(srci_pdn, srci, true); } } else if (srcdnl->is_primary()) { // project snap parent update? 
- if (destdn->is_auth() && src_realm != dest_realm && - (srci->snaprealm || src_realm->get_newest_seq() + 1 > srci->get_oldest_snap())) { - assert(mdr->more()->srci_srnode); - srci->project_snaprealm(mdr->more()->srci_srnode); - mdr->more()->srci_srnode = NULL; - } else { - assert(!mdr->more()->srci_srnode); + if (destdn->is_auth()) { + if (auto& srci_srnode = mdr->more()->srci_srnode) { + srci->project_snaprealm(srci_srnode); + srci_srnode = NULL; + } } if (destdn->is_auth() && !destdnl->is_null()) @@ -7668,8 +7736,17 @@ void Server::_rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, C mdcache->touch_dentry_bottom(straydn); // drop dn as quickly as possible. } else if (destdnl->is_remote()) { destdn->get_dir()->unlink_inode(destdn, false); - if (oldin->is_auth()) - oldin->pop_and_dirty_projected_inode(mdr->ls); + if (oldin->is_auth()) { + oldin->pop_and_dirty_projected_inode(mdr->ls); + } else if (mdr->slave_request) { + if (mdr->slave_request->desti_snapbl.length() > 0) { + assert(oldin->snaprealm); + oldin->decode_snap_blob(mdr->slave_request->desti_snapbl); + } + } else if (auto& desti_srnode = mdr->more()->desti_srnode) { + delete desti_srnode; + desti_srnode = NULL; + } } } @@ -7710,8 +7787,17 @@ void Server::_rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, C if (destdn->is_auth()) destdn->mark_dirty(mdr->more()->pvmap[destdn], mdr->ls); // in - if (in->is_auth()) + if (in->is_auth()) { in->pop_and_dirty_projected_inode(mdr->ls); + } else if (mdr->slave_request) { + if (mdr->slave_request->srci_snapbl.length() > 0) { + assert(in->snaprealm); + in->decode_snap_blob(mdr->slave_request->srci_snapbl); + } + } else if (auto& srci_srnode = mdr->more()->srci_srnode) { + delete srci_srnode; + srci_srnode = NULL; + } } else { dout(10) << "merging remote onto primary link" << dendl; oldin->pop_and_dirty_projected_inode(mdr->ls); diff --git a/src/mds/Server.h b/src/mds/Server.h index cbec34f69604..467aaccfd7cc 100644 --- 
a/src/mds/Server.h +++ b/src/mds/Server.h @@ -259,7 +259,7 @@ public: void handle_client_unlink(MDRequestRef& mdr); bool _dir_is_nonempty_unlocked(MDRequestRef& mdr, CInode *rmdiri); bool _dir_is_nonempty(MDRequestRef& mdr, CInode *rmdiri); - void _unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn, snapid_t follows); + void _unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn); void _unlink_local_finish(MDRequestRef& mdr, CDentry *dn, CDentry *straydn, version_t); diff --git a/src/mds/SnapRealm.cc b/src/mds/SnapRealm.cc index bcb762fb1cb8..055d243a74c8 100644 --- a/src/mds/SnapRealm.cc +++ b/src/mds/SnapRealm.cc @@ -635,6 +635,10 @@ void SnapRealm::build_snap_trace() const set past; if (!srnode.past_parent_snaps.empty()) { past = mdcache->mds->snapclient->filter(srnode.past_parent_snaps); + if (srnode.is_parent_global()) { + auto p = past.lower_bound(srnode.current_parent_since); + past.erase(p, past.end()); + } } else if (!srnode.past_parents.empty()) { const set& snaps = get_snaps(); for (const auto& p : srnode.past_parents) {