From 5b832aa5b6e0bd23f491ac896136e13f72e19353 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 27 Dec 2016 16:58:07 +0800 Subject: [PATCH] mds: use locked dentry trace to compose slave rmdir/rename request {rmdir/rename}_prepare_witness() use full path to compose slave requests. But they do not lock all dentries in the path. So someone else changes the unlocked dentry and causes path travsese of slave request to fail. Signed-off-by: Yan, Zheng --- src/mds/Server.cc | 95 ++++++++++++++++++----------------------- src/mds/Server.h | 4 +- src/mds/StrayManager.cc | 7 +-- 3 files changed, 48 insertions(+), 58 deletions(-) diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 1ad52eaff0204..e95bdb3636c02 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -5591,7 +5591,7 @@ void Server::handle_client_unlink(MDRequestRef& mdr) } else if (mdr->more()->waiting_on_slave.count(*p)) { dout(10) << " already waiting on witness mds." << *p << dendl; } else { - if (!_rmdir_prepare_witness(mdr, *p, dn, straydn)) + if (!_rmdir_prepare_witness(mdr, *p, trace, straydn)) return; } } @@ -5755,7 +5755,7 @@ void Server::_unlink_local_finish(MDRequestRef& mdr, } } -bool Server::_rmdir_prepare_witness(MDRequestRef& mdr, mds_rank_t who, CDentry *dn, CDentry *straydn) +bool Server::_rmdir_prepare_witness(MDRequestRef& mdr, mds_rank_t who, vector& trace, CDentry *straydn) { if (!mds->mdsmap->is_clientreplay_or_active_or_stopping(who)) { dout(10) << "_rmdir_prepare_witness mds." << who << " is not active" << dendl; @@ -5767,12 +5767,12 @@ bool Server::_rmdir_prepare_witness(MDRequestRef& mdr, mds_rank_t who, CDentry * dout(10) << "_rmdir_prepare_witness mds." << who << dendl; MMDSSlaveRequest *req = new MMDSSlaveRequest(mdr->reqid, mdr->attempt, MMDSSlaveRequest::OP_RMDIRPREP); - dn->make_path(req->srcdnpath); - straydn->make_path(req->destdnpath); - req->op_stamp = mdr->get_op_stamp(); - + req->srcdnpath = filepath(trace.front()->get_dir()->ino()); + for (auto dn : trace) + req->srcdnpath.push_dentry(dn->name); mdcache->replicate_stray(straydn, who, req->stray); - + + req->op_stamp = mdr->get_op_stamp(); mds->send_message_mds(req, who); assert(mdr->more()->waiting_on_slave.count(who) == 0); @@ -6237,46 +6237,26 @@ void Server::handle_client_rename(MDRequestRef& mdr) if (destpath.get_ino() != srcpath.get_ino() && !(req->get_source().is_mds() && MDS_INO_IS_MDSDIR(srcpath.get_ino()))) { // <-- mds 'rename' out of stray dir is ok! - // do traces share a dentry? - CDentry *common = 0; - for (unsigned i=0; i < srctrace.size(); i++) { - for (unsigned j=0; j < desttrace.size(); j++) { - if (srctrace[i] == desttrace[j]) { - common = srctrace[i]; - break; - } - } - if (common) - break; + CInode *srcbase = srctrace[0]->get_dir()->get_inode(); + CInode *destbase = desttrace[0]->get_dir()->get_inode(); + // ok, extend srctrace toward root until it is an ancestor of desttrace. + while (srcbase != destbase && + !srcbase->is_projected_ancestor_of(destbase)) { + CDentry *pdn = srcbase->get_projected_parent_dn(); + srctrace.insert(srctrace.begin(), pdn); + dout(10) << "rename prepending srctrace with " << *pdn << dendl; + srcbase = pdn->get_dir()->get_inode(); } - if (common) { - dout(10) << "rename src and dest traces share common dentry " << *common << dendl; - } else { - CInode *srcbase = srctrace[0]->get_dir()->get_inode(); - CInode *destbase = destdir->get_inode(); - if (!desttrace.empty()) - destbase = desttrace[0]->get_dir()->get_inode(); - - // ok, extend srctrace toward root until it is an ancestor of desttrace. - while (srcbase != destbase && - !srcbase->is_projected_ancestor_of(destbase)) { - srctrace.insert(srctrace.begin(), - srcbase->get_projected_parent_dn()); - dout(10) << "rename prepending srctrace with " << *srctrace[0] << dendl; - srcbase = srcbase->get_projected_parent_dn()->get_dir()->get_inode(); - } - - // then, extend destpath until it shares the same parent inode as srcpath. - while (destbase != srcbase) { - desttrace.insert(desttrace.begin(), - destbase->get_projected_parent_dn()); - rdlocks.insert(&desttrace[0]->lock); - dout(10) << "rename prepending desttrace with " << *desttrace[0] << dendl; - destbase = destbase->get_projected_parent_dn()->get_dir()->get_inode(); - } - dout(10) << "rename src and dest traces now share common ancestor " << *destbase << dendl; + // then, extend destpath until it shares the same parent inode as srcpath. + while (destbase != srcbase) { + CDentry *pdn = destbase->get_projected_parent_dn(); + desttrace.insert(desttrace.begin(), pdn); + rdlocks.insert(&pdn->lock); + dout(10) << "rename prepending desttrace with " << *pdn << dendl; + destbase = pdn->get_dir()->get_inode(); } + dout(10) << "rename src and dest traces now share common ancestor " << *destbase << dendl; } // src == dest? @@ -6301,7 +6281,7 @@ void Server::handle_client_rename(MDRequestRef& mdr) // is this a stray migration, reintegration or merge? (sanity checks!) if (mdr->reqid.name.is_mds() && !(MDS_INO_IS_MDSDIR(srcpath.get_ino()) && - MDS_INO_IS_STRAY(destpath.get_ino())) && + MDS_INO_IS_MDSDIR(destpath.get_ino())) && !(destdnl->is_remote() && destdnl->get_remote_ino() == srci->ino())) { respond_to_request(mdr, -EINVAL); // actually, this won't reply, but whatev. @@ -6494,7 +6474,7 @@ void Server::handle_client_rename(MDRequestRef& mdr) if (srcdnl->is_primary() && !mdr->more()->is_ambiguous_auth) { dout(10) << " preparing ambiguous auth for srci" << dendl; mdr->set_ambiguous_auth(srci); - _rename_prepare_witness(mdr, last, witnesses, srcdn, destdn, straydn); + _rename_prepare_witness(mdr, last, witnesses, srctrace, desttrace, straydn); return; } } @@ -6508,7 +6488,7 @@ void Server::handle_client_rename(MDRequestRef& mdr) } else if (mdr->more()->waiting_on_slave.count(*p)) { dout(10) << " already waiting on witness mds." << *p << dendl; } else { - if (!_rename_prepare_witness(mdr, *p, witnesses, srcdn, destdn, straydn)) + if (!_rename_prepare_witness(mdr, *p, witnesses, srctrace, desttrace, straydn)) return; } } @@ -6518,7 +6498,7 @@ void Server::handle_client_rename(MDRequestRef& mdr) if (last != MDS_RANK_NONE && mdr->more()->witnessed.count(last) == 0) { dout(10) << " preparing last witness (srcdn auth)" << dendl; assert(mdr->more()->waiting_on_slave.count(last) == 0); - _rename_prepare_witness(mdr, last, witnesses, srcdn, destdn, straydn); + _rename_prepare_witness(mdr, last, witnesses, srctrace, desttrace, straydn); return; } @@ -6607,7 +6587,7 @@ void Server::_rename_finish(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, // helpers bool Server::_rename_prepare_witness(MDRequestRef& mdr, mds_rank_t who, set &witnesse, - CDentry *srcdn, CDentry *destdn, CDentry *straydn) + vector& srctrace, vector& dsttrace, CDentry *straydn) { if (!mds->mdsmap->is_clientreplay_or_active_or_stopping(who)) { dout(10) << "_rename_prepare_witness mds." << who << " is not active" << dendl; @@ -6619,16 +6599,20 @@ bool Server::_rename_prepare_witness(MDRequestRef& mdr, mds_rank_t who, setreqid, mdr->attempt, MMDSSlaveRequest::OP_RENAMEPREP); - srcdn->make_path(req->srcdnpath); - destdn->make_path(req->destdnpath); - req->op_stamp = mdr->get_op_stamp(); - + + req->srcdnpath = filepath(srctrace.front()->get_dir()->ino()); + for (auto dn : srctrace) + req->srcdnpath.push_dentry(dn->name); + req->destdnpath = filepath(dsttrace.front()->get_dir()->ino()); + for (auto dn : dsttrace) + req->destdnpath.push_dentry(dn->name); if (straydn) mdcache->replicate_stray(straydn, who, req->stray); // srcdn auth will verify our current witness list is sufficient req->witnesses = witnesse; + req->op_stamp = mdr->get_op_stamp(); mds->send_message_mds(req, who); assert(mdr->more()->waiting_on_slave.count(who) == 0); @@ -7197,6 +7181,11 @@ void Server::handle_slave_rename_prep(MDRequestRef& mdr) vector trace; int r = mdcache->path_traverse(mdr, NULL, NULL, destpath, &trace, NULL, MDS_TRAVERSE_DISCOVERXLOCK); if (r > 0) return; + if (r == -ESTALE) { + mdcache->find_ino_peers(destpath.get_ino(), new C_MDS_RetryRequest(mdcache, mdr), + mdr->slave_to_mds); + return; + } assert(r == 0); // we shouldn't get an error here! CDentry *destdn = trace[trace.size()-1]; diff --git a/src/mds/Server.h b/src/mds/Server.h index 0669d440db66d..8cea19cecebe0 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -233,7 +233,7 @@ public: void _unlink_local_finish(MDRequestRef& mdr, CDentry *dn, CDentry *straydn, version_t); - bool _rmdir_prepare_witness(MDRequestRef& mdr, mds_rank_t who, CDentry *dn, CDentry *straydn); + bool _rmdir_prepare_witness(MDRequestRef& mdr, mds_rank_t who, vector& trace, CDentry *straydn); void handle_slave_rmdir_prep(MDRequestRef& mdr); void _logged_slave_rmdir(MDRequestRef& mdr, CDentry *srcdn, CDentry *straydn); void _commit_slave_rmdir(MDRequestRef& mdr, int r); @@ -257,7 +257,7 @@ public: // helpers bool _rename_prepare_witness(MDRequestRef& mdr, mds_rank_t who, set &witnesse, - CDentry *srcdn, CDentry *destdn, CDentry *straydn); + vector& srctrace, vector& dsttrace, CDentry *straydn); version_t _rename_prepare_import(MDRequestRef& mdr, CDentry *srcdn, bufferlist *client_map_bl); bool _need_force_journal(CInode *diri, bool empty); void _rename_prepare(MDRequestRef& mdr, diff --git a/src/mds/StrayManager.cc b/src/mds/StrayManager.cc index dd52d4c1f745d..34d733691d5ac 100644 --- a/src/mds/StrayManager.cc +++ b/src/mds/StrayManager.cc @@ -822,10 +822,11 @@ void StrayManager::migrate_stray(CDentry *dn, mds_rank_t to) // rename it to another mds. filepath src; dn->make_path(src); + assert(src.depth() == 2); - string dname; - in->name_stray_dentry(dname); - filepath dst(dname, MDS_INO_STRAY(to, 0)); + filepath dst(MDS_INO_MDSDIR(to)); + dst.push_dentry(src[0]); + dst.push_dentry(src[1]); MClientRequest *req = new MClientRequest(CEPH_MDS_OP_RENAME); req->set_filepath(dst); -- 2.39.5