From: Yan, Zheng Date: Tue, 28 Feb 2017 09:36:48 +0000 (+0800) Subject: mds: optimize state check of peer mds X-Git-Tag: v12.0.2~112^2~11 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8ea6e084e66d1c82611c5a4eb5e62e556efdf70c;p=ceph.git mds: optimize state check of peer mds Signed-off-by: "Yan, Zheng" --- diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index e9cd6434dc4d..899cd0082c5b 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -135,7 +135,8 @@ void Locker::send_lock_message(SimpleLock *lock, int msg) for (compact_map::iterator it = lock->get_parent()->replicas_begin(); it != lock->get_parent()->replicas_end(); ++it) { - if (mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN) + if (mds->is_cluster_degraded() && + mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN) continue; MLock *m = new MLock(lock, msg, mds->get_nodeid()); mds->send_message_mds(m, it->first); @@ -147,7 +148,8 @@ void Locker::send_lock_message(SimpleLock *lock, int msg, const bufferlist &data for (compact_map::iterator it = lock->get_parent()->replicas_begin(); it != lock->get_parent()->replicas_end(); ++it) { - if (mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN) + if (mds->is_cluster_degraded() && + mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN) continue; MLock *m = new MLock(lock, msg, mds->get_nodeid()); m->set_data(data); @@ -401,7 +403,8 @@ bool Locker::acquire_locks(MDRequestRef& mdr, dout(10) << "requesting remote auth_pins from mds." << p->first << dendl; // wait for active auth - if (!mds->mdsmap->is_clientreplay_or_active_or_stopping(p->first)) { + if (mds->is_cluster_degraded() && + !mds->mdsmap->is_clientreplay_or_active_or_stopping(p->first)) { dout(10) << " mds." 
<< p->first << " is not active" << dendl; if (mdr->more()->waiting_on_slave.empty()) mds->wait_for_active_peer(p->first, new C_MDS_RetryRequest(mdcache, mdr)); @@ -648,7 +651,8 @@ void Locker::_drop_non_rdlocks(MutationImpl *mut, set<CInode*> *pneed_issue) } for (set<mds_rank_t>::iterator p = slaves.begin(); p != slaves.end(); ++p) { - if (mds->mdsmap->get_state(*p) >= MDSMap::STATE_REJOIN) { + if (!mds->is_cluster_degraded() || + mds->mdsmap->get_state(*p) >= MDSMap::STATE_REJOIN) { dout(10) << "_drop_non_rdlocks dropping remote locks on mds." << *p << dendl; MMDSSlaveRequest *slavereq = new MMDSSlaveRequest(mut->reqid, mut->attempt, MMDSSlaveRequest::OP_DROPLOCKS); @@ -797,7 +801,8 @@ void Locker::eval_gather(SimpleLock *lock, bool first, bool *pneed_issue, list<M return; } - if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN) { + if (!mds->is_cluster_degraded() || + mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN) { switch (lock->get_state()) { case LOCK_SYNC_LOCK: mds->send_message_mds(new MLock(lock, LOCK_AC_LOCKACK, mds->get_nodeid()), @@ -1195,7 +1200,8 @@ bool Locker::_rdlock_kick(SimpleLock *lock, bool as_anon) } else { // request rdlock state change from auth mds_rank_t auth = lock->get_parent()->authority().first; - if (mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) { + if (!mds->is_cluster_degraded() || + mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) { dout(10) << "requesting rdlock from auth on " << *lock << " on " << *lock->get_parent() << dendl; mds->send_message_mds(new MLock(lock, LOCK_AC_REQRDLOCK, mds->get_nodeid()), auth); @@ -1423,7 +1429,8 @@ bool Locker::wrlock_start(SimpleLock *lock, MDRequestRef& mut, bool nowait) // replica. // auth should be auth_pinned (see acquire_locks wrlock weird mustpin case).
mds_rank_t auth = lock->get_parent()->authority().first; - if (mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) { + if (!mds->is_cluster_degraded() || + mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) { dout(10) << "requesting scatter from auth on " << *lock << " on " << *lock->get_parent() << dendl; mds->send_message_mds(new MLock(lock, LOCK_AC_REQSCATTER, mds->get_nodeid()), auth); @@ -1471,7 +1478,8 @@ void Locker::remote_wrlock_start(SimpleLock *lock, mds_rank_t target, MDRequestR dout(7) << "remote_wrlock_start mds." << target << " on " << *lock << " on " << *lock->get_parent() << dendl; // wait for active target - if (!mds->mdsmap->is_clientreplay_or_active_or_stopping(target)) { + if (mds->is_cluster_degraded() && + !mds->mdsmap->is_clientreplay_or_active_or_stopping(target)) { dout(7) << " mds." << target << " is not active" << dendl; if (mut->more()->waiting_on_slave.empty()) mds->wait_for_active_peer(target, new C_MDS_RetryRequest(mdcache, mut)); @@ -1501,7 +1509,8 @@ void Locker::remote_wrlock_finish(SimpleLock *lock, mds_rank_t target, dout(7) << "remote_wrlock_finish releasing remote wrlock on mds." << target << " " << *lock->get_parent() << dendl; - if (mds->mdsmap->get_state(target) >= MDSMap::STATE_REJOIN) { + if (!mds->is_cluster_degraded() || + mds->mdsmap->get_state(target) >= MDSMap::STATE_REJOIN) { MMDSSlaveRequest *slavereq = new MMDSSlaveRequest(mut->reqid, mut->attempt, MMDSSlaveRequest::OP_UNWRLOCK); slavereq->set_lock_type(lock->get_type()); @@ -1573,7 +1582,8 @@ bool Locker::xlock_start(SimpleLock *lock, MDRequestRef& mut) // wait for active auth mds_rank_t auth = lock->get_parent()->authority().first; - if (!mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) { + if (mds->is_cluster_degraded() && + !mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) { dout(7) << " mds." 
<< auth << " is not active" << dendl; if (mut->more()->waiting_on_slave.empty()) mds->wait_for_active_peer(auth, new C_MDS_RetryRequest(mdcache, mut)); @@ -1647,7 +1657,8 @@ void Locker::xlock_finish(SimpleLock *lock, MutationImpl *mut, bool *pneed_issue // tell auth dout(7) << "xlock_finish releasing remote xlock on " << *lock->get_parent() << dendl; mds_rank_t auth = lock->get_parent()->authority().first; - if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN) { + if (!mds->is_cluster_degraded() || + mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN) { MMDSSlaveRequest *slavereq = new MMDSSlaveRequest(mut->reqid, mut->attempt, MMDSSlaveRequest::OP_UNXLOCK); slavereq->set_lock_type(lock->get_type()); @@ -2124,7 +2135,8 @@ void Locker::request_inode_file_caps(CInode *in) } mds_rank_t auth = in->authority().first; - if (mds->mdsmap->get_state(auth) == MDSMap::STATE_REJOIN) { + if (mds->is_cluster_degraded() && + mds->mdsmap->get_state(auth) == MDSMap::STATE_REJOIN) { mds->wait_for_active_peer(auth, new C_MDL_RequestInodeFileCaps(this, in)); return; } @@ -2135,7 +2147,8 @@ void Locker::request_inode_file_caps(CInode *in) in->replica_caps_wanted = wanted; - if (mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) + if (!mds->is_cluster_degraded() || + mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) mds->send_message_mds(new MInodeFileCaps(in->ino(), in->replica_caps_wanted), auth); } @@ -4501,7 +4514,8 @@ void Locker::scatter_nudge(ScatterLock *lock, MDSInternalContextBase *c, bool fo << *lock << " on " << *p << dendl; // request unscatter? mds_rank_t auth = lock->get_parent()->authority().first; - if (mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) + if (!mds->is_cluster_degraded() || + mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) mds->send_message_mds(new MLock(lock, LOCK_AC_NUDGE, mds->get_nodeid()), auth); // wait... 
diff --git a/src/mds/MDBalancer.cc b/src/mds/MDBalancer.cc index afe4296f179f..4914e7655171 100644 --- a/src/mds/MDBalancer.cc +++ b/src/mds/MDBalancer.cc @@ -211,7 +211,7 @@ void MDBalancer::send_heartbeat() { utime_t now = ceph_clock_now(); - if (mds->mdsmap->is_degraded()) { + if (mds->is_cluster_degraded()) { dout(10) << "send_heartbeat degraded" << dendl; return; } @@ -284,7 +284,7 @@ void MDBalancer::handle_heartbeat(MHeartbeat *m) return; } - if (mds->mdsmap->is_degraded()) { + if (mds->is_cluster_degraded()) { dout(10) << " degraded, ignoring" << dendl; goto out; } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index c8f21e21f21b..029d9d0dc49e 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -6528,9 +6528,10 @@ void MDCache::send_expire_messages(map& expiremap) for (map::iterator it = expiremap.begin(); it != expiremap.end(); ++it) { - if (mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN || - (mds->mdsmap->get_state(it->first) == MDSMap::STATE_REJOIN && - rejoin_sent.count(it->first) == 0)) { + if (mds->is_cluster_degraded() && + (mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN || + (mds->mdsmap->get_state(it->first) == MDSMap::STATE_REJOIN && + rejoin_sent.count(it->first) == 0))) { it->second->put(); continue; } @@ -8159,7 +8160,8 @@ void MDCache::open_remote_dirfrag(CInode *diri, frag_t approxfg, MDSInternalCont mds_rank_t auth = diri->authority().first; - if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN) { + if (!mds->is_cluster_degraded() || + mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN) { discover_dir_frag(diri, approxfg, fin); } else { // mds is down or recovering. forge a replica! 
@@ -10755,7 +10757,7 @@ bool MDCache::can_fragment(CInode *diri, list& dirs) dout(7) << "can_fragment: read-only FS, no fragmenting for now" << dendl; return false; } - if (mds->mdsmap->is_degraded()) { + if (mds->is_cluster_degraded()) { dout(7) << "can_fragment: cluster degraded, no fragmenting for now" << dendl; return false; } diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index fd5e233cf47a..b98102f48e13 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -66,7 +66,7 @@ MDSRank::MDSRank( g_conf->osd_num_op_tracker_shard), last_state(MDSMap::STATE_BOOT), state(MDSMap::STATE_BOOT), - stopping(false), + cluster_degraded(false), stopping(false), purge_queue(g_ceph_context, whoami_, mdsmap_->get_metadata_pool(), objecter, new FunctionContext( @@ -1573,8 +1573,10 @@ void MDSRankDispatcher::handle_mds_map( } } - if (oldmap->is_degraded() && !mdsmap->is_degraded() && state >= MDSMap::STATE_ACTIVE) + cluster_degraded = mdsmap->is_degraded(); + if (oldmap->is_degraded() && !cluster_degraded && state >= MDSMap::STATE_ACTIVE) { dout(1) << "cluster recovered." << dendl; + } // did someone go active? 
if (oldstate >= MDSMap::STATE_CLIENTREPLAY && diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h index e8d7af2fc5ce..717dbd268b18 100644 --- a/src/mds/MDSRank.h +++ b/src/mds/MDSRank.h @@ -181,6 +181,8 @@ class MDSRank { // The state assigned to me by the MDSMap MDSMap::DaemonState state; + bool cluster_degraded; + MDSMap::DaemonState get_state() const { return state; } MDSMap::DaemonState get_want_state() const { return beacon.get_want_state(); } @@ -197,6 +199,7 @@ class MDSRank { bool is_stopping() const { return state == MDSMap::STATE_STOPPING; } bool is_any_replay() const { return (is_replay() || is_standby_replay()); } bool is_stopped() const { return mdsmap->is_stopped(whoami); } + bool is_cluster_degraded() const { return cluster_degraded; } void handle_write_error(int err); diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 52df15882054..dedd3bea46b0 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -280,7 +280,9 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer) it->second.state = EXPORT_CANCELLED; dir->unfreeze_tree(); // cancel the freeze dir->auth_unpin(this); - if (notify_peer && mds->mdsmap->is_clientreplay_or_active_or_stopping(it->second.peer)) // tell them. + if (notify_peer && + (!mds->is_cluster_degraded() || + mds->mdsmap->is_clientreplay_or_active_or_stopping(it->second.peer))) // tell them. mds->send_message_mds(new MExportDirCancel(dir->dirfrag(), it->second.tid), it->second.peer); break; @@ -288,7 +290,9 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer) dout(10) << "export state=freezing : canceling freeze" << dendl; it->second.state = EXPORT_CANCELLED; dir->unfreeze_tree(); // cancel the freeze - if (notify_peer && mds->mdsmap->is_clientreplay_or_active_or_stopping(it->second.peer)) // tell them. + if (notify_peer && + (!mds->is_cluster_degraded() || + mds->mdsmap->is_clientreplay_or_active_or_stopping(it->second.peer))) // tell them. 
mds->send_message_mds(new MExportDirCancel(dir->dirfrag(), it->second.tid), it->second.peer); break; @@ -325,7 +329,9 @@ void Migrator::export_try_cancel(CDir *dir, bool notify_peer) dir->unfreeze_tree(); cache->adjust_subtree_auth(dir, mds->get_nodeid()); cache->try_subtree_merge(dir); - if (notify_peer && mds->mdsmap->is_clientreplay_or_active_or_stopping(it->second.peer)) // tell them. + if (notify_peer && + (!mds->is_cluster_degraded() || + mds->mdsmap->is_clientreplay_or_active_or_stopping(it->second.peer))) // tell them. mds->send_message_mds(new MExportDirCancel(dir->dirfrag(), it->second.tid), it->second.peer); break; @@ -765,7 +771,7 @@ void Migrator::export_dir(CDir *dir, mds_rank_t dest) dout(7) << "read-only FS, no exports for now" << dendl; return; } - if (mds->mdsmap->is_degraded()) { + if (mds->is_cluster_degraded()) { dout(7) << "cluster degraded, no exports for now" << dendl; return; } @@ -1180,7 +1186,8 @@ void Migrator::handle_export_prep_ack(MExportDirPrepAck *m) p != dir->replicas_end(); ++p) { if (p->first == it->second.peer) continue; - if (!mds->mdsmap->is_clientreplay_or_active_or_stopping(p->first)) + if (mds->is_cluster_degraded() && + !mds->mdsmap->is_clientreplay_or_active_or_stopping(p->first)) continue; // only if active it->second.warning_ack_waiting.insert(p->first); it->second.notify_ack_waiting.insert(p->first); // we'll eventually get a notifyack, too! @@ -1771,7 +1778,8 @@ void Migrator::export_logged_finish(CDir *dir) export_finish(dir); // skip notify/notify_ack stage. 
} else { // notify peer to send cap import messages to clients - if (mds->mdsmap->is_clientreplay_or_active_or_stopping(stat.peer)) { + if (!mds->is_cluster_degraded() || + mds->mdsmap->is_clientreplay_or_active_or_stopping(stat.peer)) { mds->send_message_mds(new MExportDirFinish(dir->dirfrag(), false, stat.tid), stat.peer); } else { dout(7) << "not sending MExportDirFinish, dest has failed" << dendl; @@ -1854,7 +1862,8 @@ void Migrator::export_finish(CDir *dir) } // send finish/commit to new auth - if (mds->mdsmap->is_clientreplay_or_active_or_stopping(it->second.peer)) { + if (!mds->is_cluster_degraded() || + mds->mdsmap->is_clientreplay_or_active_or_stopping(it->second.peer)) { mds->send_message_mds(new MExportDirFinish(dir->dirfrag(), true, it->second.tid), it->second.peer); } else { dout(7) << "not sending MExportDirFinish last, dest has failed" << dendl; @@ -2561,7 +2570,8 @@ void Migrator::import_notify_abort(CDir *dir, set& bounds) import_state_t& stat = import_state[dir->dirfrag()]; for (set::iterator p = stat.bystanders.begin(); p != stat.bystanders.end(); ) { - if (!mds->mdsmap->is_clientreplay_or_active_or_stopping(*p)) { + if (mds->is_cluster_degraded() && + !mds->mdsmap->is_clientreplay_or_active_or_stopping(*p)) { // this can happen if both exporter and bystander fail in the same mdsmap epoch stat.bystanders.erase(p++); continue; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 02acd6ebee14..4ecc7085bd70 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -5086,7 +5086,8 @@ void Server::_link_remote(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targ // 1. send LinkPrepare to dest (journal nlink++ prepare) mds_rank_t linkauth = targeti->authority().first; if (mdr->more()->witnessed.count(linkauth) == 0) { - if (!mds->mdsmap->is_clientreplay_or_active_or_stopping(linkauth)) { + if (mds->is_cluster_degraded() && + !mds->mdsmap->is_clientreplay_or_active_or_stopping(linkauth)) { dout(10) << " targeti auth mds." 
<< linkauth << " is not active" << dendl; if (mdr->more()->waiting_on_slave.empty()) mds->wait_for_active_peer(linkauth, new C_MDS_RetryRequest(mdcache, mdr)); @@ -5768,7 +5769,8 @@ void Server::_unlink_local_finish(MDRequestRef& mdr, bool Server::_rmdir_prepare_witness(MDRequestRef& mdr, mds_rank_t who, vector& trace, CDentry *straydn) { - if (!mds->mdsmap->is_clientreplay_or_active_or_stopping(who)) { + if (mds->is_cluster_degraded() && + !mds->mdsmap->is_clientreplay_or_active_or_stopping(who)) { dout(10) << "_rmdir_prepare_witness mds." << who << " is not active" << dendl; if (mdr->more()->waiting_on_slave.empty()) mds->wait_for_active_peer(who, new C_MDS_RetryRequest(mdcache, mdr)); @@ -6609,7 +6611,8 @@ void Server::_rename_finish(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, bool Server::_rename_prepare_witness(MDRequestRef& mdr, mds_rank_t who, set &witnesse, vector& srctrace, vector& dsttrace, CDentry *straydn) { - if (!mds->mdsmap->is_clientreplay_or_active_or_stopping(who)) { + if (mds->is_cluster_degraded() && + !mds->mdsmap->is_clientreplay_or_active_or_stopping(who)) { dout(10) << "_rename_prepare_witness mds." << who << " is not active" << dendl; if (mdr->more()->waiting_on_slave.empty()) mds->wait_for_active_peer(who, new C_MDS_RetryRequest(mdcache, mdr)); @@ -7286,7 +7289,8 @@ void Server::handle_slave_rename_prep(MDRequestRef& mdr) // make sure bystanders have received all lock related messages for (set::iterator p = srcdnrep.begin(); p != srcdnrep.end(); ++p) { if (*p == mdr->slave_to_mds || - !mds->mdsmap->is_clientreplay_or_active_or_stopping(*p)) + (mds->is_cluster_degraded() && + !mds->mdsmap->is_clientreplay_or_active_or_stopping(*p))) continue; MMDSSlaveRequest *notify = new MMDSSlaveRequest(mdr->reqid, mdr->attempt, MMDSSlaveRequest::OP_RENAMENOTIFY);