From 40503ffbd7a669880bb62aa801f54c1bd6b39acb Mon Sep 17 00:00:00 2001
From: John Spray
Date: Wed, 8 Oct 2014 11:25:16 +0100
Subject: [PATCH] mds: implement osd epoch barrier on caps

Signed-off-by: John Spray
---
 src/mds/Locker.cc   | 34 +++++++++++++++++++++++++++++-----
 src/mds/MDCache.cc  |  7 ++++---
 src/mds/MDS.cc      | 14 ++++++++++++++
 src/mds/MDS.h       |  5 +++++
 src/mds/Migrator.cc |  2 +-
 5 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc
index c1903519aaa0a..8370fa63080a6 100644
--- a/src/mds/Locker.cc
+++ b/src/mds/Locker.cc
@@ -31,6 +31,7 @@
 #include "events/EOpen.h"
 
 #include "msg/Messenger.h"
+#include "osdc/Objecter.h"
 
 #include "messages/MGenericMessage.h"
 #include "messages/MDiscover.h"
@@ -1950,7 +1951,8 @@ bool Locker::issue_caps(CInode *in, Capability *only_cap)
                                          in->find_snaprealm()->inode->ino(),
                                          cap->get_cap_id(), cap->get_last_seq(),
                                          after, wanted, 0,
-                                         cap->get_mseq());
+                                         cap->get_mseq(),
+                                         mds->get_osd_epoch_barrier());
         in->encode_cap_message(m, cap);
 
         mds->send_message_client_counted(m, it->first);
@@ -1977,7 +1979,8 @@ void Locker::issue_truncate(CInode *in)
                                      in->find_snaprealm()->inode->ino(),
                                      cap->get_cap_id(), cap->get_last_seq(),
                                      cap->pending(), cap->wanted(), 0,
-                                     cap->get_mseq());
+                                     cap->get_mseq(),
+                                     mds->get_osd_epoch_barrier());
     in->encode_cap_message(m, cap);
     mds->send_message_client_counted(m, it->first);
   }
@@ -2328,7 +2331,8 @@ void Locker::share_inode_max_size(CInode *in, Capability *only_cap)
                                        in->find_snaprealm()->inode->ino(),
                                        cap->get_cap_id(), cap->get_last_seq(),
                                        cap->pending(), cap->wanted(), 0,
-                                       cap->get_mseq());
+                                       cap->get_mseq(),
+                                       mds->get_osd_epoch_barrier());
       in->encode_cap_message(m, cap);
       mds->send_message_client_counted(m, client);
     }
@@ -2473,6 +2477,16 @@ void Locker::handle_client_caps(MClientCaps *m)
     return;
   }
 
+  if (m->osd_epoch_barrier && !mds->objecter->have_map(m->osd_epoch_barrier)) {
+    // Pause RADOS operations until we see the epoch this client message requires
+    mds->objecter->set_epoch_barrier(m->osd_epoch_barrier);
+  }
+
+  if (mds->get_osd_epoch_barrier() < m->osd_epoch_barrier) {
+    // Raise our stored barrier so later cap messages to clients carry it
+    mds->set_osd_epoch_barrier(m->osd_epoch_barrier);
+  }
+
   CInode *in = mdcache->pick_inode_snap(head_in, follows);
   if (in != head_in)
     dout(10) << " head inode " << *head_in << dendl;
@@ -2529,7 +2543,7 @@ void Locker::handle_client_caps(MClientCaps *m)
     // case we get a dup response, so whatever.)
     MClientCaps *ack = 0;
     if (m->get_dirty()) {
-      ack = new MClientCaps(CEPH_CAP_OP_FLUSHSNAP_ACK, in->ino(), 0, 0, 0, 0, 0, m->get_dirty(), 0);
+      ack = new MClientCaps(CEPH_CAP_OP_FLUSHSNAP_ACK, in->ino(), 0, 0, 0, 0, 0, m->get_dirty(), 0, mds->get_osd_epoch_barrier());
       ack->set_snap_follows(follows);
       ack->set_client_tid(m->get_client_tid());
     }
@@ -2592,7 +2606,7 @@ void Locker::handle_client_caps(MClientCaps *m)
       dout(7) << " flush client." << client << " dirty " << ccap_string(m->get_dirty())
               << " seq " << m->get_seq() << " on " << *in << dendl;
       ack = new MClientCaps(CEPH_CAP_OP_FLUSH_ACK, in->ino(), 0, cap->get_cap_id(), m->get_seq(),
-                            m->get_caps(), 0, m->get_dirty(), 0);
+                            m->get_caps(), 0, m->get_dirty(), 0, mds->get_osd_epoch_barrier());
       ack->set_client_tid(m->get_client_tid());
     }
 
@@ -3129,6 +3143,16 @@ void Locker::handle_client_cap_release(MClientCapRelease *m)
     return;
   }
 
+  if (m->osd_epoch_barrier && !mds->objecter->have_map(m->osd_epoch_barrier)) {
+    // Pause RADOS operations until we see the epoch this client message requires
+    mds->objecter->set_epoch_barrier(m->osd_epoch_barrier);
+  }
+
+  if (mds->get_osd_epoch_barrier() < m->osd_epoch_barrier) {
+    // Raise our stored barrier so later cap messages to clients carry it
+    mds->set_osd_epoch_barrier(m->osd_epoch_barrier);
+  }
+
   Session *session = static_cast(m->get_connection()->get_priv());
 
   for (vector::iterator p = m->caps.begin(); p != m->caps.end(); ++p) {
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index 3d990bbd2fe5b..e35bf18557357 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -4799,7 +4799,8 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *ack)
 
           // mark client caps stale.
           MClientCaps *m = new MClientCaps(CEPH_CAP_OP_EXPORT, p->first, 0,
-                                           cap_exports[p->first][q->first].cap_id, 0);
+                                           cap_exports[p->first][q->first].cap_id, 0,
+                                           mds->get_osd_epoch_barrier());
           m->set_cap_peer(q->second.cap_id, q->second.issue_seq, q->second.mseq,
                           from, 0);
           mds->send_message_client_counted(m, session);
@@ -5277,7 +5278,7 @@ void MDCache::export_remaining_imported_caps()
       Session *session = mds->sessionmap.get_session(entity_name_t::CLIENT(q->first.v));
       if (session) {
         // mark client caps stale.
-        MClientCaps *stale = new MClientCaps(CEPH_CAP_OP_EXPORT, p->first, 0, 0, 0);
+        MClientCaps *stale = new MClientCaps(CEPH_CAP_OP_EXPORT, p->first, 0, 0, 0, mds->get_osd_epoch_barrier());
         stale->set_cap_peer(0, 0, 0, -1, 0);
         mds->send_message_client_counted(stale, q->first);
       }
@@ -5336,7 +5337,7 @@ void MDCache::do_cap_import(Session *session, CInode *in, Capability *cap,
                                         realm->inode->ino(),
                                         cap->get_cap_id(), cap->get_last_seq(),
                                         cap->pending(), cap->wanted(), 0,
-                                        cap->get_mseq());
+                                        cap->get_mseq(), mds->get_osd_epoch_barrier());
     in->encode_cap_message(reap, cap);
     realm->build_snap_trace(reap->snapbl);
     reap->set_cap_peer(p_cap_id, p_seq, p_mseq, peer, p_flags);
diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc
index 96c0468de02cc..e568adece1a43 100644
--- a/src/mds/MDS.cc
+++ b/src/mds/MDS.cc
@@ -108,6 +108,7 @@ MDS::MDS(const std::string &n, Messenger *m, MonClient *mc) :
   op_tracker(cct, m->cct->_conf->mds_enable_op_tracker,
              m->cct->_conf->osd_num_op_tracker_shard),
   finisher(cct),
+  osd_epoch_barrier(0),
   sessionmap(this),
   progress_thread(this),
   asok_hook(NULL)
@@ -2893,3 +2894,16 @@ void MDS::ProgressThread::shutdown()
   join();
   mds->mds_lock.Lock();
 }
+
+/**
+ * Overwrite the stored OSD epoch barrier with e; there is no check here
+ * that e is newer, so callers compare against get_osd_epoch_barrier().
+ * Used whenever a RADOS operation has been cancelled or a RADOS client has
+ * been blacklisted, to cause the MDS and any clients to wait for this OSD
+ * epoch before using any new caps.  See doc/cephfs/eviction
+ */
+void MDS::set_osd_epoch_barrier(epoch_t e)
+{
+  dout(4) << __func__ << ": epoch=" << e << dendl;
+  osd_epoch_barrier = e;
+}
diff --git a/src/mds/MDS.h b/src/mds/MDS.h
index 0ee73e37e4634..f92be473249c5 100644
--- a/src/mds/MDS.h
+++ b/src/mds/MDS.h
@@ -215,7 +215,12 @@ class MDS : public Dispatcher, public md_config_obs_t {
 
   ceph_tid_t last_tid;    // for mds-initiated requests (e.g. stray rename)
 
+  epoch_t osd_epoch_barrier;
+
  public:
+  void set_osd_epoch_barrier(epoch_t e);
+  epoch_t get_osd_epoch_barrier() const {return osd_epoch_barrier;}
+
   void wait_for_active(MDSInternalContextBase *c) {
     waiting_for_active.push_back(c);
   }
diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc
index 50330708373b3..28decd2cd8b87 100644
--- a/src/mds/Migrator.cc
+++ b/src/mds/Migrator.cc
@@ -1319,7 +1319,7 @@ void Migrator::finish_export_inode_caps(CInode *in, mds_rank_t peer,
     dout(7) << "finish_export_inode_caps telling client." << it->first
             << " exported caps on " << *in << dendl;
     MClientCaps *m = new MClientCaps(CEPH_CAP_OP_EXPORT, in->ino(), 0,
-                                     cap->get_cap_id(), cap->get_mseq());
+                                     cap->get_cap_id(), cap->get_mseq(), mds->get_osd_epoch_barrier());
 
     map::iterator q = peer_imported.find(it->first);
     assert(q != peer_imported.end());
-- 
2.39.5