From: Yan, Zheng Date: Wed, 3 Jun 2015 10:52:08 +0000 (+0800) Subject: mds: record completed cap flushes in session map X-Git-Tag: v9.1.0~57^2~5 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0ca7a49ba8294a9e9406fcda58221fd8c89a7491;p=ceph.git mds: record completed cap flushes in session map This information is used to detect duplicate cap flushes when the MDS fails over. We only record completed cap flushes for clients whose cap messages include the TID of the oldest cap flush. Signed-off-by: Yan, Zheng --- diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 9d4bf2dbc5e99..7a4956381c2d3 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1715,8 +1715,18 @@ void Locker::file_update_finish(CInode *in, MutationRef& mut, bool share, client mut->apply(); - if (ack) - mds->send_message_client_counted(ack, client); + if (ack) { + Session *session = mds->get_session(client); + if (session) { + // "oldest flush tid" > 0 means client uses unique TID for each flush + if (ack->get_oldest_flush_tid() > 0) + session->add_completed_flush(ack->get_client_tid()); + mds->send_message_client_counted(ack, session); + } else { + dout(10) << " no session for client." 
<< client << " " << *ack << dendl; + ack->put(); + } + } set need_issue; drop_locks(mut.get(), &need_issue); @@ -2456,6 +2466,13 @@ void Locker::handle_client_caps(MClientCaps *m) return; } + // "oldest flush tid" > 0 means client uses unique TID for each flush + if (m->get_oldest_flush_tid() > 0) { + if (session->trim_completed_flushes(m->get_oldest_flush_tid())) { + mds->mdlog->get_current_segment()->touched_sessions.insert(session->info.inst.name); + } + } + CInode *head_in = mdcache->get_inode(m->get_ino()); if (!head_in) { dout(7) << "handle_client_caps on unknown ino " << m->get_ino() << ", dropping" << dendl; @@ -2526,6 +2543,7 @@ void Locker::handle_client_caps(MClientCaps *m) ack = new MClientCaps(CEPH_CAP_OP_FLUSHSNAP_ACK, in->ino(), 0, 0, 0, 0, 0, m->get_dirty(), 0, mds->get_osd_epoch_barrier()); ack->set_snap_follows(follows); ack->set_client_tid(m->get_client_tid()); + ack->set_oldest_flush_tid(m->get_oldest_flush_tid()); } if (in == head_in || @@ -2601,6 +2619,7 @@ void Locker::handle_client_caps(MClientCaps *m) ack = new MClientCaps(CEPH_CAP_OP_FLUSH_ACK, in->ino(), 0, cap->get_cap_id(), m->get_seq(), m->get_caps(), 0, m->get_dirty(), 0, mds->get_osd_epoch_barrier()); ack->set_client_tid(m->get_client_tid()); + ack->set_oldest_flush_tid(m->get_oldest_flush_tid()); } // filter wanted based on what we could ever give out (given auth/replica status) @@ -2877,6 +2896,11 @@ void Locker::_do_snap_update(CInode *in, snapid_t snap, int dirty, snapid_t foll mdcache->predirty_journal_parents(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY, 0, follows); mdcache->journal_dirty_inode(mut.get(), &le->metablob, in, follows); + // "oldest flush tid" > 0 means client uses unique TID for each flush + if (ack && ack->get_oldest_flush_tid() > 0) + le->metablob.add_client_flush(metareqid_t(m->get_source(), ack->get_client_tid()), + ack->get_oldest_flush_tid()); + mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, mut, false, client, NULL, @@ -3122,6 
+3146,11 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, mdcache->predirty_journal_parents(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY, 0, follows); mdcache->journal_dirty_inode(mut.get(), &le->metablob, in, follows); + // "oldest flush tid" > 0 means client uses unique TID for each flush + if (ack && ack->get_oldest_flush_tid() > 0) + le->metablob.add_client_flush(metareqid_t(m->get_source(), ack->get_client_tid()), + ack->get_oldest_flush_tid()); + mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, mut, change_max, client, cap, diff --git a/src/mds/SessionMap.h b/src/mds/SessionMap.h index 93eb66095bee8..b73246e5baab3 100644 --- a/src/mds/SessionMap.h +++ b/src/mds/SessionMap.h @@ -261,6 +261,25 @@ public: return true; } + void add_completed_flush(ceph_tid_t tid) { + info.completed_flushes.insert(tid); + } + bool trim_completed_flushes(ceph_tid_t mintid) { + bool erased_any = false; + while (!info.completed_flushes.empty() && + (mintid == 0 || *info.completed_flushes.begin() < mintid)) { + info.completed_flushes.erase(info.completed_flushes.begin()); + erased_any = true; + } + if (erased_any) { + completed_requests_dirty = true; + } + return erased_any; + } + bool have_completed_flush(ceph_tid_t tid) const { + return info.completed_flushes.count(tid); + } + unsigned get_num_completed_requests() const { return info.completed_requests.size(); } unsigned get_num_trim_requests_warnings() { return num_trim_requests_warnings; } void inc_num_trim_requests_warnings() { ++num_trim_requests_warnings; } diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc index 28843a58a2879..d0218ec806126 100644 --- a/src/mds/mdstypes.cc +++ b/src/mds/mdstypes.cc @@ -665,12 +665,13 @@ void old_rstat_t::generate_test_instances(list& ls) */ void session_info_t::encode(bufferlist& bl) const { - ENCODE_START(4, 3, bl); + ENCODE_START(5, 3, bl); ::encode(inst, bl); ::encode(completed_requests, bl); ::encode(prealloc_inos, bl); // hacky, see below. 
::encode(used_inos, bl); ::encode(client_metadata, bl); + ::encode(completed_flushes, bl); ENCODE_FINISH(bl); } @@ -695,6 +696,9 @@ void session_info_t::decode(bufferlist::iterator& p) if (struct_v >= 4) { ::decode(client_metadata, p); } + if (struct_v >= 5) { + ::decode(completed_flushes, p); + } DECODE_FINISH(p); } diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 9ed51e2499389..6fb5f12de4de5 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -642,6 +642,7 @@ struct session_info_t { interval_set prealloc_inos; // preallocated, ready to use. interval_set used_inos; // journaling use std::map client_metadata; + std::set completed_flushes; client_t get_client() const { return client_t(inst.name.num()); } @@ -649,6 +650,7 @@ struct session_info_t { prealloc_inos.clear(); used_inos.clear(); completed_requests.clear(); + completed_flushes.clear(); } void encode(bufferlist& bl) const;