From b58b8d098e4cacf033e9cc4bef694687dcbc4f66 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 15 Nov 2010 14:16:43 -0800 Subject: [PATCH] mds: fix discover requests, tracking wrt fragments Track discover requests by tid. The old system of tracking outstanding discovers was kludgey and somewhat broken. Also there is a possibility of getting dup replies if someone does kick_requests(). There is still room for improvement with the logic detemrining when a discover is sent: we may want to discover multiple dirfrags in parallel, but the current code will only do one at a time. Signed-off-by: Sage Weil comment --- src/mds/MDCache.cc | 219 ++++++++++++++-------------------- src/mds/MDCache.h | 30 ++++- src/messages/MDiscover.h | 25 ++-- src/messages/MDiscoverReply.h | 4 +- 4 files changed, 124 insertions(+), 154 deletions(-) diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 93d72132a5ddb..6b472ed18d7e6 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -124,6 +124,8 @@ MDCache::MDCache(MDS *m) num_inodes_with_caps = 0; num_caps = 0; + discover_last_tid = 0; + last_cap_id = 0; client_lease_durations[0] = 5.0; @@ -7470,32 +7472,30 @@ void MDCache::migrate_stray(CDentry *dn, int from, int to) - for all discovers (except base_inos, e.g. root, stray), waiters are attached to the parent metadata object in the cache (pinning it). - - - the discover is also registered under the per-mds discover_ hashes, so that - waiters can be kicked in the event of a failure. that is, every discover will - be followed by a reply, unless the remote node fails.. - - - each discover_reply must reliably decrement the discover_ counts. - - base_inos are the exception. those waiters are under waiting_for_base_ino. + - all discovers are tracked by tid, so that we can ignore potentially dup replies. */ +void MDCache::_send_discover(discover_info_t& d) +{ + MDiscover *dis = new MDiscover(d.ino, d.frag, d.snap, + d.want_path, d.want_ino, d.want_base_dir, d.want_xlocked); + dis->set_tid(d.tid); + mds->send_message_mds(dis, d.mds); +} + void MDCache::discover_base_ino(inodeno_t want_ino, Context *onfinish, int from) { dout(7) << "discover_base_ino " << want_ino << " from mds" << from << dendl; if (waiting_for_base_ino[from].count(want_ino) == 0) { - filepath want_path; - MDiscover *dis = new MDiscover(want_ino, - CEPH_NOSNAP, - want_path, - false); - mds->send_message_mds(dis, from); + discover_info_t& d = _create_discover(from); + d.ino = want_ino; + _send_discover(d); + waiting_for_base_ino[from][want_ino].push_back(onfinish); } - - waiting_for_base_ino[from][want_ino].push_back(onfinish); } @@ -7504,25 +7504,23 @@ void MDCache::discover_dir_frag(CInode *base, Context *onfinish, int from) { - if (from < 0) from = base->authority().first; + if (from < 0) + from = base->authority().first; - dout(7) << "discover_dir_frag " << base->ino() << " " << approx_fg + dirfrag_t df(base->ino(), approx_fg); + dout(7) << "discover_dir_frag " << df << " from mds" << from << dendl; - if (!base->is_waiter_for(CInode::WAIT_DIR) || !onfinish) { // this is overly conservative - filepath want_path; - MDiscover *dis = new MDiscover(base->ino(), - CEPH_NOSNAP, - want_path, - true); // need the base dir open - dis->set_base_dir_frag(approx_fg); - mds->send_message_mds(dis, from); + if (!base->is_waiter_for(CInode::WAIT_DIR) || !onfinish) { // FIXME: this is kind of weak! + discover_info_t& d = _create_discover(from); + d.ino = base->ino(); + d.frag = approx_fg; + d.want_base_dir = true; + _send_discover(d); } - // register + wait if (onfinish) base->add_waiter(CInode::WAIT_DIR, onfinish); - discover_dir[from][base->ino()]++; } struct C_MDC_RetryDiscoverPath : public Context { @@ -7545,7 +7543,8 @@ void MDCache::discover_path(CInode *base, bool want_xlocked, int from) { - if (from < 0) from = base->authority().first; + if (from < 0) + from = base->authority().first; dout(7) << "discover_path " << base->ino() << " " << want_path << " snap " << snap << " from mds" << from << (want_xlocked ? " want_xlocked":"") @@ -7559,18 +7558,19 @@ void MDCache::discover_path(CInode *base, return; } - if (!base->is_waiter_for(CInode::WAIT_DIR) || !onfinish) { // this is overly conservative - MDiscover *dis = new MDiscover(base->ino(), - snap, - want_path, - true, // we want the base dir; we are relative to ino. - want_xlocked); - mds->send_message_mds(dis, from); + if (!base->is_waiter_for(CInode::WAIT_DIR) || !onfinish) { // FIXME: weak! + discover_info_t& d = _create_discover(from); + d.ino = base->ino(); + d.snap = snap; + d.want_path = want_path; + d.want_base_dir = true; + d.want_xlocked = want_xlocked; + _send_discover(d); } // register + wait - if (onfinish) base->add_waiter(CInode::WAIT_DIR, onfinish); - discover_dir[from][base->ino()]++; + if (onfinish) + base->add_waiter(CInode::WAIT_DIR, onfinish); } struct C_MDC_RetryDiscoverPath2 : public Context { @@ -7606,17 +7606,19 @@ void MDCache::discover_path(CDir *base, } if (!base->is_waiting_for_dentry(want_path[0].c_str(), snap) || !onfinish) { - MDiscover *dis = new MDiscover(base->ino(), - snap, - want_path, - false, // no base dir; we are relative to dir - want_xlocked); - mds->send_message_mds(dis, from); + discover_info_t& d = _create_discover(from); + d.ino = base->ino(); + d.frag = base->get_frag(); + d.snap = snap; + d.want_path = want_path; + d.want_base_dir = true; + d.want_xlocked = want_xlocked; + _send_discover(d); } // register + wait - if (onfinish) base->add_dentry_waiter(want_path[0], snap, onfinish); - discover_dir_sub[from][base->dirfrag()]++; + if (onfinish) + base->add_dentry_waiter(want_path[0], snap, onfinish); } struct C_MDC_RetryDiscoverIno : public Context { @@ -7650,53 +7652,27 @@ void MDCache::discover_ino(CDir *base, } if (!base->is_waiting_for_ino(want_ino)) { - MDiscover *dis = new MDiscover(base->dirfrag(), - CEPH_NOSNAP, - want_ino, - want_xlocked); - mds->send_message_mds(dis, from); + discover_info_t& d = _create_discover(from); + d.ino = base->ino(); + d.frag = base->get_frag(); + d.want_ino = want_ino; + d.want_base_dir = true; + d.want_xlocked = want_xlocked; + _send_discover(d); } // register + wait base->add_ino_waiter(want_ino, onfinish); - discover_dir_sub[from][base->dirfrag()]++; } void MDCache::kick_discovers(int who) { - list waiters; - - for (map >::iterator p = waiting_for_base_ino[who].begin(); - p != waiting_for_base_ino[who].end(); - ++p) { - dout(10) << "kick_discovers on base ino " << p->first << dendl; - mds->queue_waiters(p->second); - } - waiting_for_base_ino.erase(who); - - for (map::iterator p = discover_dir[who].begin(); - p != discover_dir[who].end(); - ++p) { - CInode *in = get_inode(p->first); - if (!in) continue; - dout(10) << "kick_discovers dir waiters on " << *in << dendl; - in->take_waiting(CInode::WAIT_DIR, waiters); - } - discover_dir.erase(who); - - for (map::iterator p = discover_dir_sub[who].begin(); - p != discover_dir_sub[who].end(); - ++p) { - CDir *dir = get_dirfrag(p->first); - if (!dir) continue; - dout(10) << "kick_discovers dentry+ino waiters on " << *dir << dendl; - dir->take_sub_waiting(waiters); - } - discover_dir_sub.erase(who); - - mds->queue_waiters(waiters); + for (map::iterator p = discovers.begin(); + p != discovers.end(); + p++) + _send_discover(p->second); } @@ -7725,34 +7701,19 @@ void MDCache::handle_discover(MDiscover *dis) snapid_t snapid = dis->get_snapid(); // get started. - if (dis->get_base_ino() == MDS_INO_ROOT) { + if (MDS_INO_IS_BASE(dis->get_base_ino())) { // wants root dout(7) << "handle_discover from mds" << from - << " wants root + " << dis->get_want().get_path() + << " wants base + " << dis->get_want().get_path() << " snap " << snapid << dendl; - assert(mds->get_nodeid() == 0); - assert(root->is_auth()); + cur = get_inode(dis->get_base_ino()); // add root reply->starts_with = MDiscoverReply::INODE; - replicate_inode(root, from, reply->trace); - dout(10) << "added root " << *root << dendl; - - cur = root; - } - else if (dis->get_base_ino() == MDS_INO_MDSDIR(whoami)) { - // wants root - dout(7) << "handle_discover from mds" << from - << " wants mdsdir + " << dis->get_want().get_path() - << " snap " << snapid - << dendl; - reply->starts_with = MDiscoverReply::INODE; - replicate_inode(myin, from, reply->trace); - dout(10) << "added mdsdir " << *myin << dendl; - - cur = myin; + replicate_inode(cur, from, reply->trace); + dout(10) << "added base " << *cur << dendl; } else { // there's a base inode @@ -8008,6 +7969,12 @@ void MDCache::handle_discover_reply(MDiscoverReply *m) } */ dout(7) << "discover_reply " << *m << dendl; + if (m->is_flag_error_dir()) + dout(7) << " flag error, dir" << dendl; + if (m->is_flag_error_dn()) + dout(7) << " flag error, dentry = " << m->get_error_dentry() << dendl; + if (m->is_flag_error_ino()) + dout(7) << " flag error, ino = " << m->get_wanted_ino() << dendl; list finished, error; int from = m->get_source().num(); @@ -8018,44 +7985,34 @@ void MDCache::handle_discover_reply(MDiscoverReply *m) int next = m->starts_with; + // decrement discover counters + if (m->get_tid()) { + map::iterator p = discovers.find(m->get_tid()); + if (p != discovers.end()) { + dout(10) << " found tid " << m->get_tid() << dendl; + discovers.erase(p); + } else { + dout(10) << " tid " << m->get_tid() << " not found, must be dup reply" << dendl; + } + } + + // discover may start with an inode if (!p.end() && next == MDiscoverReply::INODE) { - // add base inode cur = add_replica_inode(p, NULL, finished); dout(7) << "discover_reply got base inode " << *cur << dendl; assert(cur->is_base()); - + next = MDiscoverReply::DIR; - // take waiters - finished.swap(waiting_for_base_ino[from][cur->ino()]); - waiting_for_base_ino[from].erase(cur->ino()); + // take waiters? + if (cur->is_base() && + waiting_for_base_ino[from].count(cur->ino())) { + finished.swap(waiting_for_base_ino[from][cur->ino()]); + waiting_for_base_ino[from].erase(cur->ino()); + } } assert(cur); - - // fyi - if (m->is_flag_error_dir()) - dout(7) << " flag error, dir" << dendl; - if (m->is_flag_error_dn()) - dout(7) << " flag error, dentry = " << m->get_error_dentry() << dendl; - if (m->is_flag_error_ino()) - dout(7) << " flag error, ino = " << m->get_wanted_ino() << dendl; - - // decrement discover counters - if (!m->is_unsolicited()) { - if (m->get_wanted_base_dir()) { - inodeno_t ino = m->get_base_ino(); - assert(discover_dir[from].count(ino)); - if (--discover_dir[from][ino] == 0) - discover_dir[from].erase(ino); - } else if (m->get_base_ino() >= MDS_INO_SYSTEM_BASE) { - dirfrag_t df(m->get_base_ino(), m->get_base_dir_frag()); - assert(discover_dir_sub[from].count(df)); - if (--discover_dir_sub[from][df] == 0) - discover_dir_sub[from].erase(df); - } - } - // loop over discover results. // indexes follow each ([[dir] dentry] inode) // can start, end with any type. diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 38b6d1a8fb231..ce3cefb84aa57 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -462,15 +462,33 @@ public: // -- discover -- + struct discover_info_t { + tid_t tid; + int mds; + inodeno_t ino; + frag_t frag; + snapid_t snap; + filepath want_path; + inodeno_t want_ino; + bool want_base_dir; + bool want_xlocked; + + discover_info_t() : tid(0), mds(-1), snap(CEPH_NOSNAP), want_base_dir(false), want_xlocked(false) {} + }; + + map discovers; + tid_t discover_last_tid; + + void _send_discover(discover_info_t& dis); + discover_info_t& _create_discover(int mds) { + discover_info_t& d = discovers[++discover_last_tid]; + d.mds = mds; + return d; + } + // waiters map > > waiting_for_base_ino; - // in process discovers, by mds. - // this is just enough info to kick any waiters in the event of a failure. - // FIXME: use pointers here instead of identifiers? - map > discover_dir; - map > discover_dir_sub; - void discover_base_ino(inodeno_t want_ino, Context *onfinish, int from=-1); void discover_dir_frag(CInode *base, frag_t approx_fg, Context *onfinish, int from=-1); diff --git a/src/messages/MDiscover.h b/src/messages/MDiscover.h index a27d0aee83c11..9ba31a622886d 100644 --- a/src/messages/MDiscover.h +++ b/src/messages/MDiscover.h @@ -51,37 +51,30 @@ class MDiscover : public Message { MDiscover() { } MDiscover(inodeno_t base_ino_, + frag_t base_frag_, snapid_t s, - filepath& want_, + filepath& want_path_, + inodeno_t want_ino_, bool want_base_dir_ = true, bool discover_xlocks_ = false) : Message(MSG_MDS_DISCOVER), base_ino(base_ino_), + base_dir_frag(base_frag_), snapid(s), - want(want_), - want_ino(0), - want_base_dir(want_base_dir_), - want_xlocked(discover_xlocks_) { } - MDiscover(dirfrag_t base_dirfrag, - snapid_t s, - inodeno_t want_ino_, - bool want_base_dir_ = true) : - Message(MSG_MDS_DISCOVER), - base_ino(base_dirfrag.ino), - base_dir_frag(base_dirfrag.frag), - snapid(s), + want(want_path_), want_ino(want_ino_), want_base_dir(want_base_dir_), - want_xlocked(false) { } + want_xlocked(discover_xlocks_) { } private: ~MDiscover() {} public: const char *get_type_name() { return "Dis"; } void print(ostream &out) { - out << "discover(" << base_ino << "." << base_dir_frag + out << "discover(" << header.tid << " " << base_ino << "." << base_dir_frag << " " << want; - if (want_ino) out << want_ino; + if (want_ino) + out << want_ino; out << ")"; } diff --git a/src/messages/MDiscoverReply.h b/src/messages/MDiscoverReply.h index d93ea9f03e22d..80e1c2095d964 100644 --- a/src/messages/MDiscoverReply.h +++ b/src/messages/MDiscoverReply.h @@ -124,6 +124,7 @@ class MDiscoverReply : public Message { flag_error_ino(false), flag_error_dir(false), dir_auth_hint(CDIR_AUTH_UNKNOWN) { + header.tid = dis->get_tid(); } MDiscoverReply(dirfrag_t df) : Message(MSG_MDS_DISCOVERREPLY), @@ -137,6 +138,7 @@ class MDiscoverReply : public Message { flag_error_ino(false), flag_error_dir(false), dir_auth_hint(CDIR_AUTH_UNKNOWN) { + header.tid = 0; } private: ~MDiscoverReply() {} @@ -144,7 +146,7 @@ private: public: const char *get_type_name() { return "discover_reply"; } void print(ostream& out) { - out << "discover_reply(" << base_ino << ")"; + out << "discover_reply(" << header.tid << " " << base_ino << ")"; } // builders -- 2.39.5