From: Yan, Zheng Date: Wed, 13 Mar 2013 12:47:11 +0000 (+0800) Subject: mds: include replica nonce in MMDSCacheRejoin::inode_strong X-Git-Tag: v0.62~120^2~21 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9f66d0454fbb8615df4c3a79d88134ad86c9628d;p=ceph.git mds: include replica nonce in MMDSCacheRejoin::inode_strong So the recovering MDS can properly handle cache expire messages. Also increase the nonce value when sending the cache rejoin acks. Signed-off-by: Yan, Zheng Reviewed-by: Greg Farnum Also update the MMDSCacheRejoin encoding to the new format. Signed-off-by: Greg Farnum --- diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 4a730f8ad8c6..3ca6ada4cf5d 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -3542,6 +3542,7 @@ void MDCache::rejoin_send_rejoins() if (p->first == 0 && root) { p->second->add_weak_inode(root->vino()); p->second->add_strong_inode(root->vino(), + root->get_replica_nonce(), root->get_caps_wanted(), root->filelock.get_state(), root->nestlock.get_state(), @@ -3555,6 +3556,7 @@ void MDCache::rejoin_send_rejoins() if (CInode *in = get_inode(MDS_INO_MDSDIR(p->first))) { p->second->add_weak_inode(in->vino()); p->second->add_strong_inode(in->vino(), + in->get_replica_nonce(), in->get_caps_wanted(), in->filelock.get_state(), in->nestlock.get_state(), @@ -3713,6 +3715,7 @@ void MDCache::rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin) CInode *in = dnl->get_inode(); dout(15) << " add_strong_inode " << *in << dendl; rejoin->add_strong_inode(in->vino(), + in->get_replica_nonce(), in->get_caps_wanted(), in->filelock.get_state(), in->nestlock.get_state(), @@ -4252,7 +4255,7 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong) dir = rejoin_invent_dirfrag(p->first); } if (dir) { - dir->add_replica(from); + dir->add_replica(from, p->second.nonce); dir->dir_rep = p->second.dir_rep; } else { dout(10) << " frag " << p->first << " doesn't match dirfragtree " << *diri << dendl; @@ -4267,7 
+4270,7 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong) dir = rejoin_invent_dirfrag(p->first); else dout(10) << " have(approx) " << *dir << dendl; - dir->add_replica(from); + dir->add_replica(from, p->second.nonce); dir->dir_rep = p->second.dir_rep; } refragged = true; @@ -4331,7 +4334,7 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong) mdr->locks.insert(&dn->lock); } - dn->add_replica(from); + dn->add_replica(from, q->second.nonce); dout(10) << " have " << *dn << dendl; // inode? @@ -4416,7 +4419,7 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong) dout(10) << " sender has dentry but not inode, adding them as a replica" << dendl; } - in->add_replica(from); + in->add_replica(from, p->second.nonce); dout(10) << " have " << *in << dendl; } } @@ -5180,7 +5183,7 @@ void MDCache::rejoin_send_acks() for (map::iterator r = dir->replicas_begin(); r != dir->replicas_end(); ++r) - ack[r->first]->add_strong_dirfrag(dir->dirfrag(), r->second, dir->dir_rep); + ack[r->first]->add_strong_dirfrag(dir->dirfrag(), ++r->second, dir->dir_rep); for (CDir::map_t::iterator q = dir->items.begin(); q != dir->items.end(); @@ -5196,7 +5199,7 @@ void MDCache::rejoin_send_acks() dnl->is_primary() ? dnl->get_inode()->ino():inodeno_t(0), dnl->is_remote() ? dnl->get_remote_ino():inodeno_t(0), dnl->is_remote() ? dnl->get_remote_d_type():0, - r->second, + ++r->second, dn->lock.get_replica_state()); if (!dnl->is_primary()) @@ -5209,7 +5212,7 @@ void MDCache::rejoin_send_acks() r != in->replicas_end(); ++r) { ack[r->first]->add_inode_base(in); - ack[r->first]->add_inode_locks(in, r->second); + ack[r->first]->add_inode_locks(in, ++r->second); } // subdirs in this subtree? 
@@ -5224,14 +5227,14 @@ void MDCache::rejoin_send_acks() r != root->replicas_end(); ++r) { ack[r->first]->add_inode_base(root); - ack[r->first]->add_inode_locks(root, r->second); + ack[r->first]->add_inode_locks(root, ++r->second); } if (myin) for (map::iterator r = myin->replicas_begin(); r != myin->replicas_end(); ++r) { ack[r->first]->add_inode_base(myin); - ack[r->first]->add_inode_locks(myin, r->second); + ack[r->first]->add_inode_locks(myin, ++r->second); } // include inode base for any inodes whose scatterlocks may have updated @@ -5732,6 +5735,12 @@ void MDCache::send_expire_messages(map<int, MCacheExpire*>& expiremap) for (map<int, MCacheExpire*>::iterator it = expiremap.begin(); it != expiremap.end(); ++it) { + if (mds->mdsmap->get_state(it->first) < MDSMap::STATE_REJOIN || + (mds->mdsmap->get_state(it->first) == MDSMap::STATE_REJOIN && + rejoin_sent.count(it->first) == 0)) { + it->second->put(); + continue; + } dout(7) << "sending cache_expire to " << it->first << dendl; mds->send_message_mds(it->second, it->first); } @@ -9714,9 +9723,11 @@ void MDCache::handle_dentry_link(MDentryLink *m) CInode *in = add_replica_inode(p, NULL, finished); assert(in->get_num_ref() == 0); assert(in->get_parent_dn() == NULL); - MCacheExpire* expire = new MCacheExpire(mds->get_nodeid()); - expire->add_inode(m->get_subtree(), in->vino(), in->get_replica_nonce()); - mds->send_message_mds(expire, m->get_source().num()); + map<int, MCacheExpire*> expiremap; + int from = m->get_source().num(); + expiremap[from] = new MCacheExpire(mds->get_nodeid()); + expiremap[from]->add_inode(m->get_subtree(), in->vino(), in->get_replica_nonce()); + send_expire_messages(expiremap); remove_inode(in); } diff --git a/src/messages/MMDSCacheRejoin.h b/src/messages/MMDSCacheRejoin.h index 825400d6eeae..eb565e3a6653 100644 --- a/src/messages/MMDSCacheRejoin.h +++ b/src/messages/MMDSCacheRejoin.h @@ -24,6 +24,10 @@ // sent from replica to auth class MMDSCacheRejoin : public Message { + + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; 
+ public: static const int OP_WEAK = 1; // replica -> auth, i exist, + maybe open files. static const int OP_STRONG = 2; // replica -> auth, i exist, + open files and lock state. @@ -43,19 +47,22 @@ class MMDSCacheRejoin : public Message { // -- types -- struct inode_strong { + int32_t nonce; int32_t caps_wanted; int32_t filelock, nestlock, dftlock; inode_strong() {} - inode_strong(int cw, int dl, int nl, int dftl) : - caps_wanted(cw), + inode_strong(int n, int cw, int dl, int nl, int dftl) : + nonce(n), caps_wanted(cw), filelock(dl), nestlock(nl), dftlock(dftl) { } void encode(bufferlist &bl) const { + ::encode(nonce, bl); ::encode(caps_wanted, bl); ::encode(filelock, bl); ::encode(nestlock, bl); ::encode(dftlock, bl); } void decode(bufferlist::iterator &bl) { + ::decode(nonce, bl); ::decode(caps_wanted, bl); ::decode(filelock, bl); ::decode(nestlock, bl); @@ -190,9 +197,11 @@ class MMDSCacheRejoin : public Message { map > authpinned_dentries; map > xlocked_dentries; - MMDSCacheRejoin() : Message(MSG_MDS_CACHEREJOIN) {} + MMDSCacheRejoin() : + Message(MSG_MDS_CACHEREJOIN, HEAD_VERSION, COMPAT_VERSION) + {} MMDSCacheRejoin(int o) : - Message(MSG_MDS_CACHEREJOIN), + Message(MSG_MDS_CACHEREJOIN, HEAD_VERSION, COMPAT_VERSION), op(o) {} private: ~MMDSCacheRejoin() {} @@ -208,8 +217,8 @@ public: void add_weak_inode(vinodeno_t i) { weak_inodes.insert(i); } - void add_strong_inode(vinodeno_t i, int cw, int dl, int nl, int dftl) { - strong_inodes[i] = inode_strong(cw, dl, nl, dftl); + void add_strong_inode(vinodeno_t i, int n, int cw, int dl, int nl, int dftl) { + strong_inodes[i] = inode_strong(n, cw, dl, nl, dftl); } void add_inode_locks(CInode *in, __u32 nonce) { ::encode(in->inode.ino, inode_locks);