From: dongdong tao Date: Fri, 26 Jan 2018 06:12:31 +0000 (+0800) Subject: cephfs: Make mds-mds per message versioned X-Git-Tag: v15.1.0~1738^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9a193ec3bc9442ef334eaff31f0d89df0d5ea2c2;p=ceph.git cephfs: Make mds-mds per message versioned Fixes: http://tracker.ceph.com/issues/12107 Signed-off-by: dongdong tao --- diff --git a/src/mds/CDentry.cc b/src/mds/CDentry.cc index b2a7db1e78ce..da91b1284ca9 100644 --- a/src/mds/CDentry.cc +++ b/src/mds/CDentry.cc @@ -395,31 +395,6 @@ bool CDentry::is_freezing() const return dir->is_freezing(); } -void CDentry::decode_replica(bufferlist::const_iterator& p, bool is_new) -{ - __u32 nonce; - decode(nonce, p); - replica_nonce = nonce; - - decode(first, p); - - inodeno_t rino; - unsigned char rdtype; - decode(rino, p); - decode(rdtype, p); - lock.decode_state(p, is_new); - - bool need_recover; - decode(need_recover, p); - - if (is_new) { - if (rino) - dir->link_remote_inode(this, rino, rdtype); - if (need_recover) - lock.mark_need_recover(); - } -} - // ---------------------------- // locking diff --git a/src/mds/CDentry.h b/src/mds/CDentry.h index 56aa58c561a9..62ed6e1b479e 100644 --- a/src/mds/CDentry.h +++ b/src/mds/CDentry.h @@ -243,22 +243,11 @@ public: bool is_new() const { return state_test(STATE_NEW); } void clear_new() { state_clear(STATE_NEW); } - // -- replication - void encode_replica(mds_rank_t mds, bufferlist& bl, bool need_recover) { - __u32 nonce = add_replica(mds); - encode(nonce, bl); - encode(first, bl); - encode(linkage.remote_ino, bl); - encode(linkage.remote_d_type, bl); - lock.encode_state_for_replica(bl); - encode(need_recover, bl); - } - void decode_replica(bufferlist::const_iterator& p, bool is_new); - // -- exporting // note: this assumes the dentry already exists. // i.e., the name is already extracted... so we just need the other state. 
void encode_export(bufferlist& bl) { + ENCODE_START(1, 1, bl); encode(first, bl); encode(state, bl); encode(version, bl); @@ -266,6 +255,7 @@ public: encode(lock, bl); encode(get_replicas(), bl); get(PIN_TEMPEXPORTING); + ENCODE_FINISH(bl); } void finish_export() { // twiddle @@ -280,6 +270,7 @@ public: put(PIN_TEMPEXPORTING); } void decode_import(bufferlist::const_iterator& blp, LogSegment *ls) { + DECODE_START(1, blp); decode(first, blp); __u32 nstate; decode(nstate, blp); @@ -296,6 +287,7 @@ public: if (is_replicated()) get(PIN_REPLICATED); replica_nonce = 0; + DECODE_FINISH(blp); } // -- locking -- diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index c2fe6b9d7318..231737dd9f42 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -2474,6 +2474,7 @@ void CDir::_committed(int r, version_t v) void CDir::encode_export(bufferlist& bl) { + ENCODE_START(1, 1, bl); ceph_assert(!is_projected()); encode(first, bl); encode(fnode, bl); @@ -2490,6 +2491,7 @@ void CDir::encode_export(bufferlist& bl) encode(get_replicas(), bl); get(PIN_TEMPEXPORTING); + ENCODE_FINISH(bl); } void CDir::finish_export() @@ -2505,6 +2507,7 @@ void CDir::finish_export() void CDir::decode_import(bufferlist::const_iterator& blp, LogSegment *ls) { + DECODE_START(1, blp); decode(first, blp); decode(fnode, blp); decode(dirty_old_rstat, blp); @@ -2555,6 +2558,7 @@ void CDir::decode_import(bufferlist::const_iterator& blp, LogSegment *ls) ls->dirty_dirfrag_dirfragtree.push_back(&inode->item_dirty_dirfrag_dirfragtree); } } + DECODE_FINISH(blp); } void CDir::abort_import() diff --git a/src/mds/CDir.h b/src/mds/CDir.h index d2d109fea8a3..cba08b2ca68d 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -430,27 +430,20 @@ public: static void encode_dirstat(bufferlist& bl, const session_info_t& info, const DirStat& ds); void _encode_base(bufferlist& bl) { + ENCODE_START(1, 1, bl); encode(first, bl); encode(fnode, bl); encode(dir_rep, bl); encode(dir_rep_by, bl); + ENCODE_FINISH(bl); } void 
_decode_base(bufferlist::const_iterator& p) { + DECODE_START(1, p); decode(first, p); decode(fnode, p); decode(dir_rep, p); decode(dir_rep_by, p); - } - void encode_replica(mds_rank_t who, bufferlist& bl) { - __u32 nonce = add_replica(who); - encode(nonce, bl); - _encode_base(bl); - } - void decode_replica(bufferlist::const_iterator& p) { - __u32 nonce; - decode(nonce, p); - replica_nonce = nonce; - _decode_base(p); + DECODE_FINISH(p); } // -- state -- diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index a3aa12178336..e3c4ef1e24af 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1606,172 +1606,490 @@ void CInode::set_object_info(MDSCacheObjectInfo &info) info.snapid = last; } -void CInode::encode_lock_state(int type, bufferlist& bl) +void CInode::encode_lock_iauth(bufferlist& bl) +{ + ENCODE_START(1, 1, bl); + encode(inode.version, bl); + encode(inode.ctime, bl); + encode(inode.mode, bl); + encode(inode.uid, bl); + encode(inode.gid, bl); + ENCODE_FINISH(bl); +} + +void CInode::decode_lock_iauth(bufferlist::const_iterator& p) { - using ceph::encode; - encode(first, bl); - if (!is_base()) - encode(parent->first, bl); + DECODE_START(1, p); + decode(inode.version, p); + utime_t tm; + decode(tm, p); + if (inode.ctime < tm) inode.ctime = tm; + decode(inode.mode, p); + decode(inode.uid, p); + decode(inode.gid, p); + DECODE_FINISH(p); +} - switch (type) { - case CEPH_LOCK_IAUTH: - encode(inode.version, bl); - encode(inode.ctime, bl); - encode(inode.mode, bl); - encode(inode.uid, bl); - encode(inode.gid, bl); - break; - - case CEPH_LOCK_ILINK: +void CInode::encode_lock_ilink(bufferlist& bl) +{ + ENCODE_START(1, 1, bl); + encode(inode.version, bl); + encode(inode.ctime, bl); + encode(inode.nlink, bl); + ENCODE_FINISH(bl); +} + +void CInode::decode_lock_ilink(bufferlist::const_iterator& p) +{ + DECODE_START(1, p); + decode(inode.version, p); + utime_t tm; + decode(tm, p); + if (inode.ctime < tm) inode.ctime = tm; + decode(inode.nlink, p); + DECODE_FINISH(p); +} 
+ +void CInode::encode_lock_idft(bufferlist& bl) +{ + ENCODE_START(1, 1, bl); + if (is_auth()) { encode(inode.version, bl); - encode(inode.ctime, bl); - encode(inode.nlink, bl); - break; - - case CEPH_LOCK_IDFT: + } else { + // treat flushing as dirty when rejoining cache + bool dirty = dirfragtreelock.is_dirty_or_flushing(); + encode(dirty, bl); + } + { + // encode the raw tree + encode(dirfragtree, bl); + + // also specify which frags are mine + set myfrags; + auto&& dfls = get_dirfrags(); + for (const auto& dir : dfls) { + if (dir->is_auth()) { + frag_t fg = dir->get_frag(); + myfrags.insert(fg); + } + } + encode(myfrags, bl); + } + ENCODE_FINISH(bl); +} + +void CInode::decode_lock_idft(bufferlist::const_iterator& p) +{ + DECODE_START(1, p); + if (is_auth()) { + bool replica_dirty; + decode(replica_dirty, p); + if (replica_dirty) { + dout(10) << __func__ << " setting dftlock dirty flag" << dendl; + dirfragtreelock.mark_dirty(); // ok bc we're auth and caller will handle + } + } else { + decode(inode.version, p); + } + { + fragtree_t temp; + decode(temp, p); + set authfrags; + decode(authfrags, p); if (is_auth()) { - encode(inode.version, bl); + // auth. believe replica's auth frags only. + for (auto fg : authfrags) { + if (!dirfragtree.is_leaf(fg)) { + dout(10) << " forcing frag " << fg << " to leaf (split|merge)" << dendl; + dirfragtree.force_to_leaf(g_ceph_context, fg); + dirfragtreelock.mark_dirty(); // ok bc we're auth and caller will handle + } + } } else { - // treat flushing as dirty when rejoining cache - bool dirty = dirfragtreelock.is_dirty_or_flushing(); - encode(dirty, bl); - } - { - // encode the raw tree - encode(dirfragtree, bl); - - // also specify which frags are mine - set myfrags; - auto&& dfls = get_dirfrags(); - for (const auto& dir : dfls) { - if (dir->is_auth()) { - frag_t fg = dir->get_frag(); - myfrags.insert(fg); - } + // replica. 
take the new tree, BUT make sure any open + // dirfrags remain leaves (they may have split _after_ this + // dft was scattered, or we may still be be waiting on the + // notify from the auth) + dirfragtree.swap(temp); + for (const auto &p : dirfrags) { + if (!dirfragtree.is_leaf(p.first)) { + dout(10) << " forcing open dirfrag " << p.first << " to leaf (racing with split|merge)" << dendl; + dirfragtree.force_to_leaf(g_ceph_context, p.first); + } + if (p.second->is_auth()) + p.second->state_clear(CDir::STATE_DIRTYDFT); } - encode(myfrags, bl); } - break; - - case CEPH_LOCK_IFILE: + if (g_conf()->mds_debug_frag) + verify_dirfrags(); + } + DECODE_FINISH(p); +} + +void CInode::encode_lock_ifile(bufferlist& bl) +{ + ENCODE_START(1, 1, bl); + if (is_auth()) { + encode(inode.version, bl); + encode(inode.ctime, bl); + encode(inode.mtime, bl); + encode(inode.atime, bl); + encode(inode.time_warp_seq, bl); + if (!is_dir()) { + encode(inode.layout, bl, mdcache->mds->mdsmap->get_up_features()); + encode(inode.size, bl); + encode(inode.truncate_seq, bl); + encode(inode.truncate_size, bl); + encode(inode.client_ranges, bl); + encode(inode.inline_data, bl); + } + } else { + // treat flushing as dirty when rejoining cache + bool dirty = filelock.is_dirty_or_flushing(); + encode(dirty, bl); + } + dout(15) << __func__ << " inode.dirstat is " << inode.dirstat << dendl; + encode(inode.dirstat, bl); // only meaningful if i am auth. 
+ bufferlist tmp; + __u32 n = 0; + for (const auto &p : dirfrags) { + frag_t fg = p.first; + CDir *dir = p.second; + if (is_auth() || dir->is_auth()) { + fnode_t *pf = dir->get_projected_fnode(); + dout(15) << fg << " " << *dir << dendl; + dout(20) << fg << " fragstat " << pf->fragstat << dendl; + dout(20) << fg << " accounted_fragstat " << pf->accounted_fragstat << dendl; + encode(fg, tmp); + encode(dir->first, tmp); + encode(pf->fragstat, tmp); + encode(pf->accounted_fragstat, tmp); + n++; + } + } + encode(n, bl); + bl.claim_append(tmp); + ENCODE_FINISH(bl); +} + +void CInode::decode_lock_ifile(bufferlist::const_iterator& p) +{ + DECODE_START(1, p); + if (!is_auth()) { + decode(inode.version, p); + utime_t tm; + decode(tm, p); + if (inode.ctime < tm) inode.ctime = tm; + decode(inode.mtime, p); + decode(inode.atime, p); + decode(inode.time_warp_seq, p); + if (!is_dir()) { + decode(inode.layout, p); + decode(inode.size, p); + decode(inode.truncate_seq, p); + decode(inode.truncate_size, p); + decode(inode.client_ranges, p); + decode(inode.inline_data, p); + } + } else { + bool replica_dirty; + decode(replica_dirty, p); + if (replica_dirty) { + dout(10) << __func__ << " setting filelock dirty flag" << dendl; + filelock.mark_dirty(); // ok bc we're auth and caller will handle + } + } + + frag_info_t dirstat; + decode(dirstat, p); + if (!is_auth()) { + dout(10) << " taking inode dirstat " << dirstat << " for " << *this << dendl; + inode.dirstat = dirstat; // take inode summation if replica + } + __u32 n; + decode(n, p); + dout(10) << " ...got " << n << " fragstats on " << *this << dendl; + while (n--) { + frag_t fg; + snapid_t fgfirst; + frag_info_t fragstat; + frag_info_t accounted_fragstat; + decode(fg, p); + decode(fgfirst, p); + decode(fragstat, p); + decode(accounted_fragstat, p); + dout(10) << fg << " [" << fgfirst << ",head] " << dendl; + dout(10) << fg << " fragstat " << fragstat << dendl; + dout(20) << fg << " accounted_fragstat " << accounted_fragstat << 
dendl; + + CDir *dir = get_dirfrag(fg); if (is_auth()) { - encode(inode.version, bl); - encode(inode.ctime, bl); - encode(inode.mtime, bl); - encode(inode.atime, bl); - encode(inode.time_warp_seq, bl); - if (!is_dir()) { - encode(inode.layout, bl, mdcache->mds->mdsmap->get_up_features()); - encode(inode.size, bl); - encode(inode.truncate_seq, bl); - encode(inode.truncate_size, bl); - encode(inode.client_ranges, bl); - encode(inode.inline_data, bl); + ceph_assert(dir); // i am auth; i had better have this dir open + dout(10) << fg << " first " << dir->first << " -> " << fgfirst + << " on " << *dir << dendl; + dir->first = fgfirst; + dir->fnode.fragstat = fragstat; + dir->fnode.accounted_fragstat = accounted_fragstat; + if (!(fragstat == accounted_fragstat)) { + dout(10) << fg << " setting filelock updated flag" << dendl; + filelock.mark_dirty(); // ok bc we're auth and caller will handle } } else { - // treat flushing as dirty when rejoining cache - bool dirty = filelock.is_dirty_or_flushing(); - encode(dirty, bl); + if (dir && dir->is_auth()) { + dout(10) << fg << " first " << dir->first << " -> " << fgfirst + << " on " << *dir << dendl; + dir->first = fgfirst; + fnode_t *pf = dir->get_projected_fnode(); + finish_scatter_update(&filelock, dir, + inode.dirstat.version, pf->accounted_fragstat.version); + } } + } + DECODE_FINISH(p); +} - { - dout(15) << __func__ << " inode.dirstat is " << inode.dirstat << dendl; - encode(inode.dirstat, bl); // only meaningful if i am auth. 
- bufferlist tmp; - __u32 n = 0; - for (const auto &p : dirfrags) { - frag_t fg = p.first; - CDir *dir = p.second; - if (is_auth() || dir->is_auth()) { - fnode_t *pf = dir->get_projected_fnode(); - dout(15) << fg << " " << *dir << dendl; - dout(20) << fg << " fragstat " << pf->fragstat << dendl; - dout(20) << fg << " accounted_fragstat " << pf->accounted_fragstat << dendl; - encode(fg, tmp); - encode(dir->first, tmp); - encode(pf->fragstat, tmp); - encode(pf->accounted_fragstat, tmp); - n++; - } - } - encode(n, bl); - bl.claim_append(tmp); +void CInode::encode_lock_inest(bufferlist& bl) +{ + ENCODE_START(1, 1, bl); + if (is_auth()) { + encode(inode.version, bl); + } else { + // treat flushing as dirty when rejoining cache + bool dirty = nestlock.is_dirty_or_flushing(); + encode(dirty, bl); + } + dout(15) << __func__ << " inode.rstat is " << inode.rstat << dendl; + encode(inode.rstat, bl); // only meaningful if i am auth. + bufferlist tmp; + __u32 n = 0; + for (const auto &p : dirfrags) { + frag_t fg = p.first; + CDir *dir = p.second; + if (is_auth() || dir->is_auth()) { + fnode_t *pf = dir->get_projected_fnode(); + dout(10) << __func__ << " " << fg << " dir " << *dir << dendl; + dout(10) << __func__ << " " << fg << " rstat " << pf->rstat << dendl; + dout(10) << __func__ << " " << fg << " accounted_rstat " << pf->rstat << dendl; + dout(10) << __func__ << " " << fg << " dirty_old_rstat " << dir->dirty_old_rstat << dendl; + encode(fg, tmp); + encode(dir->first, tmp); + encode(pf->rstat, tmp); + encode(pf->accounted_rstat, tmp); + encode(dir->dirty_old_rstat, tmp); + n++; } - break; + } + encode(n, bl); + bl.claim_append(tmp); + ENCODE_FINISH(bl); +} - case CEPH_LOCK_INEST: +void CInode::decode_lock_inest(bufferlist::const_iterator& p) +{ + DECODE_START(1, p); + if (is_auth()) { + bool replica_dirty; + decode(replica_dirty, p); + if (replica_dirty) { + dout(10) << __func__ << " setting nestlock dirty flag" << dendl; + nestlock.mark_dirty(); // ok bc we're auth and 
caller will handle + } + } else { + decode(inode.version, p); + } + nest_info_t rstat; + decode(rstat, p); + if (!is_auth()) { + dout(10) << __func__ << " taking inode rstat " << rstat << " for " << *this << dendl; + inode.rstat = rstat; // take inode summation if replica + } + __u32 n; + decode(n, p); + while (n--) { + frag_t fg; + snapid_t fgfirst; + nest_info_t rstat; + nest_info_t accounted_rstat; + decltype(CDir::dirty_old_rstat) dirty_old_rstat; + decode(fg, p); + decode(fgfirst, p); + decode(rstat, p); + decode(accounted_rstat, p); + decode(dirty_old_rstat, p); + dout(10) << __func__ << " " << fg << " [" << fgfirst << ",head]" << dendl; + dout(10) << __func__ << " " << fg << " rstat " << rstat << dendl; + dout(10) << __func__ << " " << fg << " accounted_rstat " << accounted_rstat << dendl; + dout(10) << __func__ << " " << fg << " dirty_old_rstat " << dirty_old_rstat << dendl; + CDir *dir = get_dirfrag(fg); if (is_auth()) { - encode(inode.version, bl); + ceph_assert(dir); // i am auth; i had better have this dir open + dout(10) << fg << " first " << dir->first << " -> " << fgfirst + << " on " << *dir << dendl; + dir->first = fgfirst; + dir->fnode.rstat = rstat; + dir->fnode.accounted_rstat = accounted_rstat; + dir->dirty_old_rstat.swap(dirty_old_rstat); + if (!(rstat == accounted_rstat) || !dir->dirty_old_rstat.empty()) { + dout(10) << fg << " setting nestlock updated flag" << dendl; + nestlock.mark_dirty(); // ok bc we're auth and caller will handle + } } else { - // treat flushing as dirty when rejoining cache - bool dirty = nestlock.is_dirty_or_flushing(); - encode(dirty, bl); - } - { - dout(15) << __func__ << " inode.rstat is " << inode.rstat << dendl; - encode(inode.rstat, bl); // only meaningful if i am auth. 
- bufferlist tmp; - __u32 n = 0; - for (const auto &p : dirfrags) { - frag_t fg = p.first; - CDir *dir = p.second; - if (is_auth() || dir->is_auth()) { - fnode_t *pf = dir->get_projected_fnode(); - dout(10) << fg << " " << *dir << dendl; - dout(10) << fg << " " << pf->rstat << dendl; - dout(10) << fg << " " << pf->rstat << dendl; - dout(10) << fg << " " << dir->dirty_old_rstat << dendl; - encode(fg, tmp); - encode(dir->first, tmp); - encode(pf->rstat, tmp); - encode(pf->accounted_rstat, tmp); - encode(dir->dirty_old_rstat, tmp); - n++; - } + if (dir && dir->is_auth()) { + dout(10) << fg << " first " << dir->first << " -> " << fgfirst + << " on " << *dir << dendl; + dir->first = fgfirst; + fnode_t *pf = dir->get_projected_fnode(); + finish_scatter_update(&nestlock, dir, + inode.rstat.version, pf->accounted_rstat.version); } - encode(n, bl); - bl.claim_append(tmp); } + } + DECODE_FINISH(p); +} + +void CInode::encode_lock_ixattr(bufferlist& bl) +{ + ENCODE_START(1, 1, bl); + encode(inode.version, bl); + encode(inode.ctime, bl); + encode(xattrs, bl); + ENCODE_FINISH(bl); +} + +void CInode::decode_lock_ixattr(bufferlist::const_iterator& p) +{ + DECODE_START(1, p); + decode(inode.version, p); + utime_t tm; + decode(tm, p); + if (inode.ctime < tm) inode.ctime = tm; + decode(xattrs, p); + DECODE_FINISH(p); +} + +void CInode::encode_lock_isnap(bufferlist& bl) +{ + ENCODE_START(1, 1, bl); + encode(inode.version, bl); + encode(inode.ctime, bl); + encode_snap(bl); + ENCODE_FINISH(bl); +} + +void CInode::decode_lock_isnap(bufferlist::const_iterator& p) +{ + DECODE_START(1, p); + decode(inode.version, p); + utime_t tm; + decode(tm, p); + if (inode.ctime < tm) inode.ctime = tm; + decode_snap(p); + DECODE_FINISH(p); +} + +void CInode::encode_lock_iflock(bufferlist& bl) +{ + ENCODE_START(1, 1, bl); + encode(inode.version, bl); + _encode_file_locks(bl); + ENCODE_FINISH(bl); +} + +void CInode::decode_lock_iflock(bufferlist::const_iterator& p) +{ + DECODE_START(1, p); + 
decode(inode.version, p); + _decode_file_locks(p); + DECODE_FINISH(p); +} + +void CInode::encode_lock_ipolicy(bufferlist& bl) +{ + ENCODE_START(1, 1, bl); + if (inode.is_dir()) { + encode(inode.version, bl); + encode(inode.ctime, bl); + encode(inode.layout, bl, mdcache->mds->mdsmap->get_up_features()); + encode(inode.quota, bl); + encode(inode.export_pin, bl); + } + ENCODE_FINISH(bl); +} + +void CInode::decode_lock_ipolicy(bufferlist::const_iterator& p) +{ + DECODE_START(1, p); + if (inode.is_dir()) { + decode(inode.version, p); + utime_t tm; + decode(tm, p); + if (inode.ctime < tm) inode.ctime = tm; + decode(inode.layout, p); + decode(inode.quota, p); + mds_rank_t old_pin = inode.export_pin; + decode(inode.export_pin, p); + maybe_export_pin(old_pin != inode.export_pin); + } + DECODE_FINISH(p); +} + +void CInode::encode_lock_state(int type, bufferlist& bl) +{ + ENCODE_START(1, 1, bl); + encode(first, bl); + if (!is_base()) + encode(parent->first, bl); + + switch (type) { + case CEPH_LOCK_IAUTH: + encode_lock_iauth(bl); + break; + + case CEPH_LOCK_ILINK: + encode_lock_ilink(bl); + break; + + case CEPH_LOCK_IDFT: + encode_lock_idft(bl); + break; + + case CEPH_LOCK_IFILE: + encode_lock_ifile(bl); + break; + + case CEPH_LOCK_INEST: + encode_lock_inest(bl); break; case CEPH_LOCK_IXATTR: - encode(inode.version, bl); - encode(inode.ctime, bl); - encode(xattrs, bl); + encode_lock_ixattr(bl); break; case CEPH_LOCK_ISNAP: - encode(inode.version, bl); - encode(inode.ctime, bl); - encode_snap(bl); + encode_lock_isnap(bl); break; case CEPH_LOCK_IFLOCK: - encode(inode.version, bl); - _encode_file_locks(bl); + encode_lock_iflock(bl); break; case CEPH_LOCK_IPOLICY: - if (inode.is_dir()) { - encode(inode.version, bl); - encode(inode.ctime, bl); - encode(inode.layout, bl, mdcache->mds->mdsmap->get_up_features()); - encode(inode.quota, bl); - encode(inode.export_pin, bl); - } + encode_lock_ipolicy(bl); break; default: ceph_abort(); } + ENCODE_FINISH(bl); } - /* for more info on 
scatterlocks, see comments by Locker::scatter_writebehind */ void CInode::decode_lock_state(int type, const bufferlist& bl) { auto p = bl.cbegin(); + + DECODE_START(1, p); utime_t tm; snapid_t newfirst; @@ -1791,239 +2109,45 @@ void CInode::decode_lock_state(int type, const bufferlist& bl) switch (type) { case CEPH_LOCK_IAUTH: - decode(inode.version, p); - decode(tm, p); - if (inode.ctime < tm) inode.ctime = tm; - decode(inode.mode, p); - decode(inode.uid, p); - decode(inode.gid, p); + decode_lock_iauth(p); break; case CEPH_LOCK_ILINK: - decode(inode.version, p); - decode(tm, p); - if (inode.ctime < tm) inode.ctime = tm; - decode(inode.nlink, p); + decode_lock_ilink(p); break; case CEPH_LOCK_IDFT: - if (is_auth()) { - bool replica_dirty; - decode(replica_dirty, p); - if (replica_dirty) { - dout(10) << __func__ << " setting dftlock dirty flag" << dendl; - dirfragtreelock.mark_dirty(); // ok bc we're auth and caller will handle - } - } else { - decode(inode.version, p); - } - { - fragtree_t temp; - decode(temp, p); - set authfrags; - decode(authfrags, p); - if (is_auth()) { - // auth. believe replica's auth frags only. - for (set::iterator p = authfrags.begin(); p != authfrags.end(); ++p) - if (!dirfragtree.is_leaf(*p)) { - dout(10) << " forcing frag " << *p << " to leaf (split|merge)" << dendl; - dirfragtree.force_to_leaf(g_ceph_context, *p); - dirfragtreelock.mark_dirty(); // ok bc we're auth and caller will handle - } - } else { - // replica. 
take the new tree, BUT make sure any open - // dirfrags remain leaves (they may have split _after_ this - // dft was scattered, or we may still be be waiting on the - // notify from the auth) - dirfragtree.swap(temp); - for (const auto &p : dirfrags) { - if (!dirfragtree.is_leaf(p.first)) { - dout(10) << " forcing open dirfrag " << p.first << " to leaf (racing with split|merge)" << dendl; - dirfragtree.force_to_leaf(g_ceph_context, p.first); - } - if (p.second->is_auth()) - p.second->state_clear(CDir::STATE_DIRTYDFT); - } - } - if (g_conf()->mds_debug_frag) - verify_dirfrags(); - } + decode_lock_idft(p); break; case CEPH_LOCK_IFILE: - if (!is_auth()) { - decode(inode.version, p); - decode(tm, p); - if (inode.ctime < tm) inode.ctime = tm; - decode(inode.mtime, p); - decode(inode.atime, p); - decode(inode.time_warp_seq, p); - if (!is_dir()) { - decode(inode.layout, p); - decode(inode.size, p); - decode(inode.truncate_seq, p); - decode(inode.truncate_size, p); - decode(inode.client_ranges, p); - decode(inode.inline_data, p); - } - } else { - bool replica_dirty; - decode(replica_dirty, p); - if (replica_dirty) { - dout(10) << __func__ << " setting filelock dirty flag" << dendl; - filelock.mark_dirty(); // ok bc we're auth and caller will handle - } - } - { - frag_info_t dirstat; - decode(dirstat, p); - if (!is_auth()) { - dout(10) << " taking inode dirstat " << dirstat << " for " << *this << dendl; - inode.dirstat = dirstat; // take inode summation if replica - } - __u32 n; - decode(n, p); - dout(10) << " ...got " << n << " fragstats on " << *this << dendl; - while (n--) { - frag_t fg; - snapid_t fgfirst; - frag_info_t fragstat; - frag_info_t accounted_fragstat; - decode(fg, p); - decode(fgfirst, p); - decode(fragstat, p); - decode(accounted_fragstat, p); - dout(10) << fg << " [" << fgfirst << ",head] " << dendl; - dout(10) << fg << " fragstat " << fragstat << dendl; - dout(20) << fg << " accounted_fragstat " << accounted_fragstat << dendl; - - CDir *dir = 
get_dirfrag(fg); - if (is_auth()) { - ceph_assert(dir); // i am auth; i had better have this dir open - dout(10) << fg << " first " << dir->first << " -> " << fgfirst - << " on " << *dir << dendl; - dir->first = fgfirst; - dir->fnode.fragstat = fragstat; - dir->fnode.accounted_fragstat = accounted_fragstat; - dir->first = fgfirst; - if (!(fragstat == accounted_fragstat)) { - dout(10) << fg << " setting filelock updated flag" << dendl; - filelock.mark_dirty(); // ok bc we're auth and caller will handle - } - } else { - if (dir && dir->is_auth()) { - dout(10) << fg << " first " << dir->first << " -> " << fgfirst - << " on " << *dir << dendl; - dir->first = fgfirst; - fnode_t *pf = dir->get_projected_fnode(); - finish_scatter_update(&filelock, dir, - inode.dirstat.version, pf->accounted_fragstat.version); - } - } - } - } + decode_lock_ifile(p); break; case CEPH_LOCK_INEST: - if (is_auth()) { - bool replica_dirty; - decode(replica_dirty, p); - if (replica_dirty) { - dout(10) << __func__ << " setting nestlock dirty flag" << dendl; - nestlock.mark_dirty(); // ok bc we're auth and caller will handle - } - } else { - decode(inode.version, p); - } - { - nest_info_t rstat; - decode(rstat, p); - if (!is_auth()) { - dout(10) << " taking inode rstat " << rstat << " for " << *this << dendl; - inode.rstat = rstat; // take inode summation if replica - } - __u32 n; - decode(n, p); - while (n--) { - frag_t fg; - snapid_t fgfirst; - nest_info_t rstat; - nest_info_t accounted_rstat; - decltype(CDir::dirty_old_rstat) dirty_old_rstat; - decode(fg, p); - decode(fgfirst, p); - decode(rstat, p); - decode(accounted_rstat, p); - decode(dirty_old_rstat, p); - dout(10) << fg << " [" << fgfirst << ",head]" << dendl; - dout(10) << fg << " rstat " << rstat << dendl; - dout(10) << fg << " accounted_rstat " << accounted_rstat << dendl; - dout(10) << fg << " dirty_old_rstat " << dirty_old_rstat << dendl; - - CDir *dir = get_dirfrag(fg); - if (is_auth()) { - ceph_assert(dir); // i am auth; i had 
better have this dir open - dout(10) << fg << " first " << dir->first << " -> " << fgfirst - << " on " << *dir << dendl; - dir->first = fgfirst; - dir->fnode.rstat = rstat; - dir->fnode.accounted_rstat = accounted_rstat; - dir->dirty_old_rstat.swap(dirty_old_rstat); - if (!(rstat == accounted_rstat) || !dir->dirty_old_rstat.empty()) { - dout(10) << fg << " setting nestlock updated flag" << dendl; - nestlock.mark_dirty(); // ok bc we're auth and caller will handle - } - } else { - if (dir && dir->is_auth()) { - dout(10) << fg << " first " << dir->first << " -> " << fgfirst - << " on " << *dir << dendl; - dir->first = fgfirst; - fnode_t *pf = dir->get_projected_fnode(); - finish_scatter_update(&nestlock, dir, - inode.rstat.version, pf->accounted_rstat.version); - } - } - } - } + decode_lock_inest(p); break; case CEPH_LOCK_IXATTR: - decode(inode.version, p); - decode(tm, p); - if (inode.ctime < tm) inode.ctime = tm; - decode(xattrs, p); + decode_lock_ixattr(p); break; case CEPH_LOCK_ISNAP: - { - decode(inode.version, p); - decode(tm, p); - if (inode.ctime < tm) inode.ctime = tm; - decode_snap(p); - } + decode_lock_isnap(p); break; case CEPH_LOCK_IFLOCK: - decode(inode.version, p); - _decode_file_locks(p); + decode_lock_iflock(p); break; case CEPH_LOCK_IPOLICY: - if (inode.is_dir()) { - decode(inode.version, p); - decode(tm, p); - if (inode.ctime < tm) inode.ctime = tm; - decode(inode.layout, p); - decode(inode.quota, p); - mds_rank_t old_pin = inode.export_pin; - decode(inode.export_pin, p); - maybe_export_pin(old_pin != inode.export_pin); - } + decode_lock_ipolicy(p); break; default: ceph_abort(); } + DECODE_FINISH(p); } @@ -2902,20 +3026,22 @@ void CInode::decode_snap_blob(const bufferlist& snapbl) void CInode::encode_snap(bufferlist& bl) { - using ceph::encode; + ENCODE_START(1, 1, bl); bufferlist snapbl; encode_snap_blob(snapbl); encode(snapbl, bl); encode(oldest_snap, bl); + ENCODE_FINISH(bl); } void CInode::decode_snap(bufferlist::const_iterator& p) { - using 
ceph::decode; + DECODE_START(1, p); bufferlist snapbl; decode(snapbl, p); decode(oldest_snap, p); decode_snap_blob(snapbl); + DECODE_FINISH(p); } // ============================================= @@ -3873,7 +3999,7 @@ void CInode::encode_cap_message(const ref_t &m, Capability *cap) void CInode::_encode_base(bufferlist& bl, uint64_t features) { - using ceph::encode; + ENCODE_START(1, 1, bl); encode(first, bl); encode(inode, bl, features); encode(symlink, bl); @@ -3882,10 +4008,11 @@ void CInode::_encode_base(bufferlist& bl, uint64_t features) encode(old_inodes, bl, features); encode(damage_flags, bl); encode_snap(bl); + ENCODE_FINISH(bl); } void CInode::_decode_base(bufferlist::const_iterator& p) { - using ceph::decode; + DECODE_START(1, p); decode(first, p); decode(inode, p); { @@ -3898,6 +4025,7 @@ void CInode::_decode_base(bufferlist::const_iterator& p) decode(old_inodes, p); decode(damage_flags, p); decode_snap(p); + DECODE_FINISH(p); } void CInode::_encode_locks_full(bufferlist& bl) @@ -3935,6 +4063,7 @@ void CInode::_decode_locks_full(bufferlist::const_iterator& p) void CInode::_encode_locks_state_for_replica(bufferlist& bl, bool need_recover) { + ENCODE_START(1, 1, bl); authlock.encode_state_for_replica(bl); linklock.encode_state_for_replica(bl); dirfragtreelock.encode_state_for_replica(bl); @@ -3944,8 +4073,8 @@ void CInode::_encode_locks_state_for_replica(bufferlist& bl, bool need_recover) snaplock.encode_state_for_replica(bl); flocklock.encode_state_for_replica(bl); policylock.encode_state_for_replica(bl); - using ceph::encode; encode(need_recover, bl); + ENCODE_FINISH(bl); } void CInode::_encode_locks_state_for_rejoin(bufferlist& bl, int rep) @@ -3961,8 +4090,9 @@ void CInode::_encode_locks_state_for_rejoin(bufferlist& bl, int rep) policylock.encode_state_for_replica(bl); } -void CInode::_decode_locks_state(bufferlist::const_iterator& p, bool is_new) +void CInode::_decode_locks_state_for_replica(bufferlist::const_iterator& p, bool is_new) { + 
DECODE_START(1, p); authlock.decode_state(p, is_new); linklock.decode_state(p, is_new); dirfragtreelock.decode_state(p, is_new); @@ -3973,7 +4103,6 @@ void CInode::_decode_locks_state(bufferlist::const_iterator& p, bool is_new) flocklock.decode_state(p, is_new); policylock.decode_state(p, is_new); - using ceph::decode; bool need_recover; decode(need_recover, p); if (need_recover && is_new) { @@ -3989,6 +4118,7 @@ void CInode::_decode_locks_state(bufferlist::const_iterator& p, bool is_new) flocklock.mark_need_recover(); policylock.mark_need_recover(); } + DECODE_FINISH(p); } void CInode::_decode_locks_rejoin(bufferlist::const_iterator& p, MDSContext::vec& waiters, list& eval_locks, bool survivor) diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 4fba93d466bf..f2b23a486453 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -662,26 +662,6 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter& eval_locks, bool survivor); @@ -721,8 +701,27 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter cap_map; in->export_client_caps(cap_map); bufferlist bl; - encode(in->ino(), bl); - encode(cap_map, bl); + MMDSResolve::slave_inode_cap inode_caps(in->ino(), cap_map); + encode(inode_caps, bl); resolves[master]->add_slave_request(p->first, bl); } else { resolves[master]->add_slave_request(p->first, mdr->committing); @@ -3165,13 +3165,11 @@ void MDCache::handle_resolve(const cref_t &m) if (p.second.inode_caps.length() > 0) { // slave wants to export caps (rename) ceph_assert(mds->is_resolve()); - - inodeno_t ino; - map cap_exports; + MMDSResolve::slave_inode_cap inode_caps; auto q = p.second.inode_caps.cbegin(); - decode(ino, q); - decode(cap_exports, q); - + decode(inode_caps, q); + inodeno_t ino = inode_caps.ino; + map cap_exports = inode_caps.cap_exports; ceph_assert(get_inode(ino)); for (map::iterator q = cap_exports.begin(); @@ -10006,7 +10004,7 @@ void MDCache::handle_discover(const cref_t &dis) // add root 
reply->starts_with = MDiscoverReply::INODE; - replicate_inode(cur, from, reply->trace, mds->mdsmap->get_up_features()); + encode_replica_inode(cur, from, reply->trace, mds->mdsmap->get_up_features()); dout(10) << "added base " << *cur << dendl; } else { @@ -10142,7 +10140,7 @@ void MDCache::handle_discover(const cref_t &dis) ceph_assert(!curdir->is_ambiguous_auth()); // would be frozen. if (!reply->trace.length()) reply->starts_with = MDiscoverReply::DIR; - replicate_dir(curdir, from, reply->trace); + encode_replica_dir(curdir, from, reply->trace); dout(7) << "handle_discover added dir " << *curdir << dendl; } @@ -10237,7 +10235,7 @@ void MDCache::handle_discover(const cref_t &dis) // add dentry if (!reply->trace.length()) reply->starts_with = MDiscoverReply::DENTRY; - replicate_dentry(dn, from, reply->trace); + encode_replica_dentry(dn, from, reply->trace); dout(7) << "handle_discover added dentry " << *dn << dendl; if (!dnl->is_primary()) break; // stop on null or remote link. @@ -10246,7 +10244,7 @@ void MDCache::handle_discover(const cref_t &dis) CInode *next = dnl->get_inode(); ceph_assert(next->is_auth()); - replicate_inode(next, from, reply->trace, mds->mdsmap->get_up_features()); + encode_replica_inode(next, from, reply->trace, mds->mdsmap->get_up_features()); dout(7) << "handle_discover added inode " << *next << dendl; // descend, keep going. 
@@ -10296,7 +10294,7 @@ void MDCache::handle_discover_reply(const cref_t &m) // discover may start with an inode if (!p.end() && next == MDiscoverReply::INODE) { - cur = add_replica_inode(p, NULL, finished); + decode_replica_inode(cur, p, NULL, finished); dout(7) << "discover_reply got base inode " << *cur << dendl; ceph_assert(cur->is_base()); @@ -10317,9 +10315,9 @@ void MDCache::handle_discover_reply(const cref_t &m) while (!p.end()) { // dir frag_t fg; - CDir *curdir = 0; + CDir *curdir = nullptr; if (next == MDiscoverReply::DIR) { - curdir = add_replica_dir(p, cur, mds_rank_t(m->get_source().num()), finished); + decode_replica_dir(curdir, p, cur, mds_rank_t(m->get_source().num()), finished); if (cur->ino() == m->get_base_ino() && curdir->get_frag() != m->get_base_dir_frag()) { ceph_assert(m->get_wanted_base_dir()); cur->take_dir_waiting(m->get_base_dir_frag(), finished); @@ -10337,13 +10335,14 @@ void MDCache::handle_discover_reply(const cref_t &m) break; // dentry - CDentry *dn = add_replica_dentry(p, curdir, finished); + CDentry *dn = nullptr; + decode_replica_dentry(dn, p, curdir, finished); if (p.end()) break; // inode - cur = add_replica_inode(p, dn, finished); + decode_replica_inode(cur, p, dn, finished); next = MDiscoverReply::DIR; } @@ -10416,154 +10415,215 @@ void MDCache::handle_discover_reply(const cref_t &m) // REPLICAS -void MDCache::replicate_dir(CDir *dir, mds_rank_t to, bufferlist& bl) +void MDCache::encode_replica_dir(CDir *dir, mds_rank_t to, bufferlist& bl) { + ENCODE_START(1, 1, bl); dirfrag_t df = dir->dirfrag(); encode(df, bl); - dir->encode_replica(to, bl); + __u32 nonce = dir->add_replica(to); + encode(nonce, bl); + dir->_encode_base(bl); + ENCODE_FINISH(bl); } -void MDCache::replicate_dentry(CDentry *dn, mds_rank_t to, bufferlist& bl) +void MDCache::encode_replica_dentry(CDentry *dn, mds_rank_t to, bufferlist& bl) { + ENCODE_START(1, 1, bl); encode(dn->get_name(), bl); encode(dn->last, bl); - dn->encode_replica(to, bl, mds->get_state() 
< MDSMap::STATE_ACTIVE); + + __u32 nonce = dn->add_replica(to); + encode(nonce, bl); + encode(dn->first, bl); + encode(dn->linkage.remote_ino, bl); + encode(dn->linkage.remote_d_type, bl); + dn->lock.encode_state_for_replica(bl); + bool need_recover = mds->get_state() < MDSMap::STATE_ACTIVE; + encode(need_recover, bl); + ENCODE_FINISH(bl); } -void MDCache::replicate_inode(CInode *in, mds_rank_t to, bufferlist& bl, +void MDCache::encode_replica_inode(CInode *in, mds_rank_t to, bufferlist& bl, uint64_t features) { + ENCODE_START(1, 1, bl); + ceph_assert(in->is_auth()); encode(in->inode.ino, bl); // bleh, minor assymetry here encode(in->last, bl); - in->encode_replica(to, bl, features, mds->get_state() < MDSMap::STATE_ACTIVE); + + __u32 nonce = in->add_replica(to); + encode(nonce, bl); + + in->_encode_base(bl, features); + in->_encode_locks_state_for_replica(bl, mds->get_state() < MDSMap::STATE_ACTIVE); + ENCODE_FINISH(bl); } -CDir *MDCache::add_replica_dir(bufferlist::const_iterator& p, CInode *diri, mds_rank_t from, +void MDCache::decode_replica_dir(CDir *&dir, bufferlist::const_iterator& p, CInode *diri, mds_rank_t from, MDSContext::vec& finished) { + DECODE_START(1, p); dirfrag_t df; decode(df, p); ceph_assert(diri->ino() == df.ino); // add it (_replica_) - CDir *dir = diri->get_dirfrag(df.frag); + dir = diri->get_dirfrag(df.frag); if (dir) { // had replica. update w/ new nonce. 
- dir->decode_replica(p); - dout(7) << "add_replica_dir had " << *dir << " nonce " << dir->replica_nonce << dendl; + __u32 nonce; + decode(nonce, p); + dir->set_replica_nonce(nonce); + dir->_decode_base(p); + dout(7) << __func__ << " had " << *dir << " nonce " << dir->replica_nonce << dendl; } else { // force frag to leaf in the diri tree if (!diri->dirfragtree.is_leaf(df.frag)) { - dout(7) << "add_replica_dir forcing frag " << df.frag << " to leaf in the fragtree " + dout(7) << __func__ << " forcing frag " << df.frag << " to leaf in the fragtree " << diri->dirfragtree << dendl; diri->dirfragtree.force_to_leaf(g_ceph_context, df.frag); } - // add replica. dir = diri->add_dirfrag( new CDir(diri, df.frag, this, false) ); - dir->decode_replica(p); - + __u32 nonce; + decode(nonce, p); + dir->set_replica_nonce(nonce); + dir->_decode_base(p); // is this a dir_auth delegation boundary? if (from != diri->authority().first || diri->is_ambiguous_auth() || diri->is_base()) adjust_subtree_auth(dir, from); - dout(7) << "add_replica_dir added " << *dir << " nonce " << dir->replica_nonce << dendl; - + dout(7) << __func__ << " added " << *dir << " nonce " << dir->replica_nonce << dendl; // get waiters diri->take_dir_waiting(df.frag, finished); } - - return dir; + DECODE_FINISH(p); } -CDentry *MDCache::add_replica_dentry(bufferlist::const_iterator& p, CDir *dir, MDSContext::vec& finished) +void MDCache::decode_replica_dentry(CDentry *&dn, bufferlist::const_iterator& p, CDir *dir, MDSContext::vec& finished) { + DECODE_START(1, p); string name; snapid_t last; decode(name, p); decode(last, p); - CDentry *dn = dir->lookup(name, last); + dn = dir->lookup(name, last); // have it? 
+ bool is_new = false; if (dn) { - dn->decode_replica(p, false); - dout(7) << "add_replica_dentry had " << *dn << dendl; + is_new = false; + dout(7) << __func__ << " had " << *dn << dendl; } else { + is_new = true; dn = dir->add_null_dentry(name, 1 /* this will get updated below */, last); - dn->decode_replica(p, true); - dout(7) << "add_replica_dentry added " << *dn << dendl; + dout(7) << __func__ << " added " << *dn << dendl; } + + __u32 nonce; + decode(nonce, p); + dn->set_replica_nonce(nonce); + decode(dn->first, p); - dir->take_dentry_waiting(name, dn->first, dn->last, finished); + inodeno_t rino; + unsigned char rdtype; + decode(rino, p); + decode(rdtype, p); + dn->lock.decode_state(p, is_new); - return dn; + bool need_recover; + decode(need_recover, p); + + if (is_new) { + if (rino) + dir->link_remote_inode(dn, rino, rdtype); + if (need_recover) + dn->lock.mark_need_recover(); + } + + dir->take_dentry_waiting(name, dn->first, dn->last, finished); + DECODE_FINISH(p); } -CInode *MDCache::add_replica_inode(bufferlist::const_iterator& p, CDentry *dn, MDSContext::vec& finished) +void MDCache::decode_replica_inode(CInode *&in, bufferlist::const_iterator& p, CDentry *dn, MDSContext::vec& finished) { + DECODE_START(1, p); inodeno_t ino; snapid_t last; + __u32 nonce; decode(ino, p); decode(last, p); - CInode *in = get_inode(ino, last); + decode(nonce, p); + in = get_inode(ino, last); if (!in) { in = new CInode(this, false, 1, last); - in->decode_replica(p, true); + in->set_replica_nonce(nonce); + in->_decode_base(p); + in->_decode_locks_state_for_replica(p, true); add_inode(in); if (in->ino() == MDS_INO_ROOT) in->inode_auth.first = 0; else if (in->is_mdsdir()) in->inode_auth.first = in->ino() - MDS_INO_MDSDIR_OFFSET; - dout(10) << "add_replica_inode added " << *in << dendl; + dout(10) << __func__ << " added " << *in << dendl; if (dn) { ceph_assert(dn->get_linkage()->is_null()); dn->dir->link_primary_inode(dn, in); } } else { - in->decode_replica(p, false); - dout(10) 
<< "add_replica_inode had " << *in << dendl; + in->set_replica_nonce(nonce); + in->_decode_base(p); + in->_decode_locks_state_for_replica(p, false); + dout(10) << __func__ << " had " << *in << dendl; } if (dn) { if (!dn->get_linkage()->is_primary() || dn->get_linkage()->get_inode() != in) - dout(10) << "add_replica_inode different linkage in dentry " << *dn << dendl; + dout(10) << __func__ << " different linkage in dentry " << *dn << dendl; } - - return in; + DECODE_FINISH(p); } -void MDCache::replicate_stray(CDentry *straydn, mds_rank_t who, bufferlist& bl) +void MDCache::encode_replica_stray(CDentry *straydn, mds_rank_t who, bufferlist& bl) { + ENCODE_START(1, 1, bl); uint64_t features = mds->mdsmap->get_up_features(); - replicate_inode(get_myin(), who, bl, features); - replicate_dir(straydn->get_dir()->inode->get_parent_dn()->get_dir(), who, bl); - replicate_dentry(straydn->get_dir()->inode->get_parent_dn(), who, bl); - replicate_inode(straydn->get_dir()->inode, who, bl, features); - replicate_dir(straydn->get_dir(), who, bl); - replicate_dentry(straydn, who, bl); + encode_replica_inode(get_myin(), who, bl, features); + encode_replica_dir(straydn->get_dir()->inode->get_parent_dn()->get_dir(), who, bl); + encode_replica_dentry(straydn->get_dir()->inode->get_parent_dn(), who, bl); + encode_replica_inode(straydn->get_dir()->inode, who, bl, features); + encode_replica_dir(straydn->get_dir(), who, bl); + encode_replica_dentry(straydn, who, bl); + ENCODE_FINISH(bl); } -CDentry *MDCache::add_replica_stray(const bufferlist &bl, mds_rank_t from) +void MDCache::decode_replica_stray(CDentry *&straydn, const bufferlist &bl, mds_rank_t from) { MDSContext::vec finished; auto p = bl.cbegin(); - CInode *mdsin = add_replica_inode(p, NULL, finished); - CDir *mdsdir = add_replica_dir(p, mdsin, from, finished); - CDentry *straydirdn = add_replica_dentry(p, mdsdir, finished); - CInode *strayin = add_replica_inode(p, straydirdn, finished); - CDir *straydir = add_replica_dir(p, 
strayin, from, finished); - CDentry *straydn = add_replica_dentry(p, straydir, finished); + DECODE_START(1, p); + CInode *mdsin = nullptr; + decode_replica_inode(mdsin, p, NULL, finished); + CDir *mdsdir = nullptr; + decode_replica_dir(mdsdir, p, mdsin, from, finished); + CDentry *straydirdn = nullptr; + decode_replica_dentry(straydirdn, p, mdsdir, finished); + CInode *strayin = nullptr; + decode_replica_inode(strayin, p, straydirdn, finished); + CDir *straydir = nullptr; + decode_replica_dir(straydir, p, strayin, from, finished); + + decode_replica_dentry(straydn, p, straydir, finished); if (!finished.empty()) mds->queue_waiters(finished); - - return straydn; + DECODE_FINISH(p); } @@ -10652,9 +10712,31 @@ void MDCache::handle_dir_update(const cref_t &m) // LINK +void MDCache::encode_remote_dentry_link(CDentry::linkage_t *dnl, bufferlist& bl) +{ + ENCODE_START(1, 1, bl); + inodeno_t ino = dnl->get_remote_ino(); + encode(ino, bl); + __u8 d_type = dnl->get_remote_d_type(); + encode(d_type, bl); + ENCODE_FINISH(bl); +} + +void MDCache::decode_remote_dentry_link(CDir *dir, CDentry *dn, bufferlist::const_iterator& p) +{ + DECODE_START(1, p); + inodeno_t ino; + __u8 d_type; + decode(ino, p); + decode(d_type, p); + dout(10) << __func__ << " remote " << ino << " " << d_type << dendl; + dir->link_remote_inode(dn, ino, d_type); + DECODE_FINISH(p); +} + void MDCache::send_dentry_link(CDentry *dn, MDRequestRef& mdr) { - dout(7) << "send_dentry_link " << *dn << dendl; + dout(7) << __func__ << " " << *dn << dendl; CDir *subtree = get_subtree_root(dn->get_dir()); for (const auto &p : dn->get_replicas()) { @@ -10668,15 +10750,11 @@ void MDCache::send_dentry_link(CDentry *dn, MDRequestRef& mdr) CDentry::linkage_t *dnl = dn->get_linkage(); auto m = make_message(subtree->dirfrag(), dn->get_dir()->dirfrag(), dn->get_name(), dnl->is_primary()); if (dnl->is_primary()) { - dout(10) << " primary " << *dnl->get_inode() << dendl; - replicate_inode(dnl->get_inode(), p.first, m->bl, + 
dout(10) << __func__ << " primary " << *dnl->get_inode() << dendl; + encode_replica_inode(dnl->get_inode(), p.first, m->bl, mds->mdsmap->get_up_features()); } else if (dnl->is_remote()) { - inodeno_t ino = dnl->get_remote_ino(); - __u8 d_type = dnl->get_remote_d_type(); - dout(10) << " remote " << ino << " " << d_type << dendl; - encode(ino, m->bl); - encode(d_type, m->bl); + encode_remote_dentry_link(dnl, m->bl); } else ceph_abort(); // aie, bad caller! mds->send_message_mds(m, p.first); @@ -10688,13 +10766,13 @@ void MDCache::handle_dentry_link(const cref_t &m) CDentry *dn = NULL; CDir *dir = get_dirfrag(m->get_dirfrag()); if (!dir) { - dout(7) << "handle_dentry_link don't have dirfrag " << m->get_dirfrag() << dendl; + dout(7) << __func__ << " don't have dirfrag " << m->get_dirfrag() << dendl; } else { dn = dir->lookup(m->get_dn()); if (!dn) { - dout(7) << "handle_dentry_link don't have dentry " << *dir << " dn " << m->get_dn() << dendl; + dout(7) << __func__ << " don't have dentry " << *dir << " dn " << m->get_dn() << dendl; } else { - dout(7) << "handle_dentry_link on " << *dn << dendl; + dout(7) << __func__ << " on " << *dn << dendl; CDentry::linkage_t *dnl = dn->get_linkage(); ceph_assert(!dn->is_auth()); @@ -10707,14 +10785,11 @@ void MDCache::handle_dentry_link(const cref_t &m) if (dn) { if (m->get_is_primary()) { // primary link. - add_replica_inode(p, dn, finished); + CInode *in = nullptr; + decode_replica_inode(in, p, dn, finished); } else { // remote link, easy enough. 
- inodeno_t ino; - __u8 d_type; - decode(ino, p); - decode(d_type, p); - dir->link_remote_inode(dn, ino, d_type); + decode_remote_dentry_link(dir, dn, p); } } else { ceph_abort(); @@ -10731,7 +10806,7 @@ void MDCache::handle_dentry_link(const cref_t &m) void MDCache::send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& mdr) { - dout(10) << "send_dentry_unlink " << *dn << dendl; + dout(10) << __func__ << " " << *dn << dendl; // share unlink news with replicas set replicas; dn->list_replicas(replicas); @@ -10755,7 +10830,7 @@ void MDCache::send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& md auto unlink = make_message(dn->get_dir()->dirfrag(), dn->get_name()); if (straydn) { - replicate_stray(straydn, *it, unlink->straybl); + encode_replica_stray(straydn, *it, unlink->straybl); unlink->snapbl = snapbl; } mds->send_message_mds(unlink, *it); @@ -10765,19 +10840,19 @@ void MDCache::send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& md void MDCache::handle_dentry_unlink(const cref_t &m) { // straydn - CDentry *straydn = NULL; + CDentry *straydn = nullptr; if (m->straybl.length()) - straydn = add_replica_stray(m->straybl, mds_rank_t(m->get_source().num())); + decode_replica_stray(straydn, m->straybl, mds_rank_t(m->get_source().num())); CDir *dir = get_dirfrag(m->get_dirfrag()); if (!dir) { - dout(7) << "handle_dentry_unlink don't have dirfrag " << m->get_dirfrag() << dendl; + dout(7) << __func__ << " don't have dirfrag " << m->get_dirfrag() << dendl; } else { CDentry *dn = dir->lookup(m->get_dn()); if (!dn) { - dout(7) << "handle_dentry_unlink don't have dentry " << *dir << " dn " << m->get_dn() << dendl; + dout(7) << __func__ << " don't have dentry " << *dir << " dn " << m->get_dn() << dendl; } else { - dout(7) << "handle_dentry_unlink on " << *dn << dendl; + dout(7) << __func__ << " on " << *dn << dendl; CDentry::linkage_t *dnl = dn->get_linkage(); // open inode? 
@@ -11633,7 +11708,7 @@ void MDCache::_fragment_stored(MDRequestRef& mdr) // freshly replicate new dirs to peers for (const auto& dir : info.resultfrags) { - replicate_dir(dir, p.first, notify->basebl); + encode_replica_dir(dir, p.first, notify->basebl); } mds->send_message_mds(notify, p.first); @@ -11821,8 +11896,10 @@ void MDCache::handle_fragment_notify(const cref_t &notify) // add new replica dirs values auto p = notify->basebl.cbegin(); - while (!p.end()) - add_replica_dir(p, diri, from, waiters); + while (!p.end()) { + CDir *tmp_dir = nullptr; + decode_replica_dir(tmp_dir, p, diri, from, waiters); + } mds->queue_waiters(waiters); } else { diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 2fe81e55a5e8..8dfc3f954d47 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -1134,20 +1134,22 @@ protected: friend class C_MDC_Join; public: - void replicate_dir(CDir *dir, mds_rank_t to, bufferlist& bl); - void replicate_dentry(CDentry *dn, mds_rank_t to, bufferlist& bl); - void replicate_inode(CInode *in, mds_rank_t to, bufferlist& bl, + void encode_replica_dir(CDir *dir, mds_rank_t to, bufferlist& bl); + void encode_replica_dentry(CDentry *dn, mds_rank_t to, bufferlist& bl); + void encode_replica_inode(CInode *in, mds_rank_t to, bufferlist& bl, uint64_t features); - CDir* add_replica_dir(bufferlist::const_iterator& p, CInode *diri, mds_rank_t from, MDSContext::vec& finished); - CDentry *add_replica_dentry(bufferlist::const_iterator& p, CDir *dir, MDSContext::vec& finished); - CInode *add_replica_inode(bufferlist::const_iterator& p, CDentry *dn, MDSContext::vec& finished); + void decode_replica_dir(CDir *&dir, bufferlist::const_iterator& p, CInode *diri, mds_rank_t from, MDSContext::vec& finished); + void decode_replica_dentry(CDentry *&dn, bufferlist::const_iterator& p, CDir *dir, MDSContext::vec& finished); + void decode_replica_inode(CInode *&in, bufferlist::const_iterator& p, CDentry *dn, MDSContext::vec& finished); - void replicate_stray(CDentry *straydn, 
mds_rank_t who, bufferlist& bl); - CDentry *add_replica_stray(const bufferlist &bl, mds_rank_t from); + void encode_replica_stray(CDentry *straydn, mds_rank_t who, bufferlist& bl); + void decode_replica_stray(CDentry *&straydn, const bufferlist &bl, mds_rank_t from); // -- namespace -- public: + void encode_remote_dentry_link(CDentry::linkage_t *dnl, bufferlist& bl); + void decode_remote_dentry_link(CDir *dir, CDentry *dn, bufferlist::const_iterator& p); void send_dentry_link(CDentry *dn, MDRequestRef& mdr); void send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& mdr); protected: diff --git a/src/mds/MDSDaemon.h b/src/mds/MDSDaemon.h index 13b29d19d6b5..bbb7f1ed3886 100644 --- a/src/mds/MDSDaemon.h +++ b/src/mds/MDSDaemon.h @@ -35,7 +35,7 @@ #include "MDSMap.h" #include "MDSRank.h" -#define CEPH_MDS_PROTOCOL 34 /* cluster internal */ +#define CEPH_MDS_PROTOCOL 35 /* cluster internal */ class Messenger; class MonClient; diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 0551f37b134a..debfcc2bb633 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -1251,6 +1251,57 @@ void Migrator::export_sessions_flushed(CDir *dir, uint64_t tid) export_go(dir); // start export. 
} +void Migrator::encode_export_prep_trace(bufferlist &final_bl, CDir *bound, + CDir *dir, export_state_t &es, + set &inodes_added, + set &dirfrags_added) +{ + ENCODE_START(1, 1, final_bl); + + dout(7) << __func__ << " started to encode dir " << *bound << dendl; + CDir *cur = bound; + bufferlist tracebl; + char start = '-'; + + while (1) { + // don't repeat inodes + if (inodes_added.count(cur->inode->ino())) + break; + inodes_added.insert(cur->inode->ino()); + + // prepend dentry + inode + ceph_assert(cur->inode->is_auth()); + bufferlist bl; + cache->encode_replica_dentry(cur->inode->parent, es.peer, bl); + dout(7) << " added " << *cur->inode->parent << dendl; + cache->encode_replica_inode(cur->inode, es.peer, bl, mds->mdsmap->get_up_features()); + dout(7) << " added " << *cur->inode << dendl; + bl.claim_append(tracebl); + tracebl.claim(bl); + + cur = cur->get_parent_dir(); + // don't repeat dirfrags + if (dirfrags_added.count(cur->dirfrag()) || cur == dir) { + start = 'd'; // start with dentry + break; + } + dirfrags_added.insert(cur->dirfrag()); + + // prepend dir + cache->encode_replica_dir(cur, es.peer, bl); + dout(7) << " added " << *cur << dendl; + bl.claim_append(tracebl); + tracebl.claim(bl); + start = 'f'; // start with dirfrag + } + dirfrag_t df = cur->dirfrag(); + encode(df, final_bl); + encode(start, final_bl); + final_bl.claim_append(tracebl); + + ENCODE_FINISH(final_bl); +} + void Migrator::export_frozen(CDir *dir, uint64_t tid) { dout(7) << "export_frozen on " << *dir << dendl; @@ -1307,7 +1358,7 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid) } // include base dirfrag - cache->replicate_dir(dir, it->second.peer, prep->basedir); + cache->encode_replica_dir(dir, it->second.peer, prep->basedir); /* * include spanning tree for all nested exports. 
@@ -1322,63 +1373,16 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid) set dirfrags_added; // check bounds - for (set::iterator p = bounds.begin(); - p != bounds.end(); - ++p) { - CDir *bound = *p; - + for (auto &bound : bounds){ // pin it. bound->get(CDir::PIN_EXPORTBOUND); bound->state_set(CDir::STATE_EXPORTBOUND); - + dout(7) << " export bound " << *bound << dendl; prep->add_bound( bound->dirfrag() ); - - // trace to bound - bufferlist tracebl; - CDir *cur = bound; - - char start = '-'; - while (1) { - // don't repeat inodes - if (inodes_added.count(cur->inode->ino())) - break; - inodes_added.insert(cur->inode->ino()); - - // prepend dentry + inode - ceph_assert(cur->inode->is_auth()); - bufferlist bl; - cache->replicate_dentry(cur->inode->parent, it->second.peer, bl); - dout(7) << " added " << *cur->inode->parent << dendl; - cache->replicate_inode(cur->inode, it->second.peer, bl, - mds->mdsmap->get_up_features()); - dout(7) << " added " << *cur->inode << dendl; - bl.claim_append(tracebl); - tracebl.claim(bl); - - cur = cur->get_parent_dir(); - - // don't repeat dirfrags - if (dirfrags_added.count(cur->dirfrag()) || - cur == dir) { - start = 'd'; // start with dentry - break; - } - dirfrags_added.insert(cur->dirfrag()); - - // prepend dir - cache->replicate_dir(cur, it->second.peer, bl); - dout(7) << " added " << *cur << dendl; - bl.claim_append(tracebl); - tracebl.claim(bl); - - start = 'f'; // start with dirfrag - } + bufferlist final_bl; - dirfrag_t df = cur->dirfrag(); - encode(df, final_bl); - encode(start, final_bl); - final_bl.claim_append(tracebl); + encode_export_prep_trace(final_bl, bound, dir, it->second, inodes_added, dirfrags_added); prep->add_trace(final_bl); } @@ -1558,10 +1562,10 @@ void Migrator::export_go_synced(CDir *dir, uint64_t tid) auto req = make_message(dir->dirfrag(), it->second.tid); map exported_client_map; map exported_client_metadata_map; - uint64_t num_exported_inodes = encode_export_dir(req->export_data, - dir, // recur 
start point - exported_client_map, - exported_client_metadata_map); + uint64_t num_exported_inodes = 0; + encode_export_dir(req->export_data, dir, // recur start point + exported_client_map, exported_client_metadata_map, + num_exported_inodes); encode(exported_client_map, req->client_map, mds->mdsmap->get_up_features()); encode(exported_client_metadata_map, req->client_map); @@ -1599,7 +1603,8 @@ void Migrator::encode_export_inode(CInode *in, bufferlist& enc_state, map& exported_client_map, map& exported_client_metadata_map) { - dout(7) << "encode_export_inode " << *in << dendl; + ENCODE_START(1, 1, enc_state); + dout(7) << __func__ << " " << *in << dendl; ceph_assert(!in->is_replica(mds->get_nodeid())); encode(in->inode.ino, enc_state); @@ -1608,14 +1613,15 @@ void Migrator::encode_export_inode(CInode *in, bufferlist& enc_state, // caps encode_export_inode_caps(in, true, enc_state, exported_client_map, exported_client_metadata_map); + ENCODE_FINISH(enc_state); } void Migrator::encode_export_inode_caps(CInode *in, bool auth_cap, bufferlist& bl, map& exported_client_map, map& exported_client_metadata_map) { + ENCODE_START(1, 1, bl); dout(20) << "encode_export_inode_caps " << *in << dendl; - // encode caps map cap_map; in->export_client_caps(cap_map); @@ -1635,6 +1641,7 @@ void Migrator::encode_export_inode_caps(CInode *in, bool auth_cap, bufferlist& b exported_client_map[p.first] = session->info.inst; exported_client_metadata_map[p.first] = session->info.client_metadata; } + ENCODE_FINISH(bl); } void Migrator::finish_export_inode_caps(CInode *in, mds_rank_t peer, @@ -1710,14 +1717,17 @@ void Migrator::finish_export_inode(CInode *in, mds_rank_t peer, finish_export_inode_caps(in, peer, peer_imported); } -uint64_t Migrator::encode_export_dir(bufferlist& exportbl, +void Migrator::encode_export_dir(bufferlist& exportbl, CDir *dir, map& exported_client_map, - map& exported_client_metadata_map) + map& exported_client_metadata_map, + uint64_t &num_exported) { - uint64_t 
num_exported = 0; - - dout(7) << "encode_export_dir " << *dir << " " << dir->get_num_head_items() << " head items" << dendl; + // This has to be declared before ENCODE_STARTED as it will need to be referenced after ENCODE_FINISH. + std::vector subdirs; + + ENCODE_START(1, 1, exportbl); + dout(7) << __func__ << " " << *dir << " " << dir->get_num_head_items() << " head items" << dendl; ceph_assert(dir->get_projected_version() == dir->get_version()); @@ -1735,7 +1745,6 @@ uint64_t Migrator::encode_export_dir(bufferlist& exportbl, encode(nden, exportbl); // dentries - std::vector subdirs; for (auto &p : *dir) { CDentry *dn = p.second; CInode *in = dn->get_linkage()->get_inode(); @@ -1743,7 +1752,7 @@ uint64_t Migrator::encode_export_dir(bufferlist& exportbl, num_exported++; // -- dentry - dout(7) << "encode_export_dir exporting " << *dn << dendl; + dout(7) << __func__ << " exporting " << *dn << dendl; // dn name encode(dn->get_name(), exportbl); @@ -1788,11 +1797,11 @@ uint64_t Migrator::encode_export_dir(bufferlist& exportbl, } } + ENCODE_FINISH(exportbl); // subdirs - for (const auto& dir : subdirs) - num_exported += encode_export_dir(exportbl, dir, exported_client_map, exported_client_metadata_map); - - return num_exported; + for (const auto &dir : subdirs) { + encode_export_dir(exportbl, dir, exported_client_map, exported_client_metadata_map, num_exported); + } } void Migrator::finish_export_dir(CDir *dir, mds_rank_t peer, @@ -2390,6 +2399,47 @@ private: cref_t m; }; +void Migrator::decode_export_prep_trace(bufferlist::const_iterator& blp, mds_rank_t oldauth, MDSContext::vec& finished) +{ + DECODE_START(1, blp); + dirfrag_t df; + decode(df, blp); + char start; + decode(start, blp); + dout(10) << " trace from " << df << " start " << start << dendl; + + CDir *cur = nullptr; + if (start == 'd') { + cur = cache->get_dirfrag(df); + ceph_assert(cur); + dout(10) << " had " << *cur << dendl; + } else if (start == 'f') { + CInode *in = cache->get_inode(df.ino); + 
ceph_assert(in); + dout(10) << " had " << *in << dendl; + cache->decode_replica_dir(cur, blp, in, oldauth, finished); + dout(10) << " added " << *cur << dendl; + } else if (start == '-') { + // nothing + } else + ceph_abort_msg("unrecognized start char"); + + while (!blp.end()) { + CDentry *dn = nullptr; + cache->decode_replica_dentry(dn, blp, cur, finished); + dout(10) << " added " << *dn << dendl; + CInode *in = nullptr; + cache->decode_replica_inode(in, blp, dn, finished); + dout(10) << " added " << *in << dendl; + if (blp.end()) + break; + cache->decode_replica_dir(cur, blp, in, oldauth, finished); + dout(10) << " added " << *cur << dendl; + } + + DECODE_FINISH(blp); +} + void Migrator::handle_export_prep(const cref_t &m, bool did_assim) { mds_rank_t oldauth = mds_rank_t(m->get_source().num()); @@ -2408,7 +2458,7 @@ void Migrator::handle_export_prep(const cref_t &m, bool did_assi diri = cache->get_inode(m->get_dirfrag().ino); ceph_assert(diri); auto p = m->basedir.cbegin(); - dir = cache->add_replica_dir(p, diri, oldauth, finished); + cache->decode_replica_dir(dir, p, diri, oldauth, finished); dout(7) << "handle_export_prep on " << *dir << " (first pass)" << dendl; } else { if (it == import_state.end() || @@ -2457,39 +2507,8 @@ void Migrator::handle_export_prep(const cref_t &m, bool did_assi // assimilate traces to exports // each trace is: df ('-' | ('f' dir | 'd') dentry inode (dir dentry inode)*) for (const auto &bl : m->traces) { - auto q = bl.cbegin(); - dirfrag_t df; - decode(df, q); - char start; - decode(start, q); - dout(10) << " trace from " << df << " start " << start << " len " << bl.length() << dendl; - - CDir *cur = 0; - if (start == 'd') { - cur = cache->get_dirfrag(df); - ceph_assert(cur); - dout(10) << " had " << *cur << dendl; - } else if (start == 'f') { - CInode *in = cache->get_inode(df.ino); - ceph_assert(in); - dout(10) << " had " << *in << dendl; - cur = cache->add_replica_dir(q, in, oldauth, finished); - dout(10) << " added " << *cur << 
dendl; - } else if (start == '-') { - // nothing - } else - ceph_abort_msg("unrecognized start char"); - - while (!q.end()) { - CDentry *dn = cache->add_replica_dentry(q, cur, finished); - dout(10) << " added " << *dn << dendl; - CInode *in = cache->add_replica_inode(q, dn, finished); - dout(10) << " added " << *in << dendl; - if (q.end()) - break; - cur = cache->add_replica_dir(q, in, oldauth, finished); - dout(10) << " added " << *cur << dendl; - } + auto blp = bl.cbegin(); + decode_export_prep_trace(blp, oldauth, finished); } // make bound sticky @@ -2662,14 +2681,14 @@ void Migrator::handle_export_dir(const cref_t &m) auto blp = m->export_data.cbegin(); int num_imported_inodes = 0; while (!blp.end()) { - num_imported_inodes += - decode_import_dir(blp, - oldauth, - dir, // import root - le, - mds->mdlog->get_current_segment(), - it->second.peer_exports, - it->second.updated_scatterlocks); + decode_import_dir(blp, + oldauth, + dir, // import root + le, + mds->mdlog->get_current_segment(), + it->second.peer_exports, + it->second.updated_scatterlocks, + num_imported_inodes); } dout(10) << " " << m->bounds.size() << " imported bounds" << dendl; @@ -3139,8 +3158,9 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp, mds_rank_t oldauth, LogSegment *ls, map >& peer_exports, list& updated_scatterlocks) -{ - dout(15) << "decode_import_inode on " << *dn << dendl; +{ + DECODE_START(1, blp); + dout(15) << __func__ << " on " << *dn << dendl; inodeno_t ino; snapid_t last; @@ -3201,12 +3221,15 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp, if (in->snaplock.is_stable() && in->snaplock.get_state() != LOCK_SYNC) mds->locker->try_eval(&in->snaplock, NULL); + + DECODE_FINISH(blp); } void Migrator::decode_import_inode_caps(CInode *in, bool auth_cap, bufferlist::const_iterator &blp, map >& peer_exports) { + DECODE_START(1, blp); map cap_map; decode(cap_map, blp); if (auth_cap) { @@ -3220,6 +3243,7 @@ void 
Migrator::decode_import_inode_caps(CInode *in, bool auth_cap, peer_exports[in].swap(cap_map); in->get(CInode::PIN_IMPORTINGCAPS); } + DECODE_FINISH(blp); } void Migrator::finish_import_inode_caps(CInode *in, mds_rank_t peer, bool auth_cap, @@ -3271,14 +3295,15 @@ void Migrator::finish_import_inode_caps(CInode *in, mds_rank_t peer, bool auth_c } } -int Migrator::decode_import_dir(bufferlist::const_iterator& blp, +void Migrator::decode_import_dir(bufferlist::const_iterator& blp, mds_rank_t oldauth, CDir *import_root, EImportStart *le, LogSegment *ls, map >& peer_exports, - list& updated_scatterlocks) + list& updated_scatterlocks, int &num_imported) { + DECODE_START(1, blp); // set up dir dirfrag_t df; decode(df, blp); @@ -3288,7 +3313,7 @@ int Migrator::decode_import_dir(bufferlist::const_iterator& blp, CDir *dir = diri->get_or_open_dirfrag(mds->mdcache, df.frag); ceph_assert(dir); - dout(7) << "decode_import_dir " << *dir << dendl; + dout(7) << __func__ << " " << *dir << dendl; if (!dir->freeze_tree_state) { ceph_assert(dir->get_version() == 0); @@ -3347,7 +3372,7 @@ int Migrator::decode_import_dir(bufferlist::const_iterator& blp, if (dn->lock.get_state() != LOCK_SYNC) mds->locker->try_eval(&dn->lock, NULL); - dout(15) << "decode_import_dir got " << *dn << dendl; + dout(15) << __func__ << " got " << *dn << dendl; // points to... 
char icode; @@ -3390,8 +3415,8 @@ int Migrator::decode_import_dir(bufferlist::const_iterator& blp, dir->inode->maybe_export_pin(); - dout(7) << "decode_import_dir done " << *dir << dendl; - return num_imported; + dout(7) << __func__ << " done " << *dir << dendl; + DECODE_FINISH(blp); } diff --git a/src/mds/Migrator.h b/src/mds/Migrator.h index a0fa4eafc67a..dbb299129a3c 100644 --- a/src/mds/Migrator.h +++ b/src/mds/Migrator.h @@ -172,6 +172,9 @@ protected: void export_logged_finish(CDir *dir); void handle_export_notify_ack(const cref_t &m); void export_finish(CDir *dir); + void encode_export_prep_trace(bufferlist& bl, CDir *bound, CDir *dir, export_state_t &es, + set &inodes_added, set &dirfrags_added); + void decode_export_prep_trace(bufferlist::const_iterator& blp, mds_rank_t oldauth, MDSContext::vec &finished); void handle_gather_caps(const cref_t &m); @@ -332,10 +335,11 @@ public: std::map& peer_imported); - uint64_t encode_export_dir(bufferlist& exportbl, + void encode_export_dir(bufferlist& exportbl, CDir *dir, std::map& exported_client_map, - std::map& exported_client_metadata_map); + std::map& exported_client_metadata_map, + uint64_t &num_exported); void finish_export_dir(CDir *dir, mds_rank_t target, std::map >& peer_imported, MDSContext::vec& finished, int *num_dentries); @@ -354,13 +358,13 @@ public: const std::map >& smap, const std::map &export_map, std::map &import_map); - int decode_import_dir(bufferlist::const_iterator& blp, + void decode_import_dir(bufferlist::const_iterator& blp, mds_rank_t oldauth, CDir *import_root, EImportStart *le, LogSegment *ls, std::map >& cap_imports, - std::list& updated_scatterlocks); + std::list& updated_scatterlocks, int &num_imported); void import_reverse(CDir *dir); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 92ad6e8a5c9d..7e93194fa2c2 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2453,7 +2453,7 @@ void Server::handle_slave_request(const cref_t &m) CDentry *straydn = NULL; if 
(m->straybl.length() > 0) { - straydn = mdcache->add_replica_stray(m->straybl, from); + mdcache->decode_replica_stray(straydn, m->straybl, from); ceph_assert(straydn); m->straybl.clear(); } @@ -6939,7 +6939,7 @@ bool Server::_rmdir_prepare_witness(MDRequestRef& mdr, mds_rank_t who, vectorsrcdnpath = filepath(trace.front()->get_dir()->ino()); for (auto dn : trace) req->srcdnpath.push_dentry(dn->get_name()); - mdcache->replicate_stray(straydn, who, req->straybl); + mdcache->encode_replica_stray(straydn, who, req->straybl); if (mdr->more()->desti_srnode) encode(*mdr->more()->desti_srnode, req->desti_snapbl); @@ -7863,7 +7863,7 @@ bool Server::_rename_prepare_witness(MDRequestRef& mdr, mds_rank_t who, setdestdnpath.push_dentry(dn->get_name()); if (straydn) - mdcache->replicate_stray(straydn, who, req->straybl); + mdcache->encode_replica_stray(straydn, who, req->straybl); if (mdr->more()->srci_srnode) encode(*mdr->more()->srci_srnode, req->srci_snapbl); diff --git a/src/messages/MDentryLink.h b/src/messages/MDentryLink.h index 293fe0d4c5c6..38cccf1fae1b 100644 --- a/src/messages/MDentryLink.h +++ b/src/messages/MDentryLink.h @@ -22,6 +22,9 @@ class MDentryLink : public Message { private: + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; + dirfrag_t subtree; dirfrag_t dirfrag; string dn; @@ -37,9 +40,9 @@ private: protected: MDentryLink() : - Message{MSG_MDS_DENTRYLINK} { } + Message(MSG_MDS_DENTRYLINK, HEAD_VERSION, COMPAT_VERSION) { } MDentryLink(dirfrag_t r, dirfrag_t df, std::string_view n, bool p) : - Message{MSG_MDS_DENTRYLINK}, + Message(MSG_MDS_DENTRYLINK, HEAD_VERSION, COMPAT_VERSION), subtree(r), dirfrag(df), dn(n), diff --git a/src/messages/MDentryUnlink.h b/src/messages/MDentryUnlink.h index eed4ac47dbaf..acff7150f3f5 100644 --- a/src/messages/MDentryUnlink.h +++ b/src/messages/MDentryUnlink.h @@ -22,6 +22,9 @@ class MDentryUnlink : public Message { private: + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; 
+ dirfrag_t dirfrag; string dn; @@ -34,9 +37,9 @@ private: protected: MDentryUnlink() : - Message{MSG_MDS_DENTRYUNLINK} { } + Message(MSG_MDS_DENTRYUNLINK, HEAD_VERSION, COMPAT_VERSION) { } MDentryUnlink(dirfrag_t df, std::string_view n) : - Message{MSG_MDS_DENTRYUNLINK}, + Message(MSG_MDS_DENTRYUNLINK, HEAD_VERSION, COMPAT_VERSION), dirfrag(df), dn(n) {} ~MDentryUnlink() override {} diff --git a/src/messages/MDirUpdate.h b/src/messages/MDirUpdate.h index 729e76b79b45..e5f97f3847d2 100644 --- a/src/messages/MDirUpdate.h +++ b/src/messages/MDirUpdate.h @@ -57,14 +57,14 @@ public: protected: ~MDirUpdate() {} - MDirUpdate() : Message{MSG_MDS_DIRUPDATE} {} + MDirUpdate() : Message(MSG_MDS_DIRUPDATE, HEAD_VERSION, COMPAT_VERSION) {} MDirUpdate(mds_rank_t f, dirfrag_t dirfrag, int dir_rep, const std::set& dir_rep_by, filepath& path, bool discover = false) : - Message{MSG_MDS_DIRUPDATE}, from_mds(f), dirfrag(dirfrag), + Message(MSG_MDS_DIRUPDATE, HEAD_VERSION, COMPAT_VERSION), from_mds(f), dirfrag(dirfrag), dir_rep(dir_rep), dir_rep_by(dir_rep_by), path(path) { this->discover = discover ? 5 : 0; } @@ -88,6 +88,8 @@ protected: mutable int tried_discover = 0; // XXX HACK private: + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; template friend boost::intrusive_ptr ceph::make_message(Args&&... 
args); }; diff --git a/src/messages/MDiscover.h b/src/messages/MDiscover.h index 3b0ac20214a8..0b54ad3e6f4b 100644 --- a/src/messages/MDiscover.h +++ b/src/messages/MDiscover.h @@ -24,6 +24,9 @@ class MDiscover : public Message { private: + static constexpr int HEAD_VERSION = 1; + static constexpr int COMPAT_VERSION = 1; + inodeno_t base_ino; // 1 -> root frag_t base_dir_frag; @@ -47,7 +50,7 @@ private: void set_base_dir_frag(frag_t f) { base_dir_frag = f; } protected: - MDiscover() : Message{MSG_MDS_DISCOVER} { } + MDiscover() : Message(MSG_MDS_DISCOVER, HEAD_VERSION, COMPAT_VERSION) { } MDiscover(inodeno_t base_ino_, frag_t base_frag_, snapid_t s, diff --git a/src/messages/MDiscoverReply.h b/src/messages/MDiscoverReply.h index 978f1383e13e..8010320bdf7c 100644 --- a/src/messages/MDiscoverReply.h +++ b/src/messages/MDiscoverReply.h @@ -66,6 +66,7 @@ class MDiscoverReply : public Message { private: static constexpr int HEAD_VERSION = 2; + static constexpr int COMPAT_VERSION = 2; // info about original request inodeno_t base_ino; @@ -109,9 +110,9 @@ private: void set_base_dir_frag(frag_t df) { base_dir_frag = df; } protected: - MDiscoverReply() : Message{MSG_MDS_DISCOVERREPLY, HEAD_VERSION} { } + MDiscoverReply() : Message{MSG_MDS_DISCOVERREPLY, HEAD_VERSION, COMPAT_VERSION} { } MDiscoverReply(const MDiscover &dis) : - Message{MSG_MDS_DISCOVERREPLY, HEAD_VERSION}, + Message{MSG_MDS_DISCOVERREPLY, HEAD_VERSION, COMPAT_VERSION}, base_ino(dis.get_base_ino()), base_dir_frag(dis.get_base_dir_frag()), wanted_base_dir(dis.wants_base_dir()), @@ -126,7 +127,7 @@ protected: header.tid = dis.get_tid(); } MDiscoverReply(dirfrag_t df) : - Message{MSG_MDS_DISCOVERREPLY, HEAD_VERSION}, + Message{MSG_MDS_DISCOVERREPLY, HEAD_VERSION, COMPAT_VERSION}, base_ino(df.ino), base_dir_frag(df.frag), wanted_base_dir(false), diff --git a/src/messages/MExportCaps.h b/src/messages/MExportCaps.h index 641d31aa3e99..c05836efe6a4 100644 --- a/src/messages/MExportCaps.h +++ 
b/src/messages/MExportCaps.h @@ -23,7 +23,8 @@ class MExportCaps : public Message { private: static constexpr int HEAD_VERSION = 2; static constexpr int COMPAT_VERSION = 1; -public: + +public: inodeno_t ino; bufferlist cap_bl; map client_map; diff --git a/src/messages/MExportCapsAck.h b/src/messages/MExportCapsAck.h index df7e9839f1ee..2382ba60d5b2 100644 --- a/src/messages/MExportCapsAck.h +++ b/src/messages/MExportCapsAck.h @@ -20,15 +20,18 @@ class MExportCapsAck : public Message { + static constexpr int HEAD_VERSION = 1; + static constexpr int COMPAT_VERSION = 1; + public: inodeno_t ino; bufferlist cap_bl; protected: MExportCapsAck() : - Message{MSG_MDS_EXPORTCAPSACK} {} + Message{MSG_MDS_EXPORTCAPSACK, HEAD_VERSION, COMPAT_VERSION} {} MExportCapsAck(inodeno_t i) : - Message{MSG_MDS_EXPORTCAPSACK}, ino(i) {} + Message{MSG_MDS_EXPORTCAPSACK, HEAD_VERSION, COMPAT_VERSION}, ino(i) {} ~MExportCapsAck() override {} public: diff --git a/src/messages/MExportDirCancel.h b/src/messages/MExportDirCancel.h index c66cc1e4a0c4..7e31c103d3df 100644 --- a/src/messages/MExportDirCancel.h +++ b/src/messages/MExportDirCancel.h @@ -20,15 +20,17 @@ class MExportDirCancel : public Message { private: + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; dirfrag_t dirfrag; public: dirfrag_t get_dirfrag() const { return dirfrag; } protected: - MExportDirCancel() : Message{MSG_MDS_EXPORTDIRCANCEL} {} + MExportDirCancel() : Message{MSG_MDS_EXPORTDIRCANCEL, HEAD_VERSION, COMPAT_VERSION} {} MExportDirCancel(dirfrag_t df, uint64_t tid) : - Message{MSG_MDS_EXPORTDIRCANCEL}, dirfrag(df) { + Message{MSG_MDS_EXPORTDIRCANCEL, HEAD_VERSION, COMPAT_VERSION}, dirfrag(df) { set_tid(tid); } ~MExportDirCancel() override {} diff --git a/src/messages/MExportDirDiscover.h b/src/messages/MExportDirDiscover.h index bd03fbb0f4dd..2de45c7140dd 100644 --- a/src/messages/MExportDirDiscover.h +++ b/src/messages/MExportDirDiscover.h @@ -20,6 +20,8 @@ class MExportDirDiscover : public 
Message { private: + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; mds_rank_t from = -1; dirfrag_t dirfrag; filepath path; @@ -34,10 +36,10 @@ private: protected: MExportDirDiscover() : - Message{MSG_MDS_EXPORTDIRDISCOVER}, + Message{MSG_MDS_EXPORTDIRDISCOVER, HEAD_VERSION, COMPAT_VERSION}, started(false) { } MExportDirDiscover(dirfrag_t df, filepath& p, mds_rank_t f, uint64_t tid) : - Message{MSG_MDS_EXPORTDIRDISCOVER}, + Message{MSG_MDS_EXPORTDIRDISCOVER, HEAD_VERSION, COMPAT_VERSION}, from(f), dirfrag(df), path(p), started(false) { set_tid(tid); } diff --git a/src/messages/MExportDirDiscoverAck.h b/src/messages/MExportDirDiscoverAck.h index 4b0e5141e854..09633b4cfaf0 100644 --- a/src/messages/MExportDirDiscoverAck.h +++ b/src/messages/MExportDirDiscoverAck.h @@ -20,6 +20,9 @@ class MExportDirDiscoverAck : public Message { private: + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; + dirfrag_t dirfrag; bool success; @@ -29,9 +32,9 @@ private: bool is_success() const { return success; } protected: - MExportDirDiscoverAck() : Message{MSG_MDS_EXPORTDIRDISCOVERACK} {} + MExportDirDiscoverAck() : Message{MSG_MDS_EXPORTDIRDISCOVERACK, HEAD_VERSION, COMPAT_VERSION} {} MExportDirDiscoverAck(dirfrag_t df, uint64_t tid, bool s=true) : - Message{MSG_MDS_EXPORTDIRDISCOVERACK}, + Message{MSG_MDS_EXPORTDIRDISCOVERACK, HEAD_VERSION, COMPAT_VERSION}, dirfrag(df), success(s) { set_tid(tid); } diff --git a/src/messages/MExportDirFinish.h b/src/messages/MExportDirFinish.h index 9ed4f01064b8..8a73c40de5f6 100644 --- a/src/messages/MExportDirFinish.h +++ b/src/messages/MExportDirFinish.h @@ -19,6 +19,9 @@ class MExportDirFinish : public Message { private: + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; + dirfrag_t dirfrag; bool last; @@ -27,9 +30,10 @@ private: bool is_last() const { return last; } protected: - MExportDirFinish() : last(false) {} + MExportDirFinish() : + Message{MSG_MDS_EXPORTDIRFINISH, 
HEAD_VERSION, COMPAT_VERSION}, last(false) {} MExportDirFinish(dirfrag_t df, bool l, uint64_t tid) : - Message{MSG_MDS_EXPORTDIRFINISH}, dirfrag(df), last(l) { + Message{MSG_MDS_EXPORTDIRFINISH, HEAD_VERSION, COMPAT_VERSION}, dirfrag(df), last(l) { set_tid(tid); } ~MExportDirFinish() override {} diff --git a/src/messages/MExportDirNotify.h b/src/messages/MExportDirNotify.h index 03d8fc626eb7..10962afe818d 100644 --- a/src/messages/MExportDirNotify.h +++ b/src/messages/MExportDirNotify.h @@ -19,6 +19,9 @@ class MExportDirNotify : public Message { private: + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; + dirfrag_t base; bool ack; pair<__s32,__s32> old_auth, new_auth; @@ -33,9 +36,10 @@ private: list& get_bounds() { return bounds; } protected: - MExportDirNotify() {} + MExportDirNotify() : + Message{MSG_MDS_EXPORTDIRNOTIFY, HEAD_VERSION, COMPAT_VERSION} {} MExportDirNotify(dirfrag_t i, uint64_t tid, bool a, pair<__s32,__s32> oa, pair<__s32,__s32> na) : - Message{MSG_MDS_EXPORTDIRNOTIFY}, + Message{MSG_MDS_EXPORTDIRNOTIFY, HEAD_VERSION, COMPAT_VERSION}, base(i), ack(a), old_auth(oa), new_auth(na) { set_tid(tid); } diff --git a/src/messages/MExportDirNotifyAck.h b/src/messages/MExportDirNotifyAck.h index 2db265495875..06796fc15108 100644 --- a/src/messages/MExportDirNotifyAck.h +++ b/src/messages/MExportDirNotifyAck.h @@ -19,6 +19,9 @@ class MExportDirNotifyAck : public Message { private: + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; + dirfrag_t dirfrag; pair<__s32,__s32> new_auth; @@ -27,9 +30,10 @@ private: pair<__s32,__s32> get_new_auth() const { return new_auth; } protected: - MExportDirNotifyAck() {} + MExportDirNotifyAck() : + Message{MSG_MDS_EXPORTDIRNOTIFYACK, HEAD_VERSION, COMPAT_VERSION} {} MExportDirNotifyAck(dirfrag_t df, uint64_t tid, pair<__s32,__s32> na) : - Message{MSG_MDS_EXPORTDIRNOTIFYACK}, dirfrag(df), new_auth(na) { + Message{MSG_MDS_EXPORTDIRNOTIFYACK, HEAD_VERSION, COMPAT_VERSION}, 
dirfrag(df), new_auth(na) { set_tid(tid); } ~MExportDirNotifyAck() override {} diff --git a/src/messages/MExportDirPrep.h b/src/messages/MExportDirPrep.h index 0061b9510d9f..f98fbc385d1b 100644 --- a/src/messages/MExportDirPrep.h +++ b/src/messages/MExportDirPrep.h @@ -21,6 +21,9 @@ class MExportDirPrep : public Message { private: + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; + dirfrag_t dirfrag; public: bufferlist basedir; @@ -41,7 +44,7 @@ public: protected: MExportDirPrep() = default; MExportDirPrep(dirfrag_t df, uint64_t tid) : - Message{MSG_MDS_EXPORTDIRPREP}, + Message{MSG_MDS_EXPORTDIRPREP, HEAD_VERSION, COMPAT_VERSION}, dirfrag(df) { set_tid(tid); diff --git a/src/messages/MExportDirPrepAck.h b/src/messages/MExportDirPrepAck.h index 02ada30534b6..217d13e4ac46 100644 --- a/src/messages/MExportDirPrepAck.h +++ b/src/messages/MExportDirPrepAck.h @@ -20,6 +20,9 @@ class MExportDirPrepAck : public Message { private: + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; + dirfrag_t dirfrag; bool success = false; @@ -27,9 +30,10 @@ private: dirfrag_t get_dirfrag() const { return dirfrag; } protected: - MExportDirPrepAck() {} + MExportDirPrepAck() : + Message{MSG_MDS_EXPORTDIRPREPACK, HEAD_VERSION, COMPAT_VERSION} {} MExportDirPrepAck(dirfrag_t df, bool s, uint64_t tid) : - Message{MSG_MDS_EXPORTDIRPREPACK}, dirfrag(df), success(s) { + Message{MSG_MDS_EXPORTDIRPREPACK, HEAD_VERSION, COMPAT_VERSION}, dirfrag(df), success(s) { set_tid(tid); } ~MExportDirPrepAck() override {} diff --git a/src/messages/MGatherCaps.h b/src/messages/MGatherCaps.h index 24dd46b64f8d..e4f981d6b865 100644 --- a/src/messages/MGatherCaps.h +++ b/src/messages/MGatherCaps.h @@ -5,12 +5,15 @@ class MGatherCaps : public Message { + static constexpr int HEAD_VERSION = 1; + static constexpr int COMPAT_VERSION = 1; + public: inodeno_t ino; protected: MGatherCaps() : - Message{MSG_MDS_GATHERCAPS} {} + Message{MSG_MDS_GATHERCAPS, HEAD_VERSION, 
COMPAT_VERSION} {} ~MGatherCaps() override {} public: diff --git a/src/messages/MInodeFileCaps.h b/src/messages/MInodeFileCaps.h index 6bd636bf47d6..ab02f66286e2 100644 --- a/src/messages/MInodeFileCaps.h +++ b/src/messages/MInodeFileCaps.h @@ -20,18 +20,19 @@ class MInodeFileCaps : public Message { private: + static constexpr int HEAD_VERSION = 1; + static constexpr int COMPAT_VERSION = 1; inodeno_t ino; __u32 caps = 0; - public: - +public: inodeno_t get_ino() const { return ino; } int get_caps() const { return caps; } protected: - MInodeFileCaps() : Message{MSG_MDS_INODEFILECAPS} {} + MInodeFileCaps() : Message(MSG_MDS_INODEFILECAPS, HEAD_VERSION, COMPAT_VERSION) {} MInodeFileCaps(inodeno_t ino, int caps) : - Message{MSG_MDS_INODEFILECAPS} { + Message(MSG_MDS_INODEFILECAPS, HEAD_VERSION, COMPAT_VERSION) { this->ino = ino; this->caps = caps; } diff --git a/src/messages/MLock.h b/src/messages/MLock.h index cfa4b271985e..1b284cdb2d29 100644 --- a/src/messages/MLock.h +++ b/src/messages/MLock.h @@ -22,6 +22,9 @@ class MLock : public Message { private: + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; + int32_t action = 0; // action type mds_rank_t asker = 0; // who is initiating this request metareqid_t reqid; // for remote lock requests @@ -43,19 +46,19 @@ public: MDSCacheObjectInfo &get_object_info() { return object_info; } protected: - MLock() : Message{MSG_MDS_LOCK} {} + MLock() : Message{MSG_MDS_LOCK, HEAD_VERSION, COMPAT_VERSION} {} MLock(int ac, mds_rank_t as) : - Message{MSG_MDS_LOCK}, + Message{MSG_MDS_LOCK, HEAD_VERSION, COMPAT_VERSION}, action(ac), asker(as), lock_type(0) { } MLock(SimpleLock *lock, int ac, mds_rank_t as) : - Message{MSG_MDS_LOCK}, + Message{MSG_MDS_LOCK, HEAD_VERSION, COMPAT_VERSION}, action(ac), asker(as), lock_type(lock->get_type()) { lock->get_parent()->set_object_info(object_info); } MLock(SimpleLock *lock, int ac, mds_rank_t as, bufferlist& bl) : - Message{MSG_MDS_LOCK}, + Message{MSG_MDS_LOCK, HEAD_VERSION, 
COMPAT_VERSION}, action(ac), asker(as), lock_type(lock->get_type()) { lock->get_parent()->set_object_info(object_info); lockdata.claim(bl); diff --git a/src/messages/MMDSFindIno.h b/src/messages/MMDSFindIno.h index e585d843e763..49737ee2191f 100644 --- a/src/messages/MMDSFindIno.h +++ b/src/messages/MMDSFindIno.h @@ -19,13 +19,15 @@ #include "include/filepath.h" class MMDSFindIno : public Message { + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; public: ceph_tid_t tid {0}; inodeno_t ino; protected: - MMDSFindIno() : Message{MSG_MDS_FINDINO} {} - MMDSFindIno(ceph_tid_t t, inodeno_t i) : Message{MSG_MDS_FINDINO}, tid(t), ino(i) {} + MMDSFindIno() : Message{MSG_MDS_FINDINO, HEAD_VERSION, COMPAT_VERSION} {} + MMDSFindIno(ceph_tid_t t, inodeno_t i) : Message{MSG_MDS_FINDINO, HEAD_VERSION, COMPAT_VERSION}, tid(t), ino(i) {} ~MMDSFindIno() override {} public: diff --git a/src/messages/MMDSFindInoReply.h b/src/messages/MMDSFindInoReply.h index f00cd933413e..dfb2202e4cc9 100644 --- a/src/messages/MMDSFindInoReply.h +++ b/src/messages/MMDSFindInoReply.h @@ -19,13 +19,15 @@ #include "include/filepath.h" class MMDSFindInoReply : public Message { + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; public: ceph_tid_t tid = 0; filepath path; protected: - MMDSFindInoReply() : Message{MSG_MDS_FINDINOREPLY} {} - MMDSFindInoReply(ceph_tid_t t) : Message{MSG_MDS_FINDINOREPLY}, tid(t) {} + MMDSFindInoReply() : Message{MSG_MDS_FINDINOREPLY, HEAD_VERSION, COMPAT_VERSION} {} + MMDSFindInoReply(ceph_tid_t t) : Message{MSG_MDS_FINDINOREPLY, HEAD_VERSION, COMPAT_VERSION}, tid(t) {} ~MMDSFindInoReply() override {} public: diff --git a/src/messages/MMDSOpenIno.h b/src/messages/MMDSOpenIno.h index 23605225ad24..7182c8055521 100644 --- a/src/messages/MMDSOpenIno.h +++ b/src/messages/MMDSOpenIno.h @@ -18,14 +18,16 @@ #include "msg/Message.h" class MMDSOpenIno : public Message { + static const int HEAD_VERSION = 1; + static const int 
COMPAT_VERSION = 1; public: inodeno_t ino; vector ancestors; protected: - MMDSOpenIno() : Message{MSG_MDS_OPENINO} {} + MMDSOpenIno() : Message{MSG_MDS_OPENINO, HEAD_VERSION, COMPAT_VERSION} {} MMDSOpenIno(ceph_tid_t t, inodeno_t i, vector* pa) : - Message{MSG_MDS_OPENINO}, ino(i) { + Message{MSG_MDS_OPENINO, HEAD_VERSION, COMPAT_VERSION}, ino(i) { header.tid = t; if (pa) ancestors = *pa; diff --git a/src/messages/MMDSOpenInoReply.h b/src/messages/MMDSOpenInoReply.h index 4cfca9aecb1f..521065ea3928 100644 --- a/src/messages/MMDSOpenInoReply.h +++ b/src/messages/MMDSOpenInoReply.h @@ -19,15 +19,17 @@ class MMDSOpenInoReply : public Message { public: + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; inodeno_t ino; vector ancestors; mds_rank_t hint; int32_t error; protected: - MMDSOpenInoReply() : Message{MSG_MDS_OPENINOREPLY}, error(0) {} + MMDSOpenInoReply() : Message{MSG_MDS_OPENINOREPLY, HEAD_VERSION, COMPAT_VERSION}, error(0) {} MMDSOpenInoReply(ceph_tid_t t, inodeno_t i, mds_rank_t h=MDS_RANK_NONE, int e=0) : - Message{MSG_MDS_OPENINOREPLY}, ino(i), hint(h), error(e) { + Message{MSG_MDS_OPENINOREPLY, HEAD_VERSION, COMPAT_VERSION}, ino(i), hint(h), error(e) { header.tid = t; } diff --git a/src/messages/MMDSResolve.h b/src/messages/MMDSResolve.h index 68b9d4a218d6..bd1148c337c6 100644 --- a/src/messages/MMDSResolve.h +++ b/src/messages/MMDSResolve.h @@ -19,24 +19,54 @@ #include "include/types.h" +#include "mds/Capability.h" + class MMDSResolve : public Message { + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; + public: map > subtrees; map > ambiguous_imports; + class slave_inode_cap { + public: + inodeno_t ino; + map cap_exports; + slave_inode_cap() {} + slave_inode_cap(inodeno_t a, map b) : ino(a), cap_exports(b) {} + void encode(bufferlist &bl) const + { + ENCODE_START(1, 1, bl); + encode(ino, bl); + encode(cap_exports, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::const_iterator &blp) + { + 
DECODE_START(1, blp); + decode(ino, blp); + decode(cap_exports, blp); + DECODE_FINISH(blp); + } + }; + WRITE_CLASS_ENCODER(slave_inode_cap) + struct slave_request { bufferlist inode_caps; bool committing; slave_request() : committing(false) {} void encode(bufferlist &bl) const { - using ceph::encode; + ENCODE_START(1, 1, bl); encode(inode_caps, bl); encode(committing, bl); + ENCODE_FINISH(bl); } - void decode(bufferlist::const_iterator &bl) { - using ceph::decode; - decode(inode_caps, bl); - decode(committing, bl); + void decode(bufferlist::const_iterator &blp) { + DECODE_START(1, blp); + decode(inode_caps, blp); + decode(committing, blp); + DECODE_FINISH(blp); } }; @@ -66,7 +96,7 @@ public: list table_clients; protected: - MMDSResolve() : Message{MSG_MDS_RESOLVE} + MMDSResolve() : Message{MSG_MDS_RESOLVE, HEAD_VERSION, COMPAT_VERSION} {} ~MMDSResolve() override {} @@ -128,4 +158,5 @@ inline ostream& operator<<(ostream& out, const MMDSResolve::slave_request&) { WRITE_CLASS_ENCODER(MMDSResolve::slave_request) WRITE_CLASS_ENCODER(MMDSResolve::table_client) +WRITE_CLASS_ENCODER(MMDSResolve::slave_inode_cap) #endif diff --git a/src/messages/MMDSResolveAck.h b/src/messages/MMDSResolveAck.h index a754b3ca153a..cea45cb7ee6b 100644 --- a/src/messages/MMDSResolveAck.h +++ b/src/messages/MMDSResolveAck.h @@ -21,12 +21,14 @@ class MMDSResolveAck : public Message { + static const int HEAD_VERSION = 1; + static const int COMPAT_VERSION = 1; public: map commit; vector abort; protected: - MMDSResolveAck() : Message{MSG_MDS_RESOLVEACK} {} + MMDSResolveAck() : Message{MSG_MDS_RESOLVEACK, HEAD_VERSION, COMPAT_VERSION} {} ~MMDSResolveAck() override {} public: diff --git a/src/messages/MMDSSlaveRequest.h b/src/messages/MMDSSlaveRequest.h index 3bdec7a095cf..ee4a7f55f648 100644 --- a/src/messages/MMDSSlaveRequest.h +++ b/src/messages/MMDSSlaveRequest.h @@ -20,6 +20,8 @@ #include "mds/mdstypes.h" class MMDSSlaveRequest : public Message { + static constexpr int HEAD_VERSION = 1; + static 
constexpr int COMPAT_VERSION = 1; public: static constexpr int OP_XLOCK = 1; static constexpr int OP_XLOCKACK = -1; @@ -156,9 +158,9 @@ public: bufferlist& get_lock_data() { return inode_export; } protected: - MMDSSlaveRequest() : Message{MSG_MDS_SLAVE_REQUEST} { } + MMDSSlaveRequest() : Message{MSG_MDS_SLAVE_REQUEST, HEAD_VERSION, COMPAT_VERSION} { } MMDSSlaveRequest(metareqid_t ri, __u32 att, int o) : - Message{MSG_MDS_SLAVE_REQUEST}, + Message{MSG_MDS_SLAVE_REQUEST, HEAD_VERSION, COMPAT_VERSION}, reqid(ri), attempt(att), op(o), flags(0), lock_type(0), inode_export_v(0), srcdn_auth(MDS_RANK_NONE) { } ~MMDSSlaveRequest() override {}