From: Yan, Zheng Date: Mon, 10 Sep 2018 10:37:54 +0000 (+0800) Subject: mds: cleanup nested auth pins tracking X-Git-Tag: v14.1.0~1090^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6de9090502ac8130033f3e5cc4d94dc1414d95b1;p=ceph.git mds: cleanup nested auth pins tracking Limit nested auth pins tracking to within a dirfrag. This is preparation for multi-threading (different threads modify different dirfrags). The main change is that, when freezing a subtree, we need to traverse the subtree and manually mark dirfrags as 'freezing'. Compared to the old approach, more work needs to be done in the freeze/unfreeze tree functions. But the new approach simplifies checks for freezing/frozen objects: they no longer need to walk up the tree to find the subtree root. Signed-off-by: "Yan, Zheng" --- diff --git a/src/mds/CDentry.cc b/src/mds/CDentry.cc index 069a4affef3..bfd9e88b927 100644 --- a/src/mds/CDentry.cc +++ b/src/mds/CDentry.cc @@ -83,8 +83,8 @@ ostream& operator<<(ostream& out, const CDentry& dn) out << " pv=" << dn.get_projected_version(); out << " v=" << dn.get_version(); - if (dn.is_auth_pinned()) { - out << " ap=" << dn.get_num_auth_pins() << "+" << dn.get_num_nested_auth_pins(); + if (dn.get_num_auth_pins()) { + out << " ap=" << dn.get_num_auth_pins(); #ifdef MDS_AUTHPIN_SET dn.print_authpin_set(out); #endif @@ -354,11 +354,9 @@ void CDentry::auth_pin(void *by) auth_pin_set.insert(by); #endif - dout(10) << "auth_pin by " << by << " on " << *this - << " now " << auth_pins << "+" << nested_auth_pins - << dendl; + dout(10) << "auth_pin by " << by << " on " << *this << " now " << auth_pins << dendl; - dir->adjust_nested_auth_pins(1, 1, by); + dir->adjust_nested_auth_pins(1, by); } void CDentry::auth_unpin(void *by) @@ -376,25 +374,15 @@ void CDentry::auth_unpin(void *by) if (auth_pins == 0) put(PIN_AUTHPIN); - dout(10) << "auth_unpin by " << by << " on " << *this - << " now " << auth_pins << "+" << nested_auth_pins - << dendl; + dout(10) << 
"auth_unpin by " << by << " on " << *this << " now " << auth_pins << dendl; ceph_assert(auth_pins >= 0); - dir->adjust_nested_auth_pins(-1, -1, by); + dir->adjust_nested_auth_pins(-1, by); } -void CDentry::adjust_nested_auth_pins(int adjustment, int diradj, void *by) +void CDentry::adjust_nested_auth_pins(int diradj, void *by) { - nested_auth_pins += adjustment; - - dout(35) << __func__ << " by " << by - << ", change " << adjustment << " yields " - << auth_pins << "+" << nested_auth_pins - << dendl; - ceph_assert(nested_auth_pins >= 0); - - dir->adjust_nested_auth_pins(adjustment, diradj, by); + dir->adjust_nested_auth_pins(diradj, by); } bool CDentry::is_frozen() const @@ -599,7 +587,6 @@ void CDentry::dump(Formatter *f) const f->dump_unsigned("projected_version", get_projected_version()); f->dump_int("auth_pins", auth_pins); - f->dump_int("nested_auth_pins", nested_auth_pins); MDSCacheObject::dump(f); diff --git a/src/mds/CDentry.h b/src/mds/CDentry.h index 0958c8044ac..8af8d522d02 100644 --- a/src/mds/CDentry.h +++ b/src/mds/CDentry.h @@ -209,7 +209,7 @@ public: bool can_auth_pin(int *err_ret=nullptr) const override; void auth_pin(void *by) override; void auth_unpin(void *by) override; - void adjust_nested_auth_pins(int adjustment, int diradj, void *by); + void adjust_nested_auth_pins(int diradj, void *by); bool is_frozen() const override; bool is_freezing() const override; int get_num_dir_auth_pins() const; diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index d63236643db..8d0307a336d 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -107,10 +107,9 @@ ostream& operator<<(ostream& out, const CDir& dir) out << " dir_auth=" << dir.get_dir_auth(); } - if (dir.get_cum_auth_pins()) { + if (dir.get_auth_pins() || dir.get_dir_auth_pins()) { out << " ap=" << dir.get_auth_pins() - << "+" << dir.get_dir_auth_pins() - << "+" << dir.get_nested_auth_pins(); + << "+" << dir.get_dir_auth_pins(); #ifdef MDS_AUTHPIN_SET dir.print_authpin_set(out); #endif @@ -201,7 +200,7 @@ 
CDir::CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth) : num_head_items(0), num_head_null(0), num_snap_items(0), num_snap_null(0), num_dirty(0), committing_version(0), committed_version(0), - dir_auth_pins(0), request_pins(0), + dir_auth_pins(0), dir_rep(REP_NONE), pop_me(mdcache->decayrate), pop_nested(mdcache->decayrate), @@ -595,8 +594,8 @@ void CDir::link_inode_work( CDentry *dn, CInode *in) adjust_num_inodes_with_caps(1); // adjust auth pin count - if (in->auth_pins + in->nested_auth_pins) - dn->adjust_nested_auth_pins(in->auth_pins + in->nested_auth_pins, in->auth_pins, NULL); + if (in->auth_pins) + dn->adjust_nested_auth_pins(in->auth_pins, NULL); // verify open snaprealm parent if (in->snaprealm) @@ -654,7 +653,7 @@ void CDir::try_remove_unlinked_dn(CDentry *dn) } -void CDir::unlink_inode_work( CDentry *dn ) +void CDir::unlink_inode_work(CDentry *dn) { CInode *in = dn->get_linkage()->get_inode(); @@ -676,8 +675,8 @@ void CDir::unlink_inode_work( CDentry *dn ) adjust_num_inodes_with_caps(-1); // unlink auth_pin count - if (in->auth_pins + in->nested_auth_pins) - dn->adjust_nested_auth_pins(0 - (in->auth_pins + in->nested_auth_pins), 0 - in->auth_pins, NULL); + if (in->auth_pins) + dn->adjust_nested_auth_pins(-in->auth_pins, nullptr); // detach inode in->remove_primary_parent(dn); @@ -892,13 +891,12 @@ void CDir::steal_dentry(CDentry *dn) } } - if (dn->auth_pins || dn->nested_auth_pins) { - // use the helpers here to maintain the auth_pin invariants on the dir inode - int ap = dn->get_num_auth_pins() + dn->get_num_nested_auth_pins(); + { int dap = dn->get_num_dir_auth_pins(); - ceph_assert(dap <= ap); - adjust_nested_auth_pins(ap, dap, NULL); - dn->dir->adjust_nested_auth_pins(-ap, -dap, NULL); + if (dap) { + adjust_nested_auth_pins(dap, NULL); + dn->dir->adjust_nested_auth_pins(-dap, NULL); + } } if (dn->is_dirty()) { @@ -949,7 +947,6 @@ void CDir::finish_old_fragment(MDSInternalContextBase::vec& waiters, bool replay } } - 
ceph_assert(nested_auth_pins == 0); ceph_assert(dir_auth_pins == 0); ceph_assert(auth_pins == 0); @@ -1311,17 +1308,6 @@ void CDir::take_sub_waiting(MDSInternalContextBase::vec& ls) void CDir::add_waiter(uint64_t tag, MDSInternalContextBase *c) { // hierarchical? - - // at free root? - if (tag & WAIT_ATFREEZEROOT) { - if (!(is_freezing_tree_root() || is_frozen_tree_root() || - is_freezing_dir() || is_frozen_dir())) { - // try parent - dout(10) << "add_waiter " << std::hex << tag << std::dec << " " << c << " should be ATFREEZEROOT, " << *this << " is not root, trying parent" << dendl; - inode->parent->dir->add_waiter(tag, c); - return; - } - } // at subtree root? if (tag & WAIT_ATSUBTREEROOT) { @@ -2685,10 +2671,6 @@ void CDir::set_dir_auth(const mds_authority_t &a) dout(10) << " new subtree root, adjusting auth_pins" << dendl; inode->num_subtree_roots++; - - // adjust nested auth pins - if (get_cum_auth_pins()) - inode->adjust_nested_auth_pins(-1, NULL); // unpin parent of frozen dir/tree? if (inode->is_auth()) { @@ -2701,10 +2683,6 @@ void CDir::set_dir_auth(const mds_authority_t &a) dout(10) << " old subtree root, adjusting auth_pins" << dendl; inode->num_subtree_roots--; - - // adjust nested auth pins - if (get_cum_auth_pins()) - inode->adjust_nested_auth_pins(1, NULL); // pin parent of frozen dir/tree? if (inode->is_auth()) { @@ -2748,14 +2726,10 @@ void CDir::auth_pin(void *by) auth_pin_set.insert(by); #endif - dout(10) << "auth_pin by " << by - << " on " << *this - << " count now " << auth_pins << " + " << nested_auth_pins << dendl; + dout(10) << "auth_pin by " << by << " on " << *this << " count now " << auth_pins << dendl; - // nest pins? 
- if (!is_subtree_root() && - get_cum_auth_pins() == 1) - inode->adjust_nested_auth_pins(1, by); + if (freeze_tree_state) + freeze_tree_state->auth_pins += 1; } void CDir::auth_unpin(void *by) @@ -2772,44 +2746,30 @@ void CDir::auth_unpin(void *by) if (auth_pins == 0) put(PIN_AUTHPIN); - dout(10) << "auth_unpin by " << by - << " on " << *this - << " count now " << auth_pins << " + " << nested_auth_pins << dendl; + dout(10) << "auth_unpin by " << by << " on " << *this << " count now " << auth_pins << dendl; ceph_assert(auth_pins >= 0); - - int newcum = get_cum_auth_pins(); + + if (freeze_tree_state) + freeze_tree_state->auth_pins -= 1; maybe_finish_freeze(); // pending freeze? - - // nest? - if (!is_subtree_root() && - newcum == 0) - inode->adjust_nested_auth_pins(-1, by); } -void CDir::adjust_nested_auth_pins(int inc, int dirinc, void *by) +void CDir::adjust_nested_auth_pins(int dirinc, void *by) { - ceph_assert(inc); - nested_auth_pins += inc; + ceph_assert(dirinc); dir_auth_pins += dirinc; - dout(15) << __func__ << " " << inc << "/" << dirinc << " on " << *this + dout(15) << __func__ << " " << dirinc << " on " << *this << " by " << by << " count now " - << auth_pins << " + " << nested_auth_pins << dendl; - ceph_assert(nested_auth_pins >= 0); + << auth_pins << "/" << dir_auth_pins << dendl; ceph_assert(dir_auth_pins >= 0); - int newcum = get_cum_auth_pins(); + if (freeze_tree_state) + freeze_tree_state->auth_pins += dirinc; - maybe_finish_freeze(); // pending freeze? - - // nest? - if (!is_subtree_root()) { - if (newcum == 0) - inode->adjust_nested_auth_pins(-1, by); - else if (newcum == inc) - inode->adjust_nested_auth_pins(1, by); - } + if (dirinc < 0) + maybe_finish_freeze(); // pending freeze? 
} #ifdef MDS_VERIFY_FRAGSTAT @@ -2864,12 +2824,60 @@ void CDir::verify_fragstat() // FREEZE TREE +void CDir::_walk_tree(std::function callback) +{ + + deque dfq; + dfq.push_back(this); + + vector dfv; + while (!dfq.empty()) { + CDir *dir = dfq.front(); + dfq.pop_front(); + + for (auto& p : *dir) { + CDentry *dn = p.second; + if (!dn->get_linkage()->is_primary()) + continue; + CInode *in = dn->get_linkage()->get_inode(); + if (!in->is_dir()) + continue; + + in->get_nested_dirfrags(dfv); + for (auto& dir : dfv) { + auto ret = callback(dir); + if (ret) + dfq.push_back(dir); + } + dfv.clear(); + } + } +} + bool CDir::freeze_tree() { ceph_assert(!is_frozen()); ceph_assert(!is_freezing()); + ceph_assert(!freeze_tree_state); auth_pin(this); + + // Travese the subtree to mark dirfrags as 'freezing' (set freeze_tree_state) + // and to accumulate auth pins and record total count in freeze_tree_state. + // when auth unpin an 'freezing' object, the counter in freeze_tree_state also + // gets decreased. Subtree become 'frozen' when the counter reaches zero. + freeze_tree_state = std::make_shared(this); + freeze_tree_state->auth_pins += get_auth_pins() + get_dir_auth_pins(); + + _walk_tree([this](CDir *dir) { + if (dir->freeze_tree_state) + return false; + dir->freeze_tree_state = freeze_tree_state; + freeze_tree_state->auth_pins += dir->get_auth_pins() + dir->get_dir_auth_pins(); + return true; + } + ); + if (is_freezeable(true)) { _freeze_tree(); auth_unpin(this); @@ -2887,6 +2895,14 @@ void CDir::_freeze_tree() dout(10) << __func__ << " " << *this << dendl; ceph_assert(is_freezeable(true)); + if (freeze_tree_state) { + ceph_assert(is_auth()); + } else { + ceph_assert(!is_auth()); + freeze_tree_state = std::make_shared(this); + } + freeze_tree_state->frozen = true; + // twiddle state if (state_test(STATE_FREEZINGTREE)) { state_clear(STATE_FREEZINGTREE); // actually, this may get set again by next context? 
@@ -2905,12 +2921,29 @@ void CDir::_freeze_tree() auth = authority(); } + _walk_tree([this, &auth] (CDir *dir) { + if (dir->freeze_tree_state != freeze_tree_state) { + inode->mdcache->adjust_subtree_auth(dir, auth); + return false; + } + return true; + } + ); + ceph_assert(auth.first >= 0); ceph_assert(auth.second == CDIR_AUTH_UNKNOWN); auth.second = auth.first; inode->mdcache->adjust_subtree_auth(this, auth); if (!was_subtree) inode->auth_unpin(this); + } else { + // importing subtree ? + _walk_tree([this] (CDir *dir) { + ceph_assert(!dir->freeze_tree_state); + dir->freeze_tree_state = freeze_tree_state; + return true; + } + ); } state_set(STATE_FROZENTREE); @@ -2922,6 +2955,20 @@ void CDir::unfreeze_tree() { dout(10) << __func__ << " " << *this << dendl; + MDSInternalContextBase::vec unfreeze_waiters; + take_waiting(WAIT_UNFREEZE, unfreeze_waiters); + + if (freeze_tree_state) { + _walk_tree([this, &unfreeze_waiters](CDir *dir) { + if (dir->freeze_tree_state != freeze_tree_state) + return false; + dir->freeze_tree_state.reset(); + dir->take_waiting(WAIT_UNFREEZE, unfreeze_waiters); + return true; + } + ); + } + if (state_test(STATE_FROZENTREE)) { // frozen. unfreeze. state_clear(STATE_FROZENTREE); @@ -2939,20 +2986,51 @@ void CDir::unfreeze_tree() auth.second = CDIR_AUTH_UNKNOWN; inode->mdcache->adjust_subtree_auth(this, auth); } - - // waiters? - finish_waiting(WAIT_UNFREEZE); + freeze_tree_state.reset(); } else { - finish_waiting(WAIT_FROZEN, -1); + ceph_assert(state_test(STATE_FREEZINGTREE)); // freezing. stop it. 
- ceph_assert(state_test(STATE_FREEZINGTREE)); state_clear(STATE_FREEZINGTREE); --num_freezing_trees; + freeze_tree_state.reset(); + + finish_waiting(WAIT_FROZEN, -1); auth_unpin(this); - - finish_waiting(WAIT_UNFREEZE); } + + cache->mds->queue_waiters(unfreeze_waiters); +} + +void CDir::adjust_freeze_after_rename(CDir *dir) +{ + if (!freeze_tree_state || dir->freeze_tree_state != freeze_tree_state) + return; + CDir *newdir = dir->get_inode()->get_parent_dir(); + if (newdir == this || newdir->freeze_tree_state == freeze_tree_state) + return; + + ceph_assert(!freeze_tree_state->frozen); + ceph_assert(get_dir_auth_pins() > 0); + + MDSInternalContextBase::vec unfreeze_waiters; + + auto unfreeze = [this, &unfreeze_waiters](CDir *dir) { + if (dir->freeze_tree_state != freeze_tree_state) + return false; + int dec = dir->get_auth_pins() + dir->get_dir_auth_pins(); + // shouldn't become zero because srcdn of rename was auth pinned + ceph_assert(freeze_tree_state->auth_pins > dec); + freeze_tree_state->auth_pins -= dec; + dir->freeze_tree_state.reset(); + dir->take_waiting(WAIT_UNFREEZE, unfreeze_waiters); + return true; + }; + + unfreeze(dir); + dir->_walk_tree(unfreeze); + + cache->mds->queue_waiters(unfreeze_waiters); } bool CDir::can_auth_pin(int *err_ret) const @@ -2975,28 +3053,6 @@ bool CDir::can_auth_pin(int *err_ret) const return !err; } -pair CDir::is_freezing_or_frozen_tree() const -{ - if (!num_freezing_trees && !num_frozen_trees) - return make_pair(false, false); - - bool freezing, frozen; - const CDir *dir = this; - while (1) { - freezing = dir->is_freezing_tree_root(); - frozen = dir->is_frozen_tree_root(); - if (freezing || frozen) - break; - if (dir->is_subtree_root()) - break; - if (dir->inode->parent) - dir = dir->inode->parent->dir; - else - break; // root on replica - } - return make_pair(freezing, frozen); -} - CDir *CDir::get_freezing_tree_root() { if (num_freezing_trees == 0) @@ -3014,20 +3070,6 @@ CDir *CDir::get_freezing_tree_root() } } -CDir 
*CDir::get_frozen_tree_root() -{ - ceph_assert(is_frozen()); - CDir *dir = this; - while (1) { - if (dir->is_frozen_tree_root()) - return dir; - if (dir->inode->parent) - dir = dir->inode->parent->dir; - else - ceph_abort(); - } -} - class C_Dir_AuthUnpin : public CDirContext { public: explicit C_Dir_AuthUnpin(CDir *d) : CDirContext(d) {} @@ -3038,20 +3080,30 @@ class C_Dir_AuthUnpin : public CDirContext { void CDir::maybe_finish_freeze() { - if (auth_pins != 1 || dir_auth_pins != 0) + if (dir_auth_pins != 0) return; // we can freeze the _dir_ even with nested pins... if (state_test(STATE_FREEZINGDIR)) { - _freeze_dir(); - auth_unpin(this); - finish_waiting(WAIT_FROZEN); + if (auth_pins == 1) { + _freeze_dir(); + auth_unpin(this); + finish_waiting(WAIT_FROZEN); + } } - if (nested_auth_pins != 0) - return; + if (freeze_tree_state) { + if (freeze_tree_state->frozen || + freeze_tree_state->auth_pins != 1) + return; + + if (freeze_tree_state->dir != this) { + freeze_tree_state->dir->maybe_finish_freeze(); + return; + } + + ceph_assert(state_test(STATE_FREEZINGTREE)); - if (state_test(STATE_FREEZINGTREE)) { if (!is_subtree_root() && inode->is_frozen()) { dout(10) << __func__ << " !subtree root and frozen inode, waiting for unfreeze on " << inode << dendl; // retake an auth_pin... 
diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 33a5fa9b015..a4490e062f0 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -148,7 +148,6 @@ public: static const int WAIT_DNLOCK_OFFSET = 4; static const uint64_t WAIT_ANY_MASK = (uint64_t)(-1); - static const uint64_t WAIT_ATFREEZEROOT = (WAIT_UNFREEZE); static const uint64_t WAIT_ATSUBTREEROOT = (WAIT_SINGLEAUTH); // -- dump flags -- @@ -378,7 +377,6 @@ protected: static int num_freezing_trees; int dir_auth_pins; - int request_pins; // cache control (defined for authority; hints for replicas) __s32 dir_rep; @@ -673,15 +671,6 @@ public: void first_get() override; void last_put() override; - void request_pin_get() { - if (request_pins == 0) get(PIN_REQUEST); - request_pins++; - } - void request_pin_put() { - request_pins--; - if (request_pins == 0) put(PIN_REQUEST); - } - // -- waiters -- protected: mempool::mds_co::compact_map< string_snap_t, MDSInternalContextBase::vec_alloc > waiting_on_dentry; // FIXME string_snap_t not in mempool @@ -710,20 +699,30 @@ public: // -- auth pins -- bool can_auth_pin(int *err_ret=nullptr) const override; - int get_cum_auth_pins() const { return auth_pins + nested_auth_pins; } int get_auth_pins() const { return auth_pins; } - int get_nested_auth_pins() const { return nested_auth_pins; } int get_dir_auth_pins() const { return dir_auth_pins; } void auth_pin(void *who) override; void auth_unpin(void *who) override; - void adjust_nested_auth_pins(int inc, int dirinc, void *by); + void adjust_nested_auth_pins(int dirinc, void *by); void verify_fragstat(); // -- freezing -- + struct freeze_tree_state_t { + CDir *dir; // freezing/frozen tree root + int auth_pins = 0; + bool frozen = false; + freeze_tree_state_t(CDir *d) : dir(d) {} + }; + // all dirfrags within freezing/frozen tree reference the 'state' + std::shared_ptr freeze_tree_state; + + void _walk_tree(std::function cb); + bool freeze_tree(); void _freeze_tree(); void unfreeze_tree(); + void adjust_freeze_after_rename(CDir *dir); 
bool freeze_dir(); void _freeze_dir(); @@ -731,7 +730,14 @@ public: void maybe_finish_freeze(); - pair is_freezing_or_frozen_tree() const; + pair is_freezing_or_frozen_tree() const { + if (freeze_tree_state) { + if (freeze_tree_state->frozen) + return make_pair(false, true); + return make_pair(true, false); + } + return make_pair(false, false); + } bool is_freezing() const override { return is_freezing_dir() || is_freezing_tree(); } bool is_freezing_tree() const { @@ -751,10 +757,11 @@ public: } bool is_frozen_tree_root() const { return state & STATE_FROZENTREE; } bool is_frozen_dir() const { return state & STATE_FROZENDIR; } - + bool is_freezeable(bool freezing=false) const { // no nested auth pins. - if ((auth_pins-freezing) > 0 || nested_auth_pins > 0) + if (auth_pins - (freezing ? 1 : 0) > 0 || + (freeze_tree_state && freeze_tree_state->auth_pins != auth_pins)) return false; // inode must not be frozen. @@ -763,8 +770,9 @@ public: return true; } + bool is_freezeable_dir(bool freezing=false) const { - if ((auth_pins-freezing) > 0 || dir_auth_pins > 0) + if ((auth_pins - freezing) > 0 || dir_auth_pins > 0) return false; // if not subtree root, inode must not be frozen (tree--frozen_dir is okay). 
@@ -774,9 +782,6 @@ public: return true; } - CDir *get_frozen_tree_root(); - - ostream& print_db_line_prefix(ostream& out) override; void print(ostream& out) override; void dump(Formatter *f, int flags = DUMP_DEFAULT) const; diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index ad7b3eb9cd9..992fe95d88d 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -127,8 +127,8 @@ ostream& operator<<(ostream& out, const CInode& in) if (in.get_projected_version() > in.get_version()) out << " pv" << in.get_projected_version(); - if (in.is_auth_pinned()) { - out << " ap=" << in.get_num_auth_pins() << "+" << in.get_num_nested_auth_pins(); + if (in.get_num_auth_pins()) { + out << " ap=" << in.get_num_auth_pins(); #ifdef MDS_AUTHPIN_SET in.print_authpin_set(out); #endif @@ -2559,12 +2559,10 @@ void CInode::auth_pin(void *by) auth_pin_set.insert(by); #endif - dout(10) << "auth_pin by " << by << " on " << *this - << " now " << auth_pins << "+" << nested_auth_pins - << dendl; + dout(10) << "auth_pin by " << by << " on " << *this << " now " << auth_pins << dendl; if (parent) - parent->adjust_nested_auth_pins(1, 1, this); + parent->adjust_nested_auth_pins(1, this); } void CInode::auth_unpin(void *by) @@ -2582,14 +2580,12 @@ void CInode::auth_unpin(void *by) if (auth_pins == 0) put(PIN_AUTHPIN); - dout(10) << "auth_unpin by " << by << " on " << *this - << " now " << auth_pins << "+" << nested_auth_pins - << dendl; + dout(10) << "auth_unpin by " << by << " on " << *this << " now " << auth_pins << dendl; ceph_assert(auth_pins >= 0); if (parent) - parent->adjust_nested_auth_pins(-1, -1, by); + parent->adjust_nested_auth_pins(-1, by); if (is_freezing_inode() && auth_pins == auth_pin_freeze_allowance) { @@ -2602,31 +2598,6 @@ void CInode::auth_unpin(void *by) } } -void CInode::adjust_nested_auth_pins(int a, void *by) -{ - ceph_assert(a); - nested_auth_pins += a; - dout(35) << __func__ << " by " << by - << " change " << a << " yields " - << auth_pins << "+" << nested_auth_pins << 
dendl; - ceph_assert(nested_auth_pins >= 0); - - if (g_conf()->mds_debug_auth_pins) { - // audit - int s = 0; - for (const auto &p : dirfrags) { - CDir *dir = p.second; - if (!dir->is_subtree_root() && dir->get_cum_auth_pins()) - s++; - } - ceph_assert(s == nested_auth_pins); - } - - if (parent) - parent->adjust_nested_auth_pins(a, 0, by); -} - - // authority mds_authority_t CInode::authority() const diff --git a/src/mds/CInode.h b/src/mds/CInode.h index f74c7d9fa77..8f69ed31d58 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -1060,7 +1060,6 @@ public: mds_authority_t authority() const override; // -- auth pins -- - void adjust_nested_auth_pins(int a, void *by); bool can_auth_pin(int *err_ret=nullptr) const override; void auth_pin(void *by) override; void auth_unpin(void *by) override; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 8743ec9955a..27c6b7c9e12 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -1346,14 +1346,21 @@ void MDCache::adjust_subtree_after_rename(CInode *diri, CDir *olddir, bool pop) projected_subtree_renames.erase(p); } + vector dfls; + + // adjust total auth pin of freezing subtree + if (olddir != newdir) { + diri->get_nested_dirfrags(dfls); + for (auto dir : dfls) + olddir->adjust_freeze_after_rename(dir); + dfls.clear(); + } + // adjust subtree - list dfls; // make sure subtree dirfrags are at the front of the list diri->get_subtree_dirfrags(dfls); diri->get_nested_dirfrags(dfls); - for (list::iterator p = dfls.begin(); p != dfls.end(); ++p) { - CDir *dir = *p; - + for (auto dir : dfls) { dout(10) << "dirfrag " << *dir << dendl; CDir *oldparent = get_subtree_root(olddir); dout(10) << " old parent " << *oldparent << dendl; @@ -7701,8 +7708,8 @@ bool MDCache::shutdown_pass() ceph_assert(!migrator->is_exporting()); ceph_assert(!migrator->is_importing()); - if ((myin && myin->is_auth_pinned()) || - (mydir && mydir->is_auth_pinned())) { + if ((myin && myin->get_num_auth_pins()) || + (mydir && (mydir->get_auth_pins() 
|| mydir->get_dir_auth_pins()))) { dout(7) << "still have auth pinned objects" << dendl; return false; } diff --git a/src/mds/MDSCacheObject.cc b/src/mds/MDSCacheObject.cc index eabcabc7e1a..4a1da8827ba 100644 --- a/src/mds/MDSCacheObject.cc +++ b/src/mds/MDSCacheObject.cc @@ -42,7 +42,6 @@ void MDSCacheObject::dump(Formatter *f) const f->close_section(); // replica_state f->dump_int("auth_pins", auth_pins); - f->dump_int("nested_auth_pins", nested_auth_pins); f->dump_bool("is_frozen", is_frozen()); f->dump_bool("is_freezing", is_freezing()); diff --git a/src/mds/MDSCacheObject.h b/src/mds/MDSCacheObject.h index c69ca9d9d07..2f009e13f67 100644 --- a/src/mds/MDSCacheObject.h +++ b/src/mds/MDSCacheObject.h @@ -216,17 +216,14 @@ protected: #endif } - protected: +protected: int auth_pins = 0; - int nested_auth_pins = 0; #ifdef MDS_AUTHPIN_SET mempool::mds_co::multiset auth_pin_set; #endif - public: - bool is_auth_pinned() const { return auth_pins || nested_auth_pins; } +public: int get_num_auth_pins() const { return auth_pins; } - int get_num_nested_auth_pins() const { return nested_auth_pins; } #ifdef MDS_AUTHPIN_SET void print_authpin_set(std::ostream& out) const { out << " (" << auth_pin_set << ")"; diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 6e9b6c66331..1b06e344ff7 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -237,8 +237,9 @@ void Migrator::find_stale_export_freeze() ++p; if (stat.state != EXPORT_DISCOVERING && stat.state != EXPORT_FREEZING) continue; - if (stat.last_cum_auth_pins != dir->get_cum_auth_pins()) { - stat.last_cum_auth_pins = dir->get_cum_auth_pins(); + ceph_assert(dir->freeze_tree_state); + if (stat.last_cum_auth_pins != dir->freeze_tree_state->auth_pins) { + stat.last_cum_auth_pins = dir->freeze_tree_state->auth_pins; stat.last_cum_auth_pins_change = now; continue; } @@ -1346,7 +1347,6 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid) ceph_assert(it->second.state == EXPORT_FREEZING); 
ceph_assert(dir->is_frozen_tree_root()); - ceph_assert(dir->get_cum_auth_pins() == 0); CInode *diri = dir->get_inode(); @@ -1631,7 +1631,6 @@ void Migrator::export_go_synced(CDir *dir, uint64_t tid) ceph_assert(g_conf()->mds_kill_export_at != 7); ceph_assert(dir->is_frozen_tree_root()); - ceph_assert(dir->get_cum_auth_pins() == 0); // set ambiguous auth cache->adjust_subtree_auth(dir, mds->get_nodeid(), dest); @@ -2706,6 +2705,7 @@ void Migrator::handle_export_dir(const MExportDir::const_ref &m) dout(7) << "handle_export_dir importing " << *dir << " from " << oldauth << dendl; ceph_assert(!dir->is_auth()); + ceph_assert(dir->freeze_tree_state); map::iterator it = import_state.find(m->dirfrag); ceph_assert(it != import_state.end()); @@ -3373,6 +3373,11 @@ int Migrator::decode_import_dir(bufferlist::const_iterator& blp, dout(7) << "decode_import_dir " << *dir << dendl; + if (!dir->freeze_tree_state) { + ceph_assert(dir->get_version() == 0); + dir->freeze_tree_state = import_root->freeze_tree_state; + } + // assimilate state dir->decode_import(blp, ls); @@ -3392,12 +3397,9 @@ int Migrator::decode_import_dir(bufferlist::const_iterator& blp, // NOTE: a pass of imported data is guaranteed to get all of my waiters because // a replica's presense in my cache implies/forces it's presense in authority's. MDSInternalContextBase::vec waiters; - dir->take_waiting(CDir::WAIT_ANY_MASK, waiters); - for (MDSInternalContextBase::vec::iterator it = waiters.begin(); - it != waiters.end(); - ++it) - import_root->add_waiter(CDir::WAIT_UNFREEZE, *it); // UNFREEZE will get kicked both on success or failure + for (auto c : waiters) + dir->add_waiter(CDir::WAIT_UNFREEZE, c); // UNFREEZE will get kicked both on success or failure dout(15) << "doing contents" << dendl;