out << " dir_auth=" << dir.get_dir_auth();
}
- if (dir.get_cum_auth_pins()) {
+ if (dir.get_auth_pins() || dir.get_dir_auth_pins()) {
out << " ap=" << dir.get_auth_pins()
- << "+" << dir.get_dir_auth_pins()
- << "+" << dir.get_nested_auth_pins();
+ << "+" << dir.get_dir_auth_pins();
#ifdef MDS_AUTHPIN_SET
dir.print_authpin_set(out);
#endif
num_head_items(0), num_head_null(0),
num_snap_items(0), num_snap_null(0),
num_dirty(0), committing_version(0), committed_version(0),
- dir_auth_pins(0), request_pins(0),
+ dir_auth_pins(0),
dir_rep(REP_NONE),
pop_me(mdcache->decayrate),
pop_nested(mdcache->decayrate),
adjust_num_inodes_with_caps(1);
// adjust auth pin count
- if (in->auth_pins + in->nested_auth_pins)
- dn->adjust_nested_auth_pins(in->auth_pins + in->nested_auth_pins, in->auth_pins, NULL);
+ if (in->auth_pins)
+ dn->adjust_nested_auth_pins(in->auth_pins, NULL);
// verify open snaprealm parent
if (in->snaprealm)
}
-void CDir::unlink_inode_work( CDentry *dn )
+void CDir::unlink_inode_work(CDentry *dn)
{
CInode *in = dn->get_linkage()->get_inode();
adjust_num_inodes_with_caps(-1);
// unlink auth_pin count
- if (in->auth_pins + in->nested_auth_pins)
- dn->adjust_nested_auth_pins(0 - (in->auth_pins + in->nested_auth_pins), 0 - in->auth_pins, NULL);
+ if (in->auth_pins)
+ dn->adjust_nested_auth_pins(-in->auth_pins, nullptr);
// detach inode
in->remove_primary_parent(dn);
}
}
- if (dn->auth_pins || dn->nested_auth_pins) {
- // use the helpers here to maintain the auth_pin invariants on the dir inode
- int ap = dn->get_num_auth_pins() + dn->get_num_nested_auth_pins();
+ {
int dap = dn->get_num_dir_auth_pins();
- ceph_assert(dap <= ap);
- adjust_nested_auth_pins(ap, dap, NULL);
- dn->dir->adjust_nested_auth_pins(-ap, -dap, NULL);
+ if (dap) {
+ adjust_nested_auth_pins(dap, NULL);
+ dn->dir->adjust_nested_auth_pins(-dap, NULL);
+ }
}
if (dn->is_dirty()) {
}
}
- ceph_assert(nested_auth_pins == 0);
ceph_assert(dir_auth_pins == 0);
ceph_assert(auth_pins == 0);
void CDir::add_waiter(uint64_t tag, MDSInternalContextBase *c)
{
// hierarchical?
-
- // at free root?
- if (tag & WAIT_ATFREEZEROOT) {
- if (!(is_freezing_tree_root() || is_frozen_tree_root() ||
- is_freezing_dir() || is_frozen_dir())) {
- // try parent
- dout(10) << "add_waiter " << std::hex << tag << std::dec << " " << c << " should be ATFREEZEROOT, " << *this << " is not root, trying parent" << dendl;
- inode->parent->dir->add_waiter(tag, c);
- return;
- }
- }
// at subtree root?
if (tag & WAIT_ATSUBTREEROOT) {
dout(10) << " new subtree root, adjusting auth_pins" << dendl;
inode->num_subtree_roots++;
-
- // adjust nested auth pins
- if (get_cum_auth_pins())
- inode->adjust_nested_auth_pins(-1, NULL);
// unpin parent of frozen dir/tree?
if (inode->is_auth()) {
dout(10) << " old subtree root, adjusting auth_pins" << dendl;
inode->num_subtree_roots--;
-
- // adjust nested auth pins
- if (get_cum_auth_pins())
- inode->adjust_nested_auth_pins(1, NULL);
// pin parent of frozen dir/tree?
if (inode->is_auth()) {
auth_pin_set.insert(by);
#endif
- dout(10) << "auth_pin by " << by
- << " on " << *this
- << " count now " << auth_pins << " + " << nested_auth_pins << dendl;
+ dout(10) << "auth_pin by " << by << " on " << *this << " count now " << auth_pins << dendl;
- // nest pins?
- if (!is_subtree_root() &&
- get_cum_auth_pins() == 1)
- inode->adjust_nested_auth_pins(1, by);
+ if (freeze_tree_state)
+ freeze_tree_state->auth_pins += 1;
}
void CDir::auth_unpin(void *by)
if (auth_pins == 0)
put(PIN_AUTHPIN);
- dout(10) << "auth_unpin by " << by
- << " on " << *this
- << " count now " << auth_pins << " + " << nested_auth_pins << dendl;
+ dout(10) << "auth_unpin by " << by << " on " << *this << " count now " << auth_pins << dendl;
ceph_assert(auth_pins >= 0);
-
- int newcum = get_cum_auth_pins();
+
+ if (freeze_tree_state)
+ freeze_tree_state->auth_pins -= 1;
maybe_finish_freeze(); // pending freeze?
-
- // nest?
- if (!is_subtree_root() &&
- newcum == 0)
- inode->adjust_nested_auth_pins(-1, by);
}
+// Apply the non-zero delta `dirinc` to this dirfrag's dir_auth_pins (asserted
+// to stay non-negative). When the dirfrag carries a freeze_tree_state, the
+// same delta is applied to that state's auth_pins counter. `by` is only
+// used in the debug log line below.
-void CDir::adjust_nested_auth_pins(int inc, int dirinc, void *by)
+void CDir::adjust_nested_auth_pins(int dirinc, void *by)
{
- ceph_assert(inc);
- nested_auth_pins += inc;
+ ceph_assert(dirinc);
dir_auth_pins += dirinc;
- dout(15) << __func__ << " " << inc << "/" << dirinc << " on " << *this
+ dout(15) << __func__ << " " << dirinc << " on " << *this
<< " by " << by << " count now "
- << auth_pins << " + " << nested_auth_pins << dendl;
- ceph_assert(nested_auth_pins >= 0);
+ << auth_pins << "/" << dir_auth_pins << dendl;
ceph_assert(dir_auth_pins >= 0);
- int newcum = get_cum_auth_pins();
+ // keep the shared freeze-tree counter in step with this dirfrag's pins
+ if (freeze_tree_state)
+ freeze_tree_state->auth_pins += dirinc;
- maybe_finish_freeze(); // pending freeze?
-
- // nest?
- if (!is_subtree_root()) {
- if (newcum == 0)
- inode->adjust_nested_auth_pins(-1, by);
- else if (newcum == inc)
- inode->adjust_nested_auth_pins(1, by);
- }
+ // pins were dropped: re-check whether a pending freeze can finish
+ if (dirinc < 0)
+ maybe_finish_freeze(); // pending freeze?
}
#ifdef MDS_VERIFY_FRAGSTAT
// FREEZE TREE
+// Breadth-first walk over the dirfrags below this one. For every primary
+// dentry whose inode is a directory, `callback` is invoked on each dirfrag
+// that get_nested_dirfrags() yields for that inode; the walk descends into a
+// dirfrag only when the callback returns true. The callback is never invoked
+// on the starting dirfrag itself.
+void CDir::_walk_tree(std::function<bool(CDir*)> callback)
+{
+  deque<CDir*> pending;
+  pending.push_back(this);
+
+  // scratch buffer, refilled per directory inode and cleared after use
+  vector<CDir*> children;
+  while (!pending.empty()) {
+    CDir *cur = pending.front();
+    pending.pop_front();
+
+    for (auto& entry : *cur) {
+      CDentry *dn = entry.second;
+      if (!dn->get_linkage()->is_primary())
+        continue;
+      CInode *in = dn->get_linkage()->get_inode();
+      if (!in->is_dir())
+        continue;
+
+      in->get_nested_dirfrags(children);
+      for (auto& child : children) {
+        // a false return prunes the walk at this dirfrag
+        if (callback(child))
+          pending.push_back(child);
+      }
+      children.clear();
+    }
+  }
+}
+
bool CDir::freeze_tree()
{
ceph_assert(!is_frozen());
ceph_assert(!is_freezing());
+ ceph_assert(!freeze_tree_state);
auth_pin(this);
+
+ // Traverse the subtree to mark dirfrags as 'freezing' (set freeze_tree_state)
+ // and to accumulate auth pins, recording the total count in freeze_tree_state.
+ // When a 'freezing' object is auth-unpinned, the counter in freeze_tree_state
+ // is decreased as well. The subtree becomes 'frozen' when the counter reaches zero.
+ freeze_tree_state = std::make_shared<freeze_tree_state_t>(this);
+ freeze_tree_state->auth_pins += get_auth_pins() + get_dir_auth_pins();
+
+ _walk_tree([this](CDir *dir) {
+ if (dir->freeze_tree_state)
+ return false;
+ dir->freeze_tree_state = freeze_tree_state;
+ freeze_tree_state->auth_pins += dir->get_auth_pins() + dir->get_dir_auth_pins();
+ return true;
+ }
+ );
+
if (is_freezeable(true)) {
_freeze_tree();
auth_unpin(this);
dout(10) << __func__ << " " << *this << dendl;
ceph_assert(is_freezeable(true));
+ if (freeze_tree_state) {
+ ceph_assert(is_auth());
+ } else {
+ ceph_assert(!is_auth());
+ freeze_tree_state = std::make_shared<freeze_tree_state_t>(this);
+ }
+ freeze_tree_state->frozen = true;
+
// twiddle state
if (state_test(STATE_FREEZINGTREE)) {
state_clear(STATE_FREEZINGTREE); // actually, this may get set again by next context?
auth = authority();
}
+ _walk_tree([this, &auth] (CDir *dir) {
+ if (dir->freeze_tree_state != freeze_tree_state) {
+ inode->mdcache->adjust_subtree_auth(dir, auth);
+ return false;
+ }
+ return true;
+ }
+ );
+
ceph_assert(auth.first >= 0);
ceph_assert(auth.second == CDIR_AUTH_UNKNOWN);
auth.second = auth.first;
inode->mdcache->adjust_subtree_auth(this, auth);
if (!was_subtree)
inode->auth_unpin(this);
+ } else {
+ // importing subtree ?
+ _walk_tree([this] (CDir *dir) {
+ ceph_assert(!dir->freeze_tree_state);
+ dir->freeze_tree_state = freeze_tree_state;
+ return true;
+ }
+ );
}
state_set(STATE_FROZENTREE);
{
dout(10) << __func__ << " " << *this << dendl;
+ MDSInternalContextBase::vec unfreeze_waiters;
+ take_waiting(WAIT_UNFREEZE, unfreeze_waiters);
+
+ if (freeze_tree_state) {
+ _walk_tree([this, &unfreeze_waiters](CDir *dir) {
+ if (dir->freeze_tree_state != freeze_tree_state)
+ return false;
+ dir->freeze_tree_state.reset();
+ dir->take_waiting(WAIT_UNFREEZE, unfreeze_waiters);
+ return true;
+ }
+ );
+ }
+
if (state_test(STATE_FROZENTREE)) {
// frozen. unfreeze.
state_clear(STATE_FROZENTREE);
auth.second = CDIR_AUTH_UNKNOWN;
inode->mdcache->adjust_subtree_auth(this, auth);
}
-
- // waiters?
- finish_waiting(WAIT_UNFREEZE);
+ freeze_tree_state.reset();
} else {
- finish_waiting(WAIT_FROZEN, -1);
+ ceph_assert(state_test(STATE_FREEZINGTREE));
// freezing. stop it.
- ceph_assert(state_test(STATE_FREEZINGTREE));
state_clear(STATE_FREEZINGTREE);
--num_freezing_trees;
+ freeze_tree_state.reset();
+
+ finish_waiting(WAIT_FROZEN, -1);
auth_unpin(this);
-
- finish_waiting(WAIT_UNFREEZE);
}
+
+ cache->mds->queue_waiters(unfreeze_waiters);
+}
+
+// Detach `dir`, and the dirfrags below it that share this dirfrag's
+// freeze_tree_state, from that freeze state when dir's inode now hangs under
+// a parent dirfrag that is neither this dirfrag nor part of the same
+// freezing tree. WAIT_UNFREEZE waiters of the detached dirfrags are collected
+// and queued at the end.
+void CDir::adjust_freeze_after_rename(CDir *dir)
+{
+ // nothing to do unless both dirfrags share one freeze state
+ if (!freeze_tree_state || dir->freeze_tree_state != freeze_tree_state)
+ return;
+ CDir *newdir = dir->get_inode()->get_parent_dir();
+ // new parent is this dirfrag or is covered by the same freeze state
+ if (newdir == this || newdir->freeze_tree_state == freeze_tree_state)
+ return;
+
+ ceph_assert(!freeze_tree_state->frozen);
+ ceph_assert(get_dir_auth_pins() > 0);
+
+ MDSInternalContextBase::vec unfreeze_waiters;
+
+ // Per-dirfrag step: subtract the dirfrag's pins from the shared counter,
+ // drop its reference to the state and collect its WAIT_UNFREEZE waiters.
+ // Returns false for dirfrags that do not share the state, which stops
+ // _walk_tree from descending into them. Note the parameter shadows the
+ // enclosing function's `dir`.
+ auto unfreeze = [this, &unfreeze_waiters](CDir *dir) {
+ if (dir->freeze_tree_state != freeze_tree_state)
+ return false;
+ int dec = dir->get_auth_pins() + dir->get_dir_auth_pins();
+ // shouldn't become zero because srcdn of rename was auth pinned
+ ceph_assert(freeze_tree_state->auth_pins > dec);
+ freeze_tree_state->auth_pins -= dec;
+ dir->freeze_tree_state.reset();
+ dir->take_waiting(WAIT_UNFREEZE, unfreeze_waiters);
+ return true;
+ };
+
+ // _walk_tree does not visit its starting dirfrag, so handle `dir` first
+ unfreeze(dir);
+ dir->_walk_tree(unfreeze);
+
+ cache->mds->queue_waiters(unfreeze_waiters);
}
bool CDir::can_auth_pin(int *err_ret) const
return !err;
}
-pair<bool,bool> CDir::is_freezing_or_frozen_tree() const
-{
- if (!num_freezing_trees && !num_frozen_trees)
- return make_pair(false, false);
-
- bool freezing, frozen;
- const CDir *dir = this;
- while (1) {
- freezing = dir->is_freezing_tree_root();
- frozen = dir->is_frozen_tree_root();
- if (freezing || frozen)
- break;
- if (dir->is_subtree_root())
- break;
- if (dir->inode->parent)
- dir = dir->inode->parent->dir;
- else
- break; // root on replica
- }
- return make_pair(freezing, frozen);
-}
-
CDir *CDir::get_freezing_tree_root()
{
if (num_freezing_trees == 0)
}
}
-CDir *CDir::get_frozen_tree_root()
-{
- ceph_assert(is_frozen());
- CDir *dir = this;
- while (1) {
- if (dir->is_frozen_tree_root())
- return dir;
- if (dir->inode->parent)
- dir = dir->inode->parent->dir;
- else
- ceph_abort();
- }
-}
-
class C_Dir_AuthUnpin : public CDirContext {
public:
explicit C_Dir_AuthUnpin(CDir *d) : CDirContext(d) {}
void CDir::maybe_finish_freeze()
{
- if (auth_pins != 1 || dir_auth_pins != 0)
+ if (dir_auth_pins != 0)
return;
// we can freeze the _dir_ even with nested pins...
if (state_test(STATE_FREEZINGDIR)) {
- _freeze_dir();
- auth_unpin(this);
- finish_waiting(WAIT_FROZEN);
+ if (auth_pins == 1) {
+ _freeze_dir();
+ auth_unpin(this);
+ finish_waiting(WAIT_FROZEN);
+ }
}
- if (nested_auth_pins != 0)
- return;
+ if (freeze_tree_state) {
+ if (freeze_tree_state->frozen ||
+ freeze_tree_state->auth_pins != 1)
+ return;
+
+ if (freeze_tree_state->dir != this) {
+ freeze_tree_state->dir->maybe_finish_freeze();
+ return;
+ }
+
+ ceph_assert(state_test(STATE_FREEZINGTREE));
- if (state_test(STATE_FREEZINGTREE)) {
if (!is_subtree_root() && inode->is_frozen()) {
dout(10) << __func__ << " !subtree root and frozen inode, waiting for unfreeze on " << inode << dendl;
// retake an auth_pin...
static const int WAIT_DNLOCK_OFFSET = 4;
static const uint64_t WAIT_ANY_MASK = (uint64_t)(-1);
- static const uint64_t WAIT_ATFREEZEROOT = (WAIT_UNFREEZE);
static const uint64_t WAIT_ATSUBTREEROOT = (WAIT_SINGLEAUTH);
// -- dump flags --
static int num_freezing_trees;
int dir_auth_pins;
- int request_pins;
// cache control (defined for authority; hints for replicas)
__s32 dir_rep;
void first_get() override;
void last_put() override;
- void request_pin_get() {
- if (request_pins == 0) get(PIN_REQUEST);
- request_pins++;
- }
- void request_pin_put() {
- request_pins--;
- if (request_pins == 0) put(PIN_REQUEST);
- }
-
// -- waiters --
protected:
mempool::mds_co::compact_map< string_snap_t, MDSInternalContextBase::vec_alloc<mempool::mds_co::pool_allocator> > waiting_on_dentry; // FIXME string_snap_t not in mempool
// -- auth pins --
bool can_auth_pin(int *err_ret=nullptr) const override;
- int get_cum_auth_pins() const { return auth_pins + nested_auth_pins; }
int get_auth_pins() const { return auth_pins; }
- int get_nested_auth_pins() const { return nested_auth_pins; }
int get_dir_auth_pins() const { return dir_auth_pins; }
void auth_pin(void *who) override;
void auth_unpin(void *who) override;
- void adjust_nested_auth_pins(int inc, int dirinc, void *by);
+ void adjust_nested_auth_pins(int dirinc, void *by);
void verify_fragstat();
// -- freezing --
+  // Bookkeeping for one freezing/frozen tree. Created by the tree root via
+  // std::make_shared and referenced from the dirfrags that belong to the tree.
+  struct freeze_tree_state_t {
+    CDir *dir; // freezing/frozen tree root
+    int auth_pins = 0;   // pin total accumulated over the tree's dirfrags
+    bool frozen = false; // set to true by _freeze_tree()
+    // explicit: a bare CDir* must never convert silently into a freeze state
+    explicit freeze_tree_state_t(CDir *d) : dir(d) {}
+  };
+ // all dirfrags within freezing/frozen tree reference the 'state'
+ std::shared_ptr<freeze_tree_state_t> freeze_tree_state;
+
+ void _walk_tree(std::function<bool(CDir*)> cb);
+
bool freeze_tree();
void _freeze_tree();
void unfreeze_tree();
+ void adjust_freeze_after_rename(CDir *dir);
bool freeze_dir();
void _freeze_dir();
void maybe_finish_freeze();
- pair<bool,bool> is_freezing_or_frozen_tree() const;
+  // Tree-freeze status of this dirfrag as {freezing, frozen}. Both are false
+  // when no freeze_tree_state is attached; otherwise exactly one is true,
+  // selected by the state's `frozen` flag.
+  pair<bool,bool> is_freezing_or_frozen_tree() const {
+    if (!freeze_tree_state)
+      return make_pair(false, false);
+    const bool frozen = freeze_tree_state->frozen;
+    return make_pair(!frozen, frozen);
+  }
bool is_freezing() const override { return is_freezing_dir() || is_freezing_tree(); }
bool is_freezing_tree() const {
}
bool is_frozen_tree_root() const { return state & STATE_FROZENTREE; }
bool is_frozen_dir() const { return state & STATE_FROZENDIR; }
-
+
bool is_freezeable(bool freezing=false) const {
// no nested auth pins.
- if ((auth_pins-freezing) > 0 || nested_auth_pins > 0)
+ if (auth_pins - (freezing ? 1 : 0) > 0 ||
+ (freeze_tree_state && freeze_tree_state->auth_pins != auth_pins))
return false;
// inode must not be frozen.
return true;
}
+
bool is_freezeable_dir(bool freezing=false) const {
- if ((auth_pins-freezing) > 0 || dir_auth_pins > 0)
+ if ((auth_pins - freezing) > 0 || dir_auth_pins > 0)
return false;
// if not subtree root, inode must not be frozen (tree--frozen_dir is okay).
return true;
}
- CDir *get_frozen_tree_root();
-
-
ostream& print_db_line_prefix(ostream& out) override;
void print(ostream& out) override;
void dump(Formatter *f, int flags = DUMP_DEFAULT) const;