A special wait mask bit (WAIT_AUTHCHANGE) is passed through the lock wait mask to the parent object.
The caller adds the waiting object to a list on the subtree root.
Taking the object's last such waiter automatically removes it from that list.
Subtree topology changes adjust the auth-change wait lists.
The Migrator's auth change update wakes the waiters. Import/export should be
protected by freeze/thaw or the blanket wakeups.
}
+
+CDir *CDentry::get_containing_subtree()  // subtree lookup for a dentry: delegates to the dir returned by get_dir()
+{
+ return get_dir()->get_containing_subtree();
+}
lru_unpin();
}
+ CDir *get_containing_subtree();
+
// auth pins
bool can_auth_pin();
void auth_pin(void *by);
// CDir
CDir::CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth) :
- item_dirty(this), item_new(this)
+ item_dirty(this), item_new(this), waiting_on_auth_change(member_offset(MDSCacheObject, item_waiting_on_auth_change))
{
g_num_dir++;
g_num_dira++;
-/* NOTE: this checks dentry waiters too */
+/* NOTE: this checks dentry and authchange waiters too */
void CDir::take_waiting(uint64_t mask, list<Context*>& ls)
{
if ((mask & WAIT_DENTRY) && waiting_on_dentry.size()) {
}
put(PIN_DNWAITER);
}
+
+ if (mask & MDSCacheObject::WAIT_AUTHCHANGE) {  // also collect AUTHCHANGE waiters from every object linked on this dir's list
+ elist<MDSCacheObject*>::iterator p = waiting_on_auth_change.begin();
+ while (!p.end()) {
+ MDSCacheObject *o = *p;
+ ++p;  // advance before the call below, which may unlink *o from the list being walked
+ o->take_waiting(MDSCacheObject::WAIT_AUTHCHANGE, ls); // careful, this removes *o from the elist
+ }
+ }
// waiting
MDSCacheObject::take_waiting(mask, ls);
+CDir *CDir::get_containing_subtree()  // subtree lookup for a dir: asks the cache via get_subtree_root(this)
+{
+ return cache->get_subtree_root(this);
+}
int num_dentries_auth_subtree_nested;
+ // extra wait stuff
+ elist<MDSCacheObject*> waiting_on_auth_change; // only on subtree roots
+
+public:
+ void add_auth_change_waiter(MDSCacheObject *o) {  // link o (through its embedded list item) onto this dir's auth-change waiter list
+ waiting_on_auth_change.push_back(&o->item_waiting_on_auth_change);  // NOTE(review): callers rely on push_back first unlinking o from any list it is already on -- confirm against elist
+ }
+protected:
+
+
// friends
friend class Migrator;
friend class CInode;
public:
bool try_trim_snap_dentry(CDentry *dn, const set<snapid_t>& snaps);
+ CDir *get_containing_subtree();
+
public:
void split(int bits, list<CDir*>& subs, list<Context*>& waiters, bool replay);
+CDir *CInode::get_containing_subtree()  // subtree lookup for an inode: goes through the dir holding its projected parent dentry
+{
+ return get_projected_parent_dn()->get_dir()->get_containing_subtree();  // NOTE(review): the parent dentry is dereferenced unchecked -- confirm this is never called on an inode without one
+}
// pins
void get_stickydirs();
void put_stickydirs();
+ CDir *get_containing_subtree();
+
+
protected:
// parent dentries in cache
CDentry *parent; // primary link
}
// wait!
- int wait_on;
+ uint64_t wait_on;
if (lock->get_parent()->is_auth() && lock->is_stable())
wait_on = SimpleLock::WAIT_RD;
- else
- wait_on = SimpleLock::WAIT_STABLE; // REQRDLOCK is ignored if lock is unstable, so we need to retry.
+ else {
+ // REQRDLOCK is ignored if lock is unstable, so we need to retry on stable OR auth change
+ wait_on = SimpleLock::WAIT_STABLE;
+ if (!lock->get_parent()->is_auth()) {
+ wait_on |= MDSCacheObject::WAIT_AUTHCHANGE;
+ // The waiter below is queued on lock->get_parent(), which counts its AUTHCHANGE
+ // waiters and unlinks itself from the subtree list when the last one is taken.
+ // So that same object (not some other cached pointer) must be linked here.
+ CDir *subtree = lock->get_parent()->get_containing_subtree();
+ subtree->add_auth_change_waiter(lock->get_parent());
+ }
+ }
dout(7) << "rdlock_start waiting on " << *lock << " on " << *lock->get_parent() << dendl;
lock->add_waiter(wait_on, new C_MDS_RetryRequest(mdcache, mut));
nudge_log(lock);
} else {
// replica.
- // auth should be auth_pinned (see acquire_locks wrlock weird mustpin case).
int auth = lock->get_parent()->authority().first;
dout(10) << "requesting scatter from auth on "
<< *lock << " on " << *lock->get_parent() << dendl;
if (!nowait) {
dout(7) << "wrlock_start waiting on " << *lock << " on " << *lock->get_parent() << dendl;
- lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mut));
+ uint64_t mask = SimpleLock::WAIT_STABLE;
+ if (!lock->get_parent()->is_auth()) {
+ // retry on auth change as well as on stable
+ mask |= MDSCacheObject::WAIT_AUTHCHANGE;
+ // Link the object that actually holds the waiter (lock->get_parent()) into the
+ // subtree root's list, so its AUTHCHANGE bookkeeping and the list stay in sync.
+ CDir *subtree = lock->get_parent()->get_containing_subtree();
+ subtree->add_auth_change_waiter(lock->get_parent());
+ }
+ lock->add_waiter(mask, new C_MDS_RetryRequest(mdcache, mut));
nudge_log(lock);
}
subtrees.erase(dir);
subtrees[parent].erase(dir);
+ // move auth change waiters from dir (no longer a subtree root) up to parent
+ while (!dir->waiting_on_auth_change.empty()) {
+ MDSCacheObject *o = dir->waiting_on_auth_change.front();
+ parent->add_auth_change_waiter(o);  // NOTE(review): the loop ends only if this unlinks o from dir's list -- presumably elist::push_back does; confirm
+ dout(10) << " moved auth change waiter " << *o << dendl;
+ }
+
// adjust popularity?
if (dir->is_auth()) {
utime_t now = g_clock.now();
}
p = next;
}
+
+ // move auth change waiters whose containing subtree is now dir from root's list to dir's
+ elist<MDSCacheObject*>::iterator q = root->waiting_on_auth_change.begin();
+ while (!q.end()) {
+ MDSCacheObject *o = *q;
+ ++q;  // advance before o may be unlinked from root's list below
+ if (o->get_containing_subtree() == dir) {
+ dout(20) << " moving auth change waiter " << *o << dendl;
+ dir->add_auth_change_waiter(o); // careful, this removes *o from root's list
+ }
+ }
// i am a bound of the parent subtree.
subtrees[root].insert(dir);
assert(subtrees[p].count(dir));
subtrees[p].erase(dir);
}
+ assert(dir->waiting_on_auth_change.empty());
}
void MDCache::get_subtree_bounds(CDir *dir, set<CDir*>& bounds)
set<CDir*> have;
cache->map_dirfrag_set(m->get_bounds(), have);
cache->adjust_bounded_subtree_auth(dir, have, new_auth);
+
+ if (new_auth.second == CDIR_AUTH_UNKNOWN) {  // second == CDIR_AUTH_UNKNOWN is the case is_ambiguous_auth() reports as not ambiguous
+ // wake up any auth change waiters
+ list<Context*> ls;
+ dir->take_waiting(MDSCacheObject::WAIT_AUTHCHANGE, ls);
+ if (!ls.empty())
+ dout(10) << "handle_export_notify woke up some AUTHCHANGE waiters" << dendl;
+ mds->queue_waiters(ls);  // not guarded by the if above: runs even when ls is empty
+ }
// induce a merge?
cache->try_subtree_merge(dir);
parent->take_waiting(mask << get_wait_shift(), ls);
}
void add_waiter(uint64_t mask, Context *c) {
- parent->add_waiter(mask << get_wait_shift(), c);
+ // Lock-level wait bits are shifted into this lock's slot of the parent's wait mask.
+ // WAIT_AUTHCHANGE is an object-level bit, so it is passed through unshifted. It is
+ // masked out before the shift so a shifted copy cannot alias the parent's other high bits.
+ parent->add_waiter(((mask & ~MDSCacheObject::WAIT_AUTHCHANGE) << get_wait_shift()) | (mask & MDSCacheObject::WAIT_AUTHCHANGE), c);
}
bool is_waiter_for(uint64_t mask) {
return parent->is_waiter_for(mask << get_wait_shift());
#include "include/frag.h"
#include "include/xlist.h"
+#include "include/elist.h"
#include <boost/pool/pool.hpp>
class MDSCacheObject;
+class CDir;
+
// -- authority delegation --
// directory authority types
// >= 0 is the auth mds
// -- wait --
const static uint64_t WAIT_SINGLEAUTH = (1ull<<60);
const static uint64_t WAIT_UNFREEZE = (1ull<<59); // pka AUTHPINNABLE
+ const static uint64_t WAIT_AUTHCHANGE = (1ull<<58);
// ============================================
MDSCacheObject() :
state(0),
ref(0),
- replica_nonce(0) {}
+ replica_nonce(0),
+ waiting_on_auth_change(0) {}
virtual ~MDSCacheObject() {}
// printing
bool is_ambiguous_auth() {
return authority().second != CDIR_AUTH_UNKNOWN;
}
+ virtual CDir *get_containing_subtree() = 0;
// --------------------------------------------
// pins
// waiting
protected:
multimap<uint64_t, Context*> waiting;
-
+ int waiting_on_auth_change;  // number of queued waiters whose mask includes WAIT_AUTHCHANGE
+ elist<MDSCacheObject*>::item item_waiting_on_auth_change;  // list hook that CDir::add_auth_change_waiter links onto a dir's waiter list
+ friend class CDir;
+
public:
bool is_waiter_for(uint64_t mask, uint64_t min=0) {
if (!min) {
if (waiting.empty())
get(PIN_WAITER);
waiting.insert(pair<uint64_t,Context*>(mask, c));
+
+ if (mask & WAIT_AUTHCHANGE)
+ waiting_on_auth_change++;
+
pdout(10,g_conf.debug_mds) << (mdsco_db_line_prefix(this))
<< "add_waiter " << hex << mask << dec << " " << c
<< " on " << *this
while (it != waiting.end()) {
if (it->first & mask) {
ls.push_back(it->second);
+
+ if (it->first & WAIT_AUTHCHANGE) {  // the waiter being taken was counted as an AUTHCHANGE waiter when it was added
+ waiting_on_auth_change--;
+ if (!waiting_on_auth_change)  // last AUTHCHANGE waiter gone
+ item_waiting_on_auth_change.remove_myself();  // so unlink this object from whatever list its hook is on
+ }
+
pdout(10,g_conf.debug_mds) << (mdsco_db_line_prefix(this))
<< "take_waiting mask " << hex << mask << dec << " took " << it->second
<< " tag " << it->first