}
-void Locker::send_lock_message(CInode *in, int msg, int type)
+void Locker::send_lock_message(SimpleLock *lock, int msg)
{
- for (map<int,int>::iterator it = in->replicas_begin();
- it != in->replicas_end();
+ for (map<int,int>::iterator it = lock->get_parent()->replicas_begin();
+ it != lock->get_parent()->replicas_end();
it++) {
- MLock *m = new MLock(msg, mds->get_nodeid());
- m->set_ino(in->ino(), type);
+ MLock *m = new MLock(lock, msg, mds->get_nodeid());
mds->send_message_mds(m, it->first, MDS_PORT_LOCKER);
}
}
-
-void Locker::send_lock_message(CInode *in, int msg, int type, bufferlist& data)
+void Locker::send_lock_message(SimpleLock *lock, int msg, bufferlist &data)
{
- for (map<int,int>::iterator it = in->replicas_begin();
- it != in->replicas_end();
+ for (map<int,int>::iterator it = lock->get_parent()->replicas_begin();
+ it != lock->get_parent()->replicas_end();
it++) {
- MLock *m = new MLock(msg, mds->get_nodeid());
- m->set_ino(in->ino(), type);
+ MLock *m = new MLock(lock, msg, mds->get_nodeid());
m->set_data(data);
mds->send_message_mds(m, it->first, MDS_PORT_LOCKER);
}
}
-void Locker::send_lock_message(CDentry *dn, int msg)
-{
- for (map<int,int>::iterator it = dn->replicas_begin();
- it != dn->replicas_end();
- it++) {
- MLock *m = new MLock(msg, mds->get_nodeid());
- m->set_dn(dn->dir->dirfrag(), dn->name);
- mds->send_message_mds(m, it->first, MDS_PORT_LOCKER);
- }
-}
-
bool Locker::acquire_locks(MDRequest *mdr,
- set<CDentry*> &dentry_rdlocks,
- set<CDentry*> &dentry_xlocks,
- set<CInode*> &inode_hard_rdlocks,
- set<CInode*> &inode_hard_xlocks)
+ set<SimpleLock*> &rdlocks,
+ set<SimpleLock*> &xlocks)
{
dout(10) << "acquire_locks " << *mdr << endl;
+ // sort everything we will lock
+ set<SimpleLock*, SimpleLock::ptr_lt> sorted;
+
// (local) AUTH PINS
// can i auth_pin everything?
- for (set<CDentry*>::iterator p = dentry_xlocks.begin();
- p != dentry_xlocks.end();
+ for (set<SimpleLock*>::iterator p = xlocks.begin();
+ p != xlocks.end();
++p) {
- CDir *dir = (*p)->dir;
- dout(10) << "might auth_pin " << *dir << endl;
-
- if (!dir->is_auth()) continue;
- if (!mdr->is_auth_pinned(dir) &&
- !dir->can_auth_pin()) {
- // wait
- dir->add_waiter(CDir::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr));
- mdcache->request_drop_locks(mdr);
- mdr->drop_auth_pins();
- return false;
- }
- }
- for (set<CInode*>::iterator p = inode_hard_xlocks.begin();
- p != inode_hard_xlocks.end();
- ++p) {
- CInode *in = *p;
- if (!in->is_auth()) continue;
- if (!mdr->is_auth_pinned(in) &&
- !in->can_auth_pin()) {
- in->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr));
- mdcache->request_drop_locks(mdr);
- mdr->drop_auth_pins();
- return false;
+ dout(10) << "will xlock " << **p << " " << *(*p)->get_parent() << endl;
+
+ sorted.insert(*p);
+ if ((*p)->get_type() == LOCK_OTYPE_DN) {
+ CDir *dir = ((CDentry*)(*p)->get_parent())->dir;
+ dout(10) << "might auth_pin " << *dir << endl;
+
+ if (!dir->is_auth()) continue;
+ if (!mdr->is_auth_pinned(dir) &&
+ !dir->can_auth_pin()) {
+ // wait
+ dir->add_waiter(CDir::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr));
+ mdcache->request_drop_locks(mdr);
+ mdr->drop_auth_pins();
+ return false;
+ }
+ } else {
+ CInode *in = (CInode*)(*p)->get_parent();
+ if (!in->is_auth()) continue;
+ if (!mdr->is_auth_pinned(in) &&
+ !in->can_auth_pin()) {
+ in->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr));
+ mdcache->request_drop_locks(mdr);
+ mdr->drop_auth_pins();
+ return false;
+ }
}
}
// ok, grab the auth pins
- for (set<CDentry*>::iterator p = dentry_xlocks.begin();
- p != dentry_xlocks.end();
+ for (set<SimpleLock*>::iterator p = xlocks.begin();
+ p != xlocks.end();
++p) {
- CDir *dir = (*p)->dir;
- if (!dir->is_auth()) continue;
- dout(10) << "auth_pinning " << *dir << endl;
- mdr->auth_pin(dir);
+ if ((*p)->get_type() == LOCK_OTYPE_DN) {
+ CDir *dir = ((CDentry*)(*p)->get_parent())->dir;
+ if (!dir->is_auth()) continue;
+ dout(10) << "auth_pinning " << *dir << endl;
+ mdr->auth_pin(dir);
+ } else {
+ CInode *in = (CInode*)(*p)->get_parent();
+ if (!in->is_auth()) continue;
+ dout(10) << "auth_pinning " << *in << endl;
+ mdr->auth_pin(in);
+ }
}
- for (set<CInode*>::iterator p = inode_hard_xlocks.begin();
- p != inode_hard_xlocks.end();
+
+ for (set<SimpleLock*>::iterator p = rdlocks.begin();
+ p != rdlocks.end();
++p) {
- CInode *in = *p;
- if (!in->is_auth()) continue;
- dout(10) << "auth_pinning " << *in << endl;
- mdr->auth_pin(in);
+ dout(10) << "will rdlock " << **p << " " << *(*p)->get_parent() << endl;
+ sorted.insert(*p);
}
+ // acquire locks.
+ // make sure they match currently acquired locks.
+ set<SimpleLock*, SimpleLock::ptr_lt>::iterator existing = mdr->locks.begin();
+ for (set<SimpleLock*, SimpleLock::ptr_lt>::iterator p = sorted.begin();
+ p != sorted.end();
+ ++p) {
- // DENTRY LOCKS
- {
- // sort all the dentries we will lock
- set<CDentry*, CDentry::ptr_lt> sorted;
- for (set<CDentry*>::iterator p = dentry_xlocks.begin();
- p != dentry_xlocks.end();
- ++p) {
- dout(10) << "will xlock " << **p << endl;
- sorted.insert(*p);
- }
- for (set<CDentry*>::iterator p = dentry_rdlocks.begin();
- p != dentry_rdlocks.end();
- ++p) {
- dout(10) << "will rdlock " << **p << endl;
- sorted.insert(*p);
- }
-
- // acquire dentry locks. make sure they match currently acquired locks.
- set<CDentry*, CDentry::ptr_lt>::iterator existing = mdr->dentry_locks.begin();
- for (set<CDentry*, CDentry::ptr_lt>::iterator p = sorted.begin();
- p != sorted.end();
- ++p) {
-
- // already locked?
- if (existing != mdr->dentry_locks.end() && *existing == *p) {
- // right kind?
- CDentry *had = *existing;
- if (dentry_xlocks.count(*p) == had->is_xlockedbyme(mdr)) {
- dout(10) << "acquire_locks already locked " << *had << endl;
- existing++;
- continue;
- }
- }
-
- // hose any stray locks
- while (existing != mdr->dentry_locks.end()) {
- CDentry *had = *existing;
+ // already locked?
+ if (existing != mdr->locks.end() && *existing == *p) {
+ // right kind?
+ SimpleLock *had = *existing;
+ if (xlocks.count(*p) == (had->get_xlocked_by() == mdr)) {
+ dout(10) << "acquire_locks already locked " << *had << " " << *had->get_parent() << endl;
existing++;
- dout(10) << "acquire_locks had " << *had << " locked before " << **p
- << ", unlocking" << endl;
- if (had->is_xlockedbyme(mdr))
- dentry_xlock_finish(had, mdr);
- else
- dentry_rdlock_finish(had, mdr);
- }
-
- // lock
- if (dentry_xlocks.count(*p)) {
- if (!dentry_xlock_start(*p, mdr))
- return false;
- dout(10) << "acquire_locks got xlock on " << **p << endl;
- } else {
- if (!dentry_rdlock_start(*p, mdr))
- return false;
- dout(10) << "acquire_locks got rdlock on " << **p << endl;
+ continue;
}
}
- // any extra unneeded locks?
- while (existing != mdr->dentry_locks.end()) {
- dout(10) << "acquire_locks had " << *existing << " locked, unlocking" << endl;
- if ((*existing)->is_xlockedbyme(mdr))
- dentry_xlock_finish(*existing, mdr);
+ // hose any stray locks
+ while (existing != mdr->locks.end()) {
+ SimpleLock *had = *existing;
+ existing++;
+ dout(10) << "acquire_locks unlocking out-of-order " << **existing
+ << " " << *(*existing)->get_parent() << endl;
+ if (had->get_xlocked_by() == mdr)
+ xlock_finish(had, mdr);
else
- dentry_rdlock_finish(*existing, mdr);
+ rdlock_finish(had, mdr);
}
- }
-
- // INODES
- {
- // sort all the dentries we will lock
- set<CInode*, CInode::ptr_lt> sorted;
- for (set<CInode*>::iterator p = inode_hard_xlocks.begin();
- p != inode_hard_xlocks.end();
- ++p)
- sorted.insert(*p);
- for (set<CInode*>::iterator p = inode_hard_rdlocks.begin();
- p != inode_hard_rdlocks.end();
- ++p)
- sorted.insert(*p);
-
- // acquire inode locks. make sure they match currently acquired locks.
- set<CInode*, CInode::ptr_lt>::iterator existing = mdr->inode_hard_locks.begin();
- for (set<CInode*, CInode::ptr_lt>::iterator p = sorted.begin();
- p != sorted.end();
- ++p) {
- // already locked?
- if (existing != mdr->inode_hard_locks.end() && *existing == *p) {
- // right kind?
- CInode *had = *existing;
- if (inode_hard_xlocks.count(*p) == (had->hardlock.get_wrlocked_by() == mdr)) {
- dout(10) << "acquire_locks already locked " << *had << endl;
- existing++;
- continue;
- }
- }
-
- // hose any stray locks
- while (existing != mdr->inode_hard_locks.end()) {
- CInode *had = *existing;
- existing++;
- dout(10) << "acquire_locks had " << *had << " locked before " << **p
- << ", unlocking" << endl;
- if (had->hardlock.get_wrlocked_by() == mdr)
- inode_hard_xlock_finish(had, mdr);
- else
- inode_hard_rdlock_finish(had, mdr);
- }
- // lock
- if (inode_hard_xlocks.count(*p)) {
- if (!inode_hard_xlock_start(*p, mdr))
- return false;
- dout(10) << "acquire_locks got xlock on " << **p << endl;
- } else {
- if (!inode_hard_rdlock_start(*p, mdr))
- return false;
- dout(10) << "acquire_locks got rdlock on " << **p << endl;
- }
+ // lock
+ if (xlocks.count(*p)) {
+ if (!xlock_start(*p, mdr))
+ return false;
+ dout(10) << "acquire_locks got xlock on " << **p << " " << *(*p)->get_parent() << endl;
+ } else {
+ if (!rdlock_start(*p, mdr))
+ return false;
+ dout(10) << "acquire_locks got rdlock on " << **p << " " << *(*p)->get_parent() << endl;
}
+ }
- // any extra unneeded locks?
- while (existing != mdr->inode_hard_locks.end()) {
- dout(10) << "acquire_locks had " << **existing << " locked, unlocking" << endl;
- if ((*existing)->hardlock.get_wrlocked_by() == mdr)
- inode_hard_xlock_finish(*existing, mdr);
- else
- inode_hard_rdlock_finish(*existing, mdr);
- }
+ // any extra unneeded locks?
+ while (existing != mdr->locks.end()) {
+ dout(10) << "acquire_locks unlocking " << *existing
+ << " " << *(*existing)->get_parent() << endl;
+ if ((*existing)->get_xlocked_by() == mdr)
+ xlock_finish(*existing, mdr);
+ else
+ rdlock_finish(*existing, mdr);
}
return true;
+// generics
+
+/*
+bool Locker::rdlock_try(SimpleLock *lock, Context *con)
+{
+ switch (lock->get_type()) {
+ case LOCK_OTYPE_IFILE:
+ return file_rdlock_try((FileLock*)lock, con);
+ default:
+ return simple_rdlock_try(lock, con);
+ }
+}
+*/
+
+bool Locker::rdlock_start(SimpleLock *lock, MDRequest *mdr)
+{
+ switch (lock->get_type()) {
+ case LOCK_OTYPE_IFILE:
+ return file_rdlock_start((FileLock*)lock, mdr);
+ default:
+ return simple_rdlock_start(lock, mdr);
+ }
+}
+
+void Locker::rdlock_finish(SimpleLock *lock, MDRequest *mdr)
+{
+ switch (lock->get_type()) {
+ case LOCK_OTYPE_IFILE:
+ return file_rdlock_finish((FileLock*)lock, mdr);
+ default:
+ return simple_rdlock_finish(lock, mdr);
+ }
+}
+
+bool Locker::xlock_start(SimpleLock *lock, MDRequest *mdr)
+{
+ switch (lock->get_type()) {
+ case LOCK_OTYPE_IFILE:
+ return file_xlock_start((FileLock*)lock, mdr);
+ default:
+ return simple_xlock_start(lock, mdr);
+ }
+}
+void Locker::xlock_finish(SimpleLock *lock, MDRequest *mdr)
+{
+ switch (lock->get_type()) {
+ case LOCK_OTYPE_IFILE:
+ return file_xlock_finish((FileLock*)lock, mdr);
+ default:
+ return simple_xlock_finish(lock, mdr);
+ }
+}
if (in->is_auth()) {
// [auth] twiddle mode?
- inode_file_eval(in);
+ file_eval(&in->filelock);
} else {
// [replica] tell auth about any new caps wanted
request_inode_file_caps(in);
bool Locker::issue_caps(CInode *in)
{
// allowed caps are determined by the lock mode.
- int allowed = in->filelock.caps_allowed(in->is_auth());
+ int allowed = in->filelock.caps_allowed();
dout(7) << "issue_caps filelock allows=" << cap_string(allowed)
<< " on " << *in << endl;
else
in->mds_caps_wanted.erase(m->get_from());
- inode_file_eval(in);
+ file_eval(&in->filelock);
delete m;
}
assert(cap);
// filter wanted based on what we could ever give out (given auth/replica status)
- int wanted = m->get_wanted() & in->filelock.caps_allowed_ever(in->is_auth());
+ int wanted = m->get_wanted() & in->filelock.caps_allowed_ever();
dout(7) << "handle_client_file_caps seq " << m->get_seq()
<< " confirms caps " << cap_string(m->get_caps())
}
// reevaluate, waiters
- inode_file_eval(in);
+ file_eval(&in->filelock);
in->finish_waiting(CInode::WAIT_CAPS, 0);
delete m;
void Locker::handle_lock(MLock *m)
{
switch (m->get_otype()) {
- case LOCK_OTYPE_IHARD:
- handle_lock_inode_hard(m);
- break;
+ case LOCK_OTYPE_DN:
+ {
+ CDir *dir = mdcache->get_dirfrag(m->get_dirfrag());
+ CDentry *dn = 0;
+ if (dir)
+ dn = dir->lookup(m->get_dn());
+ if (!dn) {
+ dout(7) << "dont' have dn " << m->get_dirfrag() << " " << m->get_dn() << endl;
+ delete m;
+ return;
+ }
- case LOCK_OTYPE_IFILE:
- handle_lock_inode_file(m);
+ handle_simple_lock(&dn->lock, m);
+ }
break;
-
- case LOCK_OTYPE_DIR:
- handle_lock_dir(m);
+
+ case LOCK_OTYPE_IHARD:
+ {
+ CInode *in = mdcache->get_inode(m->get_ino());
+ if (!in) {
+ dout(7) << "dont' have ino " << m->get_ino() << endl;
+ delete m;
+ return;
+ }
+ handle_simple_lock(&in->hardlock, m);
+ }
break;
-
- case LOCK_OTYPE_DN:
- handle_lock_dn(m);
+
+ case LOCK_OTYPE_IFILE:
+ {
+ CInode *in = mdcache->get_inode(m->get_ino());
+ if (!in) {
+ dout(7) << "dont' have ino " << m->get_ino() << endl;
+ delete m;
+ return;
+ }
+ handle_file_lock(&in->filelock, m);
+ }
break;
default:
-// ===============================
-// hard inode metadata
-
-bool Locker::inode_hard_rdlock_try(CInode *in, Context *con)
-{
- dout(7) << "inode_hard_rdlock_try on " << *in << endl;
-
- // can read? grab ref.
- if (in->hardlock.can_read(in->is_auth()))
- return true;
-
- assert(!in->is_auth());
-
- // wait!
- dout(7) << "inode_hard_rdlock_try waiting on " << *in << endl;
- in->add_waiter(CInode::WAIT_HARDR, con);
- return false;
-}
-
-bool Locker::inode_hard_rdlock_start(CInode *in, MDRequest *mdr)
-{
- dout(7) << "inode_hard_rdlock_start on " << *in << endl;
-
- // can read? grab ref.
- if (in->hardlock.can_read(in->is_auth())) {
- in->hardlock.get_read();
- mdr->inode_hard_rdlocks.insert(in);
- mdr->inode_hard_locks.insert(in);
- return true;
- }
-
- // can't read, and replicated.
- assert(!in->is_auth());
-
- // wait!
- dout(7) << "inode_hard_rdlock_start waiting on " << *in << endl;
- in->add_waiter(CInode::WAIT_HARDR, new C_MDS_RetryRequest(mdcache, mdr));
- return false;
-}
-
-
-void Locker::inode_hard_rdlock_finish(CInode *in, MDRequest *mdr)
+void Locker::handle_simple_lock(SimpleLock *lock, MLock *m)
{
- // drop ref
- assert(in->hardlock.can_read(in->is_auth()));
- in->hardlock.put_read();
- mdr->inode_hard_rdlocks.erase(in);
- mdr->inode_hard_locks.erase(in);
-
- dout(7) << "inode_hard_rdlock_finish on " << *in << endl;
+ int from = m->get_asker();
- //if (in->hardlock.get_nread() == 0) in->finish_waiting(CInode::WAIT_HARDNORD);
-}
-
+ switch (m->get_action()) {
+ // -- replica --
+ case LOCK_AC_SYNC:
+ assert(lock->get_state() == LOCK_LOCK);
+ lock->decode_locked_state(m->get_data());
+ lock->set_state(LOCK_SYNC);
+ lock->finish_waiters(SimpleLock::WAIT_RD|SimpleLock::WAIT_STABLE);
+ break;
+
+ case LOCK_AC_LOCK:
+ assert(lock->get_state() == LOCK_SYNC);
+ //|| lock->get_state() == LOCK_GLOCKR);
+
+ // wait for readers to finish?
+ if (lock->get_num_rdlock() > 0) {
+ dout(7) << "handle_simple_lock has reader, waiting before ack on " << *lock << " on " << *lock->get_parent()
+ << endl;
+ lock->set_state(LOCK_GLOCKR);
+ lock->add_waiter(SimpleLock::WAIT_NORD, new C_MDS_RetryMessage(mds, m));
+ return;
+ }
-bool Locker::inode_hard_xlock_start(CInode *in, MDRequest *mdr)
-{
- dout(7) << "inode_hard_xlock_start on " << *in << endl;
+ // update lock and reply
+ lock->set_state(LOCK_LOCK);
+
+ {
+ MLock *reply = new MLock(lock, LOCK_AC_LOCKACK, mds->get_nodeid());
+ mds->send_message_mds(reply, from, MDS_PORT_LOCKER);
+ }
+ break;
- // if not replicated, i can twiddle lock at will
- if (in->is_auth() &&
- !in->is_replicated() &&
- in->hardlock.get_state() != LOCK_LOCK)
- in->hardlock.set_state(LOCK_LOCK);
-
- // can write? grab ref.
- if (in->hardlock.can_write(in->is_auth())) {
- assert(in->is_auth());
- in->hardlock.get_write(mdr);
- mdr->inode_hard_xlocks.insert(in);
- mdr->inode_hard_locks.insert(in);
- return true;
- }
-
- // can't write, replicated.
- if (in->is_auth()) {
- // auth
- if (in->hardlock.can_write_soon(in->is_auth())) {
- // just wait
+ // -- auth --
+ case LOCK_AC_LOCKACK:
+ assert(lock->get_state() == LOCK_GLOCKR);
+ assert(lock->is_gathering(from));
+ lock->remove_gather(from);
+
+ if (lock->is_gathering()) {
+ dout(7) << "handle_simple_lock " << *lock << " on " << *lock->get_parent() << " from " << from
+ << ", still gathering " << lock->get_gather_set() << endl;
} else {
- // initiate lock
- inode_hard_lock(in);
+ dout(7) << "handle_simple_lock " << *lock << " on " << *lock->get_parent() << " from " << from
+ << ", last one" << endl;
+ simple_eval(lock);
}
-
- dout(7) << "inode_hard_xlock_start waiting on " << *in << endl;
- in->add_waiter(CInode::WAIT_HARDW, new C_MDS_RetryRequest(mdcache, mdr));
-
- return false;
- } else {
- // replica
- // fw to auth
- int auth = in->authority().first;
- dout(7) << "inode_hard_xlock_start " << *in << " on replica, fw to auth " << auth << endl;
- assert(auth != mds->get_nodeid());
- mdcache->request_forward(mdr, auth);
- return false;
}
-}
-
-void Locker::inode_hard_xlock_finish(CInode *in, MDRequest *mdr)
-{
- // drop ref
- //assert(in->hardlock.can_write(in->is_auth()));
- in->hardlock.put_write();
- mdr->inode_hard_xlocks.erase(in);
- mdr->inode_hard_locks.erase(in);
- dout(7) << "inode_hard_xlock_finish on " << *in << endl;
-
- // others waiting?
- if (in->is_hardlock_write_wanted()) {
- // wake 'em up
- in->take_waiting(CInode::WAIT_HARDW, mds->finished_queue);
- } else {
- // auto-sync if alone.
- if (in->is_auth() &&
- !in->is_replicated() &&
- in->hardlock.get_state() != LOCK_SYNC)
- in->hardlock.set_state(LOCK_SYNC);
-
- inode_hard_eval(in);
- }
+ delete m;
}
-void Locker::inode_hard_eval(CInode *in)
+void Locker::simple_eval(SimpleLock *lock)
{
// finished gather?
- if (in->is_auth() &&
- !in->hardlock.is_stable() &&
- in->hardlock.gather_set.empty()) {
- dout(7) << "inode_hard_eval finished gather on " << *in << endl;
- switch (in->hardlock.get_state()) {
+ if (lock->get_parent()->is_auth() &&
+ !lock->is_stable() &&
+ !lock->is_gathering()) {
+ dout(7) << "simple_eval finished gather on " << *lock << " on " << *lock->get_parent() << endl;
+ switch (lock->get_state()) {
case LOCK_GLOCKR:
- in->hardlock.set_state(LOCK_LOCK);
-
- // waiters
- //in->hardlock.get_write();
- in->finish_waiting(CInode::WAIT_HARDRWB|CInode::WAIT_HARDSTABLE);
- //in->hardlock.put_write();
+ lock->set_state(LOCK_LOCK);
+ lock->finish_waiters(SimpleLock::WAIT_LOCK|SimpleLock::WAIT_STABLE);
break;
default:
assert(0);
}
}
- if (!in->hardlock.is_stable()) return;
+ if (!lock->is_stable()) return;
- if (in->is_auth()) {
-
+ if (lock->get_parent()->is_auth()) {
+
// sync?
- if (in->is_replicated() &&
- in->is_hardlock_write_wanted() &&
- in->hardlock.get_state() != LOCK_SYNC) {
- dout(7) << "inode_hard_eval stable, syncing " << *in << endl;
- inode_hard_sync(in);
+ if (lock->get_state() != LOCK_SYNC &&
+ lock->get_parent()->is_replicated() &&
+ !lock->is_waiting(SimpleLock::WAIT_WR)) {
+ dout(7) << "simple_eval stable, syncing " << *lock << " on " << *lock->get_parent() << endl;
+ simple_sync(lock);
}
} else {
// replica
}
+
}
// mid
-void Locker::inode_hard_sync(CInode *in)
+void Locker::simple_sync(SimpleLock *lock)
{
- dout(7) << "inode_hard_sync on " << *in << endl;
- assert(in->is_auth());
+ dout(7) << "simple_sync on " << *lock << " on " << *lock->get_parent() << endl;
+ assert(lock->get_parent()->is_auth());
// check state
- if (in->hardlock.get_state() == LOCK_SYNC)
+ if (lock->get_state() == LOCK_SYNC)
return; // already sync
- if (in->hardlock.get_state() == LOCK_GLOCKR)
+ if (lock->get_state() == LOCK_GLOCKR)
assert(0); // um... hmm!
- assert(in->hardlock.get_state() == LOCK_LOCK);
+ assert(lock->get_state() == LOCK_LOCK);
// hard data
- bufferlist harddata;
- in->encode_hard_state(harddata);
+ bufferlist data;
+ lock->encode_locked_state(data);
// bcast to replicas
- send_lock_message(in, LOCK_AC_SYNC, LOCK_OTYPE_IHARD, harddata);
+ send_lock_message(lock, LOCK_AC_SYNC, data);
// change lock
- in->hardlock.set_state(LOCK_SYNC);
+ lock->set_state(LOCK_SYNC);
// waiters?
- in->finish_waiting(CInode::WAIT_HARDSTABLE);
+ lock->finish_waiters(SimpleLock::WAIT_STABLE);
}
-void Locker::inode_hard_lock(CInode *in)
+void Locker::simple_lock(SimpleLock *lock)
{
- dout(7) << "inode_hard_lock on " << *in << " hardlock=" << in->hardlock << endl;
- assert(in->is_auth());
+ dout(7) << "simple_lock on " << *lock << " on " << *lock->get_parent() << endl;
+ assert(lock->get_parent()->is_auth());
// check state
- if (in->hardlock.get_state() == LOCK_LOCK ||
- in->hardlock.get_state() == LOCK_GLOCKR)
+ if (lock->get_state() == LOCK_LOCK ||
+ lock->get_state() == LOCK_GLOCKR)
return; // already lock or locking
- assert(in->hardlock.get_state() == LOCK_SYNC);
-
- // bcast to replicas
- send_lock_message(in, LOCK_AC_LOCK, LOCK_OTYPE_IHARD);
+ assert(lock->get_state() == LOCK_SYNC);
- // change lock
- in->hardlock.set_state(LOCK_GLOCKR);
- in->hardlock.init_gather(in->get_replicas());
+ if (lock->get_parent()->is_replicated()) {
+ // bcast to replicas
+ send_lock_message(lock, LOCK_AC_LOCK);
+
+ // change lock
+ lock->set_state(LOCK_GLOCKR);
+ lock->init_gather();
+ } else {
+ lock->set_state(LOCK_LOCK);
+ }
}
+// top
+bool Locker::simple_rdlock_try(SimpleLock *lock, Context *con)
+{
+ dout(7) << "simple_rdlock_try on " << *lock << " on " << *lock->get_parent() << endl;
+ // can read? grab ref.
+ if (lock->can_rdlock(0))
+ return true;
+
+ assert(!lock->get_parent()->is_auth());
-// messenger
+ // wait!
+ dout(7) << "simple_rdlock_try waiting on " << *lock << " on " << *lock->get_parent() << endl;
+ lock->add_waiter(SimpleLock::WAIT_RD, con);
+ return false;
+}
-void Locker::handle_lock_inode_hard(MLock *m)
+bool Locker::simple_rdlock_start(SimpleLock *lock, MDRequest *mdr)
{
- assert(m->get_otype() == LOCK_OTYPE_IHARD);
+ dout(7) << "simple_rdlock_start on " << *lock << " on " << *lock->get_parent() << endl;
+
+ // can read? grab ref.
+ if (lock->can_rdlock(mdr)) {
+ lock->get_rdlock();
+ mdr->rdlocks.insert(lock);
+ mdr->locks.insert(lock);
+ return true;
+ }
- if (mds->logger) mds->logger->inc("lih");
+ // can't read, and replicated.
+ assert(!lock->get_parent()->is_auth());
- int from = m->get_asker();
- CInode *in = mdcache->get_inode(m->get_ino());
+ // wait!
+ dout(7) << "simple_rdlock_start waiting on " << *lock << " on " << *lock->get_parent() << endl;
+ lock->add_waiter(SimpleLock::WAIT_RD,
+ new C_MDS_RetryRequest(mdcache, mdr));
+ return false;
+}
+
+void Locker::simple_rdlock_finish(SimpleLock *lock, MDRequest *mdr)
+{
+ // drop ref
+ lock->put_rdlock();
+ if (mdr) {
+ mdr->rdlocks.erase(lock);
+ mdr->locks.erase(lock);
+ }
+
+ dout(7) << "simple_rdlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
- if (LOCK_AC_FOR_AUTH(m->get_action())) {
+ if (lock->get_state() == LOCK_GLOCKR &&
+ lock->get_num_rdlock() == 0) {
+ lock->set_state(LOCK_SYNC); // return state to sync, in case the unpinner flails
+ lock->finish_waiters(SimpleLock::WAIT_NORD);
+ }
+}
+
+bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr)
+{
+ dout(7) << "simple_xlock_start on " << *lock << " on " << *lock->get_parent() << endl;
+
+ // auth?
+ if (lock->get_parent()->is_auth()) {
// auth
- assert(in);
- assert(in->is_auth());// || in->is_proxy());
- dout(7) << "handle_lock_inode_hard " << *in << " hardlock=" << in->hardlock << endl;
-
- /*if (in->is_proxy()) {
- // fw
- int newauth = in->authority().first;
- assert(newauth >= 0);
- if (from == newauth) {
- dout(7) << "handle_lock " << m->get_ino() << " from " << from << ": proxy, but from new auth, dropping" << endl;
- delete m;
- } else {
- dout(7) << "handle_lock " << m->get_ino() << " from " << from << ": proxy, fw to " << newauth << endl;
- mds->send_message_mds(m, newauth, MDS_PORT_LOCKER);
+
+ // lock.
+ if (lock->get_state() == LOCK_SYNC)
+ simple_lock(lock);
+
+ // already locked?
+ if (lock->get_state() == LOCK_LOCK) {
+ if (lock->is_xlocked()) {
+ // by me?
+ if (lock->get_xlocked_by() == mdr)
+ return true;
+ // by someone else.
+ lock->add_waiter(SimpleLock::WAIT_WR, new C_MDS_RetryRequest(mdcache, mdr));
+ return false;
}
- return;
+
+ // xlock.
+ lock->get_xlock(mdr);
+ mdr->xlocks.insert(lock);
+ mdr->locks.insert(lock);
+ return true;
+ } else {
+ // wait for lock
+ lock->add_waiter(SimpleLock::WAIT_LOCK, new C_MDS_RetryRequest(mdcache, mdr));
+ return false;
}
- */
} else {
// replica
- if (!in) {
- dout(7) << "handle_lock_inode_hard " << m->get_ino() << ": don't have it anymore" << endl;
- /* do NOT nak.. if we go that route we need to duplicate all the nonce funkiness
- to keep gather_set a proper/correct subset of cached_by. better to use the existing
- cacheexpire mechanism instead!
- */
- delete m;
- return;
+
+ assert(0);
+
+ // wait for sync.
+ if (lock->get_state() != LOCK_SYNC) {
+ lock->add_waiter(SimpleLock::WAIT_RD, new C_MDS_RetryRequest(mdcache, mdr));
+ return false;
}
-
- assert(!in->is_auth());
+
+ // do remote xlock
+ assert(0);
}
+}
- dout(7) << "handle_lock_inode_hard a=" << m->get_action() << " from " << from << " " << *in << " hardlock=" << in->hardlock << endl;
-
- CLock *lock = &in->hardlock;
-
- switch (m->get_action()) {
- // -- replica --
- case LOCK_AC_SYNC:
- assert(lock->get_state() == LOCK_LOCK);
-
- { // assim data
- int off = 0;
- in->decode_hard_state(m->get_data(), off);
- }
-
- // update lock
- lock->set_state(LOCK_SYNC);
-
- // no need to reply
+
+void Locker::simple_xlock_finish(SimpleLock *lock, MDRequest *mdr)
+{
+ // drop ref
+ assert(lock->can_xlock(mdr));
+ lock->put_xlock();
+ mdr->xlocks.erase(lock);
+ mdr->locks.erase(lock);
+ dout(7) << "simple_xlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
+
+ // others waiting?
+ if (lock->is_waiting(SimpleLock::WAIT_WR)) {
+ // wake 'em up
+ lock->finish_waiters(SimpleLock::WAIT_WR, 0);
+ } else {
+ // auto-sync if alone.
+ if (lock->get_parent()->is_auth() &&
+ !lock->get_parent()->is_replicated() &&
+ lock->get_state() != LOCK_SYNC)
+ lock->set_state(LOCK_SYNC);
- // waiters
- in->finish_waiting(CInode::WAIT_HARDR|CInode::WAIT_HARDSTABLE);
- break;
-
- case LOCK_AC_LOCK:
- assert(lock->get_state() == LOCK_SYNC);
- //|| lock->get_state() == LOCK_GLOCKR);
-
- // wait for readers to finish?
- if (lock->get_nread() > 0) {
- dout(7) << "handle_lock_inode_hard readers, waiting before ack on " << *in << endl;
- lock->set_state(LOCK_GLOCKR);
- in->add_waiter(CInode::WAIT_HARDNORD,
- new C_MDS_RetryMessage(mds, m));
- assert(0); // does this ever happen? (if so, fix hard_rdlock_finish, and CInodeExport.update_inode!)
- return;
- } else {
+ simple_eval(lock);
+ }
+}
- // update lock and reply
- lock->set_state(LOCK_LOCK);
-
- {
- MLock *reply = new MLock(LOCK_AC_LOCKACK, mds->get_nodeid());
- reply->set_ino(in->ino(), LOCK_OTYPE_IHARD);
- mds->send_message_mds(reply, from, MDS_PORT_LOCKER);
+
+
+// dentry specific helpers
+
+// trace helpers
+
+/** dentry_can_rdlock_trace
+ * see if we can _anonymously_ rdlock an entire trace.
+ * if not, and req is specified, wait and retry that message.
+ */
+bool Locker::dentry_can_rdlock_trace(vector<CDentry*>& trace, MClientRequest *req)
+{
+ // verify dentries are rdlockable.
+ // we do this because
+ // - we're being less aggressive about locks acquisition, and
+ // - we're not acquiring the locks in order!
+ for (vector<CDentry*>::iterator it = trace.begin();
+ it != trace.end();
+ it++) {
+ CDentry *dn = *it;
+ if (!dn->lock.can_rdlock(0)) {
+ if (req) {
+ dout(10) << "can_rdlock_trace can't rdlock " << *dn << ", waiting" << endl;
+ dn->lock.add_waiter(SimpleLock::WAIT_RD,
+ new C_MDS_RetryMessage(mds, req));
+ } else {
+ dout(10) << "can_rdlock_trace can't rdlock " << *dn << endl;
}
+ return false;
}
- break;
-
-
- // -- auth --
- case LOCK_AC_LOCKACK:
- assert(lock->state == LOCK_GLOCKR);
- assert(lock->gather_set.count(from));
- lock->gather_set.erase(from);
+ }
+ return true;
+}
- if (lock->gather_set.size()) {
- dout(7) << "handle_lock_inode_hard " << *in << " from " << from << ", still gathering " << lock->gather_set << endl;
- } else {
- dout(7) << "handle_lock_inode_hard " << *in << " from " << from << ", last one" << endl;
- inode_hard_eval(in);
- }
- }
- delete m;
+void Locker::dentry_anon_rdlock_trace_start(vector<CDentry*>& trace)
+{
+ // grab dentry rdlocks
+ for (vector<CDentry*>::iterator it = trace.begin();
+ it != trace.end();
+ it++)
+ (*it)->lock.get_rdlock();
+}
+
+
+void Locker::dentry_anon_rdlock_trace_finish(vector<CDentry*>& trace)
+{
+ for (vector<CDentry*>::iterator it = trace.begin();
+ it != trace.end();
+ it++)
+ simple_rdlock_finish(&(*it)->lock, 0);
}
-// =====================
-// soft inode metadata
+
+// ===============================
+// file lock
-bool Locker::inode_file_rdlock_start(CInode *in, MDRequest *mdr)
+bool Locker::file_rdlock_start(FileLock *lock, MDRequest *mdr)
{
- dout(7) << "inode_file_rdlock_start " << *in << " filelock=" << in->filelock << endl;
+ dout(7) << "file_rdlock_start " << *lock << " on " << *lock->get_parent() << endl;
// can read? grab ref.
- if (in->filelock.can_read(in->is_auth())) {
- in->filelock.get_read();
+ if (lock->can_rdlock(mdr)) {
+ lock->get_rdlock();
+ mdr->rdlocks.insert(lock);
+ mdr->locks.insert(lock);
return true;
}
// can't read, and replicated.
- if (in->filelock.can_read_soon(in->is_auth())) {
+ if (lock->can_rdlock_soon()) {
// wait
- dout(7) << "inode_file_rdlock_start can_read_soon " << *in << endl;
+ dout(7) << "file_rdlock_start can_rdlock_soon " << *lock << " on " << *lock->get_parent() << endl;
} else {
- if (in->is_auth()) {
+ if (lock->get_parent()->is_auth()) {
// auth
// FIXME or qsync?
- if (in->filelock.is_stable()) {
- inode_file_lock(in); // lock, bc easiest to back off
-
- if (in->filelock.can_read(in->is_auth())) {
- in->filelock.get_read();
+ if (lock->is_stable()) {
+ file_lock(lock); // lock, bc easiest to back off ... FIXME
+
+ if (lock->can_rdlock(mdr)) {
+ lock->get_rdlock();
+ mdr->rdlocks.insert(lock);
+ mdr->locks.insert(lock);
- //in->filelock.get_write();
- in->finish_waiting(CInode::WAIT_FILERWB|CInode::WAIT_FILESTABLE);
- //in->filelock.put_write();
-
- mdr->inode_file_rdlocks.insert(in);
- mdr->inode_file_locks.insert(in);
+ lock->finish_waiters(SimpleLock::WAIT_LOCK|SimpleLock::WAIT_STABLE);
return true;
}
} else {
- dout(7) << "inode_file_rdlock_start waiting until stable on " << *in << ", filelock=" << in->filelock << endl;
- in->add_waiter(CInode::WAIT_FILESTABLE, new C_MDS_RetryRequest(mdcache, mdr));
+ dout(7) << "file_rdlock_start waiting until stable on " << *lock << " on " << *lock->get_parent() << endl;
+ lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
} else {
// replica
- if (in->filelock.is_stable()) {
-
+ if (lock->is_stable()) {
+
// fw to auth
+ CInode *in = (CInode*)lock->get_parent();
int auth = in->authority().first;
- dout(7) << "inode_file_rdlock_start " << *in << " on replica and async, fw to auth " << auth << endl;
+ dout(7) << "file_rdlock_start " << *lock << " on " << *lock->get_parent() << " on replica and async, fw to auth " << auth << endl;
assert(auth != mds->get_nodeid());
mdcache->request_forward(mdr, auth);
return false;
} else {
// wait until stable
- dout(7) << "inode_file_rdlock_start waiting until stable on " << *in << ", filelock=" << in->filelock << endl;
- in->add_waiter(CInode::WAIT_FILESTABLE, new C_MDS_RetryRequest(mdcache, mdr));
+ dout(7) << "inode_file_rdlock_start waiting until stable on " << *lock << " on " << *lock->get_parent() << endl;
+ lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
}
}
-
+
// wait
- dout(7) << "inode_file_rdlock_start waiting on " << *in << ", filelock=" << in->filelock << endl;
- in->add_waiter(CInode::WAIT_FILER, new C_MDS_RetryRequest(mdcache, mdr));
+ dout(7) << "file_rdlock_start waiting on " << *lock << " on " << *lock->get_parent() << endl;
+ lock->add_waiter(SimpleLock::WAIT_RD, new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
-void Locker::inode_file_rdlock_finish(CInode *in, MDRequest *mdr)
+
+void Locker::file_rdlock_finish(FileLock *lock, MDRequest *mdr)
{
// drop ref
- assert(in->filelock.can_read(in->is_auth()));
- in->filelock.put_read();
- mdr->inode_file_rdlocks.erase(in);
- mdr->inode_file_locks.erase(in);
+ assert(lock->can_rdlock(mdr));
+ lock->put_rdlock();
+ mdr->rdlocks.erase(lock);
+ mdr->locks.erase(lock);
- dout(7) << "inode_file_rdlock_finish on " << *in << ", filelock=" << in->filelock << endl;
+ dout(7) << "rdlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
- if (in->filelock.get_nread() == 0) {
- in->finish_waiting(CInode::WAIT_FILENORD);
- inode_file_eval(in);
+ if (lock->get_num_rdlock() == 0) {
+ lock->finish_waiters(SimpleLock::WAIT_NORD);
+ file_eval(lock);
}
}
-bool Locker::inode_file_xlock_start(CInode *in, MDRequest *mdr)
+bool Locker::file_xlock_start(FileLock *lock, MDRequest *mdr)
{
- dout(7) << "inode_file_xlock_start on " << *in << endl;
+ dout(7) << "file_xlock_start on " << *lock << " on " << *lock->get_parent() << endl;
- // can't write?
- if (!in->filelock.can_write(in->is_auth())) {
-
- // can't write.
- if (in->is_auth()) {
- // auth
- if (!in->filelock.can_write_soon(in->is_auth())) {
- if (!in->filelock.is_stable()) {
- dout(7) << "inode_file_xlock_start on auth, waiting for stable on " << *in << endl;
- in->add_waiter(CInode::WAIT_FILESTABLE, new C_MDS_RetryRequest(mdcache, mdr));
- return false;
- }
-
- // initiate lock
- inode_file_lock(in);
+ assert(lock->get_parent()->is_auth()); // remote file xlock not implemented
+
+ // already xlocked by me?
+ if (lock->get_xlocked_by() == mdr)
+ return true;
- // fall-thru to below.
+ // can't write?
+ if (!lock->can_xlock(mdr)) {
+
+ // auth
+ if (!lock->can_xlock_soon()) {
+ if (!lock->is_stable()) {
+ dout(7) << "file_xlock_start on auth, waiting for stable on " << *lock << " on " << *lock->get_parent() << endl;
+ lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mdr));
+ return false;
}
- } else {
- // replica
- // fw to auth
- int auth = in->authority().first;
- dout(7) << "inode_file_xlock_start " << *in << " on replica, fw to auth " << auth << endl;
- assert(auth != mds->get_nodeid());
- mdcache->request_forward(mdr, auth);
- return false;
+
+ // initiate lock
+ file_lock(lock);
+
+ // fall-thru to below.
}
}
// check again
- if (in->filelock.can_write(in->is_auth())) {
- // can i auth pin?
- assert(in->is_auth());
- in->filelock.get_write(mdr);
- mdr->inode_file_locks.insert(in);
- mdr->inode_file_xlocks.insert(in);
+ if (lock->can_xlock(mdr)) {
+ assert(lock->get_parent()->is_auth());
+ lock->get_xlock(mdr);
+ mdr->locks.insert(lock);
+ mdr->xlocks.insert(lock);
return true;
} else {
- dout(7) << "inode_file_xlock_start on auth, waiting for write on " << *in << endl;
- in->add_waiter(CInode::WAIT_FILEW, new C_MDS_RetryRequest(mdcache, mdr));
+ dout(7) << "file_xlock_start on auth, waiting for write on " << *lock << " on " << *lock->get_parent() << endl;
+ lock->add_waiter(SimpleLock::WAIT_WR, new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
}
-void Locker::inode_file_xlock_finish(CInode *in, MDRequest *mdr)
+void Locker::file_xlock_finish(FileLock *lock, MDRequest *mdr)
{
// drop ref
- //assert(in->filelock.can_write(in->is_auth()));
- in->filelock.put_write();
- mdr->inode_file_locks.erase(in);
- mdr->inode_file_xlocks.erase(in);
- dout(7) << "inode_file_xlock_finish on " << *in << ", filelock=" << in->filelock << endl;
+ assert(lock->can_xlock(mdr));
+ lock->put_xlock();
+ mdr->locks.erase(lock);
+ mdr->xlocks.erase(lock);
+ dout(7) << "file_xlock_finish on " << *lock << " on " << *lock->get_parent() << endl;
// drop lock?
- if (!in->is_filelock_write_wanted()) {
- in->finish_waiting(CInode::WAIT_FILENOWR);
- inode_file_eval(in);
- }
+ if (!lock->is_waiting(SimpleLock::WAIT_LOCK))
+ file_eval(lock);
}
* - checks if soft state should change (eg bc last writer closed)
*/
-void Locker::inode_file_eval(CInode *in)
+void Locker::file_eval(FileLock *lock)
{
+ CInode *in = (CInode*)lock->get_parent();
+
int issued = in->get_caps_issued();
// [auth] finished gather?
if (in->is_auth() &&
- !in->filelock.is_stable() &&
- in->filelock.gather_set.size() == 0) {
- dout(7) << "inode_file_eval finished mds gather on " << *in << endl;
+ !lock->is_stable() &&
+ !lock->is_gathering()) {
+ dout(7) << "file_eval finished mds gather on " << *lock << " on " << *lock->get_parent() << endl;
- switch (in->filelock.get_state()) {
+ switch (lock->get_state()) {
// to lock
case LOCK_GLOCKR:
case LOCK_GLOCKM:
case LOCK_GLOCKL:
if (issued == 0) {
- in->filelock.set_state(LOCK_LOCK);
+ lock->set_state(LOCK_LOCK);
// waiters
- in->filelock.get_read();
- //in->filelock.get_write();
- in->finish_waiting(CInode::WAIT_FILERWB|CInode::WAIT_FILESTABLE);
- in->filelock.put_read();
- //in->filelock.put_write();
+ lock->get_rdlock();
+ lock->finish_waiters(SimpleLock::WAIT_LOCK|SimpleLock::WAIT_STABLE);
+ lock->put_rdlock();
}
break;
// to mixed
case LOCK_GMIXEDR:
if ((issued & ~(CAP_FILE_RD)) == 0) {
- in->filelock.set_state(LOCK_MIXED);
- in->finish_waiting(CInode::WAIT_FILESTABLE);
+ lock->set_state(LOCK_MIXED);
+ lock->finish_waiters(SimpleLock::WAIT_STABLE);
}
break;
case LOCK_GMIXEDL:
if ((issued & ~(CAP_FILE_WR)) == 0) {
- in->filelock.set_state(LOCK_MIXED);
+ lock->set_state(LOCK_MIXED);
if (in->is_replicated()) {
// data
bufferlist softdata;
- in->encode_file_state(softdata);
+ lock->encode_locked_state(softdata);
// bcast to replicas
- send_lock_message(in, LOCK_AC_MIXED, LOCK_OTYPE_IFILE, softdata);
+ send_lock_message(lock, LOCK_AC_MIXED, softdata);
}
- in->finish_waiting(CInode::WAIT_FILESTABLE);
+ lock->finish_waiters(SimpleLock::WAIT_STABLE);
}
break;
// to loner
case LOCK_GLONERR:
if (issued == 0) {
- in->filelock.set_state(LOCK_LONER);
- in->finish_waiting(CInode::WAIT_FILESTABLE);
+ lock->set_state(LOCK_LONER);
+ lock->finish_waiters(SimpleLock::WAIT_STABLE);
}
break;
case LOCK_GLONERM:
if ((issued & ~CAP_FILE_WR) == 0) {
- in->filelock.set_state(LOCK_LONER);
- in->finish_waiting(CInode::WAIT_FILESTABLE);
+ lock->set_state(LOCK_LONER);
+ lock->finish_waiters(SimpleLock::WAIT_STABLE);
}
break;
case LOCK_GSYNCL:
case LOCK_GSYNCM:
if ((issued & ~(CAP_FILE_RD)) == 0) {
- in->filelock.set_state(LOCK_SYNC);
+ lock->set_state(LOCK_SYNC);
{ // bcast data to replicas
bufferlist softdata;
- in->encode_file_state(softdata);
+ lock->encode_locked_state(softdata);
- send_lock_message(in, LOCK_AC_SYNC, LOCK_OTYPE_IFILE, softdata);
+ send_lock_message(lock, LOCK_AC_SYNC, softdata);
}
// waiters
- in->filelock.get_read();
- in->finish_waiting(CInode::WAIT_FILER|CInode::WAIT_FILESTABLE);
- in->filelock.put_read();
+ lock->get_rdlock();
+ lock->finish_waiters(SimpleLock::WAIT_RD|SimpleLock::WAIT_STABLE);
+ lock->put_rdlock();
}
break;
// [replica] finished caps gather?
if (!in->is_auth() &&
- !in->filelock.is_stable()) {
- switch (in->filelock.get_state()) {
+ !lock->is_stable()) {
+ switch (lock->get_state()) {
case LOCK_GMIXEDR:
if ((issued & ~(CAP_FILE_RD)) == 0) {
- in->filelock.set_state(LOCK_MIXED);
+ lock->set_state(LOCK_MIXED);
// ack
- MLock *reply = new MLock(LOCK_AC_MIXEDACK, mds->get_nodeid());
- reply->set_ino(in->ino(), LOCK_OTYPE_IFILE);
+ MLock *reply = new MLock(lock, LOCK_AC_MIXEDACK, mds->get_nodeid());
mds->send_message_mds(reply, in->authority().first, MDS_PORT_LOCKER);
}
break;
case LOCK_GLOCKR:
if (issued == 0) {
- in->filelock.set_state(LOCK_LOCK);
+ lock->set_state(LOCK_LOCK);
// ack
- MLock *reply = new MLock(LOCK_AC_LOCKACK, mds->get_nodeid());
- reply->set_ino(in->ino(), LOCK_OTYPE_IFILE);
+ MLock *reply = new MLock(lock, LOCK_AC_LOCKACK, mds->get_nodeid());
mds->send_message_mds(reply, in->authority().first, MDS_PORT_LOCKER);
}
break;
}
// !stable -> do nothing.
- if (!in->filelock.is_stable()) return;
+ if (!lock->is_stable()) return;
// stable.
- assert(in->filelock.is_stable());
+ assert(lock->is_stable());
if (in->is_auth()) {
// [auth]
int wanted = in->get_caps_wanted();
bool loner = (in->client_caps.size() == 1) && in->mds_caps_wanted.empty();
- dout(7) << "inode_file_eval wanted=" << cap_string(wanted)
- << " filelock=" << in->filelock
+ dout(7) << "file_eval wanted=" << cap_string(wanted)
+ << " filelock=" << *lock << " on " << *lock->get_parent()
<< " loner=" << loner
<< endl;
// * -> loner?
- if (in->filelock.get_nread() == 0 &&
- !in->is_filelock_write_wanted() &&
+ if (lock->get_num_rdlock() == 0 &&
+ !lock->is_waiting(SimpleLock::WAIT_WR) &&
(wanted & CAP_FILE_WR) &&
loner &&
- in->filelock.get_state() != LOCK_LONER) {
- dout(7) << "inode_file_eval stable, bump to loner " << *in << ", filelock=" << in->filelock << endl;
- inode_file_loner(in);
+ lock->get_state() != LOCK_LONER) {
+ dout(7) << "file_eval stable, bump to loner " << *lock << " on " << *lock->get_parent() << endl;
+ file_loner(lock);
}
// * -> mixed?
- else if (in->filelock.get_nread() == 0 &&
- !in->is_filelock_write_wanted() &&
+ else if (lock->get_num_rdlock() == 0 &&
+ !lock->is_waiting(SimpleLock::WAIT_WR) &&
(wanted & CAP_FILE_RD) &&
(wanted & CAP_FILE_WR) &&
- !(loner && in->filelock.get_state() == LOCK_LONER) &&
- in->filelock.get_state() != LOCK_MIXED) {
- dout(7) << "inode_file_eval stable, bump to mixed " << *in << ", filelock=" << in->filelock << endl;
- inode_file_mixed(in);
+ !(loner && lock->get_state() == LOCK_LONER) &&
+ lock->get_state() != LOCK_MIXED) {
+ dout(7) << "file_eval stable, bump to mixed " << *lock << " on " << *lock->get_parent() << endl;
+ file_mixed(lock);
}
// * -> sync?
- else if (!in->is_filelock_write_wanted() &&
+ else if (!in->filelock.is_waiting(SimpleLock::WAIT_WR) &&
!(wanted & CAP_FILE_WR) &&
((wanted & CAP_FILE_RD) ||
in->is_replicated() ||
- (!loner && in->filelock.get_state() == LOCK_LONER)) &&
- in->filelock.get_state() != LOCK_SYNC) {
- dout(7) << "inode_file_eval stable, bump to sync " << *in << ", filelock=" << in->filelock << endl;
- inode_file_sync(in);
+ (!loner && lock->get_state() == LOCK_LONER)) &&
+ lock->get_state() != LOCK_SYNC) {
+ dout(7) << "file_eval stable, bump to sync " << *lock << " on " << *lock->get_parent() << endl;
+ file_sync(lock);
}
// * -> lock? (if not replicated or open)
else if (!in->is_replicated() &&
wanted == 0 &&
- in->filelock.get_state() != LOCK_LOCK) {
- inode_file_lock(in);
+ lock->get_state() != LOCK_LOCK) {
+ file_lock(lock);
}
} else {
// mid
-bool Locker::inode_file_sync(CInode *in)
+bool Locker::file_sync(FileLock *lock)
{
- dout(7) << "inode_file_sync " << *in << " filelock=" << in->filelock << endl;
+ CInode *in = (CInode*)lock->get_parent();
+ dout(7) << "file_sync " << *lock << " on " << *lock->get_parent() << endl;
assert(in->is_auth());
// check state
- if (in->filelock.get_state() == LOCK_SYNC ||
- in->filelock.get_state() == LOCK_GSYNCL ||
- in->filelock.get_state() == LOCK_GSYNCM)
+ if (lock->get_state() == LOCK_SYNC ||
+ lock->get_state() == LOCK_GSYNCL ||
+ lock->get_state() == LOCK_GSYNCM)
return true;
- assert(in->filelock.is_stable());
+ assert(lock->is_stable());
int issued = in->get_caps_issued();
assert((in->get_caps_wanted() & CAP_FILE_WR) == 0);
- if (in->filelock.get_state() == LOCK_LOCK) {
+ if (lock->get_state() == LOCK_LOCK) {
if (in->is_replicated()) {
// soft data
bufferlist softdata;
- in->encode_file_state(softdata);
+ lock->encode_locked_state(softdata);
// bcast to replicas
- send_lock_message(in, LOCK_AC_SYNC, LOCK_OTYPE_IFILE, softdata);
+ send_lock_message(lock, LOCK_AC_SYNC, softdata);
}
// change lock
- in->filelock.set_state(LOCK_SYNC);
+ lock->set_state(LOCK_SYNC);
// reissue caps
issue_caps(in);
return true;
}
- else if (in->filelock.get_state() == LOCK_MIXED) {
+ else if (lock->get_state() == LOCK_MIXED) {
// writers?
if (issued & CAP_FILE_WR) {
// gather client write caps
- in->filelock.set_state(LOCK_GSYNCM);
+ lock->set_state(LOCK_GSYNCM);
issue_caps(in);
} else {
// no writers, go straight to sync
if (in->is_replicated()) {
// bcast to replicas
- send_lock_message(in, LOCK_AC_SYNC, LOCK_OTYPE_IFILE);
+ send_lock_message(lock, LOCK_AC_SYNC);
}
// change lock
- in->filelock.set_state(LOCK_SYNC);
+ lock->set_state(LOCK_SYNC);
}
return false;
}
- else if (in->filelock.get_state() == LOCK_LONER) {
+ else if (lock->get_state() == LOCK_LONER) {
// writers?
if (issued & CAP_FILE_WR) {
// gather client write caps
- in->filelock.set_state(LOCK_GSYNCL);
+ lock->set_state(LOCK_GSYNCL);
issue_caps(in);
} else {
// no writers, go straight to sync
if (in->is_replicated()) {
// bcast to replicas
- send_lock_message(in, LOCK_AC_SYNC, LOCK_OTYPE_IFILE);
+ send_lock_message(lock, LOCK_AC_SYNC);
}
// change lock
- in->filelock.set_state(LOCK_SYNC);
+ lock->set_state(LOCK_SYNC);
}
return false;
}
-void Locker::inode_file_lock(CInode *in)
+void Locker::file_lock(FileLock *lock)
{
- dout(7) << "inode_file_lock " << *in << " filelock=" << in->filelock << endl;
+ CInode *in = (CInode*)lock->get_parent();
+ dout(7) << "inode_file_lock " << *lock << " on " << *lock->get_parent() << endl;
assert(in->is_auth());
// check state
- if (in->filelock.get_state() == LOCK_LOCK ||
- in->filelock.get_state() == LOCK_GLOCKR ||
- in->filelock.get_state() == LOCK_GLOCKM ||
- in->filelock.get_state() == LOCK_GLOCKL)
+ if (lock->get_state() == LOCK_LOCK ||
+ lock->get_state() == LOCK_GLOCKR ||
+ lock->get_state() == LOCK_GLOCKM ||
+ lock->get_state() == LOCK_GLOCKL)
return; // lock or locking
- assert(in->filelock.is_stable());
+ assert(lock->is_stable());
int issued = in->get_caps_issued();
- if (in->filelock.get_state() == LOCK_SYNC) {
+ if (lock->get_state() == LOCK_SYNC) {
if (in->is_replicated()) {
// bcast to replicas
- send_lock_message(in, LOCK_AC_LOCK, LOCK_OTYPE_IFILE);
- in->filelock.init_gather(in->get_replicas());
+ send_lock_message(lock, LOCK_AC_LOCK);
+ lock->init_gather();
// change lock
- in->filelock.set_state(LOCK_GLOCKR);
+ lock->set_state(LOCK_GLOCKR);
// call back caps
if (issued)
} else {
if (issued) {
// call back caps
- in->filelock.set_state(LOCK_GLOCKR);
+ lock->set_state(LOCK_GLOCKR);
issue_caps(in);
} else {
- in->filelock.set_state(LOCK_LOCK);
+ lock->set_state(LOCK_LOCK);
}
}
}
- else if (in->filelock.get_state() == LOCK_MIXED) {
+ else if (lock->get_state() == LOCK_MIXED) {
if (in->is_replicated()) {
// bcast to replicas
- send_lock_message(in, LOCK_AC_LOCK, LOCK_OTYPE_IFILE);
- in->filelock.init_gather(in->get_replicas());
+ send_lock_message(lock, LOCK_AC_LOCK);
+ lock->init_gather();
// change lock
- in->filelock.set_state(LOCK_GLOCKM);
+ lock->set_state(LOCK_GLOCKM);
// call back caps
issue_caps(in);
//assert(issued); // ??? -sage 2/19/06
if (issued) {
// change lock
- in->filelock.set_state(LOCK_GLOCKM);
+ lock->set_state(LOCK_GLOCKM);
// call back caps
issue_caps(in);
} else {
- in->filelock.set_state(LOCK_LOCK);
+ lock->set_state(LOCK_LOCK);
}
}
}
- else if (in->filelock.get_state() == LOCK_LONER) {
+ else if (lock->get_state() == LOCK_LONER) {
if (issued & CAP_FILE_WR) {
// change lock
- in->filelock.set_state(LOCK_GLOCKL);
+ lock->set_state(LOCK_GLOCKL);
// call back caps
issue_caps(in);
} else {
- in->filelock.set_state(LOCK_LOCK);
+ lock->set_state(LOCK_LOCK);
}
}
else
}
-void Locker::inode_file_mixed(CInode *in)
+void Locker::file_mixed(FileLock *lock)
{
- dout(7) << "inode_file_mixed " << *in << " filelock=" << in->filelock << endl;
+ dout(7) << "file_mixed " << *lock << " on " << *lock->get_parent() << endl;
+ CInode *in = (CInode*)lock->get_parent();
assert(in->is_auth());
// check state
- if (in->filelock.get_state() == LOCK_GMIXEDR ||
- in->filelock.get_state() == LOCK_GMIXEDL)
+ if (lock->get_state() == LOCK_GMIXEDR ||
+ lock->get_state() == LOCK_GMIXEDL)
return; // mixed or mixing
- assert(in->filelock.is_stable());
+ assert(lock->is_stable());
int issued = in->get_caps_issued();
- if (in->filelock.get_state() == LOCK_SYNC) {
+ if (lock->get_state() == LOCK_SYNC) {
if (in->is_replicated()) {
// bcast to replicas
- send_lock_message(in, LOCK_AC_MIXED, LOCK_OTYPE_IFILE);
- in->filelock.init_gather(in->get_replicas());
+ send_lock_message(lock, LOCK_AC_MIXED);
+ lock->init_gather();
- in->filelock.set_state(LOCK_GMIXEDR);
+ lock->set_state(LOCK_GMIXEDR);
issue_caps(in);
} else {
if (issued) {
- in->filelock.set_state(LOCK_GMIXEDR);
+ lock->set_state(LOCK_GMIXEDR);
issue_caps(in);
} else {
- in->filelock.set_state(LOCK_MIXED);
+ lock->set_state(LOCK_MIXED);
}
}
}
- else if (in->filelock.get_state() == LOCK_LOCK) {
+ else if (lock->get_state() == LOCK_LOCK) {
if (in->is_replicated()) {
// data
bufferlist softdata;
- in->encode_file_state(softdata);
+ lock->encode_locked_state(softdata);
// bcast to replicas
- send_lock_message(in, LOCK_AC_MIXED, LOCK_OTYPE_IFILE, softdata);
+ send_lock_message(lock, LOCK_AC_MIXED, softdata);
}
// change lock
- in->filelock.set_state(LOCK_MIXED);
+ lock->set_state(LOCK_MIXED);
issue_caps(in);
}
- else if (in->filelock.get_state() == LOCK_LONER) {
+ else if (lock->get_state() == LOCK_LONER) {
if (issued & CAP_FILE_WRBUFFER) {
// gather up WRBUFFER caps
- in->filelock.set_state(LOCK_GMIXEDL);
+ lock->set_state(LOCK_GMIXEDL);
issue_caps(in);
}
else if (in->is_replicated()) {
// bcast to replicas
- send_lock_message(in, LOCK_AC_MIXED, LOCK_OTYPE_IFILE);
- in->filelock.set_state(LOCK_MIXED);
+ send_lock_message(lock, LOCK_AC_MIXED);
+ lock->set_state(LOCK_MIXED);
issue_caps(in);
} else {
- in->filelock.set_state(LOCK_MIXED);
+ lock->set_state(LOCK_MIXED);
issue_caps(in);
}
}
}
-void Locker::inode_file_loner(CInode *in)
+void Locker::file_loner(FileLock *lock)
{
- dout(7) << "inode_file_loner " << *in << " filelock=" << in->filelock << endl;
+ CInode *in = (CInode*)lock->get_parent();
+ dout(7) << "inode_file_loner " << *lock << " on " << *lock->get_parent() << endl;
assert(in->is_auth());
// check state
- if (in->filelock.get_state() == LOCK_LONER ||
- in->filelock.get_state() == LOCK_GLONERR ||
- in->filelock.get_state() == LOCK_GLONERM)
+ if (lock->get_state() == LOCK_LONER ||
+ lock->get_state() == LOCK_GLONERR ||
+ lock->get_state() == LOCK_GLONERM)
return;
- assert(in->filelock.is_stable());
+ assert(lock->is_stable());
assert((in->client_caps.size() == 1) && in->mds_caps_wanted.empty());
- if (in->filelock.get_state() == LOCK_SYNC) {
+ if (lock->get_state() == LOCK_SYNC) {
if (in->is_replicated()) {
// bcast to replicas
- send_lock_message(in, LOCK_AC_LOCK, LOCK_OTYPE_IFILE);
- in->filelock.init_gather(in->get_replicas());
+ send_lock_message(lock, LOCK_AC_LOCK);
+ lock->init_gather();
// change lock
- in->filelock.set_state(LOCK_GLONERR);
+ lock->set_state(LOCK_GLONERR);
} else {
// only one guy with file open, who gets it all, so
- in->filelock.set_state(LOCK_LONER);
+ lock->set_state(LOCK_LONER);
issue_caps(in);
}
}
- else if (in->filelock.get_state() == LOCK_LOCK) {
+ else if (lock->get_state() == LOCK_LOCK) {
// change lock. ignore replicas; they don't know about LONER.
- in->filelock.set_state(LOCK_LONER);
+ lock->set_state(LOCK_LONER);
issue_caps(in);
}
- else if (in->filelock.get_state() == LOCK_MIXED) {
+ else if (lock->get_state() == LOCK_MIXED) {
if (in->is_replicated()) {
// bcast to replicas
- send_lock_message(in, LOCK_AC_LOCK, LOCK_OTYPE_IFILE);
- in->filelock.init_gather(in->get_replicas());
+ send_lock_message(lock, LOCK_AC_LOCK);
+ lock->init_gather();
// change lock
- in->filelock.set_state(LOCK_GLONERM);
+ lock->set_state(LOCK_GLONERM);
} else {
- in->filelock.set_state(LOCK_LONER);
+ lock->set_state(LOCK_LONER);
issue_caps(in);
}
}
assert(0);
}
+
+
// messenger
-void Locker::handle_lock_inode_file(MLock *m)
+void Locker::handle_file_lock(FileLock *lock, MLock *m)
{
- assert(m->get_otype() == LOCK_OTYPE_IFILE);
-
if (mds->logger) mds->logger->inc("lif");
- CInode *in = mdcache->get_inode(m->get_ino());
+ CInode *in = (CInode*)lock->get_parent();
int from = m->get_asker();
- if (LOCK_AC_FOR_AUTH(m->get_action())) {
- // auth
- assert(in);
- assert(in->is_auth());// || in->is_proxy());
- dout(7) << "handle_lock_inode_file " << *in << " hardlock=" << in->hardlock << endl;
-
- /*if (in->is_proxy()) {
- // fw
- int newauth = in->authority().first;
- assert(newauth >= 0);
- if (from == newauth) {
- dout(7) << "handle_lock " << m->get_ino() << " from " << from << ": proxy, but from new auth, dropping" << endl;
- delete m;
- } else {
- dout(7) << "handle_lock " << m->get_ino() << " from " << from << ": proxy, fw to " << newauth << endl;
- mds->send_message_mds(m, newauth, MDS_PORT_LOCKER);
- }
- return;
- }
- */
- } else {
- // replica
- if (!in) {
- // drop it. don't nak.
- dout(7) << "handle_lock " << m->get_ino() << ": don't have it anymore" << endl;
- delete m;
- return;
- }
-
- assert(!in->is_auth());
- }
-
- dout(7) << "handle_lock_inode_file a=" << m->get_action() << " from " << from << " " << *in << " filelock=" << in->filelock << endl;
+ dout(7) << "handle_file_lock a=" << m->get_action() << " from " << from << " "
+ << *in << " filelock=" << *lock << endl;
- CLock *lock = &in->filelock;
int issued = in->get_caps_issued();
switch (m->get_action()) {
assert(lock->get_state() == LOCK_LOCK ||
lock->get_state() == LOCK_MIXED);
- { // assim data
- int off = 0;
- in->decode_file_state(m->get_data(), off);
- }
-
- // update lock
+ lock->decode_locked_state(m->get_data());
lock->set_state(LOCK_SYNC);
// no need to reply.
// waiters
- in->filelock.get_read();
- in->finish_waiting(CInode::WAIT_FILER|CInode::WAIT_FILESTABLE);
- in->filelock.put_read();
- inode_file_eval(in);
+ lock->get_rdlock();
+ lock->finish_waiters(SimpleLock::WAIT_RD|SimpleLock::WAIT_STABLE);
+ lock->put_rdlock();
+ file_eval(lock);
break;
case LOCK_AC_LOCK:
// call back caps?
if (issued & CAP_FILE_RD) {
- dout(7) << "handle_lock_inode_file client readers, gathering caps on " << *in << endl;
+ dout(7) << "handle_file_lock client readers, gathering caps on " << *in << endl;
issue_caps(in);
}
- if (lock->get_nread() > 0) {
- dout(7) << "handle_lock_inode_file readers, waiting before ack on " << *in << endl;
- in->add_waiter(CInode::WAIT_FILENORD,
- new C_MDS_RetryMessage(mds,m));
+ if (lock->get_num_rdlock() > 0) {
+ dout(7) << "handle_file_lock readers, waiting before ack on " << *in << endl;
+ in->add_waiter(SimpleLock::WAIT_NORD, new C_MDS_RetryMessage(mds, m));
lock->set_state(LOCK_GLOCKR);
assert(0);// i am broken.. why retry message when state captures all the info i need?
return;
// nothing to wait for, lock and ack.
{
lock->set_state(LOCK_LOCK);
-
- MLock *reply = new MLock(LOCK_AC_LOCKACK, mds->get_nodeid());
- reply->set_ino(in->ino(), LOCK_OTYPE_IFILE);
+
+ MLock *reply = new MLock(lock, LOCK_AC_LOCKACK, mds->get_nodeid());
mds->send_message_mds(reply, from, MDS_PORT_LOCKER);
}
break;
lock->set_state(LOCK_MIXED);
// ack
- MLock *reply = new MLock(LOCK_AC_MIXEDACK, mds->get_nodeid());
- reply->set_ino(in->ino(), LOCK_OTYPE_IFILE);
+ MLock *reply = new MLock(lock, LOCK_AC_MIXEDACK, mds->get_nodeid());
mds->send_message_mds(reply, from, MDS_PORT_LOCKER);
}
} else {
issue_caps(in);
// waiters
- //in->filelock.get_write();
- in->finish_waiting(CInode::WAIT_FILEW|CInode::WAIT_FILESTABLE);
- //in->filelock.put_write();
- inode_file_eval(in);
+ lock->finish_waiters(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE);
+ file_eval(lock);
break;
// -- auth --
case LOCK_AC_LOCKACK:
- assert(lock->state == LOCK_GLOCKR ||
- lock->state == LOCK_GLOCKM ||
- lock->state == LOCK_GLONERM ||
- lock->state == LOCK_GLONERR);
- assert(lock->gather_set.count(from));
- lock->gather_set.erase(from);
-
- if (lock->gather_set.size()) {
- dout(7) << "handle_lock_inode_file " << *in << " from " << from << ", still gathering " << lock->gather_set << endl;
+ assert(lock->get_state() == LOCK_GLOCKR ||
+ lock->get_state() == LOCK_GLOCKM ||
+ lock->get_state() == LOCK_GLONERM ||
+ lock->get_state() == LOCK_GLONERR);
+ assert(lock->is_gathering(from));
+ lock->remove_gather(from);
+
+ if (lock->is_gathering()) {
+ dout(7) << "handle_lock_inode_file " << *in << " from " << from
+ << ", still gathering " << lock->get_gather_set() << endl;
} else {
- dout(7) << "handle_lock_inode_file " << *in << " from " << from << ", last one" << endl;
- inode_file_eval(in);
+ dout(7) << "handle_lock_inode_file " << *in << " from " << from
+ << ", last one" << endl;
+ file_eval(lock);
}
break;
case LOCK_AC_SYNCACK:
- assert(lock->state == LOCK_GSYNCM);
- assert(lock->gather_set.count(from));
- lock->gather_set.erase(from);
+ assert(lock->get_state() == LOCK_GSYNCM);
+ assert(lock->is_gathering(from));
+ lock->remove_gather(from);
/* not used currently
{
}
*/
- if (lock->gather_set.size()) {
- dout(7) << "handle_lock_inode_file " << *in << " from " << from << ", still gathering " << lock->gather_set << endl;
+ if (lock->is_gathering()) {
+ dout(7) << "handle_lock_inode_file " << *in << " from " << from
+ << ", still gathering " << lock->get_gather_set() << endl;
} else {
- dout(7) << "handle_lock_inode_file " << *in << " from " << from << ", last one" << endl;
- inode_file_eval(in);
+ dout(7) << "handle_lock_inode_file " << *in << " from " << from
+ << ", last one" << endl;
+ file_eval(lock);
}
break;
case LOCK_AC_MIXEDACK:
- assert(lock->state == LOCK_GMIXEDR);
- assert(lock->gather_set.count(from));
- lock->gather_set.erase(from);
+ assert(lock->get_state() == LOCK_GMIXEDR);
+ assert(lock->is_gathering(from));
+ lock->remove_gather(from);
- if (lock->gather_set.size()) {
- dout(7) << "handle_lock_inode_file " << *in << " from " << from << ", still gathering " << lock->gather_set << endl;
+ if (lock->is_gathering()) {
+ dout(7) << "handle_lock_inode_file " << *in << " from " << from
+ << ", still gathering " << lock->get_gather_set() << endl;
} else {
- dout(7) << "handle_lock_inode_file " << *in << " from " << from << ", last one" << endl;
- inode_file_eval(in);
+ dout(7) << "handle_lock_inode_file " << *in << " from " << from
+ << ", last one" << endl;
+ file_eval(lock);
}
break;
-
-
-
-
-
-
-
-
-void Locker::handle_lock_dir(MLock *m)
-{
-}
-
-
-
-// DENTRY
-
-
-// trace helpers
-
-/** dentry_can_rdlock_trace
- * see if we can _anonymously_ rdlock an entire trace.
- * if not, and req is specified, wait and retry that message.
- */
-bool Locker::dentry_can_rdlock_trace(vector<CDentry*>& trace, MClientRequest *req)
-{
- // verify dentries are rdlockable.
- // we do this because
- // - we're being less aggressive about locks acquisition, and
- // - we're not acquiring the locks in order!
- for (vector<CDentry*>::iterator it = trace.begin();
- it != trace.end();
- it++) {
- CDentry *dn = *it;
- if (!dn->can_rdlock(0)) {
- if (req) {
- dout(10) << "can_rdlock_trace can't rdlock " << *dn << ", waiting" << endl;
- dn->dir->add_waiter(CDir::WAIT_DNPINNABLE,
- dn->name,
- new C_MDS_RetryMessage(mds, req));
- } else {
- dout(10) << "can_rdlock_trace can't rdlock " << *dn << endl;
- }
- return false;
- }
- }
- return true;
-}
-
-void Locker::dentry_anon_rdlock_trace_start(vector<CDentry*>& trace)
-{
- // grab dentry rdlocks
- for (vector<CDentry*>::iterator it = trace.begin();
- it != trace.end();
- it++)
- (*it)->get_rdlock(0);
-}
-
-
-
-bool Locker::dentry_rdlock_start(CDentry *dn, MDRequest *mdr)
-{
- // verify lockable
- if (!dn->can_rdlock(mdr)) {
- // wait
- dout(10) << "dentry_rdlock_start waiting on " << *dn << endl;
- dn->dir->add_waiter(CDir::WAIT_DNPINNABLE,
- dn->name,
- new C_MDS_RetryRequest(mdcache, mdr));
- return false;
- }
-
- // rdlock
- dout(10) << "dentry_rdlock_start " << *dn << endl;
- dn->get_rdlock(mdr);
-
- mdr->dentry_rdlocks.insert(dn);
- mdr->dentry_locks.insert(dn);
-
- return true;
-}
-
-
-void Locker::_dentry_rdlock_finish(CDentry *dn, MDRequest *mdr)
-{
- dn->put_rdlock(mdr);
-
- // did we completely unpin a waiter?
- if (dn->lockstate == DN_LOCK_UNPINNING && !dn->get_num_ref()) {
- // return state to sync, in case the unpinner flails
- dn->lockstate = DN_LOCK_SYNC;
-
- // run finisher right now to give them a fair shot.
- dn->dir->finish_waiting(CDir::WAIT_DNUNPINNED, dn->name);
- }
-}
-
-void Locker::dentry_rdlock_finish(CDentry *dn, MDRequest *mdr)
-{
- dout(10) << "dentry_rdlock_finish " << *dn << endl;
- _dentry_rdlock_finish(dn, mdr);
- mdr->dentry_rdlocks.erase(dn);
- mdr->dentry_locks.erase(dn);
-}
-
-void Locker::dentry_anon_rdlock_trace_finish(vector<CDentry*>& trace)
-{
- for (vector<CDentry*>::iterator it = trace.begin();
- it != trace.end();
- it++)
- _dentry_rdlock_finish(*it, 0);
-}
-
-bool Locker::dentry_xlock_start(CDentry *dn, MDRequest *mdr)
-{
- dout(7) << "dentry_xlock_start on " << *dn << endl;
-
- // locked?
- if (dn->lockstate == DN_LOCK_XLOCK) {
- if (dn->xlockedby == mdr) return true; // locked by me!
-
- // not by me, wait
- dout(7) << "dentry " << *dn << " xlock by someone else" << endl;
- dn->dir->add_waiter(CDir::WAIT_DNREAD, dn->name,
- new C_MDS_RetryRequest(mdcache, mdr));
- return false;
- }
-
- // prelock?
- if (dn->lockstate == DN_LOCK_PREXLOCK) {
- if (dn->xlockedby == mdr) {
- dout(7) << "dentry " << *dn << " prexlock by me" << endl;
- dn->dir->add_waiter(CDir::WAIT_DNLOCK, dn->name,
- new C_MDS_RetryRequest(mdcache, mdr));
- } else {
- dout(7) << "dentry " << *dn << " prexlock by someone else" << endl;
- dn->dir->add_waiter(CDir::WAIT_DNREAD, dn->name,
- new C_MDS_RetryRequest(mdcache, mdr));
- }
- return false;
- }
-
-
- // lockable!
- assert(dn->lockstate == DN_LOCK_SYNC ||
- dn->lockstate == DN_LOCK_UNPINNING);
-
- // is dentry path pinned?
- if (dn->is_rdlocked()) {
- dout(7) << "dentry " << *dn << " pinned, waiting" << endl;
- dn->lockstate = DN_LOCK_UNPINNING;
- dn->dir->add_waiter(CDir::WAIT_DNUNPINNED,
- dn->name,
- new C_MDS_RetryRequest(mdcache, mdr));
- return false;
- }
-
- // mine!
- dn->xlockedby = mdr;
-
- // pin me!
- dn->get(CDentry::PIN_XLOCK);
-
- if (dn->is_replicated()) {
- dn->lockstate = DN_LOCK_PREXLOCK;
-
- // xlock with whom?
- set<int> who;
- for (map<int,int>::iterator p = dn->replicas_begin();
- p != dn->replicas_end();
- ++p)
- who.insert(p->first);
- dn->gather_set = who;
-
- // make path
- string path;
- dn->make_path(path);
- dout(10) << "path is " << path << " for " << *dn << endl;
-
- for (set<int>::iterator it = who.begin();
- it != who.end();
- it++) {
- MLock *m = new MLock(LOCK_AC_LOCK, mds->get_nodeid());
- m->set_dn(dn->dir->dirfrag(), dn->name);
- m->set_path(path);
- mds->send_message_mds(m, *it, MDS_PORT_LOCKER);
- }
-
- // wait
- dout(7) << "dentry_xlock_start locking, waiting for replicas " << endl;
- dn->dir->add_waiter(CDir::WAIT_DNLOCK, dn->name,
- new C_MDS_RetryRequest(mdcache, mdr));
- return false;
- } else {
- dn->lockstate = DN_LOCK_XLOCK;
- mdr->dentry_xlocks.insert(dn);
- mdr->dentry_locks.insert(dn);
- return true;
- }
-}
-
-void Locker::dentry_xlock_finish(CDentry *dn, MDRequest *mdr, bool quiet)
-{
- dout(7) << "dentry_xlock_finish on " << *dn << endl;
-
- assert(dn->xlockedby);
- if (dn->xlockedby == DN_XLOCK_FOREIGN) {
- dout(7) << "this was a foreign xlock" << endl;
- } else {
- // remove from request record
- mdr->dentry_xlocks.erase(dn);
- mdr->dentry_locks.erase(dn);
- }
-
- dn->xlockedby = 0;
- dn->lockstate = DN_LOCK_SYNC;
-
- // unpin
- dn->put(CDentry::PIN_XLOCK);
-
- // tell replicas?
- if (!quiet) {
- // tell even if dn is null.
- if (dn->is_replicated()) {
- send_lock_message(dn, LOCK_AC_SYNC);
- }
- }
-
- // kick waiters
- list<Context*> finished;
- dn->dir->take_waiting(CDir::WAIT_DNREAD, finished);
- mds->queue_finished(finished);
-}
-
-
-void Locker::dentry_xlock_downgrade_to_rdlock(CDentry *dn, MDRequest *mdr)
-{
- dout(7) << "dentry_xlock_downgrade_to_rdlock on " << *dn << endl;
-
- assert(dn->xlockedby);
- if (dn->xlockedby == DN_XLOCK_FOREIGN) {
- dout(7) << "this was a foreign xlock" << endl;
- assert(0); // rewrite me
- }
-
- // un-xlock
- dn->xlockedby = 0;
- dn->lockstate = DN_LOCK_SYNC;
- mdr->dentry_xlocks.erase(dn);
- dn->put(CDentry::PIN_XLOCK);
-
- // rdlock
- mdr->dentry_rdlocks.insert(dn);
- dn->get_rdlock(mdr);
-
- // tell replicas?
- if (dn->is_replicated()) {
- send_lock_message(dn, LOCK_AC_SYNC);
- }
-
- // kick waiters
- list<Context*> finished;
- dn->dir->take_waiting(CDir::WAIT_DNREAD, finished);
- mds->queue_finished(finished);
-}
-
-
-/*
- * onfinish->finish() will be called with
- * 0 on successful xlock,
- * -1 on failure
- */
-/*
-class C_MDC_XlockRequest : public Context {
- Locker *mdc;
- CDir *dir;
- string dname;
- Message *req;
- Context *finisher;
-public:
- C_MDC_XlockRequest(Locker *mdc,
- CDir *dir, const string& dname,
- Message *req,
- Context *finisher) {
- this->mdc = mdc;
- this->dir = dir;
- this->dname = dname;
- this->req = req;
- this->finisher = finisher;
- }
-
- void finish(int r) {
- mdc->dentry_xlock_request_finish(r, dir, dname, req, finisher);
- }
-};
-
-void Locker::dentry_xlock_request_finish(int r,
- CDir *dir, const string& dname,
- Message *req,
- Context *finisher)
-{
- dout(10) << "dentry_xlock_request_finish r = " << r << endl;
- if (r == 1) { // 1 for xlock request success
- CDentry *dn = dir->lookup(dname);
- if (dn && dn->xlockedby == 0) {
- // success
- dn->xlockedby = req; // our request was the winner
- dout(10) << "xlock request success, now xlocked by req " << req << " dn " << *dn << endl;
-
- // remember!
- mdcache->active_requests[req].foreign_xlocks.insert(dn);
- }
- }
-
- // retry request (or whatever)
- finisher->finish(0);
- delete finisher;
-}
-
-void Locker::dentry_xlock_request(CDir *dir, const string& dname, bool create,
- Message *req, Context *onfinish)
-{
- dout(10) << "dentry_xlock_request on dn " << dname << " create=" << create << " in " << *dir << endl;
- // send request
- int dauth = dir->dentry_authority(dname).first;
- MLock *m = new MLock(create ? LOCK_AC_REQXLOCKC:LOCK_AC_REQXLOCK, mds->get_nodeid());
- m->set_dn(dir->dirfrag(), dname);
- mds->send_message_mds(m, dauth, MDS_PORT_LOCKER);
-
- // add waiter
- dir->add_waiter(CDir::WAIT_DNREQXLOCK, dname,
- new C_MDC_XlockRequest(this,
- dir, dname, req,
- onfinish));
-}
-*/
-
-
-
-void Locker::handle_lock_dn(MLock *m)
-{
- assert(m->get_otype() == LOCK_OTYPE_DN);
-
- CDir *dir = mdcache->get_dirfrag(m->get_dirfrag()); // may be null
- string dname = m->get_dn();
- int from = m->get_asker();
- CDentry *dn = 0;
-
- if (LOCK_AC_FOR_AUTH(m->get_action())) {
- // auth
-
- // normally we have it always
- if (dir) {
- int dauth = dir->dentry_authority(dname).first;
- assert(dauth == mds->get_nodeid() || dir->is_proxy() || // mine or proxy,
- m->get_action() == LOCK_AC_REQXLOCKACK || // or we did a REQXLOCK and this is our ack/nak
- m->get_action() == LOCK_AC_REQXLOCKNAK);
-
- if (dir->is_proxy()) {
-
- assert(dauth >= 0);
-
- if (dauth == m->get_asker() &&
- (m->get_action() == LOCK_AC_REQXLOCK ||
- m->get_action() == LOCK_AC_REQXLOCKC)) {
- dout(7) << "handle_lock_dn got reqxlock from " << dauth << " and they are auth.. dropping on floor (their import will have woken them up)" << endl;
- /*if (mdcache->active_requests.count(m))
- mdcache->request_finish(m);
- else
- delete m;
- */
- assert(0); // FIXME REWRITE ME >>>>>>>
- return;
- }
-
- dout(7) << "handle_lock_dn " << m << " " << m->get_ino() << " dname " << dname << " from " << from << ": proxy, fw to " << dauth << endl;
-
- /* ******* REWRITE ME SDFKJDSFDSFJK:SDFJKDFSJKFDSHJKDFSHJKDFS>>>>>>>
- // forward
- if (mdcache->active_requests.count(m)) {
- // xlock requests are requests, use request_* functions!
- assert(m->get_action() == LOCK_AC_REQXLOCK ||
- m->get_action() == LOCK_AC_REQXLOCKC);
- // forward as a request
- mdcache->request_forward(m, dauth, MDS_PORT_LOCKER);
- } else {
- // not an xlock req, or it is and we just didn't register the request yet
- // forward normally
- mds->send_message_mds(m, dauth, MDS_PORT_LOCKER);
- }
- */
- return;
- }
-
- dn = dir->lookup(dname);
- }
-
- // except with.. an xlock request?
- if (!dn) {
- assert(dir); // we should still have the dir, though! the requester has the dir open.
- switch (m->get_action()) {
-
- case LOCK_AC_LOCK:
- dout(7) << "handle_lock_dn xlock on " << dname << ", adding (null)" << endl;
- dn = dir->add_dentry(dname);
- break;
-
- case LOCK_AC_REQXLOCK:
- // send nak
- if (dir->state_test(CDir::STATE_DELETED)) {
- dout(7) << "handle_lock_dn reqxlock on deleted dir " << *dir << ", nak" << endl;
- } else {
- dout(7) << "handle_lock_dn reqxlock on " << dname << " in " << *dir << " dne, nak" << endl;
- }
- {
- MLock *reply = new MLock(LOCK_AC_REQXLOCKNAK, mds->get_nodeid());
- reply->set_dn(dir->dirfrag(), dname);
- reply->set_path(m->get_path());
- mds->send_message_mds(reply, m->get_asker(), MDS_PORT_LOCKER);
- }
-
- // finish request (if we got that far)
- /* FIXME F>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
- if (mdcache->active_requests.count(m))
- mdcache->request_finish(m);
- */
-
- delete m;
- return;
-
- case LOCK_AC_REQXLOCKC:
- dout(7) << "handle_lock_dn reqxlockc on " << dname << " in " << *dir << " dne (yet!)" << endl;
- break;
-
- default:
- assert(0);
- }
- }
- } else {
- // replica
- if (dir) dn = dir->lookup(dname);
- if (!dn) {
- dout(7) << "handle_lock_dn " << m << " don't have " << m->get_ino() << " dname " << dname << endl;
-
- if (m->get_action() == LOCK_AC_REQXLOCKACK ||
- m->get_action() == LOCK_AC_REQXLOCKNAK) {
- dout(7) << "handle_lock_dn got reqxlockack/nak, but don't have dn " << m->get_path() << ", discovering" << endl;
- //assert(0); // how can this happen? tell me now!
-
- vector<CDentry*> trace;
- filepath path = m->get_path();
- int r = mdcache->path_traverse(0, 0, // FIXME FIXME >>>>>>>>>>>>>>>>>>>>>>>>
- path, trace, true,
- m, new C_MDS_RetryMessage(mds, m),
- MDS_TRAVERSE_DISCOVER,
- false, true);
- assert(r>0);
- return;
- }
-
- if (m->get_action() == LOCK_AC_LOCK) {
- if (0) { // not anymore
- dout(7) << "handle_lock_dn don't have " << m->get_path() << ", discovering" << endl;
-
- vector<CDentry*> trace;
- filepath path = m->get_path();
- int r = mdcache->path_traverse(0, 0, // FIXME >>>>>>>>>>>>>>>>>>>>>>>>
- path, trace, true,
- m, new C_MDS_RetryMessage(mds,m),
- MDS_TRAVERSE_DISCOVER,
- false, true);
- assert(r>0);
- }
- if (1) {
- // NAK
- MLock *reply = new MLock(LOCK_AC_LOCKNAK, mds->get_nodeid());
- reply->set_dn(m->get_dirfrag(), dname);
- mds->send_message_mds(reply, m->get_asker(), MDS_PORT_LOCKER);
- }
- } else {
- dout(7) << "safely ignoring." << endl;
- delete m;
- }
- return;
- }
-
- assert(dn);
- }
-
- if (dn) {
- dout(7) << "handle_lock_dn a=" << m->get_action() << " from " << from << " " << *dn << endl;
- } else {
- dout(7) << "handle_lock_dn a=" << m->get_action() << " from " << from << " " << dname << " in " << *dir << endl;
- }
-
- switch (m->get_action()) {
- // -- replica --
- case LOCK_AC_LOCK:
- assert(dn->lockstate == DN_LOCK_SYNC ||
- dn->lockstate == DN_LOCK_UNPINNING ||
- dn->lockstate == DN_LOCK_XLOCK); // <-- bc the handle_lock_dn did the discover!
-
- if (dn->is_rdlocked()) {
- dn->lockstate = DN_LOCK_UNPINNING;
-
- // wait
- dout(7) << "dn pinned, waiting " << *dn << endl;
- dn->dir->add_waiter(CDir::WAIT_DNUNPINNED,
- dn->name,
- new C_MDS_RetryMessage(mds, m));
- return;
- } else {
- dn->lockstate = DN_LOCK_XLOCK;
- dn->xlockedby = 0;
-
- // ack now
- MLock *reply = new MLock(LOCK_AC_LOCKACK, mds->get_nodeid());
- reply->set_dn(dir->dirfrag(), dname);
- mds->send_message_mds(reply, from, MDS_PORT_LOCKER);
- }
-
- // wake up waiters
- dir->finish_waiting(CDir::WAIT_DNLOCK, dname); // ? will this happen on replica ?
- break;
-
- case LOCK_AC_SYNC:
- assert(dn->lockstate == DN_LOCK_XLOCK);
- dn->lockstate = DN_LOCK_SYNC;
- dn->xlockedby = 0;
-
- // null? hose it.
- if (dn->is_null()) {
- dout(7) << "hosing null (and now sync) dentry " << *dn << endl;
- dir->remove_dentry(dn);
- }
-
- // wake up waiters
- dir->finish_waiting(CDir::WAIT_DNREAD, dname); // will this happen either? YES: if a rename lock backs out
- break;
-
- case LOCK_AC_REQXLOCKACK:
- case LOCK_AC_REQXLOCKNAK:
- {
- dout(10) << "handle_lock_dn got ack/nak on a reqxlock for " << *dn << endl;
- list<Context*> finished;
- dir->take_waiting(CDir::WAIT_DNREQXLOCK, m->get_dn(), finished, 1); // TAKE ONE ONLY!
- finish_contexts(finished,
- (m->get_action() == LOCK_AC_REQXLOCKACK) ? 1:-1);
- }
- break;
-
-
- // -- auth --
- case LOCK_AC_LOCKACK:
- case LOCK_AC_LOCKNAK:
- assert(dn->gather_set.count(from) == 1);
- dn->gather_set.erase(from);
- if (dn->gather_set.size() == 0) {
- dout(7) << "handle_lock_dn finish gather, now xlock on " << *dn << endl;
- dn->lockstate = DN_LOCK_XLOCK;
- // FIXME
- mdcache->slave_requests[dn->xlockedby->reqid]->dentry_xlocks.insert(dn);
- mdcache->slave_requests[dn->xlockedby->reqid]->dentry_locks.insert(dn);
- dir->finish_waiting(CDir::WAIT_DNLOCK, dname);
- }
- break;
-
-
- case LOCK_AC_REQXLOCKC:
- // make sure it's a _file_, if it exists.
- if (dn && dn->inode && dn->inode->is_dir()) {
- dout(7) << "handle_lock_dn failing, reqxlockc on dir " << *dn->inode << endl;
-
- // nak
- string path;
- dn->make_path(path);
-
- MLock *reply = new MLock(LOCK_AC_REQXLOCKNAK, mds->get_nodeid());
- reply->set_dn(dir->dirfrag(), dname);
- reply->set_path(path);
- mds->send_message_mds(reply, m->get_asker(), MDS_PORT_LOCKER);
-
- assert(0); // FIXME
- /*
- // done
- if (mdcache->active_requests.count(m))
- mdcache->request_finish(m);
- else
- delete m;
- */
- return;
- }
-
- /* REWRITE ME HELP
- case LOCK_AC_REQXLOCK:
- if (dn) {
- dout(7) << "handle_lock_dn reqxlock on " << *dn << endl;
- } else {
- dout(7) << "handle_lock_dn reqxlock on " << dname << " in " << *dir << endl;
- }
-
-
- // start request?
- if (!mdcache->active_requests.count(m)) {
- vector<CDentry*> trace;
- if (!mdcache->request_start(m, dir->inode, trace))
- return; // waiting for pin
- }
-
- // try to xlock!
- if (!dn) {
- assert(m->get_action() == LOCK_AC_REQXLOCKC);
- dn = dir->add_dentry(dname);
- }
-
- if (dn->xlockedby != m) {
- if (!dentry_xlock_start(dn, m, dir->inode)) {
- // hose null dn if we're waiting on something
- if (dn->is_clean() && dn->is_null() && dn->is_sync()) dir->remove_dentry(dn);
- return; // waiting for xlock
- }
- } else {
- // successfully xlocked! on behalf of requestor.
- string path;
- dn->make_path(path);
-
- dout(7) << "handle_lock_dn reqxlock success for " << m->get_asker() << " on " << *dn << ", acking" << endl;
-
- // ACK xlock request
- MLock *reply = new MLock(LOCK_AC_REQXLOCKACK, mds->get_nodeid());
- reply->set_dn(dir->dirfrag(), dname);
- reply->set_path(path);
- mds->send_message_mds(reply, m->get_asker(), MDS_PORT_LOCKER);
-
- // note: keep request around in memory (to hold the xlock/pins on behalf of requester)
- return;
- }
- break;
-*/
-
- case LOCK_AC_UNXLOCK:
- dout(7) << "handle_lock_dn unxlock on " << *dn << endl;
- {
- MDRequest *mdr = dn->xlockedby;
-
- // finish request
- mdcache->request_finish(mdr); // this will drop the locks (and unpin paths!)
- return;
- }
- break;
-
- default:
- assert(0);
- }
-
- delete m;
-}
-
-
-
-
-
-
-