+
+
+
+
+
+
+
+
+bool Locker::acquire_locks(MDRequest *mdr,
+ set<CDentry*> &dentry_rdlocks,
+ set<CDentry*> &dentry_xlocks,
+ set<CInode*> &inode_hard_rdlocks,
+ set<CInode*> &inode_hard_xlocks)
+{
+ dout(10) << "acquire_locks " << *mdr << endl;
+
+ // (local) AUTH PINS
+
+ // can i auth_pin everything?
+ for (set<CDentry*>::iterator p = dentry_xlocks.begin();
+ p != dentry_xlocks.end();
+ ++p) {
+ CDir *dir = (*p)->dir;
+ if (!dir->is_auth()) continue;
+ if (!mdr->is_auth_pinned(dir) &&
+ !dir->can_auth_pin()) {
+ // wait
+ dir->add_waiter(CDir::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr));
+ mdcache->request_drop_locks(mdr);
+ mdr->drop_auth_pins();
+ return false;
+ }
+ }
+ for (set<CInode*>::iterator p = inode_hard_xlocks.begin();
+ p != inode_hard_xlocks.end();
+ ++p) {
+ CInode *in = *p;
+ if (!in->is_auth()) continue;
+ if (!mdr->is_auth_pinned(in) &&
+ !in->can_auth_pin()) {
+ in->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr));
+ mdcache->request_drop_locks(mdr);
+ mdr->drop_auth_pins();
+ return false;
+ }
+ }
+
+ // ok, grab the auth pins
+ for (set<CDentry*>::iterator p = dentry_xlocks.begin();
+ p != dentry_xlocks.end();
+ ++p) {
+ CDir *dir = (*p)->dir;
+ if (!dir->is_auth()) continue;
+ mdr->auth_pin(dir);
+ }
+ for (set<CInode*>::iterator p = inode_hard_xlocks.begin();
+ p != inode_hard_xlocks.end();
+ ++p) {
+ CInode *in = *p;
+ if (!in->is_auth()) continue;
+ mdr->auth_pin(in);
+ }
+
+
+ // DENTRY LOCKS
+ {
+ // sort all the dentries we will lock
+ set<CDentry*, CDentry::ptr_lt> sorted;
+ for (set<CDentry*>::iterator p = dentry_xlocks.begin();
+ p != dentry_xlocks.end();
+ ++p) {
+ dout(10) << "will xlock " << **p << endl;
+ sorted.insert(*p);
+ }
+ for (set<CDentry*>::iterator p = dentry_rdlocks.begin();
+ p != dentry_rdlocks.end();
+ ++p) {
+ dout(10) << "will rdlock " << **p << endl;
+ sorted.insert(*p);
+ }
+
+ // acquire dentry locks. make sure they match currently acquired locks.
+ set<CDentry*, CDentry::ptr_lt>::iterator existing = mdr->dentry_locks.begin();
+ for (set<CDentry*, CDentry::ptr_lt>::iterator p = sorted.begin();
+ p != sorted.end();
+ ++p) {
+
+ // already locked?
+ if (existing != mdr->dentry_locks.end() && *existing == *p) {
+ // right kind?
+ CDentry *had = *existing;
+ if (dentry_xlocks.count(*p) == had->is_xlockedbyme(mdr)) {
+ dout(10) << "acquire_locks already locked " << *had << endl;
+ existing++;
+ continue;
+ }
+ }
+
+ // hose any stray locks
+ while (existing != mdr->dentry_locks.end()) {
+ CDentry *had = *existing;
+ existing++;
+ dout(10) << "acquire_locks had " << *had << " locked before " << **p
+ << ", unlocking" << endl;
+ if (had->is_xlockedbyme(mdr))
+ dentry_xlock_finish(had, mdr);
+ else
+ dentry_rdlock_finish(had, mdr);
+ }
+
+ // lock
+ if (dentry_xlocks.count(*p)) {
+ if (!dentry_xlock_start(*p, mdr))
+ return false;
+ dout(10) << "acquire_locks got xlock on " << **p << endl;
+ } else {
+ if (!dentry_rdlock_start(*p, mdr))
+ return false;
+ dout(10) << "acquire_locks got rdlock on " << **p << endl;
+ }
+ }
+
+ // any extra unneeded locks?
+ while (existing != mdr->dentry_locks.end()) {
+ CDentry *had = *existing;
+ existing++; // advance before the finish call erases it from mdr->dentry_locks
+ dout(10) << "acquire_locks had " << *had << " locked, unlocking" << endl;
+ if (had->is_xlockedbyme(mdr))
+ dentry_xlock_finish(had, mdr);
+ else
+ dentry_rdlock_finish(had, mdr);
+ }
+ }
+
+ // INODES
+ {
+ // sort all the inodes we will lock
+ set<CInode*, CInode::ptr_lt> sorted;
+ for (set<CInode*>::iterator p = inode_hard_xlocks.begin();
+ p != inode_hard_xlocks.end();
+ ++p)
+ sorted.insert(*p);
+ for (set<CInode*>::iterator p = inode_hard_rdlocks.begin();
+ p != inode_hard_rdlocks.end();
+ ++p)
+ sorted.insert(*p);
+
+ // acquire inode locks. make sure they match currently acquired locks.
+ set<CInode*, CInode::ptr_lt>::iterator existing = mdr->inode_hard_locks.begin();
+ for (set<CInode*, CInode::ptr_lt>::iterator p = sorted.begin();
+ p != sorted.end();
+ ++p) {
+ // already locked?
+ if (existing != mdr->inode_hard_locks.end() && *existing == *p) {
+ // right kind?
+ CInode *had = *existing;
+ if (inode_hard_xlocks.count(*p) == (had->hardlock.get_wrlocked_by() == mdr)) {
+ dout(10) << "acquire_locks already locked " << *had << endl;
+ existing++;
+ continue;
+ }
+ }
+
+ // hose any stray locks
+ while (existing != mdr->inode_hard_locks.end()) {
+ CInode *had = *existing;
+ existing++;
+ dout(10) << "acquire_locks had " << *had << " locked before " << **p
+ << ", unlocking" << endl;
+ if (had->hardlock.get_wrlocked_by() == mdr)
+ inode_hard_xlock_finish(had, mdr);
+ else
+ inode_hard_rdlock_finish(had, mdr);
+ }
+
+ // lock
+ if (inode_hard_xlocks.count(*p)) {
+ if (!inode_hard_xlock_start(*p, mdr))
+ return false;
+ dout(10) << "acquire_locks got xlock on " << **p << endl;
+ } else {
+ if (!inode_hard_rdlock_start(*p, mdr))
+ return false;
+ dout(10) << "acquire_locks got rdlock on " << **p << endl;
+ }
+ }
+
+ // any extra unneeded locks?
+ while (existing != mdr->inode_hard_locks.end()) {
+ CInode *had = *existing;
+ existing++; // advance before the finish call erases it from mdr->inode_hard_locks
+ dout(10) << "acquire_locks had " << *had << " locked, unlocking" << endl;
+ if (had->hardlock.get_wrlocked_by() == mdr)
+ inode_hard_xlock_finish(had, mdr);
+ else
+ inode_hard_rdlock_finish(had, mdr);
+ }
+ }
+
+ return true;
+}
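+
+// Usage sketch (illustrative only, not part of this change): a caller such as
+// rdlock_path_xlock_dentry below builds the four lock sets and bails out when
+// acquire_locks() returns false, since a retry waiter has already been queued:
+//
+//   set<CDentry*> dentry_rdlocks, dentry_xlocks;
+//   set<CInode*>  inode_hard_rdlocks, inode_hard_xlocks;
+//   dentry_xlocks.insert(dn);        // e.g. the new (null) dentry
+//   if (!mds->locker->acquire_locks(mdr,
+//                                   dentry_rdlocks, dentry_xlocks,
+//                                   inode_hard_rdlocks, inode_hard_xlocks))
+//     return;                        // will be retried via C_MDS_RetryRequest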
+
+
+
+
+
+
+
+
+
+
// file i/o -----------------------------------------
__uint64_t Locker::issue_file_data_version(CInode *in)
// ===============================
// hard inode metadata
-bool Locker::inode_hard_read_try(CInode *in, Context *con)
+bool Locker::inode_hard_rdlock_try(CInode *in, Context *con)
{
- dout(7) << "inode_hard_read_try on " << *in << endl;
+ dout(7) << "inode_hard_rdlock_try on " << *in << endl;
// can read? grab ref.
if (in->hardlock.can_read(in->is_auth()))
assert(!in->is_auth());
// wait!
- dout(7) << "inode_hard_read_try waiting on " << *in << endl;
+ dout(7) << "inode_hard_rdlock_try waiting on " << *in << endl;
in->add_waiter(CInode::WAIT_HARDR, con);
return false;
}
-bool Locker::inode_hard_read_start(CInode *in, MClientRequest *m, CInode *ref)
+bool Locker::inode_hard_rdlock_start(CInode *in, MDRequest *mdr)
{
- dout(7) << "inode_hard_read_start on " << *in << endl;
+ dout(7) << "inode_hard_rdlock_start on " << *in << endl;
// can read? grab ref.
if (in->hardlock.can_read(in->is_auth())) {
in->hardlock.get_read();
+ mdr->inode_hard_rdlocks.insert(in);
+ mdr->inode_hard_locks.insert(in);
return true;
}
assert(!in->is_auth());
// wait!
- dout(7) << "inode_hard_read_start waiting on " << *in << endl;
- in->add_waiter(CInode::WAIT_HARDR, new C_MDS_RetryRequest(mds, m, ref));
+ dout(7) << "inode_hard_rdlock_start waiting on " << *in << endl;
+ in->add_waiter(CInode::WAIT_HARDR, new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
-void Locker::inode_hard_read_finish(CInode *in)
+void Locker::inode_hard_rdlock_finish(CInode *in, MDRequest *mdr)
{
// drop ref
assert(in->hardlock.can_read(in->is_auth()));
in->hardlock.put_read();
+ mdr->inode_hard_rdlocks.erase(in);
+ mdr->inode_hard_locks.erase(in);
- dout(7) << "inode_hard_read_finish on " << *in << endl;
+ dout(7) << "inode_hard_rdlock_finish on " << *in << endl;
//if (in->hardlock.get_nread() == 0) in->finish_waiting(CInode::WAIT_HARDNORD);
}
-bool Locker::inode_hard_write_start(CInode *in, MClientRequest *m, CInode *ref)
+bool Locker::inode_hard_xlock_start(CInode *in, MDRequest *mdr)
{
- dout(7) << "inode_hard_write_start on " << *in << endl;
+ dout(7) << "inode_hard_xlock_start on " << *in << endl;
// if not replicated, i can twiddle lock at will
if (in->is_auth() &&
// can write? grab ref.
if (in->hardlock.can_write(in->is_auth())) {
assert(in->is_auth());
- in->hardlock.get_write(m);
+ in->hardlock.get_write(mdr);
+ mdr->inode_hard_xlocks.insert(in);
+ mdr->inode_hard_locks.insert(in);
return true;
}
inode_hard_lock(in);
}
- dout(7) << "inode_hard_write_start waiting on " << *in << endl;
- in->add_waiter(CInode::WAIT_HARDW, new C_MDS_RetryRequest(mds, m, ref));
+ dout(7) << "inode_hard_xlock_start waiting on " << *in << endl;
+ in->add_waiter(CInode::WAIT_HARDW, new C_MDS_RetryRequest(mdcache, mdr));
return false;
} else {
// replica
// fw to auth
int auth = in->authority().first;
- dout(7) << "inode_hard_write_start " << *in << " on replica, fw to auth " << auth << endl;
+ dout(7) << "inode_hard_xlock_start " << *in << " on replica, fw to auth " << auth << endl;
assert(auth != mds->get_nodeid());
- mdcache->request_forward(m, auth);
+ mdcache->request_forward(mdr, auth);
return false;
}
}
-void Locker::inode_hard_write_finish(CInode *in)
+void Locker::inode_hard_xlock_finish(CInode *in, MDRequest *mdr)
{
// drop ref
//assert(in->hardlock.can_write(in->is_auth()));
in->hardlock.put_write();
- dout(7) << "inode_hard_write_finish on " << *in << endl;
+ mdr->inode_hard_xlocks.erase(in);
+ mdr->inode_hard_locks.erase(in);
+ dout(7) << "inode_hard_xlock_finish on " << *in << endl;
// others waiting?
if (in->is_hardlock_write_wanted()) {
dout(7) << "handle_lock_inode_hard readers, waiting before ack on " << *in << endl;
lock->set_state(LOCK_GLOCKR);
in->add_waiter(CInode::WAIT_HARDNORD,
- new C_MDS_RetryMessage(mds,m));
- assert(0); // does this ever happen? (if so, fix hard_read_finish, and CInodeExport.update_inode!)
+ new C_MDS_RetryMessage(mds, m));
+ assert(0); // does this ever happen? (if so, fix hard_rdlock_finish, and CInodeExport.update_inode!)
return;
} else {
// soft inode metadata
-bool Locker::inode_file_read_start(CInode *in, MClientRequest *m, CInode *ref)
+bool Locker::inode_file_rdlock_start(CInode *in, MDRequest *mdr)
{
- dout(7) << "inode_file_read_start " << *in << " filelock=" << in->filelock << endl;
+ dout(7) << "inode_file_rdlock_start " << *in << " filelock=" << in->filelock << endl;
// can read? grab ref.
if (in->filelock.can_read(in->is_auth())) {
// can't read, and replicated.
if (in->filelock.can_read_soon(in->is_auth())) {
// wait
- dout(7) << "inode_file_read_start can_read_soon " << *in << endl;
+ dout(7) << "inode_file_rdlock_start can_read_soon " << *in << endl;
} else {
if (in->is_auth()) {
// auth
//in->filelock.get_write();
in->finish_waiting(CInode::WAIT_FILERWB|CInode::WAIT_FILESTABLE);
//in->filelock.put_write();
+
+ mdr->inode_file_rdlocks.insert(in);
+ mdr->inode_file_locks.insert(in);
return true;
}
} else {
- dout(7) << "inode_file_read_start waiting until stable on " << *in << ", filelock=" << in->filelock << endl;
- in->add_waiter(CInode::WAIT_FILESTABLE, new C_MDS_RetryRequest(mds, m, ref));
+ dout(7) << "inode_file_rdlock_start waiting until stable on " << *in << ", filelock=" << in->filelock << endl;
+ in->add_waiter(CInode::WAIT_FILESTABLE, new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
} else {
// fw to auth
int auth = in->authority().first;
- dout(7) << "inode_file_read_start " << *in << " on replica and async, fw to auth " << auth << endl;
+ dout(7) << "inode_file_rdlock_start " << *in << " on replica and async, fw to auth " << auth << endl;
assert(auth != mds->get_nodeid());
- mdcache->request_forward(m, auth);
+ mdcache->request_forward(mdr, auth);
return false;
} else {
// wait until stable
- dout(7) << "inode_file_read_start waiting until stable on " << *in << ", filelock=" << in->filelock << endl;
- in->add_waiter(CInode::WAIT_FILESTABLE, new C_MDS_RetryRequest(mds, m, ref));
+ dout(7) << "inode_file_rdlock_start waiting until stable on " << *in << ", filelock=" << in->filelock << endl;
+ in->add_waiter(CInode::WAIT_FILESTABLE, new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
}
}
// wait
- dout(7) << "inode_file_read_start waiting on " << *in << ", filelock=" << in->filelock << endl;
- in->add_waiter(CInode::WAIT_FILER, new C_MDS_RetryRequest(mds, m, ref));
+ dout(7) << "inode_file_rdlock_start waiting on " << *in << ", filelock=" << in->filelock << endl;
+ in->add_waiter(CInode::WAIT_FILER, new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
-void Locker::inode_file_read_finish(CInode *in)
+void Locker::inode_file_rdlock_finish(CInode *in, MDRequest *mdr)
{
// drop ref
assert(in->filelock.can_read(in->is_auth()));
in->filelock.put_read();
+ mdr->inode_file_rdlocks.erase(in);
+ mdr->inode_file_locks.erase(in);
- dout(7) << "inode_file_read_finish on " << *in << ", filelock=" << in->filelock << endl;
+ dout(7) << "inode_file_rdlock_finish on " << *in << ", filelock=" << in->filelock << endl;
if (in->filelock.get_nread() == 0) {
in->finish_waiting(CInode::WAIT_FILENORD);
}
-bool Locker::inode_file_write_start(CInode *in, MClientRequest *m, CInode *ref)
+bool Locker::inode_file_xlock_start(CInode *in, MDRequest *mdr)
{
- dout(7) << "inode_file_write_start on " << *in << endl;
+ dout(7) << "inode_file_xlock_start on " << *in << endl;
// can't write?
if (!in->filelock.can_write(in->is_auth())) {
// auth
if (!in->filelock.can_write_soon(in->is_auth())) {
if (!in->filelock.is_stable()) {
- dout(7) << "inode_file_write_start on auth, waiting for stable on " << *in << endl;
- in->add_waiter(CInode::WAIT_FILESTABLE, new C_MDS_RetryRequest(mds, m, ref));
+ dout(7) << "inode_file_xlock_start on auth, waiting for stable on " << *in << endl;
+ in->add_waiter(CInode::WAIT_FILESTABLE, new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
// replica
// fw to auth
int auth = in->authority().first;
- dout(7) << "inode_file_write_start " << *in << " on replica, fw to auth " << auth << endl;
+ dout(7) << "inode_file_xlock_start " << *in << " on replica, fw to auth " << auth << endl;
assert(auth != mds->get_nodeid());
- mdcache->request_forward(m, auth);
+ mdcache->request_forward(mdr, auth);
return false;
}
}
if (in->filelock.can_write(in->is_auth())) {
// can i auth pin?
assert(in->is_auth());
- in->filelock.get_write(m);
+ in->filelock.get_write(mdr);
+ mdr->inode_file_locks.insert(in);
+ mdr->inode_file_xlocks.insert(in);
return true;
} else {
- dout(7) << "inode_file_write_start on auth, waiting for write on " << *in << endl;
- in->add_waiter(CInode::WAIT_FILEW, new C_MDS_RetryRequest(mds, m, ref));
+ dout(7) << "inode_file_xlock_start on auth, waiting for write on " << *in << endl;
+ in->add_waiter(CInode::WAIT_FILEW, new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
}
-void Locker::inode_file_write_finish(CInode *in)
+void Locker::inode_file_xlock_finish(CInode *in, MDRequest *mdr)
{
// drop ref
//assert(in->filelock.can_write(in->is_auth()));
in->filelock.put_write();
- dout(7) << "inode_file_write_finish on " << *in << ", filelock=" << in->filelock << endl;
+ mdr->inode_file_locks.erase(in);
+ mdr->inode_file_xlocks.erase(in);
+ dout(7) << "inode_file_xlock_finish on " << *in << ", filelock=" << in->filelock << endl;
// drop lock?
if (!in->is_filelock_write_wanted()) {
void Locker::handle_lock_dir(MLock *m)
{
-
}
// DENTRY
-bool Locker::dentry_xlock_start(CDentry *dn, Message *m, CInode *ref)
+
+// trace helpers
+
+/** dentry_can_rdlock_trace
+ * see if we can _anonymously_ rdlock an entire trace.
+ * if not, and req is specified, wait and retry that message.
+ */
+bool Locker::dentry_can_rdlock_trace(vector<CDentry*>& trace, MClientRequest *req)
+{
+ // verify dentries are rdlockable.
+ // we do this because
+ // - we're being less aggressive about lock acquisition, and
+ // - we're not acquiring the locks in order!
+ for (vector<CDentry*>::iterator it = trace.begin();
+ it != trace.end();
+ it++) {
+ CDentry *dn = *it;
+ if (!dn->is_pinnable(0)) {
+ if (req) {
+ dout(10) << "can_rdlock_trace can't rdlock " << *dn << ", waiting" << endl;
+ dn->dir->add_waiter(CDir::WAIT_DNPINNABLE,
+ dn->name,
+ new C_MDS_RetryMessage(mds, req));
+ } else {
+ dout(10) << "can_rdlock_trace can't rdlock " << *dn << endl;
+ }
+ return false;
+ }
+ }
+ return true;
+}
+
+void Locker::dentry_anon_rdlock_trace_start(vector<CDentry*>& trace)
+{
+ // grab dentry rdlocks
+ for (vector<CDentry*>::iterator it = trace.begin();
+ it != trace.end();
+ it++)
+ (*it)->pin(0);
+}
+
+
+
+bool Locker::dentry_rdlock_start(CDentry *dn, MDRequest *mdr)
+{
+ // verify lockable
+ if (!dn->is_pinnable(mdr)) {
+ // wait
+ dout(10) << "dentry_rdlock_start waiting on " << *dn << endl;
+ dn->dir->add_waiter(CDir::WAIT_DNPINNABLE,
+ dn->name,
+ new C_MDS_RetryRequest(mdcache, mdr));
+ return false;
+ }
+
+ // rdlock
+ dout(10) << "dentry_rdlock_start " << *dn << endl;
+ dn->pin(mdr);
+
+ mdr->dentry_rdlocks.insert(dn);
+ mdr->dentry_locks.insert(dn);
+
+ return true;
+}
+
+
+void Locker::_dentry_rdlock_finish(CDentry *dn, MDRequest *mdr)
+{
+ dn->unpin(mdr);
+
+ // did we completely unpin a waiter?
+ if (dn->lockstate == DN_LOCK_UNPINNING && !dn->get_num_ref()) {
+ // return state to sync, in case the unpinner flails
+ dn->lockstate = DN_LOCK_SYNC;
+
+ // run finisher right now to give them a fair shot.
+ dn->dir->finish_waiting(CDir::WAIT_DNUNPINNED, dn->name);
+ }
+}
+
+void Locker::dentry_rdlock_finish(CDentry *dn, MDRequest *mdr)
+{
+ dout(10) << "dentry_rdlock_finish " << *dn << endl;
+ _dentry_rdlock_finish(dn, mdr);
+ mdr->dentry_rdlocks.erase(dn);
+ mdr->dentry_locks.erase(dn);
+}
+
+void Locker::dentry_anon_rdlock_trace_finish(vector<CDentry*>& trace)
+{
+ for (vector<CDentry*>::iterator it = trace.begin();
+ it != trace.end();
+ it++)
+ _dentry_rdlock_finish(*it, 0);
+}
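+
+// Typical pattern for the anonymous trace rdlock (see handle_client_request in
+// Server.cc below); the matching _finish call is assumed to happen when the
+// request is done with the trace, it is not shown in this hunk:
+//
+//   if (!mds->locker->dentry_can_rdlock_trace(trace, req))
+//     return;                                      // waiter queued on req
+//   mds->locker->dentry_anon_rdlock_trace_start(trace);
+//   ...                                            // use the trace
+//   mds->locker->dentry_anon_rdlock_trace_finish(trace);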
+
+bool Locker::dentry_xlock_start(CDentry *dn, MDRequest *mdr)
{
dout(7) << "dentry_xlock_start on " << *dn << endl;
// locked?
if (dn->lockstate == DN_LOCK_XLOCK) {
- if (dn->xlockedby == m) return true; // locked by me!
+ if (dn->xlockedby == mdr) return true; // locked by me!
// not by me, wait
dout(7) << "dentry " << *dn << " xlock by someone else" << endl;
dn->dir->add_waiter(CDir::WAIT_DNREAD, dn->name,
- new C_MDS_RetryRequest(mds,m,ref));
+ new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
// prelock?
if (dn->lockstate == DN_LOCK_PREXLOCK) {
- if (dn->xlockedby == m) {
+ if (dn->xlockedby == mdr) {
dout(7) << "dentry " << *dn << " prexlock by me" << endl;
dn->dir->add_waiter(CDir::WAIT_DNLOCK, dn->name,
- new C_MDS_RetryRequest(mds,m,ref));
+ new C_MDS_RetryRequest(mdcache, mdr));
} else {
dout(7) << "dentry " << *dn << " prexlock by someone else" << endl;
dn->dir->add_waiter(CDir::WAIT_DNREAD, dn->name,
- new C_MDS_RetryRequest(mds,m,ref));
+ new C_MDS_RetryRequest(mdcache, mdr));
}
return false;
}
dn->lockstate = DN_LOCK_UNPINNING;
dn->dir->add_waiter(CDir::WAIT_DNUNPINNED,
dn->name,
- new C_MDS_RetryRequest(mds,m,ref));
+ new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
- // pin path up to dentry! (if success, point of no return)
- CDentry *pdn = dn->dir->inode->get_parent_dn();
- if (pdn) {
- if (mdcache->active_requests[m].traces.count(pdn)) {
- dout(7) << "already path pinned parent dentry " << *pdn << endl;
- } else {
- dout(7) << "pinning parent dentry " << *pdn << endl;
- vector<CDentry*> trace;
- mdcache->make_trace(trace, pdn->inode);
- assert(trace.size());
-
- if (!mdcache->path_pin(trace, m, new C_MDS_RetryRequest(mds, m, ref))) return false;
-
- mdcache->active_requests[m].traces[trace[trace.size()-1]] = trace;
- }
- }
-
// mine!
- dn->xlockedby = m;
+ dn->xlockedby = mdr;
// pin me!
dn->get(CDentry::PIN_XLOCK);
// wait
dout(7) << "dentry_xlock_start locking, waiting for replicas " << endl;
dn->dir->add_waiter(CDir::WAIT_DNLOCK, dn->name,
- new C_MDS_RetryRequest(mds, m, ref));
+ new C_MDS_RetryRequest(mdcache, mdr));
return false;
} else {
dn->lockstate = DN_LOCK_XLOCK;
- mdcache->active_requests[dn->xlockedby].xlocks.insert(dn);
+ mdr->dentry_xlocks.insert(dn);
+ mdr->dentry_locks.insert(dn);
return true;
}
}
-void Locker::dentry_xlock_finish(CDentry *dn, bool quiet)
+void Locker::dentry_xlock_finish(CDentry *dn, MDRequest *mdr, bool quiet)
{
dout(7) << "dentry_xlock_finish on " << *dn << endl;
dout(7) << "this was a foreign xlock" << endl;
} else {
// remove from request record
- assert(mdcache->active_requests[dn->xlockedby].xlocks.count(dn) == 1);
- mdcache->active_requests[dn->xlockedby].xlocks.erase(dn);
+ mdr->dentry_xlocks.erase(dn);
+ mdr->dentry_locks.erase(dn);
}
dn->xlockedby = 0;
// unpin
dn->put(CDentry::PIN_XLOCK);
- // unpin parent dir?
- // -> no? because we might have xlocked 2 things in this dir.
- // instead, we let request_finish clean up the mess.
-
// tell replicas?
if (!quiet) {
// tell even if dn is null.
}
+void Locker::dentry_xlock_downgrade_to_rdlock(CDentry *dn, MDRequest *mdr)
+{
+ dout(7) << "dentry_xlock_downgrade_to_rdlock on " << *dn << endl;
+
+ assert(dn->xlockedby);
+ if (dn->xlockedby == DN_XLOCK_FOREIGN) {
+ dout(7) << "this was a foreign xlock" << endl;
+ assert(0); // rewrite me
+ }
+
+ // un-xlock
+ dn->xlockedby = 0;
+ dn->lockstate = DN_LOCK_SYNC;
+ mdr->dentry_xlocks.erase(dn);
+ dn->put(CDentry::PIN_XLOCK);
+
+ // rdlock
+ mdr->dentry_rdlocks.insert(dn);
+ dn->pin(mdr);
+
+ // tell replicas?
+ if (dn->is_replicated()) {
+ send_lock_message(dn, LOCK_AC_SYNC);
+ }
+
+ // kick waiters
+ list<Context*> finished;
+ dn->dir->take_waiting(CDir::WAIT_DNREAD, finished);
+ mds->queue_finished(finished);
+}
+
+
/*
* onfinish->finish() will be called with
* 0 on successful xlock,
* -1 on failure
*/
-
+/*
class C_MDC_XlockRequest : public Context {
Locker *mdc;
CDir *dir;
dir, dname, req,
onfinish));
}
-
+*/
(m->get_action() == LOCK_AC_REQXLOCK ||
m->get_action() == LOCK_AC_REQXLOCKC)) {
dout(7) << "handle_lock_dn got reqxlock from " << dauth << " and they are auth.. dropping on floor (their import will have woken them up)" << endl;
- if (mdcache->active_requests.count(m))
+ /*if (mdcache->active_requests.count(m))
mdcache->request_finish(m);
else
delete m;
+ */
+ assert(0); // FIXME REWRITE ME >>>>>>>
return;
}
dout(7) << "handle_lock_dn " << m << " " << m->get_ino() << " dname " << dname << " from " << from << ": proxy, fw to " << dauth << endl;
+ /* ******* REWRITE ME SDFKJDSFDSFJK:SDFJKDFSJKFDSHJKDFSHJKDFS>>>>>>>
// forward
if (mdcache->active_requests.count(m)) {
// xlock requests are requests, use request_* functions!
// forward normally
mds->send_message_mds(m, dauth, MDS_PORT_LOCKER);
}
+ */
return;
}
}
// finish request (if we got that far)
+ /* FIXME F>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
if (mdcache->active_requests.count(m))
mdcache->request_finish(m);
+ */
delete m;
return;
vector<CDentry*> trace;
filepath path = m->get_path();
- int r = mdcache->path_traverse(path, trace, true,
- m, new C_MDS_RetryMessage(mds,m),
+ int r = mdcache->path_traverse(0, 0, // FIXME FIXME >>>>>>>>>>>>>>>>>>>>>>>>
+ path, trace, true,
+ m, new C_MDS_RetryMessage(mds, m),
MDS_TRAVERSE_DISCOVER);
assert(r>0);
return;
vector<CDentry*> trace;
filepath path = m->get_path();
- int r = mdcache->path_traverse(path, trace, true,
+ int r = mdcache->path_traverse(0, 0, // FIXME >>>>>>>>>>>>>>>>>>>>>>>>
+ path, trace, true,
m, new C_MDS_RetryMessage(mds,m),
MDS_TRAVERSE_DISCOVER);
assert(r>0);
if (dn->gather_set.size() == 0) {
dout(7) << "handle_lock_dn finish gather, now xlock on " << *dn << endl;
dn->lockstate = DN_LOCK_XLOCK;
- mdcache->active_requests[dn->xlockedby].xlocks.insert(dn);
+ mdcache->active_requests[dn->xlockedby->reqid].dentry_xlocks.insert(dn);
+ mdcache->active_requests[dn->xlockedby->reqid].dentry_locks.insert(dn);
dir->finish_waiting(CDir::WAIT_DNLOCK, dname);
}
break;
reply->set_path(path);
mds->send_message_mds(reply, m->get_asker(), MDS_PORT_LOCKER);
+ assert(0); // FIXME
+ /*
// done
if (mdcache->active_requests.count(m))
mdcache->request_finish(m);
else
delete m;
+ */
return;
}
+ /* REWRITE ME HELP
case LOCK_AC_REQXLOCK:
if (dn) {
dout(7) << "handle_lock_dn reqxlock on " << *dn << endl;
return;
}
break;
+*/
case LOCK_AC_UNXLOCK:
dout(7) << "handle_lock_dn unxlock on " << *dn << endl;
{
- Message *m = dn->xlockedby;
+ MDRequest *mdr = dn->xlockedby;
// finish request
- mdcache->request_finish(m); // this will drop the locks (and unpin paths!)
+ mdcache->request_finish(mdr); // this will drop the locks (and unpin paths!)
return;
}
break;
* the context is needed to pass a (failure) result code.
*/
+/*
class C_MDC_TraverseDiscover : public Context {
Context *onfinish, *ondelay;
public:
delete ondelay;
}
};
+*/
-int MDCache::path_traverse(filepath& origpath,
+int MDCache::path_traverse(MDRequest *mdr,
+ CInode *base, // traverse starting from here.
+ filepath& origpath,
vector<CDentry*>& trace,
bool follow_trailing_symlink,
Message *req,
Context *ondelay,
int onfail,
- Context *onfinish,
bool is_client_req) // true if req is MClientRequest .. gross, FIXME
{
set< pair<CInode*, string> > symlinks_resolved; // keep a list of symlinks we touch to avoid loops
onfail == MDS_TRAVERSE_DISCOVERXLOCK) noperm = true;
// root
- CInode *cur = get_root();
+ CInode *cur = base;
+ if (!cur) cur = get_root();
if (cur == NULL) {
dout(7) << "traverse: i don't have root" << endl;
open_root(ondelay);
- if (onfinish) delete onfinish;
return 1;
}
if (!cur->is_dir()) {
dout(7) << "traverse: " << *cur << " not a dir " << endl;
delete ondelay;
- if (onfinish) {
- onfinish->finish(-ENOTDIR);
- delete onfinish;
- }
return -ENOTDIR;
}
if (cur->is_frozen_dir()) {
dout(7) << "traverse: " << *cur->get_parent_dir() << " is frozen_dir, waiting" << endl;
cur->get_parent_dir()->add_waiter(CDir::WAIT_UNFREEZE, ondelay);
- if (onfinish) delete onfinish;
return 1;
}
else if (cur->auth_is_ambiguous()) {
dout(10) << "traverse: need dir, waiting for single auth on " << *cur << endl;
cur->add_waiter(CInode::WAIT_SINGLEAUTH, ondelay);
- if (onfinish) delete onfinish;
return 1;
} else {
filepath want = path.postfixpath(depth);
dir_discovers[cur->ino()].insert(cur->authority().first);
}
cur->add_waiter(CInode::WAIT_DIR, ondelay);
- if (onfinish) delete onfinish;
return 1;
}
}
*/
// must read directory hard data (permissions, x bit) to traverse
- if (!noperm && !mds->locker->inode_hard_read_try(cur, ondelay)) {
- if (onfinish) delete onfinish;
+ if (!noperm && !mds->locker->inode_hard_rdlock_try(cur, ondelay)) {
return 1;
}
// null and last_bit and xlocked by me?
if (dn && dn->is_null() &&
- dn->is_xlockedbyme(req) &&
+ dn->is_xlockedbyme(mdr) &&
depth == path.depth()-1) {
dout(10) << "traverse: hit (my) xlocked dentry at tail of traverse, succeeding" << endl;
trace.push_back(dn);
if (dn && !dn->is_null()) {
// dentry exists. xlocked?
- if (!noperm && dn->is_xlockedbyother(req)) {
+ if (!noperm && dn->is_xlockedbyother(mdr)) {
dout(10) << "traverse: xlocked dentry at " << *dn << endl;
curdir->add_waiter(CDir::WAIT_DNREAD,
path[depth],
ondelay);
- if (onfinish) delete onfinish;
return 1;
}
dn->link_remote(in);
} else {
dout(7) << "remote link to " << dn->get_remote_ino() << ", which i don't have" << endl;
- open_remote_ino(dn->get_remote_ino(), req,
- ondelay);
+ assert(0); // REWRITE ME
+ //open_remote_ino(dn->get_remote_ino(), req, ondelay);
return 1;
}
}
if (curdir->is_complete()) {
// file not found
delete ondelay;
- if (onfinish) {
- onfinish->finish(-ENOENT);
- delete onfinish;
- }
return -ENOENT;
} else {
if (mds->logger) mds->logger->inc("cmiss");
- if (onfinish) delete onfinish;
return 1;
}
} else {
}
else if (curdir->auth_is_ambiguous()) {
dout(7) << "traverse: waiting for single auth on " << *curdir << endl;
- curdir->add_waiter(CDir::WAIT_SINGLEAUTH,
- new C_MDC_TraverseDiscover(onfinish, ondelay));
+ curdir->add_waiter(CDir::WAIT_SINGLEAUTH, ondelay);
return 1;
} else {
dout(7) << "traverse: discover " << want << " from " << *curdir << endl;
}
// delay processing of current request.
- // delay finish vs ondelay until result of traverse, so that ENOENT can be
- // passed to onfinish if necessary
- curdir->add_waiter(CDir::WAIT_DENTRY,
- path[depth],
- new C_MDC_TraverseDiscover(onfinish, ondelay));
-
+ curdir->add_waiter(CDir::WAIT_DENTRY, path[depth], ondelay);
if (mds->logger) mds->logger->inc("cmiss");
return 1;
}
// wait
dout(7) << "traverse: waiting for single auth in " << *curdir << endl;
curdir->add_waiter(CDir::WAIT_SINGLEAUTH, ondelay);
- if (onfinish) delete onfinish;
return 1;
} else {
dout(7) << "traverse: forwarding, not auth for " << *curdir << endl;
mds->forward_message_mds(req, dauth.first, req->get_dest_port());
if (mds->logger) mds->logger->inc("cfw");
- if (onfinish) delete onfinish;
delete ondelay;
return 2;
}
}
if (onfail == MDS_TRAVERSE_FAIL) {
delete ondelay;
- if (onfinish) {
- onfinish->finish(-ENOENT); // -ENOENT, but only because i'm not the authority!
- delete onfinish;
- }
return -ENOENT; // not necessarily exactly true....
}
}
// success.
delete ondelay;
- if (onfinish) {
- onfinish->finish(0);
- delete onfinish;
- }
return 0;
}
/** get_dentry_inode
* will return inode for primary, or link up/open up remote link's inode as necessary.
*/
-CInode *MDCache::get_dentry_inode(CDentry *dn, MClientRequest *req, CInode *ref)
+CInode *MDCache::get_dentry_inode(CDentry *dn, MDRequest *mdr)
{
assert(!dn->is_null());
return in;
} else {
dout(10) << "get_dentry_ninode on remote dn, opening inode for " << *dn << endl;
- open_remote_ino(dn->get_remote_ino(), req,
- new C_MDS_RetryRequest(mds, req, ref));
+ open_remote_ino(dn->get_remote_ino(), mdr, new C_MDS_RetryRequest(this, mdr));
return 0;
}
}
class C_MDC_OpenRemoteInoLookup : public Context {
MDCache *mdc;
inodeno_t ino;
- Message *req;
+ MDRequest *mdr;
Context *onfinish;
public:
vector<Anchor> anchortrace;
- C_MDC_OpenRemoteInoLookup(MDCache *mdc, inodeno_t ino, Message *req, Context *onfinish) {
+ C_MDC_OpenRemoteInoLookup(MDCache *mdc, inodeno_t ino, MDRequest *r, Context *onfinish) {
this->mdc = mdc;
this->ino = ino;
- this->req = req;
+ this->mdr = r;
this->onfinish = onfinish;
}
void finish(int r) {
assert(r == 0);
if (r == 0)
- mdc->open_remote_ino_2(ino, req, anchortrace, onfinish);
+ mdc->open_remote_ino_2(ino, mdr, anchortrace, onfinish);
else {
onfinish->finish(r);
delete onfinish;
};
void MDCache::open_remote_ino(inodeno_t ino,
- Message *req,
+ MDRequest *mdr,
Context *onfinish)
{
dout(7) << "open_remote_ino on " << ino << endl;
- C_MDC_OpenRemoteInoLookup *c = new C_MDC_OpenRemoteInoLookup(this, ino, req, onfinish);
+ C_MDC_OpenRemoteInoLookup *c = new C_MDC_OpenRemoteInoLookup(this, ino, mdr, onfinish);
mds->anchorclient->lookup(ino, c->anchortrace, c);
}
void MDCache::open_remote_ino_2(inodeno_t ino,
- Message *req,
+ MDRequest *mdr,
vector<Anchor>& anchortrace,
Context *onfinish)
{
-// path pins
-
-bool MDCache::path_pin(vector<CDentry*>& trace,
- Message *m,
- Context *c)
-{
- // verify everything is pinnable
- for (vector<CDentry*>::iterator it = trace.begin();
- it != trace.end();
- it++) {
- CDentry *dn = *it;
- if (!dn->is_pinnable(m)) {
- // wait
- if (c) {
- dout(10) << "path_pin can't pin " << *dn << ", waiting" << endl;
- dn->dir->add_waiter(CDir::WAIT_DNPINNABLE,
- dn->name,
- c);
- } else {
- dout(10) << "path_pin can't pin, no waiter, failing." << endl;
- }
- return false;
- }
- }
-
- // pin!
- for (vector<CDentry*>::iterator it = trace.begin();
- it != trace.end();
- it++) {
- (*it)->pin(m);
- dout(11) << "path_pinned " << *(*it) << endl;
- }
-
- delete c;
- return true;
-}
-
-
-void MDCache::path_unpin(vector<CDentry*>& trace,
- Message *m)
-{
- for (vector<CDentry*>::iterator it = trace.begin();
- it != trace.end();
- it++) {
- CDentry *dn = *it;
- dn->unpin(m);
- dout(11) << "path_unpinned " << *dn << endl;
-
- // did we completely unpin a waiter?
- if (dn->lockstate == DN_LOCK_UNPINNING && !dn->get_num_ref()) {
- // return state to sync, in case the unpinner flails
- dn->lockstate = DN_LOCK_SYNC;
-
- // run finisher right now to give them a fair shot.
- dn->dir->finish_waiting(CDir::WAIT_DNUNPINNED, dn->name);
- }
- }
-}
-
-
void MDCache::make_trace(vector<CDentry*>& trace, CInode *in)
{
CInode *parent = in->get_parent_inode();
}
-bool MDCache::request_start(Message *req,
- CInode *ref,
- vector<CDentry*>& trace)
+MDRequest *MDCache::request_start(reqid_t ri)
{
- assert(active_requests.count(req) == 0);
-
- // pin path
- if (!trace.empty())
- if (!path_pin(trace, req, new C_MDS_RetryMessage(mds,req))) return false;
+ assert(active_requests.count(ri) == 0);
+ active_requests[ri].reqid = ri;
+ MDRequest *mdr = &active_requests[ri];
+ dout(7) << "request_start " << *mdr << endl;
+ return mdr;
+}
- dout(7) << "request_start " << *req << endl;
+MDRequest *MDCache::request_start(MClientRequest *req)
+{
+ reqid_t ri = req->get_reqid();
+ MDRequest *mdr = request_start(ri);
+ mdr->request = req;
+ return mdr;
+}
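+
+// Request lifecycle sketch (assuming the Server-side flow shown later in this
+// change): the server registers the request, dispatches it, and the reply or
+// forward path tears it down again, dropping all locks and pins:
+//
+//   MDRequest *mdr = mdcache->request_start(req);   // in Server::handle_client_request
+//   dispatch_request(mdr);                          // Server::dispatch_request
+//   ...
+//   reply_request(mdr, r, tracei);                  // calls mdcache->request_finish(mdr)
+//   // or: mdcache->request_forward(mdr, who);      // calls request_cleanup(mdr)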
- // add to map
- active_requests[req].ref = ref;
- if (trace.size()) active_requests[req].traces[trace[trace.size()-1]] = trace;
+void MDCache::request_finish(MDRequest *mdr)
+{
+ dout(7) << "request_finish " << *mdr << endl;
- // request pins
- request_pin_inode(req, ref);
+ delete mdr->request;
+ request_cleanup(mdr);
- if (mds->logger) mds->logger->inc("req");
-
- return true;
+ if (mds->logger) mds->logger->inc("reply");
}
-void MDCache::request_pin_inode(Message *req, CInode *in)
+void MDCache::request_forward(MDRequest *mdr, int who, int port)
{
- if (active_requests[req].request_inode_pins.count(in) == 0) {
- in->request_pin_get();
- active_requests[req].request_inode_pins.insert(in);
- }
-}
+ if (!port) port = MDS_PORT_SERVER;
-void MDCache::request_pin_dn(Message *req, CDentry *dn)
-{
- if (active_requests[req].request_dn_pins.count(dn) == 0) {
- dn->get(CDentry::PIN_REQUEST);
- active_requests[req].request_dn_pins.insert(dn);
- }
-}
+ dout(7) << "request_forward to " << who << " req " << *mdr << endl;
-void MDCache::request_pin_dir(Message *req, CDir *dir)
-{
- if (active_requests[req].request_dir_pins.count(dir) == 0) {
- dir->request_pin_get();
- active_requests[req].request_dir_pins.insert(dir);
- }
-}
+ mds->forward_message_mds(mdr->request, who, port);
+ request_cleanup(mdr);
-void MDCache::request_auth_pin(Message *req, CDir *dir)
-{
- if (active_requests[req].dir_auth_pins.count(dir) == 0) {
- dir->auth_pin();
- active_requests[req].dir_auth_pins.insert(dir);
- }
+ if (mds->logger) mds->logger->inc("fw");
}
-void MDCache::request_auth_pin(Message *req, CInode *in)
-{
- if (active_requests[req].inode_auth_pins.count(in) == 0) {
- in->auth_pin();
- active_requests[req].inode_auth_pins.insert(in);
- }
-}
-bool MDCache::request_auth_pinned(Message *req, CDir *dir)
+void MDCache::dispatch_request(MDRequest *mdr)
{
- return active_requests[req].dir_auth_pins.count(dir);
-}
+ assert(mdr->request);
-bool MDCache::request_auth_pinned(Message *req, CInode *in)
-{
- return active_requests[req].inode_auth_pins.count(in);
-}
+ switch (mdr->request->get_type()) {
+ case MSG_CLIENT_REQUEST:
+ mds->server->dispatch_request(mdr);
+ break;
-void MDCache::request_drop_auth_pins(Message *req)
-{
- // dirs
- for (set<CDir*>::iterator p = active_requests[req].dir_auth_pins.begin();
- p != active_requests[req].dir_auth_pins.end();
- ++p)
- (*p)->auth_unpin();
- active_requests[req].dir_auth_pins.clear();
+ case MSG_MDS_LOCK:
+ mds->locker->handle_lock_dn((MLock*)mdr->request);
+ break;
- // inodes
- for (set<CInode*>::iterator p = active_requests[req].inode_auth_pins.begin();
- p != active_requests[req].inode_auth_pins.end();
- ++p)
- (*p)->auth_unpin();
- active_requests[req].inode_auth_pins.clear();
+ default:
+ assert(0); // shouldn't get here
+ }
}
-void MDCache::request_cleanup(Message *req)
+void MDCache::request_drop_locks(MDRequest *mdr)
{
- assert(active_requests.count(req) == 1);
-
- // leftover xlocks?
- if (active_requests[req].xlocks.size()) {
- set<CDentry*> dns = active_requests[req].xlocks;
-
- for (set<CDentry*>::iterator it = dns.begin();
- it != dns.end();
- it++) {
- CDentry *dn = *it;
-
- dout(7) << "request_cleanup leftover xlock " << *dn << endl;
-
- mds->locker->dentry_xlock_finish(dn);
-
- // queue finishers
- dn->dir->take_waiting(CDir::WAIT_ANY, dn->name, mds->finished_queue);
-
- // remove clean, null dentry? (from a failed rename or whatever)
- if (dn->is_null() && dn->is_sync() && !dn->is_dirty()) {
- dn->dir->remove_dentry(dn);
- }
- }
-
- assert(active_requests[req].xlocks.empty()); // we just finished finished them
- }
+ // leftover dentry locks
+ while (!mdr->dentry_xlocks.empty())
+ mds->locker->dentry_xlock_finish(*mdr->dentry_xlocks.begin(), mdr);
+ while (!mdr->dentry_rdlocks.empty())
+ mds->locker->dentry_rdlock_finish(*mdr->dentry_rdlocks.begin(), mdr);
+
+ // inode locks
+ while (!mdr->inode_hard_xlocks.empty())
+ mds->locker->inode_hard_xlock_finish(*mdr->inode_hard_xlocks.begin(), mdr);
+ while (!mdr->inode_hard_rdlocks.empty())
+ mds->locker->inode_hard_rdlock_finish(*mdr->inode_hard_rdlocks.begin(), mdr);
+
+ while (!mdr->inode_file_xlocks.empty())
+ mds->locker->inode_file_xlock_finish(*mdr->inode_file_xlocks.begin(), mdr);
+ while (!mdr->inode_file_rdlocks.empty())
+ mds->locker->inode_file_rdlock_finish(*mdr->inode_file_rdlocks.begin(), mdr);
+
+ /*
// foreign xlocks?
if (active_requests[req].foreign_xlocks.size()) {
set<CDentry*> dns = active_requests[req].foreign_xlocks;
mds->send_message_mds(m, dauth, MDS_PORT_CACHE);
}
}
+ */
- // unpin paths
- for (map< CDentry*, vector<CDentry*> >::iterator it = active_requests[req].traces.begin();
- it != active_requests[req].traces.end();
- it++) {
- path_unpin(it->second, req);
- }
-
- // request pins
- for (set<CInode*>::iterator it = active_requests[req].request_inode_pins.begin();
- it != active_requests[req].request_inode_pins.end();
- it++) {
- (*it)->request_pin_put();
- }
- for (set<CDentry*>::iterator it = active_requests[req].request_dn_pins.begin();
- it != active_requests[req].request_dn_pins.end();
- it++) {
- (*it)->put(CDentry::PIN_REQUEST);
- }
- for (set<CDir*>::iterator it = active_requests[req].request_dir_pins.begin();
- it != active_requests[req].request_dir_pins.end();
- it++) {
- (*it)->request_pin_put();
- }
+ // make sure ref and trace are empty
+ // if we are doing our own locking, we can't use them!
+ assert(mdr->ref == 0);
+ assert(mdr->trace.empty());
+}
- // auth pins
- request_drop_auth_pins(req);
+void MDCache::request_cleanup(MDRequest *mdr)
+{
+ reqid_t ri = mdr->reqid;
+ assert(active_requests.count(ri));
- // remove from map
- active_requests.erase(req);
+ // clear ref, trace
+ mdr->ref = 0;
+ mdr->trace.clear();
+ // drop locks
+ request_drop_locks(mdr);
+
+ // auth pins
+ mdr->drop_auth_pins();
+
+ // drop cache pins
+ for (set<CInode*>::iterator it = mdr->inode_pins.begin();
+ it != mdr->inode_pins.end();
+ it++)
+ (*it)->put(CInode::PIN_REQUEST);
+ mdr->inode_pins.clear();
+ for (set<CDentry*>::iterator it = mdr->dentry_pins.begin();
+ it != mdr->dentry_pins.end();
+ it++)
+ (*it)->put(CDentry::PIN_REQUEST);
+ mdr->dentry_pins.clear();
+ for (set<CDir*>::iterator it = mdr->dir_pins.begin();
+ it != mdr->dir_pins.end();
+ it++)
+ (*it)->put(CDir::PIN_REQUEST);
+ mdr->dir_pins.clear();
+
+ // remove from map
+ active_requests.erase(ri);
// log some stats *****
if (mds->logger) {
}
-void MDCache::request_finish(Message *req)
-{
- dout(7) << "request_finish " << *req << endl;
- request_cleanup(req);
- delete req; // delete req
-
- if (mds->logger) mds->logger->inc("reply");
-
-
- //dump();
-}
-
-
-void MDCache::request_forward(Message *req, int who, int port)
-{
- if (!port) port = MDS_PORT_SERVER;
-
- dout(7) << "request_forward to " << who << " req " << *req << endl;
-
- // clean up my state
- request_cleanup(req);
-
- mds->forward_message_mds(req, who, port);
-
- if (mds->logger) mds->logger->inc("fw");
-}
-
// --------------------------------------------------------------------
// ANCHORS
// lookup dentry
CDentry *dn = curdir->lookup( dis->get_dentry(i) );
- if (dn) {
- // add dentry
- reply->add_dentry( dn->replicate_to( dis->get_asker() ) );
- dout(7) << "added dentry " << *dn << endl;
-
- if (!dn->is_primary()) break; // stop on null or remote link.
-
- // add inode
- CInode *next = dn->inode;
- assert(next->is_auth());
-
- reply->add_inode( next->replicate_to( dis->get_asker() ) );
- dout(7) << "added inode " << *next << endl;
-
- // descend, keep going.
- cur = next;
- continue;
- }
+ if (!dn) {
+ // don't have it.
+ if (!curdir->is_complete()) {
+ // readdir
+ dout(7) << "incomplete dir contents for " << *curdir << ", fetching" << endl;
+ if (reply->is_empty()) {
+ // fetch and wait
+ curdir->fetch(new C_MDS_RetryMessage(mds, dis));
+ return;
+ } else {
+ // initiate fetch, but send what we have so far
+ curdir->fetch(0);
+ break;
+ }
+ }
- // don't have dentry.
- if (curdir->is_complete()) {
- // set error flag in reply
- dout(7) << "dname " << dis->get_dentry(i) << " dne in " << *curdir
- << ", flagging error" << endl;
- reply->set_flag_error_dn( dis->get_dentry(i) );
- } else {
- // readdir
- dout(7) << "incomplete dir contents for " << *curdir << ", fetching" << endl;
-
- if (reply->is_empty()) {
- // fetch and wait
- curdir->fetch(new C_MDS_RetryMessage(mds, dis));
- return;
+ if (1) {
+ // send null dentry
+ dout(7) << "dentry " << dis->get_dentry(i) << " dne, returning null in "
+ << *curdir << endl;
+ dn = curdir->add_dentry(dis->get_dentry(i), 0);
} else {
- // fetch, but send what we have so far
- curdir->fetch(0);
+ // set error flag in reply
+ dout(7) << "dentry " << dis->get_dentry(i) << " dne, flagging error in "
+ << *curdir << endl;
+ reply->set_flag_error_dn( dis->get_dentry(i) );
}
}
- break;
+
+ assert(dn);
+
+ // add dentry
+ reply->add_dentry( dn->replicate_to( dis->get_asker() ) );
+ dout(7) << "added dentry " << *dn << endl;
+
+ if (!dn->is_primary()) break; // stop on null or remote link.
+
+ // add inode
+ CInode *next = dn->inode;
+ assert(next->is_auth());
+
+ reply->add_inode( next->replicate_to( dis->get_asker() ) );
+ dout(7) << "added inode " << *next << endl;
+
+ // descend, keep going.
+ cur = next;
+ continue;
}
// how did we do?
dout(5) << "trying discover on dir_update for " << path << endl;
- int r = path_traverse(path, trace, true,
+ int r = path_traverse(0, 0,
+ path, trace, true,
m, new C_MDS_RetryMessage(mds, m),
MDS_TRAVERSE_DISCOVER);
if (r > 0)
#include "MDLog.h"
#include "Migrator.h"
#include "MDBalancer.h"
-//#include "Renamer.h"
#include "AnchorClient.h"
#include "msg/Messenger.h"
+
/*******
* some generic stuff for finishing off requests
*/
-/** C_MDS_CommitRequest
- */
-
-class C_MDS_CommitRequest : public Context {
- Server *server;
- MClientRequest *req;
- MClientReply *reply;
- CInode *tracei; // inode to include a trace for
- LogEvent *event;
-
-public:
- C_MDS_CommitRequest(Server *server,
- MClientRequest *req, MClientReply *reply, CInode *tracei,
- LogEvent *event=0) {
- this->server = server;
- this->req = req;
- this->tracei = tracei;
- this->reply = reply;
- this->event = event;
- }
- void finish(int r) {
- if (r != 0) {
- // failure. set failure code and reply.
- reply->set_result(r);
- }
- if (event) {
- server->commit_request(req, reply, tracei, event);
- } else {
- // reply.
- server->reply_request(req, reply, tracei);
- }
- }
-};
-
/*
* send generic response (just and error code)
*/
-void Server::reply_request(MClientRequest *req, int r, CInode *tracei)
+void Server::reply_request(MDRequest *mdr, int r, CInode *tracei)
{
- reply_request(req, new MClientReply(req, r), tracei);
+ MClientRequest *req = mdr->client_request();
+ reply_request(mdr, new MClientReply(req, r), tracei);
}
* send given reply
* include a trace to tracei
*/
-void Server::reply_request(MClientRequest *req, MClientReply *reply, CInode *tracei)
+void Server::reply_request(MDRequest *mdr, MClientReply *reply, CInode *tracei)
{
+ MClientRequest *req = mdr->client_request();
+
dout(10) << "reply_request " << reply->get_result()
<< " (" << strerror(-reply->get_result())
<< ") " << *req << endl;
// send reply
messenger->send_message(reply,
req->get_client_inst());
-
- // discard request
- mdcache->request_finish(req);
-
- // stupid stats crap (FIXME)
- stat_ops++;
-}
-
-
-void Server::submit_update(MClientRequest *req,
- CInode *wrlockedi,
- LogEvent *event,
- Context *oncommit)
-{
- // log
- mdlog->submit_entry(event);
-
- // pin
- mdcache->request_pin_inode(req, wrlockedi);
-
- // wait
- mdlog->wait_for_sync(oncommit);
+
+ // finish request
+ mdcache->request_finish(mdr);
}
-/*
- * commit event(s) to the metadata journal, then reply.
- * or, be sloppy and do it concurrently (see g_conf.mds_log_before_reply)
- *
- * NOTE: this is old and bad (write-behind!)
- */
-void Server::commit_request(MClientRequest *req,
- MClientReply *reply,
- CInode *tracei,
- LogEvent *event,
- LogEvent *event2)
-{
- // log
- if (event) mdlog->submit_entry(event);
- if (event2) mdlog->submit_entry(event2);
-
- if (g_conf.mds_log_before_reply && g_conf.mds_log && event) {
- // SAFE mode!
-
- // pin inode so it doesn't go away!
- if (tracei) mdcache->request_pin_inode(req, tracei);
-
- // wait for log sync
- mdlog->wait_for_sync(new C_MDS_CommitRequest(this, req, reply, tracei));
- return;
- }
- else {
- // just reply
- reply_request(req, reply, tracei);
- }
-}
/***
* process a client request
*/
-
void Server::handle_client_request(MClientRequest *req)
{
- dout(4) << "req " << *req << endl;
+ dout(4) << "handle_client_request " << *req << endl;
if (!mds->is_active()) {
dout(5) << " not active, discarding client request." << endl;
// okay, i want
CInode *ref = 0;
- vector<CDentry*> trace; // might be blank, for fh guys
- bool follow_trailing_symlink = false;
- // operations on fh's or other non-files
+ // -----
+ // some ops are on ino's
switch (req->get_op()) {
- /*
case MDS_OP_FSTAT:
- reply = handle_client_fstat(req, cur);
- break; ****** fiX ME ***
- */
+ ref = mdcache->get_inode(req->args.fstat.ino);
+ assert(ref);
+ break;
case MDS_OP_TRUNCATE:
- if (!req->args.truncate.ino) break; // can be called w/ either fh OR path
+ if (!req->args.truncate.ino)
+ break; // can be called w/ either fh OR path
+ ref = mdcache->get_inode(req->args.truncate.ino);
+ assert(ref);
+ break;
case MDS_OP_FSYNC:
ref = mdcache->get_inode(req->args.fsync.ino); // fixme someday no ino needed?
+ assert(ref);
+ break;
+ }
- if (!ref) {
- int next = mds->get_nodeid() + 1;
- if (next >= mds->mdsmap->get_num_mds()) next = 0;
- dout(10) << "got request on ino we don't have, passing buck to " << next << endl;
- mds->send_message_mds(req, next, MDS_PORT_SERVER);
- return;
- }
+ if (ref) {
+ MDRequest *mdr = mdcache->request_start(req);
+ dout(10) << "inode op on ref " << *ref << endl;
+ mdr->ref = ref;
+ mdr->pin(ref);
+ dispatch_request(mdr);
+ return;
}
- if (!ref) {
- // we need to traverse a path
- filepath refpath = req->get_filepath();
-
- // ops on non-existing files --> directory paths
- switch (req->get_op()) {
- case MDS_OP_OPEN:
- if (!(req->args.open.flags & O_CREAT)) break;
-
- case MDS_OP_MKNOD:
- case MDS_OP_MKDIR:
- case MDS_OP_SYMLINK:
- case MDS_OP_LINK:
- case MDS_OP_UNLINK: // also wrt parent dir, NOT the unlinked inode!!
- case MDS_OP_RMDIR:
- case MDS_OP_RENAME:
- // remove last bit of path
- refpath = refpath.prefixpath(refpath.depth()-1);
- break;
- }
- dout(10) << "refpath = " << refpath << endl;
-
- Context *ondelay = new C_MDS_RetryMessage(mds, req);
-
- if (req->get_op() == MDS_OP_LSTAT) {
- follow_trailing_symlink = false;
- }
- // do trace
- int r = mdcache->path_traverse(refpath, trace, follow_trailing_symlink,
- req, ondelay,
- MDS_TRAVERSE_FORWARD,
- 0,
- true); // is MClientRequest
-
- if (r > 0) return; // delayed
- if (r == -ENOENT ||
- r == -ENOTDIR ||
- r == -EISDIR) {
- // error!
- dout(10) << " path traverse error " << r << ", replying" << endl;
+ // -----
+ // some ops are on existing inodes
+
+ bool follow_trailing_symlink = false;
+
+ switch (req->get_op()) {
+ case MDS_OP_LSTAT:
+ follow_trailing_symlink = false;
+ case MDS_OP_OPEN:
+ if (req->args.open.flags & O_CREAT) break; // handled below.
+ case MDS_OP_STAT:
+ case MDS_OP_UTIME:
+ case MDS_OP_CHMOD:
+ case MDS_OP_CHOWN:
+ case MDS_OP_READDIR:
+ {
+ filepath refpath = req->get_filepath();
+ Context *ondelay = new C_MDS_RetryMessage(mds, req);
+ vector<CDentry*> trace;
- // send error
- messenger->send_message(new MClientReply(req, r),
- req->get_client_inst());
-
- // <HACK>
- // is this a special debug command?
- if (refpath.depth() - 1 == trace.size() &&
- refpath.last_dentry().find(".ceph.") == 0) {
- /*
-FIXME dirfrag
- CDir *dir = 0;
- if (!trace.empty())
- dir = mdcache->get_root()->dir;
- else
- dir = trace[trace.size()-1]->get_inode()->dir;
-
- dout(1) << "** POSSIBLE CEPH DEBUG COMMAND '" << refpath.last_dentry() << "' in " << *dir << endl;
-
- if (refpath.last_dentry() == ".ceph.hash" &&
- refpath.depth() > 1) {
- dout(1) << "got explicit hash command " << refpath << endl;
- /// ....
- }
- else if (refpath.last_dentry() == ".ceph.commit") {
- dout(1) << "got explicit commit command on " << *dir << endl;
- dir->commit(0, 0);
+ int r = mdcache->path_traverse(0, 0,
+ refpath, trace, follow_trailing_symlink,
+ req, ondelay,
+ MDS_TRAVERSE_FORWARD,
+ true); // is MClientRequest
+
+ if (r > 0) return; // delayed
+ if (r < 0) {
+ dout(10) << "traverse error " << r << " " << strerror(-r) << endl;
+
+ // send error. don't bother registering request.
+ messenger->send_message(new MClientReply(req, r),
+ req->get_client_inst());
+
+ // <HACK>
+ // is this a special debug command?
+ if (refpath.depth() - 1 == trace.size() &&
+ refpath.last_dentry().find(".ceph.") == 0) {
+ // ...
}
-*/
+ // </HACK>
}
- // </HACK>
+ // can we rdlock the whole path?
+ if (!mds->locker->dentry_can_rdlock_trace(trace, req))
+ return;
- delete req;
+ // go
+ MDRequest *mdr = mdcache->request_start(req);
+ mds->locker->dentry_anon_rdlock_trace_start(trace);
+ dispatch_request(mdr);
return;
}
-
- if (trace.size())
- ref = trace[trace.size()-1]->inode;
- else
- ref = mdcache->get_root();
}
+
- dout(10) << "ref is " << *ref << endl;
+ // ----
+ // the rest handle things themselves.
- // rename doesn't pin src path (initially)
- if (req->get_op() == MDS_OP_RENAME) trace.clear();
+ switch (req->get_op()) {
+ case MDS_OP_OPEN:
+ assert(req->args.open.flags & O_CREAT);
+ case MDS_OP_MKNOD:
+ case MDS_OP_MKDIR:
+ case MDS_OP_SYMLINK:
+ case MDS_OP_LINK:
+ case MDS_OP_UNLINK:
+ case MDS_OP_RMDIR:
+ case MDS_OP_RENAME:
+ {
+ // register request
+ MDRequest *mdr = mdcache->request_start(req);
+ dispatch_request(mdr);
+ return;
+ }
+ }
- // register
- if (!mdcache->request_start(req, ref, trace))
- return;
-
- // process
- dispatch_request(req, ref);
+ assert(0); // we missed something!
}
-
-void Server::dispatch_request(Message *m, CInode *ref)
+void Server::dispatch_request(MDRequest *mdr)
{
- MClientRequest *req = 0;
-
- // MLock or MClientRequest?
- /* this is a little weird.
- client requests and mlocks both initial dentry xlocks, path pins, etc.,
- and thus both make use of the context C_MDS_RetryRequest.
- */
- switch (m->get_type()) {
- case MSG_CLIENT_REQUEST:
- req = (MClientRequest*)m;
- break; // continue below!
-
- case MSG_MDS_LOCK:
- mds->locker->handle_lock_dn((MLock*)m);
- return; // done
+ MClientRequest *req = mdr->client_request();
- default:
- assert(0); // shouldn't get here
+ if (mdr->ref) {
+ dout(7) << "dispatch_request " << *req << " ref " << *mdr->ref << endl;
+ } else {
+ dout(7) << "dispatch_request " << *req << endl;
}
- // MClientRequest.
-
- dout(7) << "handle_client " << *m << " ref " << *ref << endl;
-
switch (req->get_op()) {
-
- // files
- case MDS_OP_OPEN:
- if (req->args.open.flags & O_CREAT)
- handle_client_openc(req, ref);
- else
- handle_client_open(req, ref);
- break;
- case MDS_OP_TRUNCATE:
- handle_client_truncate(req, ref);
- break;
- /*
- case MDS_OP_FSYNC:
- handle_client_fsync(req, ref);
- break;
- */
- /*
- case MDS_OP_RELEASE:
- handle_client_release(req, ref);
- break;
- */
- // inodes
+ // inodes ops.
case MDS_OP_STAT:
case MDS_OP_LSTAT:
- handle_client_stat(req, ref);
+ handle_client_stat(mdr);
break;
case MDS_OP_UTIME:
- handle_client_utime(req, ref);
+ handle_client_utime(mdr);
break;
case MDS_OP_CHMOD:
- handle_client_chmod(req, ref);
+ handle_client_chmod(mdr);
break;
case MDS_OP_CHOWN:
- handle_client_chown(req, ref);
+ handle_client_chown(mdr);
+ break;
+ case MDS_OP_TRUNCATE:
+ handle_client_truncate(mdr);
break;
-
- // namespace
case MDS_OP_READDIR:
- handle_client_readdir(req, ref);
+ handle_client_readdir(mdr);
+ break;
+ case MDS_OP_FSYNC:
+ //handle_client_fsync(req, ref);
+ break;
+
+ // funky.
+ case MDS_OP_OPEN:
+ if ((req->args.open.flags & O_CREAT) &&
+ !mdr->ref)
+ handle_client_openc(mdr);
+ else
+ handle_client_open(mdr);
break;
+
+ // namespace.
+ // no prior locks.
case MDS_OP_MKNOD:
- handle_client_mknod(req, ref);
+ handle_client_mknod(mdr);
break;
case MDS_OP_LINK:
- handle_client_link(req, ref);
+ handle_client_link(mdr);
break;
case MDS_OP_UNLINK:
- handle_client_unlink(req, ref);
+ handle_client_unlink(mdr);
break;
case MDS_OP_RENAME:
- handle_client_rename(req, ref);
+ handle_client_rename(mdr);
break;
case MDS_OP_RMDIR:
- handle_client_unlink(req, ref);
+ handle_client_unlink(mdr);
break;
case MDS_OP_MKDIR:
- handle_client_mkdir(req, ref);
+ handle_client_mkdir(mdr);
break;
case MDS_OP_SYMLINK:
- handle_client_symlink(req, ref);
+ handle_client_symlink(mdr);
break;
-
default:
dout(1) << " unknown client op " << req->get_op() << endl;
assert(0);
}
+}
+
- return;
+
+// ---------------------------------------
+// HELPERS
+
+
+/** request_pin_ref
+ * return the ref inode, referred to by the last dentry in the trace.
+ * open if it is remote.
+ * pin.
+ * return existing, if mdr->ref already set.
+ */
+CInode *Server::request_pin_ref(MDRequest *mdr)
+{
+ // already did it?
+ if (mdr->ref)
+ return mdr->ref;
+
+ // open and pin ref inode in cache too
+ CInode *ref = 0;
+ if (mdr->trace.empty())
+ ref = mdcache->get_root();
+ else {
+ ref = mdcache->get_dentry_inode(mdr->trace[mdr->trace.size()-1], mdr);
+ if (!ref) return 0;
+ }
+ mdr->pin(ref);
+ mdr->ref = ref;
+ return ref;
}
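+
+// E.g. an inode-op handler (hypothetical sketch; the concrete handlers are not
+// part of this hunk) would resolve and pin its ref after locking the trace:
+//
+//   CInode *ref = request_pin_ref(mdr);
+//   if (!ref) return;   // remote inode still being opened; request will retry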
+
+/** validate_dentry_dir
+ *
+ * verify that the dir exists and would own the dname.
+ * do not check if the dentry exists.
+ */
+CDir *Server::validate_dentry_dir(MDRequest *mdr, CInode *diri, const string& dname)
+{
+ // make sure parent is a dir?
+ if (!diri->is_dir()) {
+ dout(7) << "validate_dentry_dir: not a dir" << endl;
+ reply_request(mdr, -ENOTDIR);
+ return 0;
+ }
+
+ // which dirfrag?
+ frag_t fg = diri->pick_dirfrag(dname);
+
+ CDir *dir = try_open_auth_dir(diri, fg, mdr);
+ if (!dir)
+ return 0;
+
+ // frozen?
+ if (dir->is_frozen()) {
+ dout(7) << "dir is frozen " << *dir << endl;
+ dir->add_waiter(CDir::WAIT_UNFREEZE,
+ new C_MDS_RetryRequest(mdcache, mdr));
+ return 0;
+ }
+
+ return dir;
+}
+
+
+/** prepare_null_dentry
+ * prepare a null (or existing) dentry in given dir.
+ * wait for any dn lock.
+ */
+CDentry* Server::prepare_null_dentry(MDRequest *mdr, CDir *dir, const string& dname, bool okexist)
+{
+ dout(10) << "prepare_null_dentry " << dname << " in " << *dir << endl;
+ assert(dir->is_auth());
+
+ // does it already exist?
+ CDentry *dn = dir->lookup(dname);
+ if (dn) {
+ if (!dn->can_read(mdr)) {
+ dout(10) << "waiting on (existing!) unreadable dentry " << *dn << endl;
+ dir->add_waiter(CDir::WAIT_DNREAD, dname, new C_MDS_RetryRequest(mdcache, mdr));
+ return 0;
+ }
+
+ if (!dn->is_null()) {
+ // name already exists
+ dout(10) << "dentry " << dname << " exists in " << *dir << endl;
+ if (!okexist) {
+ reply_request(mdr, -EEXIST);
+ return 0;
+ }
+ }
+
+ return dn;
+ }
+
+ // make sure dir is complete
+ if (!dir->is_complete()) {
+ dout(7) << " incomplete dir contents for " << *dir << ", fetching" << endl;
+ dir->fetch(new C_MDS_RetryRequest(mdcache, mdr));
+ return 0;
+ }
+
+ // create
+ dn = dir->add_dentry(dname, 0);
+ dout(10) << "prepare_null_dentry added " << *dn << endl;
+
+ return dn;
+}
+
+
+/** prepare_new_inode
+ *
+ * create a new inode. set c/m/atime. hit dir pop.
+ */
+CInode* Server::prepare_new_inode(MClientRequest *req, CDir *dir)
+{
+ CInode *in = mdcache->create_inode();
+ in->inode.uid = req->get_caller_uid();
+ in->inode.gid = req->get_caller_gid();
+ in->inode.ctime = in->inode.mtime = in->inode.atime = g_clock.gettime(); // now
+ dout(10) << "prepare_new_inode " << *in << endl;
+
+ // bump modify pop
+ mds->balancer->hit_dir(dir, META_POP_DWR);
+
+ return in;
+}
+
+
+
+CDir *Server::traverse_to_auth_dir(MDRequest *mdr, vector<CDentry*> &trace, filepath refpath)
+{
+ // figure parent dir vs dname
+ if (refpath.depth() == 0) {
+ dout(7) << "can't do that to root" << endl;
+ reply_request(mdr, -EINVAL);
+ return 0;
+ }
+ string dname = refpath.last_dentry();
+ refpath.pop_dentry();
+
+ dout(10) << "traverse_to_auth_dir dirpath " << refpath << " dname " << dname << endl;
+
+ // traverse to parent dir
+ Context *ondelay = new C_MDS_RetryRequest(mdcache, mdr);
+ int r = mdcache->path_traverse(mdr,
+ 0,
+ refpath, trace, true,
+ mdr->request, ondelay,
+ MDS_TRAVERSE_FORWARD,
+ true); // is MClientRequest
+ if (r > 0) return 0; // delayed
+ if (r < 0) {
+ reply_request(mdr, r);
+ return 0;
+ }
+
+ // open inode
+ CInode *diri;
+ if (trace.empty())
+ diri = mdcache->get_root();
+ else
+ diri = mdcache->get_dentry_inode(trace[trace.size()-1], mdr);
+ if (!diri)
+ return 0; // opening inode.
+
+ // is it an auth dir?
+ CDir *dir = validate_dentry_dir(mdr, diri, dname);
+ if (!dir)
+ return 0; // forwarded or waiting for freeze
+
+ dout(10) << "traverse_to_auth_dir " << *dir << endl;
+ return dir;
+}
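+
+// Illustrative sketch (assumed typical caller): traverse_to_auth_dir() splits the
+// request path into (parent dir, last dname), traverses to the parent, and returns
+// 0 whenever the request was forwarded, delayed, or already replied to:
+//
+//   vector<CDentry*> trace;
+//   CDir *dir = traverse_to_auth_dir(mdr, trace, req->get_filepath());
+//   if (!dir) return;
+//   const string &dname = req->get_filepath().last_dentry();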
+
+
+/** rdlock_path_xlock_dentry
+ * traverse the path to the directory that could/would contain the dentry.
+ * make sure i am auth for that dentry, forwarding as necessary.
+ * if mustexist, require an existing (non-null, readable) dentry;
+ * otherwise create a null dentry in place (or use the existing one, if okexist).
+ * get rdlocks on traversed dentries, xlock on the new dentry.
+ * (see the usage sketch after this function.)
+ */
+CDentry* Server::rdlock_path_xlock_dentry(MDRequest *mdr, bool okexist, bool mustexist)
+{
+ MClientRequest *req = mdr->client_request();
+
+ vector<CDentry*> trace;
+ CDir *dir = traverse_to_auth_dir(mdr, trace, req->get_filepath());
+ if (!dir) return 0;  // forwarded, or waiting for traverse/freeze
+ dout(10) << "rdlock_path_xlock_dentry dir " << *dir << endl;
+
+ // make sure we can auth_pin dir
+ if (!dir->can_auth_pin()) {
+ dout(7) << "waiting for authpinnable on " << *dir << endl;
+ dir->add_waiter(CDir::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr));
+ return 0;
+ }
+
+ // make a null dentry?
+ const string &dname = req->get_filepath().last_dentry();
+ CDentry *dn;
+ if (mustexist) {
+ dn = dir->lookup(dname);
+
+ // make sure dir is complete
+ if (!dn && !dir->is_complete()) {
+ dout(7) << " incomplete dir contents for " << *dir << ", fetching" << endl;
+ dir->fetch(new C_MDS_RetryRequest(mdcache, mdr));
+ return 0;
+ }
+
+ // readable?
+ if (dn && !dn->can_read(mdr)) {
+ dout(10) << "waiting on (existing!) unreadable dentry " << *dn << endl;
+ dir->add_waiter(CDir::WAIT_DNREAD, dname, new C_MDS_RetryRequest(mdcache, mdr));
+ return 0;
+ }
+
+ // exists?
+ if (!dn || dn->is_null()) {
+ dout(7) << "dentry " << dname << " dne in " << *dir << endl;
+ reply_request(mdr, -ENOENT);
+ return 0;
+ }
+ } else {
+ dn = prepare_null_dentry(mdr, dir, dname, okexist);
+ if (!dn)
+ return 0;
+ }
+
+ // -- lock --
+ set<CDentry*> dentry_rdlocks;
+ set<CDentry*> dentry_xlocks;
+ set<CInode*> inode_empty;
+
+ for (unsigned i=0; i<trace.size(); i++) {
+ dout(10) << "will rdlock trace " << i << " " << *trace[i] << endl;
+ dentry_rdlocks.insert(trace[i]);
+ }
+ dout(10) << "will rd or x lock " << *dn << endl;
+ if (dn->is_null())
+ dentry_xlocks.insert(dn); // new dn, xlock
+ else
+ dentry_rdlocks.insert(dn); // existing dn, rdlock
+
+ if (!mds->locker->acquire_locks(mdr,
+ dentry_rdlocks, dentry_xlocks,
+ inode_empty, inode_empty))
+ return 0;
+
+ // save the locked trace.
+ mdr->trace.swap(trace);
+
+ return dn;
+}
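+
+// Illustrative usage (sketch; mirrors the handlers below).  the okexist/mustexist
+// flags select the create vs. unlink flavor of rdlock_path_xlock_dentry():
+//
+//   CDentry *dn = rdlock_path_xlock_dentry(mdr, false, false);  // mknod/mkdir/symlink
+//   CDentry *dn = rdlock_path_xlock_dentry(mdr, !excl, false);  // openc (okexist unless O_EXCL)
+//   CDentry *dn = rdlock_path_xlock_dentry(mdr, false, true);   // unlink/rmdir (must exist)
+//   if (!dn) return;   // replied, forwarded, or waiting to be retried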
+
+
+
+
+
// FIXME: this probably should go somewhere else.
-CDir* Server::try_open_auth_dir(CInode *diri, frag_t fg, MClientRequest *req)
+CDir* Server::try_open_auth_dir(CInode *diri, frag_t fg, MDRequest *mdr)
{
CDir *dir = diri->get_dirfrag(fg);
if (!dir && !diri->is_auth()) {
int inauth = diri->authority().first;
dout(7) << "try_open_auth_dir: not open, not inode auth, fw to mds" << inauth << endl;
- mdcache->request_forward(req, inauth);
+ mdcache->request_forward(mdr, inauth);
return 0;
}
dout(10) << "try_open_dir: dir inode is frozen, waiting " << *diri << endl;
assert(diri->get_parent_dir());
diri->get_parent_dir()->add_waiter(CDir::WAIT_UNFREEZE,
- new C_MDS_RetryRequest(mds, req, diri));
+ new C_MDS_RetryRequest(mdcache, mdr));
return 0;
}
int auth = dir->authority().first;
dout(7) << "try_open_auth_dir: not auth for " << *dir
<< ", fw to mds" << auth << endl;
- mdcache->request_forward(req, auth);
+ mdcache->request_forward(mdr, auth);
return 0;
}
return dir;
}
-CDir* Server::try_open_dir(CInode *diri, frag_t fg,
- MClientRequest *req, CInode *ref)
+CDir* Server::try_open_dir(CInode *diri, frag_t fg, MDRequest *mdr)
{
CDir *dir = diri->get_dirfrag(fg);
if (dir)
dout(10) << "try_open_dir: dir inode is auth+frozen, waiting " << *diri << endl;
assert(diri->get_parent_dir());
diri->get_parent_dir()->add_waiter(CDir::WAIT_UNFREEZE,
- new C_MDS_RetryRequest(mds, req, diri));
+ new C_MDS_RetryRequest(mdcache, mdr));
return 0;
}
} else {
// not auth
mdcache->open_remote_dir(diri, fg,
- new C_MDS_RetryRequest(mds, req, ref));
+ new C_MDS_RetryRequest(mdcache, mdr));
return 0;
}
}
// ===============================================================================
// STAT
-void Server::handle_client_stat(MClientRequest *req,
- CInode *ref)
+void Server::handle_client_stat(MDRequest *mdr)
{
+ MClientRequest *req = mdr->client_request();
+ CInode *ref = request_pin_ref(mdr);
+ if (!ref) return;
+
// FIXME: this is really not the way to handle the statlite mask.
// do I need file info?
int mask = req->args.stat.mask;
if (mask & (INODE_MASK_SIZE|INODE_MASK_MTIME)) {
// yes. do a full stat.
- if (!mds->locker->inode_file_read_start(ref, req, ref))
+ if (!mds->locker->inode_file_rdlock_start(ref, mdr))
return; // syncing
- mds->locker->inode_file_read_finish(ref);
+ mds->locker->inode_file_rdlock_finish(ref, mdr);
} else {
// nope! easy peasy.
}
// reply
//dout(10) << "reply to " << *req << " stat " << ref->inode.mtime << endl;
MClientReply *reply = new MClientReply(req);
- reply_request(req, reply, ref);
+ reply_request(mdr, reply, ref);
}
*/
class C_MDS_utime_finish : public Context {
MDS *mds;
- MClientRequest *req;
+ MDRequest *mdr;
CInode *in;
version_t pv;
time_t mtime, atime;
public:
- C_MDS_utime_finish(MDS *m, MClientRequest *r, CInode *i, version_t pdv, time_t mt, time_t at) :
- mds(m), req(r), in(i),
+ C_MDS_utime_finish(MDS *m, MDRequest *r, CInode *i, version_t pdv, time_t mt, time_t at) :
+ mds(m), mdr(r), in(i),
pv(pdv),
mtime(mt), atime(at) { }
void finish(int r) {
in->inode.atime = atime;
in->mark_dirty(pv);
- // unlock
- mds->locker->inode_file_write_finish(in);
-
// reply
- MClientReply *reply = new MClientReply(req, 0);
+ MClientReply *reply = new MClientReply(mdr->client_request(), 0);
reply->set_result(0);
- mds->server->reply_request(req, reply, in);
+ mds->server->reply_request(mdr, reply, in);
}
};
// utime
-void Server::handle_client_utime(MClientRequest *req,
- CInode *cur)
+void Server::handle_client_utime(MDRequest *mdr)
{
+ MClientRequest *req = mdr->client_request();
+ CInode *cur = request_pin_ref(mdr);
+ if (!cur) return;
+
// auth pin
if (!cur->can_auth_pin()) {
dout(7) << "waiting for authpinnable on " << *cur << endl;
- cur->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mds, req, cur));
+ cur->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr));
return;
}
- mdcache->request_auth_pin(req, cur);
+ mdr->auth_pin(cur);
// write
- if (!mds->locker->inode_file_write_start(cur, req, cur))
+ if (!mds->locker->inode_file_xlock_start(cur, mdr))
return; // fw or (wait for) sync
mds->balancer->hit_inode(cur, META_POP_IWR);
version_t pdv = cur->pre_dirty();
time_t mtime = req->args.utime.modtime;
time_t atime = req->args.utime.actime;
- C_MDS_utime_finish *fin = new C_MDS_utime_finish(mds, req, cur, pdv,
+ C_MDS_utime_finish *fin = new C_MDS_utime_finish(mds, mdr, cur, pdv,
mtime, atime);
// log + wait
// --------------
/*
- * finisher: do a inode_hard_write_finish and reply.
+ * finisher: do a inode_hard_xlock_finish and reply.
*/
class C_MDS_chmod_finish : public Context {
MDS *mds;
- MClientRequest *req;
+ MDRequest *mdr;
CInode *in;
version_t pv;
int mode;
public:
- C_MDS_chmod_finish(MDS *m, MClientRequest *r, CInode *i, version_t pdv, int mo) :
- mds(m), req(r), in(i), pv(pdv), mode(mo) { }
+ C_MDS_chmod_finish(MDS *m, MDRequest *r, CInode *i, version_t pdv, int mo) :
+ mds(m), mdr(r), in(i), pv(pdv), mode(mo) { }
void finish(int r) {
assert(r == 0);
in->inode.mode |= (mode & 04777);
in->mark_dirty(pv);
- // unlock
- mds->locker->inode_hard_write_finish(in);
-
// reply
- MClientReply *reply = new MClientReply(req, 0);
+ MClientReply *reply = new MClientReply(mdr->client_request(), 0);
reply->set_result(0);
- mds->server->reply_request(req, reply, in);
+ mds->server->reply_request(mdr, reply, in);
}
};
// chmod
-void Server::handle_client_chmod(MClientRequest *req,
- CInode *cur)
+void Server::handle_client_chmod(MDRequest *mdr)
{
+ MClientRequest *req = mdr->client_request();
+ CInode *cur = request_pin_ref(mdr);
+ if (!cur) return;
+
// auth pin
if (!cur->can_auth_pin()) {
dout(7) << "waiting for authpinnable on " << *cur << endl;
- cur->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mds, req, cur));
+ cur->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr));
return;
}
- mdcache->request_auth_pin(req, cur);
+ mdr->auth_pin(cur);
// write
- if (!mds->locker->inode_hard_write_start(cur, req, cur))
+ if (!mds->locker->inode_hard_xlock_start(cur, mdr))
return; // fw or (wait for) lock
mds->balancer->hit_inode(cur, META_POP_IWR);
// prepare
version_t pdv = cur->pre_dirty();
int mode = req->args.chmod.mode;
- C_MDS_chmod_finish *fin = new C_MDS_chmod_finish(mds, req, cur, pdv,
+ C_MDS_chmod_finish *fin = new C_MDS_chmod_finish(mds, mdr, cur, pdv,
mode);
// log + wait
class C_MDS_chown_finish : public Context {
MDS *mds;
- MClientRequest *req;
+ MDRequest *mdr;
CInode *in;
version_t pv;
int uid, gid;
public:
- C_MDS_chown_finish(MDS *m, MClientRequest *r, CInode *i, version_t pdv, int u, int g) :
- mds(m), req(r), in(i), pv(pdv), uid(u), gid(g) { }
+ C_MDS_chown_finish(MDS *m, MDRequest *r, CInode *i, version_t pdv, int u, int g) :
+ mds(m), mdr(r), in(i), pv(pdv), uid(u), gid(g) { }
void finish(int r) {
assert(r == 0);
if (gid >= 0) in->inode.gid = gid;
in->mark_dirty(pv);
- // unlock
- mds->locker->inode_hard_write_finish(in);
-
// reply
- MClientReply *reply = new MClientReply(req, 0);
+ MClientReply *reply = new MClientReply(mdr->client_request(), 0);
reply->set_result(0);
- mds->server->reply_request(req, reply, in);
+ mds->server->reply_request(mdr, reply, in);
}
};
-void Server::handle_client_chown(MClientRequest *req,
- CInode *cur)
+void Server::handle_client_chown(MDRequest *mdr)
{
+ MClientRequest *req = mdr->client_request();
+ CInode *cur = request_pin_ref(mdr);
+ if (!cur) return;
+
// auth pin
if (!cur->can_auth_pin()) {
dout(7) << "waiting for authpinnable on " << *cur << endl;
- cur->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mds, req, cur));
+ cur->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr));
return;
}
- mdcache->request_auth_pin(req, cur);
+ mdr->auth_pin(cur);
// write
- if (!mds->locker->inode_hard_write_start(cur, req, cur))
+ if (!mds->locker->inode_hard_xlock_start(cur, mdr))
return; // fw or (wait for) lock
mds->balancer->hit_inode(cur, META_POP_IWR);
version_t pdv = cur->pre_dirty();
int uid = req->args.chown.uid;
int gid = req->args.chown.gid;
- C_MDS_chown_finish *fin = new C_MDS_chown_finish(mds, req, cur, pdv,
+ C_MDS_chown_finish *fin = new C_MDS_chown_finish(mds, mdr, cur, pdv,
uid, gid);
// log + wait
-
-
-
// =================================================================
// DIRECTORY and NAMESPACE OPS
}
-void Server::handle_client_readdir(MClientRequest *req,
- CInode *diri)
+void Server::handle_client_readdir(MDRequest *mdr)
{
+ MClientRequest *req = mdr->client_request();
+ CInode *diri = request_pin_ref(mdr);
+ if (!diri) return;
+
// it's a directory, right?
if (!diri->is_dir()) {
// not a dir
dout(10) << "reply to " << *req << " readdir -ENOTDIR" << endl;
- reply_request(req, -ENOTDIR);
+ reply_request(mdr, -ENOTDIR);
return;
}
// does it exist?
if (diri->dirfragtree[fg] != fg) {
dout(10) << "frag " << fg << " doesn't appear in fragtree " << diri->dirfragtree << endl;
- reply_request(req, -EAGAIN);
+ reply_request(mdr, -EAGAIN);
return;
}
- CDir *dir = try_open_auth_dir(diri, fg, req);
+ CDir *dir = try_open_auth_dir(diri, fg, mdr);
if (!dir) return;
// ok!
assert(dir->is_auth());
// check perm
- if (!mds->locker->inode_hard_read_start(diri, req, diri))
+ /*
+ if (!mds->locker->inode_hard_rdlock_start(diri, mdr))
return;
- mds->locker->inode_hard_read_finish(diri);
+ mds->locker->inode_hard_rdlock_finish(diri, mdr);
+ */
if (!dir->is_complete()) {
// fetch
dout(10) << " incomplete dir contents for readdir on " << *dir << ", fetching" << endl;
- dir->fetch(new C_MDS_RetryRequest(mds, req, diri));
+ dir->fetch(new C_MDS_RetryRequest(mdcache, mdr));
return;
}
//balancer->hit_dir(diri->dir);
// reply
- reply_request(req, reply, diri);
+ reply_request(mdr, reply, diri);
}
class C_MDS_mknod_finish : public Context {
MDS *mds;
- MClientRequest *req;
+ MDRequest *mdr;
CDentry *dn;
CInode *newi;
version_t pv;
public:
- C_MDS_mknod_finish(MDS *m, MClientRequest *r, CDentry *d, CInode *ni) :
- mds(m), req(r), dn(d), newi(ni),
+ C_MDS_mknod_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ni) :
+ mds(m), mdr(r), dn(d), newi(ni),
pv(d->get_projected_version()) {}
void finish(int r) {
assert(r == 0);
// dirty inode, dn, dir
newi->mark_dirty(pv);
- // unlock
- mds->locker->dentry_xlock_finish(dn);
-
// hit pop
mds->balancer->hit_inode(newi, META_POP_IWR);
// reply
- MClientReply *reply = new MClientReply(req, 0);
+ MClientReply *reply = new MClientReply(mdr->client_request(), 0);
reply->set_result(0);
- mds->server->reply_request(req, reply, newi);
+ mds->server->reply_request(mdr, reply, newi);
}
};
-void Server::handle_client_mknod(MClientRequest *req, CInode *diri)
-{
- CDir *dir = 0;
- CDentry *dn = 0;
-
- // create null dentry
- if (!prepare_null_dentry(req, diri, &dir, &dn))
- return;
- assert(dir);
- assert(dn);
-
- // xlock dentry
- if (!mds->locker->dentry_xlock_start(dn, req, diri))
- return;
+void Server::handle_client_mknod(MDRequest *mdr)
+{
+ MClientRequest *req = mdr->client_request();
+
+ CDentry *dn = rdlock_path_xlock_dentry(mdr, false, false);
+ if (!dn) return;
- CInode *newi = prepare_new_inode(req, dir);
+ CInode *newi = prepare_new_inode(req, dn->dir);
assert(newi);
// it's a file.
newi->inode.mode |= INODE_MODE_FILE;
// prepare finisher
- C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, req, dn, newi);
+ C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi);
EUpdate *le = new EUpdate("mknod");
le->metablob.add_client_req(req->get_reqid());
- le->metablob.add_dir_context(dir);
+ le->metablob.add_dir_context(dn->dir);
inode_t *pi = le->metablob.add_primary_dentry(dn, true, newi);
pi->version = dn->get_projected_version();
-/** validate_dentry_dir
- *
- * verify that the dir exists and would own the dname.
- * do not check if the dentry exists.
- */
-CDir *Server::validate_dentry_dir(MClientRequest *req, CInode *ref, CInode *diri, const string& name)
-{
- // make sure parent is a dir?
- if (!diri->is_dir()) {
- dout(7) << "validate_dentry_dir: not a dir" << endl;
- reply_request(req, -ENOTDIR);
- return false;
- }
-
- // which dirfrag?
- frag_t fg = diri->pick_dirfrag(name);
-
- CDir *dir = try_open_auth_dir(diri, fg, req);
- if (!dir)
- return 0;
-
- /*
- // dir auth pinnable?
- if (!dir->can_auth_pin()) {
- dout(7) << "validate_dentry_dir: dir " << *dir << " not pinnable, waiting" << endl;
- dir->add_waiter(CDir::WAIT_AUTHPINNABLE,
- new C_MDS_RetryRequest(mds, req, diri));
- return false;
- }
- */
-
- // frozen?
- if (dir->is_frozen()) {
- dout(7) << "dir is frozen " << *dir << endl;
- dir->add_waiter(CDir::WAIT_UNFREEZE,
- new C_MDS_RetryRequest(mds, req, ref));
- return false;
- }
-
- return dir;
-}
-
-/** prepare_null_dentry
- *
- * prepare a mknod-type operation (mknod, mkdir, symlink, open+create).
- * create the inode and dentry, but do not link them.
- * pre_dirty the dentry+dir.
- * xlock the dentry.
- *
- * return val
- * 0 - wait for something
- * 1 - created
- * 2 - already exists (only if okexist=true)
- */
-int Server::prepare_null_dentry(MClientRequest *req,
- CInode *diri, CDir **pdir, CDentry **pdn,
- bool okexist)
-{
- // get containing directory (without last bit)
- filepath dirpath = req->get_filepath().prefixpath(req->get_filepath().depth() - 1);
- string name = req->get_filepath().last_dentry();
-
- return prepare_null_dentry(req, diri,
- diri, name,
- pdir, pdn, okexist);
-}
-
-int Server::prepare_null_dentry(MClientRequest *req, CInode *ref,
- CInode *diri, const string& name,
- CDir **pdir, CDentry **pdn,
- bool okexist)
-{
- dout(10) << "prepare_null_dentry " << name << " in " << *diri << endl;
-
- CDir *dir = *pdir = validate_dentry_dir(req, ref, diri, name);
- if (!dir) return 0;
-
- // make sure name doesn't already exist
- *pdn = dir->lookup(name);
- if (*pdn) {
- if (!(*pdn)->can_read(req)) {
- dout(10) << "waiting on (existing!) unreadable dentry " << **pdn << endl;
- dir->add_waiter(CDir::WAIT_DNREAD, name, new C_MDS_RetryRequest(mds, req, ref));
- return 0;
- }
-
- if (!(*pdn)->is_null()) {
- // name already exists
- if (okexist) {
- dout(10) << "dentry " << name << " exists in " << *dir << endl;
- return 2;
- } else {
- dout(10) << "dentry " << name << " exists in " << *dir << endl;
- reply_request(req, -EEXIST);
- return 0;
- }
- }
- }
-
- // make sure dir is complete
- if (!dir->is_complete()) {
- dout(7) << " incomplete dir contents for " << *dir << ", fetching" << endl;
- dir->fetch(new C_MDS_RetryRequest(mds, req, ref));
- return 0;
- }
-
- // create null dentry
- if (!*pdn) {
- *pdn = dir->add_dentry(name, 0);
- dout(10) << "prepare_null_dentry added " << **pdn << endl;
- } else {
- dout(10) << "prepare_null_dentry had " << **pdn << endl;
- }
-
-
- return 1;
-}
-
-
-/** prepare_new_inode
- *
- * create a new inode. set c/m/atime. hit dir pop.
- */
-CInode* Server::prepare_new_inode(MClientRequest *req, CDir *dir)
-{
- CInode *in = mdcache->create_inode();
- in->inode.uid = req->get_caller_uid();
- in->inode.gid = req->get_caller_gid();
- in->inode.ctime = in->inode.mtime = in->inode.atime = g_clock.gettime(); // now
- dout(10) << "prepare_new_inode " << *in << endl;
-
- // bump modify pop
- mds->balancer->hit_dir(dir, META_POP_DWR);
-
- return in;
-}
-
-
-
-
-
// MKDIR
-void Server::handle_client_mkdir(MClientRequest *req, CInode *diri)
+void Server::handle_client_mkdir(MDRequest *mdr)
{
- CDir *dir = 0;
- CDentry *dn = 0;
+ MClientRequest *req = mdr->client_request();
- // make dentry
- if (!prepare_null_dentry(req, diri, &dir, &dn))
- return;
- assert(dir);
- assert(dn);
-
- // xlock
- if (!mds->locker->dentry_xlock_start(dn, req, diri))
- return;
+ CDentry *dn = rdlock_path_xlock_dentry(mdr, false, false);
+ if (!dn) return;
// new inode
- CInode *newi = prepare_new_inode(req, dir);
+ CInode *newi = prepare_new_inode(req, dn->dir);
assert(newi);
// it's a directory.
newdir->mark_dirty(newdir->pre_dirty());
// prepare finisher
- C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, req, dn, newi);
+ C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi);
EUpdate *le = new EUpdate("mkdir");
le->metablob.add_client_req(req->get_reqid());
- le->metablob.add_dir_context(dir);
+ le->metablob.add_dir_context(dn->dir);
inode_t *pi = le->metablob.add_primary_dentry(dn, true, newi);
pi->version = dn->get_projected_version();
le->metablob.add_dir(newdir, true);
}
-
// SYMLINK
-void Server::handle_client_symlink(MClientRequest *req, CInode *diri)
+void Server::handle_client_symlink(MDRequest *mdr)
{
- CDir *dir = 0;
- CDentry *dn = 0;
-
- // make null dentry
- if (!prepare_null_dentry(req, diri, &dir, &dn))
- return;
- assert(dir);
- assert(dn);
-
- // xlock
- if (!mds->locker->dentry_xlock_start(dn, req, diri))
- return;
+ MClientRequest *req = mdr->client_request();
+
+ CDentry *dn = rdlock_path_xlock_dentry(mdr, false, false);
+ if (!dn) return;
- CInode *newi = prepare_new_inode(req, dir);
+ CInode *newi = prepare_new_inode(req, dn->dir);
assert(newi);
// it's a symlink
newi->symlink = req->get_sarg();
// prepare finisher
- C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, req, dn, newi);
+ C_MDS_mknod_finish *fin = new C_MDS_mknod_finish(mds, mdr, dn, newi);
EUpdate *le = new EUpdate("symlink");
le->metablob.add_client_req(req->get_reqid());
- le->metablob.add_dir_context(dir);
+ le->metablob.add_dir_context(dn->dir);
inode_t *pi = le->metablob.add_primary_dentry(dn, true, newi);
pi->version = dn->get_projected_version();
// LINK
-class C_MDS_LinkTraverse : public Context {
- Server *server;
- MClientRequest *req;
- CInode *ref;
-public:
- vector<CDentry*> trace;
- C_MDS_LinkTraverse(Server *server, MClientRequest *req, CInode *ref) {
- this->server = server;
- this->req = req;
- this->ref = ref;
- }
- void finish(int r) {
- server->handle_client_link_2(r, req, ref, trace);
- }
-};
-
-void Server::handle_client_link(MClientRequest *req, CInode *ref)
+void Server::handle_client_link(MDRequest *mdr)
{
- string dname = req->get_filepath().last_dentry();
- dout(7) << "handle_client_link " << dname << " in " << *ref
+ MClientRequest *req = mdr->client_request();
+
+ dout(7) << "handle_client_link " << req->get_filepath()
<< " to " << req->get_sarg()
<< endl;
- // make sure we own the dname
- CDir *dir = validate_dentry_dir(req, ref, ref, dname);
- if (!dir) return;
-
- // discover link target
- filepath target = req->get_sarg();
- dout(7) << "handle_client_link discovering target " << target << endl;
- C_MDS_LinkTraverse *onfinish = new C_MDS_LinkTraverse(this, req, ref);
- Context *ondelay = new C_MDS_RetryRequest(mds, req, ref);
+ // traverse to dest dir, make sure it's ours.
+ const filepath &linkpath = req->get_filepath();
+ const string &dname = linkpath.last_dentry();
+ vector<CDentry*> linktrace;
+ CDir *dir = traverse_to_auth_dir(mdr, linktrace, linkpath);
+ if (!dir) return;  // forwarded, or waiting
+ dout(7) << "handle_client_link link " << dname << " in " << *dir << endl;
- mdcache->path_traverse(target, onfinish->trace, false,
- req, ondelay,
- MDS_TRAVERSE_DISCOVER, //XLOCK,
- onfinish);
-}
-
-
-void Server::handle_client_link_2(int r, MClientRequest *req, CInode *diri, vector<CDentry*>& trace)
-{
- // target dne?
+ // traverse to link target
+ filepath targetpath = req->get_sarg();
+ dout(7) << "handle_client_link discovering target " << targetpath << endl;
+ Context *ondelay = new C_MDS_RetryRequest(mdcache, mdr);
+ vector<CDentry*> targettrace;
+ int r = mdcache->path_traverse(mdr, 0,
+ targetpath, targettrace, false,
+ req, ondelay,
+ MDS_TRAVERSE_DISCOVER);
+ if (r > 0) return; // wait
if (r < 0) {
- dout(7) << "target " << req->get_sarg() << " dne" << endl;
- reply_request(req, r);
+ reply_request(mdr, r);
return;
}
- assert(r == 0);
-
+
// identify target inode
- CInode *targeti = mdcache->get_root();
- if (trace.size()) targeti = trace[trace.size()-1]->inode;
+ CInode *targeti;
+ if (targettrace.empty())
+ targeti = mdcache->get_root();
+ else
+ targeti = targettrace[targettrace.size()-1]->inode;
assert(targeti);
+ assert(r == 0);
- // not a dir?
+ // dir?
dout(7) << "target is " << *targeti << endl;
if (targeti->is_dir()) {
dout(7) << "target is a dir, failing" << endl;
- reply_request(req, -EINVAL);
+ reply_request(mdr, -EINVAL);
return;
}
-
+
// does the target need an anchor?
if (targeti->is_auth()) {
- if (targeti->get_parent_dir()->get_inode() == diri) {
- dout(7) << "target is in the same dir, sweet" << endl;
+ /*if (targeti->get_parent_dir() == dn->dir) {
+ dout(7) << "target is in the same dirfrag, sweet" << endl;
}
- else if (targeti->is_anchored() && !targeti->is_unanchoring()) {
+ else
+ */
+ if (targeti->is_anchored() && !targeti->is_unanchoring()) {
dout(7) << "target anchored already (nlink=" << targeti->inode.nlink << "), sweet" << endl;
}
else {
dout(7) << "target needs anchor, nlink=" << targeti->inode.nlink << ", creating anchor" << endl;
mdcache->anchor_create(targeti,
- new C_MDS_RetryRequest(mds, req, diri));
+ new C_MDS_RetryRequest(mdcache, mdr));
return;
}
}
// can we create the dentry?
- CDir *dir = 0;
CDentry *dn = 0;
- // make dentry and inode, xlock dentry.
- r = prepare_null_dentry(req, diri, &dir, &dn);
- if (!r) return; // wait or forward or something
- assert(dir);
- assert(dn);
+ // make null link dentry
+ dn = prepare_null_dentry(mdr, dir, dname, false);
+ if (!dn) return;
+
+ // create lock lists
+ set<CDentry*> dentry_rdlocks;
+ set<CDentry*> dentry_xlocks;
+ set<CInode*> inode_hard_rdlocks;
+ set<CInode*> inode_hard_xlocks;
+
+ for (unsigned i=0; i<linktrace.size(); i++)
+ dentry_rdlocks.insert(linktrace[i]);
+ dentry_xlocks.insert(dn);
+ for (unsigned i=0; i<targettrace.size(); i++)
+ dentry_rdlocks.insert(targettrace[i]);
+ inode_hard_xlocks.insert(targeti);
+
+ if (!mds->locker->acquire_locks(mdr,
+ dentry_rdlocks, dentry_xlocks,
+ inode_hard_rdlocks, inode_hard_xlocks))
+ return;
+
+ // go!
// local or remote?
if (targeti->is_auth())
- _link_local(req, diri, dn, targeti);
+ _link_local(mdr, dn, targeti);
else
- _link_remote(req, diri, dn, targeti);
+ _link_remote(mdr, dn, targeti);
}
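+
+// Illustrative sketch (mirrors the link path above, and rdlock_path_xlock_dentry):
+// handlers that need several dentries/inodes locked at once build the four lock
+// sets and hand them to Locker::acquire_locks(); a false return means we must
+// wait and the request will be retried, so the handler simply bails out:
+//
+//   set<CDentry*> dentry_rdlocks, dentry_xlocks;
+//   set<CInode*>  inode_hard_rdlocks, inode_hard_xlocks;
+//   dentry_xlocks.insert(dn);
+//   inode_hard_xlocks.insert(targeti);
+//   if (!mds->locker->acquire_locks(mdr, dentry_rdlocks, dentry_xlocks,
+//                                   inode_hard_rdlocks, inode_hard_xlocks))
+//     return;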
class C_MDS_link_local_finish : public Context {
MDS *mds;
- MClientRequest *req;
+ MDRequest *mdr;
CDentry *dn;
CInode *targeti;
version_t dpv;
time_t tctime;
time_t tpv;
public:
- C_MDS_link_local_finish(MDS *m, MClientRequest *r, CDentry *d, CInode *ti, time_t ct) :
- mds(m), req(r), dn(d), targeti(ti),
+ C_MDS_link_local_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ti, time_t ct) :
+ mds(m), mdr(r), dn(d), targeti(ti),
dpv(d->get_projected_version()),
tctime(ct),
tpv(targeti->get_parent_dn()->get_projected_version()) {}
void finish(int r) {
assert(r == 0);
- mds->server->_link_local_finish(req, dn, targeti, dpv, tctime, tpv);
+ mds->server->_link_local_finish(mdr, dn, targeti, dpv, tctime, tpv);
}
};
-void Server::_link_local(MClientRequest *req, CInode *diri,
- CDentry *dn, CInode *targeti)
+void Server::_link_local(MDRequest *mdr, CDentry *dn, CInode *targeti)
{
dout(10) << "_link_local " << *dn << " to " << *targeti << endl;
- // first, auth pin the dentry dir and targeti.
- if (!mdcache->request_auth_pinned(req, dn->get_dir()) &&
- !dn->get_dir()->can_auth_pin()) {
- dn->get_dir()->add_waiter(CDir::WAIT_AUTHPINNABLE,
- new C_MDS_RetryRequest(mds, req, diri));
- return;
- }
- if (!mdcache->request_auth_pinned(req, targeti) &&
- !targeti->can_auth_pin()) {
- targeti->add_waiter(CDir::WAIT_AUTHPINNABLE,
- new C_MDS_RetryRequest(mds, req, diri));
- return;
- }
- mdcache->request_auth_pin(req, dn->get_dir());
- mdcache->request_auth_pin(req, targeti);
-
- // sweet. let's get our locks.
- // lock dentry, target inode
- if (!mds->locker->dentry_xlock_start(dn, req, diri))
- return;
- if (!mds->locker->inode_hard_write_start(targeti, req, diri))
- return;
-
// ok, let's do it.
// prepare log entry
EUpdate *le = new EUpdate("link_local");
- le->metablob.add_client_req(req->get_reqid());
+ le->metablob.add_client_req(mdr->reqid);
// predirty
dn->pre_dirty();
pi->version = tpdv;
// finisher
- C_MDS_link_local_finish *fin = new C_MDS_link_local_finish(mds, req, dn, targeti, pi->ctime);
+ C_MDS_link_local_finish *fin = new C_MDS_link_local_finish(mds, mdr, dn, targeti, pi->ctime);
// log + wait
mdlog->submit_entry(le);
mdlog->wait_for_sync(fin);
}
-void Server::_link_local_finish(MClientRequest *req, CDentry *dn, CInode *targeti,
+void Server::_link_local_finish(MDRequest *mdr, CDentry *dn, CInode *targeti,
version_t dpv, time_t tctime, version_t tpv)
{
dout(10) << "_link_local_finish " << *dn << " to " << *targeti << endl;
targeti->inode.ctime = tctime;
targeti->mark_dirty(tpv);
- // unlock the new dentry and target inode
- mds->locker->dentry_xlock_finish(dn);
- mds->locker->inode_hard_write_finish(targeti);
-
// bump target popularity
mds->balancer->hit_inode(targeti, META_POP_IWR);
// reply
- MClientReply *reply = new MClientReply(req, 0);
- reply_request(req, reply, dn->get_dir()->get_inode()); // FIXME: imprecise ref
+ MClientReply *reply = new MClientReply(mdr->client_request(), 0);
+ reply_request(mdr, reply, dn->get_dir()->get_inode()); // FIXME: imprecise ref
}
-void Server::_link_remote(MClientRequest *req, CInode *ref,
- CDentry *dn, CInode *targeti)
+void Server::_link_remote(MDRequest *mdr, CDentry *dn, CInode *targeti)
{
dout(10) << "_link_remote " << *dn << " to " << *targeti << endl;
-
+ /*
// pin the target replica in our cache
assert(!targeti->is_auth());
mdcache->request_pin_inode(req, targeti);
// IMPLEMENT ME
MClientReply *reply = new MClientReply(req, -EXDEV);
reply_request(req, reply, dn->get_dir()->get_inode());
+ */
}
// UNLINK
-void Server::handle_client_unlink(MClientRequest *req, CInode *diri)
+void Server::handle_client_unlink(MDRequest *mdr)
{
+ MClientRequest *req = mdr->client_request();
+
// rmdir or unlink?
bool rmdir = false;
if (req->get_op() == MDS_OP_RMDIR) rmdir = true;
-
- // find it
- if (req->get_filepath().depth() == 0) {
- dout(7) << "can't rmdir root" << endl;
- reply_request(req, -EINVAL);
- return;
- }
- string name = req->get_filepath().last_dentry();
-
- // make sure parent is a dir?
- if (!diri->is_dir()) {
- dout(7) << "parent not a dir " << *diri << endl;
- reply_request(req, -ENOTDIR);
- return;
- }
-
- // get the dir, if it's not frozen etc.
- CDir *dir = validate_dentry_dir(req, diri, diri, name);
- if (!dir) return;
- // ok, it's auth, and authpinnable.
-
- // does the dentry exist?
- CDentry *dn = dir->lookup(name);
- if (!dn) {
- if (!dir->is_complete()) {
- dout(7) << "handle_client_rmdir/unlink missing dn " << name
- << " but dir not complete, fetching " << *dir << endl;
- dir->fetch(new C_MDS_RetryRequest(mds, req, diri));
- } else {
- dout(7) << "handle_client_rmdir/unlink dne " << name << " in " << *dir << endl;
- reply_request(req, -ENOENT);
- }
- return;
- }
-
+
+ // get/lock the dentry and path
+ CDentry *dn = rdlock_path_xlock_dentry(mdr, false, true); // must exist
+ if (!dn) return;
+
if (rmdir) {
dout(7) << "handle_client_rmdir on " << *dn << endl;
} else {
dout(7) << "handle_client_unlink on " << *dn << endl;
}
-
- // have it. locked?
- if (!dn->can_read(req)) {
- dout(10) << " waiting on " << *dn << endl;
- dir->add_waiter(CDir::WAIT_DNREAD, name,
- new C_MDS_RetryRequest(mds, req, diri));
- return;
- }
-
- // null?
- if (dn->is_null()) {
- dout(10) << "unlink on null dn " << *dn << endl;
- reply_request(req, -ENOENT);
- return;
- }
+
// dn looks ok.
// get/open inode.
- CInode *in = mdcache->get_dentry_inode(dn, req, diri);
+ CInode *in = mdcache->get_dentry_inode(dn, mdr);  // open dn's inode, not the traced parent
if (!in) return;
+ dout(7) << "dn links to " << *in << endl;
// rmdir vs is_dir
if (in->is_dir()) {
if (rmdir) {
// do empty directory checks
- if (!_verify_rmdir(req, diri, in))
+ if (!_verify_rmdir(mdr, in))
return;
} else {
dout(7) << "handle_client_unlink on dir " << *in << ", returning error" << endl;
- reply_request(req, -EISDIR);
+ reply_request(mdr, -EISDIR);
return;
}
} else {
if (rmdir) {
// unlink
dout(7) << "handle_client_rmdir on non-dir " << *in << ", returning error" << endl;
- reply_request(req, -ENOTDIR);
+ reply_request(mdr, -ENOTDIR);
return;
}
}
// ok!
if (dn->is_remote() && !dn->inode->is_auth())
- _unlink_remote(req, dn);
+ _unlink_remote(mdr, dn);
else
- _unlink_local(req, dn);
+ _unlink_local(mdr, dn);
}
class C_MDS_unlink_local_finish : public Context {
MDS *mds;
- MClientRequest *req;
+ MDRequest *mdr;
CDentry *dn;
CDentry *straydn;
version_t ipv; // referred inode
time_t ictime;
version_t dpv; // deleted dentry
public:
- C_MDS_unlink_local_finish(MDS *m, MClientRequest *r, CDentry *d, CDentry *sd,
+ C_MDS_unlink_local_finish(MDS *m, MDRequest *r, CDentry *d, CDentry *sd,
version_t v, time_t ct) :
- mds(m), req(r), dn(d), straydn(sd),
+ mds(m), mdr(r), dn(d), straydn(sd),
ipv(v), ictime(ct),
dpv(d->get_projected_version()) { }
void finish(int r) {
assert(r == 0);
- mds->server->_unlink_local_finish(req, dn, straydn, ipv, ictime, dpv);
+ mds->server->_unlink_local_finish(mdr, dn, straydn, ipv, ictime, dpv);
}
};
-void Server::_unlink_local(MClientRequest *req, CDentry *dn)
+void Server::_unlink_local(MDRequest *mdr, CDentry *dn)
{
dout(10) << "_unlink_local " << *dn << endl;
- // auth pin
- if (!mdcache->request_auth_pinned(req, dn->get_dir()) &&
- !dn->get_dir()->can_auth_pin()) {
- dn->get_dir()->add_waiter(CDir::WAIT_AUTHPINNABLE,
- new C_MDS_RetryRequest(mds, req, dn->get_dir()->get_inode()));
- return;
- }
- if (!mdcache->request_auth_pinned(req, dn->inode) &&
+ // auth pin inode
+ if (!mdr->is_auth_pinned(dn->inode) &&
!dn->inode->can_auth_pin()) {
- dn->inode->add_waiter(CInode::WAIT_AUTHPINNABLE,
- new C_MDS_RetryRequest(mds, req, dn->get_dir()->get_inode()));
+ dn->inode->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr));
+
+ // drop all locks while we wait (racy?)
+ mdcache->request_drop_locks(mdr);
+ mdr->drop_auth_pins();
return;
}
- mdcache->request_auth_pin(req, dn->get_dir());
- mdcache->request_auth_pin(req, dn->inode);
+ mdr->auth_pin(dn->inode);
- // lock
- if (!mds->locker->dentry_xlock_start(dn, req, dn->get_dir()->get_inode()))
- return;
- if (!mds->locker->inode_hard_write_start(dn->inode, req, dn->get_dir()->get_inode()))
+ // lock inode
+ if (!mds->locker->inode_hard_xlock_start(dn->inode, mdr))
return;
// ok, let's do it.
// prepare log entry
EUpdate *le = new EUpdate("unlink_local");
- le->metablob.add_client_req(req->get_reqid());
+ le->metablob.add_client_req(mdr->reqid);
version_t ipv = 0; // dirty inode version
inode_t *pi = 0; // the inode
pi->version = ipv;
// finisher
- C_MDS_unlink_local_finish *fin = new C_MDS_unlink_local_finish(mds, req, dn, straydn,
+ C_MDS_unlink_local_finish *fin = new C_MDS_unlink_local_finish(mds, mdr, dn, straydn,
ipv, pi->ctime);
// log + wait
mds->balancer->hit_dir(dn->dir, META_POP_DWR);
}
-void Server::_unlink_local_finish(MClientRequest *req,
+void Server::_unlink_local_finish(MDRequest *mdr,
CDentry *dn, CDentry *straydn,
version_t ipv, time_t ictime, version_t dpv)
{
dout(10) << "_unlink_local " << *dn << endl;
+ /*
// unlink main dentry
CInode *in = dn->inode;
dn->dir->unlink_inode(dn);
// unlock
mds->locker->dentry_xlock_finish(dn);
- mds->locker->inode_hard_write_finish(in);
+ mds->locker->inode_hard_xlock_finish(in);
// bump target popularity
mds->balancer->hit_dir(dn->dir, META_POP_DWR);
// reply
- MClientReply *reply = new MClientReply(req, 0);
- reply_request(req, reply, dn->dir->get_inode()); // FIXME: imprecise ref
+ MClientReply *reply = new MClientReply(mdr->client_request(), 0);
+ reply_request(mdr, reply, dn->dir->get_inode()); // FIXME: imprecise ref
if (straydn)
mdcache->eval_stray(straydn);
+ */
}
-void Server::_unlink_remote(MClientRequest *req, CDentry *dn)
+void Server::_unlink_remote(MDRequest *mdr, CDentry *dn)
{
-
-
// IMPLEMENT ME
- MClientReply *reply = new MClientReply(req, -EXDEV);
- reply_request(req, reply, dn->get_dir()->get_inode());
+ MClientReply *reply = new MClientReply(mdr->client_request(), -EXDEV);
+ reply_request(mdr, reply, dn->get_dir()->get_inode());
}
*
* @param in is the inode being rmdir'd.
*/
-bool Server::_verify_rmdir(MClientRequest *req, CInode *ref, CInode *in)
+bool Server::_verify_rmdir(MDRequest *mdr, CInode *in)
{
dout(10) << "_verify_rmdir " << *in << endl;
assert(in->is_auth());
dir->get_size() == 0 &&
!dir->is_complete()) {
dout(7) << "_verify_rmdir fetching incomplete dir " << *dir << endl;
- dir->fetch(new C_MDS_RetryRequest(mds, req, ref));
+ dir->fetch(new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
// does the frag _look_ empty?
if (dir->get_size()) {
dout(10) << "_verify_rmdir still " << dir->get_size() << " items in frag " << *dir << endl;
- reply_request(req, -ENOTEMPTY);
+ reply_request(mdr, -ENOTEMPTY);
return false;
}
class C_MDS_RenameTraverseDst : public Context {
Server *server;
- MClientRequest *req;
- CInode *ref;
+ MDRequest *mdr;
CInode *srci;
CDir *srcdir;
CDentry *srcdn;
vector<CDentry*> trace;
C_MDS_RenameTraverseDst(Server *server,
- MClientRequest *req,
- CInode *ref,
+ MDRequest *r,
CDentry *srcdn,
filepath& destpath) {
this->server = server;
- this->req = req;
- this->ref = ref;
+ this->mdr = r;
this->srcdn = srcdn;
this->destpath = destpath;
}
void finish(int r) {
- server->handle_client_rename_2(req, ref,
+ server->handle_client_rename_2(mdr,
srcdn, destpath,
trace, r);
}
*/
-bool Server::_rename_open_dn(CDir *dir, CDentry *dn, bool mustexist, MClientRequest *req, CInode *ref)
+bool Server::_rename_open_dn(CDir *dir, CDentry *dn, bool mustexist, MDRequest *mdr)
{
// xlocked?
- if (dn && !dn->can_read(req)) {
+ if (dn && !dn->can_read(mdr)) {
dout(10) << "_rename_open_dn waiting on " << *dn << endl;
dir->add_waiter(CDir::WAIT_DNREAD,
dn->name,
- new C_MDS_RetryRequest(mds, req, ref));
+ new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
((dn && dn->is_null()) ||
(!dn && dir->is_complete()))) {
dout(10) << "_rename_open_dn dn dne in " << *dir << endl;
- reply_request(req, -ENOENT);
+ reply_request(mdr, -ENOENT);
return false;
}
if (!dn && !dir->is_complete()) {
dout(10) << "_rename_open_dn readding incomplete dir" << endl;
- dir->fetch(new C_MDS_RetryRequest(mds, req, ref));
+ dir->fetch(new C_MDS_RetryRequest(mdcache, mdr));
return false;
}
assert(dn && !dn->is_null());
dout(10) << "_rename_open_dn dn is " << *dn << endl;
- CInode *in = mdcache->get_dentry_inode(dn, req, ref);
+ CInode *in = mdcache->get_dentry_inode(dn, mdr);
if (!in) return false;
dout(10) << "_rename_open_dn inode is " << *in << endl;
return true;
}
-void Server::handle_client_rename(MClientRequest *req, CInode *ref)
+void Server::handle_client_rename(MDRequest *mdr)
{
+ MClientRequest *req = mdr->client_request();
+
dout(7) << "handle_client_rename on " << *req << endl;
// traverse to source
(because we don't want to screw up the lock ordering) the ref inode
(normally/initially srcdiri) may move, and this may fail.
*/
+ /*
filepath refpath = req->get_filepath();
string srcname = refpath.last_dentry();
refpath = refpath.prefixpath(refpath.depth()-1);
dout(7) << "handle_client_rename src traversing to srcdir " << refpath << endl;
vector<CDentry*> trace;
int r = mdcache->path_traverse(refpath, trace, true,
- req, new C_MDS_RetryRequest(mds, req, ref),
+ req, new C_MDS_RetryRequest(mdcache, mdr),
MDS_TRAVERSE_FORWARD);
if (r > 0) return;
if (r < 0) { // dne or something. got renamed out from under us, probably!
frag_t srcfg = srcdiri->pick_dirfrag(srcname);
// open dirfrag? is it mine?
- CDir *srcdir = try_open_auth_dir(srcdiri, srcfg, req);
+ CDir *srcdir = try_open_auth_dir(srcdiri, srcfg, mdr);
if (!srcdir) return;
dout(7) << "handle_client_rename srcdir is " << *srcdir << endl;
// src dentry
CDentry *srcdn = srcdir->lookup(srcname);
- if (!_rename_open_dn(srcdir, srcdn, true, req, ref))
+ if (!_rename_open_dn(srcdir, srcdn, true, mdr))
return;
// pin src dentry in cache (so it won't expire)
filepath destpath = req->get_sarg();
C_MDS_RenameTraverseDst *onfinish = new C_MDS_RenameTraverseDst(this, req, ref, srcdn, destpath);
- Context *ondelay = new C_MDS_RetryRequest(mds, req, ref);
+ Context *ondelay = new C_MDS_RetryRequest(mdcache, mdr);
- mdcache->path_traverse(destpath, onfinish->trace, false,
+ mdcache->path_traverse(mdr,
+ destpath, onfinish->trace, false,
req, ondelay,
MDS_TRAVERSE_DISCOVER,
onfinish);
+ */
}
-void Server::handle_client_rename_2(MClientRequest *req,
- CInode *ref,
+void Server::handle_client_rename_2(MDRequest *mdr,
CDentry *srcdn,
filepath& destpath,
vector<CDentry*>& trace,
int r)
{
+ /*
+ MClientRequest *req = mdr->client_request();
+
dout(7) << "handle_client_rename_2 on " << *req << endl;
dout(12) << " r = " << r << " trace depth " << trace.size()
<< " destpath depth " << destpath.depth() << endl;
srcdn,
destdir, destdn, destname);
}
+ */
}
class C_MDS_rename_local_finish : public Context {
MDS *mds;
- MClientRequest *req;
+ MDRequest *mdr;
CDentry *srcdn;
CDentry *destdn;
CDentry *straydn;
public:
version_t atid1;
version_t atid2;
- C_MDS_rename_local_finish(MDS *m, MClientRequest *r,
+ C_MDS_rename_local_finish(MDS *m, MDRequest *r,
CDentry *sdn, CDentry *ddn, CDentry *stdn,
version_t v, time_t ct) :
- mds(m), req(r),
+ mds(m), mdr(r),
srcdn(sdn), destdn(ddn), straydn(stdn),
ipv(v),
straypv(straydn ? straydn->get_projected_version():0),
atid1(0), atid2(0) { }
void finish(int r) {
assert(r == 0);
- mds->server->_rename_local_finish(req, srcdn, destdn, straydn,
+ mds->server->_rename_local_finish(mdr, srcdn, destdn, straydn,
srcpv, destpv, straypv, ipv, ictime,
atid1, atid2);
}
}
};
-void Server::_rename_local(MClientRequest *req,
- CInode *ref,
+void Server::_rename_local(MDRequest *mdr,
CDentry *srcdn,
CDir *destdir,
CDentry *destdn,
const string& destname)
{
+ /*
dout(10) << "_rename_local " << *srcdn << " to " << destname << " in " << *destdir << endl;
// make sure target (possibly null) dentry exists
- int r = prepare_null_dentry(req, ref,
+ int r = prepare_null_dentry(mdr,
destdir->inode, destname,
&destdir, &destdn, true);
if (!r) return;
dosrc = !dosrc;
}
if (destdn->inode &&
- !mds->locker->inode_hard_write_start(destdn->inode, req, ref))
+ !mds->locker->inode_hard_xlock_start(destdn->inode, req, ref))
return;
mdlog->submit_entry(le);
mdlog->wait_for_sync(fin);
}
+ */
}
}
-void Server::_rename_local_finish(MClientRequest *req,
+void Server::_rename_local_finish(MDRequest *mdr,
CDentry *srcdn, CDentry *destdn, CDentry *straydn,
version_t srcpv, version_t destpv, version_t straypv, version_t ipv,
time_t ictime,
version_t atid1, version_t atid2)
{
+ /*
dout(10) << "_rename_local_finish " << *req << endl;
CInode *oldin = destdn->inode;
mds->locker->dentry_xlock_finish(srcdn);
mds->locker->dentry_xlock_finish(destdn);
if (oldin)
- mds->locker->inode_hard_write_finish(oldin);
+ mds->locker->inode_hard_xlock_finish(oldin);
// reply
MClientReply *reply = new MClientReply(req, 0);
// clean up?
if (straydn)
mdcache->eval_stray(straydn);
+ */
}
* FIXME: this truncate implemention is WRONG WRONG WRONG
*/
-void Server::handle_client_truncate(MClientRequest *req, CInode *cur)
+void Server::handle_client_truncate(MDRequest *mdr)
{
+ /*
// auth pin
if (!cur->can_auth_pin()) {
dout(7) << "waiting for authpinnable on " << *cur << endl;
mdcache->request_auth_pin(req, cur);
// write
- if (!mds->locker->inode_file_write_start(cur, req, cur))
+ if (!mds->locker->inode_file_xlock_start(cur, req, cur))
return; // fw or (wait for) lock
// check permissions
cur->inode.size = req->args.truncate.length;
cur->_mark_dirty(); // fixme
- mds->locker->inode_file_write_finish(cur);
+ mds->locker->inode_file_xlock_finish(cur);
mds->balancer->hit_inode(cur, META_POP_IWR);
MClientReply *reply = new MClientReply(req, 0);
// commit
- commit_request(req, reply, cur,
- new EString("truncate fixme"));
+ assert(0); // rewrite me
+ //commit_request(req, reply, cur,
+ //new EString("truncate fixme"));
+ */
}
// ===========================
// open, openc, close
-void Server::handle_client_open(MClientRequest *req, CInode *cur)
+void Server::handle_client_open(MDRequest *mdr)
{
+ MClientRequest *req = mdr->client_request();
+ CInode *cur = request_pin_ref(mdr);
+ if (!cur) return;
+
int flags = req->args.open.flags;
int cmode = req->get_open_file_mode();
dout(7) << "open " << flags << " on " << *cur << endl;
dout(10) << "open flags = " << flags << " filemode = " << cmode << endl;
- // is it a file?
- if (!(cmode & INODE_MODE_FILE)) {
- dout(7) << "not a regular file" << endl;
- reply_request(req, -EINVAL); // FIXME what error do we want?
+ // regular file?
+ if ((cur->inode.mode & INODE_TYPE_MASK) != INODE_MODE_FILE) {
+ dout(7) << "not a regular file " << *cur << endl;
+ reply_request(mdr, -EINVAL); // FIXME what error do we want?
return;
}
assert(auth != mds->get_nodeid());
dout(9) << "open writeable on replica for " << *cur << " fw to auth " << auth << endl;
- mdcache->request_forward(req, auth);
+ mdcache->request_forward(mdr, auth);
return;
}
// auth pin
if (!cur->can_auth_pin()) {
dout(7) << "waiting for authpinnable on " << *cur << endl;
- cur->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mds, req, cur));
+ cur->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr));
return;
}
- mdcache->request_auth_pin(req, cur);
+ mdr->auth_pin(cur);
// write
- if (!mds->locker->inode_file_write_start(cur, req, cur))
+ if (!mds->locker->inode_file_xlock_start(cur, mdr))
return; // fw or (wait for) lock
// do update
cur->inode.size = 0;
cur->_mark_dirty(); // fixme
- mds->locker->inode_file_write_finish(cur);
+ mds->locker->inode_file_xlock_finish(cur, mdr);
}
reply->set_file_caps(cap->pending());
reply->set_file_caps_seq(cap->get_last_seq());
reply->set_file_data_version(fdv);
- reply_request(req, reply, cur);
+ reply_request(mdr, reply, cur);
}
class C_MDS_openc_finish : public Context {
MDS *mds;
- MClientRequest *req;
+ MDRequest *mdr;
CDentry *dn;
CInode *newi;
version_t pv;
public:
- C_MDS_openc_finish(MDS *m, MClientRequest *r, CDentry *d, CInode *ni) :
- mds(m), req(r), dn(d), newi(ni),
+ C_MDS_openc_finish(MDS *m, MDRequest *r, CDentry *d, CInode *ni) :
+ mds(m), mdr(r), dn(d), newi(ni),
pv(d->get_projected_version()) {}
void finish(int r) {
assert(r == 0);
// dirty inode, dn, dir
newi->mark_dirty(pv);
- // unlock
- mds->locker->dentry_xlock_finish(dn);
+ // downgrade xlock to rdlock
+ mds->locker->dentry_xlock_downgrade_to_rdlock(dn, mdr);
+ // set/pin ref inode for open()
+ mdr->ref = newi;
+ mdr->pin(newi);
+
// hit pop
mds->balancer->hit_inode(newi, META_POP_IWR);
// ok, do the open.
- mds->server->handle_client_open(req, newi);
+ mds->server->handle_client_open(mdr);
}
};
-void Server::handle_client_openc(MClientRequest *req, CInode *diri)
+void Server::handle_client_openc(MDRequest *mdr)
{
- dout(7) << "open w/ O_CREAT on " << req->get_filepath() << endl;
+ MClientRequest *req = mdr->client_request();
- CDir *dir = 0;
- CDentry *dn = 0;
+ dout(7) << "open w/ O_CREAT on " << req->get_filepath() << endl;
- // make dentry and inode, xlock dentry.
bool excl = (req->args.open.flags & O_EXCL);
- int r = prepare_null_dentry(req, diri, &dir, &dn, !excl); // okexist = !excl
- if (r == 0) return; // wait on something
- assert(dir);
- assert(dn);
-
-
- if (r == 1) {
- // created null dn.
-
- // xlock
- if (!mds->locker->dentry_xlock_start(dn, req, diri))
- return;
-
- // create inode.
- CInode *in = prepare_new_inode(req, dir);
- assert(in);
-
- // it's a file.
- dn->pre_dirty();
- in->inode.mode = 0644; // FIXME req should have a umask
- in->inode.mode |= INODE_MODE_FILE;
-
- // prepare finisher
- C_MDS_openc_finish *fin = new C_MDS_openc_finish(mds, req, dn, in);
- EUpdate *le = new EUpdate("openc");
- le->metablob.add_client_req(req->get_reqid());
- le->metablob.add_dir_context(dir);
- inode_t *pi = le->metablob.add_primary_dentry(dn, true, in);
- pi->version = dn->get_projected_version();
-
- // log + wait
- mdlog->submit_entry(le);
- mdlog->wait_for_sync(fin);
-
- /*
- FIXME. this needs to be rewritten when the write capability stuff starts
- getting journaled.
- */
- } else {
- // exists!
+ CDentry *dn = rdlock_path_xlock_dentry(mdr, !excl, false);
+ if (!dn) return;
- // O_EXCL?
+ if (!dn->is_null()) {
+ // it existed.
if (req->args.open.flags & O_EXCL) {
- // fail.
dout(10) << "O_EXCL, target exists, failing with -EEXIST" << endl;
- reply_request(req, -EEXIST, diri);
+ reply_request(mdr, -EEXIST, dn->get_dir()->get_inode());
return;
}
-
- // get inode
- CInode *in = mdcache->get_dentry_inode(dn, req, diri);
- if (!in) return;
-
- // FIXME: do i need to repin path based existent inode? hmm.
- handle_client_open(req, in);
+
+ // pass to regular open handler.  set/pin the ref inode first (mirroring
+ // C_MDS_openc_finish) so handle_client_open sees the file, not the traced parent.
+ CInode *in = mdcache->get_dentry_inode(dn, mdr);
+ if (!in) return;
+ mdr->ref = in;
+ mdr->pin(in);
+ handle_client_open(mdr);
+ return;
}
+
+ // created null dn.
+
+ // create inode.
+ CInode *in = prepare_new_inode(req, dn->dir);
+ assert(in);
+
+ // it's a file.
+ dn->pre_dirty();
+ in->inode.mode = req->args.open.mode;
+ in->inode.mode |= INODE_MODE_FILE;
+
+ // prepare finisher
+ C_MDS_openc_finish *fin = new C_MDS_openc_finish(mds, mdr, dn, in);
+ EUpdate *le = new EUpdate("openc");
+ le->metablob.add_client_req(req->get_reqid());
+ le->metablob.add_dir_context(dn->dir);
+ inode_t *pi = le->metablob.add_primary_dentry(dn, true, in);
+ pi->version = dn->get_projected_version();
+
+ // log + wait
+ mdlog->submit_entry(le);
+ mdlog->wait_for_sync(fin);
+
+ /*
+ FIXME. this needs to be rewritten when the write capability stuff starts
+ getting journaled.
+ */
}