- resolve DISCOVERXLOCK versus rename issue
- witness list may change..
+ -> DISCOVERXLOCK _only_ used by witnesses!
+
- revisit wrlocks, dir inode mtime updates. esp in rename.
+ - if auth, pin and be happy. decide early.
+ - make no attempt to dirty inodes until a gather
+ - pin scattered inodes
+ - mtime will always get journaled...
+ -> so, just make sure v/pv/dirtyness is sane on recovery...
+ -> scatterlock should recover into scatter state, or whatever...
- reimplement _local_link/unlink using rename as model.
/* path_traverse
*
* return values:
- * <0 : traverse error (ENOTDIR, ENOENT)
+ * <0 : traverse error (ENOTDIR, ENOENT, etc.)
* 0 : success
* >0 : delayed or forwarded
*
- * Notes:
- * onfinish context is only needed if you specify MDS_TRAVERSE_DISCOVER _and_
- * you aren't absolutely certain that the path actually exists. If it doesn't,
- * the context is needed to pass a (failure) result code.
+ * onfail values:
+ *
+ * MDS_TRAVERSE_FORWARD - forward to auth (or best guess)
+ * MDS_TRAVERSE_DISCOVER - discover missing items. skip permission checks.
+ * MDS_TRAVERSE_DISCOVERXLOCK - discover XLOCKED items too (be careful!).
+ * MDS_TRAVERSE_FAIL - return an error
*/
-/*
-class C_MDC_TraverseDiscover : public Context {
- Context *onfinish, *ondelay;
- public:
- C_MDC_TraverseDiscover(Context *onfinish, Context *ondelay) {
- this->ondelay = ondelay;
- this->onfinish = onfinish;
- }
- void finish(int r) {
- //dout(10) << "TraverseDiscover r = " << r << endl;
- if (r < 0 && onfinish) { // ENOENT on discover, pass back to caller.
- onfinish->finish(r);
- } else {
- ondelay->finish(r); // retry as usual
- }
- delete onfinish;
- delete ondelay;
- }
-};
-*/
-
-int MDCache::path_traverse(MDRequest *mdr,
- CInode *base, // traverse starting from here.
- filepath& origpath,
- vector<CDentry*>& trace,
- bool follow_trailing_symlink,
- Message *req,
- Context *ondelay,
- int onfail,
- bool is_client_req,
- bool null_okay) // true if req is MClientRequest .. gross, FIXME
+Context *MDCache::_get_waiter(MDRequest *mdr, Message *req)
{
- set< pair<CInode*, string> > symlinks_resolved; // keep a list of symlinks we touch to avoid loops
+ if (mdr)
+ return new C_MDS_RetryRequest(this, mdr);
+ else
+ return new C_MDS_RetryMessage(mds, req);
+}
+int MDCache::path_traverse(MDRequest *mdr, Message *req, // who
+ CInode *base, filepath& origpath, // what
+ vector<CDentry*>& trace, // result
+ bool follow_trailing_symlink, // how
+ int onfail)
+{
+ assert(mdr || req);
+ bool null_okay = onfail == MDS_TRAVERSE_DISCOVERXLOCK;
bool noperm = false;
if (onfail == MDS_TRAVERSE_DISCOVER ||
- onfail == MDS_TRAVERSE_DISCOVERXLOCK) noperm = true;
+ onfail == MDS_TRAVERSE_DISCOVERXLOCK)
+ noperm = true;
+
+ // keep a list of symlinks we touch to avoid loops
+ set< pair<CInode*, string> > symlinks_resolved;
// root
CInode *cur = base;
if (!cur) cur = get_root();
if (cur == NULL) {
dout(7) << "traverse: i don't have root" << endl;
- open_root(ondelay);
+ open_root(_get_waiter(mdr, req));
return 1;
}
// ENOTDIR?
if (!cur->is_dir()) {
dout(7) << "traverse: " << *cur << " not a dir " << endl;
- delete ondelay;
return -ENOTDIR;
}
// parent dir frozen_dir?
if (cur->is_frozen_dir()) {
dout(7) << "traverse: " << *cur->get_parent_dir() << " is frozen_dir, waiting" << endl;
- cur->get_parent_dir()->add_waiter(CDir::WAIT_UNFREEZE, ondelay);
+ cur->get_parent_dir()->add_waiter(CDir::WAIT_UNFREEZE, _get_waiter(mdr, req));
return 1;
}
}
else if (cur->is_ambiguous_auth()) {
dout(10) << "traverse: need dir, waiting for single auth on " << *cur << endl;
- cur->add_waiter(CInode::WAIT_SINGLEAUTH, ondelay);
+ cur->add_waiter(CInode::WAIT_SINGLEAUTH, _get_waiter(mdr, req));
return 1;
} else {
filepath want = path.postfixpath(depth);
cur->authority().first, MDS_PORT_CACHE);
dir_discovers[cur->ino()].insert(cur->authority().first);
}
- cur->add_waiter(CInode::WAIT_DIR, ondelay);
+ cur->add_waiter(CInode::WAIT_DIR, _get_waiter(mdr, req));
return 1;
}
}
// doh!
// FIXME: traverse is allowed?
dout(7) << "traverse: " << *curdir << " is frozen, waiting" << endl;
- curdir->add_waiter(CDir::WAIT_UNFREEZE, ondelay);
+ curdir->add_waiter(CDir::WAIT_UNFREEZE, _get_waiter(mdr, req));
if (onfinish) delete onfinish;
return 1;
}
*/
// must read directory hard data (permissions, x bit) to traverse
- if (!noperm && !mds->locker->simple_rdlock_try(&cur->authlock, ondelay)) {
+ if (!noperm && !mds->locker->simple_rdlock_try(&cur->authlock, _get_waiter(mdr, req))) {
return 1;
}
// dentry exists. xlocked?
if (!noperm && dn->lock.is_xlocked() && dn->lock.get_xlocked_by() != mdr) {
dout(10) << "traverse: xlocked dentry at " << *dn << endl;
- dn->lock.add_waiter(SimpleLock::WAIT_RD, ondelay);
+ dn->lock.add_waiter(SimpleLock::WAIT_RD, _get_waiter(mdr, req));
return 1;
}
} else {
dout(7) << "remote link to " << dn->get_remote_ino() << ", which i don't have" << endl;
assert(0); // REWRITE ME
- //open_remote_ino(dn->get_remote_ino(), req, ondelay);
+ //open_remote_ino(dn->get_remote_ino(), req, _get_waiter(mdr, req));
return 1;
}
}
}
// forwarder wants replicas?
- if (is_client_req && ((MClientRequest*)req)->get_mds_wants_replica_in_dirino()) {
- dout(30) << "traverse: REP is here, " << ((MClientRequest*)req)->get_mds_wants_replica_in_dirino() << " vs " << curdir->dirfrag() << endl;
+ if (mdr && mdr->client_request &&
+ mdr->client_request->get_mds_wants_replica_in_dirino()) {
+ dout(30) << "traverse: REP is here, "
+ << mdr->client_request->get_mds_wants_replica_in_dirino()
+ << " vs " << curdir->dirfrag() << endl;
- if (((MClientRequest*)req)->get_mds_wants_replica_in_dirino() == curdir->ino() &&
+ if (mdr->client_request->get_mds_wants_replica_in_dirino() == curdir->ino() &&
curdir->is_auth() &&
curdir->is_rep() &&
curdir->is_replica(req->get_source().num()) &&
// dentry is mine.
if (curdir->is_complete()) {
// file not found
- delete ondelay;
return -ENOENT;
} else {
-
- //wrong?
- //if (onfail == MDS_TRAVERSE_DISCOVER)
- // return -1;
-
- // directory isn't complete; reload
+ // directory isn't complete; reload
dout(7) << "traverse: incomplete dir contents for " << *cur << ", fetching" << endl;
touch_inode(cur);
- curdir->fetch(ondelay);
-
- if (mds->logger) mds->logger->inc("cmiss");
-
+ curdir->fetch(_get_waiter(mdr, req));
+ if (mds->logger) mds->logger->inc("cmiss");
return 1;
}
} else {
}
else if (curdir->is_ambiguous_auth()) {
dout(7) << "traverse: waiting for single auth on " << *curdir << endl;
- curdir->add_waiter(CDir::WAIT_SINGLEAUTH, ondelay);
+ curdir->add_waiter(CDir::WAIT_SINGLEAUTH, _get_waiter(mdr, req));
return 1;
- } else {
+ }
+ else {
dout(7) << "traverse: discover " << want << " from " << *curdir << endl;
touch_inode(cur);
mds->send_message_mds(new MDiscover(mds->get_nodeid(),
cur->ino(),
want,
- false),
+ false,
+ onfail == MDS_TRAVERSE_DISCOVERXLOCK),
dauth.first, MDS_PORT_CACHE);
if (mds->logger) mds->logger->inc("dis");
}
// delay processing of current request.
- curdir->add_dentry_waiter(path[depth], ondelay);
+ curdir->add_dentry_waiter(path[depth], _get_waiter(mdr, req));
if (mds->logger) mds->logger->inc("cmiss");
return 1;
}
if (curdir->is_ambiguous_auth()) {
// wait
dout(7) << "traverse: waiting for single auth in " << *curdir << endl;
- curdir->add_waiter(CDir::WAIT_SINGLEAUTH, ondelay);
+ curdir->add_waiter(CDir::WAIT_SINGLEAUTH, _get_waiter(mdr, req));
return 1;
} else {
dout(7) << "traverse: forwarding, not auth for " << *curdir << endl;
// request replication?
- if (is_client_req && curdir->is_rep()) {
+ if (mdr && mdr->client_request && curdir->is_rep()) {
dout(15) << "traverse: REP fw to mds" << dauth << ", requesting rep under "
<< *curdir << " req " << *(MClientRequest*)req << endl;
- ((MClientRequest*)req)->set_mds_wants_replica_in_dirino(curdir->ino());
+ mdr->client_request->set_mds_wants_replica_in_dirino(curdir->ino());
req->clear_payload(); // reencode!
}
mds->forward_message_mds(req, dauth.first, req->get_dest_port());
if (mds->logger) mds->logger->inc("cfw");
- delete ondelay;
return 2;
}
}
if (onfail == MDS_TRAVERSE_FAIL) {
- delete ondelay;
return -ENOENT; // not necessarily exactly true....
}
}
}
// success.
- delete ondelay;
return 0;
}
// get started.
if (dis->get_base_ino() == MDS_INO_ROOT) {
// wants root
- dout(7) << "handle_discover from mds" << dis->get_asker() << " wants root + " << dis->get_want().get_path() << endl;
+ dout(7) << "handle_discover from mds" << dis->get_asker()
+ << " wants root + " << dis->get_want().get_path() << endl;
assert(mds->get_nodeid() == 0);
assert(root->is_auth());
}
else if (dis->get_base_ino() == MDS_INO_STRAY(whoami)) {
// wants stray
- dout(7) << "handle_discover from mds" << dis->get_asker() << " wants stray + " << dis->get_want().get_path() << endl;
+ dout(7) << "handle_discover from mds" << dis->get_asker()
+ << " wants stray + " << dis->get_want().get_path() << endl;
reply->add_inode( stray->replicate_to( dis->get_asker() ) );
dout(10) << "added stray " << *stray << endl;
for (unsigned i = 0;
i < dis->get_want().depth() || dis->get_want().depth() == 0;
i++) {
-
+
// -- figure out the dir
// is *cur even a dir at all?
}
assert(dn);
+ // xlocked dentry?
+ // ...always block on non-tail items (they are unrelated)
+ // ...allow xlocked tail discovery _only_ if explicitly requested
+ if (dn->lock.is_xlocked()) {
+ // is this the last (tail) item in the discover traversal?
+ bool tailitem = (dis->get_want().depth() == 0) || (i == dis->get_want().depth() - 1);
+ if (tailitem && dis->wants_xlocked()) {
+ dout(7) << "allowing discovery of xlocked tail " << *dn << endl;
+ } else {
+ dout(7) << "blocking on xlocked " << *dn << endl;
+ dn->lock.add_waiter(SimpleLock::WAIT_RD, new C_MDS_RetryMessage(mds, dis));
+ delete reply;
+ return;
+ }
+ }
+
// add dentry
reply->add_dentry( dn->replicate_to( dis->get_asker() ) );
dout(7) << "added dentry " << *dn << endl;
dout(5) << "trying discover on dir_update for " << path << endl;
- int r = path_traverse(0, 0,
- path, trace, true,
- m, new C_MDS_RetryMessage(mds, m),
+ int r = path_traverse(0, m,
+ 0, path, trace, true,
MDS_TRAVERSE_DISCOVER);
if (r > 0)
return;
CInode *create_stray_inode(int whose=-1);
void open_local_stray();
void open_foreign_stray(int who, Context *c);
- int path_traverse(MDRequest *mdr,
+
+ Context *_get_waiter(MDRequest *mdr, Message *req);
+ int path_traverse(MDRequest *mdr, Message *req,
CInode *base, filepath& path,
vector<CDentry*>& trace, bool follow_trailing_sym,
- Message *req, Context *ondelay,
- int onfail,
- bool is_client_req = false,
- bool null_okay = false);
+ int onfail);
+
void open_remote_dir(CInode *diri, frag_t fg, Context *fin);
CInode *get_dentry_inode(CDentry *dn, MDRequest *mdr);
void open_remote_ino(inodeno_t ino, MDRequest *mdr, Context *fin);
// must discover it!
filepath fpath(m->get_path());
vector<CDentry*> trace;
- int r = cache->path_traverse(0,
- 0,
- fpath, trace, true,
- m, new C_MDS_RetryMessage(mds, m), // on delay/retry
+ int r = cache->path_traverse(0, m,
+ 0, fpath, trace, true,
MDS_TRAVERSE_DISCOVER);
if (r > 0) return; // wait
if (r < 0) {
dout(10) << "traverse_to_auth_dir dirpath " << refpath << " dname " << dname << endl;
// traverse to parent dir
- Context *ondelay = new C_MDS_RetryRequest(mdcache, mdr);
- int r = mdcache->path_traverse(mdr,
- 0,
- refpath, trace, true,
- mdr->client_request, ondelay,
- MDS_TRAVERSE_FORWARD,
- true); // is MClientRequest
+ int r = mdcache->path_traverse(mdr, mdr->client_request,
+ 0, refpath, trace, true,
+ MDS_TRAVERSE_FORWARD);
if (r > 0) return 0; // delayed
if (r < 0) {
reply_request(mdr, r);
// traverse
filepath refpath = req->get_filepath();
- Context *ondelay = new C_MDS_RetryRequest(mdcache, mdr);
vector<CDentry*> trace;
- int r = mdcache->path_traverse(mdr, 0,
- refpath, trace, req->follow_trailing_symlink(),
- req, ondelay,
- MDS_TRAVERSE_FORWARD,
- true); // is MClientRequest
+ int r = mdcache->path_traverse(mdr, req,
+ 0, refpath,
+ trace, req->follow_trailing_symlink(),
+ MDS_TRAVERSE_FORWARD);
if (r > 0) return false; // delayed
if (r < 0) { // error
reply_request(mdr, r);
*/
version_t Server::predirty_dn_diri(CDentry *dn, EMetaBlob *blob, utime_t mtime)
{
+ return 0;
+ /*
version_t dirpv = 0;
CInode *diri = dn->dir->inode;
}
return dirpv;
+ */
}
/** dirty_dn_diri
*/
void Server::dirty_dn_diri(CDentry *dn, version_t dirpv, utime_t mtime)
{
+ /*
CInode *diri = dn->dir->inode;
// make the update
} else {
// we're not auth. dirlock scatterlock will propagate the update.
}
+ */
}
// traverse to link target
filepath targetpath = req->get_sarg();
dout(7) << "handle_client_link discovering target " << targetpath << endl;
- Context *ondelay = new C_MDS_RetryRequest(mdcache, mdr);
vector<CDentry*> targettrace;
- int r = mdcache->path_traverse(mdr, 0,
- targetpath, targettrace, false,
- req, ondelay,
+ int r = mdcache->path_traverse(mdr, req,
+ 0, targetpath, targettrace, false,
MDS_TRAVERSE_DISCOVER);
if (r > 0) return; // wait
if (targettrace.empty()) r = -EINVAL;
// traverse to path
vector<CDentry*> trace;
- Context *ondelay = new C_MDS_RetryRequest(mdcache, mdr);
- int r = mdcache->path_traverse(mdr, 0,
- req->get_filepath(), trace, false,
- req, ondelay,
+ int r = mdcache->path_traverse(mdr, req,
+ 0, req->get_filepath(), trace, false,
MDS_TRAVERSE_FORWARD);
if (r > 0) return;
if (trace.empty()) r = -EINVAL; // can't unlink root
// traverse to src
filepath srcpath = req->get_filepath();
vector<CDentry*> srctrace;
- Context *ondelay = new C_MDS_RetryRequest(mdcache, mdr);
- int r = mdcache->path_traverse(mdr, 0,
- srcpath, srctrace, false,
- req, ondelay,
+ int r = mdcache->path_traverse(mdr, req,
+ 0, srcpath, srctrace, false,
MDS_TRAVERSE_DISCOVER);
if (r > 0) return;
if (srctrace.empty()) r = -EINVAL; // can't rename root
filepath destpath(mdr->slave_request->destdnpath);
dout(10) << " dest " << destpath << endl;
vector<CDentry*> trace;
- int r = mdcache->path_traverse(mdr, 0, destpath, trace, false, mdr->slave_request,
- new C_MDS_RetryRequest(mdcache, mdr),
- MDS_TRAVERSE_DISCOVERXLOCK, false, true);
+ int r = mdcache->path_traverse(mdr, mdr->slave_request,
+ 0, destpath, trace, false,
+ MDS_TRAVERSE_DISCOVERXLOCK);
if (r > 0) return;
assert(r == 0); // we shouldn't get an error here!
// discover srcdn
filepath srcpath(mdr->slave_request->srcdnpath);
dout(10) << " src " << srcpath << endl;
- r = mdcache->path_traverse(mdr, 0, srcpath, trace, false, mdr->slave_request,
- new C_MDS_RetryRequest(mdcache, mdr),
- MDS_TRAVERSE_DISCOVERXLOCK, false, true);
+ r = mdcache->path_traverse(mdr, mdr->slave_request,
+ 0, srcpath, trace, false,
+ MDS_TRAVERSE_DISCOVERXLOCK);
if (r > 0) return;
assert(r == 0); // we shouldn't get an error here!
int asker;
inodeno_t base_ino; // 1 -> root
frag_t base_dir_frag;
- bool want_base_dir;
filepath want; // ... [/]need/this/stuff
inodeno_t want_ino;
+ bool want_base_dir;
+ bool want_xlocked;
+
public:
int get_asker() { return asker; }
inodeno_t get_base_ino() { return base_ino; }
filepath& get_want() { return want; }
inodeno_t get_want_ino() { return want_ino; }
const string& get_dentry(int n) { return want[n]; }
- bool wants_base_dir() { return want_base_dir; }
+
+ bool wants_base_dir() { return want_base_dir; }
+ bool wants_xlocked() { return want_xlocked; }
void set_base_dir_frag(frag_t f) { base_dir_frag = f; }
MDiscover() { }
- MDiscover(int asker,
- inodeno_t base_ino,
- filepath& want,
- bool want_base_dir = true) :
- Message(MSG_MDS_DISCOVER) {
- this->asker = asker;
- this->base_ino = base_ino;
- this->want = want;
- want_ino = 0;
- this->want_base_dir = want_base_dir;
- }
- MDiscover(int asker,
+ MDiscover(int asker_,
+ inodeno_t base_ino_,
+ filepath& want_,
+ bool want_base_dir_ = true,
+ bool discover_xlocks_ = false) :
+ Message(MSG_MDS_DISCOVER),
+ asker(asker_),
+ base_ino(base_ino_),
+ want(want_),
+ want_ino(0),
+ want_base_dir(want_base_dir_),
+ want_xlocked(discover_xlocks_) { }
+ MDiscover(int asker_,
dirfrag_t base_dirfrag,
- inodeno_t want_ino,
- bool want_base_dir = true) :
- Message(MSG_MDS_DISCOVER) {
- this->asker = asker;
- this->base_ino = base_dirfrag.ino;
- this->base_dir_frag = base_dirfrag.frag;
- this->want_ino = want_ino;
- this->want_base_dir = want_base_dir;
- }
+ inodeno_t want_ino_,
+ bool want_base_dir_ = true) :
+ Message(MSG_MDS_DISCOVER),
+ asker(asker_),
+ base_ino(base_dirfrag.ino),
+ base_dir_frag(base_dirfrag.frag),
+ want_ino(want_ino_),
+ want_base_dir(want_base_dir_),
+ want_xlocked(false) { }
char *get_type_name() { return "Dis"; }
void print(ostream &out) {
::_decode(asker, payload, off);
::_decode(base_ino, payload, off);
::_decode(base_dir_frag, payload, off);
- ::_decode(want_base_dir, payload, off);
want._decode(payload, off);
::_decode(want_ino, payload, off);
+ ::_decode(want_base_dir, payload, off);
+ ::_decode(want_xlocked, payload, off);
}
void encode_payload() {
- payload.append((char*)&asker, sizeof(asker));
- payload.append((char*)&base_ino, sizeof(base_ino));
- payload.append((char*)&base_dir_frag, sizeof(base_dir_frag));
- payload.append((char*)&want_base_dir, sizeof(want_base_dir));
+ ::_encode(asker, payload);
+ ::_encode(base_ino, payload);
+ ::_encode(base_dir_frag, payload);
want._encode(payload);
::_encode(want_ino, payload);
+ ::_encode(want_base_dir, payload);
+ ::_encode(want_xlocked, payload);
}
};