From 4718149a85f3b091c33611b6051c85b1854f0c31 Mon Sep 17 00:00:00 2001 From: sageweil Date: Mon, 11 Jun 2007 21:13:51 +0000 Subject: [PATCH] * dir mtime fix * some slave request infrastructure fixes git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1415 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/cephmds2/mds/Locker.cc | 7 +- branches/sage/cephmds2/mds/MDCache.cc | 16 +-- branches/sage/cephmds2/mds/MDCache.h | 7 +- branches/sage/cephmds2/mds/Migrator.cc | 2 + branches/sage/cephmds2/mds/Server.cc | 49 ++++++- branches/sage/cephmds2/mds/Server.h | 2 + branches/sage/cephmds2/mds/SimpleLock.h | 2 + branches/sage/cephmds2/mds/mdstypes.h | 166 ++++++++++++------------ 8 files changed, 144 insertions(+), 107 deletions(-) diff --git a/branches/sage/cephmds2/mds/Locker.cc b/branches/sage/cephmds2/mds/Locker.cc index 2a8945bfc51c1..b847a4f4a4eca 100644 --- a/branches/sage/cephmds2/mds/Locker.cc +++ b/branches/sage/cephmds2/mds/Locker.cc @@ -1074,7 +1074,7 @@ bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr) } else { // replica // this had better not be a remote xlock attempt! - assert(mdr->slave_request); + assert(!mdr->slave_request); // wait for single auth if (lock->get_parent()->is_ambiguous_auth()) { @@ -1089,11 +1089,10 @@ bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr) MMDSSlaveRequest *r = new MMDSSlaveRequest(mdr->reqid, MMDSSlaveRequest::OP_XLOCK); r->set_lock_type(lock->get_type()); lock->get_parent()->set_object_info(r->get_object_info()); - mds->send_message_mds(r, auth, MDS_PORT_LOCKER); + mds->send_message_mds(r, auth, MDS_PORT_SERVER); - // wait - // note: this also waits on parent object's SINGLEAUTH bit, in case of migration race + // note: this also waits on parent object's SINGLEAUTH bit, in case of a migration race lock->add_waiter(SimpleLock::WAIT_REMOTEXLOCK, new C_MDS_RetryRequest(mdcache, mdr)); return false; } diff --git a/branches/sage/cephmds2/mds/MDCache.cc b/branches/sage/cephmds2/mds/MDCache.cc index 08b3d4cae7826..0e2fb7c6af10c 100644 --- a/branches/sage/cephmds2/mds/MDCache.cc +++ b/branches/sage/cephmds2/mds/MDCache.cc @@ -3403,15 +3403,6 @@ void MDCache::make_trace(vector& trace, CInode *in) } -MDRequest *MDCache::request_start_slave(metareqid_t ri, int by) -{ - MDRequest *mdr = new MDRequest(ri, 0, by); - assert(active_requests.count(mdr->reqid) == 0); - active_requests[mdr->reqid] = mdr; - dout(7) << "request_start_slave " << *mdr << " by mds" << by << endl; - return mdr; -} - MDRequest *MDCache::request_start(MClientRequest *req) { MDRequest *mdr = new MDRequest(req->get_reqid(), req); @@ -3421,15 +3412,16 @@ MDRequest *MDCache::request_start(MClientRequest *req) return mdr; } -MDRequest *MDCache::request_start(MMDSSlaveRequest *slavereq) +MDRequest *MDCache::request_start_slave(metareqid_t ri, int by) { - MDRequest *mdr = new MDRequest(slavereq->get_reqid(), slavereq, slavereq->get_source().num()); + MDRequest *mdr = new MDRequest(ri, by); assert(active_requests.count(mdr->reqid) == 0); active_requests[mdr->reqid] = mdr; - dout(7) << "request_start " << *mdr << endl; + dout(7) << "request_start_slave " << *mdr << " by mds" << by << endl; return mdr; } + MDRequest *MDCache::request_get(metareqid_t rid) { assert(active_requests.count(rid)); diff --git a/branches/sage/cephmds2/mds/MDCache.h b/branches/sage/cephmds2/mds/MDCache.h index 1c1967e4893f3..78c9bf68a3581 100644 --- a/branches/sage/cephmds2/mds/MDCache.h +++ b/branches/sage/cephmds2/mds/MDCache.h @@ -105,9 +105,9 @@ struct MDRequest { MDRequest(metareqid_t ri, MClientRequest *req) : reqid(ri), client_request(req), ref(0), slave_request(0), slave_to_mds(-1) { } - MDRequest(metareqid_t ri, MMDSSlaveRequest *req, int by) : + MDRequest(metareqid_t ri, int by) : reqid(ri), client_request(0), ref(0), - slave_request(req), slave_to_mds(by) { } + slave_request(0), slave_to_mds(by) { } bool is_slave() { return slave_to_mds >= 0; @@ -155,6 +155,8 @@ inline ostream& operator<<(ostream& out, MDRequest &mdr) out << "request(" << mdr.reqid; //if (mdr.request) out << " " << *mdr.request; if (mdr.is_slave()) out << " slave_to mds" << mdr.slave_to_mds; + if (mdr.client_request) out << " cr=" << mdr.client_request; + if (mdr.slave_request) out << " sr=" << mdr.slave_request; out << ")"; return out; } @@ -240,7 +242,6 @@ protected: public: MDRequest* request_start(MClientRequest *req); - MDRequest* request_start(MMDSSlaveRequest *slavereq); MDRequest* request_start_slave(metareqid_t rid, int by); bool have_request(metareqid_t rid) { return active_requests.count(rid); diff --git a/branches/sage/cephmds2/mds/Migrator.cc b/branches/sage/cephmds2/mds/Migrator.cc index 96d33f8f068eb..7dc702abf8fd8 100644 --- a/branches/sage/cephmds2/mds/Migrator.cc +++ b/branches/sage/cephmds2/mds/Migrator.cc @@ -1749,6 +1749,8 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist& bl, int& off, int ol mds->locker->simple_eval(&in->linklock); if (in->dirfragtreelock.do_import(oldauth, mds->get_nodeid())) mds->locker->simple_eval(&in->dirfragtreelock); + if (in->dirlock.do_import(oldauth, mds->get_nodeid())) + mds->locker->simple_eval(&in->dirlock); // caps for (set::iterator it = merged_client_caps.begin(); diff --git a/branches/sage/cephmds2/mds/Server.cc b/branches/sage/cephmds2/mds/Server.cc index ea7a9e88005f6..b0bd074c249cd 100644 --- a/branches/sage/cephmds2/mds/Server.cc +++ b/branches/sage/cephmds2/mds/Server.cc @@ -537,12 +537,14 @@ void Server::handle_slave_request(MMDSSlaveRequest *m) MDRequest *mdr; if (mdcache->have_request(m->get_reqid())) { mdr = mdcache->request_get(m->get_reqid()); - assert(mdr->slave_request == 0); // only one at a time, please! - mdr->slave_request = m; } else { // new. - mdcache->request_start(m); + mdr = mdcache->request_start_slave(m->get_reqid(), m->get_source().num()); } + assert(mdr->client_request == 0); + + assert(mdr->slave_request == 0); // only one at a time, please! + mdr->slave_request = m; dispatch_slave_request(mdr); } @@ -1039,15 +1041,52 @@ void Server::dirty_dn_diri(CDentry *dn, version_t dirpv, utime_t mtime) if (diri->is_auth() && !diri->is_root()) { // we're auth. - diri->mark_dirty(dirpv); - dout(10) << "dirty_dn_diri ctime/mtime " << mtime << " v " << diri->inode.version << " on " << *diri << endl; + if (dirpv) { + // we were before, too. + diri->mark_dirty(dirpv); + dout(10) << "dirty_dn_diri ctime/mtime " << mtime << " v " << diri->inode.version << " on " << *diri << endl; + } else { + // write-behind. + if (!diri->is_dirty()) + dirty_diri_mtime_writebehind(diri, mtime); + // otherwise, if it's dirty, we know the mtime is journaled by another local update. + // (something after the import, or the import itself) + } } else { // we're not auth. dirlock scatterlock will propagate the update. } } +class C_MDS_DirtyDiriMtimeWB : public Context { + Server *server; + CInode *diri; + version_t dirpv; +public: + C_MDS_DirtyDiriMtimeWB(Server *s, CInode *i, version_t v) : + server(s), diri(i), dirpv(v) {} + void finish(int r) { + diri->mark_dirty(dirpv); + diri->auth_unpin(); + } +}; + +void Server::dirty_diri_mtime_writebehind(CInode *diri, utime_t mtime) +{ + if (!diri->can_auth_pin()) + return; // oh well! hrm. + diri->auth_pin(); + + // we're newly auth. write-behind. + EUpdate *le = new EUpdate("dir.mtime writebehind"); + le->metablob.add_dir_context(diri->get_parent_dn()->get_dir()); + inode_t *pi = le->metablob.add_primary_dentry(diri->get_parent_dn(), true); + pi->version = diri->pre_dirty(); + + mds->mdlog->submit_entry(le); + mds->mdlog->wait_for_sync(new C_MDS_DirtyDiriMtimeWB(this, diri, pi->version)); +} diff --git a/branches/sage/cephmds2/mds/Server.h b/branches/sage/cephmds2/mds/Server.h index 0aa67d533c277..dad15b4c034a7 100644 --- a/branches/sage/cephmds2/mds/Server.h +++ b/branches/sage/cephmds2/mds/Server.h @@ -77,6 +77,8 @@ public: version_t predirty_dn_diri(CDentry *dn, class EMetaBlob *blob, utime_t mtime); void dirty_dn_diri(CDentry *dn, version_t dirpv, utime_t mtime); + void dirty_diri_mtime_writebehind(CInode *diri, utime_t mtime); + // requests on existing inodes. void handle_client_stat(MDRequest *mdr); diff --git a/branches/sage/cephmds2/mds/SimpleLock.h b/branches/sage/cephmds2/mds/SimpleLock.h index e631edf456fea..9a3c94154e776 100644 --- a/branches/sage/cephmds2/mds/SimpleLock.h +++ b/branches/sage/cephmds2/mds/SimpleLock.h @@ -233,6 +233,8 @@ public: if (!is_gathering()) return true; } + if (!is_stable() && !is_gathering()) + return true; return false; } diff --git a/branches/sage/cephmds2/mds/mdstypes.h b/branches/sage/cephmds2/mds/mdstypes.h index 560dd76131361..52db486b66972 100644 --- a/branches/sage/cephmds2/mds/mdstypes.h +++ b/branches/sage/cephmds2/mds/mdstypes.h @@ -299,14 +299,14 @@ class MDSCacheObject { const char *generic_pin_name(int p) { switch (p) { - case PIN_REPLICATED: return "replicated"; - case PIN_DIRTY: return "dirty"; + case PIN_REPLICATED: return "replicated"; + case PIN_DIRTY: return "dirty"; case PIN_RDLOCK: return "rdlock"; case PIN_XLOCK: return "xlock"; case PIN_REQUEST: return "request"; - case PIN_WAITER: return "waiter"; - default: assert(0); - } + case PIN_WAITER: return "waiter"; + default: assert(0); + } } // -- state -- @@ -322,15 +322,15 @@ class MDSCacheObject { // cons public: MDSCacheObject() : - state(0), - ref(0), - replica_nonce(0) {} + state(0), + ref(0), + replica_nonce(0) {} virtual ~MDSCacheObject() {} // printing virtual void print(ostream& out) = 0; virtual ostream& print_db_line_prefix(ostream& out) { - return out << "mdscacheobject(" << this << ") "; + return out << "mdscacheobject(" << this << ") "; } // -------------------------------------------- @@ -354,7 +354,7 @@ class MDSCacheObject { // authority virtual pair authority() = 0; bool is_ambiguous_auth() { - return authority().second != CDIR_AUTH_UNKNOWN; + return authority().second != CDIR_AUTH_UNKNOWN; } // -------------------------------------------- @@ -371,36 +371,36 @@ protected: virtual void last_put() {} virtual void bad_put(int by) { - assert(ref_set.count(by) > 0); - assert(ref > 0); + assert(ref_set.count(by) > 0); + assert(ref > 0); } void put(int by) { if (ref == 0 || ref_set.count(by) == 0) { - bad_put(by); + bad_put(by); } else { - ref--; - ref_set.erase(ref_set.find(by)); - assert(ref == (int)ref_set.size()); - if (ref == 0) - last_put(); - } + ref--; + ref_set.erase(ref_set.find(by)); + assert(ref == (int)ref_set.size()); + if (ref == 0) + last_put(); + } } virtual void first_get() {} virtual void bad_get(int by) { - assert(by < 0 || ref_set.count(by) == 0); - assert(0); + assert(by < 0 || ref_set.count(by) == 0); + assert(0); } void get(int by) { if (by >= 0 && ref_set.count(by)) { - bad_get(by); + bad_get(by); } else { - if (ref == 0) - first_get(); - ref++; - ref_set.insert(by); - assert(ref == (int)ref_set.size()); - } + if (ref == 0) + first_get(); + ref++; + ref_set.insert(by); + assert(ref == (int)ref_set.size()); + } } void print_pin_set(ostream& out) { @@ -410,11 +410,11 @@ protected: int last = *it; int c = 1; do { - it++; - if (it == ref_set.end()) break; + it++; + if (it == ref_set.end()) break; } while (*it == last); if (c > 1) - out << "*" << c; + out << "*" << c; } } @@ -430,31 +430,31 @@ protected: bool is_replica(int mds) { return replicas.count(mds); } int num_replicas() { return replicas.size(); } int add_replica(int mds) { - if (replicas.count(mds)) - return ++replicas[mds]; // inc nonce - if (replicas.empty()) - get(PIN_REPLICATED); - return replicas[mds] = 1; + if (replicas.count(mds)) + return ++replicas[mds]; // inc nonce + if (replicas.empty()) + get(PIN_REPLICATED); + return replicas[mds] = 1; } void add_replica(int mds, int nonce) { - if (replicas.empty()) - get(PIN_REPLICATED); - replicas[mds] = nonce; + if (replicas.empty()) + get(PIN_REPLICATED); + replicas[mds] = nonce; } int get_replica_nonce(int mds) { - assert(replicas.count(mds)); - return replicas[mds]; + assert(replicas.count(mds)); + return replicas[mds]; } void remove_replica(int mds) { - assert(replicas.count(mds)); - replicas.erase(mds); - if (replicas.empty()) - put(PIN_REPLICATED); + assert(replicas.count(mds)); + replicas.erase(mds); + if (replicas.empty()) + put(PIN_REPLICATED); } void clear_replicas() { - if (!replicas.empty()) - put(PIN_REPLICATED); - replicas.clear(); + if (!replicas.empty()) + put(PIN_REPLICATED); + replicas.clear(); } map::iterator replicas_begin() { return replicas.begin(); } map::iterator replicas_end() { return replicas.end(); } @@ -471,45 +471,45 @@ protected: public: bool is_waiter_for(int mask) { - return waiting.count(mask) > 0; // FIXME: not quite right. + return waiting.count(mask) > 0; // FIXME: not quite right. } - void add_waiter(int mask, Context *c) { - if (waiting.empty()) - get(PIN_WAITER); - waiting.insert(pair(mask, c)); - pdout(10,g_conf.debug_mds) << (mdsco_db_line_prefix(this)) - << "add_waiter " << mask << " " << c - << " on " << *this - << endl; - + virtual void add_waiter(int mask, Context *c) { + if (waiting.empty()) + get(PIN_WAITER); + waiting.insert(pair(mask, c)); + pdout(10,g_conf.debug_mds) << (mdsco_db_line_prefix(this)) + << "add_waiter " << mask << " " << c + << " on " << *this + << endl; + } - void take_waiting(int mask, list& ls) { - if (waiting.empty()) return; - multimap::iterator it = waiting.begin(); - while (it != waiting.end()) { - if (it->first & mask) { - ls.push_back(it->second); - pdout(10,g_conf.debug_mds) << (mdsco_db_line_prefix(this)) - << "take_waiting mask " << mask << " took " << it->second - << " tag " << it->first - << " on " << *this - << endl; - waiting.erase(it++); - } else { - pdout(10,g_conf.debug_mds) << "take_waiting mask " << mask << " SKIPPING " << it->second - << " tag " << it->first - << " on " << *this - << endl; - it++; - } - } - if (waiting.empty()) - put(PIN_WAITER); + virtual void take_waiting(int mask, list& ls) { + if (waiting.empty()) return; + multimap::iterator it = waiting.begin(); + while (it != waiting.end()) { + if (it->first & mask) { + ls.push_back(it->second); + pdout(10,g_conf.debug_mds) << (mdsco_db_line_prefix(this)) + << "take_waiting mask " << mask << " took " << it->second + << " tag " << it->first + << " on " << *this + << endl; + waiting.erase(it++); + } else { + pdout(10,g_conf.debug_mds) << "take_waiting mask " << mask << " SKIPPING " << it->second + << " tag " << it->first + << " on " << *this + << endl; + it++; + } + } + if (waiting.empty()) + put(PIN_WAITER); } void finish_waiting(int mask, int result = 0) { - list finished; - take_waiting(mask, finished); - finish_contexts(finished, result); + list finished; + take_waiting(mask, finished); + finish_contexts(finished, result); } -- 2.39.5