From a6f5abd95e80a2137c1e3e463fb4bdbcc95e49d2 Mon Sep 17 00:00:00 2001 From: sageweil Date: Tue, 19 Jun 2007 16:11:50 +0000 Subject: [PATCH] * force trim of replicated null dentries that sync to non-null * fixed authpinnable waits in server (now wait only if frozen; locker->acquire_locks will wait while freezing, and handle auth_pins properly) git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1428 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/cephmds2/config.cc | 4 +- branches/sage/cephmds2/mds/CDentry.cc | 46 +++++++- branches/sage/cephmds2/mds/Locker.cc | 33 +++--- branches/sage/cephmds2/mds/MDCache.cc | 133 +++++++++++++----------- branches/sage/cephmds2/mds/MDCache.h | 4 + branches/sage/cephmds2/mds/Server.cc | 10 +- branches/sage/cephmds2/mds/SimpleLock.h | 12 +-- 7 files changed, 152 insertions(+), 90 deletions(-) diff --git a/branches/sage/cephmds2/config.cc b/branches/sage/cephmds2/config.cc index 01314f5bfbfd2..b4c8e0edde5fa 100644 --- a/branches/sage/cephmds2/config.cc +++ b/branches/sage/cephmds2/config.cc @@ -158,8 +158,8 @@ md_config_t g_conf = { mds_decay_halflife: 30, - mds_beacon_interval: 5.0, - mds_beacon_grace: 10.0, + mds_beacon_interval: 30.0, + mds_beacon_grace: 60*60.0, mds_log: true, mds_log_max_len: MDS_CACHE_SIZE / 3, diff --git a/branches/sage/cephmds2/mds/CDentry.cc b/branches/sage/cephmds2/mds/CDentry.cc index 50c7e56354e48..e60038820ba52 100644 --- a/branches/sage/cephmds2/mds/CDentry.cc +++ b/branches/sage/cephmds2/mds/CDentry.cc @@ -259,10 +259,52 @@ void CDentry::set_object_info(MDSCacheObjectInfo &info) void CDentry::encode_lock_state(int type, bufferlist& bl) { - + // null, ino, or remote_ino? + int c; + if (is_primary()) { + c = 1; + ::_encode(c, bl); + ::_encode(inode->inode.ino, bl); + } + else if (is_remote()) { + c = 2; + ::_encode(c, bl); + ::_encode(remote_ino, bl); + } + else if (is_null()) { + // encode nothing. 
+ } + else assert(0); } void CDentry::decode_lock_state(int type, bufferlist& bl) -{ +{ + if (bl.length() == 0) { + // null + assert(is_null()); + return; + } + int off = 0; + int c; // tag is written as an int by encode_lock_state; decode the same width so the ino offset lines up + inodeno_t ino; + ::_decode(c, bl, off); + + switch (c) { + case 1: + case 2: + _decode(ino, bl, off); + // newly linked? + if (is_null() && !is_auth()) { + // force trim from cache! + dout(10) << "decode_lock_state replica dentry null -> non-null, must trim!" << endl; + assert(get_num_ref() == 0); + } else { + // verify? + + } + break; + default: + assert(0); + } } diff --git a/branches/sage/cephmds2/mds/Locker.cc b/branches/sage/cephmds2/mds/Locker.cc index c6b98cc981ec0..3de229d1b8052 100644 --- a/branches/sage/cephmds2/mds/Locker.cc +++ b/branches/sage/cephmds2/mds/Locker.cc @@ -152,7 +152,8 @@ bool Locker::acquire_locks(MDRequest *mdr, dout(10) << " must authpin " << *object << endl; - if (mdr->is_auth_pinned(object)) continue; + if (mdr->is_auth_pinned(object)) + continue; if (!object->is_auth()) { if (object->is_ambiguous_auth()) { @@ -178,16 +179,12 @@ bool Locker::acquire_locks(MDRequest *mdr, for (set::iterator p = mustpin.begin(); p != mustpin.end(); ++p) { - if ((*p)->get_type() == LOCK_OTYPE_DN) { - CDir *dir = ((CDentry*)(*p)->get_parent())->dir; - if (!dir->is_auth()) continue; - dout(10) << " auth_pinning " << *dir << endl; - mdr->auth_pin(dir); - } else { - CInode *in = (CInode*)(*p)->get_parent(); - if (!in->is_auth()) continue; - dout(10) << " auth_pinning " << *in << endl; - mdr->auth_pin(in); + MDSCacheObject *object = (*p)->get_parent(); + if (mdr->is_auth_pinned(object)) { + dout(10) << " auth_pinned " << *object << endl; + } else if (object->is_auth()) { + dout(10) << " auth_pinning " << *object << endl; + mdr->auth_pin(object); } } @@ -202,7 +199,6 @@ bool Locker::acquire_locks(MDRequest *mdr, q != p->second.end(); ++q) { dout(10) << " req remote auth_pin of " << **q << endl; - mdr->pin(*q); // pin locally! 
MDSCacheObjectInfo info; (*q)->set_object_info(info); req->get_authpins().push_back(info); @@ -847,6 +843,18 @@ void Locker::handle_simple_lock(SimpleLock *lock, MLock *m) lock->decode_locked_state(m->get_data()); lock->set_state(LOCK_SYNC); lock->finish_waiters(SimpleLock::WAIT_RD|SimpleLock::WAIT_STABLE); + + // special case: trim replica no-longer-null dentry? + if (lock->get_type() == LOCK_OTYPE_DN) { + CDentry *dn = (CDentry*)lock->get_parent(); + if (dn->is_null() && m->get_data().length() > 0) { + dout(10) << "handle_simple_lock replica dentry null -> non-null, must trim " + << *dn << endl; + map expiremap; + mdcache->trim_dentry(dn, expiremap); + mdcache->send_expire_messages(expiremap); + } + } break; case LOCK_AC_LOCK: @@ -1123,7 +1131,6 @@ bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr) mds->send_message_mds(r, auth, MDS_PORT_SERVER); // wait - // note: this also waits on parent object's SINGLEAUTH bit, in case of a migration race lock->add_waiter(SimpleLock::WAIT_REMOTEXLOCK, new C_MDS_RetryRequest(mdcache, mdr)); return false; } diff --git a/branches/sage/cephmds2/mds/MDCache.cc b/branches/sage/cephmds2/mds/MDCache.cc index 6493266d2b766..7da0a1f8831e7 100644 --- a/branches/sage/cephmds2/mds/MDCache.cc +++ b/branches/sage/cephmds2/mds/MDCache.cc @@ -2159,65 +2159,7 @@ bool MDCache::trim(int max) while (lru.lru_get_size() > (unsigned)max) { CDentry *dn = (CDentry*)lru.lru_expire(); if (!dn) break; - - CDir *dir = dn->get_dir(); - assert(dir); - - CDir *con = get_subtree_root(dir); - assert(con); - - dout(12) << "trim removing " << *dn << endl; - dout(12) << " in container " << *con << endl; - - // notify dentry authority? - if (!dn->is_auth()) { - pair auth = dn->authority(); - - for (int p=0; p<2; p++) { - int a = auth.first; - if (p) a = auth.second; - if (a < 0 || (p == 1 && auth.second == auth.first)) break; - if (mds->get_nodeid() == auth.second && - con->is_importing()) break; // don't send any expire while importing. 
- if (a == mds->get_nodeid()) continue; // on export, ignore myself. - - dout(12) << " sending expire to mds" << a << " on " << *dn << endl; - assert(a != mds->get_nodeid()); - if (expiremap.count(a) == 0) - expiremap[a] = new MCacheExpire(mds->get_nodeid()); - expiremap[a]->add_dentry(con->dirfrag(), dir->dirfrag(), dn->get_name(), dn->get_replica_nonce()); - } - } - - // unlink the dentry - if (dn->is_remote()) { - // just unlink. - dir->unlink_inode(dn); - } - else if (dn->is_primary()) { - // expire the inode, too. - CInode *in = dn->get_inode(); - assert(in); - trim_inode(dn, in, con, expiremap); - } - else { - assert(dn->is_null()); - } - - // adjust the dir state - // NOTE: we can safely remove a clean, null dentry without effecting - // directory completeness. - if (!(dn->is_null() && dn->is_clean())) - dir->state_clear(CDir::STATE_COMPLETE); - - // remove dentry - dir->remove_dentry(dn); - - // reexport? - if (dir->get_size() == 0 && dir->is_subtree_root()) - migrator->export_empty_import(dir); - - if (mds->logger) mds->logger->inc("cex"); + trim_dentry(dn, expiremap); } // trim root inode+dir? @@ -2246,6 +2188,14 @@ bool MDCache::trim(int max) } } + // send! + send_expire_messages(expiremap); + + return true; +} + +void MDCache::send_expire_messages(map& expiremap) +{ // send expires for (map::iterator it = expiremap.begin(); it != expiremap.end(); @@ -2253,10 +2203,73 @@ bool MDCache::trim(int max) dout(7) << "sending cache_expire to " << it->first << endl; mds->send_message_mds(it->second, it->first, MDS_PORT_CACHE); } +} - return true; + +void MDCache::trim_dentry(CDentry *dn, map& expiremap) +{ + dout(12) << "trim_dentry " << *dn << endl; + + CDir *dir = dn->get_dir(); + assert(dir); + + CDir *con = get_subtree_root(dir); + assert(con); + + dout(12) << " in container " << *con << endl; + + // notify dentry authority? 
+ if (!dn->is_auth()) { + pair auth = dn->authority(); + + for (int p=0; p<2; p++) { + int a = auth.first; + if (p) a = auth.second; + if (a < 0 || (p == 1 && auth.second == auth.first)) break; + if (mds->get_nodeid() == auth.second && + con->is_importing()) break; // don't send any expire while importing. + if (a == mds->get_nodeid()) continue; // on export, ignore myself. + + dout(12) << " sending expire to mds" << a << " on " << *dn << endl; + assert(a != mds->get_nodeid()); + if (expiremap.count(a) == 0) + expiremap[a] = new MCacheExpire(mds->get_nodeid()); + expiremap[a]->add_dentry(con->dirfrag(), dir->dirfrag(), dn->get_name(), dn->get_replica_nonce()); + } + } + + // unlink the dentry + if (dn->is_remote()) { + // just unlink. + dir->unlink_inode(dn); + } + else if (dn->is_primary()) { + // expire the inode, too. + CInode *in = dn->get_inode(); + assert(in); + trim_inode(dn, in, con, expiremap); + } + else { + assert(dn->is_null()); + } + + // adjust the dir state + // NOTE: we can safely remove a clean, null dentry without effecting + // directory completeness. + if (!(dn->is_null() && dn->is_clean())) + dir->state_clear(CDir::STATE_COMPLETE); + + // remove dentry + dir->remove_dentry(dn); + + // reexport? 
+ if (dir->get_size() == 0 && dir->is_subtree_root()) + migrator->export_empty_import(dir); + + if (mds->logger) mds->logger->inc("cex"); } + void MDCache::trim_dirfrag(CDir *dir, CDir *con, map& expiremap) { assert(dir->get_num_ref() == 0); diff --git a/branches/sage/cephmds2/mds/MDCache.h b/branches/sage/cephmds2/mds/MDCache.h index 9c6165298bbc0..3d9301966bb44 100644 --- a/branches/sage/cephmds2/mds/MDCache.h +++ b/branches/sage/cephmds2/mds/MDCache.h @@ -350,11 +350,15 @@ public: // cache void set_cache_size(size_t max) { lru.lru_set_max(max); } size_t get_cache_size() { return lru.lru_get_size(); } + + // trimming bool trim(int max = -1); // trim cache + void trim_dentry(CDentry *dn, map& expiremap); void trim_dirfrag(CDir *dir, CDir *con, map& expiremap); void trim_inode(CDentry *dn, CInode *in, CDir *con, map& expiremap); + void send_expire_messages(map& expiremap); void trim_non_auth(); // trim out trimmable non-auth items // shutdown diff --git a/branches/sage/cephmds2/mds/Server.cc b/branches/sage/cephmds2/mds/Server.cc index 18fc17a67342a..894973a356abf 100644 --- a/branches/sage/cephmds2/mds/Server.cc +++ b/branches/sage/cephmds2/mds/Server.cc @@ -522,6 +522,7 @@ void Server::handle_slave_request(MMDSSlaveRequest *m) SimpleLock *lock = mds->locker->get_lock(m->get_lock_type(), m->get_object_info()); MDRequest *mdr = mdcache->request_get(m->get_reqid()); + dout(10) << "got remote xlock on " << *lock << " on " << *lock->get_parent() << endl; mdr->xlocks.insert(lock); mdr->locks.insert(lock); lock->get_xlock(mdr); @@ -1018,8 +1019,8 @@ CInode* Server::rdlock_path_pin_ref(MDRequest *mdr, bool want_auth) // auth_pin? 
if (want_auth) { - if (!ref->can_auth_pin()) { - dout(7) << "waiting for authpinnable on " << *ref << endl; + if (ref->is_frozen()) { + dout(7) << "waiting for !frozen/authpinnable on " << *ref << endl; ref->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr)); return 0; } @@ -1062,9 +1063,8 @@ CDentry* Server::rdlock_path_xlock_dentry(MDRequest *mdr, bool okexist, bool mus dout(10) << "rdlock_path_xlock_dentry dir " << *dir << endl; // make sure we can auth_pin (or have already authpinned) dir - if (!dir->can_auth_pin() && - !mdr->is_auth_pinned(dir)) { - dout(7) << "waiting for authpinnable on " << *dir << endl; + if (dir->is_frozen()) { + dout(7) << "waiting for !frozen/authpinnable on " << *dir << endl; dir->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mdcache, mdr)); return 0; } diff --git a/branches/sage/cephmds2/mds/SimpleLock.h b/branches/sage/cephmds2/mds/SimpleLock.h index aabb42717b7bd..e20388acab030 100644 --- a/branches/sage/cephmds2/mds/SimpleLock.h +++ b/branches/sage/cephmds2/mds/SimpleLock.h @@ -66,9 +66,9 @@ class SimpleLock { public: static const int WAIT_RD = (1<<0); // to read static const int WAIT_WR = (1<<1); // to write - static const int WAIT_XLOCK = (1<<2); // to xlock - static const int WAIT_STABLE = (1<<3); // for a stable state - static const int WAIT_REMOTEXLOCK = (1<<4); // for a remote xlock (*) + static const int WAIT_XLOCK = (1<<2); // to xlock (** dup) + static const int WAIT_STABLE = (1<<2); // for a stable state + static const int WAIT_REMOTEXLOCK = (1<<3); // for a remote xlock static const int WAIT_BITS = 4; protected: @@ -114,11 +114,7 @@ public: parent->finish_waiting(mask << wait_offset, r); } void add_waiter(int mask, Context *c) { - // (*) REMOTEXLOCK events alsowait on parent's WAIT_SINGLEAUTH. 
- if (mask & WAIT_REMOTEXLOCK) - parent->add_waiter((mask << wait_offset) | MDSCacheObject::WAIT_SINGLEAUTH, c); - else - parent->add_waiter(mask << wait_offset, c); + parent->add_waiter(mask << wait_offset, c); } bool is_waiter_for(int mask) { return parent->is_waiter_for(mask << wait_offset); -- 2.39.5