From eb2abe084fb48a11812eb4ae85f83f11b93d48fa Mon Sep 17 00:00:00 2001 From: sageweil Date: Thu, 12 Jul 2007 21:05:26 +0000 Subject: [PATCH] * EImportMap -> ESubtreeMap * MMDSImportMap -> MMDSResolve * clientmap saved separate from journal now; much cleaner * some rejoin cleanup git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1489 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/cephmds2/Makefile | 1 + branches/sage/cephmds2/TODO | 3 + branches/sage/cephmds2/config.cc | 2 +- branches/sage/cephmds2/config.h | 2 +- branches/sage/cephmds2/mds/ClientMap.cc | 121 ++++++++++++ branches/sage/cephmds2/mds/ClientMap.h | 27 ++- branches/sage/cephmds2/mds/LogEvent.cc | 6 +- branches/sage/cephmds2/mds/LogEvent.h | 3 +- branches/sage/cephmds2/mds/MDCache.cc | 184 ++++++++---------- branches/sage/cephmds2/mds/MDCache.h | 24 +-- branches/sage/cephmds2/mds/MDLog.cc | 73 +------ branches/sage/cephmds2/mds/MDLog.h | 29 ++- branches/sage/cephmds2/mds/MDS.cc | 81 +++----- branches/sage/cephmds2/mds/MDS.h | 2 +- branches/sage/cephmds2/mds/Migrator.cc | 10 +- .../sage/cephmds2/mds/events/EClientMap.h | 59 ------ branches/sage/cephmds2/mds/events/EMetaBlob.h | 2 +- .../events/{EImportMap.h => ESubtreeMap.h} | 31 +-- branches/sage/cephmds2/mds/journal.cc | 111 ++++------- branches/sage/cephmds2/mds/mdstypes.h | 3 +- .../sage/cephmds2/messages/MMDSCacheRejoin.h | 2 +- .../{MMDSImportMap.h => MMDSResolve.h} | 39 ++-- branches/sage/cephmds2/msg/Message.cc | 6 +- branches/sage/cephmds2/msg/Message.h | 2 +- 24 files changed, 372 insertions(+), 451 deletions(-) create mode 100644 branches/sage/cephmds2/mds/ClientMap.cc delete mode 100644 branches/sage/cephmds2/mds/events/EClientMap.h rename branches/sage/cephmds2/mds/events/{EImportMap.h => ESubtreeMap.h} (54%) rename branches/sage/cephmds2/messages/{MMDSImportMap.h => MMDSResolve.h} (54%) diff --git a/branches/sage/cephmds2/Makefile b/branches/sage/cephmds2/Makefile index 191aefe268521..4e41ed0c2da7a 100644 --- a/branches/sage/cephmds2/Makefile +++ b/branches/sage/cephmds2/Makefile @@ -56,6 +56,7 @@ MDS_OBJS= \ mds/AnchorClient.o\ mds/LogEvent.o\ mds/IdAllocator.o\ + mds/ClientMap.o\ mds/MDLog.o OSD_OBJS= \ diff --git a/branches/sage/cephmds2/TODO b/branches/sage/cephmds2/TODO index f1564ed91cf46..7cc65921970e8 100644 --- a/branches/sage/cephmds2/TODO +++ b/branches/sage/cephmds2/TODO @@ -51,6 +51,9 @@ sage doc sage mds +- hmm, should we move ESubtreeMap out of the journal? + that would avoid all the icky weirdness in shutdown, with periodic logging, etc. + - fix rejoin / - validate dentry<->inode connectivity / - clean up remove_gather() crap diff --git a/branches/sage/cephmds2/config.cc b/branches/sage/cephmds2/config.cc index eff13b20be567..71a0940a20a29 100644 --- a/branches/sage/cephmds2/config.cc +++ b/branches/sage/cephmds2/config.cc @@ -174,7 +174,7 @@ md_config_t g_conf = { mds_log_read_inc: 1<<20, mds_log_pad_entry: 128,//256,//64, mds_log_flush_on_shutdown: true, - mds_log_import_map_interval: 128*1024, // frequency (in bytes) of EImportMap in log + mds_log_subtree_map_interval: 128*1024, // frequency (in bytes) of EImportMap in log mds_log_eopen_size: 100, // # open inodes per log entry mds_bal_replicate_threshold: 2000, diff --git a/branches/sage/cephmds2/config.h b/branches/sage/cephmds2/config.h index 85c3ce5649e4a..a77ba355e5e6a 100644 --- a/branches/sage/cephmds2/config.h +++ b/branches/sage/cephmds2/config.h @@ -172,7 +172,7 @@ struct md_config_t { int mds_log_read_inc; int mds_log_pad_entry; bool mds_log_flush_on_shutdown; - off_t mds_log_import_map_interval; + off_t mds_log_subtree_map_interval; int mds_log_eopen_size; float mds_bal_replicate_threshold; diff --git a/branches/sage/cephmds2/mds/ClientMap.cc b/branches/sage/cephmds2/mds/ClientMap.cc new file mode 100644 index 0000000000000..c1a3473ef1633 --- /dev/null +++ b/branches/sage/cephmds2/mds/ClientMap.cc @@ -0,0 +1,121 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + + +#define DBLEVEL 20 + +#include "include/types.h" + +#include "MDS.h" +#include "ClientMap.h" + +#include "osdc/Filer.h" + +#include "config.h" +#undef dout +#define dout(x) if (x <= g_conf.debug_mds) cout << g_clock.now() << " mds" << mds->get_nodeid() << ".clientmap " + + + +void ClientMap::init_inode() +{ + memset(&inode, 0, sizeof(inode)); + inode.ino = MDS_INO_CLIENTMAP_OFFSET + mds->get_nodeid(); + inode.layout = g_OSD_FileLayout; +} + + +// ---------------- +// LOAD + +class C_CM_Load : public Context { + ClientMap *clientmap; +public: + bufferlist bl; + C_CM_Load(ClientMap *cm) : clientmap(cm) {} + void finish(int r) { + clientmap->_load_finish(bl); + } +}; + +void ClientMap::load(Context *onload) +{ + dout(10) << "load" << endl; + + init_inode(); + + if (onload) + waiting_for_load.push_back(onload); + + C_CM_Load *c = new C_CM_Load(this); + mds->filer->read(inode, + 0, inode.layout.stripe_size, + &c->bl, + c); + +} + +void ClientMap::_load_finish(bufferlist &bl) +{ + int off = 0; + decode(bl, off); + dout(10) << "_load_finish v " << version + << ", " << client_inst.size() << " clients, " + << bl.length() << " bytes" + << endl; + projected = committing = committed = version; + finish_contexts(waiting_for_load); +} + + +// ---------------- +// SAVE + +class C_CM_Save : public Context { + ClientMap *clientmap; + version_t version; +public: + C_CM_Save(ClientMap *cm, version_t v) : clientmap(cm), version(v) {} + void finish(int r) { + clientmap->_save_finish(version); + } +}; + +void ClientMap::save(Context *onsave, version_t needv) +{ + dout(10) << "save needv " << needv << ", v " << version << endl; + commit_waiters[version].push_back(onsave); + + if (needv && committing >= needv) return; + + bufferlist bl; + + init_inode(); + encode(bl); + committing = version; + mds->filer->write(inode, + 0, bl.length(), bl, + 0, + 0, new C_CM_Save(this, version)); +} + +void ClientMap::_save_finish(version_t v) +{ + dout(10) << "_save_finish v" << v << endl; + committed = v; + + finish_contexts(commit_waiters[v]); + commit_waiters.erase(v); +} diff --git a/branches/sage/cephmds2/mds/ClientMap.h b/branches/sage/cephmds2/mds/ClientMap.h index dd291d9a7b1b3..6fa68e207f5a4 100644 --- a/branches/sage/cephmds2/mds/ClientMap.h +++ b/branches/sage/cephmds2/mds/ClientMap.h @@ -23,6 +23,10 @@ using namespace std; #include using namespace __gnu_cxx; +#include "include/Context.h" +#include "mdstypes.h" + +class MDS; /* * this structure is used by the MDS purely so that @@ -35,6 +39,8 @@ using namespace __gnu_cxx; */ class ClientMap { private: + MDS *mds; + version_t version; version_t projected; version_t committing; @@ -42,7 +48,8 @@ private: map > commit_waiters; public: - ClientMap() : version(0), projected(0), committing(0), committed(0) {} + ClientMap(MDS *m) : mds(m), + version(0), projected(0), committing(0), committed(0) {} version_t get_version() { return version; } version_t get_projected() { return projected; } @@ -54,14 +61,6 @@ public: void set_committing(version_t v) { committing = v; } void set_committed(version_t v) { committed = v; } - void add_commit_waiter(Context *c) { - commit_waiters[committing].push_back(c); - } - void take_commit_waiters(version_t v, list& ls) { - ls.swap(commit_waiters[v]); - commit_waiters.erase(v); - } - private: // affects version hash_map client_inst; @@ -174,6 +173,16 @@ public: projected = committing = committed = version; } + + // -- loading, saving -- + inode_t inode; + list waiting_for_load; + + void init_inode(); + void load(Context *onload); + void _load_finish(bufferlist &bl); + void save(Context *onsave, version_t needv=0); + void _save_finish(version_t v); }; #endif diff --git a/branches/sage/cephmds2/mds/LogEvent.cc b/branches/sage/cephmds2/mds/LogEvent.cc index 4539838058fdc..687428e47b959 100644 --- a/branches/sage/cephmds2/mds/LogEvent.cc +++ b/branches/sage/cephmds2/mds/LogEvent.cc @@ -20,8 +20,7 @@ #include "events/EString.h" #include "events/ESession.h" -#include "events/EClientMap.h" -#include "events/EImportMap.h" +#include "events/ESubtreeMap.h" #include "events/EExport.h" #include "events/EImportStart.h" #include "events/EImportFinish.h" @@ -55,8 +54,7 @@ LogEvent *LogEvent::decode(bufferlist& bl) case EVENT_STRING: le = new EString; break; case EVENT_SESSION: le = new ESession; break; - case EVENT_CLIENTMAP: le = new EClientMap; break; - case EVENT_IMPORTMAP: le = new EImportMap; break; + case EVENT_SUBTREEMAP: le = new ESubtreeMap; break; case EVENT_EXPORT: le = new EExport; break; case EVENT_IMPORTSTART: le = new EImportStart; break; case EVENT_IMPORTFINISH: le = new EImportFinish; break; diff --git a/branches/sage/cephmds2/mds/LogEvent.h b/branches/sage/cephmds2/mds/LogEvent.h index f16cb9ded97d2..917fdbf1af962 100644 --- a/branches/sage/cephmds2/mds/LogEvent.h +++ b/branches/sage/cephmds2/mds/LogEvent.h @@ -18,8 +18,7 @@ #define EVENT_STRING 1 #define EVENT_SESSION 7 -#define EVENT_CLIENTMAP 8 -#define EVENT_IMPORTMAP 2 +#define EVENT_SUBTREEMAP 2 #define EVENT_EXPORT 30 #define EVENT_IMPORTSTART 31 #define EVENT_IMPORTFINISH 32 diff --git a/branches/sage/cephmds2/mds/MDCache.cc b/branches/sage/cephmds2/mds/MDCache.cc index 6a269c1a40c19..0701df2e657a9 100644 --- a/branches/sage/cephmds2/mds/MDCache.cc +++ b/branches/sage/cephmds2/mds/MDCache.cc @@ -37,7 +37,7 @@ #include "osdc/Filer.h" -#include "events/EImportMap.h" +#include "events/ESubtreeMap.h" #include "events/EUpdate.h" #include "events/ESlaveUpdate.h" #include "events/EString.h" @@ -46,7 +46,7 @@ #include "messages/MGenericMessage.h" -#include "messages/MMDSImportMap.h" +#include "messages/MMDSResolve.h" #include "messages/MMDSResolveAck.h" #include "messages/MMDSCacheRejoin.h" @@ -880,24 +880,24 @@ int MDCache::num_subtrees_fullnonauth() * take note of where we write import_maps in the log, as we need * to take care not to expire them until an updated map is safely flushed. */ -class C_MDS_WroteImportMap : public Context { +class C_MDS_WroteSubtreeMap : public Context { MDCache *mdcache; off_t end_off; public: - C_MDS_WroteImportMap(MDCache *mc, off_t eo) : mdcache(mc), end_off(eo) { } + C_MDS_WroteSubtreeMap(MDCache *mc, off_t eo) : mdcache(mc), end_off(eo) { } void finish(int r) { - mdcache->_logged_import_map(end_off); + mdcache->_logged_subtree_map(end_off); } }; -void MDCache::log_import_map(Context *onsync) +void MDCache::log_subtree_map(Context *onsync) { - dout(10) << "log_import_map " << num_subtrees() << " subtrees, " + dout(10) << "log_subtree_map " << num_subtrees() << " subtrees, " << num_subtrees_fullauth() << " fullauth" << endl; - EImportMap *le = new EImportMap; + ESubtreeMap *le = new ESubtreeMap; // include all auth subtrees, and their bounds. // and a spanning tree to tie it to the root. @@ -907,7 +907,7 @@ void MDCache::log_import_map(Context *onsync) CDir *dir = p->first; if (!dir->is_auth()) continue; - le->imports.insert(dir->dirfrag()); + le->subtrees[dir->dirfrag()].clear(); le->metablob.add_dir_context(dir, true); le->metablob.add_dir(dir, false); @@ -916,81 +916,81 @@ void MDCache::log_import_map(Context *onsync) q != p->second.end(); ++q) { CDir *bound = *q; - le->bounds[dir->dirfrag()].insert(bound->dirfrag()); + le->subtrees[dir->dirfrag()].push_back(bound->dirfrag()); le->metablob.add_dir_context(bound); le->metablob.add_dir(bound, false); } } - Context *fin = new C_MDS_WroteImportMap(this, mds->mdlog->get_write_pos()); - mds->mdlog->writing_import_map = true; + Context *fin = new C_MDS_WroteSubtreeMap(this, mds->mdlog->get_write_pos()); + mds->mdlog->writing_subtree_map = true; mds->mdlog->submit_entry(le); mds->mdlog->wait_for_sync(fin); if (onsync) mds->mdlog->wait_for_sync(onsync); } -void MDCache::_logged_import_map(off_t off) +void MDCache::_logged_subtree_map(off_t off) { - dout(10) << "_logged_import_map at " << off << endl; - mds->mdlog->last_import_map = off; - mds->mdlog->writing_import_map = false; + dout(10) << "_logged_subtree_map at " << off << endl; + mds->mdlog->last_subtree_map = off; + mds->mdlog->writing_subtree_map = false; list ls; - mds->mdlog->take_import_map_expire_waiters(ls); + mds->mdlog->take_subtree_map_expire_waiters(ls); mds->queue_waiters(ls); } -void MDCache::send_import_map(int who) +void MDCache::send_resolve(int who) { if (migrator->is_exporting()) - send_import_map_later(who); + send_resolve_later(who); else - send_import_map_now(who); + send_resolve_now(who); } -void MDCache::send_import_map_later(int who) +void MDCache::send_resolve_later(int who) { - dout(10) << "send_import_map_later to mds" << who << endl; - wants_import_map.insert(who); + dout(10) << "send_resolve_later to mds" << who << endl; + wants_resolve.insert(who); } -void MDCache::send_pending_import_maps() +void MDCache::maybe_send_pending_resolves() { - if (wants_import_map.empty()) + if (wants_resolve.empty()) return; // nothing to send. // only if it's appropriate! if (migrator->is_exporting() || migrator->is_importing()) { - dout(7) << "send_pending_import_maps waiting, imports/exports still in progress" << endl; + dout(7) << "maybe_send_pending_resolves waiting, imports/exports still in progress" << endl; return; // not now } // ok, send them. - for (set::iterator p = wants_import_map.begin(); - p != wants_import_map.end(); + for (set::iterator p = wants_resolve.begin(); + p != wants_resolve.end(); p++) - send_import_map_now(*p); - wants_import_map.clear(); + send_resolve_now(*p); + wants_resolve.clear(); } -class C_MDC_SendImportMap : public Context { +class C_MDC_SendResolve : public Context { MDCache *mdc; int who; public: - C_MDC_SendImportMap(MDCache *c, int w) : mdc(c), who(w) { } + C_MDC_SendResolve(MDCache *c, int w) : mdc(c), who(w) { } void finish(int r) { - mdc->send_import_map_now(who); + mdc->send_resolve_now(who); } }; -void MDCache::send_import_map_now(int who) +void MDCache::send_resolve_now(int who) { - dout(10) << "send_import_map_now to mds" << who << endl; - MMDSImportMap *m = new MMDSImportMap; + dout(10) << "send_resolve_now to mds" << who << endl; + MMDSResolve *m = new MMDSResolve; show_subtrees(); @@ -1010,14 +1010,14 @@ void MDCache::send_import_map_now(int who) migrator->get_import_bound_inos(dir->dirfrag())); } else { // not ambiguous. - m->add_import(dir->dirfrag()); + m->add_subtree(dir->dirfrag()); // bounds too for (set::iterator q = subtrees[dir].begin(); q != subtrees[dir].end(); ++q) { CDir *bound = *q; - m->add_import_export(dir->dirfrag(), bound->dirfrag()); + m->add_subtree_bound(dir->dirfrag(), bound->dirfrag()); } } } @@ -1066,8 +1066,8 @@ void MDCache::handle_mds_failure(int who) dout(1) << "my recovery peers will be " << recovery_set << endl; // adjust my recovery lists - wants_import_map.erase(who); // MDS will ask again - got_import_map.erase(who); // i'll get another. + wants_resolve.erase(who); // MDS will ask again + got_resolve.erase(who); // i'll get another. rejoin_ack_gather.erase(who); // i'll need/get another. // adjust subtree auth @@ -1230,14 +1230,14 @@ void MDCache::set_recovery_set(set& s) /* - * during resolve state, we share import_maps to determine who - * is authoritative for which trees. we expect to get an import_map + * during resolve state, we share resolves to determine who + * is authoritative for which trees. we expect to get an resolve * from _everyone_ in the recovery_set (the mds cluster at the time of * the first failure). */ -void MDCache::handle_import_map(MMDSImportMap *m) +void MDCache::handle_resolve(MMDSResolve *m) { - dout(7) << "handle_import_map from " << m->get_source() << endl; + dout(7) << "handle_resolve from " << m->get_source() << endl; int from = m->get_source().num(); // ambiguous slave requests? @@ -1262,8 +1262,8 @@ void MDCache::handle_import_map(MMDSImportMap *m) } // update my dir_auth values - for (map >::iterator pi = m->imap.begin(); - pi != m->imap.end(); + for (map >::iterator pi = m->subtrees.begin(); + pi != m->subtrees.end(); ++pi) { CDir *im = get_dirfrag(pi->first); if (im) { @@ -1277,7 +1277,7 @@ void MDCache::handle_import_map(MMDSImportMap *m) * note: it would be cleaner to do this check before updating our own * subtree map.. then the import_finish or _reverse could operate on an * un-munged subtree map. but... checking for import completion against - * the provided import_map isn't easy. so, we skip audit checks in these + * the provided resolve isn't easy. so, we skip audit checks in these * functions. */ if (mds->is_active() || mds->is_stopping()) { @@ -1312,15 +1312,15 @@ void MDCache::handle_import_map(MMDSImportMap *m) // resolving? if (mds->is_resolve()) { // note ambiguous imports too - for (map >::iterator pi = m->ambiguous_imap.begin(); - pi != m->ambiguous_imap.end(); + for (map >::iterator pi = m->ambiguous_imports.begin(); + pi != m->ambiguous_imports.end(); ++pi) { dout(10) << "noting ambiguous import on " << pi->first << " bounds " << pi->second << endl; other_ambiguous_imports[from][pi->first].swap( pi->second ); } // did i get them all? - got_import_map.insert(from); + got_resolve.insert(from); maybe_resolve_finish(); } @@ -1330,8 +1330,8 @@ void MDCache::handle_import_map(MMDSImportMap *m) void MDCache::maybe_resolve_finish() { - if (got_import_map != recovery_set) { - dout(10) << "still waiting for more importmaps, got " << got_import_map + if (got_resolve != recovery_set) { + dout(10) << "still waiting for more importmaps, got " << got_resolve << ", need " << recovery_set << endl; } else if (!need_resolve_ack.empty()) { @@ -1709,7 +1709,6 @@ void MDCache::send_cache_rejoins() * from a rejoining node: * weak dirfrag * weak dentries (w/ connectivity) - * strong inodes, if we have open files * * from a surviving node: * strong dirfrag @@ -1720,42 +1719,38 @@ void MDCache::cache_rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin) { dout(10) << "cache_rejoin_walk " << *dir << endl; - // walk dirfrag's dentries. list nested; // finish this dir, then do nested items - if (mds->is_rejoin()) + if (mds->is_rejoin()) { + // WEAK rejoin->add_weak_dirfrag(dir->dirfrag()); - else - rejoin->add_strong_dirfrag(dir->dirfrag(), dir->get_replica_nonce()); - - for (map::iterator p = dir->items.begin(); - p != dir->items.end(); - ++p) { - // dentry - CDentry *dn = p->second; - if (mds->is_rejoin()) { - // weak - if (dn->is_null()) { - rejoin->add_weak_null_dentry(dir->dirfrag(), p->first); - } else if (dn->is_primary()) { + + for (map::iterator p = dir->items.begin(); + p != dir->items.end(); + ++p) { + CDentry *dn = p->second; + if (dn->is_primary()) rejoin->add_weak_primary_dentry(dir->dirfrag(), p->first, dn->get_inode()->ino()); - } else { + else if (dn->is_remote()) rejoin->add_weak_remote_dentry(dir->dirfrag(), p->first, dn->get_remote_ino()); - } - } else { - // strong + else + assert(0); // i shouldn't have a non-auth null dentry after journal replay.. + } + } else { + // STRONG + rejoin->add_strong_dirfrag(dir->dirfrag(), dir->get_replica_nonce()); + + for (map::iterator p = dir->items.begin(); + p != dir->items.end(); + ++p) { + CDentry *dn = p->second; rejoin->add_strong_dentry(dir->dirfrag(), p->first, dn->is_primary() ? dn->get_inode()->ino():inodeno_t(0), dn->is_remote() ? dn->get_remote_ino():inodeno_t(0), dn->get_replica_nonce(), dn->lock.get_state()); - } - - if (dn->is_primary()) { - CInode *in = dn->get_inode(); - - // strong inode? - if (!mds->is_rejoin() || in->get_caps_wanted()) + if (dn->is_primary()) { + CInode *in = dn->get_inode(); rejoin->add_strong_inode(in->ino(), in->get_replica_nonce(), in->get_caps_wanted(), in->authlock.get_state(), @@ -1763,12 +1758,11 @@ void MDCache::cache_rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin) in->dirfragtreelock.get_state(), in->filelock.get_state(), in->dirlock.get_state()); - - // dirfrags in this subtree? - in->get_nested_dirfrags(nested); + in->get_nested_dirfrags(nested); + } } } - + // recurse into nested dirs for (list::iterator p = nested.begin(); p != nested.end(); @@ -1927,15 +1921,6 @@ void MDCache::handle_cache_rejoin_weak_rejoin(MMDSCacheRejoin *weak) CInode *in = dn->get_inode(); assert(in); - // strong inode? if so, note caps_wanted - if (weak->strong_inodes.count(in->ino())) { - assert(weak->strong_inodes[in->ino()].caps_wanted); - in->mds_caps_wanted[from] = weak->strong_inodes[in->ino()].caps_wanted; - weak->strong_inodes.erase(in->ino()); - } else { - in->mds_caps_wanted.erase(from); - } - if (survivor) inode_remove_replica(in, from); int nonce = in->add_replica(from); dout(10) << " have " << *in << endl; @@ -1944,7 +1929,7 @@ void MDCache::handle_cache_rejoin_weak_rejoin(MMDSCacheRejoin *weak) if (!survivor) in->dirlock.set_state(LOCK_SCATTER); - if (ack) { + if (ack) ack->add_strong_inode(in->ino(), nonce, 0, @@ -1953,7 +1938,6 @@ void MDCache::handle_cache_rejoin_weak_rejoin(MMDSCacheRejoin *weak) in->dirfragtreelock.get_replica_state(), in->filelock.get_replica_state(), in->dirlock.get_replica_state()); - } } } } @@ -3393,9 +3377,9 @@ bool MDCache::shutdown_pass() // (wait for) flush log? if (g_conf.mds_log_flush_on_shutdown) { - if (mds->mdlog->get_non_importmap_events()) { + if (mds->mdlog->get_non_subtreemap_events()) { dout(7) << "waiting for log to flush .. " << mds->mdlog->get_num_events() - << " (" << mds->mdlog->get_non_importmap_events() << ")" << endl; + << " (" << mds->mdlog->get_non_subtreemap_events() << ")" << endl; return false; } } @@ -3411,8 +3395,8 @@ bool MDCache::shutdown_pass() } if (mds->mdlog->get_num_events()) { - dout(7) << "waiting for log to flush (including import_map, now) .. " << mds->mdlog->get_num_events() - << " (" << mds->mdlog->get_non_importmap_events() << ")" << endl; + dout(7) << "waiting for log to flush (including subtree_map, now) .. " << mds->mdlog->get_num_events() + << " (" << mds->mdlog->get_non_subtreemap_events() << ")" << endl; return false; } @@ -3463,8 +3447,8 @@ void MDCache::dispatch(Message *m) switch (m->get_type()) { // RESOLVE - case MSG_MDS_IMPORTMAP: - handle_import_map((MMDSImportMap*)m); + case MSG_MDS_RESOLVE: + handle_resolve((MMDSResolve*)m); break; case MSG_MDS_RESOLVEACK: handle_resolve_ack((MMDSResolveAck*)m); diff --git a/branches/sage/cephmds2/mds/MDCache.h b/branches/sage/cephmds2/mds/MDCache.h index f546309aa14be..924ef9692c402 100644 --- a/branches/sage/cephmds2/mds/MDCache.h +++ b/branches/sage/cephmds2/mds/MDCache.h @@ -40,7 +40,7 @@ class Logger; class Message; -class MMDSImportMap; +class MMDSResolve; class MMDSResolveAck; class MMDSCacheRejoin; class MMDSCacheRejoinAck; @@ -260,7 +260,7 @@ public: protected: // delayed cache expire - map > delayed_expire; // import|export dir -> expire msg + map > delayed_expire; // subtree root -> expire msg // -- discover -- hash_map > dir_discovers; // dirino -> mds set i'm trying to discover. @@ -310,7 +310,7 @@ protected: // [resolve] // from EImportStart w/o EImportFinish during journal replay map > my_ambiguous_imports; - // from MMDSImportMaps + // from MMDSResolves map > > other_ambiguous_imports; map > uncommitted_slave_updates; // for replay. @@ -318,11 +318,11 @@ protected: map waiting_for_slave_update_commit; friend class ESlaveUpdate; - set wants_import_map; // nodes i need to send my import map to - set got_import_map; // nodes i got import_maps from + set wants_resolve; // nodes i need to send my resolve to + set got_resolve; // nodes i got resolves from set need_resolve_ack; // nodes i need a resolve_ack from - void handle_import_map(MMDSImportMap *m); + void handle_resolve(MMDSResolve *m); void handle_resolve_ack(MMDSResolveAck *m); void maybe_resolve_finish(); void disambiguate_imports(); @@ -333,12 +333,12 @@ public: void add_ambiguous_import(CDir *base, const set& bounds); void cancel_ambiguous_import(dirfrag_t dirino); void finish_ambiguous_import(dirfrag_t dirino); - void send_import_map(int who); - void send_import_map_now(int who); - void send_import_map_later(int who); - void send_pending_import_maps(); // maybe. - void log_import_map(Context *onsync=0); - void _logged_import_map(off_t off); + void send_resolve(int who); + void send_resolve_now(int who); + void send_resolve_later(int who); + void maybe_send_pending_resolves(); + void log_subtree_map(Context *onsync=0); + void _logged_subtree_map(off_t off); protected: // [rejoin] diff --git a/branches/sage/cephmds2/mds/MDLog.cc b/branches/sage/cephmds2/mds/MDLog.cc index 530d8663b5290..1d536e493ce34 100644 --- a/branches/sage/cephmds2/mds/MDLog.cc +++ b/branches/sage/cephmds2/mds/MDLog.cc @@ -167,10 +167,10 @@ void MDLog::submit_entry( LogEvent *le, Context *c ) // should we log a new import_map? // FIXME: should this go elsewhere? - if (last_import_map && !writing_import_map && - journaler->get_write_pos() - last_import_map >= g_conf.mds_log_import_map_interval) { + if (last_subtree_map && !writing_subtree_map && + journaler->get_write_pos() - last_subtree_map >= g_conf.mds_log_subtree_map_interval) { // log import map - mds->mdcache->log_import_map(); + mds->mdcache->log_subtree_map(); } } else { @@ -438,17 +438,17 @@ void MDLog::_replay_thread() num_events++; // have we seen an import map yet? - if (!seen_import_map && - le->get_type() != EVENT_IMPORTMAP) { + if (!seen_subtree_map && + le->get_type() != EVENT_SUBTREEMAP) { dout(10) << "_replay " << pos << " / " << journaler->get_write_pos() - << " -- waiting for import_map. (skipping " << *le << ")" << endl; + << " -- waiting for subtree_map. (skipping " << *le << ")" << endl; } else { dout(10) << "_replay " << pos << " / " << journaler->get_write_pos() << " : " << *le << endl; le->replay(mds); - if (le->get_type() == EVENT_IMPORTMAP) - seen_import_map = true; + if (le->get_type() == EVENT_SUBTREEMAP) + seen_subtree_map = true; } delete le; @@ -475,60 +475,3 @@ void MDLog::_replay_thread() -void MDLog::_replay() -{ - mds->mds_lock.Lock(); - - // read what's buffered - while (journaler->is_readable() && - journaler->get_read_pos() < journaler->get_write_pos()) { - // read it - off_t pos = journaler->get_read_pos(); - bufferlist bl; - bool r = journaler->try_read_entry(bl); - assert(r); - - // unpack event - LogEvent *le = LogEvent::decode(bl); - num_events++; - - // have we seen an import map yet? - if (!seen_import_map && - le->get_type() != EVENT_IMPORTMAP) { - dout(10) << "_replay " << pos << " / " << journaler->get_write_pos() - << " -- waiting for import_map. (skipping " << *le << ")" << endl; - } else { - dout(10) << "_replay " << pos << " / " << journaler->get_write_pos() - << " : " << *le << endl; - le->replay(mds); - - if (le->get_type() == EVENT_IMPORTMAP) - seen_import_map = true; - } - delete le; - - // drop lock for a second, so other events (e.g. beacon timer!) can go off - mds->mds_lock.Unlock(); - mds->mds_lock.Lock(); - } - - // wait for read? - if (journaler->get_read_pos() < journaler->get_write_pos()) { - journaler->wait_for_readable(new C_MDL_Replay(this)); - return; - } - - // done! - assert(journaler->get_read_pos() == journaler->get_write_pos()); - dout(10) << "_replay - complete" << endl; - - // move read pointer _back_ to expire pos, for eventual trimming - journaler->set_read_pos(journaler->get_expire_pos()); - - // kick waiter(s) - list ls; - ls.swap(waitfor_replay); - finish_contexts(ls,0); -} - - diff --git a/branches/sage/cephmds2/mds/MDLog.h b/branches/sage/cephmds2/mds/MDLog.h index b721f46568da5..75464df26e304 100644 --- a/branches/sage/cephmds2/mds/MDLog.h +++ b/branches/sage/cephmds2/mds/MDLog.h @@ -92,24 +92,23 @@ class MDLog { - // -- importmaps -- - off_t last_import_map; // offsets of last committed importmap. constrains trimming. - list import_map_expire_waiters; - bool writing_import_map; // one is being written now - bool seen_import_map; // for recovery + // -- subtreemaps -- + off_t last_subtree_map; // offsets of last committed subtreemap. constrains trimming. + list subtree_map_expire_waiters; + bool writing_subtree_map; // one is being written now + bool seen_subtree_map; // for recovery - //friend class EImportMap; friend class C_MDS_WroteImportMap; friend class MDCache; void init_journaler(); public: - off_t get_last_import_map_offset() { return last_import_map; } - void add_import_map_expire_waiter(Context *c) { - import_map_expire_waiters.push_back(c); + off_t get_last_subtree_map_offset() { return last_subtree_map; } + void add_subtree_map_expire_waiter(Context *c) { + subtree_map_expire_waiters.push_back(c); } - void take_import_map_expire_waiters(list& ls) { - ls.splice(ls.end(), import_map_expire_waiters); + void take_subtree_map_expire_waiters(list& ls) { + ls.splice(ls.end(), subtree_map_expire_waiters); } @@ -128,8 +127,8 @@ class MDLog { logger(0), trim_reading(false), waiting_for_read(false), replay_thread(this), - last_import_map(0), - writing_import_map(false), seen_import_map(false) { + last_subtree_map(0), + writing_subtree_map(false), seen_subtree_map(false) { } ~MDLog(); @@ -139,7 +138,7 @@ class MDLog { void set_max_events(size_t max) { max_events = max; } size_t get_max_events() { return max_events; } size_t get_num_events() { return num_events + trimming.size(); } - size_t get_non_importmap_events() { return num_events + trimming.size() - import_map_expire_waiters.size(); } + size_t get_non_subtreemap_events() { return num_events + trimming.size() - subtree_map_expire_waiters.size(); } off_t get_read_pos(); off_t get_write_pos(); @@ -151,7 +150,7 @@ class MDLog { void cap() { capped = true; list ls; - ls.swap(import_map_expire_waiters); + ls.swap(subtree_map_expire_waiters); finish_contexts(ls); } diff --git a/branches/sage/cephmds2/mds/MDS.cc b/branches/sage/cephmds2/mds/MDS.cc index b74a69579baeb..2fbcc72b8d4f2 100644 --- a/branches/sage/cephmds2/mds/MDS.cc +++ b/branches/sage/cephmds2/mds/MDS.cc @@ -43,7 +43,6 @@ #include "common/Timer.h" -#include "events/EClientMap.h" #include "events/ESession.h" #include "messages/MMDSMap.h" @@ -72,7 +71,9 @@ LogType mds_logtype, mds_cache_logtype; // cons/des -MDS::MDS(int whoami, Messenger *m, MonMap *mm) : timer(mds_lock) { +MDS::MDS(int whoami, Messenger *m, MonMap *mm) : + timer(mds_lock), + clientmap(this) { this->whoami = whoami; monmap = mm; @@ -595,7 +596,7 @@ void MDS::handle_mds_map(MMDSMap *m) for (set::iterator p = resolve.begin(); p != resolve.end(); ++p) { if (*p == whoami) continue; if (oldresolve.count(*p)) continue; - mdcache->send_import_map(*p); // now or later. + mdcache->send_resolve(*p); // now or later. } } } @@ -757,13 +758,16 @@ void MDS::boot_create() mdlog->reset(); mdlog->write_head(fin->new_sub()); - // write our first importmap - mdcache->log_import_map(fin->new_sub()); + // write our first subtreemap + mdcache->log_subtree_map(fin->new_sub()); // fixme: fake out idalloc (reset, pretend loaded) dout(10) << "boot_create creating fresh idalloc table" << endl; idalloc->reset(); idalloc->save(fin->new_sub()); + + // write empty clientmap + clientmap.save(fin->new_sub()); // fixme: fake out anchortable if (mdsmap->get_anchortable() == whoami) { @@ -781,6 +785,9 @@ void MDS::boot_start() dout(2) << "boot_start opening idalloc" << endl; idalloc->load(fin->new_sub()); + + dout(2) << "boot_start opening clientmap" << endl; + clientmap.load(fin->new_sub()); if (mdsmap->get_anchortable() == whoami) { dout(2) << "boot_start opening anchor table" << endl; @@ -829,32 +836,32 @@ void MDS::boot_replay(int step) step = 1; // fall-thru. case 1: - dout(2) << "boot_replay " << step << ": opening idalloc" << endl; - idalloc->load(new C_MDS_BootRecover(this, 2)); - break; + { + C_Gather *gather = new C_Gather(new C_MDS_BootRecover(this, 2)); + dout(2) << "boot_replay " << step << ": opening idalloc" << endl; + idalloc->load(gather->new_sub()); - case 2: - if (mdsmap->get_anchortable() == whoami) { - dout(2) << "boot_replay " << step << ": opening anchor table" << endl; - anchortable->load(new C_MDS_BootRecover(this, 3)); - break; - } else { - dout(2) << "boot_replay " << step << ": i have no anchor table" << endl; - step++; // fall-thru + dout(2) << "boot_replay " << step << ": opening clientmap" << endl; + clientmap.load(gather->new_sub()); + + if (mdsmap->get_anchortable() == whoami) { + dout(2) << "boot_replay " << step << ": opening anchor table" << endl; + anchortable->load(gather->new_sub()); + } } + break; - case 3: + case 2: dout(2) << "boot_replay " << step << ": opening mds log" << endl; - mdlog->open(new C_MDS_BootRecover(this, 4)); + mdlog->open(new C_MDS_BootRecover(this, 3)); break; - case 4: + case 3: dout(2) << "boot_replay " << step << ": replaying mds log" << endl; - mdlog->replay(new C_MDS_BootRecover(this, 5)); + mdlog->replay(new C_MDS_BootRecover(this, 4)); break; - case 5: - // done with replay! + case 4: replay_done(); break; @@ -903,7 +910,7 @@ void MDS::resolve_start() mdsmap->get_mds_set(who, MDSMap::STATE_STOPPING); for (set::iterator p = who.begin(); p != who.end(); ++p) { if (*p == whoami) continue; - mdcache->send_import_map(*p); // now. + mdcache->send_resolve(*p); // now. } } void MDS::resolve_done() @@ -1314,31 +1321,3 @@ void MDS::handle_ping(MPing *m) - -class C_LogClientmap : public Context { - ClientMap *clientmap; - version_t cmapv; -public: - C_LogClientmap(ClientMap *cm, version_t v) : - clientmap(cm), cmapv(v) {} - void finish(int r) { - clientmap->set_committed(cmapv); - list ls; - clientmap->take_commit_waiters(cmapv, ls); - finish_contexts(ls); - } -}; - -void MDS::log_clientmap(Context *c) -{ - dout(10) << "log_clientmap " << clientmap.get_version() << endl; - - bufferlist bl; - clientmap.encode(bl); - - clientmap.set_committing(clientmap.get_version()); - clientmap.add_commit_waiter(c); - - mdlog->submit_entry(new EClientMap(bl, clientmap.get_version()), - new C_LogClientmap(&clientmap, clientmap.get_version())); -} diff --git a/branches/sage/cephmds2/mds/MDS.h b/branches/sage/cephmds2/mds/MDS.h index 1b1098d68ca47..c5dd4652aed7e 100644 --- a/branches/sage/cephmds2/mds/MDS.h +++ b/branches/sage/cephmds2/mds/MDS.h @@ -193,7 +193,7 @@ class MDS : public Dispatcher { // -- client map -- ClientMap clientmap; epoch_t last_client_mdsmap_bcast; - void log_clientmap(Context *c); + //void log_clientmap(Context *c); // shutdown crap diff --git a/branches/sage/cephmds2/mds/Migrator.cc b/branches/sage/cephmds2/mds/Migrator.cc index 99923f6483a78..8b17893d23b34 100644 --- a/branches/sage/cephmds2/mds/Migrator.cc +++ b/branches/sage/cephmds2/mds/Migrator.cc @@ -255,7 +255,7 @@ void Migrator::handle_mds_failure_or_stop(int who) export_finish_waiters.erase(dir); // send pending import_maps? (these need to go out when all exports have finished.) - cache->send_pending_import_maps(); + cache->maybe_send_pending_resolves(); cache->show_subtrees(); } @@ -377,13 +377,13 @@ void Migrator::show_importing() p++) { CDir *dir = mds->mdcache->get_dirfrag(p->first); if (dir) { - dout(10) << "importing to " << import_peer[p->first] + dout(10) << " importing to " << import_peer[p->first] << ": (" << p->second << ") " << get_import_statename(p->second) << " " << p->first << " " << *dir << endl; } else { - dout(10) << "importing to " << import_peer[p->first] + dout(10) << " importing to " << import_peer[p->first] << ": (" << p->second << ") " << get_import_statename(p->second) << " " << p->first << endl; @@ -397,7 +397,7 @@ void Migrator::show_exporting() for (map::iterator p = export_state.begin(); p != export_state.end(); p++) - dout(10) << "exporting to " << export_peer[p->first] + dout(10) << " exporting to " << export_peer[p->first] << ": (" << p->second << ") " << get_export_statename(p->second) << " " << p->first->dirfrag() << " " << *p->first @@ -1195,7 +1195,7 @@ void Migrator::export_finish(CDir *dir) audit(); // send pending import_maps? - mds->mdcache->send_pending_import_maps(); + mds->mdcache->maybe_send_pending_resolves(); } diff --git a/branches/sage/cephmds2/mds/events/EClientMap.h b/branches/sage/cephmds2/mds/events/EClientMap.h deleted file mode 100644 index d9b93f5c14872..0000000000000 --- a/branches/sage/cephmds2/mds/events/EClientMap.h +++ /dev/null @@ -1,59 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2004-2006 Sage Weil - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#ifndef __MDS_ECLIENTMAP_H -#define __MDS_ECLIENTMAP_H - -#include -#include "config.h" -#include "include/types.h" - -#include "../LogEvent.h" - -class EClientMap : public LogEvent { - protected: - bufferlist mapbl; - version_t cmapv; // client map version - - public: - EClientMap() : LogEvent(EVENT_CLIENTMAP) { } - EClientMap(bufferlist& bl, version_t v) : - LogEvent(EVENT_CLIENTMAP), - cmapv(v) { - mapbl.claim(bl); - } - - void encode_payload(bufferlist& bl) { - bl.append((char*)&cmapv, sizeof(cmapv)); - ::_encode(mapbl, bl); - } - void decode_payload(bufferlist& bl, int& off) { - bl.copy(off, sizeof(cmapv), (char*)&cmapv); - off += sizeof(cmapv); - ::_decode(mapbl, bl, off); - } - - - void print(ostream& out) { - out << "EClientMap v " << cmapv; - } - - - bool has_expired(MDS *mds); - void expire(MDS *mds, Context *c); - void replay(MDS *mds); - -}; - -#endif diff --git a/branches/sage/cephmds2/mds/events/EMetaBlob.h b/branches/sage/cephmds2/mds/events/EMetaBlob.h index a8ddd94ace19f..1182f250d77bd 100644 --- a/branches/sage/cephmds2/mds/events/EMetaBlob.h +++ b/branches/sage/cephmds2/mds/events/EMetaBlob.h @@ -392,7 +392,7 @@ class EMetaBlob { void print(ostream& out) const { out << "[metablob"; if (!lump_order.empty()) - out << lump_order.front() << ", " << lump_map.size() << " dirs"; + out << " " << lump_order.front() << ", " << lump_map.size() << " dirs"; if (!atids.empty()) out << " atids=" << atids; if (!allocated_inos.empty()) diff --git a/branches/sage/cephmds2/mds/events/EImportMap.h b/branches/sage/cephmds2/mds/events/ESubtreeMap.h similarity index 54% rename from branches/sage/cephmds2/mds/events/EImportMap.h rename to branches/sage/cephmds2/mds/events/ESubtreeMap.h index 410e7467aecb1..3997a6b5686c1 100644 --- a/branches/sage/cephmds2/mds/events/EImportMap.h +++ b/branches/sage/cephmds2/mds/events/ESubtreeMap.h @@ -12,46 +12,31 @@ * */ -#ifndef __MDS_EIMPORTMAP_H -#define __MDS_EIMPORTMAP_H +#ifndef __MDS_ESUBTREEMAP_H +#define __MDS_ESUBTREEMAP_H #include "../LogEvent.h" #include "EMetaBlob.h" -class EImportMap : public LogEvent { +class ESubtreeMap : public LogEvent { public: EMetaBlob metablob; - set imports; - map > bounds; + map > subtrees; - EImportMap() : LogEvent(EVENT_IMPORTMAP) { } + ESubtreeMap() : LogEvent(EVENT_SUBTREEMAP) { } void print(ostream& out) { - out << "import_map " << imports.size() << " imports " + out << "subtree_map " << subtrees.size() << " subtrees " << metablob; } void encode_payload(bufferlist& bl) { metablob._encode(bl); - ::_encode(imports, bl); - for (set::iterator p = imports.begin(); - p != imports.end(); - ++p) { - ::_encode(bounds[*p], bl); - if (bounds[*p].empty()) - bounds.erase(*p); - } + ::_encode(subtrees, bl); } void decode_payload(bufferlist& bl, int& off) { metablob._decode(bl, off); - ::_decode(imports, bl, off); - for (set::iterator p = imports.begin(); - p != imports.end(); - ++p) { - ::_decode(bounds[*p], bl, off); - if (bounds[*p].empty()) - bounds.erase(*p); - } + ::_decode(subtrees, bl, off); } bool has_expired(MDS *mds); diff --git a/branches/sage/cephmds2/mds/journal.cc b/branches/sage/cephmds2/mds/journal.cc index 9acd6f35eb7ab..cca893cc457c2 100644 --- a/branches/sage/cephmds2/mds/journal.cc +++ b/branches/sage/cephmds2/mds/journal.cc @@ -13,9 +13,8 @@ */ #include "events/EString.h" -#include "events/EImportMap.h" +#include "events/ESubtreeMap.h" #include "events/ESession.h" -#include "events/EClientMap.h" #include "events/EMetaBlob.h" @@ -517,47 +516,6 @@ void EMetaBlob::replay(MDS *mds) } // ----------------------- -// EClientMap - -bool EClientMap::has_expired(MDS *mds) -{ - if (mds->clientmap.get_committed() >= cmapv) { - dout(10) << "EClientMap.has_expired newer clientmap " << mds->clientmap.get_committed() - << " >= " << cmapv << " has committed" << endl; - return true; - } else if (mds->clientmap.get_committing() >= cmapv) { - dout(10) << "EClientMap.has_expired newer clientmap " << mds->clientmap.get_committing() - << " >= " << cmapv << " is still committing" << endl; - return false; - } else { - dout(10) << "EClientMap.has_expired clientmap " << mds->clientmap.get_version() - << " not empty" << endl; - return false; - } -} - -void EClientMap::expire(MDS *mds, Context *c) -{ - if (mds->clientmap.get_committing() >= cmapv) { - dout(10) << "EClientMap.expire logging clientmap" << endl; - assert(mds->clientmap.get_committing() > mds->clientmap.get_committed()); - mds->clientmap.add_commit_waiter(c); - } else { - dout(10) << "EClientMap.expire logging clientmap" << endl; - mds->log_clientmap(c); - } -} - -void EClientMap::replay(MDS *mds) -{ - dout(10) << "EClientMap.replay v " << cmapv << endl; - int off = 0; - mds->clientmap.decode(mapbl, off); - mds->clientmap.set_committed(mds->clientmap.get_version()); - mds->clientmap.set_committing(mds->clientmap.get_version()); -} - - // ESession bool ESession::has_expired(MDS *mds) { @@ -571,31 +529,32 @@ bool ESession::has_expired(MDS *mds) return false; } else { dout(10) << "ESession.has_expired clientmap " << mds->clientmap.get_version() - << " not empty" << endl; + << " > " << cmapv << ", need to save" << endl; return false; } } void ESession::expire(MDS *mds, Context *c) -{ - if (mds->clientmap.get_committing() >= cmapv) { - dout(10) << "ESession.expire logging clientmap" << endl; - assert(mds->clientmap.get_committing() > mds->clientmap.get_committed()); - mds->clientmap.add_commit_waiter(c); - } else { - dout(10) << "ESession.expire logging clientmap" << endl; - mds->log_clientmap(c); - } +{ + dout(10) << "ESession.expire saving clientmap" << endl; + mds->clientmap.save(c, cmapv); } void ESession::replay(MDS *mds) { - dout(10) << "ESession.replay" << endl; - if (open) - mds->clientmap.open_session(client_inst); - else - mds->clientmap.close_session(client_inst.name.num()); - mds->clientmap.reset_projected(); // make it follow version. + if (mds->clientmap.get_version() >= cmapv) { + dout(10) << "ESession.replay clientmap " << mds->clientmap.get_version() + << " >= " << cmapv << ", noop" << endl; + } else { + dout(10) << "ESession.replay clientmap " << mds->clientmap.get_version() + << " < " << cmapv << endl; + assert(mds->clientmap.get_version() + 1 == cmapv); + if (open) + mds->clientmap.open_session(client_inst); + else + mds->clientmap.close_session(client_inst.name.num()); + mds->clientmap.reset_projected(); // make it follow version. + } } @@ -863,46 +822,46 @@ void ESlaveUpdate::replay(MDS *mds) // ----------------------- -// EImportMap +// ESubtreeMap -bool EImportMap::has_expired(MDS *mds) +bool ESubtreeMap::has_expired(MDS *mds) { - if (mds->mdlog->get_last_import_map_offset() > get_start_off()) { - dout(10) << "EImportMap.has_expired -- there's a newer map" << endl; + if (mds->mdlog->get_last_subtree_map_offset() > get_start_off()) { + dout(10) << "ESubtreeMap.has_expired -- there's a newer map" << endl; return true; } else if (mds->mdlog->is_capped()) { - dout(10) << "EImportMap.has_expired -- log is capped, allowing map to expire" << endl; + dout(10) << "ESubtreeMap.has_expired -- log is capped, allowing map to expire" << endl; return true; } else { - dout(10) << "EImportMap.has_expired -- not until there's a newer map written" - << " (" << get_start_off() << " >= " << mds->mdlog->get_last_import_map_offset() << ")" + dout(10) << "ESubtreeMap.has_expired -- not until there's a newer map written" + << " (" << get_start_off() << " >= " << mds->mdlog->get_last_subtree_map_offset() << ")" << endl; return false; } } -void EImportMap::expire(MDS *mds, Context *c) +void ESubtreeMap::expire(MDS *mds, Context *c) { - dout(10) << "EImportMap.has_expire -- waiting for a newer map to be written (or for shutdown)" << endl; - mds->mdlog->add_import_map_expire_waiter(c); + dout(10) << "ESubtreeMap.has_expire -- waiting for a newer map to be written (or for shutdown)" << endl; + mds->mdlog->add_subtree_map_expire_waiter(c); } -void EImportMap::replay(MDS *mds) +void ESubtreeMap::replay(MDS *mds) { if (mds->mdcache->is_subtrees()) { - dout(10) << "EImportMap.replay -- ignoring, already have import map" << endl; + dout(10) << "ESubtreeMap.replay -- ignoring, already have import map" << endl; } else { - dout(10) << "EImportMap.replay -- reconstructing (auth) subtree spanning tree" << endl; + dout(10) << "ESubtreeMap.replay -- reconstructing (auth) subtree spanning tree" << endl; // first, stick the spanning tree in my cache metablob.replay(mds); // restore import/export maps - for (set::iterator p = imports.begin(); - p != imports.end(); + for (map >::iterator p = subtrees.begin(); + p != subtrees.end(); ++p) { - CDir *dir = mds->mdcache->get_dirfrag(*p); - mds->mdcache->adjust_subtree_auth(dir, mds->get_nodeid()); + CDir *dir = mds->mdcache->get_dirfrag(p->first); + mds->mdcache->adjust_bounded_subtree_auth(dir, p->second, mds->get_nodeid()); } } mds->mdcache->show_subtrees(); diff --git a/branches/sage/cephmds2/mds/mdstypes.h b/branches/sage/cephmds2/mds/mdstypes.h index 0c661a9cc5b4e..4008ca8438b01 100644 --- a/branches/sage/cephmds2/mds/mdstypes.h +++ b/branches/sage/cephmds2/mds/mdstypes.h @@ -37,7 +37,8 @@ using namespace std; #define MDS_INO_ANCHORTABLE 3 #define MDS_INO_LOG_OFFSET 0x100 #define MDS_INO_IDS_OFFSET 0x200 -#define MDS_INO_STRAY_OFFSET 0x300 +#define MDS_INO_CLIENTMAP_OFFSET 0x300 +#define MDS_INO_STRAY_OFFSET 0x400 #define MDS_INO_BASE 0x1000 #define MDS_INO_STRAY(x) (MDS_INO_STRAY_OFFSET+((unsigned)x)) diff --git a/branches/sage/cephmds2/messages/MMDSCacheRejoin.h b/branches/sage/cephmds2/messages/MMDSCacheRejoin.h index 1d7e30ea93687..c0303fd1af455 100644 --- a/branches/sage/cephmds2/messages/MMDSCacheRejoin.h +++ b/branches/sage/cephmds2/messages/MMDSCacheRejoin.h @@ -219,7 +219,7 @@ class MMDSCacheRejoin : public Message { ::_decode(authpinned_inodes, payload, off); ::_decode(xlocked_inodes, payload, off); ::_decode(cap_export_bl, payload, off); - { + if (cap_export_bl.length()) { int off = 0; ::_decode(cap_exports, cap_export_bl, off); ::_decode(cap_export_paths, cap_export_bl, off); diff --git a/branches/sage/cephmds2/messages/MMDSImportMap.h b/branches/sage/cephmds2/messages/MMDSResolve.h similarity index 54% rename from branches/sage/cephmds2/messages/MMDSImportMap.h rename to branches/sage/cephmds2/messages/MMDSResolve.h index d83d5681ad71e..2103a0115081d 100644 --- a/branches/sage/cephmds2/messages/MMDSImportMap.h +++ b/branches/sage/cephmds2/messages/MMDSResolve.h @@ -12,39 +12,38 @@ * */ -#ifndef __MMDSIMPORTMAP_H -#define __MMDSIMPORTMAP_H +#ifndef __MMDSRESOLVE_H +#define __MMDSRESOLVE_H #include "msg/Message.h" #include "include/types.h" - -class MMDSImportMap : public Message { +class MMDSResolve : public Message { public: - map > imap; - map > ambiguous_imap; + map > subtrees; + map > ambiguous_imports; list slave_requests; - MMDSImportMap() : Message(MSG_MDS_IMPORTMAP) {} + MMDSResolve() : Message(MSG_MDS_RESOLVE) {} - char *get_type_name() { return "mdsimportmap"; } + char *get_type_name() { return "mds_resolve"; } void print(ostream& out) { - out << "mdsimportmap(" << imap.size() - << "+" << ambiguous_imap.size() - << " imports +" << slave_requests.size() << " slave requests)"; + out << "mds_resolve(" << subtrees.size() + << "+" << ambiguous_imports.size() + << " subtrees +" << slave_requests.size() << " slave requests)"; } - void add_import(dirfrag_t im) { - imap[im].clear(); + void add_subtree(dirfrag_t im) { + subtrees[im].clear(); } - void add_import_export(dirfrag_t im, dirfrag_t ex) { - imap[im].push_back(ex); + void add_subtree_bound(dirfrag_t im, dirfrag_t ex) { + subtrees[im].push_back(ex); } void add_ambiguous_import(dirfrag_t im, const list& m) { - ambiguous_imap[im] = m; + ambiguous_imports[im] = m; } void add_slave_request(metareqid_t reqid) { @@ -52,14 +51,14 @@ class MMDSImportMap : public Message { } void encode_payload() { - ::_encode(imap, payload); - ::_encode(ambiguous_imap, payload); + ::_encode(subtrees, payload); + ::_encode(ambiguous_imports, payload); ::_encode(slave_requests, payload); } void decode_payload() { int off = 0; - ::_decode(imap, payload, off); - ::_decode(ambiguous_imap, payload, off); + ::_decode(subtrees, payload, off); + ::_decode(ambiguous_imports, payload, off); ::_decode(slave_requests, payload, off); } }; diff --git a/branches/sage/cephmds2/msg/Message.cc b/branches/sage/cephmds2/msg/Message.cc index 4c7572bc77a7d..d6363a4c2ad11 100644 --- a/branches/sage/cephmds2/msg/Message.cc +++ b/branches/sage/cephmds2/msg/Message.cc @@ -50,7 +50,7 @@ using namespace std; #include "messages/MMDSGetMap.h" #include "messages/MMDSMap.h" #include "messages/MMDSBeacon.h" -#include "messages/MMDSImportMap.h" +#include "messages/MMDSResolve.h" #include "messages/MMDSResolveAck.h" #include "messages/MMDSCacheRejoin.h" //#include "messages/MMDSCacheRejoinAck.h" @@ -215,8 +215,8 @@ decode_message(msg_envelope_t& env, bufferlist& payload) case MSG_MDS_BEACON: m = new MMDSBeacon; break; - case MSG_MDS_IMPORTMAP: - m = new MMDSImportMap; + case MSG_MDS_RESOLVE: + m = new MMDSResolve; break; case MSG_MDS_RESOLVEACK: m = new MMDSResolveAck; diff --git a/branches/sage/cephmds2/msg/Message.h b/branches/sage/cephmds2/msg/Message.h index 05d347e0bbdbe..58f5da03f3943 100644 --- a/branches/sage/cephmds2/msg/Message.h +++ b/branches/sage/cephmds2/msg/Message.h @@ -83,7 +83,7 @@ #define MSG_MDS_HEARTBEAT 104 // for mds load balancer #define MSG_MDS_BEACON 105 // to monitor -#define MSG_MDS_IMPORTMAP 106 +#define MSG_MDS_RESOLVE 106 #define MSG_MDS_RESOLVEACK 107 #define MSG_MDS_CACHEREJOIN 108 -- 2.39.5