From 15e3db0696ef9a2353fe3283db3d4d0a1d3bc891 Mon Sep 17 00:00:00 2001 From: sageweil Date: Sun, 28 Jan 2007 03:12:42 +0000 Subject: [PATCH] EExportStart/Finish partially implemented. next up is rejoin phase, auth bit adjustment, etc.. git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1046 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/cephmds2/mds/CDir.cc | 17 +----- branches/sage/cephmds2/mds/CDir.h | 1 + branches/sage/cephmds2/mds/MDCache.cc | 26 ++++++++ branches/sage/cephmds2/mds/MDCache.h | 3 + branches/sage/cephmds2/mds/MDLog.cc | 16 ++--- branches/sage/cephmds2/mds/MDLog.h | 8 +++ branches/sage/cephmds2/mds/Migrator.cc | 40 +++++++++---- branches/sage/cephmds2/mds/Migrator.h | 3 +- .../sage/cephmds2/mds/events/EExportStart.h | 10 +++- branches/sage/cephmds2/mds/journal.cc | 59 ++++++++++++------- 10 files changed, 121 insertions(+), 62 deletions(-) diff --git a/branches/sage/cephmds2/mds/CDir.cc b/branches/sage/cephmds2/mds/CDir.cc index dfabfbc7c5ebf..8f5137514e00f 100644 --- a/branches/sage/cephmds2/mds/CDir.cc +++ b/branches/sage/cephmds2/mds/CDir.cc @@ -538,20 +538,9 @@ void CDir::last_put() */ int CDir::authority() { - if (get_dir_auth() >= 0) - return get_dir_auth(); - - /* - CDir *parent = inode->get_parent_dir(); - if (parent) - return parent->authority(); - - // root, or dangling - assert(inode->is_root()); // no dirs under danglers!? - //assert(inode->is_root() || inode->is_dangling()); - */ - - return inode->authority(); + if (dir_auth == CDIR_AUTH_PARENT) + return inode->authority(); + return dir_auth; } int CDir::dentry_authority(const string& dn ) diff --git a/branches/sage/cephmds2/mds/CDir.h b/branches/sage/cephmds2/mds/CDir.h index aa9114f764bc1..fa21c9cdcf3ef 100644 --- a/branches/sage/cephmds2/mds/CDir.h +++ b/branches/sage/cephmds2/mds/CDir.h @@ -46,6 +46,7 @@ class Context; // directory authority types // >= 0 is the auth mds #define CDIR_AUTH_PARENT -1 // default +#define CDIR_AUTH_UNKNOWN -2 #define CDIR_NONCE_EXPORT 1 diff --git a/branches/sage/cephmds2/mds/MDCache.cc b/branches/sage/cephmds2/mds/MDCache.cc index 68e6e1083ee3c..add3fa94f8c13 100644 --- a/branches/sage/cephmds2/mds/MDCache.cc +++ b/branches/sage/cephmds2/mds/MDCache.cc @@ -103,6 +103,7 @@ MDCache::~MDCache() } + void MDCache::log_stat(Logger *logger) { if (get_root()) { @@ -269,6 +270,31 @@ void MDCache::add_import(CDir *dir) } +void MDCache::recalc_auth_bits() +{ + dout(7) << "recalc_auth_bits" << endl; + + for (hash_map::iterator p = inode_map.begin(); + p != inode_map.end(); + ++p) { + CInode *in = p->second; + if (in->authority() == mds->get_nodeid()) + in->state_set(CInode::STATE_AUTH); + else + in->state_clear(CInode::STATE_AUTH); + + if (in->dir) { + if (in->dir->authority() == mds->get_nodeid()) + in->dir->state_set(CDIR_STATE_AUTH); + else + in->dir->state_clear(CDIR_STATE_AUTH); + } + } + show_imports(); + show_cache(); +} + + diff --git a/branches/sage/cephmds2/mds/MDCache.h b/branches/sage/cephmds2/mds/MDCache.h index fd3c41c7d09c0..768e6a4beff6e 100644 --- a/branches/sage/cephmds2/mds/MDCache.h +++ b/branches/sage/cephmds2/mds/MDCache.h @@ -122,6 +122,8 @@ class MDCache { friend class Renamer; friend class MDBalancer; friend class EImportMap; + friend class MDLog; + public: @@ -143,6 +145,7 @@ class MDCache { int get_num_imports() { return imports.size(); } void add_import(CDir *dir); void remove_import(CDir *dir); + void recalc_auth_bits(); void log_import_map(Context *onsync=0); diff --git a/branches/sage/cephmds2/mds/MDLog.cc b/branches/sage/cephmds2/mds/MDLog.cc index dcb5216d77eec..af193defe6be4 100644 --- a/branches/sage/cephmds2/mds/MDLog.cc +++ b/branches/sage/cephmds2/mds/MDLog.cc @@ -378,10 +378,6 @@ public: void MDLog::_replay() { - dout(10) << "_replay read_pos " << journaler->get_read_pos() - << " / " << journaler->get_write_pos() - << endl; - // read what's buffered while (journaler->is_readable() && journaler->get_read_pos() < journaler->get_write_pos()) { @@ -411,20 +407,12 @@ void MDLog::_replay() delete le; } - dout(10) << "_replay read_pos " << journaler->get_read_pos() - << " / " << journaler->get_write_pos() - << endl; - // wait for read? if (journaler->get_read_pos() < journaler->get_write_pos()) { journaler->wait_for_readable(new C_MDL_Replay(this)); return; } - dout(10) << "_replay read_pos " << journaler->get_read_pos() - << " / " << journaler->get_write_pos() - << endl; - // done! assert(journaler->get_read_pos() == journaler->get_write_pos()); dout(10) << "_replay - complete" << endl; @@ -432,6 +420,10 @@ void MDLog::_replay() // move read pointer _back_ to expire pos, for eventual trimming journaler->set_read_pos(journaler->get_expire_pos()); + + // twiddle all dir and inode auth bits + mds->mdcache->recalc_auth_bits(); + // kick waiter(s) list ls; ls.swap(waitfor_replay); diff --git a/branches/sage/cephmds2/mds/MDLog.h b/branches/sage/cephmds2/mds/MDLog.h index d15bea1fcc9d2..c6eec22d22887 100644 --- a/branches/sage/cephmds2/mds/MDLog.h +++ b/branches/sage/cephmds2/mds/MDLog.h @@ -76,9 +76,17 @@ class MDLog { void init_journaler(); + + public: + // replay state + map > pending_exports; + + + public: MDLog(MDS *m); ~MDLog(); + void set_max_events(size_t max) { max_events = max; } diff --git a/branches/sage/cephmds2/mds/Migrator.cc b/branches/sage/cephmds2/mds/Migrator.cc index 143c6ee77397c..24a3488a02e94 100644 --- a/branches/sage/cephmds2/mds/Migrator.cc +++ b/branches/sage/cephmds2/mds/Migrator.cc @@ -276,15 +276,8 @@ void Migrator::export_dir(CDir *dir, // take away the popularity we're sending. FIXME: do this later? mds->balancer->subtract_export(dir); - // we need to do a few things here.. - C_Gather *gather = new C_Gather(new C_MDC_ExportFreeze(this, dir, dest)); - - // 1- freeze the subtree - dir->freeze_tree(gather->new_sub()); - - // 2- log out intentions - mds->mdlog->submit_entry(new EExportStart(dir, dest), - gather->new_sub()); + // freeze the subtree + dir->freeze_tree(new C_MDC_ExportFreeze(this, dir, dest)); } @@ -306,6 +299,19 @@ void Migrator::handle_export_dir_discover_ack(MExportDirDiscoverAck *m) delete m; // done } +class C_MDC_ExportStartLogged : public Context { + Migrator *mig; + CDir *ex; // dir i'm exporting + int dest; + MExportDirPrep *prep; + +public: + C_MDC_ExportStartLogged(Migrator *m, CDir *e, int d, MExportDirPrep *p) : + mig(m), ex(e), dest(d), prep(p) {} + virtual void finish(int r) { + mig->export_dir_frozen_logged(ex, prep, dest); + } +}; void Migrator::export_dir_frozen(CDir *dir, int dest) @@ -315,6 +321,7 @@ void Migrator::export_dir_frozen(CDir *dir, show_imports(); + EExportStart *le = new EExportStart(dir, dest); MExportDirPrep *prep = new MExportDirPrep(dir->inode); // include spanning tree for all nested exports. @@ -322,9 +329,10 @@ void Migrator::export_dir_frozen(CDir *dir, // dir_auth updates on any nested exports are properly absorbed. set inodes_added; - + // include base dir prep->add_dir( new CDirDiscover(dir, dir->add_replica(dest)) ); + le->metablob.add_dir( dir, false ); // also include traces to all nested exports. set my_nested; @@ -337,6 +345,9 @@ void Migrator::export_dir_frozen(CDir *dir, dout(7) << " including nested export " << *exp << " in prep" << endl; prep->add_export( exp->ino() ); + le->get_bounds().insert(exp->ino()); + le->metablob.add_dir_context( exp ); + le->metablob.add_dir( exp, false ); /* first assemble each trace, in trace order, and put in message */ list inode_trace; @@ -377,7 +388,14 @@ void Migrator::export_dir_frozen(CDir *dir, } - // send it! + // log our intentions + dout(7) << " logging EExportStart" << endl; + mds->mdlog->submit_entry(le, new C_MDC_ExportStartLogged(this, dir, dest, prep)); +} + +void Migrator::export_dir_frozen_logged(CDir *dir, MExportDirPrep *prep, int dest) +{ + dout(7) << "export_dir_frozen_logged " << *dir << endl; mds->send_message_mds(prep, dest, MDS_PORT_MIGRATOR); } diff --git a/branches/sage/cephmds2/mds/Migrator.h b/branches/sage/cephmds2/mds/Migrator.h index 09925c667b7fa..1745022ca8c1b 100644 --- a/branches/sage/cephmds2/mds/Migrator.h +++ b/branches/sage/cephmds2/mds/Migrator.h @@ -112,6 +112,7 @@ public: protected: void handle_export_dir_discover_ack(MExportDirDiscoverAck *m); void export_dir_frozen(CDir *dir, int dest); + void export_dir_frozen_logged(CDir *dir, MExportDirPrep *prep, int dest); void handle_export_dir_prep_ack(MExportDirPrepAck *m); void export_dir_go(CDir *dir, int dest); @@ -124,8 +125,8 @@ public: void handle_export_dir_notify_ack(MExportDirNotifyAck *m); friend class C_MDC_ExportFreeze; + friend class C_MDC_ExportStartLogged; friend class C_MDS_ExportFinishLogged; - // importer void handle_export_dir_discover(MExportDirDiscover *m); void handle_export_dir_discover_2(MExportDirDiscover *m, CInode *in, int r); diff --git a/branches/sage/cephmds2/mds/events/EExportStart.h b/branches/sage/cephmds2/mds/events/EExportStart.h index d3e9c91dd64fe..37ed92a7239c2 100644 --- a/branches/sage/cephmds2/mds/events/EExportStart.h +++ b/branches/sage/cephmds2/mds/events/EExportStart.h @@ -23,10 +23,12 @@ #include "EMetaBlob.h" class EExportStart : public LogEvent { - protected: + public: EMetaBlob metablob; // exported dir - inodeno_t dirino; + protected: + inodeno_t dirino; int dest; // dest mds + set bounds; public: EExportStart(CDir *dir, int d) : LogEvent(EVENT_EXPORTSTART), @@ -36,6 +38,8 @@ class EExportStart : public LogEvent { } EExportStart() : LogEvent(EVENT_EXPORTSTART) { } + set &get_bounds() { return bounds; } + void print(ostream& out) { out << "export_start " << dirino << " -> " << dest; } @@ -44,6 +48,7 @@ class EExportStart : public LogEvent { metablob._encode(bl); bl.append((char*)&dirino, sizeof(dirino)); bl.append((char*)&dest, sizeof(dest)); + ::_encode(bounds, bl); } void decode_payload(bufferlist& bl, int& off) { metablob._decode(bl, off); @@ -51,6 +56,7 @@ class EExportStart : public LogEvent { off += sizeof(dirino); bl.copy(off, sizeof(dest), (char*)&dest); off += sizeof(dest); + ::_decode(bounds, bl, off); } bool has_expired(MDS *mds); diff --git a/branches/sage/cephmds2/mds/journal.cc b/branches/sage/cephmds2/mds/journal.cc index 82e6990fbf721..61ebcb145473c 100644 --- a/branches/sage/cephmds2/mds/journal.cc +++ b/branches/sage/cephmds2/mds/journal.cc @@ -389,7 +389,7 @@ void EImportMap::replay(MDS *mds) CDir *ex = exi->get_or_open_dir(mds->mdcache); assert(ex); - ex->set_dir_auth(mds->get_nodeid() + 1); // anything that's not me, for now! + ex->set_dir_auth(CDIR_AUTH_UNKNOWN); ex->state_set(CDIR_STATE_EXPORT); ex->get(CDir::PIN_EXPORT); mds->mdcache->exports.insert(ex); @@ -397,26 +397,7 @@ void EImportMap::replay(MDS *mds) } } - // twiddle all dir and inode auth bits - for (hash_map::iterator p = mds->mdcache->inode_map.begin(); - p != mds->mdcache->inode_map.end(); - ++p) { - CInode *in = p->second; - if (in->authority() == mds->get_nodeid()) - in->state_set(CInode::STATE_AUTH); - else - in->state_clear(CInode::STATE_AUTH); - - if (in->dir) { - if (in->dir->authority() == mds->get_nodeid()) - in->dir->state_set(CDIR_STATE_AUTH); - else - in->dir->state_clear(CDIR_STATE_AUTH); - } - } - mds->mdcache->show_imports(); - mds->mdcache->show_cache(); } @@ -515,10 +496,15 @@ void EExportStart::expire(MDS *mds, Context *c) void EExportStart::replay(MDS *mds) { dout(10) << "EExportStart.replay " << dirino << " -> " << dest << endl; - metablob.replay(mds); - + // put in pending_exports lists + CInode *diri = mds->mdcache->get_inode(dirino); + assert(diri); + CDir *dir = diri->dir; + assert(dir); + + mds->mdlog->pending_exports[dirino] = bounds; } // ----------------------- @@ -526,13 +512,42 @@ void EExportStart::replay(MDS *mds) bool EExportFinish::has_expired(MDS *mds) { + // we can always expire. return true; } + void EExportFinish::expire(MDS *mds, Context *c) { + assert(0); // should never happen. } + void EExportFinish::replay(MDS *mds) { + dout(10) << "EExportFinish.replay " << dirino << endl; + + CInode *diri = mds->mdcache->get_inode(dirino); + assert(diri); + CDir *dir = diri->dir; + assert(dir); + + set bounds = mds->mdlog->pending_exports[dirino]; + mds->mdlog->pending_exports.erase(dirino); + + // adjust dir_auth + dir->set_dir_auth( CDIR_AUTH_UNKNOWN ); // not me + + // bounds (exports, before) + for (set::iterator p = bounds.begin(); + p != bounds.end(); + ++p) { + CInode *bi = mds->mdcache->get_inode(*p); + assert(bi); + CDir *bd = bi->dir; + assert(bd); + + assert(bd->get_dir_auth() != CDIR_AUTH_PARENT); + bd->set_dir_auth( CDIR_AUTH_UNKNOWN ); // not me + } } -- 2.39.5