}
assert(s.size() == (unsigned)n);
}
+// map<T,list<U>>
+template<class T, class U>
+inline void _encode(const std::map<T, std::list<U> >& s, bufferlist& bl)
+{
+ int n = s.size();
+ bl.append((char*)&n, sizeof(n));
+ for (typename std::map<T, std::list<U> >::const_iterator it = s.begin();
+ it != s.end();
+ it++) {
+ T k = it->first;
+ bl.append((char*)&k, sizeof(k));
+ ::_encode(it->second, bl);
+ n--;
+ }
+ assert(n==0);
+}
+template<class T, class U>
+inline void _decode(std::map<T, std::list<U> >& s, bufferlist& bl, int& off)
+{
+ s.clear();
+ int n;
+ bl.copy(off, sizeof(n), (char*)&n);
+ off += sizeof(n);
+ for (int i=0; i<n; i++) {
+ T k;
+ bl.copy(off, sizeof(k), (char*)&k);
+ off += sizeof(k);
+ ::_decode(s[k], bl, off);
+ }
+ assert(s.size() == (unsigned)n);
+}
// map<T,U>
#include <vector>
using namespace std;
-#include <ext/rope>
-using namespace __gnu_cxx;
-
#include "buffer.h"
return bits.size() == 0;
}
-
- void _rope(crope& r) {
- char n = bits.size();
- r.append((char*)&n, sizeof(char));
- for (vector<string>::iterator it = bits.begin();
- it != bits.end();
- it++) {
- r.append((*it).c_str(), (*it).length()+1);
- }
- }
-
- void _unrope(crope& r, int& off) {
- clear();
-
- char n;
- r.copy(off, sizeof(char), (char*)&n);
- off += sizeof(char);
- for (int i=0; i<n; i++) {
- string s = r.c_str() + off;
- off += s.length() + 1;
- add_dentry(s);
- }
- }
void _encode(bufferlist& bl) {
char n = bits.size();
#include <iomanip>
using namespace std;
+#include <ext/hash_map>
+using namespace __gnu_cxx;
+
typedef __uint32_t objectrev_t;
}
#include <string>
+#include <list>
#include <set>
#include <map>
#include <vector>
#include <iomanip>
using namespace std;
-#include <ext/rope>
+#include <ext/hash_map>
using namespace __gnu_cxx;
+
#include "object.h"
#ifndef MIN
return out;
}
+template<class A>
+inline ostream& operator<<(ostream& out, const list<A>& ilist) {
+ for (typename list<A>::const_iterator it = ilist.begin();
+ it != ilist.end();
+ it++) {
+ if (it != ilist.begin()) out << ",";
+ out << *it;
+ }
+ return out;
+}
+
template<class A>
inline ostream& operator<<(ostream& out, const set<A>& iset) {
for (typename set<A>::const_iterator it = iset.begin();
-
-// -- rope helpers --
-
-// string
-inline void _rope(string& s, crope& r)
-{
- r.append(s.c_str(), s.length()+1);
-}
-inline void _unrope(string& s, crope& r, int& off)
-{
- s = r.c_str() + off;
- off += s.length() + 1;
-}
-
-// set<int>
-inline void _rope(set<int>& s, crope& r)
-{
- int n = s.size();
- r.append((char*)&n, sizeof(n));
- for (set<int>::iterator it = s.begin();
- it != s.end();
- it++) {
- int v = *it;
- r.append((char*)&v, sizeof(v));
- n--;
- }
- assert(n==0);
-}
-inline void _unrope(set<int>& s, crope& r, int& off)
-{
- s.clear();
- int n;
- r.copy(off, sizeof(n), (char*)&n);
- off += sizeof(n);
- for (int i=0; i<n; i++) {
- int v;
- r.copy(off, sizeof(v), (char*)&v);
- off += sizeof(v);
- s.insert(v);
- }
- assert(s.size() == (unsigned)n);
-}
-
#endif
// nest pins?
if (is_subtree_root()) return; // no.
- assert(!is_import());
+ //assert(!is_import());
inode->nested_auth_pins++;
if (inode->parent)
// nest?
if (is_subtree_root()) return; // no.
- assert(!is_import());
+ //assert(!is_import());
inode->nested_auth_pins--;
if (inode->parent)
CDir *dir = this;
while (1) {
if (dir->is_freezing_tree_root()) return true;
- if (dir->is_import()) return false;
- if (dir->is_hashed()) return false;
+ if (dir->is_subtree_root()) return false;
if (dir->inode->parent)
dir = dir->inode->parent->dir;
else
static const int PIN_CHILD = 0;
static const int PIN_OPENED = 1; // open by another node
static const int PIN_WAITER = 2; // waiter(s)
- static const int PIN_IMPORT = 3;
+ //static const int PIN_IMPORT = 3;
static const int PIN_EXPORT = 4;
//static const int PIN_FREEZE = 5;
// static const int PIN_FREEZELEAF = 6;
case PIN_CHILD: return "child";
case PIN_OPENED: return "opened";
case PIN_WAITER: return "waiter";
- case PIN_IMPORT: return "import";
+ //case PIN_IMPORT: return "import";
case PIN_EXPORT: return "export";
case PIN_EXPORTING: return "exporting";
case PIN_IMPORTING: return "importing";
static const unsigned STATE_COMMITTING = (1<< 8); // mid-commit
static const unsigned STATE_FETCHING = (1<< 9); // currenting fetching
static const unsigned STATE_DELETED = (1<<10);
- static const unsigned STATE_IMPORT = (1<<11); // flag set if this is an import.
+ //static const unsigned STATE_IMPORT = (1<<11); // flag set if this is an import.
static const unsigned STATE_EXPORT = (1<<12);
static const unsigned STATE_IMPORTBOUND = (1<<13);
static const unsigned STATE_EXPORTBOUND = (1<<14);
static const unsigned MASK_STATE_EXPORTED =
STATE_COMPLETE|STATE_DIRTY;
static const unsigned MASK_STATE_IMPORT_KEPT =
- STATE_IMPORT|STATE_EXPORT
+ //STATE_IMPORT|
+ STATE_EXPORT
|STATE_IMPORTBOUND|STATE_EXPORTBOUND
|STATE_FROZENTREE|STATE_PROXY;
static const unsigned MASK_STATE_EXPORT_KEPT =
bool is_auth() { return state & STATE_AUTH; }
bool is_proxy() { return state & STATE_PROXY; }
- bool is_import() { return state & STATE_IMPORT; }
- bool is_export() { return state & STATE_EXPORT; }
+ //bool is_import() { return state & STATE_IMPORT; }
+ //bool is_export() { return state & STATE_EXPORT; }
bool is_hashed() { return state & STATE_HASHED; }
bool is_hashing() { return state & STATE_HASHING; }
off += sizeof(inode.atime);
}
-/* not used currently
-void CInode::decode_merge_file_state(crope& r, int& off)
-{
- __uint64_t size;
- r.copy(off, sizeof(size), (char*)&size);
- off += sizeof(size);
- if (size > inode.size) inode.size = size;
-
- time_t t;
- r.copy(off, sizeof(t), (char*)&t);
- off += sizeof(t);
- if (t > inode.mtime) inode.mtime = t;
-
- r.copy(off, sizeof(t), (char*)&t);
- off += sizeof(t);
- if (t > inode.atime) inode.atime = t;
-}
-*/
void CInode::encode_hard_state(bufferlist& r)
{
#include "events/EUnlink.h"
#include "events/EAlloc.h"
#include "events/EPurgeFinish.h"
-#include "events/EExportStart.h"
-#include "events/EExportFinish.h"
+#include "events/EExport.h"
#include "events/EImportStart.h"
#include "events/EImportFinish.h"
case EVENT_UNLINK: le = new EUnlink(); break;
case EVENT_PURGEFINISH: le = new EPurgeFinish(); break;
case EVENT_ALLOC: le = new EAlloc(); break;
- case EVENT_EXPORTSTART: le = new EExportStart; break;
- case EVENT_EXPORTFINISH: le = new EExportFinish; break;
+ case EVENT_EXPORT: le = new EExport; break;
case EVENT_IMPORTSTART: le = new EImportStart; break;
case EVENT_IMPORTFINISH: le = new EImportFinish; break;
default:
#define EVENT_RMDIR 21
#define EVENT_PURGEFINISH 22
-#define EVENT_EXPORTSTART 30
-#define EVENT_EXPORTFINISH 31
-#define EVENT_IMPORTSTART 32
-#define EVENT_IMPORTFINISH 33
+#define EVENT_EXPORT 30
+#define EVENT_IMPORTSTART 31
+#define EVENT_IMPORTFINISH 32
}
-// MDCache
+// ====================================================================
+// some inode functions
CInode *MDCache::create_inode()
{
}
-/*
- * take note of where we write import_maps in the log, as we need
- * to take care not to expire them until an updated map is safely flushed.
- */
-class C_MDS_WroteImportMap : public Context {
- MDLog *mdlog;
- off_t end_off;
-public:
- C_MDS_WroteImportMap(MDLog *ml, off_t eo) : mdlog(ml), end_off(eo) { }
- void finish(int r) {
- // cout << "WroteImportMap at " << end_off << endl;
- if (r >= 0)
- mdlog->last_import_map = end_off;
- mdlog->writing_import_map = false;
- }
-};
-
-
-void MDCache::log_import_map(Context *onsync)
+CInode *MDCache::create_root_inode()
{
- dout(10) << "log_import_map " << num_subtrees() << " subtrees"
- << num_subtrees_fullauth() << " fullauth"
- << endl;
+ CInode *root = new CInode(this);
+ memset(&root->inode, 0, sizeof(inode_t));
+ root->inode.ino = 1;
+ root->inode.hash_seed = 0; // not hashed!
- EImportMap *le = new EImportMap;
+ // make it up (FIXME)
+ root->inode.mode = 0755 | INODE_MODE_DIR;
+ root->inode.size = 0;
+ root->inode.ctime =
+ root->inode.mtime = g_clock.gettime();
- // include import/export inodes,
- // and a spanning tree to tie it to the root of the fs
- /*
- for (set<CDir*>::iterator p = imports.begin();
- p != imports.end();
- p++) {
- CDir *im = *p;
- le->imports.insert(im->ino());
- le->metablob.add_dir_context(im, true);
- le->metablob.add_dir(im, false);
-
- if (nested_exports.count(im)) {
- for (set<CDir*>::iterator q = nested_exports[im].begin();
- q != nested_exports[im].end();
- ++q) {
- CDir *ex = *q;
- le->nested_exports[im->ino()].insert(ex->ino());
- le->exports.insert(ex->ino());
- le->metablob.add_dir_context(ex);
- le->metablob.add_dir(ex, false);
- }
- }
- }
- */
-
- mds->mdlog->writing_import_map = true;
- mds->mdlog->submit_entry(le);
- mds->mdlog->wait_for_sync(new C_MDS_WroteImportMap(mds->mdlog, mds->mdlog->get_write_pos()));
- if (onsync)
- mds->mdlog->wait_for_sync(onsync);
-}
-
-
-
-
-
-// =====================
-// recovery stuff
-
-void MDCache::send_pending_import_maps()
-{
- if (wants_import_map.empty())
- return; // nothing to send.
-
- // only if it's appropriate!
- if (migrator->is_exporting()) {
- dout(7) << "send_pending_import_maps waiting, exports still in progress" << endl;
- return; // not now
- }
-
- // ok, send them.
- for (set<int>::iterator p = wants_import_map.begin();
- p != wants_import_map.end();
- p++)
- send_import_map_now(*p);
- wants_import_map.clear();
-}
-
-void MDCache::send_import_map(int who)
-{
- if (migrator->is_exporting())
- send_import_map_later(who);
- else
- send_import_map_now(who);
-}
-
-void MDCache::send_import_map_now(int who)
-{
- dout(10) << "send_import_map to mds" << who << endl;
-
- /*
- MMDSImportMap *m = new MMDSImportMap;
-
- // known
- for (map<CDir*,set<CDir*> >::iterator p = subtrees.begin();
- p != subtrees.end();
- p++) {
- CDir *dir = p->first;
-
- // only our subtrees
- if (dir->authority().first != mds->get_nodeid())
- continue;
-
- if (migrator->is_importing(dir->ino())) {
- // ambiguous (mid-import)
- m->add_ambiguous_import(dir->ino(),
- migrator->get_import_bound_inos(dir->ino()));
- } else {
- // not ambiguous.
- m->add_import(dir->ino());
-
- // bounds too
- if (nested_exports.count(dir)) {
- for (set<CDir*>::iterator q = p->second.begin();
- q != p->second.end();
- ++q) {
- CDir *bound = *q;
- m->add_import_export(dir->ino(), bound->ino());
- }
- }
- }
- }
-
- // ambiguous
- for (map<inodeno_t, set<inodeno_t> >::iterator p = my_ambiguous_imports.begin();
- p != my_ambiguous_imports.end();
- ++p)
- m->add_ambiguous_import(p->first, p->second);
+ root->inode.nlink = 1;
+ root->inode.layout = g_OSD_MDDirLayout;
- // second
- mds->send_message_mds(m, who, MDS_PORT_CACHE);
+ set_root( root );
+ add_inode( root );
- */
+ return root;
}
-
-/*
- * during resolve state, we share import_maps to determine who
- * is authoritative for which trees. we expect to get an import_map
- * from _everyone_ in the recovery_set (the mds cluster at the time of
- * the first failure).
- */
-void MDCache::handle_import_map(MMDSImportMap *m)
+int MDCache::open_root(Context *c)
{
- dout(7) << "handle_import_map from " << m->get_source() << endl;
- int from = m->get_source().num();
-
- // FIXME: check if we are a surviving ambiguous importer
-
- // update my dir_auth values
- for (map<inodeno_t, set<inodeno_t> >::iterator pi = m->imap.begin();
- pi != m->imap.end();
- ++pi) {
- CInode *imi = get_inode(pi->first);
- if (!imi) continue;
- CDir *im = imi->dir;
- if (!im) continue;
-
- im->set_dir_auth(from);
-
- for (set<inodeno_t>::iterator pe = pi->second.begin();
- pe != pi->second.end();
- ++pe) {
- CInode *exi = get_inode(*pe);
- if (!exi) continue;
- CDir *ex = exi->dir;
- if (!ex) continue;
-
- if (ex->get_dir_auth().first == CDIR_AUTH_PARENT)
- ex->set_dir_auth(CDIR_AUTH_UNKNOWN);
- }
- }
-
- // note ambiguous imports too
- for (map<inodeno_t, set<inodeno_t> >::iterator pi = m->ambiguous_imap.begin();
- pi != m->ambiguous_imap.end();
- ++pi)
- mds->mdcache->other_ambiguous_imports[from][pi->first].swap( pi->second );
-
- // did i get them all?
- got_import_map.insert(from);
-
- if (got_import_map == recovery_set) {
- dout(10) << "got all import maps, ready to rejoin" << endl;
- disambiguate_imports();
- recalc_auth_bits();
- trim_non_auth();
-
- // move to rejoin state
- mds->set_want_state(MDSMap::STATE_REJOIN);
-
- } else {
- dout(10) << "still waiting for more importmaps, got " << got_import_map
- << ", need " << recovery_set << endl;
- }
-
- delete m;
-}
+ int whoami = mds->get_nodeid();
+ // open root inode
+ if (whoami == 0) {
+ // i am root inode
+ CInode *root = create_root_inode();
-void MDCache::disambiguate_imports()
-{
- dout(10) << "disambiguate_imports" << endl;
+ // root directory too
+ assert(root->dir == NULL);
+ root->set_dir(new CDir(root, this, true));
+ adjust_subtree_auth(root->dir, 0);
+ root->dir->dir_rep = CDir::REP_ALL; //NONE;
- // other nodes' ambiguous imports
- for (map<int, map<inodeno_t, set<inodeno_t> > >::iterator p = other_ambiguous_imports.begin();
- p != other_ambiguous_imports.begin();
- ++p) {
- int who = p->first;
+ show_imports();
- for (map<inodeno_t, set<inodeno_t> >::iterator q = p->second.begin();
- q != p->second.end();
- ++q) {
- CInode *diri = get_inode(q->first);
- if (!diri) continue;
- CDir *dir = diri->dir;
- if (!dir) continue;
-
- if (dir->authority().first >= CDIR_AUTH_UNKNOWN) {
- dout(10) << "mds" << who << " did not import " << *dir << endl;
- } else {
- dout(10) << "mds" << who << " did import " << *dir << endl;
- int was = dir->authority().first;
- dir->set_dir_auth(who);
-
- for (set<inodeno_t>::iterator r = q->second.begin();
- r != q->second.end();
- ++r) {
- CInode *exi = get_inode(q->first);
- if (!exi) continue;
- CDir *ex = exi->dir;
- if (!ex) continue;
- if (ex->get_dir_auth().first == CDIR_AUTH_PARENT)
- ex->set_dir_auth(was);
- dout(10) << " bound " << *ex << endl;
- }
- }
+ if (c) {
+ c->finish(0);
+ delete c;
}
- }
- other_ambiguous_imports.clear();
-
- // my ambiguous imports
- while (!my_ambiguous_imports.empty()) {
- map<inodeno_t, set<inodeno_t> >::iterator q = my_ambiguous_imports.begin();
+ } else {
+ // request inode from root mds
+ if (waiting_for_root.empty()) {
+ dout(7) << "discovering root" << endl;
- CInode *diri = get_inode(q->first);
- if (!diri) continue;
- CDir *dir = diri->dir;
- if (!dir) continue;
-
- if (dir->authority().first != CDIR_AUTH_UNKNOWN) {
- dout(10) << "ambiguous import auth known, must not be me " << *dir << endl;
- cancel_ambiguous_import(q->first);
+ filepath want;
+ MDiscover *req = new MDiscover(whoami,
+ 0,
+ want,
+ false); // there _is_ no base dir for the root inode
+ mds->send_message_mds(req, 0, MDS_PORT_CACHE);
} else {
- dout(10) << "ambiguous import auth unknown, must be me " << *dir << endl;
- finish_ambiguous_import(q->first);
- }
- }
- assert(my_ambiguous_imports.empty());
-
- show_imports();
-}
-
-void MDCache::cancel_ambiguous_import(inodeno_t dirino)
-{
- assert(my_ambiguous_imports.count(dirino));
- dout(10) << "cancel_ambiguous_import " << dirino
- << " bounds " << my_ambiguous_imports[dirino]
- << endl;
- my_ambiguous_imports.erase(dirino);
-}
-
-void MDCache::finish_ambiguous_import(inodeno_t dirino)
-{
- assert(my_ambiguous_imports.count(dirino));
- set<inodeno_t> bound_inos;
- bound_inos.swap(my_ambiguous_imports[dirino]);
- my_ambiguous_imports.erase(dirino);
+ dout(7) << "waiting for root" << endl;
+ }
- dout(10) << "finish_ambiguous_import " << dirino
- << " bounds " << bound_inos
- << endl;
+ // wait
+ waiting_for_root.push_back(c);
- CInode *diri = get_inode(dirino);
- assert(diri);
- CDir *dir = diri->dir;
- assert(dir);
-
- // make bounds list
- set<CDir*> bounds;
- for (set<inodeno_t>::iterator p = bound_inos.begin();
- p != bound_inos.end();
- ++p) {
- CInode *bi = get_inode(*p);
- assert(bi);
- CDir *bd = bi->dir;
- assert(bd);
- bounds.insert(bd);
}
- // adjust dir_auth, import maps
- adjust_subtree_auth(dir, mds->get_nodeid());
+ return 0;
}
-void MDCache::finish_ambiguous_export(inodeno_t dirino, set<inodeno_t>& bounds)
-{
- CInode *diri = get_inode(dirino);
- assert(diri);
- CDir *dir = diri->dir;
- assert(dir);
-
- dout(10) << "finish_ambiguous_export " << dirino
- << " bounds " << bounds
- << endl;
-
- /*
- // adjust dir_auth
- CDir *im = get_auth_container(dir);
- if (dir->get_inode()->authority().first == CDIR_AUTH_UNKNOWN) {
- // was an import, hose it
- assert(im == dir);
- assert(imports.count(dir));
- imports.erase(dir);
- dir->set_dir_auth( CDIR_AUTH_PARENT );
- dir->state_clear(CDir::STATE_IMPORT);
- dir->put(CDir::PIN_IMPORT);
- } else {
- // i'm now an export
- exports.insert(dir);
- nested_exports[im].insert(dir);
- dir->set_dir_auth( CDIR_AUTH_UNKNOWN ); // not me
- dir->state_set(CDir::STATE_EXPORT);
- dir->get(CDir::PIN_EXPORT);
- }
- dout(10) << " root " << *dir << endl;
- if (dir != im)
- dout(10) << " under " << *im << endl;
-
- // bounds (there were exports, before)
- for (set<inodeno_t>::iterator p = bounds.begin();
- p != bounds.end();
- ++p) {
- CInode *bi = get_inode(*p);
- assert(bi);
- CDir *bd = bi->dir;
- assert(bd);
-
- // hose export
- assert(exports.count(bd));
- exports.erase(bd);
- nested_exports[im].erase(bd);
-
- // fix dir_auth
- assert(bd->get_dir_auth().first != CDIR_AUTH_PARENT);
- bd->set_dir_auth( CDIR_AUTH_PARENT ); // not me
- bd->state_clear(CDir::STATE_EXPORT);
- bd->put(CDir::PIN_EXPORT);
- dout(10) << " bound " << *bd << endl;
- }
- */
- show_imports();
-}
+// ====================================================================
+// subtree management
/*
* adjust the dir_auth of a subtree.
*/
void MDCache::adjust_export_state(CDir *dir)
{
- if (!dir->is_auth() && dir->inode->is_auth()) {
+ //if (!dir->is_auth() && dir->inode->is_auth()) {
+
+ // be auth bit agnostic, so that we work during recovery
+ // (before recalc_auth_bits)
+ if (!dir->authority().first == mds->get_nodeid() &&
+ dir->inode->authority().first != mds->get_nodeid()) {
// export.
if (!dir->state_test(CDir::STATE_EXPORT)) {
dout(10) << "adjust_export_state pinning new export " << *dir << endl;
CDir *root;
if (dir->ino() == 1) {
root = dir; // bootstrap hack.
- subtrees[root].clear();
+ if (subtrees.count(root) == 0)
+ subtrees[root].clear();
} else {
root = get_subtree_root(dir); // subtree root
}
// i am now the subtree root.
root = dir;
}
-
+
// verify/adjust bounds.
// these may be new, but no deeper than any existing bounds.
for (set<CDir*>::iterator p = bounds.begin();
p != bounds.end();
++p) {
CDir *bound = *p;
- if (subtrees[dir].count(bound)) continue; // have it.
- adjust_subtree_auth(bound, oldauth); // otherwise, adjust at bound.
+ if (subtrees[dir].count(bound)) {
+ dout(10) << " already have bound " << *bound << endl;
+ } else {
+ dout(10) << " missing bound " << *bound << ", adjusting auth back to old " << oldauth << endl;
+ adjust_subtree_auth(bound, oldauth); // otherwise, adjust at bound.
+ }
+ }
+
+ verify_subtree_bounds(dir, bounds);
+
+ show_subtrees();
+}
+
+void MDCache::adjust_bounded_subtree_auth(CDir *dir, list<inodeno_t>& bound_inos, pair<int,int> auth)
+{
+ dout(7) << "adjust_bounded_subtree_auth " << dir->get_dir_auth() << " -> " << auth
+ << " on " << *dir
+ << " bound_inos " << bound_inos
+ << endl;
+
+ // make bounds list
+ set<CDir*> bounds;
+ for (list<inodeno_t>::iterator p = bound_inos.begin();
+ p != bound_inos.end();
+ ++p) {
+ CDir *bd = get_dir(*p);
+ if (bd)
+ bounds.insert(bd);
}
-
- verify_subtree_bounds(dir, bounds);
-
- show_subtrees();
+
+ adjust_bounded_subtree_auth(dir, bounds, auth);
}
+
CDir *MDCache::get_subtree_root(CDir *dir)
{
// find the underlying dir that delegates (or is about to delegate) auth
}
}
+void MDCache::remove_subtree(CDir *dir)
+{
+ dout(10) << "remove_subtree " << *dir << endl;
+ assert(subtrees.count(dir));
+ assert(subtrees[dir].empty());
+ subtrees.erase(dir);
+ if (dir->get_parent_dir()) {
+ CDir *p = get_subtree_root(dir->get_parent_dir());
+ assert(subtrees[p].count(dir));
+ subtrees[p].erase(dir);
+ }
+}
+
void MDCache::get_subtree_bounds(CDir *dir, set<CDir*>& bounds)
{
assert(subtrees.count(dir));
+// ====================================================================
+// import map, recovery
+
+/*
+ * take note of where we write import_maps in the log, as we need
+ * to take care not to expire them until an updated map is safely flushed.
+ */
+class C_MDS_WroteImportMap : public Context {
+ MDLog *mdlog;
+ off_t end_off;
+public:
+ C_MDS_WroteImportMap(MDLog *ml, off_t eo) : mdlog(ml), end_off(eo) { }
+ void finish(int r) {
+ // cout << "WroteImportMap at " << end_off << endl;
+ if (r >= 0)
+ mdlog->last_import_map = end_off;
+ mdlog->writing_import_map = false;
+ }
+};
+
+
+void MDCache::log_import_map(Context *onsync)
+{
+ dout(10) << "log_import_map " << num_subtrees() << " subtrees"
+ << num_subtrees_fullauth() << " fullauth"
+ << endl;
+
+ EImportMap *le = new EImportMap;
+
+ // include all auth subtrees, and their bounds.
+ // and a spanning tree to tie it to the root.
+ for (map<CDir*, set<CDir*> >::iterator p = subtrees.begin();
+ p != subtrees.end();
+ ++p) {
+ CDir *dir = p->first;
+ if (!dir->is_auth()) continue;
+
+ le->imports.insert(dir->ino());
+ le->metablob.add_dir_context(dir, true);
+ le->metablob.add_dir(dir, false);
+
+ // bounds
+ for (set<CDir*>::iterator q = p->second.begin();
+ q != p->second.end();
+ ++q) {
+ CDir *bound = *q;
+ le->bounds[dir->ino()].insert(bound->ino());
+ le->metablob.add_dir_context(bound);
+ le->metablob.add_dir(bound, false);
+ }
+ }
+
+ mds->mdlog->writing_import_map = true;
+ mds->mdlog->submit_entry(le);
+ mds->mdlog->wait_for_sync(new C_MDS_WroteImportMap(mds->mdlog, mds->mdlog->get_write_pos()));
+ if (onsync)
+ mds->mdlog->wait_for_sync(onsync);
+}
+
+
+void MDCache::send_import_map(int who)
+{
+ if (migrator->is_exporting())
+ send_import_map_later(who);
+ else
+ send_import_map_now(who);
+}
+
+void MDCache::send_import_map_later(int who)
+{
+ dout(10) << "send_import_map_later to mds" << who << endl;
+ wants_import_map.insert(who);
+}
+
+void MDCache::send_pending_import_maps()
+{
+ if (wants_import_map.empty())
+ return; // nothing to send.
+
+ // only if it's appropriate!
+ if (migrator->is_exporting() ||
+ migrator->is_importing()) {
+ dout(7) << "send_pending_import_maps waiting, imports/exports still in progress" << endl;
+ return; // not now
+ }
+
+ // ok, send them.
+ for (set<int>::iterator p = wants_import_map.begin();
+ p != wants_import_map.end();
+ p++)
+ send_import_map_now(*p);
+ wants_import_map.clear();
+}
+
+void MDCache::send_import_map_now(int who)
+{
+ dout(10) << "send_import_map_now to mds" << who << endl;
+
+ MMDSImportMap *m = new MMDSImportMap;
+
+ // known
+ for (map<CDir*,set<CDir*> >::iterator p = subtrees.begin();
+ p != subtrees.end();
+ p++) {
+ CDir *dir = p->first;
+
+ // only our subtrees
+ if (dir->authority().first != mds->get_nodeid())
+ continue;
+
+ if (migrator->is_importing(dir->ino())) {
+ // ambiguous (mid-import)
+ m->add_ambiguous_import(dir->ino(),
+ migrator->get_import_bound_inos(dir->ino()));
+ } else {
+ // not ambiguous.
+ m->add_import(dir->ino());
+
+ // bounds too
+ for (set<CDir*>::iterator q = subtrees[dir].begin();
+ q != subtrees[dir].end();
+ ++q) {
+ CDir *bound = *q;
+ m->add_import_export(dir->ino(), bound->ino());
+ }
+ }
+ }
+
+ // ambiguous
+ for (map<inodeno_t, list<inodeno_t> >::iterator p = my_ambiguous_imports.begin();
+ p != my_ambiguous_imports.end();
+ ++p)
+ m->add_ambiguous_import(p->first, p->second);
+
+ // send
+ mds->send_message_mds(m, who, MDS_PORT_CACHE);
+}
+
+
+/*
+ * during resolve state, we share import_maps to determine who
+ * is authoritative for which trees. we expect to get an import_map
+ * from _everyone_ in the recovery_set (the mds cluster at the time of
+ * the first failure).
+ */
+void MDCache::handle_import_map(MMDSImportMap *m)
+{
+ dout(7) << "handle_import_map from " << m->get_source() << endl;
+ int from = m->get_source().num();
+
+ // FIXME: check if we are a surviving ambiguous importer
+
+ // update my dir_auth values
+ for (map<inodeno_t, list<inodeno_t> >::iterator pi = m->imap.begin();
+ pi != m->imap.end();
+ ++pi) {
+ CDir *im = get_dir(pi->first);
+ if (im)
+ adjust_bounded_subtree_auth(im, pi->second, from);
+
+ // ambiguous import failure?
+ if ((mds->is_active() || mds->is_stopping()) &&
+ my_ambiguous_imports.count(pi->first)) {
+ assert(im);
+ dout(7) << "ambiguous import failed on " << *im << endl;
+ migrator->reverse_import(im);
+ }
+ }
+
+ // ambiguous import success?
+ if (mds->is_active() || mds->is_stopping()) {
+ map<inodeno_t, list<inodeno_t> >::iterator p = my_ambiguous_imports.begin();
+ while (p != my_ambiguous_imports.end()) {
+ map<inodeno_t, list<inodeno_t> >::iterator n = p;
+ n++;
+ CDir *dir = get_dir(p->first);
+ assert(dir);
+ assert(migrator->is_importing(dir->ino()));
+ assert(migrator->get_import_state(dir->ino()) == Migrator::IMPORT_ACKING);
+ if (migrator->get_import_peer(dir->ino()) == from) {
+ dout(7) << "ambiguous import succeeded on " << *dir << endl;
+ migrator->import_dir_finish(dir); // success, yay!
+ my_ambiguous_imports.erase(p);
+ }
+ p = n;
+ }
+ }
+
+ // note ambiguous imports too
+ for (map<inodeno_t, list<inodeno_t> >::iterator pi = m->ambiguous_imap.begin();
+ pi != m->ambiguous_imap.end();
+ ++pi)
+ mds->mdcache->other_ambiguous_imports[from][pi->first].swap( pi->second );
+
+ // did i get them all?
+ got_import_map.insert(from);
+
+ if (got_import_map == recovery_set) {
+ dout(10) << "got all import maps, ready to rejoin" << endl;
+ disambiguate_imports();
+ recalc_auth_bits();
+ trim_non_auth();
+
+ // move to rejoin state
+ mds->set_want_state(MDSMap::STATE_REJOIN);
+
+ } else {
+ dout(10) << "still waiting for more importmaps, got " << got_import_map
+ << ", need " << recovery_set << endl;
+ }
+
+ delete m;
+}
+
+
+void MDCache::disambiguate_imports()
+{
+ dout(10) << "disambiguate_imports" << endl;
+
+ // other nodes' ambiguous imports
+ for (map<int, map<inodeno_t, list<inodeno_t> > >::iterator p = other_ambiguous_imports.begin();
+ p != other_ambiguous_imports.begin();
+ ++p) {
+ int who = p->first;
+
+ for (map<inodeno_t, list<inodeno_t> >::iterator q = p->second.begin();
+ q != p->second.end();
+ ++q) {
+ CDir *dir = get_dir(q->first);
+ if (!dir) continue;
+
+ if (dir->authority().first == CDIR_AUTH_UNKNOWN) {
+ dout(10) << "mds" << who << " did import " << *dir << endl;
+ adjust_bounded_subtree_auth(dir, q->second, who);
+ try_subtree_merge(dir);
+ } else {
+ dout(10) << "mds" << who << " did not import " << *dir << endl;
+ }
+ }
+ }
+ other_ambiguous_imports.clear();
+
+ // my ambiguous imports
+ while (!my_ambiguous_imports.empty()) {
+ map<inodeno_t, list<inodeno_t> >::iterator q = my_ambiguous_imports.begin();
+
+ CDir *dir = get_dir(q->first);
+ if (!dir) continue;
+
+ if (dir->authority().first != CDIR_AUTH_UNKNOWN) {
+ dout(10) << "ambiguous import auth known, must not be me " << *dir << endl;
+ cancel_ambiguous_import(q->first);
+ } else {
+ dout(10) << "ambiguous import auth unknown, must be me " << *dir << endl;
+ finish_ambiguous_import(q->first);
+ }
+ }
+ assert(my_ambiguous_imports.empty());
+
+ show_imports();
+}
+
+
+void MDCache::add_ambiguous_import(inodeno_t base, list<inodeno_t>& bounds)
+{
+ assert(my_ambiguous_imports.count(base) == 0);
+ my_ambiguous_imports[base].swap(bounds);
+}
+
+
+void MDCache::add_ambiguous_import(CDir *base, const set<CDir*>& bounds)
+{
+ list<inodeno_t> binos;
+ for (set<CDir*>::iterator p = bounds.begin();
+ p != bounds.end();
+ ++p)
+ binos.push_back((*p)->ino());
+ add_ambiguous_import(base->ino(), binos);
+}
+
+void MDCache::cancel_ambiguous_import(inodeno_t dirino)
+{
+ assert(my_ambiguous_imports.count(dirino));
+ dout(10) << "cancel_ambiguous_import " << dirino
+ << " bounds " << my_ambiguous_imports[dirino]
+ << endl;
+ my_ambiguous_imports.erase(dirino);
+}
+
+void MDCache::finish_ambiguous_import(inodeno_t dirino)
+{
+ assert(my_ambiguous_imports.count(dirino));
+ list<inodeno_t> bound_inos;
+ bound_inos.swap(my_ambiguous_imports[dirino]);
+ my_ambiguous_imports.erase(dirino);
+
+ dout(10) << "finish_ambiguous_import " << dirino
+ << " bounds " << bound_inos
+ << endl;
+ CDir *dir = get_dir(dirino);
+ assert(dir);
+
+ // adjust dir_auth, import maps
+ adjust_bounded_subtree_auth(dir, bound_inos, mds->get_nodeid());
+ try_subtree_merge(dir);
+}
+
+
+/*
+ * once subtree auth is disambiguated, we need to adjust all the
+ * auth (and dirty) bits in our cache before moving on.
+ */
+void MDCache::recalc_auth_bits()
+{
+ dout(7) << "recalc_auth_bits" << endl;
+
+ for (hash_map<inodeno_t,CInode*>::iterator p = inode_map.begin();
+ p != inode_map.end();
+ ++p) {
+ CInode *in = p->second;
+ if (in->authority().first == mds->get_nodeid())
+ in->state_set(CInode::STATE_AUTH);
+ else {
+ in->state_clear(CInode::STATE_AUTH);
+ if (in->is_dirty())
+ in->mark_clean();
+ }
+
+ if (in->parent) {
+ if (in->parent->authority().first == mds->get_nodeid())
+ in->parent->state_set(CDentry::STATE_AUTH);
+ else {
+ in->parent->state_clear(CDentry::STATE_AUTH);
+ if (in->parent->is_dirty())
+ in->parent->mark_clean();
+ }
+ }
+ if (in->dir) {
+ if (in->dir->authority().first == mds->get_nodeid())
+ in->dir->state_set(CDir::STATE_AUTH);
+ else {
+ in->dir->state_clear(CDir::STATE_AUTH);
+ if (in->dir->is_dirty())
+ in->dir->mark_clean();
+ }
+ }
+ }
+ show_imports();
+ show_cache();
+}
/*
* rejoin phase!
}
-
void MDCache::cache_rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin)
{
dout(10) << "cache_rejoin_walk " << *dir << endl;
for (set<inodeno_t>::iterator p = m->dirs.begin();
p != m->dirs.end();
++p) {
- CInode *diri = get_inode(*p);
- assert(diri);
- CDir *dir = diri->dir;
+ CDir *dir = get_dir(*p);
assert(dir);
int nonce = dir->add_replica(from);
dout(10) << " has " << *dir << endl;
for (list<MMDSCacheRejoinAck::dirinfo>::iterator p = m->dirs.begin();
p != m->dirs.end();
++p) {
- CInode *diri = get_inode(p->dirino);
- CDir *dir = diri->dir;
+ CDir *dir = get_dir(p->dirino);
assert(dir);
dir->set_replica_nonce(p->nonce);
root->state_set(CInode::STATE_ROOT);
}
-/*
-void MDCache::add_import(CDir *dir)
-{
- imports.insert(dir);
- dir->state_set(CDir::STATE_IMPORT);
- dir->get(CDir::PIN_IMPORT);
-}
-*/
-
-
-void MDCache::recalc_auth_bits()
-{
- dout(7) << "recalc_auth_bits" << endl;
-
- for (hash_map<inodeno_t,CInode*>::iterator p = inode_map.begin();
- p != inode_map.end();
- ++p) {
- CInode *in = p->second;
- if (in->authority().first == mds->get_nodeid())
- in->state_set(CInode::STATE_AUTH);
- else {
- in->state_clear(CInode::STATE_AUTH);
- if (in->is_dirty())
- in->mark_clean();
- }
-
- if (in->parent) {
- if (in->parent->authority().first == mds->get_nodeid())
- in->parent->state_set(CDentry::STATE_AUTH);
- else {
- in->parent->state_clear(CDentry::STATE_AUTH);
- if (in->parent->is_dirty())
- in->parent->mark_clean();
- }
- }
-
- if (in->dir) {
- if (in->dir->authority().first == mds->get_nodeid())
- in->dir->state_set(CDir::STATE_AUTH);
- else {
- in->dir->state_clear(CDir::STATE_AUTH);
- if (in->dir->is_dirty())
- in->dir->mark_clean();
- }
- }
- }
- show_imports();
- show_cache();
-}
+// ================================================================================
+// cache trimming
+
+
bool MDCache::trim(int max)
{
// trim LRU
dir->remove_dentry(dn);
// adjust the dir state
- CInode *diri = dir->get_inode();
- diri->dir->state_clear(CDir::STATE_COMPLETE); // dir incomplete!
+ dir->state_clear(CDir::STATE_COMPLETE); // dir incomplete!
// reexport?
- if (diri->dir->is_import() && // import
- diri->dir->get_size() == 0 && // no children
- !diri->is_root()) // not root
- migrator->export_empty_import(diri->dir);
+ if (dir->get_size() == 0)
+ migrator->export_empty_import(dir);
if (mds->logger) mds->logger->inc("cex");
}
// troot inode+dir?
- while (max == 0 && // only if we're trimming everything!
- lru.lru_get_size() == 0 &&
- root &&
- root->get_num_ref() == 0 &&
- root->dir &&
- root->dir->get_num_ref() == 0)
+ if (max == 0 && // only if we're trimming everything!
+ lru.lru_get_size() == 0 &&
+ root &&
+ root->get_num_ref() == 0 &&
+ root->dir &&
+ root->dir->get_num_ref() == 0)
trim_inode(0, root, 1, expiremap);
// send expires
if (in->dir->is_subtree_root()) {
dout(12) << " this is a subtree, removing from map, container is " << *in->dir << endl;
dconino = in->ino();
-
- // remove from subtree map
- assert(subtrees.count(in->dir));
- assert(subtrees[in->dir].empty());
- subtrees.erase(in->dir);
}
for (int a=dirauth.first;
}
}
+ if (in->dir->is_subtree_root())
+ remove_subtree(in->dir); // remove from subtree map
in->close_dir();
}
if (dn->is_auth()) {
// add back into lru (at the top)
lru.lru_insert_top(dn);
-
+
if (!first_auth) {
first_auth = dn;
} else {
// unlink the dentry
dout(15) << "trim_non_auth removing " << *dn << endl;
- if (!dn->is_null())
+ if (dn->is_remote()) {
+ dir->unlink_inode(dn);
+ }
+ else if (dn->is_primary()) {
+ CInode *in = dn->get_inode();
+ if (in->dir) {
+ if (in->dir->is_subtree_root())
+ remove_subtree(in->dir);
+ in->close_dir();
+ }
dir->unlink_inode(dn);
+ remove_inode(in);
+ if (in == root) root = 0;
+ }
+ else {
+ assert(dn->is_null());
+ }
dir->remove_dentry(dn);
// adjust the dir state
- CInode *diri = dir->get_inode();
- diri->dir->state_clear(CDir::STATE_COMPLETE); // dir incomplete!
+ dir->state_clear(CDir::STATE_COMPLETE); // dir incomplete!
+ }
+ }
+
+ if (lru.lru_get_size() == 0) {
+ if (root->dir) {
+ assert(root->dir->get_num_ref() == 0);
+ remove_subtree(root->dir);
+ root->close_dir();
+ }
+ assert(root->get_num_ref() == 0);
+ remove_inode(root);
+ root = 0;
+ }
+
+ show_subtrees();
+}
+
+void MDCache::handle_cache_expire(MCacheExpire *m)
+{
+ int from = m->get_from();
+
+ dout(7) << "cache_expire from mds" << from << endl;
+
+ // loop over realms
+ for (map<inodeno_t,MCacheExpire::realm>::iterator p = m->realms.begin();
+ p != m->realms.end();
+ ++p) {
+ // get container
+ CInode *coni = get_inode(p->first);
+ CDir *con = coni ? coni->dir : 0;
+
+ assert(con); // we had better have this.
+
+ if (!con->is_auth()) {
+ // not auth.
+ dout(7) << "delaying nonauth expires for " << *con << endl;
+ assert(con->is_frozen_tree_root());
+
+ // make a message container
+ if (delayed_expire[con].count(from) == 0)
+ delayed_expire[con][from] = new MCacheExpire(from);
+
+ // merge these expires into it
+ delayed_expire[con][from]->add_realm(p->first, p->second);
+ continue;
+ }
+ dout(7) << "expires for " << *con << endl;
+
+ // INODES
+ for (map<inodeno_t,int>::iterator it = p->second.inodes.begin();
+ it != p->second.inodes.end();
+ it++) {
+ CInode *in = get_inode(it->first);
+ int nonce = it->second;
+
+ if (!in) {
+ dout(0) << " inode expire on " << it->first << " from " << from << ", don't have it" << endl;
+ assert(in);
+ }
+ assert(in->is_auth());
+
+ // check nonce
+ if (nonce == in->get_replica_nonce(from)) {
+ // remove from our cached_by
+ dout(7) << " inode expire on " << *in << " from mds" << from << " cached_by was " << in->get_replicas() << endl;
+ inode_remove_replica(in, from);
+ }
+ else {
+ // this is an old nonce, ignore expire.
+ dout(7) << " inode expire on " << *in << " from mds" << from
+ << " with old nonce " << nonce << " (current " << in->get_replica_nonce(from) << "), dropping"
+ << endl;
+ assert(in->get_replica_nonce(from) > nonce);
+ }
+ }
+
+ // DIRS
+ for (map<inodeno_t,int>::iterator it = p->second.dirs.begin();
+ it != p->second.dirs.end();
+ it++) {
+ CDir *dir = get_dir(it->first);
+ int nonce = it->second;
+
+ if (!dir) {
+ dout(0) << " dir expire on " << it->first << " from " << from << ", don't have it" << endl;
+ assert(dir);
+ }
+ assert(dir->is_auth());
+
+ // check nonce
+ if (nonce == dir->get_replica_nonce(from)) {
+ // remove from our cached_by
+ dout(7) << " dir expire on " << *dir << " from mds" << from
+ << " replicas was " << dir->replicas << endl;
+ dir->remove_replica(from);
+ }
+ else {
+ // this is an old nonce, ignore expire.
+ dout(7) << " dir expire on " << *dir << " from mds" << from
+ << " with old nonce " << nonce << " (current " << dir->get_replica_nonce(from)
+ << "), dropping" << endl;
+ assert(dir->get_replica_nonce(from) > nonce);
+ }
+ }
+
+ // DENTRIES
+ for (map<inodeno_t, map<string,int> >::iterator pd = p->second.dentries.begin();
+ pd != p->second.dentries.end();
+ ++pd) {
+ dout(0) << " dn expires in dir " << pd->first << endl;
+ CDir *dir = get_dir(pd->first);
+
+ if (!dir) {
+ dout(0) << " dn expires on " << pd->first << " from " << from << ", don't have it" << endl;
+ assert(dir);
+ }
+ assert(dir->is_auth());
+
+ for (map<string,int>::iterator p = pd->second.begin();
+ p != pd->second.end();
+ ++p) {
+ int nonce = p->second;
+
+ CDentry *dn = dir->lookup(p->first);
+ if (!dn)
+ dout(0) << " missing dentry for " << p->first << " in " << *dir << endl;
+ assert(dn);
+
+ if (nonce == dn->get_replica_nonce(from)) {
+ dout(7) << " dentry_expire on " << *dn << " from mds" << from << endl;
+ dn->remove_replica(from);
+ }
+ else {
+ dout(7) << " dentry_expire on " << *dn << " from mds" << from
+ << " with old nonce " << nonce << " (current " << dn->get_replica_nonce(from)
+ << "), dropping" << endl;
+ assert(dn->get_replica_nonce(from) > nonce);
+ }
+ }
}
}
-
- // inode expire queue
- while (!inode_expire_queue.empty()) {
- CInode *in = inode_expire_queue.front();
- inode_expire_queue.pop_front();
- dout(15) << "trim_non_auth removing " << *in << endl;
- if (in == root) root = 0;
- remove_inode(in);
+
+
+ // done
+ delete m;
+}
+
+void MDCache::process_delayed_expire(CDir *dir)
+{
+ dout(7) << "process_delayed_expire on " << *dir << endl;
+ for (map<int,MCacheExpire*>::iterator p = delayed_expire[dir].begin();
+ p != delayed_expire[dir].end();
+ ++p)
+ handle_cache_expire(p->second);
+ delayed_expire.erase(dir);
+}
+
+void MDCache::discard_delayed_expire(CDir *dir)
+{
+ dout(7) << "discard_delayed_expire on " << *dir << endl;
+ for (map<int,MCacheExpire*>::iterator p = delayed_expire[dir].begin();
+ p != delayed_expire[dir].end();
+ ++p)
+ delete p->second;
+ delayed_expire.erase(dir);
+}
+
+void MDCache::inode_remove_replica(CInode *in, int from)
+{
+ in->remove_replica(from);
+ in->mds_caps_wanted.erase(from);
+
+ // note: this code calls _eval more often than it needs to!
+ // fix lock
+ if (in->hardlock.is_gathering(from)) {
+ in->hardlock.gather_set.erase(from);
+ if (in->hardlock.gather_set.size() == 0)
+ mds->locker->inode_hard_eval(in);
+ }
+ if (in->filelock.is_gathering(from)) {
+ in->filelock.gather_set.erase(from);
+ if (in->filelock.gather_set.size() == 0)
+ mds->locker->inode_file_eval(in);
+ }
+
+ // alone now?
+ if (!in->is_replicated()) {
+ mds->locker->inode_hard_eval(in);
+ mds->locker->inode_file_eval(in);
}
}
+// =========================================================================================
+// shutdown
+
class C_MDC_ShutdownCommit : public Context {
MDCache *mdc;
public:
-CInode *MDCache::create_root_inode()
-{
- CInode *root = new CInode(this);
- memset(&root->inode, 0, sizeof(inode_t));
- root->inode.ino = 1;
- root->inode.hash_seed = 0; // not hashed!
-
- // make it up (FIXME)
- root->inode.mode = 0755 | INODE_MODE_DIR;
- root->inode.size = 0;
- root->inode.ctime =
- root->inode.mtime = g_clock.gettime();
-
- root->inode.nlink = 1;
- root->inode.layout = g_OSD_MDDirLayout;
-
- set_root( root );
- add_inode( root );
-
- return root;
-}
-
-
-int MDCache::open_root(Context *c)
-{
- int whoami = mds->get_nodeid();
-
- // open root inode
- if (whoami == 0) {
- // i am root inode
- CInode *root = create_root_inode();
-
- // root directory too
- assert(root->dir == NULL);
- root->set_dir(new CDir(root, this, true));
- adjust_subtree_auth(root->dir, 0);
- root->dir->dir_rep = CDir::REP_ALL; //NONE;
-
- show_imports();
-
- if (c) {
- c->finish(0);
- delete c;
- }
- } else {
- // request inode from root mds
- if (waiting_for_root.empty()) {
- dout(7) << "discovering root" << endl;
-
- filepath want;
- MDiscover *req = new MDiscover(whoami,
- 0,
- want,
- false); // there _is_ no base dir for the root inode
- mds->send_message_mds(req, 0, MDS_PORT_CACHE);
- } else {
- dout(7) << "waiting for root" << endl;
- }
-
- // wait
- waiting_for_root.push_back(c);
-
- }
-
- return 0;
-}
-
-
-
-
-void MDCache::handle_cache_expire(MCacheExpire *m)
-{
- int from = m->get_from();
-
- dout(7) << "cache_expire from mds" << from << endl;
-
- // loop over realms
- for (map<inodeno_t,MCacheExpire::realm>::iterator p = m->realms.begin();
- p != m->realms.end();
- ++p) {
- // get container
- CInode *coni = get_inode(p->first);
- CDir *con = coni ? coni->dir : 0;
-
- assert(con); // we had better have this.
-
- if (!con->is_auth()) {
- // not auth.
- dout(7) << "delaying nonauth expires for " << *con << endl;
- assert(con->is_frozen_tree_root());
-
- // make a message container
- if (delayed_expire[con].count(from) == 0)
- delayed_expire[con][from] = new MCacheExpire(from);
-
- // merge these expires into it
- delayed_expire[con][from]->add_realm(p->first, p->second);
- continue;
- }
- dout(7) << "expires for " << *con << endl;
-
- // INODES
- for (map<inodeno_t,int>::iterator it = p->second.inodes.begin();
- it != p->second.inodes.end();
- it++) {
- CInode *in = get_inode(it->first);
- int nonce = it->second;
-
- if (!in) {
- dout(0) << " inode expire on " << it->first << " from " << from << ", don't have it" << endl;
- assert(in);
- }
- assert(in->is_auth());
-
- // check nonce
- if (nonce == in->get_replica_nonce(from)) {
- // remove from our cached_by
- dout(7) << " inode expire on " << *in << " from mds" << from << " cached_by was " << in->get_replicas() << endl;
- inode_remove_replica(in, from);
- }
- else {
- // this is an old nonce, ignore expire.
- dout(7) << " inode expire on " << *in << " from mds" << from
- << " with old nonce " << nonce << " (current " << in->get_replica_nonce(from) << "), dropping"
- << endl;
- assert(in->get_replica_nonce(from) > nonce);
- }
- }
-
- // DIRS
- for (map<inodeno_t,int>::iterator it = p->second.dirs.begin();
- it != p->second.dirs.end();
- it++) {
- CInode *diri = get_inode(it->first);
- assert(diri);
- CDir *dir = diri->dir;
- int nonce = it->second;
-
- if (!dir) {
- dout(0) << " dir expire on " << it->first << " from " << from << ", don't have it" << endl;
- assert(dir);
- }
- assert(dir->is_auth());
-
- // check nonce
- if (nonce == dir->get_replica_nonce(from)) {
- // remove from our cached_by
- dout(7) << " dir expire on " << *dir << " from mds" << from
- << " replicas was " << dir->replicas << endl;
- dir->remove_replica(from);
- }
- else {
- // this is an old nonce, ignore expire.
- dout(7) << " dir expire on " << *dir << " from mds" << from
- << " with old nonce " << nonce << " (current " << dir->get_replica_nonce(from)
- << "), dropping" << endl;
- assert(dir->get_replica_nonce(from) > nonce);
- }
- }
-
- // DENTRIES
- for (map<inodeno_t, map<string,int> >::iterator pd = p->second.dentries.begin();
- pd != p->second.dentries.end();
- ++pd) {
- dout(0) << " dn expires in dir " << pd->first << endl;
- CInode *diri = get_inode(pd->first);
- CDir *dir = diri ? diri->dir : 0;
-
- if (!dir) {
- dout(0) << " dn expires on " << pd->first << " from " << from << ", don't have it" << endl;
- assert(dir);
- }
- assert(dir->is_auth());
-
- for (map<string,int>::iterator p = pd->second.begin();
- p != pd->second.end();
- ++p) {
- int nonce = p->second;
-
- CDentry *dn = dir->lookup(p->first);
- if (!dn)
- dout(0) << " missing dentry for " << p->first << " in " << *dir << endl;
- assert(dn);
-
- if (nonce == dn->get_replica_nonce(from)) {
- dout(7) << " dentry_expire on " << *dn << " from mds" << from << endl;
- dn->remove_replica(from);
- }
- else {
- dout(7) << " dentry_expire on " << *dn << " from mds" << from
- << " with old nonce " << nonce << " (current " << dn->get_replica_nonce(from)
- << "), dropping" << endl;
- assert(dn->get_replica_nonce(from) > nonce);
- }
- }
- }
- }
-
-
- // done
- delete m;
-}
-
-void MDCache::process_delayed_expire(CDir *dir)
-{
- dout(7) << "process_delayed_expire on " << *dir << endl;
- for (map<int,MCacheExpire*>::iterator p = delayed_expire[dir].begin();
- p != delayed_expire[dir].end();
- ++p)
- handle_cache_expire(p->second);
- delayed_expire.erase(dir);
-}
-
-void MDCache::discard_delayed_expire(CDir *dir)
-{
- dout(7) << "discard_delayed_expire on " << *dir << endl;
- for (map<int,MCacheExpire*>::iterator p = delayed_expire[dir].begin();
- p != delayed_expire[dir].end();
- ++p)
- delete p->second;
- delayed_expire.erase(dir);
-}
-void MDCache::inode_remove_replica(CInode *in, int from)
-{
- in->remove_replica(from);
- in->mds_caps_wanted.erase(from);
-
- // note: this code calls _eval more often than it needs to!
- // fix lock
- if (in->hardlock.is_gathering(from)) {
- in->hardlock.gather_set.erase(from);
- if (in->hardlock.gather_set.size() == 0)
- mds->locker->inode_hard_eval(in);
- }
- if (in->filelock.is_gathering(from)) {
- in->filelock.gather_set.erase(from);
- if (in->filelock.gather_set.size() == 0)
- mds->locker->inode_file_eval(in);
- }
-
- // alone now?
- if (!in->is_replicated()) {
- mds->locker->inode_hard_eval(in);
- mds->locker->inode_file_eval(in);
- }
-}
int MDCache::send_dir_updates(CDir *dir, bool bcast)
mds->locker->dentry_xlock_finish(dn, true); // quiet, no need to bother replicas since they're already unlinking
// did i empty out an imported dir?
- if (dir->is_import() && !dir->inode->is_root() && dir->get_size() == 0)
+ if (dir->get_size() == 0)
migrator->export_empty_import(dir);
// wake up any waiters
void MDCache::handle_dentry_unlink(MDentryUnlink *m)
{
- CInode *diri = get_inode(m->get_dirino());
- CDir *dir = 0;
- if (diri) dir = diri->dir;
+ CDir *dir = get_dir(m->get_dirino());
- if (!diri || !dir) {
+ if (!dir) {
dout(7) << "handle_dentry_unlink don't have dir " << m->get_dirino() << endl;
}
else {
if (dir->is_auth())
auth = "auth ";
else
- auth = "rep ";
+ auth = " rep ";
char s[10];
if (dir->get_dir_auth().second == CDIR_AUTH_UNKNOWN)
// the cache
CInode *root; // root inode
hash_map<inodeno_t,CInode*> inode_map; // map of inodes by ino
-
- list<CInode*> inode_expire_queue; // inodes to delete
- friend class MDS; // for inode_map, FIXME
+ friend class MDS; // for thrash_exports hack.
// root
list<Context*> waiting_for_root;
+public:
+ int get_num_inodes() { return inode_map.size(); }
+ int get_num_dentries() { return lru.lru_get_size(); }
+
/*
// imports, exports, and hashes.
set<CDir*> imports; // includes root (on mds0)
public:
void adjust_subtree_auth(CDir *root, pair<int,int> auth);
void adjust_subtree_auth(CDir *root, int a, int b=CDIR_AUTH_UNKNOWN) {
- adjust_subtree_auth(root, pair<int,int>(a,b)); }
+ adjust_subtree_auth(root, pair<int,int>(a,b));
+ }
void adjust_bounded_subtree_auth(CDir *dir, set<CDir*>& bounds, pair<int,int> auth);
+ void adjust_bounded_subtree_auth(CDir *dir, set<CDir*>& bounds, int a) {
+ adjust_bounded_subtree_auth(dir, bounds, pair<int,int>(a, CDIR_AUTH_UNKNOWN));
+ }
+ void adjust_bounded_subtree_auth(CDir *dir, list<inodeno_t>& bounds, pair<int,int> auth);
+ void adjust_bounded_subtree_auth(CDir *dir, list<inodeno_t>& bounds, int a) {
+ adjust_bounded_subtree_auth(dir, bounds, pair<int,int>(a, CDIR_AUTH_UNKNOWN));
+ }
void adjust_export_state(CDir *dir);
void try_subtree_merge(CDir *root);
CDir *get_subtree_root(CDir *dir);
+ void remove_subtree(CDir *dir);
void get_subtree_bounds(CDir *root, set<CDir*>& bounds);
void get_wouldbe_subtree_bounds(CDir *root, set<CDir*>& bounds);
void verify_subtree_bounds(CDir *root, const set<CDir*>& bounds);
// recovery
protected:
// from EImportStart w/o EImportFinish during journal replay
- map<inodeno_t, set<inodeno_t> > my_ambiguous_imports;
+ map<inodeno_t, list<inodeno_t> > my_ambiguous_imports;
// from MMDSImportMaps
- map<int, map<inodeno_t, set<inodeno_t> > > other_ambiguous_imports;
+ map<int, map<inodeno_t, list<inodeno_t> > > other_ambiguous_imports;
set<int> recovery_set;
set<int> wants_import_map; // nodes i need to send my import map to
public:
void send_import_map(int who);
void send_import_map_now(int who);
- void send_import_map_later(int who) {
- wants_import_map.insert(who);
- }
+ void send_import_map_later(int who);
void send_pending_import_maps(); // maybe.
void send_cache_rejoins();
}
// ambiguous imports
- void add_ambiguous_import(inodeno_t base, set<inodeno_t>& bounds) {
- my_ambiguous_imports[base].swap(bounds);
- }
+ void add_ambiguous_import(inodeno_t base, list<inodeno_t>& bounds);
+ void add_ambiguous_import(CDir *base, const set<CDir*>& bounds);
void cancel_ambiguous_import(inodeno_t dirino);
void finish_ambiguous_import(inodeno_t dirino);
-
- void finish_ambiguous_export(inodeno_t dirino, set<inodeno_t>& bounds);
+
bool have_inode( inodeno_t ino ) { return inode_map.count(ino) ? true:false; }
CInode* get_inode( inodeno_t ino ) {
if (have_inode(ino))
- return inode_map[ ino ];
+ return inode_map[ino];
return NULL;
}
-
+ CDir* get_dir(inodeno_t dirino) {
+ if (have_inode(dirino))
+ return inode_map[dirino]->dir;
+ return NULL;
+ }
+
int hash_dentry(inodeno_t ino, const string& s) {
return 0; // fixme
}
- // is anyone resolving?
- if (is_resolve() || is_rejoin() || is_active() || is_stopping()) {
+ // RESOLVE
+ // am i newly resolving?
+ if (is_resolve() && oldstate == MDSMap::STATE_REPLAY) {
+ // send to all resolve, active, stopping
+ dout(10) << "i am newly resolving, sharing import map" << endl;
+ set<int> who;
+ mdsmap->get_mds_set(who, MDSMap::STATE_RESOLVE);
+ mdsmap->get_mds_set(who, MDSMap::STATE_ACTIVE);
+ mdsmap->get_mds_set(who, MDSMap::STATE_STOPPING);
+ mdsmap->get_mds_set(who, MDSMap::STATE_REJOIN); // hrm. FIXME.
+ for (set<int>::iterator p = who.begin(); p != who.end(); ++p) {
+ if (*p == whoami) continue;
+ mdcache->send_import_map(*p); // now.
+ }
+ }
+ // is someone else newly resolving?
+ else if (is_resolve() || is_rejoin() || is_active() || is_stopping()) {
set<int> resolve;
mdsmap->get_mds_set(resolve, MDSMap::STATE_RESOLVE);
- if (oldresolve != resolve)
+ if (oldresolve != resolve) {
dout(10) << "resolve set is " << resolve << ", was " << oldresolve << endl;
- for (set<int>::iterator p = resolve.begin(); p != resolve.end(); ++p) {
- if (*p == whoami) continue;
- if (oldresolve.count(*p) == 0 || // if other guy newly resolve, or
- oldstate == MDSMap::STATE_REPLAY) // if i'm newly resolve,
- mdcache->send_import_map(*p); // share my import map (now or later)
+ for (set<int>::iterator p = resolve.begin(); p != resolve.end(); ++p) {
+ if (*p == whoami) continue;
+ if (oldresolve.count(*p)) continue;
+ mdcache->send_import_map(*p); // now or later.
+ }
}
}
+ // REJOIN
// is everybody finally rejoining?
if (is_rejoin() || is_active() || is_stopping()) {
if (!wasrejoining && mdsmap->is_rejoining()) {
for (int i=0; i<g_conf.mds_thrash_exports; i++) {
set<int> s;
mdsmap->get_mds_set(s, MDSMap::STATE_ACTIVE);
- if (s.size() == 1 || mdcache->inode_map.size() < 10)
+ if (s.size() < 2 || mdcache->get_num_inodes() < 10)
break; // need peers for this to work.
dout(7) << "mds thrashing exports pass " << (i+1) << "/" << g_conf.mds_thrash_exports << endl;
// sets
void get_mds_set(set<int>& s) {
- s.clear();
for (map<int,int>::const_iterator p = mds_state.begin();
p != mds_state.end();
p++)
s.insert(p->first);
}
void get_up_mds_set(set<int>& s) {
- s.clear();
for (map<int,int>::const_iterator p = mds_state.begin();
p != mds_state.end();
p++)
s.insert(p->first);
}
void get_mds_set(set<int>& s, int state) {
- s.clear();
for (map<int,int>::const_iterator p = mds_state.begin();
p != mds_state.end();
p++)
get_mds_set(s, MDSMap::STATE_FAILED);
}
void get_recovery_mds_set(set<int>& s) {
- s.clear();
for (map<int,int>::const_iterator p = mds_state.begin();
p != mds_state.end();
p++)
#include "include/filepath.h"
#include "events/EString.h"
-#include "events/EExportStart.h"
-#include "events/EExportFinish.h"
+#include "events/EExport.h"
#include "events/EImportStart.h"
#include "events/EImportFinish.h"
{
dout(7) << "export_empty_import " << *dir << endl;
- return; // hack fixme
-
- if (!dir->is_import()) {
- dout(7) << "not import (anymore?)" << endl;
+ if (dir->inode->is_auth()) return;
+ if (!dir->is_auth()) return;
+
+ if (dir->inode->is_freezing() || dir->inode->is_frozen()) return;
+ if (dir->is_freezing() || dir->is_frozen()) return;
+
+ if (dir->get_size() > 0) {
+ dout(7) << "not actually empty" << endl;
return;
}
+
if (dir->inode->is_root()) {
dout(7) << "root" << endl;
return;
}
-
- if (dir->get_size() > 0) {
- dout(7) << "not actually empty" << endl;
- return;
- }
-
+
// is it really empty?
if (!dir->is_complete()) {
dout(7) << "not complete, fetching." << endl;
new C_MDC_EmptyImport(this,dir));
return;
}
-
+
int dest = dir->inode->authority().first;
-
+
// comment this out ot wreak havoc?
//if (mds->is_shutting_down()) dest = 0; // this is more efficient.
case EXPORT_LOGGINGSTART:
case EXPORT_PREPPING:
case EXPORT_WARNING:
- dout(10) << "state loggingstart|prepping|warning : unfreezing, logging EExportFinish(false)" << endl;
+ dout(10) << "state loggingstart|prepping|warning : unfreezing" << endl;
dir->unfreeze_tree();
- mds->mdlog->submit_entry(new EExportFinish(dir,false));
break;
case EXPORT_EXPORTING:
- dout(10) << "state exporting : logging EExportFinish(false), reversing, and unfreezing" << endl;
- mds->mdlog->submit_entry(new EExportFinish(dir,false));
+ dout(10) << "state exporting : reversing, and unfreezing" << endl;
reverse_export(dir);
dir->unfreeze_tree();
break;
next++;
inodeno_t dirino = q->first;
CInode *diri = mds->mdcache->get_inode(dirino);
- CDir *dir = 0;
- if (diri)
- dir = diri->dir;
+ CDir *dir = mds->mdcache->get_dir(dirino);
if (import_peer[dirino] == who) {
switch (import_state[dirino]) {
assert(diri);
// unpin base
diri->put(CInode::PIN_IMPORTING);
+ import_state.erase(dirino);
+ import_peer.erase(dirino);
break;
// NOTE: state order reversal + fall-thru, pay attention.
// unfreeze
dir->unfreeze_tree();
- // adjust auth
+ // adjust auth back to me
cache->adjust_subtree_auth(dir, mds->get_nodeid());
cache->try_subtree_merge(dir);
dout(10) << "state prepping : unpinning base+bounds " << *dir << endl;
}
assert(dir);
+
// unpin base
dir->put(CDir::PIN_IMPORTING);
+
// unpin bounds
for (set<CDir*>::iterator it = import_bounds[dir].begin();
it != import_bounds[dir].end();
bd->state_clear(CDir::STATE_IMPORTBOUND);
bd->put(CDir::PIN_IMPORTBOUND);
}
+
+ import_state.erase(dirino);
+ import_peer.erase(dirino);
+ import_bound_inos.erase(dirino);
+ import_bounds.erase(dir);
break;
case IMPORT_LOGGINGSTART:
dout(10) << "state loggingstart : reversing import on " << *dir << endl;
assert(dir);
- mds->mdlog->submit_entry(new EImportFinish(dir,false)); // log failure
reverse_import(dir);
break;
case IMPORT_ACKING:
// hrm. make this an ambiguous import, and wait for exporter recovery to disambiguate
- // ...
+ dout(10) << "state acking : noting ambiguous import " << *dir << endl;
+ cache->add_ambiguous_import(dir, import_bounds[dir]);
break;
case IMPORT_LOGGINGFINISH:
- // do nothing, exporter is no longer involved.
+ // do nothing special, exporter is no longer involved.
+ // i will finish/clean up myself.
break;
}
-
- import_state.erase(dirino);
- import_peer.erase(dirino);
- import_bound_inos.erase(dirino);
- import_bounds.erase(dir);
-
- cache->show_imports();
- cache->show_cache();
}
// next!
set<CDir*> &bounds = export_bounds[dir];
// generate prep message, log entry.
- EExportStart *le = new EExportStart(dir, dest);
MExportDirPrep *prep = new MExportDirPrep(dir->inode);
// include spanning tree for all nested exports.
// include base dir
prep->add_dir( new CDirDiscover(dir, dir->add_replica(dest)) );
- le->metablob.add_dir( dir, false );
// check bounds
for (set<CDir*>::iterator it = bounds.begin();
dout(7) << " export bound " << *bound << endl;
prep->add_export( bound->ino() );
- le->get_bounds().insert(bound->ino());
- le->metablob.add_dir_context( bound );
- le->metablob.add_dir( bound, false );
/* first assemble each trace, in trace order, and put in message */
list<CInode*> inode_trace;
}
}
-
- // log our intentions
- dout(7) << " logging EExportStart" << endl;
- mds->mdlog->submit_entry(le);
- // don't wait for it to flush, it doesn't matter.
- // FIXME: just combine Start into Finish, we only need 1 log entry.
- //, new C_MDC_ExportStartLogged(this, dir, dest, prep));
-
- /*
-
-}
-
-void Migrator::export_dir_start_logged(CDir *dir, MExportDirPrep *prep, int dest)
-{
- dout(7) << "export_dir_start_logged " << *dir << endl;
-
- if (export_state.count(dir) == 0 ||
- export_state[dir] != EXPORT_LOGGINGSTART) {
- // export must have aborted.
- dout(7) << "export must have aborted, unfreezing and deleting me old prep message" << endl;
- delete prep;
- dir->unfreeze_tree(); // cancel the freeze
- return;
- }
- */
+ // send.
export_state[dir] = EXPORT_PREPPING;
mds->send_message_mds(prep, dest, MDS_PORT_MIGRATOR);
}
*/
void Migrator::handle_export_dir_ack(MExportDirAck *m)
{
- CInode *diri = cache->get_inode(m->get_ino());
- CDir *dir = diri->dir;
+ CDir *dir = cache->get_dir(m->get_ino());
assert(dir);
assert(dir->is_frozen_tree_root()); // i'm exporting!
export_state[dir] = EXPORT_LOGGINGFINISH;
export_data.erase(dir);
+ // log completion
+ EExport *le = new EExport(dir);
+ le->metablob.add_dir( dir, false );
+ for (set<CDir*>::iterator p = export_bounds[dir].begin();
+ p != export_bounds[dir].end();
+ ++p) {
+ CDir *bound = *p;
+ le->get_bounds().insert(bound->ino());
+ le->metablob.add_dir_context(bound);
+ le->metablob.add_dir(bound, false);
+ }
+
// log export completion, then finish (unfreeze, trigger finish context, etc.)
dir->get(CDir::PIN_LOGGINGEXPORTFINISH);
- mds->mdlog->submit_entry(new EExportFinish(dir, true),
+ mds->mdlog->submit_entry(le,
new C_MDS_ExportFinishLogged(this, dir));
delete m;
assert(in);
// note bound.
- import_bound_inos[dir->ino()].insert(*it);
+ import_bound_inos[dir->ino()].push_back(*it);
if (!in->dir) {
dout(7) << " opening nested export on " << *in << endl;
void Migrator::handle_export_dir(MExportDir *m)
{
- CInode *diri = cache->get_inode(m->get_ino());
- assert(diri);
- CDir *dir = diri->dir;
- assert(dir);
+ CDir *dir = cache->get_dir(m->get_ino());
int oldauth = m->get_source().num();
dout(7) << "handle_export_dir importing " << *dir << " from " << oldauth << endl;
cache->adjust_subtree_auth(dir, mds->get_nodeid(), oldauth);
cache->verify_subtree_bounds(dir, import_bounds[dir]);
- // take out my importing pin
- dir->put(CDir::PIN_IMPORTING);
-
// add this crap to my cache
list<inodeno_t> imported_subdirs;
int num_imported_inodes = 0;
dout(10) << " " << imported_subdirs.size() << " imported subdirs" << endl;
dout(10) << " " << m->get_exports().size() << " imported nested exports" << endl;
- // remove bound pins
// include bounds in EImportStart
for (set<CDir*>::iterator it = import_bounds[dir].begin();
it != import_bounds[dir].end();
it++) {
CDir *bd = *it;
- // remove bound pin
- bd->put(CDir::PIN_IMPORTBOUND);
- bd->state_clear(CDir::STATE_IMPORTBOUND);
-
// include bounding dirs in EImportStart
// (now that the interior metadata is already in the event)
le->metablob.add_dir(bd, false);
}
+/*
+ * note: this does teh full work of reversing and import and cleaning up
+ * state.
+ * called by both handle_mds_failure and by handle_import_map (if we are
+ * a survivor coping with an exporter failure+recovery).
+ */
void Migrator::reverse_import(CDir *dir)
{
dout(7) << "reverse_import " << *dir << endl;
- assert(0); // implement me.
+ // remove importing pin
+ dir->put(CDir::PIN_IMPORTING);
- // update auth, with possible subtree merge.
- cache->adjust_subtree_auth(dir, import_peer[dir->ino()]);
- cache->try_subtree_merge(dir);
-
// remove bound pins
for (set<CDir*>::iterator it = import_bounds[dir].begin();
it != import_bounds[dir].end();
bd->state_clear(CDir::STATE_IMPORTBOUND);
}
+ // update auth, with possible subtree merge.
+ cache->adjust_subtree_auth(dir, import_peer[dir->ino()]);
+ cache->try_subtree_merge(dir);
+
+
+
+ assert(0); // implement me.
+ cache->show_cache();
+
+
+
// ...
+ // adjust auth/dirty bits
+
+ // discard expire crap
+ cache->discard_delayed_expire(dir);
+
+ // log our failure
+ mds->mdlog->submit_entry(new EImportFinish(dir,false)); // log failure
+
+ // clean up
+ import_state.erase(dir->ino());
+ import_peer.erase(dir->ino());
+ import_bound_inos.erase(dir->ino());
+ import_bounds.erase(dir);
}
void Migrator::handle_export_dir_finish(MExportDirFinish *m)
{
- CInode *diri = cache->get_inode(m->get_ino());
- CDir *dir = diri->dir;
+ CDir *dir = cache->get_dir(m->get_ino());
assert(dir);
+ dout(7) << "handle_export_dir_finish on " << *dir << endl;
+ import_dir_finish(dir);
+ delete m;
+}
- dout(7) << "handle_export_dir_finish logging import_finish on " << *dir << endl;
-
+void Migrator::import_dir_finish(CDir *dir)
+{
+ dout(7) << "import_dir_finish logging import_finish on " << *dir << endl;
+
// note state
import_state[dir->ino()] = IMPORT_LOGGINGFINISH;
-
+
// log finish
mds->mdlog->submit_entry(new EImportFinish(dir, true),
new C_MDS_ImportDirLoggedFinish(this,dir));
-
- delete m;
}
{
dout(7) << "import_dir_logged_finish " << *dir << endl;
+ // remove pins
+ dir->put(CDir::PIN_IMPORTING);
+
+ for (set<CDir*>::iterator it = import_bounds[dir].begin();
+ it != import_bounds[dir].end();
+ it++) {
+ CDir *bd = *it;
+
+ // remove bound pin
+ bd->put(CDir::PIN_IMPORTBOUND);
+ bd->state_clear(CDir::STATE_IMPORTBOUND);
+ }
+
// unfreeze
dir->unfreeze_tree();
void Migrator::handle_export_dir_warning(MExportDirWarning *m)
{
- CInode *diri = cache->get_inode(m->get_ino());
- CDir *dir = 0;
- if (diri) dir = diri->dir;
+ CDir *dir = cache->get_dir(m->get_ino());
int oldauth = m->get_source().num();
int newauth = m->get_new_dir_auth();
void Migrator::handle_export_dir_notify(MExportDirNotify *m)
{
- CInode *diri = cache->get_inode(m->get_ino());
- CDir *dir = 0;
- if (diri) dir = diri->dir;
+ CDir *dir = cache->get_dir(m->get_ino());
int from = m->get_source().num();
pair<int,int> auth = m->get_auth();
} else {
dout(7) << "handle_export_dir_notify " << auth
<< " on " << *dir << endl;
- // see which bounds we have in our cache
- set<CDir*> bounds;
- for (list<inodeno_t>::iterator p = m->get_exports().begin();
- p != m->get_exports().end();
- ++p) {
- CInode *in = cache->get_inode(*p);
- if (in && in->dir)
- bounds.insert(in->dir);
- }
-
// adjust auth
- cache->adjust_bounded_subtree_auth(dir, bounds, auth);
- cache->verify_subtree_bounds(dir, bounds);
+ cache->adjust_bounded_subtree_auth(dir, m->get_exports(), auth);
// induce a merge?
cache->try_subtree_merge(dir);
MDCache *cache;
// -- exports --
+public:
// export stages. used to clean up intelligently if there's a failure.
const static int EXPORT_DISCOVERING = 1; // dest is disovering export dir
const static int EXPORT_FREEZING = 2; // we're freezing the dir tree
const static int EXPORT_EXPORTING = 6; // sent actual export, waiting for ack
const static int EXPORT_LOGGINGFINISH = 7; // logging EExportFinish
const static int EXPORT_NOTIFYING = 8; // waiting for notifyacks
-
+
+protected:
// export fun
map<CDir*,int> export_state;
map<CDir*,int> export_peer;
// -- imports --
+public:
const static int IMPORT_DISCOVERED = 1; // waiting for prep
const static int IMPORT_PREPPING = 2; // opening dirs on bounds
const static int IMPORT_PREPPED = 3; // opened bounds, waiting for import
const static int IMPORT_ACKING = 5; // logged EImportStart, sent ack, waiting for finish
const static int IMPORT_LOGGINGFINISH = 6; // logging EImportFinish
- map<inodeno_t,int> import_state;
- map<inodeno_t,int> import_peer;
- map<inodeno_t,set<inodeno_t> > import_bound_inos;
- map<CDir*,set<CDir*> > import_bounds;
+protected:
+ map<inodeno_t,int> import_state;
+ map<inodeno_t,int> import_peer;
+ map<inodeno_t,list<inodeno_t> > import_bound_inos;
+ map<CDir*,set<CDir*> > import_bounds;
// -- hashing madness --
return 0;
}
bool is_importing() { return !import_state.empty(); }
- const set<inodeno_t>& get_import_bound_inos(inodeno_t base) {
+ const list<inodeno_t>& get_import_bound_inos(inodeno_t base) {
assert(import_bound_inos.count(base));
return import_bound_inos[base];
}
return import_bounds[base];
}
+ int get_import_state(inodeno_t dirino) {
+ assert(import_state.count(dirino));
+ return import_state[dirino];
+ }
+ int get_import_peer(inodeno_t dirino) {
+ assert(import_peer.count(dirino));
+ return import_peer[dirino];
+ }
+
// -- misc --
void handle_mds_failure(int who);
void got_hashed_replica(CDir *import,
inodeno_t dir_ino,
inodeno_t replica_ino);
+public:
void reverse_import(CDir *dir);
+protected:
void import_dir_logged_start(CDir *dir, int from,
list<inodeno_t> &imported_subdirs,
list<inodeno_t> &exports);
void handle_export_dir_finish(MExportDirFinish *m);
+public:
+ void import_dir_finish(CDir *dir);
+protected:
void import_dir_logged_finish(CDir *dir);
friend class C_MDC_ExportDirDiscover;
dout(10) << "file_rename_finish on " << *in << endl;
// did i empty out an imported dir? FIXME this check should go somewhere else???
- if (srcdir->is_import() && !srcdir->inode->is_root() && srcdir->get_size() == 0)
+ if (srcdir->is_auth() && !srcdir->inode->is_auth() && srcdir->get_size() == 0)
cache->migrator->export_empty_import(srcdir);
// finish our caller
if (!in->is_auth()) {
// i should be exporting this now/soon, since the dir is empty.
dout(7) << "handle_client_rmdir dir is auth, but not inode." << endl;
- if (!in->dir->is_freezing() && in->dir->is_frozen()) {
- assert(in->dir->is_import());
- mdcache->migrator->export_empty_import(in->dir);
- } else {
- dout(7) << "apparently already exporting" << endl;
- }
+ mdcache->migrator->export_empty_import(in->dir);
in->dir->add_waiter(CDIR_WAIT_UNFREEZE,
new C_MDS_RetryRequest(mds, req, diri));
return;
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef __EEXPORT_H
+#define __EEXPORT_H
+
+#include <assert.h>
+#include "config.h"
+#include "include/types.h"
+
+#include "../MDS.h"
+
+#include "EMetaBlob.h"
+
+class EExport : public LogEvent {
+public:
+ EMetaBlob metablob; // exported dir
+protected:
+ inodeno_t dirino;
+ set<inodeno_t> bounds;
+
+public:
+ EExport(CDir *dir) : LogEvent(EVENT_EXPORT),
+ dirino(dir->ino()) {
+ metablob.add_dir_context(dir);
+ }
+ EExport() : LogEvent(EVENT_EXPORT) { }
+
+ set<inodeno_t> &get_bounds() { return bounds; }
+
+ void print(ostream& out) {
+ out << "export " << dirino << " " << metablob;
+ }
+
+ virtual void encode_payload(bufferlist& bl) {
+ metablob._encode(bl);
+ bl.append((char*)&dirino, sizeof(dirino));
+ ::_encode(bounds, bl);
+ }
+ void decode_payload(bufferlist& bl, int& off) {
+ metablob._decode(bl, off);
+ bl.copy(off, sizeof(dirino), (char*)&dirino);
+ off += sizeof(dirino);
+ ::_decode(bounds, bl, off);
+ }
+
+ bool has_expired(MDS *mds);
+ void expire(MDS *mds, Context *c);
+ void replay(MDS *mds);
+
+};
+
+#endif
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#ifndef __EEXPORTFINISH_H
-#define __EEXPORTFINISH_H
-
-#include <assert.h>
-#include "config.h"
-#include "include/types.h"
-
-#include "../MDS.h"
-
-class EExportFinish : public LogEvent {
- protected:
- inodeno_t dirino; // exported dir
- bool success;
-
- public:
- EExportFinish(CDir *dir, bool s) : LogEvent(EVENT_EXPORTFINISH),
- dirino(dir->ino()),
- success(s) { }
- EExportFinish() : LogEvent(EVENT_EXPORTFINISH) { }
-
- void print(ostream& out) {
- out << "export_finish " << dirino;
- if (success)
- out << " success";
- else
- out << " failure";
- }
-
- virtual void encode_payload(bufferlist& bl) {
- bl.append((char*)&dirino, sizeof(dirino));
- bl.append((char*)&success, sizeof(success));
- }
- void decode_payload(bufferlist& bl, int& off) {
- bl.copy(off, sizeof(dirino), (char*)&dirino);
- off += sizeof(dirino);
- bl.copy(off, sizeof(success), (char*)&success);
- off += sizeof(success);
- }
-
- bool has_expired(MDS *mds);
- void expire(MDS *mds, Context *c);
- void replay(MDS *mds);
-
-};
-
-#endif
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#ifndef __EEXPORTSTART_H
-#define __EEXPORTSTART_H
-
-#include <assert.h>
-#include "config.h"
-#include "include/types.h"
-
-#include "../MDS.h"
-
-#include "EMetaBlob.h"
-
-class EExportStart : public LogEvent {
- public:
- EMetaBlob metablob; // exported dir
- protected:
- inodeno_t dirino;
- int dest; // dest mds
- set<inodeno_t> bounds;
-
- public:
- EExportStart(CDir *dir, int d) : LogEvent(EVENT_EXPORTSTART),
- dirino(dir->ino()),
- dest(d) {
- metablob.add_dir_context(dir);
- }
- EExportStart() : LogEvent(EVENT_EXPORTSTART) { }
-
- set<inodeno_t> &get_bounds() { return bounds; }
-
- void print(ostream& out) {
- out << "export_start " << dirino << " -> " << dest;
- }
-
- virtual void encode_payload(bufferlist& bl) {
- metablob._encode(bl);
- bl.append((char*)&dirino, sizeof(dirino));
- bl.append((char*)&dest, sizeof(dest));
- ::_encode(bounds, bl);
- }
- void decode_payload(bufferlist& bl, int& off) {
- metablob._decode(bl, off);
- bl.copy(off, sizeof(dirino), (char*)&dirino);
- off += sizeof(dirino);
- bl.copy(off, sizeof(dest), (char*)&dest);
- off += sizeof(dest);
- ::_decode(bounds, bl, off);
- }
-
- bool has_expired(MDS *mds);
- void expire(MDS *mds, Context *c);
- void replay(MDS *mds);
-
-};
-
-#endif
public:
EMetaBlob metablob;
set<inodeno_t> imports;
- set<inodeno_t> exports;
//set<inodeno_t> hashdirs;
- map<inodeno_t, set<inodeno_t> > nested_exports;
+ map<inodeno_t, set<inodeno_t> > bounds;
EImportMap() : LogEvent(EVENT_IMPORTMAP) { }
void print(ostream& out) {
- out << "import_map " << imports.size() << " imports, "
- << exports.size() << " exports"
- << " " << metablob;
+ out << "import_map " << imports.size() << " imports "
+ << metablob;
}
void encode_payload(bufferlist& bl) {
metablob._encode(bl);
::_encode(imports, bl);
- ::_encode(exports, bl);
for (set<inodeno_t>::iterator p = imports.begin();
p != imports.end();
++p) {
- ::_encode(nested_exports[*p], bl);
- if (nested_exports[*p].empty())
- nested_exports.erase(*p);
+ ::_encode(bounds[*p], bl);
+ if (bounds[*p].empty())
+ bounds.erase(*p);
}
}
void decode_payload(bufferlist& bl, int& off) {
metablob._decode(bl, off);
::_decode(imports, bl, off);
- ::_decode(exports, bl, off);
for (set<inodeno_t>::iterator p = imports.begin();
p != imports.end();
++p) {
- ::_decode(nested_exports[*p], bl, off);
- if (nested_exports[*p].empty())
- nested_exports.erase(*p);
+ ::_decode(bounds[*p], bl, off);
+ if (bounds[*p].empty())
+ bounds.erase(*p);
}
}
/* dirlump - contains metadata for any dir we have contents for.
*/
struct dirlump {
- static const int STATE_IMPORT = (1<<0);
+ //static const int STATE_IMPORT = (1<<0);
static const int STATE_COMPLETE = (1<<1);
static const int STATE_DIRTY = (1<<2); // dirty due to THIS journal item, that is!
public:
dirlump() : state(0), nfull(0), nremote(0), nnull(0), dn_decoded(true) { }
- bool is_import() { return state & STATE_IMPORT; }
- void mark_import() { state |= STATE_IMPORT; }
+ //bool is_import() { return state & STATE_IMPORT; }
+ //void mark_import() { state |= STATE_IMPORT; }
bool is_complete() { return state & STATE_COMPLETE; }
void mark_complete() { state |= STATE_COMPLETE; }
bool is_dirty() { return state & STATE_DIRTY; }
}
dirlump& l = lump_map[dir->ino()];
if (dir->is_complete()) l.mark_complete();
- if (dir->is_import()) l.mark_import();
+ //if (dir->is_import()) l.mark_import();
if (dirty) l.mark_dirty();
return l;
}
return;
CInode *diri = dir->get_inode();
- if (!toroot &&
- (dir->is_import() || dir->is_hashed()))
+ if (!toroot && dir->is_subtree_root() && dir->is_auth())
return; // stop at import point
if (!dir->get_inode()->get_parent_dn())
return;
#include "events/EPurgeFinish.h"
#include "events/EUnlink.h"
-#include "events/EExportStart.h"
-#include "events/EExportFinish.h"
+#include "events/EExport.h"
#include "events/EImportStart.h"
#include "events/EImportFinish.h"
void EImportMap::replay(MDS *mds)
{
- /*
- if (!mds->mdcache->imports.empty()) {
+ if (!mds->mdcache->subtrees.empty()) {
dout(10) << "EImportMap.replay -- ignoring, already have import map" << endl;
} else {
- dout(10) << "EImportMap.replay -- reconstructing import/export spanning tree" << endl;
-
+ dout(10) << "EImportMap.replay -- reconstructing (auth) subtree spanning tree" << endl;
+
// first, stick the spanning tree in my cache
metablob.replay(mds);
for (set<inodeno_t>::iterator p = imports.begin();
p != imports.end();
++p) {
- mds->mdcache->add_ambiguous_import(*p, nested_exports[*p]);
- mds->mdcache->finish_ambiguous_import(*p);
+ CInode *diri = mds->mdcache->get_inode(*p);
+ CDir *dir = diri->dir;
+ mds->mdcache->adjust_subtree_auth(dir, mds->get_nodeid());
}
}
- */
mds->mdcache->show_imports();
}
// =========================================================================
// -----------------------
-// EExportStart
+// EExport
-bool EExportStart::has_expired(MDS *mds)
+bool EExport::has_expired(MDS *mds)
{
CInode *diri = mds->mdcache->get_inode(dirino);
if (!diri) return true;
if (!dir) return true;
if (!mds->mdcache->migrator->is_exporting(dir))
return true;
- dout(10) << "EExportStart.has_expired still exporting " << *dir << endl;
+ dout(10) << "EExport.has_expired still exporting " << *dir << endl;
return false;
}
-void EExportStart::expire(MDS *mds, Context *c)
+void EExport::expire(MDS *mds, Context *c)
{
CInode *diri = mds->mdcache->get_inode(dirino);
assert(diri);
assert(dir);
assert(mds->mdcache->migrator->is_exporting(dir));
- dout(10) << "EExportStart.expire waiting for export of " << *dir << endl;
+ dout(10) << "EExport.expire waiting for export of " << *dir << endl;
mds->mdcache->migrator->add_export_finish_waiter(dir, c);
}
-void EExportStart::replay(MDS *mds)
+void EExport::replay(MDS *mds)
{
- dout(10) << "EExportStart.replay " << dirino << " -> " << dest << endl;
+ dout(10) << "EExport.replay " << dirino << endl;
metablob.replay(mds);
- // put in pending_exports lists
- mds->mdlog->pending_exports[dirino] = bounds;
-}
-
-// -----------------------
-// EExportFinish
-
-bool EExportFinish::has_expired(MDS *mds)
-{
- // we can always expire.
- return true;
-}
-
-void EExportFinish::expire(MDS *mds, Context *c)
-{
- assert(0); // should never happen.
-}
-
-void EExportFinish::replay(MDS *mds)
-{
- dout(10) << "EExportFinish.replay " << dirino << " success=" << success << endl;
-
- assert(mds->mdlog->pending_exports.count(dirino));
-
- // finish?
- if (success)
- mds->mdcache->finish_ambiguous_export(dirino, mds->mdlog->pending_exports[dirino]);
+ CInode *diri = mds->mdcache->get_inode(dirino);
+ assert(diri);
+ CDir *dir = diri->dir;
+ assert(dir);
+
+ set<CDir*> realbounds;
+ for (set<inodeno_t>::iterator p = bounds.begin();
+ p != bounds.end();
+ ++p) {
+ CInode *bdi = mds->mdcache->get_inode(*p);
+ CDir *bd = bdi->dir;
+ assert(bd);
+ realbounds.insert(bd);
+ }
- // remove from pending_exports list
- mds->mdlog->pending_exports.erase(dirino);
+ // adjust auth away
+ mds->mdcache->adjust_bounded_subtree_auth(dir, realbounds, pair<int,int>(CDIR_AUTH_UNKNOWN, CDIR_AUTH_UNKNOWN));
+ mds->mdcache->try_subtree_merge(dir);
}
dout(10) << "EImportStart.replay " << dirino << endl;
metablob.replay(mds);
- // convert list -> set
- set<inodeno_t> b;
- for (list<inodeno_t>::iterator p = bounds.begin(); p != bounds.end(); ++p)
- b.insert(*p);
-
// put in ambiguous import list
- mds->mdcache->add_ambiguous_import(dirino, b);
+ mds->mdcache->add_ambiguous_import(dirino, bounds);
}
// -----------------------
}
virtual char *get_type_name() { return "Cfcap";}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(seq), (char*)&seq);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(seq), (char*)&seq);
off += sizeof(seq);
- s.copy(off, sizeof(inode), (char*)&inode);
+ payload.copy(off, sizeof(inode), (char*)&inode);
off += sizeof(inode);
- s.copy(off, sizeof(caps), (char*)&caps);
+ payload.copy(off, sizeof(caps), (char*)&caps);
off += sizeof(caps);
- s.copy(off, sizeof(wanted), (char*)&wanted);
+ payload.copy(off, sizeof(wanted), (char*)&wanted);
off += sizeof(wanted);
- //s.copy(off, sizeof(client), (char*)&client);
+ //payload.copy(off, sizeof(client), (char*)&client);
//off += sizeof(client);
- s.copy(off, sizeof(mds), (char*)&mds);
+ payload.copy(off, sizeof(mds), (char*)&mds);
off += sizeof(mds);
- s.copy(off, sizeof(special), (char*)&special);
+ payload.copy(off, sizeof(special), (char*)&special);
off += sizeof(special);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&seq, sizeof(seq));
- s.append((char*)&inode, sizeof(inode));
- s.append((char*)&caps, sizeof(caps));
- s.append((char*)&wanted, sizeof(wanted));
- //s.append((char*)&client, sizeof(client));
- s.append((char*)&mds,sizeof(mds));
- s.append((char*)&special,sizeof(special));
+ virtual void encode_payload() {
+ payload.append((char*)&seq, sizeof(seq));
+ payload.append((char*)&inode, sizeof(inode));
+ payload.append((char*)&caps, sizeof(caps));
+ payload.append((char*)&wanted, sizeof(wanted));
+ //payload.append((char*)&client, sizeof(client));
+ payload.append((char*)&mds,sizeof(mds));
+ payload.append((char*)&special,sizeof(special));
}
};
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-
-#ifndef __MCLIENTINODEAUTHUPDATE_H
-#define __MCLIENTINODEAUTHUPDATE_H
-
-class MClientInodeAuthUpdate : public Message {
- inodeno_t ino;
- int newauth;
-
- public:
- inodeno_t get_ino() { return ino; }
- int get_auth() { return newauth; }
-
- MClientInodeAuthUpdate() {}
- MClientInodeAuthUpdate(inodeno_t ino, int newauth) :
- Message(MSG_CLIENT_INODEAUTHUPDATE) {
- this->ino = ino;
- this->newauth = newauth;
- }
- virtual char *get_type_name() { return "Ciau";}
-
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(ino), (char*)&ino);
- off += sizeof(ino);
- s.copy(off, sizeof(newauth), (char*)&newauth);
- off += sizeof(newauth);
- }
- virtual void encode_payload(crope& s) {
- s.append((char*)&ino,sizeof(ino));
- s.append((char*)&newauth,sizeof(newauth));
- }
-};
-
-#endif
char *get_type_name() { return "Cmnt"; }
- virtual void decode_payload(crope& s, int& off) {
+ virtual void decode_payload() {
}
- virtual void encode_payload(crope& s) {
+ virtual void encode_payload() {
}
};
}
virtual char *get_type_name() { return "Dun";}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(dirino), (char*)&dirino);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(dirino), (char*)&dirino);
off += sizeof(dirino);
- _unrope(dn, s, off);
+ ::_decode(dn, payload, off);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&dirino,sizeof(dirino));
- _rope(dn, s);
+ virtual void encode_payload() {
+ payload.append((char*)&dirino,sizeof(dirino));
+ ::_encode(dn, payload);
}
};
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-
-#ifndef __MDIREXPIRE_H
-#define __MDIREXPIRE_H
-
-typedef struct {
- inodeno_t ino;
- int nonce;
- int from;
-} MDirExpire_st;
-
-class MDirExpire : public Message {
- MDirExpire_st st;
-
- public:
- inodeno_t get_ino() { return st.ino; }
- int get_from() { return st.from; }
- int get_nonce() { return st.nonce; }
-
- MDirExpire() {}
- MDirExpire(inodeno_t ino, int from, int nonce) :
- Message(MSG_MDS_DIREXPIRE) {
- st.ino = ino;
- st.from = from;
- st.nonce = nonce;
- }
- virtual char *get_type_name() { return "DirEx";}
-
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(st), (char*)&st);
- off += sizeof(st);
- }
- virtual void encode_payload(crope& s) {
- s.append((char*)&st,sizeof(st));
- }
-};
-
-#endif
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-
-#ifndef __MDIREXPIREREQ_H
-#define __MDIREXPIREREQ_H
-
-typedef struct {
- inodeno_t ino;
- int nonce;
- int from;
-} MDirExpireReq_st;
-
-class MDirExpire : public Message {
- MDirExpireReq_st st;
-
- public:
- inodeno_t get_ino() { return st.ino; }
- int get_from() { return st.from; }
- int get_nonce() { return st.nonce; }
-
- MDirExpire() {}
- MDirExpire(inodeno_t ino, int from, int nonce) :
- Message(MSG_MDS_DIREXPIREREQ) {
- st.ino = ino;
- st.from = from;
- st.nonce = nonce;
- }
- virtual char *get_type_name() { return "DirExR";}
-
- virtual void decode_payload(crope& s) {
- s.copy(0, sizeof(st), (char*)&st);
- }
- virtual void encode_payload(crope& s) {
- s.append((char*)&st,sizeof(st));
- }
-};
-
-#endif
}
virtual char *get_type_name() { return "dup"; }
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(st), (char*)&st);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(st), (char*)&st);
off += sizeof(st);
- _unrope(dir_rep_by, s, off);
- _unrope(path, s, off);
+ ::_decode(dir_rep_by, payload, off);
+ ::_decode(path, payload, off);
}
- virtual void encode_payload(crope& r) {
- r.append((char*)&st, sizeof(st));
- _rope(dir_rep_by, r);
- _rope(path, r);
+ virtual void encode_payload() {
+ payload.append((char*)&st, sizeof(st));
+ ::_encode(dir_rep_by, payload);
+ ::_encode(path, payload);
}
};
}
virtual char *get_type_name() { return "Dis"; }
- virtual void decode_payload(crope& r, int& off) {
- r.copy(off, sizeof(asker), (char*)&asker);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(asker), (char*)&asker);
off += sizeof(asker);
- r.copy(off, sizeof(base_ino), (char*)&base_ino);
+ payload.copy(off, sizeof(base_ino), (char*)&base_ino);
off += sizeof(base_ino);
- r.copy(off, sizeof(bool), (char*)&want_base_dir);
+ payload.copy(off, sizeof(bool), (char*)&want_base_dir);
off += sizeof(bool);
- want._unrope(r, off);
+ want._decode(payload, off);
}
- virtual void encode_payload(crope& r) {
- r.append((char*)&asker, sizeof(asker));
- r.append((char*)&base_ino, sizeof(base_ino));
- r.append((char*)&want_base_dir, sizeof(want_base_dir));
- want._rope(r);
+ virtual void encode_payload() {
+ payload.append((char*)&asker, sizeof(asker));
+ payload.append((char*)&base_ino, sizeof(base_ino));
+ payload.append((char*)&want_base_dir, sizeof(want_base_dir));
+ want._encode(payload);
}
};
entity_name_t get_failed() { return failed; }
- virtual void decode_payload(crope& s, int& off) {
- s.copy(0, sizeof(failed), (char*)&failed);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(0, sizeof(failed), (char*)&failed);
off += sizeof(failed);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&failed, sizeof(failed));
+ virtual void encode_payload() {
+ payload.append((char*)&failed, sizeof(failed));
}
- virtual char *get_type_name() { return "faila"; }
+ virtual char *get_type_name() { return "failack"; }
};
#endif
virtual char *get_type_name() { return "HB"; }
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off,sizeof(load), (char*)&load);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off,sizeof(load), (char*)&load);
off += sizeof(load);
- s.copy(off, sizeof(beat), (char*)&beat);
+ payload.copy(off, sizeof(beat), (char*)&beat);
off += sizeof(beat);
-
- int n;
- s.copy(off, sizeof(n), (char*)&n);
- off += sizeof(n);
- while (n--) {
- int f;
- s.copy(off, sizeof(f), (char*)&f);
- off += sizeof(f);
- float v;
- s.copy(off, sizeof(v), (char*)&v);
- off += sizeof(v);
- import_map[f] = v;
- }
+ ::_decode(import_map, payload, off);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&load, sizeof(load));
- s.append((char*)&beat, sizeof(beat));
-
- int n = import_map.size();
- s.append((char*)&n, sizeof(n));
- for (map<int, float>::iterator it = import_map.begin();
- it != import_map.end();
- it++) {
- int f = it->first;
- s.append((char*)&f, sizeof(f));
- float v = it->second;
- s.append((char*)&v, sizeof(v));
- }
-
+ virtual void encode_payload() {
+ payload.append((char*)&load, sizeof(load));
+ payload.append((char*)&beat, sizeof(beat));
+ ::_encode(import_map, payload);
}
};
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-
-#ifndef __MINODEEXPIRE_H
-#define __MINODEEXPIRE_H
-
-typedef struct {
- inodeno_t ino;
- int nonce;
- int from;
-} MInodeExpire_st;
-
-class MInodeExpire : public Message {
- MInodeExpire_st st;
-
- public:
- inodeno_t get_ino() { return st.ino; }
- int get_from() { return st.from; }
- int get_nonce() { return st.nonce; }
-
- MInodeExpire() {}
- MInodeExpire(inodeno_t ino, int from, int nonce) :
- Message(MSG_MDS_INODEEXPIRE) {
- st.ino = ino;
- st.from = from;
- st.nonce = nonce;
- }
- virtual char *get_type_name() { return "InEx";}
-
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(st), (char*)&st);
- off += sizeof(st);
- }
- virtual void encode_payload(crope& s) {
- s.append((char*)&st,sizeof(st));
- }
-};
-
-#endif
virtual char *get_type_name() { return "Icap";}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(from), (char*)&from);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(from), (char*)&from);
off += sizeof(from);
- s.copy(off, sizeof(ino), (char*)&ino);
+ payload.copy(off, sizeof(ino), (char*)&ino);
off += sizeof(ino);
- s.copy(off, sizeof(caps), (char*)&caps);
+ payload.copy(off, sizeof(caps), (char*)&caps);
off += sizeof(caps);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&from, sizeof(from));
- s.append((char*)&ino, sizeof(ino));
- s.append((char*)&caps, sizeof(caps));
+ virtual void encode_payload() {
+ payload.append((char*)&from, sizeof(from));
+ payload.append((char*)&ino, sizeof(ino));
+ payload.append((char*)&caps, sizeof(caps));
}
};
}
virtual char *get_type_name() { return "InL";}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(st), (char*)&st);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(st), (char*)&st);
off += sizeof(st);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&st,sizeof(st));
+ virtual void encode_payload() {
+ payload.append((char*)&st,sizeof(st));
}
};
}
virtual char *get_type_name() { return "InLA";}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(st), (char*)&st);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(st), (char*)&st);
off += sizeof(st);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&st,sizeof(st));
+ virtual void encode_payload() {
+ payload.append((char*)&st,sizeof(st));
}
};
}
virtual char *get_type_name() { return "InUl";}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(st), (char*)&st);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(st), (char*)&st);
off += sizeof(st);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&st,sizeof(st));
+ virtual void encode_payload() {
+ payload.append((char*)&st,sizeof(st));
}
};
}
virtual char *get_type_name() { return "InUlA";}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(st), (char*)&st);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(st), (char*)&st);
off += sizeof(st);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&st,sizeof(st));
+ virtual void encode_payload() {
+ payload.append((char*)&st,sizeof(st));
}
};
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-
-#ifndef __MINODEUPDATE_H
-#define __MINODEUPDATE_H
-
-#include "msg/Message.h"
-
-#include <set>
-using namespace std;
-
-class MInodeUpdate : public Message {
- int nonce;
- crope inode_basic_state;
-
- public:
- inodeno_t get_ino() {
- inodeno_t ino;
- inode_basic_state.copy(0, sizeof(inodeno_t), (char*)&ino);
- return ino;
- }
- int get_nonce() { return nonce; }
-
- MInodeUpdate() {}
- MInodeUpdate(CInode *in, int nonce) :
- Message(MSG_MDS_INODEUPDATE) {
- inode_basic_state = in->encode_basic_state();
- this->nonce = nonce;
- }
- virtual char *get_type_name() { return "Iup"; }
-
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(int), (char*)&nonce);
- off += sizeof(int);
- size_t len;
- s.copy(off, sizeof(len), (char*)&len);
- off += sizeof(len);
- inode_basic_state = s.substr(off, len);
- off += len;
- }
- virtual void encode_payload(crope& s) {
- s.append((char*)&nonce, sizeof(int));
- size_t len = inode_basic_state.length();
- s.append((char*)&len, sizeof(len));
- s.append(inode_basic_state);
- }
-
-};
-
-#endif
class MMDSImportMap : public Message {
public:
- map<inodeno_t, set<inodeno_t> > imap;
- map<inodeno_t, set<inodeno_t> > ambiguous_imap;
+ map<inodeno_t, list<inodeno_t> > imap;
+ map<inodeno_t, list<inodeno_t> > ambiguous_imap;
MMDSImportMap() : Message(MSG_MDS_IMPORTMAP) {}
imap[im].clear();
}
void add_import_export(inodeno_t im, inodeno_t ex) {
- imap[im].insert(ex);
+ imap[im].push_back(ex);
}
- void add_ambiguous_import(inodeno_t im, const set<inodeno_t>& m) {
+ void add_ambiguous_import(inodeno_t im, const list<inodeno_t>& m) {
ambiguous_imap[im] = m;
}
}
MPing() : Message(MSG_PING) {}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(0, sizeof(seq), (char*)&seq);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(0, sizeof(seq), (char*)&seq);
off += sizeof(seq);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&seq, sizeof(seq));
+ virtual void encode_payload() {
+ payload.append((char*)&seq, sizeof(seq));
}
virtual char *get_type_name() { return "ping"; }
this->seq = p->seq;
}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(0, sizeof(seq), (char*)&seq);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(0, sizeof(seq), (char*)&seq);
off += sizeof(seq);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&seq, sizeof(seq));
+ virtual void encode_payload() {
+ payload.append((char*)&seq, sizeof(seq));
}
virtual char *get_type_name() { return "pinga"; }
}
virtual char *get_type_name() { return "RnAck";}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(ino), (char*)&ino);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(ino), (char*)&ino);
off += sizeof(ino);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&ino,sizeof(ino));
+ virtual void encode_payload() {
+ payload.append((char*)&ino,sizeof(ino));
}
};
}
virtual char *get_type_name() { return "Rnot";}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(ino), (char*)&ino);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(ino), (char*)&ino);
off += sizeof(ino);
- s.copy(off, sizeof(srcdirino), (char*)&srcdirino);
+ payload.copy(off, sizeof(srcdirino), (char*)&srcdirino);
off += sizeof(srcdirino);
- s.copy(off, sizeof(destdirino), (char*)&destdirino);
+ payload.copy(off, sizeof(destdirino), (char*)&destdirino);
off += sizeof(destdirino);
- _unrope(srcname, s, off);
- _unrope(destname, s, off);
- _unrope(destdirpath, s, off);
- s.copy(off, sizeof(srcauth), (char*)&srcauth);
+ ::_decode(srcname, payload, off);
+ ::_decode(destname, payload, off);
+ ::_decode(destdirpath, payload, off);
+ payload.copy(off, sizeof(srcauth), (char*)&srcauth);
off += sizeof(srcauth);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&ino,sizeof(ino));
- s.append((char*)&srcdirino,sizeof(srcdirino));
- s.append((char*)&destdirino,sizeof(destdirino));
- _rope(srcname, s);
- _rope(destname, s);
- _rope(destdirpath, s);
- s.append((char*)&srcauth, sizeof(srcauth));
+ virtual void encode_payload() {
+ payload.append((char*)&ino,sizeof(ino));
+ payload.append((char*)&srcdirino,sizeof(srcdirino));
+ payload.append((char*)&destdirino,sizeof(destdirino));
+ ::_encode(srcname, payload);
+ ::_encode(destname, payload);
+ ::_encode(destdirpath, payload);
+ payload.append((char*)&srcauth, sizeof(srcauth));
}
};
}
virtual char *get_type_name() { return "RnotA";}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(ino), (char*)&ino);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(ino), (char*)&ino);
off += sizeof(ino);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&ino,sizeof(ino));
+ virtual void encode_payload() {
+ payload.append((char*)&ino,sizeof(ino));
}
};
}
virtual char *get_type_name() { return "RnP";}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(initiator), (char*)&initiator);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(initiator), (char*)&initiator);
off += sizeof(initiator);
- s.copy(off, sizeof(srcdirino), (char*)&srcdirino);
+ payload.copy(off, sizeof(srcdirino), (char*)&srcdirino);
off += sizeof(srcdirino);
- s.copy(off, sizeof(destdirino), (char*)&destdirino);
+ payload.copy(off, sizeof(destdirino), (char*)&destdirino);
off += sizeof(destdirino);
- _unrope(srcname, s, off);
- _unrope(srcpath, s, off);
- _unrope(destname, s, off);
- _unrope(destpath, s, off);
- s.copy(off, sizeof(srcauth), (char*)&srcauth);
+ ::_decode(srcname, payload, off);
+ ::_decode(srcpath, payload, off);
+ ::_decode(destname, payload, off);
+ ::_decode(destpath, payload, off);
+ payload.copy(off, sizeof(srcauth), (char*)&srcauth);
off += sizeof(srcauth);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&initiator,sizeof(initiator));
- s.append((char*)&srcdirino,sizeof(srcdirino));
- s.append((char*)&destdirino,sizeof(destdirino));
- _rope(srcname, s);
- _rope(srcpath, s);
- _rope(destname, s);
- _rope(destpath, s);
- s.append((char*)&srcauth, sizeof(srcauth));
+ virtual void encode_payload() {
+ payload.append((char*)&initiator,sizeof(initiator));
+ payload.append((char*)&srcdirino,sizeof(srcdirino));
+ payload.append((char*)&destdirino,sizeof(destdirino));
+ ::_encode(srcname, payload);
+ ::_encode(srcpath, payload);
+ ::_encode(destname, payload);
+ ::_encode(destpath, payload);
+ payload.append((char*)&srcauth, sizeof(srcauth));
}
};
}
virtual char *get_type_name() { return "RnReq";}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(initiator), (char*)&initiator);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(initiator), (char*)&initiator);
off += sizeof(initiator);
- s.copy(off, sizeof(srcdirino), (char*)&srcdirino);
+ payload.copy(off, sizeof(srcdirino), (char*)&srcdirino);
off += sizeof(srcdirino);
- s.copy(off, sizeof(destdirino), (char*)&destdirino);
+ payload.copy(off, sizeof(destdirino), (char*)&destdirino);
off += sizeof(destdirino);
- _unrope(srcname, s, off);
- _unrope(destname, s, off);
- _unrope(destpath, s, off);
- s.copy(off, sizeof(destauth), (char*)&destauth);
+ ::_decode(srcname, payload, off);
+ ::_decode(destname, payload, off);
+ ::_decode(destpath, payload, off);
+ payload.copy(off, sizeof(destauth), (char*)&destauth);
off += sizeof(destauth);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&initiator,sizeof(initiator));
- s.append((char*)&srcdirino,sizeof(srcdirino));
- s.append((char*)&destdirino,sizeof(destdirino));
- _rope(srcname, s);
- _rope(destname, s);
- _rope(destpath, s);
- s.append((char*)&destauth, sizeof(destauth));
+ virtual void encode_payload() {
+ payload.append((char*)&initiator,sizeof(initiator));
+ payload.append((char*)&srcdirino,sizeof(srcdirino));
+ payload.append((char*)&destdirino,sizeof(destdirino));
+ ::_encode(srcname, payload);
+ ::_encode(destname, payload);
+ ::_encode(destpath, payload);
+ payload.append((char*)&destauth, sizeof(destauth));
}
};
}
virtual char *get_type_name() { return "RnW";}
- virtual void decode_payload(crope& s, int& off) {
- s.copy(off, sizeof(ino), (char*)&ino);
+ virtual void decode_payload() {
+ int off = 0;
+ payload.copy(off, sizeof(ino), (char*)&ino);
off += sizeof(ino);
}
- virtual void encode_payload(crope& s) {
- s.append((char*)&ino,sizeof(ino));
+ virtual void encode_payload() {
+ payload.append((char*)&ino,sizeof(ino));
}
};
#include "messages/MInodeLinkAck.h"
//#include "messages/MInodeUpdate.h"
-#include "messages/MInodeExpire.h"
-#include "messages/MDirExpire.h"
#include "messages/MCacheExpire.h"
#include "messages/MInodeFileCaps.h"
break;
*/
- case MSG_MDS_INODEEXPIRE:
- m = new MInodeExpire();
- break;
-
case MSG_MDS_INODEFILECAPS:
m = new MInodeFileCaps();
break;
- case MSG_MDS_DIREXPIRE:
- m = new MDirExpire();
- break;
-
case MSG_MDS_LOCK:
m = new MLock();
break;
using std::list;
#include <ext/hash_map>
-#include <ext/rope>
-using __gnu_cxx::crope;
#include "include/types.h"
#include "include/buffer.h"
payload.clear();
}
- // overload either the rope version (easier!)
- virtual void encode_payload(crope& s) { assert(0); }
- virtual void decode_payload(crope& s, int& off) { assert(0); }
-
- // of the bufferlist versions (faster!)
- virtual void decode_payload() {
- // use a crope for convenience, small messages, etc. FIXME someday.
- crope ser;
- for (list<bufferptr>::const_iterator it = payload.buffers().begin();
- it != payload.buffers().end();
- it++)
- ser.append((*it).c_str(), (*it).length());
-
- int off = 0;
- decode_payload(ser, off);
- assert((unsigned)off == payload.length());
- }
- virtual void encode_payload() {
- assert(payload.length() == 0); // caller should reset payload
-
- // use crope for convenience, small messages. FIXME someday.
- crope r;
- encode_payload(r);
-
- // copy payload
- payload.push_back( buffer::copy(r.c_str(), r.length()) );
- }
+ virtual void decode_payload() = 0;
+ virtual void encode_payload() = 0;
virtual void print(ostream& out) {
out << get_type_name();
using namespace std;
#include <ext/hash_map>
-#include <ext/rope>
using namespace __gnu_cxx;
#include "include/types.h"