MDS_OBJS= \
mds/MDS.o\
+ mds/journal.o\
mds/Server.o\
mds/MDCache.o\
mds/Locker.o\
inodeno_t remote_ino; // if remote dentry
// state
- bool dirty;
- __uint64_t parent_dir_version; // dir version when last touched.
+ bool dirty;
+ version_t parent_dir_version; // dir version when last touched.
// locking
int lockstate;
#include "config.h"
#undef dout
-#define dout(x) if (x <= g_conf.debug || x <= g_conf.debug_mds) cout << g_clock.now() << " mds" << mds->get_nodeid() << " cdir: "
+#define dout(x) if (x <= g_conf.debug || x <= g_conf.debug_mds) cout << g_clock.now() << " mds" << mds->get_nodeid() << ".cache.dir(" << inode->inode.ino << ") "
// PINS
// state
unsigned state;
- __uint64_t version;
- __uint64_t committing_version;
- __uint64_t last_committed_version;
+ version_t version;
+ version_t committing_version;
+ version_t last_committed_version;
// authority, replicas
set<int> open_by; // nodes that have me open
// -- dirtyness --
- __uint64_t get_version() { return version; }
- void float_version(__uint64_t ge) {
+ version_t get_version() { return version; }
+ void float_version(version_t ge) {
if (version < ge)
version = ge;
}
- __uint64_t get_committing_version() { return committing_version; }
- __uint64_t get_last_committed_version() { return last_committed_version; }
+ void set_version(version_t v) { version = v; }
+
+ version_t get_committing_version() { return committing_version; }
+ version_t get_last_committed_version() { return last_committed_version; }
// as in, we're committing the current version.
void set_committing_version() { committing_version = version; }
- void set_last_committed_version(__uint64_t v) { last_committed_version = v; }
+ void set_last_committed_version(version_t v) { last_committed_version = v; }
void mark_dirty();
void mark_clean();
void mark_complete() { state_set(CDIR_STATE_COMPLETE); }
inodeno_t ino;
__uint64_t nitems; // actual real entries
__uint64_t nden; // num dentries (including null ones)
- __uint64_t version;
+ version_t version;
unsigned state;
meta_load_t popularity_justme;
meta_load_t popularity_curdom;
#include "CDentry.h"
#include "MDS.h"
+#include "MDCache.h"
#include "AnchorTable.h"
#include "common/Clock.h"
#include "config.h"
#undef dout
-#define dout(x) if (x <= g_conf.debug || x <= g_conf.debug_mds) cout << g_clock.now() << " cinode: "
+#define dout(x) if (x <= g_conf.debug || x <= g_conf.debug_mds) cout << g_clock.now() << " mds" << mdcache->mds->get_nodeid() << ".cache.inode(" << inode.ino << ") "
int cinode_pins[CINODE_NUM_PINS]; // counts
if (in.is_symlink()) out << " symlink";
+ out << " v" << in.get_version();
+
out << " hard=" << in.hardlock;
out << " file=" << in.filelock;
// ====== CInode =======
-CInode::CInode(bool auth) : LRUObject() {
+CInode::CInode(MDCache *c, bool auth) : LRUObject() {
+ mdcache = c;
+
ref = 0;
parent = NULL;
num_request_pins = 0;
state = 0;
- version = 0;
+
+ committing_version = committed_version = 0;
if (auth) state_set(CINODE_STATE_AUTH);
}
assert(is_auth());
// touch my private version
- version++;
+ inode.version++;
if (!(state & CINODE_STATE_DIRTY)) {
state |= CINODE_STATE_DIRTY;
get(CINODE_PIN_DIRTY);
}
}
+// Clear the dirty flag and drop the CINODE_PIN_DIRTY pin taken by
+// mark_dirty().  Apart from the debug line this is a no-op when the
+// inode is already clean.
+void CInode::mark_clean()
+{
+  dout(10) << " mark_clean " << *this << endl;
+
+  // already clean?  nothing to undo.
+  if (!is_dirty())
+    return;
+
+  state = state & ~CINODE_STATE_DIRTY;
+  put(CINODE_PIN_DIRTY);
+}
// state
#include "config.h"
#include "include/types.h"
#include "include/lru.h"
-#include "common/DecayCounter.h"
-//#include <sys/stat.h>
#include "CDentry.h"
#include "Lock.h"
class Message;
class CInode;
class CInodeDiscover;
+class MDCache;
//class MInodeSyncStart;
// cached inode wrapper
class CInode : public LRUObject {
public:
+ MDCache *mdcache;
+
inode_t inode; // the inode itself
CDir *dir; // directory, if we have it opened.
protected:
int ref; // reference count
set<int> ref_set;
- version_t version;
- version_t parent_dir_version; // dir version when last touched.
+ version_t parent_dir_version; // parent dir version when i was last touched.
+ version_t committing_version;
+ version_t committed_version;
unsigned state;
public:
// ---------------------------
- CInode(bool auth=true);
+ CInode(MDCache *c, bool auth=true);
~CInode();
// -- dirtyness --
- version_t get_version() { return version; }
+ version_t get_version() { return inode.version; }
version_t get_parent_dir_version() { return parent_dir_version; }
void float_parent_dir_version(version_t ge) {
if (parent_dir_version < ge)
parent_dir_version = ge;
}
-
+ version_t get_committing_version() { return committing_version; }
+ version_t get_last_committed_version() { return committed_version; }
+ void set_committing_version(version_t v) { committing_version = v; }
+ void set_committed_version() {
+ committed_version = committing_version;
+ committing_version = 0;
+ }
+
bool is_dirty() { return state & CINODE_STATE_DIRTY; }
bool is_clean() { return !is_dirty(); }
void mark_dirty();
- void mark_clean() {
- dout(10) << " mark_clean " << *this << endl;
- if (state & CINODE_STATE_DIRTY) {
- state &= ~CINODE_STATE_DIRTY;
- put(CINODE_PIN_DIRTY);
- }
- }
+ void mark_clean();
struct {
inode_t inode;
- version_t version;
meta_load_t popularity_justme;
meta_load_t popularity_curdom;
bool is_dirty; // dirty inode?
CInodeExport() {}
CInodeExport(CInode *in) {
st.inode = in->inode;
- st.version = in->get_version();
st.is_dirty = in->is_dirty();
cached_by = in->cached_by;
cached_by_nonce = in->cached_by_nonce;
void update_inode(CInode *in, set<int>& new_client_caps) {
in->inode = st.inode;
- in->version = st.version;
-
in->popularity[MDS_POP_JUSTME] += st.popularity_justme;
in->popularity[MDS_POP_CURDOM] += st.popularity_curdom;
in->popularity[MDS_POP_ANYDOM] += st.popularity_curdom;
#include "events/EUnlink.h"
#include "events/EAlloc.h"
#include "events/EMknod.h"
+#include "events/EMkdir.h"
+#include "events/EPurgeFinish.h"
LogEvent *LogEvent::decode(bufferlist& bl)
{
case EVENT_UNLINK:
le = new EUnlink();
break;
+
+ case EVENT_PURGEFINISH:
+ le = new EPurgeFinish();
+ break;
case EVENT_ALLOC:
le = new EAlloc();
le = new EMknod();
break;
+ case EVENT_MKDIR:
+ le = new EMkdir();
+ break;
+
default:
dout(1) << "uh oh, unknown event type " << type << endl;
assert(0);
#define __LOGEVENT_H
#define EVENT_STRING 1
+
#define EVENT_INODEUPDATE 2
#define EVENT_DIRUPDATE 3
-#define EVENT_UNLINK 4
-#define EVENT_ALLOC 5
-#define EVENT_MKNOD 6
+
+#define EVENT_ALLOC 10
+#define EVENT_MKNOD 11
+#define EVENT_MKDIR 12
+#define EVENT_LINK 13
+
+#define EVENT_UNLINK 20
+#define EVENT_RMDIR 21
+#define EVENT_PURGEFINISH 22
#include <string>
#include "osdc/Filer.h"
-#include "events/EInodeUpdate.h"
-#include "events/EDirUpdate.h"
#include "events/EUnlink.h"
+#include "events/EPurgeFinish.h"
#include "messages/MGenericMessage.h"
#include "messages/MDiscover.h"
MDCache::~MDCache()
{
+ delete migrator;  // release the migrator helper; presumably allocated by the constructor -- TODO confirm
+ delete renamer;  // release the renamer helper; same ownership assumption as migrator
}
-
void MDCache::log_stat(Logger *logger)
{
if (get_root()) {
CInode *MDCache::create_inode()
{
- CInode *in = new CInode;
+ CInode *in = new CInode(this);
// zero
memset(&in->inode, 0, sizeof(inode_t));
+// Install `in` as the root inode and flag it with CINODE_STATE_ROOT.
+// May only be called while no root is set.  The inode is not added to
+// the cache here; callers do that separately with add_inode().
+void MDCache::set_root(CInode *in)
+{
+  assert(!root);
+
+  root = in;
+  in->state_set(CINODE_STATE_ROOT);
+}
+
+void MDCache::add_import(CDir *dir)  // start tracking dir as an import of this cache
+{
+ imports.insert(dir);  // remember it in the imports collection
+ dir->state_set(CDIR_STATE_IMPORT);  // flag the dir itself as an import
+ dir->get(CDIR_PIN_IMPORT);  // take the import pin; presumably dropped again by remove_import() -- TODO confirm
+}
+
+
+
+
+
+// **************
+// Inode purging -- reliably removing deleted file's objects
+
+// Completion context handed to the filer remove issued by purge_inode()
+// and start_recovered_purges(): forwards to MDCache::purge_inode_finish().
+class C_MDC_PurgeFinish : public Context {
+  MDCache *cache;
+  inodeno_t purged_ino;
+public:
+  C_MDC_PurgeFinish(MDCache *c, inodeno_t i)
+    : cache(c), purged_ino(i) {}
+
+  // the result code is not inspected
+  void finish(int r) {
+    cache->purge_inode_finish(purged_ino);
+  }
+};
+// Completion context handed to the log together with the EPurgeFinish
+// entry: forwards to MDCache::purge_inode_finish_2().
+class C_MDC_PurgeFinish2 : public Context {
+  MDCache *cache;
+  inodeno_t purged_ino;
+public:
+  C_MDC_PurgeFinish2(MDCache *c, inodeno_t i)
+    : cache(c), purged_ino(i) {}
+
+  // the result code is not inspected
+  void finish(int r) {
+    cache->purge_inode_finish_2(purged_ino);
+  }
+};
+
+/* purge_inode
+ *  called on unlink or rmdir to remove a deleted inode's objects.
+ *  the caller is responsible for journaling an appropriate EUnlink or ERmdir.
+ */
+void MDCache::purge_inode(inode_t &inode)
+{
+  const inodeno_t ino = inode.ino;
+  dout(10) << "purge_inode " << ino << " size " << inode.size << endl;
+
+  // remember the purge (a copy of the inode, keyed by ino); the same ino
+  // must not already be in the purging map.
+  assert(purging.count(ino) == 0);
+  purging[ino] = inode;
+
+  // hand the removal (0 .. inode.size) to the filer; purge_inode_finish()
+  // is called through the completion context.
+  Context *onfinish = new C_MDC_PurgeFinish(this, ino);
+  mds->filer->remove(inode, 0, inode.size, 0, onfinish);
+}
+
+// Second stage of a purge, reached through C_MDC_PurgeFinish: journal an
+// EPurgeFinish for `ino` and continue in purge_inode_finish_2() through
+// the context passed along with the entry.
+void MDCache::purge_inode_finish(inodeno_t ino)
+{
+  dout(10) << "purge_inode_finish " << ino << " - logging our completion" << endl;
+
+  // log completion
+  LogEvent *le = new EPurgeFinish(ino);
+  Context *onlogged = new C_MDC_PurgeFinish2(this, ino);
+  mds->mdlog->submit_entry(le, onlogged);
+}
+
+// Last stage of a purge, reached through C_MDC_PurgeFinish2 (the context
+// submitted with the EPurgeFinish log entry): forget the purge and
+// complete anyone waiting on it.
+void MDCache::purge_inode_finish_2(inodeno_t ino)
+{
+  dout(10) << "purge_inode_finish_2 " << ino << endl;
+
+  // this ino is no longer being purged
+  purging.erase(ino);
+
+  // detach the waiters for this ino (log flusher?) and finish them with
+  // result 0; with no waiters, finish_contexts still sees an empty list.
+  list<Context*> waiters;
+  map<inodeno_t, list<Context*> >::iterator w = waiting_for_purge.find(ino);
+  if (w != waiting_for_purge.end()) {
+    waiters.swap(w->second);
+    waiting_for_purge.erase(w);
+  }
+  finish_contexts(waiters, 0);
+
+  // reclaim ino?
+}
+
+// Re-issue the filer remove for every inode still listed in `purging`
+// (presumably repopulated during journal replay -- TODO confirm).
+// Each removal completes through C_MDC_PurgeFinish, exactly as in
+// purge_inode().
+void MDCache::start_recovered_purges()
+{
+  map<inodeno_t,inode_t>::iterator p = purging.begin();
+  while (p != purging.end()) {
+    // Step past this entry before issuing the remove.  The completion
+    // chain (purge_inode_finish -> purge_inode_finish_2) erases the entry
+    // from `purging`; if it ever ran before remove() returned, incrementing
+    // an iterator to the erased element would be undefined.  Erasing from a
+    // map leaves iterators to other elements valid, so `p` stays usable.
+    map<inodeno_t,inode_t>::iterator cur = p++;
+
+    dout(10) << "start_recovered_purges " << cur->first << " size " << cur->second.size << endl;
+    mds->filer->remove(cur->second, 0, cur->second.size,
+                       0, new C_MDC_PurgeFinish(this, cur->first));
+  }
+}
+
// open root inode
if (whoami == 0) {
// i am root inode
- CInode *root = new CInode();
+ CInode *root = new CInode(this);
memset(&root->inode, 0, sizeof(inode_t));
root->inode.ino = 1;
root->inode.hash_seed = 0; // not hashed!
root->inode.nlink = 1;
root->inode.layout = g_OSD_MDDirLayout;
- root->state_set(CINODE_STATE_ROOT);
-
set_root( root );
+ add_inode( root );
// root directory too
assert(root->dir == NULL);
assert(!m->has_base_dir());
// add in root
- cur = new CInode(false);
+ cur = new CInode(this, false);
m->get_inode(0).update_inode(cur);
// root
- cur->state_set(CINODE_STATE_ROOT);
set_root( cur );
+ add_inode( cur );
dout(7) << " got root: " << *cur << endl;
// take waiters
assert(dn->inode == 0); // better not be something else linked to this dentry...
// didn't have it.
- in = new CInode(false);
+ in = new CInode(this, false);
m->get_inode(i).update_inode(in);
// log it
if (dn->inode) dn->inode->mark_unsafe(); // XXX ??? FIXME
- mds->mdlog->submit_entry(new EUnlink(dir, dn),
+ mds->mdlog->submit_entry(new EUnlink(dir, dn, dn->inode),
NULL); // FIXME FIXME FIXME
// tell replicas
// active MDS requests
hash_map<Message*, active_request_t> active_requests;
+
+ // inode purging
+ map<inodeno_t, inode_t> purging;
+ map<inodeno_t, list<Context*> > waiting_for_purge;
// shutdown crap
int shutdown_commits;
bool did_shutdown_exports;
friend class C_MDC_ShutdownCommit;
+ friend class CInode;
friend class Locker;
friend class Migrator;
friend class Renamer;
// root inode
CInode *get_root() { return root; }
- void set_root(CInode *r) {
- root = r;
- add_inode(root);
- }
+ void set_root(CInode *r);
+
+ void add_import(CDir *dir);
+ void remove_import(CDir *dir);
// cache
void set_cache_size(size_t max) { lru.lru_set_max(max); }
}
void rename_file(CDentry *srcdn, CDentry *destdn);
+ public:
+ // inode purging
+ void purge_inode(inode_t& inode);
+ void purge_inode_finish(inodeno_t ino);
+ void purge_inode_finish_2(inodeno_t ino);
+ void waitfor_purge(inodeno_t ino, Context *c);
+ void start_recovered_purges();
+
+
protected:
// private methods
CDir *get_auth_container(CDir *in);
}
-void MDLog::submit_entry( LogEvent *e,
+void MDLog::submit_entry( LogEvent *le,
Context *c )
{
- dout(5) << "submit_entry " << journaler->get_write_pos() << " : " << *e << endl;
+ dout(5) << "submit_entry " << journaler->get_write_pos() << " : " << *le << endl;
if (g_conf.mds_log) {
// encode it, with event type
bufferlist bl;
- bl.append((char*)&e->_type, sizeof(e->_type));
- e->encode_payload(bl);
+ bl.append((char*)&le->_type, sizeof(le->_type));
+ le->encode_payload(bl);
// journal it.
journaler->append_entry(bl);
- delete e;
+ delete le;
num_events++;
logger->inc("add");
assert(le->can_expire(mds));
if (trimming.begin()->first == le->_end_off) {
- // front! we can expire log a bit
+ // front! we can expire the log a bit
journaler->set_expire_pos(le->_end_off);
}
switch (step) {
case 0:
- step = 1;
+ if (whoami == 0) {
+ dout(2) << "boot_recover " << step << ": creating root inode" << endl;
+ mdcache->open_root(0);
+ step = 1;
+ // fall-thru
+ } else {
+ // FIXME
+ assert(0);
+ }
case 1:
dout(2) << "boot_recover " << step << ": opening idalloc" << endl;
break;
case 5:
+ dout(2) << "boot_recover " << step << ": restarting any recovered purges" << endl;
+ mdcache->start_recovered_purges();
+ step++;
+ // fall-thru
+
+ case 6:
dout(2) << "boot_recover " << step << ": done." << endl;
mark_active();
}
using namespace std;
#include <ext/hash_map>
-#include <ext/rope>
using namespace __gnu_cxx;
#include "msg/Dispatcher.h"
ostream& operator<<(ostream& out, MDS& mds);
-//extern MDS *g_mds;
-
-
#endif
#include "config.h"
#undef dout
-#define dout(l) if (l<=g_conf.debug) cout << "mds" << mds->get_nodeid() << ".store "
+#define dout(l) if (l<=g_conf.debug || l<=g_conf.debug_mds) cout << g_clock.now() << " mds" << mds->get_nodeid() << ".store "
/*
// parse buffer contents into cache
dout(15) << "bl is " << bl << endl;
+
+ int off = 0;
size_t size;
- bl.copy(0, sizeof(size), (char*)&size);
+ __uint32_t num;
+ version_t got_version;
+ int got_hashcode;
+ bl.copy(off, sizeof(size), (char*)&size);
+ off += sizeof(size);
assert(bl.length() >= size + sizeof(size));
+ bl.copy(off, sizeof(num), (char*)&num);
+ off += sizeof(num);
+ bl.copy(off, sizeof(got_version), (char*)&got_version);
+ off += sizeof(got_version);
+ bl.copy(off, sizeof(got_hashcode), (char*)&got_hashcode);
+ off += sizeof(got_hashcode);
+
+ assert(got_hashcode == hashcode);
- int n;
- bl.copy(sizeof(size), sizeof(n), (char*)&n);
-
- char *buffer = bl.c_str(); // contiguous ptr to whole buffer(list)
- size_t buflen = bl.length();
- size_t p = sizeof(size_t);
-
- __uint32_t num = *(__uint32_t*)(buffer + p);
- p += sizeof(num);
+ int buflen = bl.length();
dout(10) << " " << num << " items in " << size << " bytes" << endl;
unsigned parsed = 0;
while (parsed < num) {
- assert(p < buflen && num > 0);
+ assert(off < buflen && num > 0);
parsed++;
- dout(24) << " " << parsed << "/" << num << " pos " << p-8 << endl;
+ dout(24) << " " << parsed << "/" << num << " pos " << off << endl;
// dentry
- string dname = buffer+p;
- p += dname.length() + 1;
+ string dname;
+ ::_decode(dname, bl, off);
dout(24) << "parse filename '" << dname << "'" << endl;
CDentry *dn = dir->lookup(dname); // existing dentry?
- if (*(buffer+p) == 'L') {
- // hard link, we don't do that yet.
- p++;
-
- inodeno_t ino = *(inodeno_t*)(buffer+p);
- p += sizeof(ino);
+ char type = bl[off];
+ ++off;
+ if (type == 'L') {
+ // hard link
+ inodeno_t ino;
+ bl.copy(off, sizeof(ino), (char*)&ino);
+ off += sizeof(ino);
// what to do?
if (hashcode >= 0) {
dout(12) << "readdir got remote link " << ino << " (dont' have it)" << endl;
}
}
- else if (*(buffer+p) == 'I') {
+ else if (type == 'I') {
// inode
- p++;
// parse out inode
- inode_t *inode = (inode_t*)(buffer+p);
- p += sizeof(inode_t);
+ inode_t inode;
+ bl.copy(off, sizeof(inode), (char*)&inode);
+ off += sizeof(inode);
string symlink;
- if ((inode->mode & INODE_TYPE_MASK) == INODE_MODE_SYMLINK) {
- symlink = (char*)(buffer+p);
- p += symlink.length() + 1;
- }
-
+ if (inode.is_symlink())
+ ::_decode(symlink, bl, off);
+
// what to do?
if (hashcode >= 0) {
int dentryhashcode = mds->hash_dentry( dir->ino(), dname );
} else {
// had dentry
dout(12) << "readdir had dentry " << dname << endl;
+
+ // under water?
+ if (dn->get_inode()->get_parent_dir_version() <= got_version) {
+ dout(10) << "readdir had underwater dentry " << dname << " and inode, marking clean" << endl;
+ dn->get_inode()->mark_clean();
+ dn->mark_clean();
+ }
}
continue;
}
// add inode
CInode *in = 0;
- if (mds->mdcache->have_inode(inode->ino)) {
- in = mds->mdcache->get_inode(inode->ino);
- dout(12) << "readdir got (but i already had) " << *in << " mode " << in->inode.mode << " mtime " << in->inode.mtime << endl;
+ if (mds->mdcache->have_inode(inode.ino)) {
+ in = mds->mdcache->get_inode(inode.ino);
+ dout(12) << "readdir got (but i already had) " << *in
+ << " mode " << in->inode.mode
+ << " mtime " << in->inode.mtime << endl;
} else {
// inode
- in = new CInode();
- memcpy(&in->inode, inode, sizeof(inode_t));
+ in = new CInode(mds->mdcache);
+ in->inode = inode;
// symlink?
if (in->is_symlink()) {
dout(12) << "readdir got " << *in << " mode " << in->inode.mode << " mtime " << in->inode.mtime << endl;
}
else {
- dout(1) << "corrupt directory, i got tag char '" << *(buffer+p) << "' val " << (int)(*(buffer+p)) << " at pos " << p << endl;
+ dout(1) << "corrupt directory, i got tag char '" << type << "' val " << (int)(type)
+ << " at pos " << off << endl;
assert(0);
}
}
public:
MDS *mds;
inodeno_t ino;
- __uint64_t version;
+ version_t version;
Context *c;
C_MDS_CommitDirVerify( MDS *mds,
inodeno_t ino,
- __uint64_t version,
+ version_t version,
Context *c) {
this->mds = mds;
this->c = c;
protected:
MDStore *ms;
CDir *dir;
- __uint64_t version;
+ version_t version;
public:
}
void MDStore::commit_dir( CDir *dir,
- __uint64_t version,
+ version_t version,
Context *c )
{
assert(dir->is_auth() ||
void MDStore::commit_dir_2( int result,
CDir *dir,
- __uint64_t committed_version)
+ version_t committed_version)
{
dout(5) << "commit_dir_2 " << *dir << " committed " << committed_version << ", current version " << dir->get_version() << endl;
assert(committed_version == dir->get_committing_version());
CDir *dir;
Context *c;
int hashcode;
- __uint64_t version;
+ version_t version;
public:
bufferlist bl;
__uint32_t num = 0;
bufferlist dirdata;
+
+ version_t v = dir->get_version();
+ dirdata.append((char*)&v, sizeof(v));
+ dirdata.append((char*)&hashcode, sizeof(hashcode));
for (CDir_map_t::iterator it = dir->begin();
it != dir->end();
if (in->is_dirty()) {
in->float_parent_dir_version( dir->get_version() );
dout(12) << " dirty inode " << *in << " now " << in->get_parent_dir_version() << endl;
+
+ in->set_committing_version( in->get_version() );
+ assert(in->get_last_committed_version() < in->get_committing_version());
+ } else {
+ assert(in->get_committing_version() == in->get_version());
}
+
}
num++;
void MDStore::commit_dir_slice_2( int result,
CDir *dir,
Context *c,
- __uint64_t committed_version,
+ version_t committed_version,
int hashcode )
{
dout(11) << "commit_dir_slice_2 hashcode " << hashcode << " " << *dir << " v " << committed_version << endl;
assert(in);
assert(in->is_auth());
+ if (in->get_committing_version())
+ in->set_committed_version();
+
if (committed_version > in->get_parent_dir_version()) {
dout(15) << " dir " << committed_version << " > inode " << in->get_parent_dir_version() << " still clean " << *(in) << endl;
assert(!in->is_dirty());
*/
void Migrator::encode_export_inode(CInode *in, bufferlist& enc_state, int new_auth)
{
- in->version++; // so local log entries are ignored, etc. (FIXME ??)
+ in->inode.version++; // so local log entries are ignored, etc. (FIXME ??)
// tell (all) clients about migrating caps.. mark STALE
for (map<int, Capability>::iterator it = in->client_caps.begin();
(*it)->update_inode(in);
dout(7) << " updated " << *in << endl;
} else {
- in = new CInode(false);
+ in = new CInode(mds->mdcache, false);
(*it)->update_inode(in);
// link to the containing dir
bool added = false;
CInode *in = cache->get_inode(istate.get_ino());
if (!in) {
- in = new CInode;
+ in = new CInode(mds->mdcache);
added = true;
} else {
in->set_auth(true);
it->second->update_inode(in);
dout(5) << " updated " << *in << endl;
} else {
- in = new CInode(false);
+ in = new CInode(mds->mdcache, false);
it->second->update_inode(in);
cache->add_inode(in);
it->second->update_inode(in);
dout(5) << " updated " << *in << endl;
} else {
- in = new CInode(false);
+ in = new CInode(mds->mdcache, false);
it->second->update_inode(in);
cache->add_inode(in);
#include "events/EInodeUpdate.h"
#include "events/EDirUpdate.h"
#include "events/EMknod.h"
+#include "events/EMkdir.h"
#include "include/filepath.h"
#include "common/Timer.h"
#include <iostream>
using namespace std;
+#include "config.h"
+#undef dout
+#define dout(l) if (l<=g_conf.debug || l <= g_conf.debug_mds) cout << g_clock.now() << " mds" << whoami << ".server "
+#define derr(l) if (l<=g_conf.debug || l <= g_conf.debug_mds) cout << g_clock.now() << " mds" << whoami << ".server "
void Server::dispatch(Message *m)
MSG_ADDR_CLIENT(req->get_client()), req->get_client_inst());
// <HACK>
- if (refpath.last_bit() == ".hash" &&
- refpath.depth() > 1) {
- dout(1) << "got explicit hash command " << refpath << endl;
- CDir *dir = trace[trace.size()-1]->get_inode()->dir;
- if (!dir->is_hashed() &&
- !dir->is_hashing() &&
- dir->is_auth())
- mdcache->migrator->hash_dir(dir);
+ // is this a special debug command?
+ if (refpath.depth() - 1 == trace.size() &&
+ refpath.last_bit().find(".ceph.") == 0) {
+ CDir *dir = 0;
+ if (trace.empty())
+ dir = mdcache->get_root()->dir;
+ else
+ dir = trace[trace.size()-1]->get_inode()->dir;
+
+ dout(1) << "** POSSIBLE CEPH DEBUG COMMAND '" << refpath.last_bit() << "' in " << *dir << endl;
+
+ if (refpath.last_bit() == ".ceph.hash" &&
+ refpath.depth() > 1) {
+ dout(1) << "got explicit hash command " << refpath << endl;
+ CDir *dir = trace[trace.size()-1]->get_inode()->dir;
+ if (!dir->is_hashed() &&
+ !dir->is_hashing() &&
+ dir->is_auth())
+ mdcache->migrator->hash_dir(dir);
+ }
+ else if (refpath.last_bit() == ".ceph.commit") {
+ dout(1) << "got explicit commit command on " << *dir << endl;
+ mds->mdstore->commit_dir(dir, 0);
+ }
}
// </HACK>
// STAT
void Server::handle_client_stat(MClientRequest *req,
- CInode *ref)
+ CInode *ref)
{
// do I need file info?
int mask = req->get_iarg();
// commit
commit_request(req, new MClientReply(req, 0), ref,
- new EInodeUpdate(newi)); // FIXME this is the wrong message
+ new EMknod(newi));
}
// mknod(): used by handle_client_mkdir, handle_client_mknod, which are mostly identical.
newi->mark_dirty();
// journal it
- mdlog->submit_entry(new EMknod(newi));
+ //mdlog->submit_entry(new EMknod(newi));
// ok!
return newi;
CDir *dir = ref->dir;
dout(7) << "handle_client_link dir is " << *dir << endl;
+
+
// make sure it's my dentry
int dauth = dir->dentry_authority(dname);
if (dauth != whoami) {
// commit to log
commit_request(req, new MClientReply(req, 0), diri,
- new EInodeUpdate(newi),//);
- new EDirUpdate(newdir)); // FIXME: weird performance regression here w/ double log; somewhat of a mystery!
+ new EMkdir(newdir));
+ //new EInodeUpdate(newi),//);
+ //new EDirUpdate(newdir)); // FIXME: weird performance regression here w/ double log; somewhat of a mystery!
return;
}
__uint64_t stat_ops;
+
public:
Server(MDS *m) :
mds(m),
#include <assert.h>
#include "config.h"
#include "include/types.h"
-#include "ETraced.h"
+
#include "../LogEvent.h"
+#include "ETrace.h"
#include "../CDir.h"
#include "../MDCache.h"
#include "../MDStore.h"
-class EDirUpdate : public ETraced {
+class EDirUpdate : public LogEvent {
protected:
+ ETrace trace;
inodeno_t dirino;
version_t version;
public:
- EDirUpdate(CDir *dir) : ETraced(EVENT_DIRUPDATE, dir->inode) {
+ EDirUpdate(CDir *dir) : LogEvent(EVENT_DIRUPDATE),
+ trace(dir->inode) {
this->dirino = dir->ino();
version = dir->get_version();
}
- EDirUpdate() : ETraced(EVENT_DIRUPDATE) {
+ EDirUpdate() : LogEvent(EVENT_DIRUPDATE) {
}
void print(ostream& out) {
- out << "up dir " << dirino << " ";
- ETraced::print(out);
- out << "/ v " << version;
+ out << "up dir " << dirino << " "
+ << trace
+ << "/ v " << version;
}
virtual void encode_payload(bufferlist& bl) {
- encode_trace(bl);
+ trace.encode(bl);
bl.append((char*)&version, sizeof(version));
bl.append((char*)&dirino, sizeof(dirino));
}
void decode_payload(bufferlist& bl, int& off) {
- decode_trace(bl, off);
+ trace.decode(bl, off);
bl.copy(off, sizeof(version), (char*)&version);
off += sizeof(version);
bl.copy(off, sizeof(dirino), (char*)&dirino);
#include "config.h"
#include "include/types.h"
-#include "ETraced.h"
-#include "../MDStore.h"
+#include "../LogEvent.h"
+#include "ETrace.h"
-
-class EInodeUpdate : public ETraced {
+class EInodeUpdate : public LogEvent {
protected:
- inode_t inode;
+ ETrace trace;
public:
- EInodeUpdate(CInode *in) : ETraced(EVENT_INODEUPDATE, in) {
- this->inode = in->get_inode();
+ EInodeUpdate(CInode *in) : LogEvent(EVENT_INODEUPDATE),
+ trace(in) {
}
- EInodeUpdate() : ETraced(EVENT_INODEUPDATE) { }
+ EInodeUpdate() : LogEvent(EVENT_INODEUPDATE) { }
void print(ostream& out) {
- out << "up inode " << inode.ino << " ";
- ETraced::print(out);
- out << " v " << inode.version;
+ out << "up inode " << trace.back().inode.ino
+ << " " << trace
+ << " v " << trace.back().inode.version;
}
-
+
virtual void encode_payload(bufferlist& bl) {
- encode_trace(bl);
- bl.append((char*)&inode, sizeof(inode));
+ trace.encode(bl);
}
void decode_payload(bufferlist& bl, int& off) {
- decode_trace(bl, off);
- bl.copy(off, sizeof(inode), (char*)&inode);
- off += sizeof(inode);
+ trace.decode(bl, off);
}
-
- bool can_expire(MDS *mds) {
- // am i obsolete?
- CInode *in = mds->mdcache->get_inode(inode.ino);
-
- //assert(in);
- if (!in) {
- dout(7) << "inode " << inode.ino << " not in cache, must have exported" << endl;
- return true;
- }
- dout(7) << "EInodeUpdate obsolete? on " << *in << endl;
- if (!in->is_auth())
- return true; // not my inode anymore!
- if (in->get_version() != inode.version)
- return true; // i'm obsolete! (another log entry follows)
-
- CDir *parent = in->get_parent_dir();
- if (!parent) return true; // root?
- if (!parent->is_dirty()) return true; // dir is clean!
-
- // frozen -> exporting -> obsolete (FOR NOW?)
- if (in->is_frozen())
- return true;
+ bool can_expire(MDS *mds);
+ void retire(MDS *mds, Context *c);
+ bool has_happened(MDS *mds);
+ void replay(MDS *mds);
- return false;
- }
-
- virtual void retire(MDS *mds, Context *c) {
- // commit my containing directory
- CInode *in = mds->mdcache->get_inode(inode.ino);
- assert(in);
- CDir *parent = in->get_parent_dir();
-
- if (parent) {
- // okay!
- dout(7) << "commiting containing dir for " << *in << ", which is " << *parent << endl;
- mds->mdstore->commit_dir(parent, c);
- } else {
- // oh, i'm the root inode
- dout(7) << "don't know how to commit the root inode" << endl;
- if (c) {
- c->finish(0);
- delete c;
- }
- }
-
- }
-
};
#endif
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef __EMKDIR_H
+#define __EMKDIR_H
+
+#include <assert.h>
+#include "config.h"
+#include "include/types.h"
+
+#include "ETrace.h"
+#include "../MDS.h"
+#include "../MDStore.h"
+
+
+class EMkdir : public LogEvent {  // journal event of type EVENT_MKDIR; its payload is a single ETrace
+ protected:
+ ETrace trace;  // path trace built from the directory's inode (see constructor)
+ //version_t pdirv;
+
+ public:
+ EMkdir(CDir *dir) : LogEvent(EVENT_MKDIR),  // event for directory `dir`
+ trace(dir->inode) {
+ //pdirv = dir->inode->get_parent_dir()->get_version();
+ }
+ EMkdir() : LogEvent(EVENT_MKDIR) { }  // empty event; LogEvent::decode() creates one for EVENT_MKDIR
+
+ void print(ostream& out) {  // writes "mkdir " followed by the trace
+ out << "mkdir ";
+ trace.print(out);
+ }
+
+ virtual void encode_payload(bufferlist& bl) {  // payload is just the encoded trace
+ trace.encode(bl);
+ //bl.append((char*)&pdirv, sizeof(pdirv));
+ }
+ void decode_payload(bufferlist& bl, int& off) {  // inverse of encode_payload; trace.decode() is given `off` by reference
+ trace.decode(bl, off);
+ //bl.copy(off, sizeof(pdirv), (char*)&pdirv);
+ //off += sizeof(pdirv);
+ }
+
+ bool can_expire(MDS *mds);  // declared only; defined outside this header
+ void retire(MDS *mds, Context *c);  // declared only; defined outside this header
+
+ // recovery
+ bool has_happened(MDS *mds);  // declared only; defined outside this header
+ void replay(MDS *mds);  // declared only; defined outside this header
+
+};
+
+#endif
#include "config.h"
#include "include/types.h"
-#include "ETraced.h"
+#include "../LogEvent.h"
+#include "ETrace.h"
+#include "../MDS.h"
#include "../MDStore.h"
-class EMknod : public ETraced {
+class EMknod : public LogEvent {
protected:
+ ETrace trace;
+ //version_t pdirv;
+
public:
- EMknod(CInode *in) : ETraced(EVENT_MKNOD, in) {
+ EMknod(CInode *in) : LogEvent(EVENT_MKNOD),
+ trace(in) {
+ //pdirv = in->get_parent_dir()->get_version();
}
- EMknod() : ETraced(EVENT_MKNOD) { }
+ EMknod() : LogEvent(EVENT_MKNOD) { }
void print(ostream& out) {
- out << "mknod ";
- ETraced::print(out);
+ out << "mknod " << trace;
}
virtual void encode_payload(bufferlist& bl) {
- encode_trace(bl);
+ trace.encode(bl);
+ //bl.append((char*)&pdirv, sizeof(pdirv));
}
void decode_payload(bufferlist& bl, int& off) {
- decode_trace(bl, off);
- }
-
- bool can_expire(MDS *mds) {
- // am i obsolete?
- CInode *diri = mds->mdcache->get_inode( trace.back().dirino );
- if (!diri) return true;
- CDir *dir = diri->dir;
- if (!dir) return true;
+ trace.decode(bl, off);
+ //bl.copy(off, sizeof(pdirv), (char*)&pdirv);
+ //off += sizeof(pdirv);
+ }
- if (!dir->is_auth()) return true; // not mine!
- if (dir->is_frozen()) return true; // frozen -> exporting -> obsolete? FIXME
-
- if (!dir->is_dirty()) return true;
+ bool can_expire(MDS *mds);
+ void retire(MDS *mds, Context *c);
+ bool has_happened(MDS *mds);
+ void replay(MDS *mds);
- if (dir->get_committing_version() > trace.back().dirv)
- return true;
-
- return false;
- }
-
- virtual void retire(MDS *mds, Context *c) {
- // commit directory
- CInode *in = mds->mdcache->get_inode( trace.back().dirino );
- assert(in);
- CDir *dir = in->dir;
- assert(dir);
-
- dout(10) << "EMknod committing dir " << *dir << endl;
- mds->mdstore->commit_dir(dir, c);
- }
-
};
#endif
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef __EPURGE_H
+#define __EPURGE_H
+
+#include <assert.h>
+#include "config.h"
+#include "include/types.h"
+
+// EVENT_PURGEFINISH journal event.  Its payload is the raw bytes of a single
+// inode number; encode_payload() and decode_payload() must stay mirror images.
+class EPurgeFinish : public LogEvent {
+ protected:
+  inodeno_t ino;   // inode number carried by this event
+
+ public:
+  EPurgeFinish(inodeno_t i) :
+    LogEvent(EVENT_PURGEFINISH),
+    ino(i) { }
+  // ino is left unset here; presumably decode_payload() follows -- confirm with caller.
+  EPurgeFinish() : LogEvent(EVENT_PURGEFINISH) { }
+
+  void print(ostream& out) {
+    out << "purgefinish " << ino;
+  }
+
+  // Append the raw bytes of ino to bl.
+  virtual void encode_payload(bufferlist& bl) {
+    bl.append((char*)&ino, sizeof(ino));
+  }
+  // Read ino from bl at offset off, then advance off past it so that whatever
+  // is decoded next starts at the right place (every other decoder in the
+  // journal events advances off the same way).
+  void decode_payload(bufferlist& bl, int& off) {
+    bl.copy(off, sizeof(ino), (char*)&ino);
+    off += sizeof(ino);
+  }
+
+  bool can_expire(MDS *mds);
+  void retire(MDS *mds, Context *c);
+  bool has_happened(MDS *mds);
+  void replay(MDS *mds);
+
+};
+
+#endif
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef __MDS_ETRACE_H
+#define __MDS_ETRACE_H
+
+#include <stdlib.h>
+#include <string>
+using namespace std;
+
+#include "../CInode.h"
+#include "../CDir.h"
+#include "../CDentry.h"
+
+
+// path trace for use in journal events
+
+// ETrace: an ordered list of <dir, dentry, inode> segments leading to an
+// inode, for embedding in journal events.
+class ETrace {
+
+  // one <dir, dn, inode> hop.
+  struct bit {
+    inodeno_t dirino;   // ino of the containing directory
+    version_t dirv;     // dir->get_version() when the trace was built
+    string dn;          // dentry name inside that directory
+    inode_t inode;      // copy of the inode the dentry leads to
+
+    bit(bufferlist& bl, int& off) { _decode(bl,off); }
+    bit(inodeno_t di, version_t dv, const string& d, inode_t i) :
+      dirino(di), dirv(dv), dn(d), inode(i) {}
+
+    // field order here defines the on-disk layout; _decode must match it.
+    void _encode(bufferlist& bl) {
+      bl.append((char*)&dirino, sizeof(dirino));
+      bl.append((char*)&dirv, sizeof(dirv));
+      ::_encode(dn, bl);
+      bl.append((char*)&inode, sizeof(inode));
+    }
+    void _decode(bufferlist& bl, int& off) {
+      bl.copy(off, sizeof(dirino), (char*)&dirino);
+      off += sizeof(dirino);
+
+      bl.copy(off, sizeof(dirv), (char*)&dirv);
+      off += sizeof(dirv);
+
+      ::_decode(dn, bl, off);
+
+      bl.copy(off, sizeof(inode), (char*)&inode);
+      off += sizeof(inode);
+    }
+  };
+
+ public:
+  list<bit> trace;
+
+  // Build the trace for `in` by walking parent dentries upward, prepending one
+  // segment per level.  The walk stops when an inode has no parent dentry, a
+  // dentry has no dir, or after recording a dir for which is_import() is true.
+  // A null `in` yields an empty trace.
+  ETrace(CInode *in = 0) {
+    if (!in) return;
+
+    while (true) {
+      CDentry *parent_dn = in->get_parent_dn();
+      if (!parent_dn) break;
+      CDir *parent_dir = parent_dn->get_dir();
+      if (!parent_dir) break;
+
+      trace.push_front(bit(parent_dir->ino(),
+                           parent_dir->get_version(),
+                           parent_dn->get_name(),
+                           in->inode));
+
+      in = parent_dir->get_inode();
+      if (parent_dir->is_import()) break;
+    }
+  }
+
+  // last segment; the trace must not be empty.
+  bit& back() {
+    return trace.back();
+  }
+
+  // Read a segment count, then that many segments, appending to `trace`.
+  void decode(bufferlist& bl, int& off) {
+    int n;
+    bl.copy(off, sizeof(n), (char*)&n);
+    off += sizeof(n);
+    while (n-- > 0)
+      trace.push_back( bit(bl, off) );
+  }
+
+  // Write the segment count followed by every segment, front to back.
+  void encode(bufferlist& bl) {
+    int n = trace.size();
+    bl.append((char*)&n, sizeof(n));
+    list<bit>::iterator seg = trace.begin();
+    while (seg != trace.end()) {
+      seg->_encode(bl);
+      ++seg;
+    }
+  }
+
+  // Print as "[<first dirino>]/dn/dn/...".
+  void print(ostream& out) const {
+    for (list<bit>::const_iterator seg = trace.begin();
+         seg != trace.end();
+         ++seg) {
+      if (seg == trace.begin())
+        out << "[" << seg->dirino << "]";
+      out << "/" << seg->dn;
+    }
+  }
+
+  CInode *restore_trace(MDS *mds);
+
+};
+
+// Stream an ETrace in the format produced by ETrace::print().
+inline ostream& operator<<(ostream& out, const ETrace& t)
+{
+  t.print(out);
+  return out;
+}
+
+#endif
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-
-#ifndef __MDS_ETRACED_H
-#define __MDS_ETRACED_H
-
-#include <stdlib.h>
-#include <string>
-using namespace std;
-
-#include "../LogEvent.h"
-#include "../CInode.h"
-#include "../CDir.h"
-#include "../CDentry.h"
-#include "../MDCache.h"
-
-// generic log event
-class ETraced : public LogEvent {
-
- // <dir, dn, inode> segment.
- struct bit {
- inodeno_t dirino;
- version_t dirv;
- string dn;
- inodeno_t ino;
- version_t inov;
-
- bit(bufferlist& bl, int& off) { _decode(bl,off); }
- bit(inodeno_t di, version_t dv, const string& d, inodeno_t i, version_t iv) :
- dirino(di), dirv(dv), dn(d), ino(i), inov(iv) {}
-
- void _encode(bufferlist& bl) {
- bl.append((char*)&dirino, sizeof(dirino));
- bl.append((char*)&dirv, sizeof(dirv));
- ::_encode(dn, bl);
- bl.append((char*)&ino, sizeof(ino));
- bl.append((char*)&inov, sizeof(inov));
- }
- void _decode(bufferlist& bl, int& off) {
- bl.copy(off, sizeof(dirino), (char*)&dirino); off += sizeof(dirino);
- bl.copy(off, sizeof(dirv), (char*)&dirv); off += sizeof(dirv);
- ::_decode(dn, bl, off);
- bl.copy(off, sizeof(ino), (char*)&ino); off += sizeof(ino);
- bl.copy(off, sizeof(inov), (char*)&inov); off += sizeof(inov);
- }
- };
-
- protected:
- list<bit> trace;
-
-public:
- ETraced(int t, CInode *in = 0) : LogEvent(t) {
- if (in) {
- CDir *dir;
- CDentry *dn;
- do {
- dn = in->get_parent_dn();
- if (!dn) break;
- dir = dn->get_dir();
- if (!dir) break;
-
- trace.push_front(bit(dir->ino(), dir->get_version(),
- dn->get_name(),
- in->ino(), in->get_version()));
-
- in = dir->get_inode();
- } while (!dir->is_import());
- }
- }
-
- void decode_trace(bufferlist& bl, int& off) {
- int n;
- bl.copy(off, sizeof(n), (char*)&n);
- off += sizeof(n);
- for (int i=0; i<n; i++)
- trace.push_back( bit(bl, off) );
- }
-
- void encode_trace(bufferlist& bl) {
- int n = trace.size();
- bl.append((char*)&n, sizeof(n));
- for (list<bit>::iterator i = trace.begin();
- i != trace.end();
- i++)
- i->_encode(bl);
- }
-
- void print(ostream& out) {
- for (list<bit>::iterator p = trace.begin();
- p != trace.end();
- p++) {
- if (p == trace.begin())
- out << "[" << p->dirino << "]/" << p->dn;
- else
- out << "/" << p->dn;
- }
- }
-
-};
-
-#endif
#include <assert.h>
#include "config.h"
#include "include/types.h"
-#include "../LogEvent.h"
-#include "../CInode.h"
-#include "../MDCache.h"
-#include "../MDStore.h"
+#include "../LogEvent.h"
+#include "ETrace.h"
+#include "../CInode.h"
+#include "../CDentry.h"
+#include "../CDir.h"
class EUnlink : public LogEvent {
protected:
- inodeno_t dir_ino;
- __uint64_t version;
+ ETrace diritrace; // trace built from dir->inode
+ version_t dirv; // dir->get_version() at construction time
string dname;
+ ETrace inodetrace; // trace built from the CInode passed to the ctor
public:
- EUnlink(CDir *dir, CDentry* dn) :
- LogEvent(EVENT_UNLINK) {
- this->dir_ino = dir->ino();
- this->dname = dn->get_name();
- this->version = dir->get_version();
- }
- EUnlink() :
- LogEvent(EVENT_UNLINK) {
- }
+ EUnlink(CDir *dir, CDentry* dn, CInode *in) :
+ LogEvent(EVENT_UNLINK),
+ diritrace(dir->inode),
+ dirv(dir->get_version()),
+ dname(dn->get_name()),
+ inodetrace(in) {}
+ EUnlink() : LogEvent(EVENT_UNLINK) { } // dirv is not initialized here; decode_payload() assigns it
virtual void encode_payload(bufferlist& bl) {
- bl.append((char*)&dir_ino, sizeof(dir_ino));
- bl.append((char*)&version, sizeof(version));
- bl.append((char*)dname.c_str(), dname.length() + 1);
+ diritrace.encode(bl); // payload order: diritrace, dirv, dname, inodetrace
+ bl.append((char*)&dirv, sizeof(dirv));
+ ::_encode(dname, bl);
+ inodetrace.encode(bl);
}
void decode_payload(bufferlist& bl, int& off) {
- bl.copy(off, sizeof(dir_ino), (char*)&dir_ino);
- off += sizeof(dir_ino);
- bl.copy(off, sizeof(version), (char*)&version);
- off += sizeof(version);
- dname = bl.c_str() + off;
- off += dname.length() + 1;
+ diritrace.decode(bl,off); // same field order as encode_payload
+ bl.copy(off, sizeof(dirv), (char*)&dirv);
+ off += sizeof(dirv);
+ ::_decode(dname, bl, off);
+ inodetrace.decode(bl, off);
}
- virtual bool can_expire(MDS *mds) {
- // am i obsolete?
- CInode *idir = mds->mdcache->get_inode(dir_ino);
- if (!idir) return true;
-
- CDir *dir = idir->dir;
-
- if (!dir) return true;
-
- if (!idir->dir->is_auth()) return true;
- if (idir->dir->is_clean()) return true;
-
- if (idir->dir->get_last_committed_version() >= version) return true;
- return false;
- }
-
- virtual void retire(MDS *mds, Context *c) {
- // commit my containing directory
- CDir *dir = mds->mdcache->get_inode(dir_ino)->dir;
- assert(dir);
-
- // okay!
- dout(7) << "commiting dirty (from unlink) dir " << *dir << endl;
- mds->mdstore->commit_dir(dir, version, c);
- }
+ bool can_expire(MDS *mds); // the four methods below are only declared here; bodies are out of line
+ void retire(MDS *mds, Context *c);
+ bool has_happened(MDS *mds);
+ void replay(MDS *mds);
};
#endif
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "events/ETrace.h"
+#include "events/EMknod.h"
+#include "events/EMkdir.h"
+#include "events/EInodeUpdate.h"
+#include "events/EPurgeFinish.h"
+#include "events/EUnlink.h"
+
+#include "MDS.h"
+#include "MDCache.h"
+
+#include "config.h"
+#undef dout
+#define dout(l) if (l<=g_conf.debug || l <= g_conf.debug_mds) cout << g_clock.now() << " mds" << mds->get_nodeid() << ".journal "
+#define derr(l) if (l<=g_conf.debug || l <= g_conf.debug_mds) cout << g_clock.now() << " mds" << mds->get_nodeid() << ".journal "
+
+
+// -----------------------
+// ETrace
+
+// Recreate in the MDS cache the path described by this trace.
+//
+// Segments are processed front to back.  For each one:
+//  - the directory inode is looked up by dirino; if missing, a bare directory
+//    inode is created and added to the cache.  A newly created directory for
+//    the first segment is registered as an import, and as root if its ino is 1.
+//  - the directory's CDir is opened.
+//  - the target inode is looked up by ino; if missing it is created from the
+//    recorded inode_t and linked under the recorded dentry name, and the new
+//    dentry is marked dirty.  If present, its inode_t is overwritten with the
+//    recorded copy.
+//
+// Returns the inode of the last segment, or 0 when the trace is empty.
+CInode *ETrace::restore_trace(MDS *mds)
+{
+  CInode *in = 0;
+  for (list<bit>::iterator p = trace.begin();
+       p != trace.end();
+       ++p) {
+    // the dir
+    CInode *diri = mds->mdcache->get_inode(p->dirino);
+    if (!diri) {
+      dout(10) << "ETrace.restore_trace adding dir " << p->dirino << endl;
+      diri = new CInode(mds->mdcache);
+      diri->inode.ino = p->dirino;
+      diri->inode.mode = INODE_MODE_DIR;
+      mds->mdcache->add_inode(diri);
+
+      CDir *dir = diri->get_or_open_dir(mds);
+
+      // root?  import?
+      if (p == trace.begin()) {
+        mds->mdcache->add_import(dir);
+        if (dir->ino() == 1)
+          mds->mdcache->set_root(diri);
+      }
+    } else {
+      dout(20) << "ETrace.restore_trace had dir " << p->dirino << endl;
+      diri->get_or_open_dir(mds);
+    }
+    assert(diri->dir);
+    dout(20) << "ETrace.restore_trace dir is " << *diri->dir << endl;
+
+    // the inode
+    in = mds->mdcache->get_inode(p->inode.ino);
+    if (!in) {
+      dout(10) << "ETrace.restore_trace adding dn '" << p->dn << "' inode " << p->inode.ino << endl;
+      in = new CInode(mds->mdcache);
+      in->inode = p->inode;
+      mds->mdcache->add_inode(in);
+
+      // the dentry.  validate it before touching it.
+      CDentry *dn = diri->dir->add_dentry( p->dn, in );
+      assert(dn);
+      dn->mark_dirty();
+    } else {
+      dout(20) << "ETrace.restore_trace had dn '" << p->dn << "' inode " << p->inode.ino << endl;
+      in->inode = p->inode;
+    }
+    dout(20) << "ETrace.restore_trace in is " << *in << endl;
+  }
+  return in;
+}
+
+
+// -----------------------
+// EMkdir
+// - trace goes to new dir's inode.
+
+// Returns true once nothing recorded by this event still needs committing:
+// either the new directory's inode or its CDir is not in cache, or both the
+// inode (committed version >= the logged inode version) and the new dir
+// (last committed version != 0) have been committed.
+bool EMkdir::can_expire(MDS *mds)
+{
+  const inode_t& logged = trace.back().inode;
+
+  // am i obsolete?
+  CInode *in = mds->mdcache->get_inode( logged.ino );
+  if (!in) return true;
+  CDir *newdir = in->dir;
+  if (!newdir) return true;
+  CDir *pdir = in->get_parent_dir();
+  assert(pdir);
+
+  dout(10) << "EMkdir.can_expire in is " << *in << endl;
+  dout(10) << "EMkdir.can_expire inv is " << logged.version << endl;
+  dout(10) << "EMkdir.can_expire dir is " << *newdir << endl;
+
+  const bool parent_pending = in->get_last_committed_version() < logged.version;
+  const bool newdir_pending = newdir->get_last_committed_version() == 0;
+
+  return !(parent_pending || newdir_pending);
+}
+
+// Commit whatever this event still depends on, completing `c` when done.
+// The new directory's inode, its CDir and its parent dir must all be in cache.
+//  - parent and new dir both pending: commit both, gathered under `c`.
+//  - only the parent pending: commit the parent dir.
+//  - otherwise: commit the new dir.
+void EMkdir::retire(MDS *mds, Context *c)
+{
+  // commit parent dir AND my dir
+  CInode *in = mds->mdcache->get_inode( trace.back().inode.ino );
+  assert(in);
+  CDir *dir = in->dir;
+  assert(dir);
+  CDir *pdir = in->get_parent_dir();
+  assert(pdir);
+
+  dout(10) << "EMkdir.retire in is " << *in << endl;
+  dout(10) << "EMkdir.retire inv is " << trace.back().inode.version << endl;
+  dout(10) << "EMkdir.retire dir is " << *dir << endl;
+  bool commitparent = in->get_last_committed_version() < trace.back().inode.version;
+  bool commitnew = dir->get_last_committed_version() == 0;
+
+  if (commitparent && commitnew) {
+    // both
+    dout(10) << "EMkdir.retire committing parent+new dir " << *dir << endl;
+    C_Gather *gather = new C_Gather(c);
+    mds->mdstore->commit_dir(pdir, gather->new_sub());
+    mds->mdstore->commit_dir(dir, gather->new_sub());
+  } else if (commitparent) {
+    // just parent -- log the directory that is actually being committed
+    dout(10) << "EMkdir.retire committing parent dir " << *pdir << endl;
+    mds->mdstore->commit_dir(pdir, c);
+  } else {
+    // just new dir
+    dout(10) << "EMkdir.retire committing new dir " << *dir << endl;
+    mds->mdstore->commit_dir(dir, c);
+  }
+}
+
+// Unconditionally false; mds is unused.
+// NOTE(review): presumably this makes the replayer always call replay() --
+// confirm against the journal replay caller.
+bool EMkdir::has_happened(MDS *mds)
+{
+  return false;
+}
+
+// Re-apply this event to the cache: restore the traced path, then mark the
+// new directory's inode, its parent dir and the new dir itself dirty.
+void EMkdir::replay(MDS *mds)
+{
+  dout(10) << "EMkdir.replay " << *this << endl;
+  CInode *in = trace.restore_trace(mds);
+  // restore_trace returns 0 for an empty trace; a non-null result also
+  // guarantees trace.back() below is valid.
+  assert(in);
+
+  // mark dir inode dirty
+  in->mark_dirty();
+
+  // mark parent dir dirty, and set version.
+  // this may end up being below water when dir is fetched from disk.
+  CDir *pdir = in->get_parent_dir();
+  assert(pdir);
+  if (!pdir->is_dirty()) pdir->mark_dirty();
+  pdir->set_version(trace.back().dirv);
+
+  // mark new dir dirty + complete
+  CDir *dir = in->get_or_open_dir(mds);
+  dir->mark_dirty();
+  dir->mark_complete();
+}
+
+
+
+// -----------------------
+// EMknod
+
+// Returns true when this event no longer needs to be kept: the inode is not
+// in cache, is not auth here, has a version different from the logged one,
+// or has already been committed at (or past) the logged version.
+bool EMknod::can_expire(MDS *mds)
+{
+  const inode_t& logged = trace.back().inode;
+
+  // am i obsolete?
+  CInode *in = mds->mdcache->get_inode( logged.ino );
+  if (!in)
+    return true;
+
+  return !in->is_auth()                                         // not my inode anymore!
+    || in->get_version() != logged.version                      // i'm obsolete! (another log entry follows)
+    || in->get_last_committed_version() >= logged.version;      // already committed
+}
+
+// Commit the directory named by the last trace segment, completing `c` when
+// done.  That directory's inode and CDir must be in cache.
+void EMknod::retire(MDS *mds, Context *c)
+{
+  // commit parent directory
+  CInode *parent_in = mds->mdcache->get_inode( trace.back().dirino );
+  assert(parent_in);
+  CDir *parent_dir = parent_in->dir;
+  assert(parent_dir);
+
+  dout(10) << "EMknod.retire committing parent dir " << *parent_dir << endl;
+  mds->mdstore->commit_dir(parent_dir, c);
+}
+
+// Unconditionally false; mds is unused.
+// NOTE(review): presumably this makes the replayer always call replay() --
+// confirm against the journal replay caller.
+bool EMknod::has_happened(MDS *mds)
+{
+  return false;
+}
+
+// Re-apply this event to the cache: restore the traced path, mark the inode
+// dirty, and mark its parent dir dirty at the logged dir version.
+void EMknod::replay(MDS *mds)
+{
+  dout(10) << "EMknod.replay " << *this << endl;
+  CInode *in = trace.restore_trace(mds);
+  // restore_trace returns 0 for an empty trace; a non-null result also
+  // guarantees trace.back() below is valid.
+  assert(in);
+  in->mark_dirty();
+
+  // mark parent dir dirty, and set version.
+  // this may end up being below water when dir is fetched from disk.
+  CDir *pdir = in->get_parent_dir();
+  assert(pdir);
+  if (!pdir->is_dirty()) pdir->mark_dirty();
+  pdir->set_version(trace.back().dirv);
+}
+
+
+
+// -----------------------
+// EInodeUpdate
+
+// Returns true when this event no longer needs to be kept: the inode is not
+// in cache, is not auth here, has a version different from the logged one,
+// or has already been committed at (or past) the logged version.
+bool EInodeUpdate::can_expire(MDS *mds)
+{
+  const inode_t& logged = trace.back().inode;
+
+  CInode *in = mds->mdcache->get_inode( logged.ino );
+  if (!in)
+    return true;
+
+  if (!in->is_auth())
+    return true;   // not my inode anymore!
+
+  if (in->get_version() != logged.version)
+    return true;   // i'm obsolete!  (another log entry follows)
+
+  // a "frozen -> exporting -> obsolete (FOR NOW?)" shortcut on
+  // in->is_frozen() is left disabled here.
+
+  return in->get_last_committed_version() >= logged.version;
+}
+
+// Commit the directory named by the last trace segment, completing `c` when
+// done.  That directory's inode and CDir must be in cache.
+void EInodeUpdate::retire(MDS *mds, Context *c)
+{
+  // commit parent directory
+  CInode *diri = mds->mdcache->get_inode( trace.back().dirino );
+  assert(diri);
+  CDir *dir = diri->dir;
+  assert(dir);
+
+  // label the log line with this event type, not EMknod
+  dout(10) << "EInodeUpdate.retire committing parent dir " << *dir << endl;
+  mds->mdstore->commit_dir(dir, c);
+}
+
+// Unconditionally false; mds is unused.
+// NOTE(review): presumably this makes the replayer always call replay() --
+// confirm against the journal replay caller.
+bool EInodeUpdate::has_happened(MDS *mds)
+{
+  return false;
+}
+
+// Re-apply this event to the cache: restore the traced path, mark the inode
+// dirty, and mark its parent dir dirty at the logged dir version.
+void EInodeUpdate::replay(MDS *mds)
+{
+  dout(10) << "EInodeUpdate.replay " << *this << endl;
+  CInode *in = trace.restore_trace(mds);
+  // restore_trace returns 0 for an empty trace; a non-null result also
+  // guarantees trace.back() below is valid.
+  assert(in);
+  in->mark_dirty();
+
+  // mark parent dir dirty, and set version.
+  // this may end up being below water when dir is fetched from disk.
+  CDir *pdir = in->get_parent_dir();
+  assert(pdir);
+  if (!pdir->is_dirty()) pdir->mark_dirty();
+  pdir->set_version(trace.back().dirv);
+}
+
+
+
+// -----------------------
+// EUnlink
+
+// Returns false while either the directory (last committed version < dirv)
+// or the unlinked inode (last committed version < logged inode version) is
+// in cache and still uncommitted; true otherwise.
+bool EUnlink::can_expire(MDS *mds)
+{
+  // dir.
+  // diritrace is built from the directory's inode and is empty when that
+  // inode has no parent dentry; back() on an empty list is undefined, so the
+  // directory check is skipped in that case, just as the inode check is.
+  // NOTE(review): with an empty diritrace the directory cannot be identified
+  // from this event at all -- confirm skipping is acceptable.
+  if (!diritrace.trace.empty()) {
+    CInode *diri = mds->mdcache->get_inode( diritrace.back().inode.ino );
+    CDir *dir = 0;
+    if (diri) dir = diri->dir;
+
+    if (dir && dir->get_last_committed_version() < dirv) return false;
+  }
+
+  if (!inodetrace.trace.empty()) {
+    // inode
+    CInode *in = mds->mdcache->get_inode( inodetrace.back().inode.ino );
+    if (in && in->get_last_committed_version() < inodetrace.back().inode.version)
+      return false;
+  }
+
+  return true;
+}
+
+// Commit the directory the dentry was unlinked from, up to version dirv,
+// completing `c` when done.  The directory's inode and CDir must be in cache.
+void EUnlink::retire(MDS *mds, Context *c)
+{
+  // back() on an empty trace is undefined; fail loudly instead.
+  assert(!diritrace.trace.empty());
+  CInode *diri = mds->mdcache->get_inode( diritrace.back().inode.ino );
+  assert(diri);   // get_inode may return null; check before dereferencing
+  CDir *dir = diri->dir;
+  assert(dir);
+
+  // okay!
+  dout(7) << "commiting dirty (from unlink) dir " << *dir << endl;
+  mds->mdstore->commit_dir(dir, dirv, c);
+}
+
+// Unconditionally true; mds is unused.
+// NOTE(review): presumably this makes the replayer skip replay(), which is
+// empty for EUnlink -- confirm against the journal replay caller.
+bool EUnlink::has_happened(MDS *mds)
+{
+  return true;
+}
+
+// Intentionally empty: no cache state is rebuilt for an unlink event here.
+void EUnlink::replay(MDS *mds)
+{
+}
+
+
+
+
+// -----------------------
+// EPurgeFinish
+
+
+// Unconditionally true; mds is unused.
+bool EPurgeFinish::can_expire(MDS *mds)
+{
+  return true;
+}
+
+// Empty body: nothing is committed and `c` is neither completed nor freed.
+void EPurgeFinish::retire(MDS *mds, Context *c)
+{
+}
+
+// Unconditionally true; mds is unused.
+// NOTE(review): presumably this makes the replayer skip replay(), which is
+// empty for EPurgeFinish -- confirm against the journal replay caller.
+bool EPurgeFinish::has_happened(MDS *mds)
+{
+  return true;
+}
+
+// Intentionally empty: no cache state is rebuilt for a purge-finish event here.
+void EPurgeFinish::replay(MDS *mds)
+{
+}
+
+
+
+