- mds metadata versioning
- (dir) inode versions..
+ primary file link -> old inode
+ primary dir link -> multiversion inode
+ remote link -> multiversion inode
+
+- for simplicity, don't replicate any snapshot data.
+- and don't issue any leases or capabilities for it... the content is static!
+-
+
+- need rrealms in fraginfo_t
+- rename() needs to create a new realm if src/dst realms differ and (rrealms, or open_children, or not subtree leaf) (similar logic to the anchor update)
+
- will snapshots and CAS play nice?
+ - cas object refs should follow same deletion semantics as non-cas objects.
- mds server ops
#include <sys/stat.h>
#include <fcntl.h>
-int main(int argc, const char **argv, char *envp[]) {
-
+int main(int argc, const char **argv, char *envp[])
+{
//cerr << "csyn starting" << std::endl;
vector<const char*> args;
argv_to_vec(argc, argv, args);
out << "[dentry " << path;
+ if (true || dn.first != 0 || dn.last != CEPH_NOSNAP) {
+ out << " [" << dn.first << ",";
+ if (dn.last == CEPH_NOSNAP)
+ out << "head";
+ else
+ out << dn.last;
+ out << ']';
+ }
+
if (dn.is_auth()) {
out << " auth";
if (dn.is_replicated())
return *this < *(CDentry*)r;
}
- protected:
+public:
nstring name;
+ snapid_t first, last;
+
+ dentry_key_t key() {  // builds the (last, name) key under which a CDir stores this dentry in its items map
+ return dentry_key_t(last, name.c_str());  // the key holds a raw pointer into name; it does not own a copy of the string
+ }
+protected:
inodeno_t remote_ino; // if remote dentry
unsigned char remote_d_type;
SimpleLock lock;
-
public:
// cons
- CDentry() :
- remote_ino(0), remote_d_type(0),
- inode(0), dir(0),
- version(0), projected_version(0),
- xlist_dirty(this),
- dir_offset(0),
- auth_pins(0), nested_auth_pins(0), nested_anchors(0),
- lock(this, CEPH_LOCK_DN, WAIT_LOCK_OFFSET) { }
CDentry(const nstring& n, CInode *in) :
name(n),
+ first(0), last(CEPH_NOSNAP),
remote_ino(0), remote_d_type(0),
inode(in), dir(0),
version(0), projected_version(0),
lock(this, CEPH_LOCK_DN, WAIT_LOCK_OFFSET) { }
CDentry(const nstring& n, inodeno_t ino, unsigned char dt, CInode *in=0) :
name(n),
+ first(0), last(CEPH_NOSNAP),
remote_ino(ino), remote_d_type(dt),
inode(in), dir(0),
version(0), projected_version(0),
dn->version = get_projected_version();
// add to dir
- assert(items.count(dn->name.c_str()) == 0);
+ assert(items.count(dn->key()) == 0);
//assert(null_items.count(dn->name) == 0);
- items[dn->name.c_str()] = dn;
+ items[dn->key()] = dn;
nnull++;
dout(12) << "add_null_dentry " << *dn << dendl;
dn->version = get_projected_version();
// add to dir
- assert(items.count(dn->name.c_str()) == 0);
+ assert(items.count(dn->key()) == 0);
//assert(null_items.count(dn->name) == 0);
- items[dn->name.c_str()] = dn;
+ items[dn->key()] = dn;
link_inode_work( dn, in );
dout(12) << "add_primary_dentry " << *dn << dendl;
dn->version = get_projected_version();
// add to dir
- assert(items.count(dn->name.c_str()) == 0);
+ assert(items.count(dn->key()) == 0);
//assert(null_items.count(dn->name) == 0);
- items[dn->name.c_str()] = dn;
+ items[dn->key()] = dn;
nitems++;
dout(12) << "add_remote_dentry " << *dn << dendl;
}
// remove from list
- assert(items.count(dn->name.c_str()) == 1);
- items.erase(dn->name.c_str());
+ assert(items.count(dn->key()) == 1);
+ items.erase(dn->key());
// adjust dirty counter?
if (dn->state_test(CDentry::STATE_DIRTY))
{
dout(15) << "steal_dentry " << *dn << dendl;
- items[dn->name.c_str()] = dn;
+ items[dn->key()] = dn;
- dn->dir->items.erase(dn->name.c_str());
+ dn->dir->items.erase(dn->key());
if (dn->dir->items.empty())
dn->dir->put(PIN_CHILD);
CDir::map_t::iterator p = items.begin();
CDentry *dn = p->second;
- frag_t subfrag = inode->pick_dirfrag(p->first);
+ frag_t subfrag = inode->pick_dirfrag(dn->name);
int n = (subfrag.value() & (subfrag.mask() ^ frag.mask())) >> subfrag.mask_shift();
dout(15) << " subfrag " << subfrag << " n=" << n << " for " << p->first << dendl;
CDir *f = subfrags[n];
class CDirDiscover;
+
+
+
ostream& operator<<(ostream& out, class CDir& dir);
public:
//typedef hash_map<string, CDentry*> map_t; // there is a bug somewhere, valgrind me.
- typedef map<const char *, CDentry*, ltstr> map_t;
+ //typedef map<const char *, CDentry*, ltstr> map_t;
+ typedef map<dentry_key_t, CDentry*, ltdentrykey> map_t;
protected:
// contents
// -- dentries and inodes --
public:
- CDentry* lookup(const string& s) {
- nstring ns(s);
- return lookup(ns);
+ CDentry* lookup(const string& n, snapid_t snap=CEPH_NOSNAP) {  // convenience overload for std::string names; snap defaults to CEPH_NOSNAP
+ return lookup(n.c_str(), snap);  // forwards to the const char* overload
+ }
+ CDentry* lookup(const nstring& ns, snapid_t snap=CEPH_NOSNAP) {  // convenience overload for nstring names; snap defaults to CEPH_NOSNAP
+ return lookup(ns.c_str(), snap);  // forwards to the const char* overload
}
- CDentry* lookup(const nstring& ns) {
- map_t::iterator iter = items.find(ns.c_str());
+ CDentry* lookup(const char *n, snapid_t snap=CEPH_NOSNAP) {
+ map_t::iterator iter = items.find(dentry_key_t(snap, n));
if (iter == items.end())
return 0;
else
SnapRealm *snaprealm;
SnapRealm *containing_realm;
+ snapid_t snapid; // 0 = multiversion OR head
+ map<snapid_t, old_inode_t> old_inodes; // key = last, value.first = first
off_t last_journaled; // log offset for the last time i was journaled
off_t last_open_journaled; // log offset for the last journaled EOpen
// ---------------------------
CInode(MDCache *c, bool auth=true) :
mdcache(c),
- snaprealm(0),
+ snaprealm(0), containing_realm(0),
+ snapid(0),
last_journaled(0), last_open_journaled(0),
//hack_accessed(true),
stickydir_ref(0),
inodeno_t ino() const { return inode.ino; }
+ vinodeno_t vino() const { return vinodeno_t(inode.ino, snapid); }  // versioned id: this inode's ino paired with its snapid
inode_t& get_inode() { return inode; }
CDentry* get_parent_dn() { return parent; }
CDentry* get_projected_parent_dn() { return projected_parent ? projected_parent:parent; }
void MDCache::add_inode(CInode *in)
{
// add to lru, inode map
- assert(inode_map.count(in->ino()) == 0); // should be no dup inos!
- inode_map[ in->ino() ] = in;
+ assert(inode_map.count(in->vino()) == 0); // should be no dup inos!
+ inode_map[ in->vino() ] = in;
if (in->ino() < MDS_INO_BASE) {
base_inodes.insert(in);
}
// remove from inode map
- inode_map.erase(o->ino());
+ inode_map.erase(o->vino());
if (o->ino() < MDS_INO_BASE) {
assert(base_inodes.count(o));
dout(15) << " add_weak_primary_dentry " << *dn << dendl;
assert(dn->is_primary());
assert(dn->inode->is_dir());
- rejoin->add_weak_primary_dentry(dir->dirfrag(), p->first, dn->get_inode()->ino());
+ rejoin->add_weak_primary_dentry(dir->dirfrag(), dn->name.c_str(), dn->get_inode()->ino());
dn->get_inode()->get_nested_dirfrags(nested);
}
} else {
++p) {
CDentry *dn = p->second;
dout(15) << " add_strong_dentry " << *dn << dendl;
- rejoin->add_strong_dentry(dir->dirfrag(), p->first,
+ rejoin->add_strong_dentry(dir->dirfrag(), dn->name,
dn->is_primary() ? dn->get_inode()->ino():inodeno_t(0),
dn->is_remote() ? dn->get_remote_ino():inodeno_t(0),
dn->is_remote() ? dn->get_remote_d_type():0,
// FIXME: what about root and stray inodes.
- for (hash_map<inodeno_t,CInode*>::iterator p = inode_map.begin();
+ for (hash_map<vinodeno_t,CInode*>::iterator p = inode_map.begin();
p != inode_map.end();
++p) {
CInode *in = p->second;
*/
dout(10) << "identify_files_to_recover" << dendl;
- for (hash_map<inodeno_t,CInode*>::iterator p = inode_map.begin();
+ for (hash_map<vinodeno_t,CInode*>::iterator p = inode_map.begin();
p != inode_map.end();
++p) {
CInode *in = p->second;
if (lru.lru_get_size() == 0) {
// root, stray, etc.?
- hash_map<inodeno_t,CInode*>::iterator p = inode_map.begin();
+ hash_map<vinodeno_t,CInode*>::iterator p = inode_map.begin();
while (p != inode_map.end()) {
- hash_map<inodeno_t,CInode*>::iterator next = p;
+ hash_map<vinodeno_t,CInode*>::iterator next = p;
++next;
CInode *in = p->second;
if (!in->is_auth()) {
// MISS. dentry doesn't exist.
dout(12) << "traverse: miss on dentry " << path[depth] << " in " << *curdir << dendl;
-
+
if (curdir->is_auth()) {
// dentry is mine.
if (curdir->is_complete()) {
{
dout(7) << "show_cache" << dendl;
- for (hash_map<inodeno_t,CInode*>::iterator it = inode_map.begin();
+ for (hash_map<vinodeno_t,CInode*>::iterator it = inode_map.begin();
it != inode_map.end();
it++) {
// unlinked?
ofstream myfile;
myfile.open(fn);
- for (hash_map<inodeno_t,CInode*>::iterator it = inode_map.begin();
+ for (hash_map<vinodeno_t,CInode*>::iterator it = inode_map.begin();
it != inode_map.end();
it++) {
list<CDir*> dfs;
// -- my cache --
LRU lru; // dentry lru for expiring items from cache
protected:
- hash_map<inodeno_t,CInode*> inode_map; // map of inodes by ino
- CInode *root; // root inode
- CInode *stray; // my stray dir
+ hash_map<vinodeno_t,CInode*> inode_map; // map of inodes by vino (ino + snapid)
+ CInode *root; // root inode
+ CInode *stray; // my stray dir
set<CInode*> base_inodes; // inodes < MDS_INO_BASE (root, stray, etc.)
bool did_shutdown_log_cap;
// inode_map
- bool have_inode( inodeno_t ino ) { return inode_map.count(ino) ? true:false; }
- CInode* get_inode( inodeno_t ino ) {
- if (have_inode(ino))
- return inode_map[ino];
+ bool have_inode( inodeno_t ino ) { return have_inode(vinodeno_t(ino, 0)); }  // ino-only form: checks for the entry whose snapid is 0
+ bool have_inode( vinodeno_t vino ) { return inode_map.count(vino) ? true:false; }  // true if inode_map holds an entry for exactly this (ino, snapid)
+ CInode* get_inode( inodeno_t ino, snapid_t s=0 ) {  // returns the cached inode for (ino, s), or NULL if absent; s defaults to 0
+ vinodeno_t vino(ino,s);
+ if (have_inode(vino))  // check first: operator[] below would insert an entry for a missing key
+ return inode_map[vino];
return NULL;
}
CDir* get_dirfrag(dirfrag_t df) {
if (!have_inode(df.ino)) return NULL;
- return inode_map[df.ino]->get_dirfrag(df.frag);
+ return get_inode(df.ino)->get_dirfrag(df.frag);
}
/*
void get_dirfrags_under(dirfrag_t df, list<CDir*>& ls) {
CInode *hack_pick_random_inode() {
assert(!inode_map.empty());
int n = rand() % inode_map.size();
- hash_map<inodeno_t,CInode*>::iterator p = inode_map.begin();
+ hash_map<vinodeno_t,CInode*>::iterator p = inode_map.begin();
while (n--) p++;
return p->second;
}
dout(12) << "including inode " << *in << dendl;
// dentry
- ::encode(it->first, dnbl);
+ ::encode(dn->name, dnbl);
mds->locker->issue_client_lease(dn, client, dnbl, mdr->now, mdr->session);
// inode
<< ")";
}
+struct vinodeno_t {  // versioned inode id: an inode number paired with a snapshot id
+ inodeno_t ino;  // inode number
+ snapid_t snapid;  // snapshot id this version refers to
+
+ vinodeno_t(inodeno_t i, snapid_t s) : ino(i), snapid(s) {}  // both fields are required; there is no default constructor
+};
+
+inline bool operator==(const vinodeno_t &l, const vinodeno_t &r) {  // equality for vinodeno_t
+ return l.ino == r.ino && l.snapid == r.snapid;  // equal only when both the ino and the snapid match
+}
+
+namespace __gnu_cxx {  // specialization lives in __gnu_cxx alongside the primary hash template
+ template<> struct hash<vinodeno_t> {  // hash functor for vinodeno_t keys
+ size_t operator()(const vinodeno_t &vino) const {
+ hash<inodeno_t> H;  // hasher for the ino field
+ hash<uint64_t> I;  // hasher for the snapid field, which is passed to it as a uint64_t
+ return H(vino.ino) ^ I(vino.snapid);  // combine the two field hashes with XOR
+ }
+ };
+}
+
+
+
+
+inline ostream& operator<<(ostream &out, const vinodeno_t &vino) {  // prints the ino, optionally followed by a snapid suffix
+ out << vino.ino;
+ if (vino.snapid == CEPH_NOSNAP)
+ out << ".head";  // CEPH_NOSNAP is rendered as ".head"
+ else if (vino.snapid)
+ out << '.' << vino.snapid;  // any other nonzero snapid is rendered as ".<snapid>"; snapid 0 adds no suffix
+ return out;
+}
+
+
struct inode_t {
// base (immutable)
inodeno_t ino;
};
WRITE_CLASS_ENCODER(inode_t)
+
+struct old_inode_t {  // encodable record: a first snapid, an inode field, and a set of xattrs
+ snapid_t first;  // first snapid of this record
+ inodeno_t inode;  // NOTE(review): named 'inode' but typed inodeno_t; confirm whether inode_t was intended
+ map<string,bufferptr> xattrs;  // xattr name -> value buffer
+
+ void encode(bufferlist& bl) const {  // appends first, inode, xattrs to bl, in that order
+ ::encode(first, bl);
+ ::encode(inode, bl);
+ ::encode(xattrs, bl);
+ }
+ void decode(bufferlist::iterator& bl) {  // reads the fields back in the same order encode() wrote them
+ ::decode(first, bl);
+ ::decode(inode, bl);
+ ::decode(xattrs, bl);
+ }
+};
+WRITE_CLASS_ENCODER(old_inode_t)
+
+
/*
* like an inode, but for a dir frag
*/
+
+// =======
+// dentries
+
+typedef pair<snapid_t, const char *> dentry_key_t;  // (snapid, name); the name is a borrowed C string, not an owned copy
+
+struct ltdentrykey  // strict ordering for dentry_key_t: by snapid first, then by name
+{
+ bool operator()(const dentry_key_t& k1,
+ const dentry_key_t& k2) const
+ {
+ return
+ k1.first < k2.first ||  // a lower snapid sorts first regardless of name
+ (k1.first == k2.first && strcmp(k1.second, k2.second) < 0);  // equal snapids: compare names by content via strcmp, not by pointer value
+ }
+};
+
+
+
+
// =========
-// reqeusts
+// requests
struct metareqid_t {
entity_name_t name;
int i = 0;
for (set<snapid_t>::reverse_iterator p = s.rbegin(); p != s.rend(); p++)
cached_snaps[i++] = *p;
-
- dout(10) << "get_snap_vector " << cached_snaps << dendl;
- //" (highwater " << cached_snaps_stamp << ")" << dendl;
+
+ dout(10) << "get_snap_vector " << cached_snaps
+ << " (highwater " << snap_highwater << ")" << dendl;
return &cached_snaps;
}
<< " on " << *child->inode << dendl;
// split open_children
- dout(10) << " my children are " << open_children << dendl;
+ dout(10) << " open_children are " << open_children << dendl;
for (set<SnapRealm*>::iterator p = open_children.begin();
p != open_children.end(); ) {
SnapRealm *realm = *p;