mds/MDSTable.cc \
mds/IdAllocator.cc \
mds/SnapTable.cc \
+ mds/snap.cc \
mds/SessionMap.cc \
mds/MDLog.cc
- client capgroups
- mds snapid allocation
- snap creation
+ - async SnapClient for the (possibly remote) SnapTable
+ - hmm, can we generalize any of AnchorClient?
- mds metadata versioning
- mds server ops
- when we create a snapshot,
- xlock snaplock
- - create realm, if necesarry
+ - create realm, if necessary
- add it to the realm snaps list.
- build list of current children
- send client a capgroup update for each affected realm
__le32 seq;
__le32 caps, wanted;
__le64 ino;
+ __le64 realm;
__le64 size, max_size;
__le32 migrate_seq;
struct ceph_timespec mtime, atime, ctime;
class CInodeDiscover;
class MDCache;
class LogSegment;
+class SnapRealm;
ostream& operator<<(ostream& out, CInode& in);
string symlink; // symlink dest, if symlink
map<string, bufferptr> xattrs;
fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map.
+ SnapRealm *snaprealm;
off_t last_journaled; // log offset for the last time i was journaled
off_t last_open_journaled; // log offset for the last journaled EOpen
// -- distributed state --
protected:
// file capabilities
- map<int, Capability*> client_caps; // client -> caps
+ map<int, Capability*> client_caps; // client -> caps
map<int, int> mds_caps_wanted; // [auth] mds -> caps wanted
int replica_caps_wanted; // [replica] what i've requested from auth
utime_t replica_caps_wanted_keep_until;
// LogSegment xlists i (may) belong to
xlist<CInode*>::item xlist_dirty;
public:
+ xlist<CInode*>::item xlist_caps;
xlist<CInode*>::item xlist_open_file;
xlist<CInode*>::item xlist_dirty_dirfrag_dir;
xlist<CInode*>::item xlist_dirty_dirfrag_dirfragtree;
// ---------------------------
CInode(MDCache *c, bool auth=true) :
mdcache(c),
+ snaprealm(0),
last_journaled(0), last_open_journaled(0),
//hack_accessed(true),
stickydir_ref(0),
parent(0), projected_parent(0),
inode_auth(CDIR_AUTH_DEFAULT),
replica_caps_wanted(0),
- xlist_dirty(this), xlist_open_file(this),
+ xlist_dirty(this), xlist_caps(this), xlist_open_file(this),
xlist_dirty_dirfrag_dir(this),
xlist_dirty_dirfrag_dirfragtree(this),
xlist_purging_inode(this),
private:
CInode *inode;
+ xlist<Capability*>::item cap_group_item;
__u32 wanted_caps; // what the client wants (ideally)
-
+
map<capseq_t, __u32> cap_history; // seq -> cap, [last_recv,last_sent]
capseq_t last_sent, last_recv;
capseq_t last_open;
xlist<Capability*>::item session_caps_item;
Capability(CInode *i=0, int want=0, capseq_t s=0) :
- inode(i),
+ inode(i), cap_group_item(this),
wanted_caps(want),
last_sent(s),
last_recv(s),
snapid_t sn = ++last_snap;
snaps[sn].snapid = sn;
- snaps[sn].base = base;
+ snaps[sn].dirino = base;
snaps[sn].name = name;
snaps[sn].stamp = stamp;
version++;
assert(version-1 == mds->snaptable->get_version());
if (create) {
- snapid_t s = mds->snaptable->create(snap.base, snap.name, snap.stamp);
+ snapid_t s = mds->snaptable->create(snap.dirino, snap.name, snap.stamp);
assert(s == snap.snapid);
} else {
mds->snaptable->remove(snap.snapid);
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004- Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "snap.h"
+#include "MDCache.h"
+#include "MDS.h"
+
+
+/*
+ * SnapRealm
+ */
+
+// Debug-log helper for SnapRealm methods: when level x is below
+// g_conf.debug_mds, starts a log line prefixed with the current time, this
+// mds's node id and the realm's dirino.  Requires `mdcache` and `dirino` to be
+// in scope at the call site.  Expands to a bare `if`, so do not use it as the
+// unbraced body of an if/else.
+#define dout(x) if ((x) < g_conf.debug_mds) *_dout << dbeginl << g_clock.now() \
+		<< " mds" << mdcache->mds->get_nodeid() \
+		<< ".snaprealm(" << dirino << ") "
+
+/*
+ * Check that the inode of every parent realm link is present in the cache.
+ *
+ * Returns true when all of them are.  Otherwise asks the cache to open the
+ * first missing parent inode, queueing a C_MDS_RetryRequest for mdr as the
+ * completion, and returns false; remaining parents are not examined on this
+ * pass.
+ */
+bool SnapRealm::open_parents(MDRequest *mdr)
+{
+  dout(10) << "open_parents" << dendl;
+
+  typedef multimap<snapid_t, snaplink_t>::iterator link_iter;
+  for (link_iter link = parents.begin(); link != parents.end(); ++link) {
+    if (!mdcache->get_inode(link->second.dirino)) {
+      // not in cache: start opening it and have the request retried later
+      mdcache->open_remote_ino(link->second.dirino, mdr,
+			       new C_MDS_RetryRequest(mdcache, mdr));
+      return false;
+    }
+  }
+  return true;
+}
+
+/*
+ * get list of snaps for this realm.  we must include parents' snaps
+ * for the intervals during which they were our parent.
+ *
+ * s is added to, never cleared: on return it additionally holds every
+ * snapid in our own snaps map, plus each parent realm snapid q with
+ * link.first <= q <= key, where key is the multimap key of the parent
+ * link ("last").
+ *
+ * all parent inodes must already be in cache (see open_parents) and must
+ * carry a snaprealm; both are asserted.
+ */
+void SnapRealm::get_snap_list(set<snapid_t> &s)
+{
+  typedef map<snapid_t, SnapInfo>::iterator snap_iter;
+  typedef multimap<snapid_t, snaplink_t>::iterator link_iter;
+
+  // start with my snaps
+  for (snap_iter p = snaps.begin(); p != snaps.end(); ++p)
+    s.insert(p->first);
+
+  // include parent snaps
+  for (link_iter p = parents.begin(); p != parents.end(); ++p) {
+    CInode *parent = mdcache->get_inode(p->second.dirino);
+    assert(parent);  // call open_parents first!
+    assert(parent->snaprealm);
+
+    // only snaps taken while this realm was our parent: [link.first, key]
+    map<snapid_t, SnapInfo> &parent_snaps = parent->snaprealm->snaps;
+    for (snap_iter q = parent_snaps.begin(); q != parent_snaps.end(); ++q)
+      if (q->first <= p->first &&
+	  q->first >= p->second.first)
+	s.insert(q->first);
+  }
+  dout(10) << "get_snap_list " << s << dendl;
+}
#ifndef __CEPH_MDS_SNAP_H
#define __CEPH_MDS_SNAP_H
+#include "mdstypes.h"
+#include "include/xlist.h"
+
+/*
+ * generic snap descriptor.
+ */
struct SnapInfo {
snapid_t snapid;
- inodeno_t base;
+ inodeno_t dirino;
utime_t stamp;
string name;
void encode(bufferlist& bl) const {
::encode(snapid, bl);
- ::encode(base, bl);
+ ::encode(dirino, bl);
::encode(stamp, bl);
::encode(name, bl);
}
void decode(bufferlist::iterator& bl) {
::decode(snapid, bl);
- ::decode(base, bl);
+ ::decode(dirino, bl);
::decode(stamp, bl);
::decode(name, bl);
}
WRITE_CLASS_ENCODER(SnapInfo)
inline ostream& operator<<(ostream& out, const SnapInfo &sn) {
- return out << "snap(" << sn.snapid << " " << sn.base << " '" << sn.name << "' " << sn.stamp << ")";
+ return out << "snap(" << sn.snapid
+ << " " << sn.dirino
+ << " '" << sn.name
+ << "' " << sn.stamp << ")";
}
+
+
+/*
+ * SnapRealm - a subtree that shares the same set of snapshots.
+ */
+struct SnapRealm;
+struct CapabilityGroup;
+class CInode;
+class MDCache;
+class MDRequest;
+
+struct snaplink_t {  // value type of SnapRealm::parents / SnapRealm::children
+ inodeno_t dirino;  // inode number of the linked realm; SnapRealm looks it up via mdcache->get_inode()
+ snapid_t first;  // lower bound of the snapid interval; the upper bound ("last") is the multimap key
+};
+
+struct SnapRealm {
+  // -- realm state --
+  inodeno_t dirino;                  // shown in this realm's debug-log prefix
+  map<snapid_t, SnapInfo> snaps;     // this realm's own snapshots, keyed by snapid_t
+
+  // links to other realms; key is "last" (or NOSNAP), value carries the
+  // linked dirino and the "first" snapid of the interval
+  multimap<snapid_t, snaplink_t> parents;
+  multimap<snapid_t, snaplink_t> children;
+
+  // -- in-memory state --
+  MDCache *mdcache;
+  CInode *inode;
+
+  // caches?
+  //set<snapid_t> cached_snaps;
+  //set<SnapRealm*> cached_active_children; // active children that are currently open
+
+  xlist<CInode*> inodes_with_caps;               // for efficient realm splits
+  map<int, CapabilityGroup*> client_cap_groups;  // to identify clients who need snap notifications
+
+  // snaps, parents and children start out empty
+  SnapRealm(inodeno_t i, MDCache *c, CInode *in)
+    : dirino(i),
+      mdcache(c),
+      inode(in)
+  {}
+
+  // true if every parent inode is in cache; otherwise starts opening one
+  // and returns false
+  bool open_parents(MDRequest *mdr);
+
+  // add our snapids, and the applicable parent snapids, to s
+  void get_snap_list(set<snapid_t>& s);
+};
+
+
+
+/*
+ * CapabilityGroup - group per-realm, per-client caps for efficient
+ * client snap notifications.
+ */
+struct Capability;
+
+struct CapabilityGroup {  // NOTE(review): no constructor here, so client and realm start uninitialized unless the creator sets them
+ int client;  // presumably the client id this group belongs to (SnapRealm::client_cap_groups is keyed by int) -- confirm
+ xlist<Capability*> caps;  // the Capability objects collected in this group
+ SnapRealm *realm;  // the realm this group is associated with
+};
+
+
#endif