- client reconnect
- esp cap claim
+- client snap caps
+ - NO CAP STATE FOR SNAPPED INODES.
+ - mds grants open access (yes/no), but there is no state, since there is no concurrency.
+ (mds doesn't grant access until the filelock is readable, i.e., snapped data has flushed)
+ - client _should_ only send FLUSHSNAP _after_ data is flushed. this will require much more sophisticated barriers in the client's cache.
+ - reconnect should map caps into snaprealms, and include snaprealm state, such that those can be brought in sync w/ the mds.
+ - reconnect does _not_ need any per-cap snap-related info.
+
+
/- call open_parents() where needed.
- what about during recovery? e.g. client reconnected caps...
- mds server ops
dout(10) << " path on " << p->first << " is " << path << dendl;
in->caps[mds]->seq = 0; // reset seq.
- m->add_cap(p->first, path.get_path(), // ino
+ m->add_cap(p->first.ino, path.get_path(), // ino
in->caps_wanted(), // wanted
in->caps[mds]->issued, // issued
- in->inode.size, in->inode.mtime, in->inode.atime);
+ in->inode.size, in->inode.mtime, in->inode.atime, in->snaprealm->ino);
}
if (in->exporting_mds == mds) {
dout(10) << " clearing exporting_caps on " << p->first << dendl;
op = CEPH_CAP_OP_RELEASE;
dout(10) << " op = " << op << dendl;
MClientFileCaps *m = new MClientFileCaps(op,
- in->inode, in->snapid,
+ in->inode,
0,
cap->seq,
cap->issued,
struct ceph_mds_file_caps {
__le32 op;
__le64 ino;
- __le64 snapid;
__le32 seq;
__le32 caps, wanted;
__le64 size, max_size;
__le32 issued;
__le64 size;
struct ceph_timespec mtime, atime;
+ __le64 snaprealm;
} __attribute__ ((packed));
/* followed by encoded string */
+struct ceph_mds_snaprealm_reconnect {
+ __le64 seq;
+ __le64 parent; /* parent realm */
+} __attribute__ ((packed));
/*
* snaps
WRITE_RAW_ENCODER(ceph_mds_reply_head)
WRITE_RAW_ENCODER(ceph_mds_reply_inode)
WRITE_RAW_ENCODER(ceph_mds_cap_reconnect)
+WRITE_RAW_ENCODER(ceph_mds_snaprealm_reconnect)
WRITE_RAW_ENCODER(ceph_frag_tree_split)
WRITE_RAW_ENCODER(ceph_inopath_item)
<< " new pending " << cap_string(cap->pending()) << " was " << cap_string(before)
<< dendl;
mds->send_message_client(new MClientFileCaps(CEPH_CAP_OP_GRANT,
- in->inode, in->last,
+ in->inode,
in->find_snaprealm()->inode->ino(),
cap->get_last_seq(),
cap->pending(),
it++) {
Capability *cap = it->second;
mds->send_message_client(new MClientFileCaps(CEPH_CAP_OP_TRUNC,
- in->inode, in->last,
+ in->inode,
in->find_snaprealm()->inode->ino(),
cap->get_last_seq(),
cap->pending(),
if (cap->pending() & CEPH_CAP_WR) {
dout(10) << "share_inode_max_size with client" << client << dendl;
mds->send_message_client(new MClientFileCaps(CEPH_CAP_OP_GRANT,
- in->inode, in->last,
+ in->inode,
in->find_snaprealm()->inode->ino(),
cap->get_last_seq(),
cap->pending(),
ack = new MMDSCacheRejoin(MMDSCacheRejoin::OP_ACK);
// check cap exports
- for (map<vinodeno_t,map<int,ceph_mds_cap_reconnect> >::iterator p = weak->cap_exports.begin();
+ for (map<inodeno_t,map<int,ceph_mds_cap_reconnect> >::iterator p = weak->cap_exports.begin();
p != weak->cap_exports.end();
++p) {
CInode *in = get_inode(p->first);
assert(mds->is_rejoin());
// check cap exports.
- for (map<vinodeno_t,map<int,ceph_mds_cap_reconnect> >::iterator p = weak->cap_exports.begin();
+ for (map<inodeno_t,map<int,ceph_mds_cap_reconnect> >::iterator p = weak->cap_exports.begin();
p != weak->cap_exports.end();
++p) {
CInode *in = get_inode(p->first);
* returns a C_Gather* if there is work to do. caller is responsible for setting
* the C_Gather completer.
*/
-C_Gather *MDCache::parallel_fetch(map<vinodeno_t,string>& pathmap)
+C_Gather *MDCache::parallel_fetch(map<inodeno_t,string>& pathmap)
{
dout(10) << "parallel_fetch on " << pathmap.size() << " paths" << dendl;
// scan list
set<CDir*> fetch_queue;
- map<vinodeno_t,string>::iterator p = pathmap.begin();
+ map<inodeno_t,string>::iterator p = pathmap.begin();
while (p != pathmap.end()) {
CInode *in = get_inode(p->first);
if (in) {
// process cap imports
// ino -> client -> frommds -> capex
- for (map<vinodeno_t,map<int, map<int,ceph_mds_cap_reconnect> > >::iterator p = cap_imports.begin();
+ for (map<inodeno_t,map<int, map<int,ceph_mds_cap_reconnect> > >::iterator p = cap_imports.begin();
p != cap_imports.end();
++p) {
CInode *in = get_inode(p->first);
// send IMPORT
SnapRealm *realm = in->find_snaprealm();
MClientFileCaps *reap = new MClientFileCaps(CEPH_CAP_OP_IMPORT,
- in->inode, in->last,
+ in->inode,
realm->inode->ino(),
cap->get_last_seq(),
cap->pending(),
set<int> rejoin_sent; // nodes i sent a rejoin to
set<int> rejoin_ack_gather; // nodes from whom i need a rejoin ack
- map<vinodeno_t,map<int,ceph_mds_cap_reconnect> > cap_exports; // ino -> client -> capex
- map<vinodeno_t,string> cap_export_paths;
+ map<inodeno_t,map<int,ceph_mds_cap_reconnect> > cap_exports; // ino -> client -> capex
+ map<inodeno_t,string> cap_export_paths;
- map<vinodeno_t,map<int,map<int,ceph_mds_cap_reconnect> > > cap_imports; // ino -> client -> frommds -> capex
- map<vinodeno_t,string> cap_import_paths;
+ map<inodeno_t,map<int,map<int,ceph_mds_cap_reconnect> > > cap_imports; // ino -> client -> frommds -> capex
+ map<inodeno_t,string> cap_import_paths;
set<CInode*> rejoin_undef_inodes;
set<CInode*> rejoin_potential_updated_scatterlocks;
public:
void rejoin_gather_finish();
void rejoin_send_rejoins();
- void rejoin_export_caps(vinodeno_t vino, int client, cap_reconnect_t& icr) {
- cap_exports[vino][client] = icr.capinfo;
- cap_export_paths[vino] = icr.path;
+ void rejoin_export_caps(inodeno_t ino, int client, cap_reconnect_t& icr) {
+ cap_exports[ino][client] = icr.capinfo;
+ cap_export_paths[ino] = icr.path;
}
- void rejoin_recovered_caps(vinodeno_t vino, int client, cap_reconnect_t& icr,
+ void rejoin_recovered_caps(inodeno_t ino, int client, cap_reconnect_t& icr,
int frommds=-1) {
- cap_imports[vino][client][frommds] = icr.capinfo;
- cap_import_paths[vino] = icr.path;
+ cap_imports[ino][client][frommds] = icr.capinfo;
+ cap_import_paths[ino] = icr.path;
}
void rejoin_import_cap(CInode *in, int client, ceph_mds_cap_reconnect& icr, int frommds);
vector<Anchor>& anchortrace,
Context *onfinish);
- C_Gather *parallel_fetch(map<vinodeno_t,string>& pathmap);
+ C_Gather *parallel_fetch(map<inodeno_t,string>& pathmap);
void make_trace(vector<CDentry*>& trace, CInode *in);
dout(7) << "finish_export_inode telling client" << it->first
<< " exported caps on " << *in << dendl;
MClientFileCaps *m = new MClientFileCaps(CEPH_CAP_OP_EXPORT,
- in->inode, in->last,
+ in->inode,
in->find_snaprealm()->inode->ino(),
cap->get_last_seq(),
cap->pending(),
SnapRealm *realm = in->find_snaprealm();
MClientFileCaps *caps = new MClientFileCaps(CEPH_CAP_OP_IMPORT,
- in->inode, in->last,
+ in->inode,
realm->inode->ino(),
cap->get_last_seq(),
cap->pending(),
} else {
// caps
- for (map<vinodeno_t, cap_reconnect_t>::iterator p = m->caps.begin();
+ for (map<inodeno_t, cap_reconnect_t>::iterator p = m->caps.begin();
p != m->caps.end();
++p) {
CInode *in = mdcache->get_inode(p->first);
// mark client caps stale.
inode_t fake_inode;
memset(&fake_inode, 0, sizeof(fake_inode));
- fake_inode.ino = p->first.ino;
+ fake_inode.ino = p->first;
MClientFileCaps *stale = new MClientFileCaps(CEPH_CAP_OP_EXPORT,
- fake_inode, p->first.snapid,
+ fake_inode,
0,
0,
0, // doesn't matter.
ceph_mds_cap_reconnect capinfo;
cap_reconnect_t() {}
- cap_reconnect_t(const string& p, int w, int i, uint64_t sz, utime_t mt, utime_t at) :
+ cap_reconnect_t(const string& p, int w, int i, uint64_t sz, utime_t mt, utime_t at, inodeno_t sr) :
path(p) {
capinfo.wanted = w;
capinfo.issued = i;
capinfo.size = sz;
capinfo.mtime = mt;
capinfo.atime = at;
+ capinfo.snaprealm = sr;
}
void encode(bufferlist& bl) const {
WRITE_CLASS_ENCODER(cap_reconnect_t)
+
// ================================================================
// dir frag
capseq_t get_mseq() { return h.migrate_seq; }
inodeno_t get_ino() { return inodeno_t(h.ino); }
- snapid_t get_snapid() { return snapid_t(h.snapid); }
__u64 get_size() { return h.size; }
__u64 get_max_size() { return h.max_size; }
MClientFileCaps() {}
MClientFileCaps(int op,
inode_t& inode,
- snapid_t snapid,
inodeno_t realm,
long seq,
int caps,
Message(CEPH_MSG_CLIENT_FILECAPS) {
h.op = op;
h.ino = inode.ino;
- h.snapid = snapid;
h.seq = seq;
h.caps = caps;
h.wanted = wanted;
class MClientReconnect : public Message {
public:
- map<vinodeno_t, cap_reconnect_t> caps;
__u8 closed; // true if this session was closed by the client.
+ map<inodeno_t, cap_reconnect_t> caps; // only head inodes
+ map<inodeno_t, ceph_mds_snaprealm_reconnect> realms;
MClientReconnect() : Message(CEPH_MSG_CLIENT_RECONNECT),
closed(false) { }
<< caps.size() << " caps)";
}
- void add_cap(vinodeno_t ino, const string& path,
+ void add_cap(inodeno_t ino, const string& path,
int wanted, int issued,
- loff_t sz, utime_t mt, utime_t at) {
- caps[ino] = cap_reconnect_t(path, wanted, issued, sz, mt, at);
+ loff_t sz, utime_t mt, utime_t at,
+ inodeno_t sr) {
+ caps[ino] = cap_reconnect_t(path, wanted, issued, sz, mt, at, sr);
+ }
+ void add_snaprealm(inodeno_t ino, snapid_t seq, inodeno_t parent) {
+ realms[ino].seq = seq;
+ realms[ino].parent = parent;
}
void encode_payload() {
// open
bufferlist cap_export_bl;
- map<vinodeno_t,map<__s32, ceph_mds_cap_reconnect> > cap_exports;
- map<vinodeno_t,string> cap_export_paths;
+ map<inodeno_t,map<__s32, ceph_mds_cap_reconnect> > cap_exports;
+ map<inodeno_t,string> cap_export_paths;
// full
bufferlist inode_base;