- SnapRealm open_parents, get_snap_set need to recursively open/examine parents over given ranges...
- realm split
+ - make realm split notifications safe against races between multiple MDSs
- figure out how to fix up rados logging
- snap collections
case CEPH_SNAP_OP_SPLIT:
{
- SnapRealm *newrealm = get_snap_realm(m->new_realm);
- for (list<inodeno_t>::iterator p = m->new_inodes.begin();
- p != m->new_inodes.end();
+ /*
+ * Split: for every ino listed in m->split_inos that is present in
+ * inode_map, drop the reference held on the inode's previous realm
+ * (if it has one), point the inode at 'realm' and take a reference
+ * on it. 'realm' is set up outside this hunk.
+ *
+ * FIXME: this blindly moves the inode out of whatever realm it was
+ * in before. It still needs to be made safe against races between
+ * multiple MDSs.
+ *
+ * NOTE(review): the old realm is put before realm->nref is bumped.
+ * If in->snaprealm can already equal 'realm' here, confirm that
+ * put_snap_realm() cannot release it in between.
+ */
+ for (list<inodeno_t>::iterator p = m->split_inos.begin();
+ p != m->split_inos.end();
p++) {
if (inode_map.count(*p)) {
Inode *in = inode_map[*p];
- dout(10) << " moving " << *in << " into new realm " << m->new_realm << dendl;
+ dout(10) << " moving " << *in << " from old realm " << m->split_parent << dendl;
if (in->snaprealm)
put_snap_realm(in->snaprealm);
- in->snaprealm = newrealm;
- newrealm->nref++;
+ in->snaprealm = realm;
+ realm->nref++;
}
}
- put_snap_realm(newrealm);
+ // the split message also carries snap_highwater and snaps for the realm; hand them to maybe_update_snaprealm() as well
+ maybe_update_snaprealm(realm, m->snap_highwater, m->snaps);
}
break;
fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map.
SnapRealm *snaprealm;
+ SnapRealm *containing_realm;
+
off_t last_journaled; // log offset for the last time i was journaled
off_t last_open_journaled; // log offset for the last journaled EOpen
return 0;
}
Capability *add_client_cap(int client, CInode *in) {
- if (client_caps.empty())
+ if (client_caps.empty()) {  // first cap on this inode: take PIN_CAPS and join the containing realm
get(PIN_CAPS);
+ containing_realm = find_containing_snaprealm();  // looked up once here; remove_client_cap() resets it when the last cap goes
+ containing_realm->inodes_with_caps.push_back(&xlist_caps);  // put this inode on the realm's inodes_with_caps list
+ }
+
assert(client_caps.count(client) == 0);
Capability *cap = client_caps[client] = new Capability;
cap->set_inode(in);
-
- SnapRealm *realm = find_containing_snaprealm();
- realm->add_cap(client, cap);
+
+ containing_realm->add_cap(client, cap);  // register the new cap with the cached realm instead of looking the realm up again
return cap;
}
void remove_client_cap(int client) {
assert(client_caps.count(client) == 1);
- Capability *cap = client_caps[client];
- cap->realm->remove_cap(client, cap);
+ containing_realm->remove_cap(client, client_caps[client]);  // unlink from the realm first; the cap object is deleted on the next line
delete client_caps[client];
client_caps.erase(client);
- if (client_caps.empty())
+ if (client_caps.empty()) {  // last cap gone: undo what add_client_cap() set up for the first one
put(PIN_CAPS);
+ xlist_caps.remove_myself();  // take this inode off the realm's inodes_with_caps list
+ containing_realm = NULL;  // no caps left, so no cached realm
+ }
+ }
+  // Re-home this inode's caps: detach every client cap from the current
+  // containing realm and attach it to 'realm', then move the inode's
+  // xlist_caps entry onto realm->inodes_with_caps and record 'realm' as
+  // the new containing realm.
+  void move_to_containing_realm(SnapRealm *realm) {
+    map<int,Capability*>::iterator cur = client_caps.begin();
+    while (cur != client_caps.end()) {
+      const int client = cur->first;
+      Capability *cap = cur->second;
+      containing_realm->remove_cap(client, cap);
+      realm->add_cap(client, cap);
+      cur++;
+    }
+    xlist_caps.remove_myself();
+    realm->inodes_with_caps.push_back(&xlist_caps);
+    containing_realm = realm;
}
+
Capability *reconnect_cap(int client, inode_caps_reconnect_t& icr) {
Capability *cap = get_client_cap(client);
if (cap) {
class CInode;
-class SnapRealm;
class Capability {
public:
public:
xlist<Capability*>::item session_caps_item;
- SnapRealm *realm;
xlist<Capability*>::item snaprealm_caps_item;
Capability(CInode *i=0, int want=0, capseq_t s=0) :
last_open(0),
mseq(0),
suppress(false), stale(false),
- session_caps_item(this), realm(0), snaprealm_caps_item(this) { }
+ session_caps_item(this), snaprealm_caps_item(this) { }
capseq_t get_mseq() { return mseq; }
dout(10) << " snapid is " << snapid << dendl;
// create realm?
+ inodeno_t split_parent = 0;
if (!diri->snaprealm) {
dout(10) << "creating snaprealm on " << *diri << dendl;
diri->open_snaprealm();
link.dirino = parent->inode->ino();
diri->snaprealm->parents.insert(pair<snapid_t,snaplink_t>(CEPH_NOSNAP, link));
- // split...
- // ***
+ // split existing caps
+ parent->split_at(diri->snaprealm);
+ split_parent = parent->inode->ino();
}
// add the snap
vector<snapid_t> snaps;
diri->snaprealm->get_snap_vector(snaps);
- // notify clients
-
+ // notify clients of update|split
+ list<inodeno_t> split_inos;
+ if (split_parent)
+ for (xlist<CInode*>::iterator p = diri->snaprealm->inodes_with_caps.begin(); !p.end(); ++p)
+ split_inos.push_back((*p)->ino());
+
for (map<int, xlist<Capability*> >::iterator p = diri->snaprealm->client_caps.begin();
p != diri->snaprealm->client_caps.end();
p++) {
assert(!p->second.empty());
- MClientSnap *update = new MClientSnap(CEPH_SNAP_OP_UPDATE, diri->ino());
+ MClientSnap *update = new MClientSnap(split_parent ? CEPH_SNAP_OP_SPLIT:CEPH_SNAP_OP_UPDATE,
+ diri->ino());
update->snaps = snaps;
update->snap_highwater = diri->snaprealm->snap_highwater;
+ update->split_parent = split_parent;
+ update->split_inos = split_inos;
mds->send_message_client(update, p->first);
}
#include "MDCache.h"
#include "MDS.h"
+#include "messages/MClientSnap.h"
/*
* SnapRealm
for (set<snapid_t>::reverse_iterator p = s.rbegin(); p != s.rend(); p++)
v[i++] = *p;
}
+
+
+/*
+ * Hand over to 'child' every inode with caps in this realm that lies
+ * beneath child->inode.
+ *
+ * For each inode on inodes_with_caps, walk up its ancestor directories.
+ * Reaching child->inode means the inode is under the child realm and is
+ * moved with move_to_containing_realm(). Reaching this realm's own
+ * inode first, or running out of parents, means it stays here.
+ */
+void SnapRealm::split_at(SnapRealm *child)
+{
+  dout(10) << "split_at " << *child << dendl;
+
+  xlist<CInode*>::iterator p = inodes_with_caps.begin();
+  while (!p.end()) {
+    CInode *in = *p;
+    // advance before a possible move: move_to_containing_realm() takes
+    // 'in' off this list
+    ++p;
+
+    // does inode fall within the child realm?
+    CInode *t = in;
+    bool under_child = false;
+    // NOTE(review): assumes get_parent_dn() is null for an inode with no
+    // parent dentry (e.g. the top of the tree); stop there instead of
+    // dereferencing it.
+    while (t->get_parent_dn()) {
+      // step up from the current ancestor 't', not from 'in', so the
+      // walk actually climbs and terminates
+      t = t->get_parent_dn()->get_dir()->get_inode();
+      if (t == child->inode) {
+        under_child = true;
+        break;
+      }
+      if (t == inode)
+        break;
+    }
+    if (!under_child) {
+      dout(20) << " keeping " << *in << dendl;
+      continue;
+    }
+
+    dout(20) << " child gets " << *in << dendl;
+    in->move_to_containing_realm(child);
+  }
+}
void get_snap_set(set<snapid_t>& s);
void get_snap_vector(vector<snapid_t>& s);
+ void split_at(SnapRealm *child);
+
void add_cap(int client, Capability *cap) {
client_caps[client].push_back(&cap->snaprealm_caps_item);  // file the cap on this realm's list for 'client'
- cap->realm = this;
}
void remove_cap(int client, Capability *cap) {
- cap->realm = 0;
cap->snaprealm_caps_item.remove_myself();
if (client_caps[client].empty())
client_caps.erase(client);
vector<snapid_t> snaps;
// (for split only)
- inodeno_t new_realm;
- list<inodeno_t> new_inodes;
+ inodeno_t split_parent;
+ list<inodeno_t> split_inos;
MClientSnap() : Message(CEPH_MSG_CLIENT_SNAP) {}  // NOTE(review): sets no fields here; presumably used for messages that are then filled by decode_payload() - confirm
MClientSnap(int o, inodeno_t r) :
Message(CEPH_MSG_CLIENT_SNAP),
- op(o), realm(r) {}
+ op(o), realm(r),
+ split_parent(0) {}  // starts at 0; print() only shows split_parent when it tests true
const char *get_type_name() { return "Csnap"; }
void print(ostream& out) {
out << "client_snap(" << get_opname(op) << " " << realm
<< " " << snaps;
+ if (split_parent)  // only shown when set; the two-argument constructor initializes it to 0
+ out << " split_parent=" << split_parent;
out << ")";
}
::encode(realm, payload);
::encode(snap_highwater, payload);
::encode(snaps, payload);
- ::encode(new_realm, payload);
- ::encode(new_inodes, payload);
+ ::encode(split_parent, payload);
+ ::encode(split_inos, payload);
}
void decode_payload() {
bufferlist::iterator p = payload.begin();
::decode(realm, p);
::decode(snap_highwater, p);
::decode(snaps, p);
- ::decode(new_realm, p);
- ::decode(new_inodes, p);
+ ::decode(split_parent, p);  // split fields come last, in the same order the encoder writes them: split_parent, then split_inos
+ ::decode(split_inos, p);
assert(p.end());  // the whole payload must have been consumed
}