/ - SnapRealm open_parents, get_snap_set need to recursively open/examine parents over given ranges...
/ - realm split
- adjust parent/child linkages
- - make realm split notifications safe from races from multiple mds's
+/ - make realm split notifications safe from races from multiple mds's
- make sense of snap_highwater...
- figure out how to fix up rados logging
* do not block.
*/
void Client::add_update_cap(Inode *in, int mds,
- inodeno_t realm, snapid_t snap_highwater, vector<snapid_t> &snaps,
+ inodeno_t realm, snapid_t snap_created, snapid_t snap_highwater,
+ vector<snapid_t> &snaps,
unsigned issued, unsigned seq, unsigned mseq)
{
InodeCap *cap = 0;
}
in->caps[mds] = cap = new InodeCap;
}
- maybe_update_snaprealm(in->snaprealm, snap_highwater, snaps);
+ maybe_update_snaprealm(in->snaprealm, snap_created, snap_highwater, snaps);
unsigned old_caps = cap->issued;
cap->issued |= issued;
}
}
// Forward new snapshot state to a client-side realm and log when it changed.
// The '-' line is the signature being removed by this patch; the '+' lines
// are its replacement, which additionally carries snap_created.
//
// realm          - realm to update (dereferenced unconditionally)
// snap_created   - passed through as the first argument of realm->maybe_update()
// snap_highwater - passed through to realm->maybe_update(); also printed in the log line
// snaps          - snapshot id vector passed through to realm->maybe_update()
-void Client::maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_highwater, vector<snapid_t>& snaps)
+void Client::maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_created,
+ snapid_t snap_highwater, vector<snapid_t>& snaps)
{
- if (realm->maybe_update(snap_highwater, snaps))
+ if (realm->maybe_update(snap_created, snap_highwater, snaps))
// only reached when maybe_update() reported a change; debug level 10
dout(10) << *realm << " now " << snaps << " highwater " << snap_highwater << dendl;
}
switch (m->op) {
case CEPH_SNAP_OP_UPDATE:
- maybe_update_snaprealm(realm, m->snap_highwater, m->snaps);
+ maybe_update_snaprealm(realm, m->snap_created, m->snap_highwater, m->snaps);
break;
case CEPH_SNAP_OP_SPLIT:
p++) {
if (inode_map.count(*p)) {
Inode *in = inode_map[*p];
- dout(10) << " moving " << *in << " from old realm " << m->split_parent << dendl;
- if (in->snaprealm)
+ if (in->snaprealm) {
+ if (in->snaprealm->created > m->snap_created) {
+ dout(10) << " NOT moving " << *in << " from _newer_ realm "
+ << *in->snaprealm << dendl;
+ continue;
+ }
put_snap_realm(in->snaprealm);
+ }
+ dout(10) << " moving " << *in << " from old realm " << m->split_parent << dendl;
in->snaprealm = realm;
realm->nref++;
}
}
- // oh.. update it too
- maybe_update_snaprealm(realm, m->snap_highwater, m->snaps);
+ // update it too
+ maybe_update_snaprealm(realm, m->snap_created, m->snap_highwater, m->snaps);
}
break;
if (m->get_op() == CEPH_CAP_OP_IMPORT) {
// add/update it
add_update_cap(in, mds,
- m->get_realm(), m->get_snap_highwater(), m->get_snaps(),
+ m->get_snap_realm(), m->get_snap_created(), m->get_snap_highwater(), m->get_snaps(),
m->get_caps(), m->get_seq(), m->get_mseq());
if (in->exporting_mseq < m->get_mseq()) {
int mds = reply->get_source().num();
add_update_cap(in, mds,
reply->get_snap_realm(),
+ reply->get_snap_created(),
reply->get_snap_highwater(),
reply->get_snaps(),
reply->get_file_caps(),
// Client-side record of a snapshot realm.
// This patch renames snap_highwater to highwater and adds a 'created' snapid.
struct SnapRealm {
// identifier the realm is constructed with (see constructor below)
inodeno_t dirino;
// reference count; starts at 0
int nref;
- snapid_t snap_highwater;
// last 'created' value handed to maybe_update(); handle_snap compares it
// against an incoming snap_created before moving an inode between realms
+ snapid_t created;
// largest highwater value accepted so far by maybe_update()
+ snapid_t highwater;
// snapshot ids copied in by maybe_update() when highwater advances
vector<snapid_t> snaps;
// Start with no references and zeroed created/highwater.
SnapRealm(inodeno_t i) :
- dirino(i), nref(0), snap_highwater(0) { }
+ dirino(i), nref(0), created(0), highwater(0) { }
// Adopt new snapshot state. Returns true when sh is strictly greater than
// the stored highwater, in which case highwater and snaps are replaced.
// (The not-updated return path is not visible in this excerpt.)
- bool maybe_update(snapid_t sh, vector<snapid_t> &s) {
- if (sh > snap_highwater) {
- snap_highwater = sh;
+ bool maybe_update(snapid_t c, snapid_t sh, vector<snapid_t> &s) {
// NOTE(review): 'created' is overwritten on every call, even when sh is
// not newer and the rest of the update is ignored -- confirm this is
// intended, since the split handling relies on 'created' for ordering.
+ created = c;
+ if (sh > highwater) {
+ highwater = sh;
snaps = s;
return true;
}
};
// Stream a SnapRealm for debug output as
//   snaprealm(<dirino> nref=<nref> c=<created> hw=<highwater> snaps=<snaps>)
// The '-' line is the old format (no c= field, old member name); the '+' line
// replaces it.
inline ostream& operator<<(ostream& out, const SnapRealm& r) {
- return out << "snaprealm(" << r.dirino << " nref=" << r.nref << " hw=" << r.snap_highwater
+ return out << "snaprealm(" << r.dirino << " nref=" << r.nref << " c=" << r.created << " hw=" << r.highwater
<< " snaps=" << r.snaps << ")";
}
// file caps
// Add or update the capability that 'mds' holds on inode 'in'.
// This patch inserts snap_created between realm and snap_highwater.
// In the definition, snap_created, snap_highwater and snaps are handed to
// maybe_update_snaprealm() for the inode's realm, and 'issued' is OR-ed into
// the cap's issued mask.
void add_update_cap(Inode *in, int mds,
- inodeno_t realm, snapid_t snap_highwater, vector<snapid_t> &snaps,
+ inodeno_t realm, snapid_t snap_created, snapid_t snap_highwater,
+ vector<snapid_t> &snaps,
unsigned issued, unsigned seq, unsigned mseq);
void remove_cap(Inode *in, int mds);
void remove_all_caps(Inode *in);
// Pass snap_created / snap_highwater / snaps on to realm->maybe_update() and
// emit a level-10 debug line when the realm reports a change.
// The '-' line is the old declaration (no snap_created parameter).
- void maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_highwater, vector<snapid_t>& snaps);
+ void maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_created, snapid_t snap_highwater,
+ vector<snapid_t>& snaps);
void handle_snap(class MClientSnap *m);
void handle_file_caps(class MClientFileCaps *m);
__le32 file_caps_mseq;
__le32 mdsmap_epoch;
__le64 snap_realm;
- __le64 snap_highwater;
+ __le64 snap_created, snap_highwater;
__le32 num_snaps;
__le64 snaps[];
} __attribute__ ((packed));
__le32 seq;
__le32 caps, wanted;
__le64 ino;
- __le64 realm;
__le64 size, max_size;
__le32 migrate_seq;
struct ceph_timespec mtime, atime, ctime;
__le64 time_warp_seq;
+ __le64 snap_realm;
+ __le64 snap_created, snap_highwater;
__le32 num_snaps;
- __le64 snap_highwater;
__le64 snaps[];
} __attribute__ ((packed));
cap->wanted(),
cap->get_mseq());
realm->get_snap_vector(reap->get_snaps());
- reap->set_snap_highwater(realm->snap_highwater);
+ reap->set_snap_created(realm->created);
+ reap->set_snap_highwater(realm->highwater);
mds->messenger->send_message(reap, session->inst);
}
cap->wanted(),
cap->get_mseq());
realm->get_snap_vector(caps->get_snaps());
- caps->set_snap_highwater(realm->snap_highwater);
+ caps->set_snap_created(realm->created);
+ caps->set_snap_highwater(realm->highwater);
mds->send_message_client(caps, session->inst);
}
SnapRealm *realm = cur->find_containing_snaprealm();
realm->get_snap_vector(reply->get_snaps());
- reply->set_snap_highwater(realm->snap_highwater);
- reply->set_snap_realm(realm->inode->ino());
+ reply->set_snap_info(realm->inode->ino(), realm->created, realm->highwater);
dout(10) << " snaprealm is " << *realm << " snaps=" << reply->get_snaps() << " on " << *realm->inode << dendl;
//reply->set_file_data_version(fdv);
// lock snap
set<SimpleLock*> rdlocks, wrlocks, xlocks;
+
+ // rdlock path
for (int i=0; i<(int)trace.size()-1; i++)
rdlocks.insert(&trace[i]->lock);
+
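+ // rdlock ancestor snaps
+ // NOTE(review): this walk calls get_parent_dn() without a null check and only stops if get_inode() returns null -- confirm it terminates safely at the root (presumably the root inode has no parent dentry).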
+ CInode *t = diri->get_parent_dn()->get_dir()->get_inode();
+ while (t) {
+ rdlocks.insert(&t->snaplock);
+ t = t->get_parent_dn()->get_dir()->get_inode();
+ }
+
+ // xlock snap
xlocks.insert(&dn->inode->snaplock);
+
if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
return;
if (!diri->snaprealm) {
dout(10) << "creating snaprealm on " << *diri << dendl;
diri->open_snaprealm();
+ diri->snaprealm->created = snapid;
// link them up
// HACK! parent may be on another mds...
MClientSnap *update = new MClientSnap(split_parent ? CEPH_SNAP_OP_SPLIT:CEPH_SNAP_OP_UPDATE,
realm->inode->ino());
update->snaps = snaps;
- update->snap_highwater = diri->snaprealm->snap_highwater;
+ update->snap_created = diri->snaprealm->created;
+ update->snap_highwater = diri->snaprealm->highwater;
update->split_parent = split_parent;
update->split_inos = split_inos;
mds->send_message_client(update, p->first);
if (!s.empty()) {
snapid_t t = *s.rbegin();
- if (snap_highwater < t)
- snap_highwater = t;
+ if (highwater < t)
+ highwater = t;
}
}
for (set<snapid_t>::reverse_iterator p = s.rbegin(); p != s.rend(); p++)
v[i++] = *p;
- dout(10) << "get_snap_vector " << v << " (highwater " << snap_highwater << ")" << dendl;
+ dout(10) << "get_snap_vector " << v << " (highwater " << highwater << ")" << dendl;
}
struct SnapRealm {
// realm state
+ snapid_t created;
map<snapid_t, SnapInfo> snaps;
multimap<snapid_t, snaplink_t> parents, children; // key is "last" (or NOSNAP)
// Serialize the realm state into bl. Field order is created, snaps, parents,
// children; decode() reads the fields back in this same order, so the two
// must be kept in sync. The '+' line adds 'created' at the front.
// NOTE(review): prepending a field changes the encoded layout -- confirm no
// previously encoded realms need to be read back.
void encode(bufferlist& bl) const {
+ ::encode(created, bl);
::encode(snaps, bl);
::encode(parents, bl);
::encode(children, bl);
}
void decode(bufferlist::iterator& p) {
+ ::decode(created, p);
::decode(snaps, p);
::decode(parents, p);
::decode(children, p);
MDCache *mdcache;
CInode *inode;
- snapid_t snap_highwater; // largest snap this realm has exposed to clients (implicitly or explicitly)
+ snapid_t highwater; // largest snap this realm has exposed to clients (implicitly or explicitly)
// caches?
//set<snapid_t> cached_snaps;
xlist<CInode*> inodes_with_caps; // for efficient realm splits
map<int, xlist<Capability*> > client_caps; // to identify clients who need snap notifications
// Construct an MDS-side realm bound to cache 'c' and inode 'in', with
// created and highwater both zero. The initializers are listed in the same
// order the members are declared (created, mdcache, inode, highwater).
// The '-' line is the old constructor using the former snap_highwater name.
- SnapRealm(MDCache *c, CInode *in) : mdcache(c), inode(in), snap_highwater(0) {}
+ SnapRealm(MDCache *c, CInode *in) :
+ created(0),
+ mdcache(c), inode(in), highwater(0) {}
bool open_parents(MDRequest *mdr);
void get_snap_set(set<snapid_t>& s, snapid_t first=0, snapid_t last=CEPH_NOSNAP);
int get_wanted() { return h.wanted; }
capseq_t get_seq() { return h.seq; }
capseq_t get_mseq() { return h.migrate_seq; }
+
// Snapshot-related accessors. The first three read fields of the message
// header 'h'; get_snaps() returns a mutable reference to the message's snaps
// vector, which callers fill in place.
// get_snap_realm() replaces the old get_realm() accessor removed by this patch.
+ inodeno_t get_snap_realm() { return inodeno_t(h.snap_realm); }
+ snapid_t get_snap_created() { return h.snap_created; }
snapid_t get_snap_highwater() { return h.snap_highwater; }
vector<snapid_t> &get_snaps() { return snaps; }
inodeno_t get_ino() { return inodeno_t(h.ino); }
- inodeno_t get_realm() { return inodeno_t(h.realm); }
__u64 get_size() { return h.size; }
__u64 get_max_size() { return h.max_size; }
utime_t get_ctime() { return utime_t(h.ctime); }
void set_mtime(const utime_t &t) { t.encode_timeval(&h.mtime); }
void set_atime(const utime_t &t) { t.encode_timeval(&h.atime); }
// Store the realm's created / highwater snapids into the message header 'h'.
+ void set_snap_created(snapid_t c) { h.snap_created = c; }
void set_snap_highwater(snapid_t hw) { h.snap_highwater = hw; }
MClientFileCaps() {}
h.caps = caps;
h.wanted = wanted;
h.ino = inode.ino;
- h.realm = realm;
+ h.snap_realm = realm;
h.size = inode.size;
h.max_size = inode.max_size;
h.migrate_seq = mseq;
int get_result() { return (__s32)(__u32)st.result; }
// Snapshot-related accessors on the reply. The first three read fields of
// 'st'; get_snaps() returns a mutable reference to the reply's snaps vector.
inodeno_t get_snap_realm() { return inodeno_t((__u64)st.snap_realm); }
+ snapid_t get_snap_created() { return st.snap_created; }
snapid_t get_snap_highwater() { return st.snap_highwater; }
vector<snapid_t> &get_snaps() { return snaps; }
- void set_snap_realm(snapid_t hw) { st.snap_realm = hw; }
- void set_snap_highwater(snapid_t hw) { st.snap_highwater = hw; }
// Set all three snap fields of 'st' in one call: r -> snap_realm,
// c -> snap_created, hw -> snap_highwater. Replaces the separate
// set_snap_realm() / set_snap_highwater() setters removed by this patch.
+ void set_snap_info(inodeno_t r, snapid_t c, snapid_t hw) {
+ st.snap_realm = r;
+ st.snap_created = c;
+ st.snap_highwater = hw;
+ }
unsigned get_file_caps() { return st.file_caps; }
unsigned get_file_caps_seq() { return st.file_caps_seq; }
inodeno_t realm;
// new snap state
- snapid_t snap_highwater;
+ snapid_t snap_created, snap_highwater;
vector<snapid_t> snaps;
// (for split only)