* do not block.
*/
void Client::add_update_inode_cap(Inode *in, int mds,
- inodeno_t realm, vector<snapid_t> &snaps,
+ inodeno_t realm, snapid_t snap_highwater, vector<snapid_t> &snaps,
unsigned issued, unsigned seq, unsigned mseq)
{
InodeCap *cap = 0;
mds_sessions[mds].num_caps++;
if (in->caps.empty()) {
assert(in->snaprealm == 0);
- in->snaprealm = get_snap_realm(realm, snaps);
+ in->snaprealm = get_snap_realm(realm);
in->get();
}
if (in->exporting_mds == mds) {
}
in->caps[mds] = cap = new InodeCap;
}
+ in->snaprealm->maybe_update(snap_highwater, snaps);
unsigned old_caps = cap->issued;
cap->issued |= issued;
if (m->get_op() == CEPH_CAP_OP_IMPORT) {
// add/update it
- add_update_inode_cap(in, mds, m->get_realm(), m->get_snaps(), m->get_caps(), m->get_seq(), m->get_mseq());
+ add_update_inode_cap(in, mds,
+ m->get_realm(), m->get_snap_highwater(), m->get_snaps(),
+ m->get_caps(), m->get_seq(), m->get_mseq());
if (in->exporting_mseq < m->get_mseq()) {
dout(5) << "handle_file_caps ino " << m->get_ino() << " mseq " << m->get_mseq()
int mds = reply->get_source().num();
add_update_inode_cap(in, mds,
reply->get_file_caps_realm(),
+ reply->get_snap_highwater(),
reply->get_snaps(),
reply->get_file_caps(),
reply->get_file_caps_seq(),
+ // Client-side record of one snap realm: the realm's directory ino, a
+ // reference count, and the most recent snap list received for it.
struct SnapRealm {
inodeno_t dirino;
- vector<snapid_t> snaps;
int nref;
+ // highest snap_highwater value accepted so far by maybe_update(); starts at 0
+ snapid_t snap_highwater;
+ vector<snapid_t> snaps;
- SnapRealm(inodeno_t i, vector<snapid_t> &s) : dirino(i), nref(0) {
- snaps.swap(s);
+ // A new realm starts with no references, highwater 0 and an empty snap list.
+ SnapRealm(inodeno_t i) :
+ dirino(i), nref(0), snap_highwater(0) { }
+
+ // Adopt the snap list s only if it comes with a strictly larger highwater
+ // than the one already recorded; otherwise leave the realm untouched.
+ // s is copied, not swapped, so the caller's vector is left intact.
+ void maybe_update(snapid_t sh, vector<snapid_t> &s) {
+ if (sh > snap_highwater) {
+ snap_highwater = sh;
+ snaps = s;
+ }
}
};
Inode* root;
LRU lru; // lru list of Dentry's in our local metadata cache.
- map<inodeno_t,SnapRealm*> snap_realms;
+ hash_map<inodeno_t,SnapRealm*> snap_realms;
+ // Return the SnapRealm registered under ino r, creating and registering an
+ // empty one if there is none yet. Every call bumps the realm's nref.
- SnapRealm *get_snap_realm(inodeno_t r, vector<snapid_t> &snaps) {
+ SnapRealm *get_snap_realm(inodeno_t r) {
SnapRealm *realm = snap_realms[r];
if (!realm)
- snap_realms[r] = realm = new SnapRealm(r, snaps);
+ snap_realms[r] = realm = new SnapRealm(r);
realm->nref++;
return realm;
}
// file caps
void add_update_inode_cap(Inode *in, int mds,
- inodeno_t realm, vector<snapid_t> &snaps,
+ inodeno_t realm, snapid_t snap_highwater, vector<snapid_t> &snaps,
unsigned issued, unsigned seq, unsigned mseq);
void remove_cap(Inode *in, int mds);
void handle_file_caps(class MClientFileCaps *m);
#define CEPH_MSG_CLIENT_REPLY 26
#define CEPH_MSG_CLIENT_FILECAPS 0x310
#define CEPH_MSG_CLIENT_LEASE 0x311
+#define CEPH_MSG_CLIENT_SNAP 0x312
/* osd */
#define CEPH_MSG_OSD_GETMAP 40
__le32 file_caps_mseq;
__le32 mdsmap_epoch;
__le32 num_snaps;
+ __le64 snap_highwater;
__le64 snaps[];
} __attribute__ ((packed));
struct ceph_timespec mtime, atime, ctime;
__le64 time_warp_seq;
__le32 num_snaps;
+ __le64 snap_highwater;
__le64 snaps[];
} __attribute__ ((packed));
/* followed by encoded string */
+/* snap op codes; MClientSnap::get_opname() prints these as "update" and "split" */
+enum {
+ CEPH_SNAP_OP_UPDATE,
+ CEPH_SNAP_OP_SPLIT,
+};
+
+struct ceph_mds_snap {
+ /* ... */
+};
+
+
/*
* osd map
*/
session->touch_cap(cap);
// send IMPORT
+ SnapRealm *realm = in->find_containing_snaprealm();
MClientFileCaps *reap = new MClientFileCaps(CEPH_CAP_OP_IMPORT,
in->inode,
- in->find_containing_snaprealm()->inode->ino(),
+ realm->inode->ino(),
cap->get_last_seq(),
cap->pending(),
cap->wanted(),
cap->get_mseq());
- in->find_containing_snaprealm()->get_snap_vector(reap->get_snaps());
+ realm->get_snap_vector(reap->get_snaps());
+ reap->set_snap_highwater(realm->snap_highwater);
mds->messenger->send_message(reap, session->inst);
}
}
cap->merge(it->second);
+ SnapRealm *realm = in->find_containing_snaprealm();
MClientFileCaps *caps = new MClientFileCaps(CEPH_CAP_OP_IMPORT,
in->inode,
- in->find_containing_snaprealm()->inode->ino(),
+ realm->inode->ino(),
cap->get_last_seq(),
cap->pending(),
cap->wanted(),
cap->get_mseq());
- in->find_containing_snaprealm()->get_snap_vector(caps->get_snaps());
+ realm->get_snap_vector(caps->get_snaps());
+ caps->set_snap_highwater(realm->snap_highwater);
+
mds->send_message_client(caps, session->inst);
}
reply->set_file_caps(cap->pending());
reply->set_file_caps_seq(cap->get_last_seq());
reply->set_file_caps_mseq(cap->get_mseq());
- cur->find_containing_snaprealm()->get_snap_vector(reply->get_snaps());
+
+ SnapRealm *realm = cur->find_containing_snaprealm();
+ realm->get_snap_vector(reply->get_snaps());
+ reply->set_snap_highwater(realm->snap_highwater);
+
//reply->set_file_data_version(fdv);
reply_request(mdr, reply);
q->first >= p->second.first)
s.insert(q->first);
}
- dout(10) << "build_snap_list " << s << dendl;
+
+ if (!s.empty()) {
+ snapid_t t = *s.rbegin();
+ if (snap_highwater < t)
+ snap_highwater = t;
+ }
+
+ dout(10) << "build_snap_list " << s << " (highwater " << snap_highwater << ")" << dendl;
}
/*
MDCache *mdcache;
CInode *inode;
+ snapid_t snap_highwater; // largest snap this realm has exposed to clients (implicitly or explicitly)
+
// caches?
//set<snapid_t> cached_snaps;
//set<SnapRealm*> cached_active_children; // active children that are currently open
xlist<CInode*> inodes_with_caps; // for efficient realm splits
map<int, xlist<Capability*> > client_caps; // to identify clients who need snap notifications
- SnapRealm(MDCache *c, CInode *in) : mdcache(c), inode(in) {}
+ SnapRealm(MDCache *c, CInode *in) : mdcache(c), inode(in), snap_highwater(0) {}
bool open_parents(MDRequest *mdr);
void get_snap_set(set<snapid_t>& s);
vector<snapid_t> snaps;
public:
- int get_caps() { return h.caps; }
- int get_wanted() { return h.wanted; }
- capseq_t get_seq() { return h.seq; }
- capseq_t get_mseq() { return h.migrate_seq; }
+ int get_caps() { return h.caps; }
+ int get_wanted() { return h.wanted; }
+ capseq_t get_seq() { return h.seq; }
+ capseq_t get_mseq() { return h.migrate_seq; }
+ snapid_t get_snap_highwater() { return h.snap_highwater; }
vector<snapid_t> &get_snaps() { return snaps; }
inodeno_t get_ino() { return inodeno_t(h.ino); }
void set_mtime(const utime_t &t) { t.encode_timeval(&h.mtime); }
void set_atime(const utime_t &t) { t.encode_timeval(&h.atime); }
+ void set_snap_highwater(snapid_t hw) { h.snap_highwater = hw; }
+
MClientFileCaps() {}
MClientFileCaps(int op,
inode_t& inode,
int get_result() { return (__s32)(__u32)st.result; }
+ snapid_t get_snap_highwater() { return st.snap_highwater; }
vector<snapid_t> &get_snaps() { return snaps; }
unsigned get_file_caps() { return st.file_caps; }
void set_file_caps_seq(capseq_t s) { st.file_caps_seq = s; }
void set_file_caps_mseq(capseq_t s) { st.file_caps_mseq = s; }
//void set_file_data_version(uint64_t v) { st.file_data_version = v; }
+ void set_snap_highwater(snapid_t hw) { st.snap_highwater = hw; }
MClientReply() {}
MClientReply(MClientRequest *req, int result = 0) :
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef __MCLIENTSNAP_H
+#define __MCLIENTSNAP_H
+
+#include "msg/Message.h"
+
+// Snap message of type CEPH_MSG_CLIENT_SNAP. 'op' is one of the
+// CEPH_SNAP_OP_* codes; the remaining fields are encoded and decoded in the
+// fixed order used by encode_payload()/decode_payload() below.
+struct MClientSnap : public Message {
+
+ // Human-readable name for an op code; "???" for anything unrecognized.
+ static const char *get_opname(int o) {
+ switch (o) {
+ case CEPH_SNAP_OP_UPDATE: return "update";
+ case CEPH_SNAP_OP_SPLIT: return "split";
+ default: return "???";
+ }
+ }
+
+ __u32 op;
+ inodeno_t realm;
+
+ // new snap state
+ snapid_t snap_highwater;
+ vector<snapid_t> snaps;
+
+ // (for split only)
+ inodeno_t new_realm;
+ list<inodeno_t> new_inodes;
+
+ // Both constructors initialize every scalar field, so a message that is
+ // printed or encoded before all fields have been assigned never exposes
+ // indeterminate values (encode_payload() always writes all of them).
+ MClientSnap() :
+ Message(CEPH_MSG_CLIENT_SNAP),
+ op(0), realm(0), snap_highwater(0), new_realm(0) {}
+ MClientSnap(int o, inodeno_t r) :
+ Message(CEPH_MSG_CLIENT_SNAP),
+ op(o), realm(r), snap_highwater(0), new_realm(0) {}
+
+ const char *get_type_name() { return "Csnap"; }
+ void print(ostream& out) {
+ out << "client_snap(" << get_opname(op) << " " << realm
+ << " " << snaps;
+ out << ")";
+ }
+
+ // Field order here must stay in sync with decode_payload().
+ void encode_payload() {
+ ::encode(op, payload);
+ ::encode(realm, payload);
+ ::encode(snap_highwater, payload);
+ ::encode(snaps, payload);
+ ::encode(new_realm, payload);
+ ::encode(new_inodes, payload);
+ }
+ // Decodes the fields in the order encode_payload() wrote them and asserts
+ // that the payload was consumed completely.
+ void decode_payload() {
+ bufferlist::iterator p = payload.begin();
+ ::decode(op, p);
+ ::decode(realm, p);
+ ::decode(snap_highwater, p);
+ ::decode(snaps, p);
+ ::decode(new_realm, p);
+ ::decode(new_inodes, p);
+ assert(p.end());
+ }
+
+};
+
+#endif
#include "messages/MClientReply.h"
#include "messages/MClientFileCaps.h"
#include "messages/MClientLease.h"
+#include "messages/MClientSnap.h"
#include "messages/MMDSSlaveRequest.h"
case CEPH_MSG_CLIENT_LEASE:
m = new MClientLease;
break;
+ case CEPH_MSG_CLIENT_SNAP:
+ m = new MClientSnap;
+ break;
// mds
case MSG_MDS_SLAVE_REQUEST: