- rados bits to do clone+write
/ - cloning
- fix cloning on unlinked file (where snaps=[], but head may have follows_snap attr)
+
+ - SnapRealm open_parents, get_snap_set need to recursively open/examine parents over given ranges...
+ - realm split
+
- figure out how to fix up rados logging
- snap collections
- garbage collection
handle_client_reply((MClientReply*)m);
break;
+ case CEPH_MSG_CLIENT_SNAP:
+ handle_snap((MClientSnap*)m);
+ break;
case CEPH_MSG_CLIENT_FILECAPS:
handle_file_caps((MClientFileCaps*)m);
break;
}
if (wanted == 0) {
- dout(10) << "last caps on " << *in << dendl;
- in->caps.clear();
- put_inode(in);
+ remove_all_caps(in);
}
}
* handle caps update from mds. including mds to mds caps transitions.
* do not block.
*/
-void Client::add_update_inode_cap(Inode *in, int mds,
- inodeno_t realm, snapid_t snap_highwater, vector<snapid_t> &snaps,
- unsigned issued, unsigned seq, unsigned mseq)
+void Client::add_update_cap(Inode *in, int mds,
+ inodeno_t realm, snapid_t snap_highwater, vector<snapid_t> &snaps,
+ unsigned issued, unsigned seq, unsigned mseq)
{
InodeCap *cap = 0;
if (in->caps.count(mds)) {
assert(in->snaprealm == 0);
in->snaprealm = get_snap_realm(realm);
in->get();
+ dout(15) << "add_update_cap first one, opened snaprealm " << in->snaprealm << dendl;
}
if (in->exporting_mds == mds) {
dout(10) << " clearing exporting_caps on " << mds << dendl;
}
in->caps[mds] = cap = new InodeCap;
}
- in->snaprealm->maybe_update(snap_highwater, snaps);
+ maybe_update_snaprealm(in->snaprealm, snap_highwater, snaps);
unsigned old_caps = cap->issued;
cap->issued |= issued;
assert(in->caps.count(mds));
in->caps.erase(mds);
if (in->caps.empty()) {
+ dout(15) << "remove_cap last one, closing snaprealm " << in->snaprealm << dendl;
put_inode(in);
put_snap_realm(in->snaprealm);
in->snaprealm = 0;
}
}
+/*
+ * Drop every capability held on an inode in one go.
+ *
+ * When the cap map was non-empty, this also releases the snaprealm
+ * reference and the inode reference that were taken when the first cap
+ * was added.  When the map was already empty nothing else happens.
+ */
+void Client::remove_all_caps(Inode *in)
+{
+  bool wasempty = in->caps.empty();
+  in->caps.clear();
+  if (!wasempty) {
+    dout(15) << "remove_all_caps closing snaprealm " << in->snaprealm << dendl;
+    // Release the realm and clear the pointer *before* put_inode(): that
+    // call may drop the last reference to the inode (presumably freeing
+    // it), after which 'in' must not be dereferenced.
+    put_snap_realm(in->snaprealm);
+    in->snaprealm = 0;
+    put_inode(in);
+  }
+}
+
+// Offer a new snap list / highwater to a realm and log it only when the
+// realm actually accepted the update.
+void Client::maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_highwater, vector<snapid_t>& snaps)
+{
+  const bool changed = realm->maybe_update(snap_highwater, snaps);
+  if (!changed)
+    return;
+  dout(10) << *realm << " now " << snaps << " highwater " << snap_highwater << dendl;
+}
+
void Client::handle_snap(MClientSnap *m)
{
dout(10) << "handle_snap " << *m << dendl;
switch (m->op) {
case CEPH_SNAP_OP_UPDATE:
- realm->maybe_update(m->snap_highwater, m->snaps);
+ maybe_update_snaprealm(realm, m->snap_highwater, m->snaps);
break;
case CEPH_SNAP_OP_SPLIT:
if (m->get_op() == CEPH_CAP_OP_IMPORT) {
// add/update it
- add_update_inode_cap(in, mds,
- m->get_realm(), m->get_snap_highwater(), m->get_snaps(),
- m->get_caps(), m->get_seq(), m->get_mseq());
+ add_update_cap(in, mds,
+ m->get_realm(), m->get_snap_highwater(), m->get_snaps(),
+ m->get_caps(), m->get_seq(), m->get_mseq());
if (in->exporting_mseq < m->get_mseq()) {
dout(5) << "handle_file_caps ino " << m->get_ino() << " mseq " << m->get_mseq()
// add the cap
int mds = reply->get_source().num();
- add_update_inode_cap(in, mds,
- reply->get_file_caps_realm(),
- reply->get_snap_highwater(),
- reply->get_snaps(),
- reply->get_file_caps(),
- reply->get_file_caps_seq(),
- reply->get_file_caps_mseq());
+ add_update_cap(in, mds,
+ reply->get_snap_realm(),
+ reply->get_snap_highwater(),
+ reply->get_snaps(),
+ reply->get_file_caps(),
+ reply->get_file_caps_seq(),
+ reply->get_file_caps_mseq());
dout(5) << "open success, fh is " << f << " combined caps " << cap_string(in->caps_issued()) << dendl;
}
// avoid livelock with fsync?
// FIXME
+
+ dout(10) << " snaprealm " << *in->snaprealm << dendl;
if (g_conf.client_oc) {
if (in->caps_issued() & CEPH_CAP_WRBUFFER) {
unsafe_sync_write++;
in->get_cap_ref(CEPH_CAP_WRBUFFER);
- // hack
- if (1) {
- static int a = 0;
- in->snaprealm->snaps.insert(in->snaprealm->snaps.begin(), ++a);
- in->snaprealm->snaps.insert(in->snaprealm->snaps.begin(), ++a);
- dout(10) << "snaps now " << in->snaprealm->snaps << dendl;
- }
-
filer->write(in->inode.ino, &in->inode.layout,
- CEPH_NOSNAP,
- in->snaprealm->snaps,
+ CEPH_NOSNAP, in->snaprealm->snaps,
offset, size, bl, 0, onfinish, onsafe);
while (!done)
}
+// =============================
+// snaps
+
+// Public entry point for creating snapshot 'name' at 'relpath'.
+// Holds client_lock (via Mutex::Locker) while the request is made.
+int Client::mksnap(const char *relpath, const char *name)
+{
+  Mutex::Locker l(client_lock);
+  return _mksnap(mkpath(relpath), name);
+}
+// Public entry point for removing snapshot 'name' at 'relpath'.
+// Holds client_lock (via Mutex::Locker) while the request is made.
+int Client::rmsnap(const char *relpath, const char *name)
+{
+  Mutex::Locker l(client_lock);
+  return _rmsnap(mkpath(relpath), name);
+}
+
+
+/*
+ * Ask the MDS to create snapshot 'name' at 'path'.
+ *
+ * Builds a CEPH_MDS_OP_MKSNAP request with the target as the filepath and
+ * the snapshot name as path2, issues it through make_request() with the
+ * given uid/gid, and returns the result code carried by the reply.
+ */
+int Client::_mksnap(const filepath& path, const char *name, int uid, int gid)
+{
+  MClientRequest *req = new MClientRequest(CEPH_MDS_OP_MKSNAP, messenger->get_myinst());
+  req->set_filepath(path);
+  req->set_path2(name);
+
+  MClientReply *reply = make_request(req, uid, gid);
+  int res = reply->get_result();
+  delete reply;
+
+  trim_cache();
+  // logged as: mksnap("path", 'name') = res  -- each argument quoted on its own
+  dout(3) << "mksnap(\"" << path << "\", '" << name << "') = " << res << dendl;
+  return res;
+}
+
+/*
+ * Ask the MDS to remove snapshot 'name' at 'path'.
+ *
+ * Builds a CEPH_MDS_OP_RMSNAP request with the target as the filepath and
+ * the snapshot name as path2, issues it through make_request() with the
+ * given uid/gid, and returns the result code carried by the reply.
+ */
+int Client::_rmsnap(const filepath& path, const char *name, int uid, int gid)
+{
+  MClientRequest *req = new MClientRequest(CEPH_MDS_OP_RMSNAP, messenger->get_myinst());
+  req->set_filepath(path);
+  req->set_path2(name);
+
+  MClientReply *reply = make_request(req, uid, gid);
+  int res = reply->get_result();
+  delete reply;
+
+  trim_cache();
+  // logged as: rmsnap("path", 'name') = res  -- each argument quoted on its own
+  dout(3) << "rmsnap(\"" << path << "\", '" << name << "') = " << res << dendl;
+  return res;
+}
+
// =========================================
}
+
+
+
+// ===============================
+
void Client::ms_handle_failure(Message *m, const entity_inst_t& inst)
{
entity_name_t dest = inst.name;
SnapRealm(inodeno_t i) :
dirino(i), nref(0), snap_highwater(0) { }
- void maybe_update(snapid_t sh, vector<snapid_t> &s) {
+ bool maybe_update(snapid_t sh, vector<snapid_t> &s) {
if (sh > snap_highwater) {
snap_highwater = sh;
snaps = s;
- }
+ return true;
+ }
+ return false;
}
};
+// Debug formatter for the client-side SnapRealm: directory inode, reference
+// count, snap highwater and the snap list, in that order.
+inline ostream& operator<<(ostream& out, const SnapRealm& r) {
+  out << "snaprealm(" << r.dirino;
+  out << " nref=" << r.nref;
+  out << " hw=" << r.snap_highwater;
+  out << " snaps=" << r.snaps << ")";
+  return out;
+}
+
struct InodeCap {
unsigned issued;
unsigned implemented;
void release_lease(Inode *in, Dentry *dn, int mask);
// file caps
- void add_update_inode_cap(Inode *in, int mds,
- inodeno_t realm, snapid_t snap_highwater, vector<snapid_t> &snaps,
- unsigned issued, unsigned seq, unsigned mseq);
+ void add_update_cap(Inode *in, int mds,
+ inodeno_t realm, snapid_t snap_highwater, vector<snapid_t> &snaps,
+ unsigned issued, unsigned seq, unsigned mseq);
void remove_cap(Inode *in, int mds);
+ void remove_all_caps(Inode *in);
+
+ void maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_highwater, vector<snapid_t>& snaps);
+
void handle_snap(class MClientSnap *m);
void handle_file_caps(class MClientFileCaps *m);
void check_caps(Inode *in);
int _fsync(Fh *fh, bool syncdataonly);
int _statfs(struct statvfs *stbuf);
+ int _mksnap(const filepath &path, const char *name, int uid=-1, int gid=-1);
+ int _rmsnap(const filepath &path, const char *name, int uid=-1, int gid=-1);
+
public:
int mount();
int enumerate_layout(int fd, list<ObjectExtent>& result,
off_t length, off_t offset);
+ int mksnap(const char *path, const char *name);
+ int rmsnap(const char *path, const char *name);
+
// low-level interface
int ll_lookup(inodeno_t parent, const char *name, struct stat *attr, int uid = -1, int gid = -1);
bool ll_forget(inodeno_t ino, int count);
if (strcmp(args[i],"--syn") == 0) {
++i;
- if (strcmp(args[i],"writefile") == 0) {
+ if (strcmp(args[i], "mksnap") == 0) {
+ syn_modes.push_back(SYNCLIENT_MODE_MKSNAP);
+ syn_sargs.push_back(args[++i]); // path
+ syn_sargs.push_back(args[++i]); // name
+ }
+ else if (strcmp(args[i], "rmsnap") == 0) {
+ syn_modes.push_back(SYNCLIENT_MODE_RMSNAP);
+ syn_sargs.push_back(args[++i]); // path
+ syn_sargs.push_back(args[++i]); // name
+ }
+
+ else if (strcmp(args[i],"writefile") == 0) {
syn_modes.push_back( SYNCLIENT_MODE_WRITEFILE );
syn_iargs.push_back( atoi(args[++i]) );
syn_iargs.push_back( atoi(args[++i]) );
}
break;
+ case SYNCLIENT_MODE_MKSNAP:
+ {
+ string base = get_sarg(0);
+ string name = get_sarg(0);
+ if (run_me())
+ mksnap(base.c_str(), name.c_str());
+ did_run_me();
+ }
+ break;
+ case SYNCLIENT_MODE_RMSNAP:
+ {
+ string base = get_sarg(0);
+ string name = get_sarg(0);
+ if (run_me())
+ rmsnap(base.c_str(), name.c_str());
+ did_run_me();
+ }
+ break;
+
default:
assert(0);
}
client->close(fd);
return 0;
}
+
+
+
+void SyntheticClient::mksnap(const char *base, const char *name)  // thin forwarder: snapshot 'name' at path 'base'
+{
+ client->mksnap(base, name);  // whatever client->mksnap() returns is ignored here
+}
+
+void SyntheticClient::rmsnap(const char *base, const char *name)  // thin forwarder: remove snapshot 'name' at path 'base'
+{
+ client->rmsnap(base, name);  // whatever client->rmsnap() returns is ignored here
+}
#define SYNCLIENT_MODE_CHUNK 400
+#define SYNCLIENT_MODE_MKSNAP 1000
+#define SYNCLIENT_MODE_RMSNAP 1001
void parse_syn_options(vector<const char*>& args);
void import_find(const char *basedir, const char *find, bool writedata);
int chunk_file(string &filename);
+
+ void mksnap(const char *base, const char *name);
+ void rmsnap(const char *base, const char *name);
};
#endif
fuse_ll: true,
// --- objectcacher ---
- client_oc: true,
+ client_oc: false,//until snaps are done... true,
client_oc_size: 1024*1024* 64, // MB * n
client_oc_max_dirty: 1024*1024* 48, // MB * n (dirty OR tx.. bigish)
client_oc_target_dirty: 1024*1024* 8, // target dirty (keep this smallish)
CEPH_MDS_OP_LTRUNCATE = 0x01303,
CEPH_MDS_OP_FSYNC = 0x00304,
CEPH_MDS_OP_READDIR = 0x00305,
+
+ CEPH_MDS_OP_MKSNAP = 0x01010,
+ CEPH_MDS_OP_RMSNAP = 0x01011,
};
static inline const char *ceph_mds_op_name(int op)
case CEPH_MDS_OP_TRUNCATE: return "truncate";
case CEPH_MDS_OP_LTRUNCATE: return "ltruncate";
case CEPH_MDS_OP_FSYNC: return "fsync";
+ case CEPH_MDS_OP_MKSNAP: return "mksnap";
+ case CEPH_MDS_OP_RMSNAP: return "rmsnap";
default: return "unknown";
}
}
__le32 op;
__le32 result;
__le32 file_caps;
- __le64 file_caps_realm;
__le32 file_caps_seq;
__le32 file_caps_mseq;
__le32 mdsmap_epoch;
- __le32 num_snaps;
+ __le64 snap_realm;
__le64 snap_highwater;
+ __le32 num_snaps;
__le64 snaps[];
} __attribute__ ((packed));
// --
-inline ostream& operator<<(ostream& out, ceph_fsid& f) {
+inline ostream& operator<<(ostream& out, const ceph_fsid& f) {
return out << hex << f.major << '.' << f.minor << dec;
}
// -- io helpers --
template<class A, class B>
-inline ostream& operator<<(ostream& out, pair<A,B> v) {
+inline ostream& operator<<(ostream& out, const pair<A,B> v) {
return out << v.first << "," << v.second;
}
template<class A>
-inline ostream& operator<<(ostream& out, vector<A>& v) {
+inline ostream& operator<<(ostream& out, const vector<A>& v) {
out << "[";
- for (unsigned i=0; i<v.size(); i++) {
- if (i) out << ",";
- out << v[i];
+ for (typename vector<A>::const_iterator p = v.begin(); p != v.end(); p++) {
+ if (p != v.begin()) out << ",";
+ out << *p;
}
out << "]";
return out;
}
void remove_client_cap(int client) {
assert(client_caps.count(client) == 1);
+
+ Capability *cap = client_caps[client];
+ cap->realm->remove_cap(client, cap);
+
delete client_caps[client];
client_caps.erase(client);
if (client_caps.empty())
class CInode;
+class SnapRealm;
class Capability {
public:
bool stale;
public:
xlist<Capability*>::item session_caps_item;
+
+ SnapRealm *realm;
xlist<Capability*>::item snaprealm_caps_item;
Capability(CInode *i=0, int want=0, capseq_t s=0) :
last_open(0),
mseq(0),
suppress(false), stale(false),
- session_caps_item(this), snaprealm_caps_item(this) { }
+ session_caps_item(this), realm(0), snaprealm_caps_item(this) { }
capseq_t get_mseq() { return mseq; }
#include "MDBalancer.h"
#include "AnchorClient.h"
#include "IdAllocator.h"
+#include "SnapTable.h"
#include "msg/Messenger.h"
#include "messages/MClientReply.h"
#include "messages/MClientReconnect.h"
#include "messages/MClientFileCaps.h"
+#include "messages/MClientSnap.h"
#include "messages/MMDSSlaveRequest.h"
break;
+ // snaps
+ case CEPH_MDS_OP_MKSNAP:
+ handle_client_mksnap(mdr);
+ break;
+ case CEPH_MDS_OP_RMSNAP:
+ handle_client_rmsnap(mdr);
+ break;
+
+
default:
dout(1) << " unknown client op " << req->get_op() << dendl;
assert(0);
SnapRealm *realm = cur->find_containing_snaprealm();
realm->get_snap_vector(reply->get_snaps());
reply->set_snap_highwater(realm->snap_highwater);
-
+ reply->set_snap_realm(realm->inode->ino());
+ dout(10) << " snaprealm is " << *realm << " snaps=" << reply->get_snaps() << " on " << *realm->inode << dendl;
+
//reply->set_file_data_version(fdv);
reply_request(mdr, reply);
+// snaps
+
+void Server::handle_client_mksnap(MDRequest *mdr)  // handles CEPH_MDS_OP_MKSNAP: snapshot the directory at the request's filepath, named by path2
+{
+ MClientRequest *req = mdr->client_request;
+
+ // traverse to path; collects the dentries along the way into 'trace'
+ vector<CDentry*> trace;
+ int r = mdcache->path_traverse(mdr, req,
+ req->get_filepath(), trace, false,
+ MDS_TRAVERSE_FORWARD);
+ if (r > 0) return;  // positive result: nothing more to do here now (presumably delayed or forwarded -- TODO confirm)
+ if (trace.empty()) r = -EINVAL; // can't snap root
+ if (r < 0) {
+ reply_request(mdr, r);
+ return;
+ }
+ CDentry *dn = trace[trace.size()-1];  // last dentry of the trace is the snapshot target
+ assert(dn);
+ if (!dn->is_auth()) { // fw to auth?
+ mdcache->request_forward(mdr, dn->authority().first);
+ return;
+ }
+
+ // dir only: the target must be a primary dentry whose inode is a directory, otherwise reply -ENOTDIR
+ CInode *diri = dn->inode;
+ if (!dn->is_primary() || !diri->is_dir()) {
+ reply_request(mdr, -ENOTDIR);
+ return;
+ }
+ dout(10) << "mksnap " << req->get_path2() << " on " << *diri << dendl;
+
+ // lock snap: rdlock every dentry in the trace except the last, xlock the target inode's snaplock
+ set<SimpleLock*> rdlocks, wrlocks, xlocks;
+ for (int i=0; i<(int)trace.size()-1; i++)
+ rdlocks.insert(&trace[i]->lock);
+ xlocks.insert(&dn->inode->snaplock);
+ if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
+ return;  // locks not acquired; no reply is sent from here (presumably the request is retried -- TODO confirm)
+
+ if (mdr->now == utime_t())  // only stamp the request if it has no timestamp yet
+ mdr->now = g_clock.now();
+
+ // anchor diri: if it is not anchored yet, start anchor_create and return; a C_MDS_RetryRequest is passed as the completion
+ if (!diri->inode.anchored) {
+ mds->mdcache->anchor_create(mdr, diri, new C_MDS_RetryRequest(mds->mdcache, mdr));
+ return;
+ }
+
+ // allocate a snapid
+ // HACK
+ snapid_t snapid = mds->snaptable->create(diri->ino(), req->get_path2(), mdr->now);
+ dout(10) << " snapid is " << snapid << dendl;
+
+ // create realm?  only when diri does not already have a snaprealm of its own
+ if (!diri->snaprealm) {
+ dout(10) << "creating snaprealm on " << *diri << dendl;
+ diri->open_snaprealm();
+
+ // link them up
+ // HACK! parent may be on another mds...
+
+ SnapRealm *parent = diri->find_containing_snaprealm();  // NOTE(review): called after open_snaprealm(); confirm this cannot return diri's own new realm
+ assert(parent);
+ snaplink_t link;
+ link.first = snapid;
+ link.dirino = diri->ino();
+ parent->children.insert(pair<snapid_t,snaplink_t>(CEPH_NOSNAP, link));
+ link.dirino = parent->inode->ino();  // same link record reused for the reverse direction, now pointing at the parent
+ diri->snaprealm->parents.insert(pair<snapid_t,snaplink_t>(CEPH_NOSNAP, link));
+
+ // split...
+ // ***
+ }
+
+ // add the snap: record id, name and timestamp in diri's realm, keyed by snapid
+ dout(10) << "snaprealm was " << *diri->snaprealm << dendl;
+ SnapInfo info;
+ info.snapid = snapid;
+ info.name = req->get_path2();
+ info.stamp = mdr->now;
+ diri->snaprealm->snaps[snapid] = info;
+ dout(10) << "snaprealm now " << *diri->snaprealm << dendl;
+
+ // build new snaps list
+ vector<snapid_t> snaps;
+ diri->snaprealm->get_snap_vector(snaps);
+
+ // notify clients: one CEPH_SNAP_OP_UPDATE message per client that has caps registered in this realm
+
+ for (map<int, xlist<Capability*> >::iterator p = diri->snaprealm->client_caps.begin();
+ p != diri->snaprealm->client_caps.end();
+ p++) {
+ assert(!p->second.empty());
+
+ MClientSnap *update = new MClientSnap(CEPH_SNAP_OP_UPDATE, diri->ino());
+ update->snaps = snaps;
+ update->snap_highwater = diri->snaprealm->snap_highwater;  // NOTE(review): snap_highwater is not modified in this function; confirm it is advanced elsewhere, since the client-side maybe_update only accepts a strictly larger highwater
+ mds->send_message_client(update, p->first);
+ }
+
+ // yay
+ reply_request(mdr, 0, diri);
+}
+
+/*
+ * Handle CEPH_MDS_OP_RMSNAP.
+ *
+ * Snapshot removal is not implemented yet.  Reply with -ENOSYS rather than
+ * returning without any reply: the request is dispatched to this handler,
+ * and Client::_rmsnap reads the result out of the reply it gets back from
+ * make_request(), so it needs one.
+ */
+void Server::handle_client_rmsnap(MDRequest *mdr)
+{
+  reply_request(mdr, -ENOSYS);
+}
+
+
void _rename_finish(MDRequest *mdr,
CDentry *srcdn, CDentry *destdn, CDentry *straydn);
+ void handle_client_mksnap(MDRequest *mdr);
+ void handle_client_rmsnap(MDRequest *mdr);
+
// helpers
void _rename_prepare_witness(MDRequest *mdr, int who,
CDentry *srcdn, CDentry *destdn, CDentry *straydn);
s.insert(p->first);
// include parent snaps
+ // FIXME........ this is a weird interval DAG...
for (multimap<snapid_t, snaplink_t>::iterator p = parents.begin();
p != parents.end();
p++) {
bool open_parents(MDRequest *mdr);
void get_snap_set(set<snapid_t>& s);
void get_snap_vector(vector<snapid_t>& s);
+
void add_cap(int client, Capability *cap) {
client_caps[client].push_back(&cap->snaprealm_caps_item);
+ cap->realm = this;
+ }
+  // Detach a client's capability from this realm, and forget the client
+  // altogether once its cap list here is empty.
+  void remove_cap(int client, Capability *cap) {
+    cap->realm = 0;
+    cap->snaprealm_caps_item.remove_myself();
+    map<int, xlist<Capability*> >::iterator mine = client_caps.find(client);
+    if (mine != client_caps.end() && mine->second.empty())
+      client_caps.erase(mine);
  }
};
WRITE_CLASS_ENCODER(SnapRealm)
-
-
+/*
+ * Debug formatter for an MDS SnapRealm.
+ *
+ * Prints the realm's snaps, followed (only when non-empty) by its parent
+ * links and its child links.  Each link is rendered as
+ * "<link.first>-<key>=<link.dirino>", where a map key of CEPH_NOSNAP is
+ * shown as "head".
+ */
+inline ostream& operator<<(ostream& out, const SnapRealm &realm) {
+  out << "snaprealm(" << realm.snaps;
+  if (realm.parents.size()) {
+    out << " parents=(";
+    for (multimap<snapid_t, snaplink_t>::const_iterator p = realm.parents.begin();
+         p != realm.parents.end();
+         ++p) {
+      if (p != realm.parents.begin()) out << ",";
+      out << p->second.first << "-";
+      if (p->first == CEPH_NOSNAP)
+        out << "head";
+      else
+        out << p->first;
+      out << "=" << p->second.dirino;
+    }
+    out << ")";
+  }
+  if (realm.children.size()) {
+    out << " children=(";
+    // walk the children map here (not parents) so the label matches the data
+    for (multimap<snapid_t, snaplink_t>::const_iterator p = realm.children.begin();
+         p != realm.children.end();
+         ++p) {
+      if (p != realm.children.begin()) out << ",";
+      out << p->second.first << "-";
+      if (p->first == CEPH_NOSNAP)
+        out << "head";
+      else
+        out << p->first;
+      out << "=" << p->second.dirino;
+    }
+    out << ")";
+  }
+  out << ")";
+  return out;
+}
#endif
int get_result() { return (__s32)(__u32)st.result; }
+ inodeno_t get_snap_realm() { return inodeno_t((__u64)st.snap_realm); }
snapid_t get_snap_highwater() { return st.snap_highwater; }
vector<snapid_t> &get_snaps() { return snaps; }
+  void set_snap_realm(inodeno_t r) { st.snap_realm = r; }  // the realm is identified by an inode number (get_snap_realm returns inodeno_t), not by a snapid
+ void set_snap_highwater(snapid_t hw) { st.snap_highwater = hw; }
+
unsigned get_file_caps() { return st.file_caps; }
- inodeno_t get_file_caps_realm() { return inodeno_t((__u64)st.file_caps_realm); }
unsigned get_file_caps_seq() { return st.file_caps_seq; }
unsigned get_file_caps_mseq() { return st.file_caps_mseq; }
//uint64_t get_file_data_version() { return st.file_data_version; }
void set_file_caps_seq(capseq_t s) { st.file_caps_seq = s; }
void set_file_caps_mseq(capseq_t s) { st.file_caps_mseq = s; }
//void set_file_data_version(uint64_t v) { st.file_data_version = v; }
- void set_snap_highwater(snapid_t hw) { st.snap_highwater = hw; }
MClientReply() {}
MClientReply(MClientRequest *req, int result = 0) :