for (xlist<Inode*>::iterator p = realm->inodes_with_caps.begin(); !p.end(); ++p) {
Inode *in = *p;
check_caps(in, true); // force writeback of write caps
+ if (g_conf.client_oc)
+ _flush(in);
}
realm->snaps = snaps; // ok.
dout(10) << "readahead " << f->nr_consec_read << " reads "
<< f->consec_read_bytes << " bytes ... readahead " << offset << "~" << l
<< " (caller wants " << offset << "~" << size << ")" << dendl;
- #warning bleh
- //objectcacher->file_read(in->inode.ino, &in->inode.layout, offset, l, NULL, 0, 0);
+ objectcacher->file_read(in->inode.ino, &in->inode.layout,
+ CEPH_NOSNAP, in->snaprealm->snaps,
+ offset, l, NULL, 0, 0);
dout(10) << "readahead initiated" << dendl;
}
// read (and possibly block)
- #warning bleh
- //r = objectcacher->file_read(in->inode.ino, &in->inode.layout, offset, size, bl, 0, onfinish);
+ r = objectcacher->file_read(in->inode.ino, &in->inode.layout,
+ CEPH_NOSNAP, in->snaprealm->snaps,
+ offset, size, bl, 0, onfinish);
if (r == 0) {
while (!done)
delete onfinish;
}
} else {
- #warning bleh
- //r = objectcacher->file_atomic_sync_read(in->inode.ino, &in->inode.layout, offset, size, bl, 0, client_lock);
+ r = objectcacher->file_atomic_sync_read(in->inode.ino, &in->inode.layout,
+ CEPH_NOSNAP, in->snaprealm->snaps,
+ offset, size, bl, 0, client_lock);
}
} else {
objectcacher->wait_for_write(size, client_lock);
// async, caching, non-blocking.
- #warning bleh
- //objectcacher->file_write(in->inode.ino, &in->inode.layout, offset, size, bl, 0);
+ objectcacher->file_write(in->inode.ino, &in->inode.layout,
+ CEPH_NOSNAP, in->snaprealm->snaps,
+ offset, size, bl, 0);
} else {
// atomic, synchronous, blocking.
- #warning bleh
- //objectcacher->file_atomic_sync_write(in->inode.ino, &in->inode.layout, offset, size, bl, 0, client_lock);
+ objectcacher->file_atomic_sync_write(in->inode.ino, &in->inode.layout,
+ CEPH_NOSNAP, in->snaprealm->snaps,
+ offset, size, bl, 0, client_lock);
}
} else {
// simple, non-atomic sync write
client_readahead_min: 128*1024, // readahead at _least_ this much.
client_readahead_max_bytes: 0,//8 * 1024*1024,
client_readahead_max_periods: 4, // as multiple of file layout period (object size * num stripes)
+ client_snapdir: ".snap",
fuse_direct_io: 0,
fuse_ll: true,
// --- objectcacher ---
- client_oc: false,//until snaps are done... true,
+ client_oc: true,
client_oc_size: 1024*1024* 64, // MB * n
client_oc_max_dirty: 1024*1024* 48, // MB * n (dirty OR tx.. bigish)
client_oc_target_dirty: 1024*1024* 8, // target dirty (keep this smallish)
loff_t client_readahead_min;
loff_t client_readahead_max_bytes;
loff_t client_readahead_max_periods;
+ const char *client_snapdir;
int fuse_direct_io;
bool fuse_ll;
* & 0x10000 -> follow symlink (e.g. stat(), not lstat()).
* & 0x100000 -> use weird ino/path trace
*/
-#define CEPH_MDS_OP_WRITE 0x01000
-#define CEPH_MDS_OP_FOLLOW_LINK 0x10000
-#define CEPH_MDS_OP_INO_PATH 0x100000
+#define CEPH_MDS_OP_WRITE 0x001000
+#define CEPH_MDS_OP_FOLLOW_LINK 0x010000
+#define CEPH_MDS_OP_INO_PATH 0x100000
enum {
CEPH_MDS_OP_FINDINODE = 0x100100,
CEPH_MDS_OP_FSYNC = 0x00304,
CEPH_MDS_OP_READDIR = 0x00305,
- CEPH_MDS_OP_MKSNAP = 0x01010,
- CEPH_MDS_OP_RMSNAP = 0x01011,
+ CEPH_MDS_OP_MKSNAP = 0x01400,
+ CEPH_MDS_OP_RMSNAP = 0x01401,
+ CEPH_MDS_OP_LSSNAP = 0x00402,
};
static inline const char *ceph_mds_op_name(int op)
if (snaps.empty())
return in;
- CInode *t = 0;
for (set<snapid_t>::const_iterator p = snaps.upper_bound(follows);
p != snaps.end();
p++) {
- t = get_inode(in->ino(), *p);
+ CInode *t = get_inode(in->ino(), *p);
if (t) {
in = t;
dout(10) << "pick_inode_snap snap " << *p << " found " << *in << dendl;
// snaps
+ case CEPH_MDS_OP_LSSNAP:
+ handle_client_lssnap(mdr);
+ break;
case CEPH_MDS_OP_MKSNAP:
handle_client_mksnap(mdr);
break;
// snaps
+void Server::handle_client_lssnap(MDRequest *mdr) // LSSNAP handler: replies with the encoded snap info of the snap realm found for the directory named by the request path
+{
+ MClientRequest *req = mdr->client_request;
+ // Outline: traverse the path, forward if not auth, require a directory, rdlock path + snaplocks, then reply.
+ // traverse to path
+ vector<CDentry*> trace;
+ int r = mdcache->path_traverse(mdr, req,
+ req->get_filepath(), trace, false,
+ MDS_TRAVERSE_FORWARD);
+ if (r > 0) return; // positive result: return without replying (presumably the request was delayed or forwarded -- confirm against path_traverse)
+ if (trace.empty()) r = -EINVAL; // an empty trace is rejected with -EINVAL, even if r was 0 (original note: can't snap root)
+ if (r < 0) { // traversal error or empty trace: reply with the error code
+ reply_request(mdr, r);
+ return;
+ }
+ CDentry *dn = trace[trace.size()-1]; // last dentry of the traversed path
+ assert(dn);
+ if (!dn->is_auth()) { // fw to auth?
+ mdcache->request_forward(mdr, dn->authority().first);
+ return;
+ }
+
+ // dir only
+ CInode *diri = dn->inode;
+ if (!dn->is_primary() || !diri->is_dir()) { // non-primary dentry or non-directory inode: reply -ENOTDIR
+ reply_request(mdr, -ENOTDIR);
+ return;
+ }
+ dout(10) << "lssnap " << req->get_path2() << " on " << *diri << dendl;
+
+ // lock snap
+ set<SimpleLock*> rdlocks, wrlocks, xlocks; // only rdlocks is populated here; wrlocks and xlocks are passed on empty
+
+ // rdlock path
+ for (int i=0; i<(int)trace.size()-1; i++) // every dentry in the trace except the last one
+ rdlocks.insert(&trace[i]->lock);
+
+ // rdlock ancestor snaps
+ CInode *t = diri;
+ rdlocks.insert(&diri->snaplock);
+ while (t->get_parent_dn()) { // climb through parent dentries until reaching an inode that has none
+ t = t->get_parent_dn()->get_dir()->get_inode();
+ rdlocks.insert(&t->snaplock);
+ }
+
+ if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
+ return; // locks not acquired: no reply is sent from here (presumably the request is retried later -- confirm against Locker::acquire_locks)
+
+ SnapRealm *realm = diri->find_snaprealm(); // NOTE(review): the result is dereferenced below without a null check
+ bufferlist snapinfo;
+ realm->get_snap_info(snapinfo); // called without first/last, so the declaration's default bounds apply
+
+ MClientReply *reply = new MClientReply(req);
+ reply->set_dir_bl(snapinfo); // the encoded snap info is carried in the reply's dir bufferlist
+ reply_request(mdr, reply);
+}
+
void Server::handle_client_mksnap(MDRequest *mdr)
{
MClientRequest *req = mdr->client_request;
void _rename_finish(MDRequest *mdr,
CDentry *srcdn, CDentry *destdn, CDentry *straydn);
+ void handle_client_lssnap(MDRequest *mdr);
void handle_client_mksnap(MDRequest *mdr);
void handle_client_rmsnap(MDRequest *mdr);
}
+void SnapRealm::get_snap_info(bufferlist& bl, snapid_t first, snapid_t last) // appends to bl the encoded SnapInfo of snaps in [first,last]: this realm's own, then past parents', then the current parent's
+{
+ dout(10) << "get_snap_info snaps " << get_snaps() << dendl;
+ // Entries are appended to bl in the order visited below; nothing is removed from bl.
+ // include my snaps within interval [first,last]
+ for (map<snapid_t, SnapInfo>::iterator p = snaps.lower_bound(first); // first element >= first
+ p != snaps.end() && p->first <= last;
+ p++)
+ ::encode(p->second, bl);
+ // past_parents: presumably keyed by the last snapid of an interval, with snaplink_t holding (dirino, first) -- inferred from the clamping below; confirm
+ // include snaps for parents during intervals that intersect [first,last]
+ snapid_t thru = first; // start of the sub-range not yet delegated to a past parent; advanced inside the loop
+ for (map<snapid_t, snaplink_t>::iterator p = past_parents.lower_bound(first);
+ p != past_parents.end() && p->first >= first && p->second.first <= last;
+ p++) {
+ CInode *oldparent = mdcache->get_inode(p->second.dirino);
+ assert(oldparent); // call open_parents first!
+ assert(oldparent->snaprealm);
+ // recurse into the old parent's realm over the overlap of its interval with [first,last]
+ thru = MIN(last, p->first);
+ oldparent->snaprealm->get_snap_info(bl,
+ MAX(first, p->second.first),
+ thru);
+ ++thru; // the next uncovered snapid starts right after the range just handled
+ }
+ if (thru <= last && parent) // remaining tail [thru,last] is delegated to the current parent, if one is set
+ parent->get_snap_info(bl, thru, last);
+}
+
+
void SnapRealm::split_at(SnapRealm *child)
{
dout(10) << "split_at " << *child
void build_snap_set(set<snapid_t>& s, snapid_t first, snapid_t last);
const set<snapid_t>& get_snaps();
const vector<snapid_t>& get_snap_vector();
+ void get_snap_info(bufferlist& snapinfo, snapid_t first=0, snapid_t last=CEPH_NOSNAP);
const set<snapid_t>& update_snaps(snapid_t adding=0);
snapid_t get_latest_snap() {
const set<snapid_t> &snaps = get_snaps();
ObjectCacher::BufferHead *right = new BufferHead(this);
right->last_write_tid = left->last_write_tid;
right->set_state(left->get_state());
+ right->snaps = left->snaps;
off_t newleftlen = off - left->start();
right->set_start(off);
C_ReadFinish *onfinish = new C_ReadFinish(this, bh->ob->get_oid(), bh->start(), bh->length());
// go
- #warning bleh
- //objecter->read(bh->ob->get_oid(), bh->start(), bh->length(), bh->ob->get_layout(), &onfinish->bl, 0,
- //onfinish);
+ objecter->read(bh->ob->get_oid(), bh->start(), bh->length(), bh->ob->get_layout(),
+ bh->snaps,
+ &onfinish->bl, 0,
+ onfinish);
}
void ObjectCacher::bh_read_finish(object_t oid, off_t start, size_t length, bufferlist &bl)
C_WriteCommit *oncommit = new C_WriteCommit(this, bh->ob->get_oid(), bh->start(), bh->length());
// go
- tid_t tid =
- 0;
- #warning bleh
- //objecter->write(bh->ob->get_oid(), bh->start(), bh->length(), bh->ob->get_layout(), bh->bl, 0,
- // onack, oncommit);
+ tid_t tid = objecter->write(bh->ob->get_oid(), bh->start(), bh->length(), bh->ob->get_layout(),
+ bh->snaps, bh->bl, 0,
+ onack, oncommit);
// set bh last_write_tid
onack->tid = tid;
for (map<off_t, BufferHead*>::iterator bh_it = missing.begin();
bh_it != missing.end();
bh_it++) {
+ bh_it->second->snaps = rd->snaps;
bh_read(bh_it->second);
if (success && onfinish) {
dout(10) << "readx missed, waiting on " << *bh_it->second
for (map<off_t, BufferHead*>::iterator bh_it = rx.begin();
bh_it != rx.end();
bh_it++) {
+ bh_it->second->snaps = rd->snaps;
touch_bh(bh_it->second); // bump in lru, so we don't lose it.
if (success && onfinish) {
dout(10) << "readx missed, waiting on " << *bh_it->second
// map it all into a single bufferhead.
BufferHead *bh = o->map_write(wr);
+ bh->snaps = wr->snaps;
// adjust buffer pointers (ie "copy" data into my cache)
// this is over a single ObjectExtent, so we know that
bufferlist bl;
tid_t last_write_tid; // version of bh (if non-zero)
utime_t last_write;
+ vector<snapid_t> snaps;
map< off_t, list<Context*> > waitfor_read;