librbd/WatchCtx.cc \
osdc/ObjectCacher.cc \
osdc/Striper.cc \
+ osdc/snap_set_diff.cc \
cls/lock/cls_lock_client.cc \
cls/lock/cls_lock_types.cc \
cls/lock/cls_lock_ops.cc \
osd/ReplicatedPG.h\
osd/Watch.h\
osd/osd_types.h\
+ osdc/snap_set_diff.h\
osdc/Blinker.h\
osdc/Filer.h\
osdc/Journaler.h\
static const snap_t HEAD = ((snap_t)-1);
snap_t cloneid;
std::vector<snap_t> snaps; // ascending
- std::vector< std::pair<uint64_t,uint64_t> > overlap;
+ std::vector< std::pair<uint64_t,uint64_t> > overlap; // with next newest
uint64_t size;
};
ssize_t read(uint64_t ofs, size_t len, ceph::bufferlist& bl);
int64_t read_iterate(uint64_t ofs, size_t len,
int (*cb)(uint64_t, size_t, const char *, void *), void *arg);
+ /**
+  * Iterate over the extents of [ofs, ofs+len) that differ between the
+  * snapshot named fromsnapname and the image as currently opened.
+  *
+  * @param fromsnapname snapshot to diff from; may be NULL, in which case
+  *        the diff starts from snap id 0
+  * @param ofs offset into the image at which to start
+  * @param len number of bytes to examine
+  * @param cb invoked as cb(offset, length, flag, arg) for each differing
+  *        extent; flag is true when the object backing the extent does
+  *        not exist at the end point
+  * @param arg opaque pointer handed back to cb
+  * @returns number of bytes examined, or a negative error code
+  */
+ int64_t diff_iterate(const char *fromsnapname,
+ uint64_t ofs, size_t len,
+ int (*cb)(uint64_t, size_t, bool, void *), void *arg);
ssize_t write(uint64_t ofs, size_t len, ceph::bufferlist& bl);
int discard(uint64_t ofs, uint64_t len);
#include "librbd/parent_types.h"
#include "include/util.h"
+#include "osdc/snap_set_diff.h"
+
#define dout_subsys ceph_subsys_rbd
#undef dout_prefix
#define dout_prefix *_dout << "librbd: "
return total_read;
}
+ /**
+  * Walk [off, off+len) of the image and report every extent that differs
+  * between the snapshot named fromsnapname and the snapshot currently
+  * selected on ictx (or the head when none is selected).
+  *
+  * The range is processed one stripe period at a time.  For each object
+  * touched, the object's snap set is listed and calc_snap_set_diff()
+  * yields the changed intervals in object coordinates; these are mapped
+  * back to image offsets before being handed to cb.
+  *
+  * @param ictx image context
+  * @param fromsnapname snapshot to diff from; NULL means snap id 0
+  * @param off image offset to start at
+  * @param len number of bytes to examine (passed through clip_io())
+  * @param cb called as cb(image_offset, length, zero, arg) for each
+  *        differing extent; zero is true when the backing object does
+  *        not exist at the end point.  A negative return value stops the
+  *        iteration and is returned to the caller.
+  * @param arg opaque pointer handed to cb
+  * @returns number of bytes examined, or a negative error code
+  */
+ int64_t diff_iterate(ImageCtx *ictx, const char *fromsnapname,
+                      uint64_t off, size_t len,
+                      int (*cb)(uint64_t, size_t, bool, void *),
+                      void *arg)
+ {
+   ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
+                        << " len = " << len << dendl;
+
+   int r = ictx_check(ictx);
+   if (r < 0)
+     return r;
+
+   uint64_t mylen = len;
+   r = clip_io(ictx, off, &mylen);
+   if (r < 0)
+     return r;
+
+   librados::IoCtx head_ctx;
+
+   // snapshot the ids and sizes we need while holding both locks
+   ictx->md_lock.get_read();
+   ictx->snap_lock.get_read();
+   head_ctx.dup(ictx->data_ctx);
+   snap_t from_snap_id = 0;
+   uint64_t from_size = 0;
+   if (fromsnapname) {
+     from_snap_id = ictx->get_snap_id(fromsnapname);
+     from_size = ictx->get_image_size(from_snap_id);
+   }
+   snap_t end_snap_id = ictx->snap_id;
+   uint64_t end_size = ictx->get_image_size(end_snap_id);
+   ictx->snap_lock.put_read();
+   ictx->md_lock.put_read();
+   // NOTE(review): presumably get_snap_id() yields CEPH_NOSNAP for an
+   // unknown snapshot name -- confirm against ImageCtx
+   if (from_snap_id == CEPH_NOSNAP) {
+     return -EINVAL;
+   }
+   if (from_snap_id == end_snap_id) {
+     // no diff.
+     return 0;
+   }
+
+   // we must list snaps via the head, not end snap
+   head_ctx.snap_set_read(CEPH_SNAPDIR);
+
+   ldout(ictx->cct, 20) << "diff_iterate from " << from_snap_id << " to " << end_snap_id
+                        << " size from " << from_size << " to " << end_size << dendl;
+
+   // FIXME: if end_size > from_size, we could read_iterate for the
+   // final part, and skip the listsnaps op.
+
+   int64_t total_read = 0;
+   uint64_t period = ictx->get_stripe_period();
+   uint64_t left = mylen;
+
+   while (left > 0) {
+     // never cross a stripe period boundary in a single pass
+     uint64_t period_off = off - (off % period);
+     uint64_t read_len = min(period_off + period - off, left);
+
+     // map to extents; buffer offsets are relative to 'off' because a
+     // buffer_offset of 0 is passed
+     map<object_t,vector<ObjectExtent> > object_extents;
+     Striper::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout,
+                              off, read_len, object_extents, 0);
+
+     // get snap info for each object
+     for (map<object_t,vector<ObjectExtent> >::iterator p = object_extents.begin();
+          p != object_extents.end();
+          ++p) {
+       ldout(ictx->cct, 20) << "diff_iterate object " << p->first << dendl;
+
+       librados::snap_set_t snap_set;
+       uint64_t size;
+
+       librados::ObjectReadOperation op;
+       op.stat(&size, NULL, NULL);
+       op.list_snaps(&snap_set, NULL);
+       r = head_ctx.operate(p->first.name, &op, NULL);
+       if (r == -ENOENT)
+         continue;
+       if (r < 0)
+         return r;
+
+       // calc diff from from_snap_id -> to_snap_id
+       interval_set<uint64_t> diff;
+       bool end_exists;
+       calc_snap_set_diff(ictx->cct, snap_set,
+                          from_snap_id,
+                          end_snap_id == CEPH_NOSNAP ? librados::clone_info_t::HEAD : end_snap_id,
+                          &diff, &end_exists);
+       ldout(ictx->cct, 20) << "  diff " << diff << " end_exists=" << end_exists << dendl;
+       if (diff.empty())
+         continue;
+
+       for (vector<ObjectExtent>::iterator q = p->second.begin(); q != p->second.end(); ++q) {
+         ldout(ictx->cct, 20) << "diff_iterate object " << p->first
+                              << " extent " << q->offset << "~" << q->length
+                              << " from " << q->buffer_extents
+                              << dendl;
+         // opos tracks the object offset that the current buffer extent
+         // starts at
+         uint64_t opos = q->offset;
+         for (vector<pair<uint64_t,uint64_t> >::iterator be = q->buffer_extents.begin();
+              be != q->buffer_extents.end();
+              ++be) {
+           interval_set<uint64_t> overlap;  // object extents
+           overlap.insert(opos, be->second);
+           overlap.intersection_of(diff);
+           ldout(ictx->cct, 20) << " opos " << opos
+                                << " buf " << be->first << "~" << be->second
+                                << " overlap " << overlap
+                                << dendl;
+           for (interval_set<uint64_t>::iterator s = overlap.begin();
+                s != overlap.end();
+                ++s) {
+             // s is in object coordinates: take its distance from the
+             // start of this buffer extent and rebase it on the image
+             // offset that the buffer extent corresponds to
+             uint64_t su_off = s.get_start() - opos;
+             uint64_t logical_off = off + be->first + su_off;
+             ldout(ictx->cct, 20) << "   overlap extent " << s.get_start() << "~" << s.get_len()
+                                  << " logical "
+                                  << logical_off << "~" << s.get_len()
+                                  << dendl;
+             // let the consumer abort the walk / report an error
+             r = cb(logical_off, s.get_len(), !end_exists, arg);
+             if (r < 0)
+               return r;
+           }
+           opos += be->second;
+         }
+         assert(opos == q->offset + q->length);
+       }
+     }
+
+     total_read += read_len;
+     left -= read_len;
+     off += read_len;
+   }
+
+   return total_read;
+ }
+
int simple_read_cb(uint64_t ofs, size_t len, const char *buf, void *arg)
{
char *dest_buf = (char *)arg;
int64_t read_iterate(ImageCtx *ictx, uint64_t off, size_t len,
int (*cb)(uint64_t, size_t, const char *, void *),
void *arg);
+ // Report, via cb, the extents of [off, off+len) that differ between
+ // snapshot fromsnapname (NULL: snap id 0) and the snapshot currently
+ // selected on ictx; returns bytes examined or a negative error code.
+ int64_t diff_iterate(ImageCtx *ictx, const char *fromsnapname,
+ uint64_t off, size_t len,
+ int (*cb)(uint64_t, size_t, bool, void *),
+ void *arg);
ssize_t read(ImageCtx *ictx, uint64_t off, size_t len, char *buf);
ssize_t read(ImageCtx *ictx, const vector<pair<uint64_t,uint64_t> >& image_extents,
char *buf, bufferlist *pbl);
return librbd::read_iterate(ictx, ofs, len, cb, arg);
}
+ /**
+  * Public entry point for diffing: hands the request straight to
+  * librbd::diff_iterate() using this image's context.
+  *
+  * @param fromsnapname snapshot to diff from (forwarded unchanged)
+  * @param ofs image offset to start at
+  * @param len number of bytes to examine
+  * @param cb callback invoked for each differing extent
+  * @param arg opaque pointer handed to cb
+  * @returns whatever librbd::diff_iterate() returns
+  */
+ int64_t Image::diff_iterate(const char *fromsnapname,
+                             uint64_t ofs, size_t len,
+                             int (*cb)(uint64_t, size_t, bool, void *),
+                             void *arg)
+ {
+   return librbd::diff_iterate((ImageCtx *)ctx, fromsnapname,
+                               ofs, len, cb, arg);
+ }
+
ssize_t Image::write(uint64_t ofs, size_t len, bufferlist& bl)
{
ImageCtx *ictx = (ImageCtx *)ctx;
--- /dev/null
+
+#include <vector>
+
+#include "snap_set_diff.h"
+#include "common/ceph_context.h"
+#include "include/rados/librados.hpp"
+#include "include/interval_set.h"
+#include "common/debug.h"
+
+#define dout_subsys ceph_subsys_rbd
+
+/**
+ * calculate intervals/extents that vary between two snapshots
+ *
+ * Walks the clones of an object in the order they appear in
+ * snap_set.clones, accumulating for every step from one clone to the
+ * next the byte ranges that are NOT recorded as overlapping.
+ *
+ * @param cct context, used for logging only
+ * @param snap_set result of a list_snaps operation on the object
+ * @param start snap id to diff from
+ * @param end snap id to diff to (clone_info_t::HEAD for the head)
+ * @param diff [out] cleared, then filled with the changed extents in
+ *        object coordinates
+ * @param end_exists [out] true if a clone covering 'end' was found,
+ *        false otherwise
+ */
+void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set,
+                        librados::snap_t start,
+                        librados::snap_t end,
+                        interval_set<uint64_t> *diff, bool *end_exists)
+{
+  ldout(cct, 10) << "calc_snap_set_diff start " << start << " end " << end
+                 << ", snap_set seq " << snap_set.seq << dendl;
+  bool saw_start = false;
+  uint64_t start_size = 0;
+  diff->clear();
+  *end_exists = false;
+
+  // note: r is advanced inside the body, either when skipping a clone
+  // or when peeking at the next clone's size
+  for (vector<librados::clone_info_t>::const_iterator r = snap_set.clones.begin();
+       r != snap_set.clones.end();
+       ) {
+    // make an interval, and hide the fact that the HEAD doesn't
+    // include itself in the snaps list
+    librados::snap_t a, b;
+    if (r->cloneid == librados::clone_info_t::HEAD) {
+      // head is valid starting from right after the last seen seq
+      a = snap_set.seq + 1;
+      b = librados::clone_info_t::HEAD;
+    } else if (r->snaps.empty()) {
+      // no snaps listed for this clone: do not index into the empty
+      // vector, treat the clone as covering only its own id
+      ldout(cct, 1) << " clone " << r->cloneid << " has empty snaps" << dendl;
+      a = b = r->cloneid;
+    } else {
+      a = r->snaps[0];
+      // use the newest snap that is still listed rather than asserting
+      // that it equals the clone id
+      b = r->snaps[r->snaps.size()-1];
+    }
+    ldout(cct, 20) << " clone " << r->cloneid << " snaps " << r->snaps
+                   << " -> [" << a << "," << b << "]"
+                   << " size " << r->size << " overlap to next " << r->overlap << dendl;
+
+    if (b < start) {
+      // this is before start
+      ++r;
+      continue;
+    }
+
+    if (!saw_start) {
+      if (start < a) {
+        ldout(cct, 20) << "  start, after " << start << dendl;
+        // this means the object didn't exist at start
+        if (r->size)   // every other insert here is guarded against len 0
+          diff->insert(0, r->size);
+        start_size = 0;
+      } else {
+        ldout(cct, 20) << "  start" << dendl;
+        start_size = r->size;
+      }
+      saw_start = true;
+    }
+
+    if (end < a) {
+      ldout(cct, 20) << " past end " << end << ", end object does not exist" << dendl;
+      *end_exists = false;
+      // the only difference is that whatever existed at start is gone;
+      // drop anything accumulated so far even when start_size is 0
+      diff->clear();
+      if (start_size) {
+        diff->insert(0, start_size);
+      }
+      break;
+    }
+    if (end <= b) {
+      ldout(cct, 20) << " end" << dendl;
+      *end_exists = true;
+      break;
+    }
+
+    // start with the max(this size, next size), and subtract off any
+    // overlap
+    const vector<pair<uint64_t, uint64_t> > *overlap = &r->overlap;
+    interval_set<uint64_t> diff_to_next;
+    uint64_t max_size = r->size;
+    ++r;
+    if (r != snap_set.clones.end()) {
+      if (r->size > max_size)
+        max_size = r->size;
+    }
+    if (max_size)
+      diff_to_next.insert(0, max_size);
+    for (vector<pair<uint64_t, uint64_t> >::const_iterator p = overlap->begin();
+         p != overlap->end();
+         ++p) {
+      diff_to_next.erase(p->first, p->second);
+    }
+    ldout(cct, 20) << "  diff_to_next " << diff_to_next << dendl;
+    diff->union_of(diff_to_next);
+    ldout(cct, 20) << "  diff now " << *diff << dendl;
+  }
+}
--- /dev/null
+// Include guard deliberately avoids a leading/double underscore: such
+// identifiers are reserved for the implementation.
+#ifndef CEPH_OSDC_SNAP_SET_DIFF_H
+#define CEPH_OSDC_SNAP_SET_DIFF_H
+
+class CephContext;
+#include "include/rados/rados_types.hpp"
+#include "include/interval_set.h"
+
+/**
+ * calculate intervals/extents that vary between two snapshots
+ *
+ * @param cct context used for logging
+ * @param snap_set snap set of the object, as returned by list_snaps
+ * @param start snap id to diff from
+ * @param end snap id to diff to
+ * @param diff [out] cleared, then filled with the differing extents
+ * @param end_exists [out] set to true if a clone covering 'end' was
+ *        found, false otherwise
+ */
+void calc_snap_set_diff(CephContext *cct,
+                        const librados::snap_set_t& snap_set,
+                        librados::snap_t start, librados::snap_t end,
+                        interval_set<uint64_t> *diff,
+                        bool *end_exists);
+
+#endif
#include "test/librados/test.h"
#include "common/errno.h"
+#include "include/interval_set.h"
#include "include/stringify.h"
using namespace std;
ioctx.close();
ASSERT_EQ(0, destroy_one_pool_pp(pool_name, rados));
}
+
+
+// diff_iterate callback used by the tests below: logs the extent and
+// records it in the interval_set that arg points to.  The zero flag is
+// not inspected.  Always returns 0.
+int iterate_cb(uint64_t off, size_t len, bool zero, void *arg)
+{
+  interval_set<uint64_t> &seen = *static_cast<interval_set<uint64_t> *>(arg);
+  cout << "iterate_cb " << off << "~" << len << std::endl;
+  seen.insert(off, len);
+  return 0;
+}
+
+// Perform n random operations on image, each covering between 1 and max
+// bytes at a random offset.  Roughly one operation in four is a discard,
+// the rest are writes of zero-filled buffers.
+//   exists - updated to track which extents currently hold data
+//   what   - accumulates every extent this call modified (for discards,
+//            only the part that previously existed)
+void scribble(librbd::Image& image, int n, int max, interval_set<uint64_t> *exists, interval_set<uint64_t> *what)
+{
+  uint64_t image_size;
+  image.size(&image_size);
+  for (int round = n; round > 0; --round) {
+    // keep the order of the rand() calls: offset, length, then op choice
+    const uint64_t off = rand() % (image_size - max + 1);
+    const uint64_t len = 1 + rand() % max;
+    interval_set<uint64_t> touched;
+    touched.insert(off, len);
+    const bool is_discard = (rand() % 4 == 0);
+    if (!is_discard) {
+      bufferlist zeros;
+      zeros.append(buffer::create(len));
+      zeros.zero();
+      ASSERT_EQ((int)len, image.write(off, len, zeros));
+      what->union_of(touched);
+      exists->union_of(touched);
+      continue;
+    }
+    ASSERT_EQ((int)len, image.discard(off, len));
+    // a discard only changes what was actually there
+    touched.intersection_of(*exists);
+    what->union_of(touched);
+    exists->subtract(touched);
+  }
+}
+
+// Scribble on an image, snapshot it, scribble again, and verify that
+// everything modified after the snapshot is covered by the extents that
+// diff_iterate() reports relative to that snapshot.
+TEST(LibRBD, DiffIterate)
+{
+  librados::Rados rados;
+  librados::IoCtx ioctx;
+  string pool_name = get_temp_pool_name();
+
+  ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
+  ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
+
+  {
+    librbd::RBD rbd;
+    librbd::Image image;
+    int order = 0;
+    const char *name = "testimg";
+    uint64_t size = 20 << 20;
+
+    ASSERT_EQ(0, create_image_pp(rbd, ioctx, name, size, &order));
+    ASSERT_EQ(0, rbd.open(ioctx, image, name, NULL));
+
+    interval_set<uint64_t> exists;
+    interval_set<uint64_t> before_snap, after_snap;
+
+    // first round of changes, captured by snapshot "one"
+    scribble(image, 10, 102400, &exists, &before_snap);
+    cout << " wrote " << before_snap << std::endl;
+    ASSERT_EQ(0, image.snap_create("one"));
+
+    // second round: this is what the diff must cover
+    scribble(image, 10, 102400, &exists, &after_snap);
+    cout << " wrote " << after_snap << std::endl;
+
+    interval_set<uint64_t> reported;
+    ASSERT_EQ((int)size, image.diff_iterate("one", 0, size, iterate_cb, (void *)&reported));
+    cout << " diff was " << reported << std::endl;
+
+    const bool covered = after_snap.subset_of(reported);
+    if (!covered) {
+      // show exactly which written extents the diff failed to report
+      interval_set<uint64_t> common;
+      common.intersection_of(after_snap, reported);
+      interval_set<uint64_t> missing = after_snap;
+      missing.subtract(common);
+      cout << " ... two - (two*diff) = " << missing << std::endl;
+    }
+    ASSERT_TRUE(covered);
+  }
+  ioctx.close();
+  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, rados));
+}
+
+// Create a series of snapshots with random changes before each one, then
+// for every ordered pair (i, j) check that the changes made between
+// snapshot i and snapshot j are covered by what diff_iterate() reports.
+TEST(LibRBD, DiffIterateStress)
+{
+  librados::Rados rados;
+  librados::IoCtx ioctx;
+  string pool_name = get_temp_pool_name();
+
+  ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
+  ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
+
+  {
+    librbd::RBD rbd;
+    librbd::Image image;
+    int order = 0;
+    const char *name = "testimg";
+    uint64_t size = 400 << 20;
+
+    ASSERT_EQ(0, create_image_pp(rbd, ioctx, name, size, &order));
+    ASSERT_EQ(0, rbd.open(ioctx, image, name, NULL));
+
+    interval_set<uint64_t> exists;
+    vector<interval_set<uint64_t> > wrote;  // wrote[k]: changes made just before snap k
+    vector<string> snap;                    // snap[k]: name of snapshot k
+    int n = 10;
+    for (int i=0; i<n; i++) {
+      interval_set<uint64_t> w;
+      scribble(image, 10, 8192000, &exists, &w);
+      cout << " i=" << i << " exists " << exists << " wrote " << w << std::endl;
+      string s = "snap" + stringify(i);
+      ASSERT_EQ(0, image.snap_create(s.c_str()));
+      wrote.push_back(w);
+      snap.push_back(s);
+    }
+
+    for (int i=0; i<n-1; i++) {
+      for (int j=i+1; j<n; j++) {
+        // expected diff: everything written after snap i, up to snap j
+        interval_set<uint64_t> diff, actual;
+        for (int k=i+1; k<=j; k++)
+          diff.union_of(wrote[k]);
+        cout << "from " << i << " to " << j << " diff " << diff << std::endl;
+
+        // a failed switch would silently diff against the wrong end point
+        ASSERT_EQ(0, image.snap_set(snap[j].c_str()));
+        ASSERT_EQ((int)size, image.diff_iterate(snap[i].c_str(), 0, size, iterate_cb, (void *)&actual));
+        cout << " actual was " << actual << std::endl;
+        if (!diff.subset_of(actual)) {
+          // named so as not to shadow the loop index i
+          interval_set<uint64_t> common;
+          common.intersection_of(diff, actual);
+          interval_set<uint64_t> missing = diff;
+          missing.subtract(common);
+          cout << " ... diff - (actual*diff) = " << missing << std::endl;
+        }
+        ASSERT_TRUE(diff.subset_of(actual));
+      }
+    }
+
+  }
+  ioctx.close();
+  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, rados));
+}