From: Sage Weil Date: Mon, 25 Mar 2013 21:14:50 +0000 (-0700) Subject: librbd: implement diff_iterate X-Git-Tag: v0.62~118^2~42 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0296c7cdae91907efadf4f5b154b817daf55b26f;p=ceph.git librbd: implement diff_iterate Implement a diff_iterate() method that will iterate over an image and report which extents vary between two snapshots (or a snapshot and the head). The callback gets an extent and a flag indicating whether it is full of data or is known to be zero in the ending snapshot. Signed-off-by: Sage Weil --- diff --git a/src/Makefile.am b/src/Makefile.am index b27c8bb6b702..c858f2a496ec 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -423,6 +423,7 @@ librbd_la_SOURCES = \ librbd/WatchCtx.cc \ osdc/ObjectCacher.cc \ osdc/Striper.cc \ + osdc/snap_set_diff.cc \ cls/lock/cls_lock_client.cc \ cls/lock/cls_lock_types.cc \ cls/lock/cls_lock_ops.cc \ @@ -1940,6 +1941,7 @@ noinst_HEADERS = \ osd/ReplicatedPG.h\ osd/Watch.h\ osd/osd_types.h\ + osdc/snap_set_diff.h\ osdc/Blinker.h\ osdc/Filer.h\ osdc/Journaler.h\ diff --git a/src/include/rados/rados_types.hpp b/src/include/rados/rados_types.hpp index 6cfc30f9015a..b61bdcb7fbf8 100644 --- a/src/include/rados/rados_types.hpp +++ b/src/include/rados/rados_types.hpp @@ -13,7 +13,7 @@ struct clone_info_t { static const snap_t HEAD = ((snap_t)-1); snap_t cloneid; std::vector snaps; // ascending - std::vector< std::pair > overlap; + std::vector< std::pair > overlap; // with next newest uint64_t size; }; diff --git a/src/include/rbd/librbd.hpp b/src/include/rbd/librbd.hpp index a7bfcf43233c..15d184e02559 100644 --- a/src/include/rbd/librbd.hpp +++ b/src/include/rbd/librbd.hpp @@ -155,6 +155,9 @@ public: ssize_t read(uint64_t ofs, size_t len, ceph::bufferlist& bl); int64_t read_iterate(uint64_t ofs, size_t len, int (*cb)(uint64_t, size_t, const char *, void *), void *arg); + int64_t diff_iterate(const char *fromsnapname, + uint64_t ofs, size_t len, + int (*cb)(uint64_t, size_t, bool, void *), void *arg); ssize_t write(uint64_t ofs, size_t len, ceph::bufferlist& bl); int discard(uint64_t ofs, uint64_t len); diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index b740e46b7c16..b1d30452f5ed 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -20,6 +20,8 @@ #include "librbd/parent_types.h" #include "include/util.h" +#include "osdc/snap_set_diff.h" + #define dout_subsys ceph_subsys_rbd #undef dout_prefix #define dout_prefix *_dout << "librbd: " @@ -2254,6 +2256,141 @@ reprotect_and_return_err: return total_read; } + int64_t diff_iterate(ImageCtx *ictx, const char *fromsnapname, + uint64_t off, size_t len, + int (*cb)(uint64_t, size_t, bool, void *), + void *arg) + { + utime_t start_time, elapsed; + + ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off + << " len = " << len << dendl; + + int r = ictx_check(ictx); + if (r < 0) + return r; + + uint64_t mylen = len; + r = clip_io(ictx, off, &mylen); + if (r < 0) + return r; + + librados::IoCtx head_ctx; + + ictx->md_lock.get_read(); + ictx->snap_lock.get_read(); + head_ctx.dup(ictx->data_ctx); + snap_t from_snap_id = 0; + uint64_t from_size = 0; + if (fromsnapname) { + from_snap_id = ictx->get_snap_id(fromsnapname); + from_size = ictx->get_image_size(from_snap_id); + } + snap_t end_snap_id = ictx->snap_id; + uint64_t end_size = ictx->get_image_size(end_snap_id); + ictx->snap_lock.put_read(); + ictx->md_lock.put_read(); + if (from_snap_id == CEPH_NOSNAP) { + return -EINVAL; + } + if (from_snap_id == end_snap_id) { + // no diff. + return 0; + } + + // we must list snaps via the head, not end snap + head_ctx.snap_set_read(CEPH_SNAPDIR); + + ldout(ictx->cct, 20) << "diff_iterate from " << from_snap_id << " to " << end_snap_id + << " size from " << from_size << " to " << end_size << dendl; + + // FIXME: if end_size > from_size, we could read_iterate for the + // final part, and skip the listsnaps op. + + int64_t total_read = 0; + uint64_t period = ictx->get_stripe_period(); + uint64_t left = mylen; + + start_time = ceph_clock_now(ictx->cct); + while (left > 0) { + uint64_t period_off = off - (off % period); + uint64_t read_len = min(period_off + period - off, left); + + // map to extents + map > object_extents; + Striper::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout, + off, read_len, object_extents, 0); + + // get snap info for each object + for (map >::iterator p = object_extents.begin(); + p != object_extents.end(); + ++p) { + ldout(ictx->cct, 20) << "diff_iterate object " << p->first << dendl; + + librados::snap_set_t snap_set; + uint64_t size; + + librados::ObjectReadOperation op; + op.stat(&size, NULL, NULL); + op.list_snaps(&snap_set, NULL); + int r = head_ctx.operate(p->first.name, &op, NULL); + if (r == -ENOENT) + continue; + if (r < 0) + return r; + + // calc diff from from_snap_id -> to_snap_id + interval_set diff; + bool end_exists; + calc_snap_set_diff(ictx->cct, snap_set, + from_snap_id, + end_snap_id == CEPH_NOSNAP ? librados::clone_info_t::HEAD : end_snap_id, + &diff, &end_exists); + ldout(ictx->cct, 20) << " diff " << diff << " end_exists=" << end_exists << dendl; + if (diff.empty()) + continue; + + for (vector::iterator q = p->second.begin(); q != p->second.end(); ++q) { + ldout(ictx->cct, 20) << "diff_iterate object " << p->first + << " extent " << q->offset << "~" << q->length + << " from " << q->buffer_extents + << dendl; + uint64_t opos = q->offset; + for (vector >::iterator r = q->buffer_extents.begin(); + r != q->buffer_extents.end(); + ++r) { + interval_set overlap; + overlap.insert(opos, r->second); + overlap.intersection_of(diff); + ldout(ictx->cct, 20) << " opos " << opos + << " buf " << r->first << "~" << r->second + << " overlap " << overlap + << dendl; + for (interval_set::iterator s = overlap.begin(); + s != overlap.end(); + ++s) { + uint64_t logical_off = off + s.get_start(); + ldout(ictx->cct, 20) << " overlap extent " << s.get_start() << "~" << s.get_len() + << " logical " + << logical_off << "~" << s.get_len() + << dendl; + cb(logical_off, s.get_len(), !end_exists, arg); + } + opos += r->second; + } + assert(opos == q->offset + q->length); + } + } + + total_read += read_len; + left -= read_len; + off += read_len; + } + + elapsed = ceph_clock_now(ictx->cct) - start_time; + return total_read; + } + int simple_read_cb(uint64_t ofs, size_t len, const char *buf, void *arg) { char *dest_buf = (char *)arg; diff --git a/src/librbd/internal.h b/src/librbd/internal.h index f1392f690a25..66e97b6b19f1 100644 --- a/src/librbd/internal.h +++ b/src/librbd/internal.h @@ -167,6 +167,10 @@ namespace librbd { int64_t read_iterate(ImageCtx *ictx, uint64_t off, size_t len, int (*cb)(uint64_t, size_t, const char *, void *), void *arg); + int64_t diff_iterate(ImageCtx *ictx, const char *fromsnapname, + uint64_t off, size_t len, + int (*cb)(uint64_t, size_t, bool, void *), + void *arg); ssize_t read(ImageCtx *ictx, uint64_t off, size_t len, char *buf); ssize_t read(ImageCtx *ictx, const vector >& image_extents, char *buf, bufferlist *pbl); diff --git a/src/librbd/librbd.cc b/src/librbd/librbd.cc index 02d8fbf32116..4ee178c6b760 100644 --- a/src/librbd/librbd.cc +++ b/src/librbd/librbd.cc @@ -436,6 +436,15 @@ namespace librbd { return librbd::read_iterate(ictx, ofs, len, cb, arg); } + int64_t Image::diff_iterate(const char *fromsnapname, + uint64_t ofs, size_t len, + int (*cb)(uint64_t, size_t, bool, void *), + void *arg) + { + ImageCtx *ictx = (ImageCtx *)ctx; + return librbd::diff_iterate(ictx, fromsnapname, ofs, len, cb, arg); + } + ssize_t Image::write(uint64_t ofs, size_t len, bufferlist& bl) { ImageCtx *ictx = (ImageCtx *)ctx; diff --git a/src/osdc/snap_set_diff.cc b/src/osdc/snap_set_diff.cc new file mode 100644 index 000000000000..4292b89d868d --- /dev/null +++ b/src/osdc/snap_set_diff.cc @@ -0,0 +1,100 @@ + +#include + +#include "snap_set_diff.h" +#include "common/ceph_context.h" +#include "include/rados/librados.hpp" +#include "include/interval_set.h" +#include "common/debug.h" + +#define dout_subsys ceph_subsys_rbd + +/** + * calculate intervals/extents that vary between two snapshots + */ +void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set, + librados::snap_t start, + librados::snap_t end, + interval_set *diff, bool *end_exists) +{ + ldout(cct, 10) << "calc_snap_set_diff start " << start << " end " << end + << ", snap_set seq " << snap_set.seq << dendl; + bool saw_start = false; + uint64_t start_size = 0; + diff->clear(); + *end_exists = false; + + for (vector::const_iterator r = snap_set.clones.begin(); + r != snap_set.clones.end(); + ) { + // make an interval, and hide the fact that the HEAD doesn't + // include itself in the snaps list + librados::snap_t a, b; + b = r->cloneid; + if (b == librados::clone_info_t::HEAD) { + // head is valid starting from right after the last seen seq + a = snap_set.seq + 1; + } else { + assert(b == r->snaps[r->snaps.size()-1]); + a = r->snaps[0]; + } + ldout(cct, 20) << " clone " << r->cloneid << " snaps " << r->snaps + << " -> [" << a << "," << b << "]" + << " size " << r->size << " overlap to next " << r->overlap << dendl; + + if (b < start) { + // this is before start + ++r; + continue; + } + + if (!saw_start) { + if (start < a) { + ldout(cct, 20) << " start, after " << start << dendl; + // this means the object didn't exist at start + diff->insert(0, r->size); + start_size = 0; + } else { + ldout(cct, 20) << " start" << dendl; + start_size = r->size; + } + saw_start = true; + } + + if (end < a) { + ldout(cct, 20) << " past end " << end << ", end object does not exist" << dendl; + *end_exists = false; + if (start_size) { + diff->clear(); + diff->insert(0, start_size); + } + break; + } + if (end <= b) { + ldout(cct, 20) << " end" << dendl; + *end_exists = true; + break; + } + + // start with the max(this size, next size), and subtract off any + // overlap + const vector > *overlap = &r->overlap; + interval_set diff_to_next; + uint64_t max_size = r->size; + ++r; + if (r != snap_set.clones.end()) { + if (r->size > max_size) + max_size = r->size; + } + if (max_size) + diff_to_next.insert(0, max_size); + for (vector >::const_iterator p = overlap->begin(); + p != overlap->end(); + ++p) { + diff_to_next.erase(p->first, p->second); + } + ldout(cct, 20) << " diff_to_next " << diff_to_next << dendl; + diff->union_of(diff_to_next); + ldout(cct, 20) << " diff now " << *diff << dendl; + } +} diff --git a/src/osdc/snap_set_diff.h b/src/osdc/snap_set_diff.h new file mode 100644 index 000000000000..714dd13eb335 --- /dev/null +++ b/src/osdc/snap_set_diff.h @@ -0,0 +1,14 @@ +#ifndef __CEPH_OSDC_SNAP_SET_DIFF_H +#define __CEPH_OSDC_SNAP_SET_DIFF_H + +class CephContext; +#include "include/rados/rados_types.hpp" +#include "include/interval_set.h" + +void calc_snap_set_diff(CephContext *cct, + const librados::snap_set_t& snap_set, + librados::snap_t start, librados::snap_t end, + interval_set *diff, + bool *end_exists); + +#endif diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc index e38d317485fc..9ae72c704a2e 100644 --- a/src/test/librbd/test_librbd.cc +++ b/src/test/librbd/test_librbd.cc @@ -33,6 +33,7 @@ #include "test/librados/test.h" #include "common/errno.h" +#include "include/interval_set.h" #include "include/stringify.h" using namespace std; @@ -1494,3 +1495,139 @@ TEST(LibRBD, FlushAioPP) ioctx.close(); ASSERT_EQ(0, destroy_one_pool_pp(pool_name, rados)); } + + +int iterate_cb(uint64_t off, size_t len, bool zero, void *arg) +{ + cout << "iterate_cb " << off << "~" << len << std::endl; + interval_set *diff = static_cast *>(arg); + diff->insert(off, len); + return 0; +} + +void scribble(librbd::Image& image, int n, int max, interval_set *exists, interval_set *what) +{ + uint64_t size; + image.size(&size); + for (int i=0; i w; + w.insert(off, len); + if (rand() % 4 == 0) { + ASSERT_EQ((int)len, image.discard(off, len)); + w.intersection_of(*exists); + what->union_of(w); + exists->subtract(w); + } else { + bufferlist bl; + bl.append(buffer::create(len)); + bl.zero(); + ASSERT_EQ((int)len, image.write(off, len, bl)); + what->union_of(w); + exists->union_of(w); + } + } +} + +TEST(LibRBD, DiffIterate) +{ + librados::Rados rados; + librados::IoCtx ioctx; + string pool_name = get_temp_pool_name(); + + ASSERT_EQ("", create_one_pool_pp(pool_name, rados)); + ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx)); + + { + librbd::RBD rbd; + librbd::Image image; + int order = 0; + const char *name = "testimg"; + uint64_t size = 20 << 20; + + ASSERT_EQ(0, create_image_pp(rbd, ioctx, name, size, &order)); + ASSERT_EQ(0, rbd.open(ioctx, image, name, NULL)); + + interval_set exists; + interval_set one, two; + scribble(image, 10, 102400, &exists, &one); + cout << " wrote " << one << std::endl; + ASSERT_EQ(0, image.snap_create("one")); + scribble(image, 10, 102400, &exists, &two); + cout << " wrote " << two << std::endl; + + interval_set diff; + ASSERT_EQ((int)size, image.diff_iterate("one", 0, size, iterate_cb, (void *)&diff)); + cout << " diff was " << diff << std::endl; + if (!two.subset_of(diff)) { + interval_set i; + i.intersection_of(two, diff); + interval_set l = two; + l.subtract(i); + cout << " ... two - (two*diff) = " << l << std::endl; + } + ASSERT_TRUE(two.subset_of(diff)); + } + ioctx.close(); + ASSERT_EQ(0, destroy_one_pool_pp(pool_name, rados)); +} + +TEST(LibRBD, DiffIterateStress) +{ + librados::Rados rados; + librados::IoCtx ioctx; + string pool_name = get_temp_pool_name(); + + ASSERT_EQ("", create_one_pool_pp(pool_name, rados)); + ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx)); + + { + librbd::RBD rbd; + librbd::Image image; + int order = 0; + const char *name = "testimg"; + uint64_t size = 400 << 20; + + ASSERT_EQ(0, create_image_pp(rbd, ioctx, name, size, &order)); + ASSERT_EQ(0, rbd.open(ioctx, image, name, NULL)); + + interval_set exists; + vector > wrote; + vector snap; + int n = 10; + for (int i=0; i w; + scribble(image, 10, 8192000, &exists, &w); + cout << " i=" << i << " exists " << exists << " wrote " << w << std::endl; + string s = "snap" + stringify(i); + ASSERT_EQ(0, image.snap_create(s.c_str())); + wrote.push_back(w); + snap.push_back(s); + } + + for (int i=0; i diff, actual; + for (int k=i+1; k<=j; k++) + diff.union_of(wrote[k]); + cout << "from " << i << " to " << j << " diff " << diff << std::endl; + + image.snap_set(snap[j].c_str()); + ASSERT_EQ((int)size, image.diff_iterate(snap[i].c_str(), 0, size, iterate_cb, (void *)&actual)); + cout << " actual was " << actual << std::endl; + if (!diff.subset_of(actual)) { + interval_set i; + i.intersection_of(diff, actual); + interval_set l = diff; + l.subtract(i); + cout << " ... diff - (actual*diff) = " << l << std::endl; + } + ASSERT_TRUE(diff.subset_of(actual)); + } + } + + } + ioctx.close(); + ASSERT_EQ(0, destroy_one_pool_pp(pool_name, rados)); +}