git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: implement diff_iterate
author Sage Weil <sage@inktank.com>
Mon, 25 Mar 2013 21:14:50 +0000 (14:14 -0700)
committer Josh Durgin <josh.durgin@inktank.com>
Mon, 1 Apr 2013 06:32:40 +0000 (23:32 -0700)
Implement a diff_iterate() method that will iterate over an image and
report which extents vary between two snapshots (or a snapshot and the
head).  The callback gets an extent and a flag indicating whether it is
full of data or is known to be zero in the ending snapshot.

Signed-off-by: Sage Weil <sage@inktank.com>
src/Makefile.am
src/include/rados/rados_types.hpp
src/include/rbd/librbd.hpp
src/librbd/internal.cc
src/librbd/internal.h
src/librbd/librbd.cc
src/osdc/snap_set_diff.cc [new file with mode: 0644]
src/osdc/snap_set_diff.h [new file with mode: 0644]
src/test/librbd/test_librbd.cc

index b27c8bb6b7029a4d6ea066d1886bf388470e5462..c858f2a496ec6017ab91c91d99d6a52955d9bd09 100644 (file)
@@ -423,6 +423,7 @@ librbd_la_SOURCES = \
        librbd/WatchCtx.cc \
        osdc/ObjectCacher.cc \
        osdc/Striper.cc \
+       osdc/snap_set_diff.cc \
        cls/lock/cls_lock_client.cc \
        cls/lock/cls_lock_types.cc \
        cls/lock/cls_lock_ops.cc \
@@ -1940,6 +1941,7 @@ noinst_HEADERS = \
         osd/ReplicatedPG.h\
         osd/Watch.h\
         osd/osd_types.h\
+       osdc/snap_set_diff.h\
         osdc/Blinker.h\
         osdc/Filer.h\
         osdc/Journaler.h\
index 6cfc30f9015a3a2aa158d40405e4051948523426..b61bdcb7fbf8bf641c12542671ea654b10b11f7d 100644 (file)
@@ -13,7 +13,7 @@ struct clone_info_t {
   static const snap_t HEAD = ((snap_t)-1);
   snap_t cloneid;
   std::vector<snap_t> snaps;          // ascending
-  std::vector< std::pair<uint64_t,uint64_t> > overlap;
+  std::vector< std::pair<uint64_t,uint64_t> > overlap;  // with next newest
   uint64_t size;
 };
 
index a7bfcf43233c83b7daf08f5217fb8d634f8e348e..15d184e02559a2894be0b766be23e56604eaaae1 100644 (file)
@@ -155,6 +155,9 @@ public:
   ssize_t read(uint64_t ofs, size_t len, ceph::bufferlist& bl);
   int64_t read_iterate(uint64_t ofs, size_t len,
                       int (*cb)(uint64_t, size_t, const char *, void *), void *arg);
+  int64_t diff_iterate(const char *fromsnapname,
+                      uint64_t ofs, size_t len,
+                      int (*cb)(uint64_t, size_t, bool, void *), void *arg);
   ssize_t write(uint64_t ofs, size_t len, ceph::bufferlist& bl);
   int discard(uint64_t ofs, uint64_t len);
 
index b740e46b7c162cc31af3e08f6088fba2a7913035..b1d30452f5edbd94fc5d6c2495b10868aeb93714 100644 (file)
@@ -20,6 +20,8 @@
 #include "librbd/parent_types.h"
 #include "include/util.h"
 
+#include "osdc/snap_set_diff.h"
+
 #define dout_subsys ceph_subsys_rbd
 #undef dout_prefix
 #define dout_prefix *_dout << "librbd: "
@@ -2254,6 +2256,141 @@ reprotect_and_return_err:
     return total_read;
   }
 
+  int64_t diff_iterate(ImageCtx *ictx, const char *fromsnapname,
+                      uint64_t off, size_t len,
+                      int (*cb)(uint64_t, size_t, bool, void *),
+                      void *arg)
+  {
+    utime_t start_time, elapsed;
+
+    ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
+                        << " len = " << len << dendl;
+
+    int r = ictx_check(ictx);
+    if (r < 0)
+      return r;
+
+    uint64_t mylen = len;
+    r = clip_io(ictx, off, &mylen);
+    if (r < 0)
+      return r;
+
+    librados::IoCtx head_ctx;
+
+    ictx->md_lock.get_read();
+    ictx->snap_lock.get_read();
+    head_ctx.dup(ictx->data_ctx);
+    snap_t from_snap_id = 0;
+    uint64_t from_size = 0;
+    if (fromsnapname) {
+      from_snap_id = ictx->get_snap_id(fromsnapname);
+      from_size = ictx->get_image_size(from_snap_id);
+    }
+    snap_t end_snap_id = ictx->snap_id;
+    uint64_t end_size = ictx->get_image_size(end_snap_id);
+    ictx->snap_lock.put_read();
+    ictx->md_lock.put_read();
+    if (from_snap_id == CEPH_NOSNAP) {
+      return -EINVAL;
+    }
+    if (from_snap_id == end_snap_id) {
+      // no diff.
+      return 0;
+    }
+
+    // we must list snaps via the head, not end snap
+    head_ctx.snap_set_read(CEPH_SNAPDIR);
+
+    ldout(ictx->cct, 20) << "diff_iterate from " << from_snap_id << " to " << end_snap_id
+                        << " size from " << from_size << " to " << end_size << dendl;
+
+    // FIXME: if end_size > from_size, we could read_iterate for the
+    // final part, and skip the listsnaps op.
+
+    int64_t total_read = 0;
+    uint64_t period = ictx->get_stripe_period();
+    uint64_t left = mylen;
+
+    start_time = ceph_clock_now(ictx->cct);
+    while (left > 0) {
+      uint64_t period_off = off - (off % period);
+      uint64_t read_len = min(period_off + period - off, left);
+
+      // map to extents
+      map<object_t,vector<ObjectExtent> > object_extents;
+      Striper::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout,
+                              off, read_len, object_extents, 0);
+
+      // get snap info for each object
+      for (map<object_t,vector<ObjectExtent> >::iterator p = object_extents.begin();
+          p != object_extents.end();
+          ++p) {
+       ldout(ictx->cct, 20) << "diff_iterate object " << p->first << dendl;
+
+       librados::snap_set_t snap_set;
+       uint64_t size;
+
+       librados::ObjectReadOperation op;
+       op.stat(&size, NULL, NULL);
+       op.list_snaps(&snap_set, NULL);
+       int r = head_ctx.operate(p->first.name, &op, NULL);
+       if (r == -ENOENT)
+         continue;
+       if (r < 0)
+         return r;
+
+       // calc diff from from_snap_id -> to_snap_id
+       interval_set<uint64_t> diff;
+       bool end_exists;
+       calc_snap_set_diff(ictx->cct, snap_set,
+                          from_snap_id,
+                          end_snap_id == CEPH_NOSNAP ? librados::clone_info_t::HEAD : end_snap_id,
+                          &diff, &end_exists);
+       ldout(ictx->cct, 20) << "  diff " << diff << " end_exists=" << end_exists << dendl;
+       if (diff.empty())
+         continue;
+
+       for (vector<ObjectExtent>::iterator q = p->second.begin(); q != p->second.end(); ++q) {
+         ldout(ictx->cct, 20) << "diff_iterate object " << p->first
+                              << " extent " << q->offset << "~" << q->length
+                              << " from " << q->buffer_extents
+                              << dendl;
+         uint64_t opos = q->offset;
+         for (vector<pair<uint64_t,uint64_t> >::iterator r = q->buffer_extents.begin();
+              r != q->buffer_extents.end();
+              ++r) {
+           interval_set<uint64_t> overlap;
+           overlap.insert(opos, r->second);
+           overlap.intersection_of(diff);
+           ldout(ictx->cct, 20) << " opos " << opos
+                                << " buf " << r->first << "~" << r->second
+                                << " overlap " << overlap
+                                << dendl;
+           for (interval_set<uint64_t>::iterator s = overlap.begin();
+                s != overlap.end();
+                ++s) {
+             uint64_t logical_off = off + s.get_start();
+             ldout(ictx->cct, 20) << "   overlap extent " << s.get_start() << "~" << s.get_len()
+                                  << " logical "
+                                  << logical_off << "~" << s.get_len()
+                                  << dendl;
+             cb(logical_off, s.get_len(), !end_exists, arg);
+           }
+           opos += r->second;
+         }
+         assert(opos == q->offset + q->length);
+       }
+      }
+
+      total_read += read_len;
+      left -= read_len;
+      off += read_len;
+    }
+
+    elapsed = ceph_clock_now(ictx->cct) - start_time;
+    return total_read;
+  }
+
   int simple_read_cb(uint64_t ofs, size_t len, const char *buf, void *arg)
   {
     char *dest_buf = (char *)arg;
index f1392f690a257e2873a7ab232e5311986a3b034c..66e97b6b19f107c1ffd82ccadafb1934959a3402 100644 (file)
@@ -167,6 +167,10 @@ namespace librbd {
   int64_t read_iterate(ImageCtx *ictx, uint64_t off, size_t len,
                       int (*cb)(uint64_t, size_t, const char *, void *),
                       void *arg);
+  int64_t diff_iterate(ImageCtx *ictx, const char *fromsnapname,
+                      uint64_t off, size_t len,
+                      int (*cb)(uint64_t, size_t, bool, void *),
+                      void *arg);
   ssize_t read(ImageCtx *ictx, uint64_t off, size_t len, char *buf);
   ssize_t read(ImageCtx *ictx, const vector<pair<uint64_t,uint64_t> >& image_extents,
               char *buf, bufferlist *pbl);
index 02d8fbf3211672a0cbe196ab58aacd73f3159e16..4ee178c6b760620bf6c4f1753cb8d5fb3964651f 100644 (file)
@@ -436,6 +436,15 @@ namespace librbd {
     return librbd::read_iterate(ictx, ofs, len, cb, arg);
   }
 
+  int64_t Image::diff_iterate(const char *fromsnapname,
+                             uint64_t ofs, size_t len,
+                             int (*cb)(uint64_t, size_t, bool, void *),
+                             void *arg)
+  {
+    ImageCtx *ictx = (ImageCtx *)ctx;
+    return librbd::diff_iterate(ictx, fromsnapname, ofs, len, cb, arg);
+  }
+
   ssize_t Image::write(uint64_t ofs, size_t len, bufferlist& bl)
   {
     ImageCtx *ictx = (ImageCtx *)ctx;
diff --git a/src/osdc/snap_set_diff.cc b/src/osdc/snap_set_diff.cc
new file mode 100644 (file)
index 0000000..4292b89
--- /dev/null
@@ -0,0 +1,100 @@
+
+#include <vector>
+
+#include "snap_set_diff.h"
+#include "common/ceph_context.h"
+#include "include/rados/librados.hpp"
+#include "include/interval_set.h"
+#include "common/debug.h"
+
+#define dout_subsys ceph_subsys_rbd
+
+/**
+ * calculate intervals/extents that vary between two snapshots
+ */
+void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set,
+                       librados::snap_t start,
+                       librados::snap_t end,
+                       interval_set<uint64_t> *diff, bool *end_exists)
+{
+  ldout(cct, 10) << "calc_snap_set_diff start " << start << " end " << end
+                << ", snap_set seq " << snap_set.seq << dendl;
+  bool saw_start = false;
+  uint64_t start_size = 0;
+  diff->clear();
+  *end_exists = false;
+
+  for (vector<librados::clone_info_t>::const_iterator r = snap_set.clones.begin();
+       r != snap_set.clones.end();
+       ) {
+    // make an interval, and hide the fact that the HEAD doesn't
+    // include itself in the snaps list
+    librados::snap_t a, b;
+    b = r->cloneid;
+    if (b == librados::clone_info_t::HEAD) {
+      // head is valid starting from right after the last seen seq
+      a = snap_set.seq + 1;
+    } else {
+      assert(b == r->snaps[r->snaps.size()-1]);
+      a = r->snaps[0];
+    }
+    ldout(cct, 20) << " clone " << r->cloneid << " snaps " << r->snaps
+                  << " -> [" << a << "," << b << "]"
+                  << " size " << r->size << " overlap to next " << r->overlap << dendl;
+    
+    if (b < start) {
+      // this is before start
+      ++r;
+      continue;
+    }
+
+    if (!saw_start) {
+      if (start < a) {
+       ldout(cct, 20) << "  start, after " << start << dendl;
+       // this means the object didn't exist at start
+       diff->insert(0, r->size);
+       start_size = 0;
+      } else {
+       ldout(cct, 20) << "  start" << dendl;
+       start_size = r->size;
+      }
+      saw_start = true;
+    }
+
+    if (end < a) {
+      ldout(cct, 20) << " past end " << end << ", end object does not exist" << dendl;
+      *end_exists = false;
+      if (start_size) {
+       diff->clear();
+       diff->insert(0, start_size);
+      }
+      break;
+    }
+    if (end <= b) {
+      ldout(cct, 20) << " end" << dendl;
+      *end_exists = true;
+      break;
+    }
+
+    // start with the max(this size, next size), and subtract off any
+    // overlap
+    const vector<pair<uint64_t, uint64_t> > *overlap = &r->overlap;
+    interval_set<uint64_t> diff_to_next;
+    uint64_t max_size = r->size;
+    ++r;
+    if (r != snap_set.clones.end()) {
+      if (r->size > max_size)
+       max_size = r->size;
+    }
+    if (max_size)
+      diff_to_next.insert(0, max_size);
+    for (vector<pair<uint64_t, uint64_t> >::const_iterator p = overlap->begin();
+        p != overlap->end();
+        ++p) {
+      diff_to_next.erase(p->first, p->second);
+    }
+    ldout(cct, 20) << "  diff_to_next " << diff_to_next << dendl;
+    diff->union_of(diff_to_next);
+    ldout(cct, 20) << "  diff now " << *diff << dendl;
+  }
+}
diff --git a/src/osdc/snap_set_diff.h b/src/osdc/snap_set_diff.h
new file mode 100644 (file)
index 0000000..714dd13
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef __CEPH_OSDC_SNAP_SET_DIFF_H
+#define __CEPH_OSDC_SNAP_SET_DIFF_H
+
+class CephContext;
+#include "include/rados/rados_types.hpp"
+#include "include/interval_set.h"
+/**
+ * Compute the byte ranges of a single object that vary between two snap ids.
+ *
+ * @param cct         context used for debug logging
+ * @param snap_set    the object's clone list
+ * @param start       snap id of the starting point
+ * @param end         snap id of the ending point
+ * @param diff        [out] resulting ranges; cleared before anything is added
+ * @param end_exists  [out] reset to false on entry, set to true when a clone
+ *                    covering end is found
+ */
+void calc_snap_set_diff(CephContext *cct,
+                       const librados::snap_set_t& snap_set,
+                       librados::snap_t start, librados::snap_t end,
+                       interval_set<uint64_t> *diff,
+                       bool *end_exists);
+
+#endif
index e38d317485fc2e571fec0c08c32f1a882fa708a9..9ae72c704a2ecf8ab55c20dabb571f70db78274c 100644 (file)
@@ -33,6 +33,7 @@
 
 #include "test/librados/test.h"
 #include "common/errno.h"
+#include "include/interval_set.h"
 #include "include/stringify.h"
 
 using namespace std;
@@ -1494,3 +1495,139 @@ TEST(LibRBD, FlushAioPP)
   ioctx.close();
   ASSERT_EQ(0, destroy_one_pool_pp(pool_name, rados));
 }
+
+
+// diff_iterate() callback used by the tests below: logs each reported extent
+// and records it in the interval_set<uint64_t> passed through arg.  The zero
+// flag is ignored.
+int iterate_cb(uint64_t off, size_t len, bool zero, void *arg)
+{
+  interval_set<uint64_t>& seen = *static_cast<interval_set<uint64_t> *>(arg);
+  cout << "iterate_cb " << off << "~" << len << std::endl;
+  seen.insert(off, len);
+  return 0;
+}
+
+void scribble(librbd::Image& image, int n, int max, interval_set<uint64_t> *exists, interval_set<uint64_t> *what)
+{
+  uint64_t size;
+  image.size(&size);
+  for (int i=0; i<n; i++) {
+    uint64_t off = rand() % (size - max + 1);
+    uint64_t len = 1 + rand() % max;
+    interval_set<uint64_t> w;
+    w.insert(off, len);
+    if (rand() % 4 == 0) {
+      ASSERT_EQ((int)len, image.discard(off, len));
+      w.intersection_of(*exists);
+      what->union_of(w);
+      exists->subtract(w);
+    } else {
+      bufferlist bl;
+      bl.append(buffer::create(len));
+      bl.zero();
+      ASSERT_EQ((int)len, image.write(off, len, bl));
+      what->union_of(w);
+      exists->union_of(w);
+    }
+  }
+}
+/*
+ * Scribble on a fresh image, snapshot it as "one", scribble again, then
+ * check that diff_iterate() from "one" to the head reports at least every
+ * extent touched after the snapshot.
+ */
+TEST(LibRBD, DiffIterate)
+{
+  librados::Rados rados;
+  librados::IoCtx ioctx;
+  string pool_name = get_temp_pool_name();
+
+  ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
+  ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
+
+  {
+    librbd::RBD rbd;
+    librbd::Image image;
+    int order = 0;
+    const char *name = "testimg";
+    uint64_t size = 20 << 20;
+
+    ASSERT_EQ(0, create_image_pp(rbd, ioctx, name, size, &order));
+    ASSERT_EQ(0, rbd.open(ioctx, image, name, NULL));
+
+    interval_set<uint64_t> exists;
+    interval_set<uint64_t> one, two;  // ranges changed before / after snapshot "one"
+    scribble(image, 10, 102400, &exists, &one);
+    cout << " wrote " << one << std::endl;
+    ASSERT_EQ(0, image.snap_create("one"));
+    scribble(image, 10, 102400, &exists, &two);
+    cout << " wrote " << two << std::endl;
+
+    interval_set<uint64_t> diff;  // filled in by iterate_cb
+    ASSERT_EQ((int)size, image.diff_iterate("one", 0, size, iterate_cb, (void *)&diff));
+    cout << " diff was " << diff << std::endl;
+    if (!two.subset_of(diff)) {  // print what is missing before the assertion below fails
+      interval_set<uint64_t> i;
+      i.intersection_of(two, diff);
+      interval_set<uint64_t> l = two;
+      l.subtract(i);
+      cout << " ... two - (two*diff) = " << l << std::endl;     
+    }
+    ASSERT_TRUE(two.subset_of(diff));  // the reported diff may be a superset of what changed
+  }
+  ioctx.close();
+  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, rados));
+}
+
+TEST(LibRBD, DiffIterateStress)
+{
+  librados::Rados rados;
+  librados::IoCtx ioctx;
+  string pool_name = get_temp_pool_name();
+
+  ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
+  ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
+
+  {
+    librbd::RBD rbd;
+    librbd::Image image;
+    int order = 0;
+    const char *name = "testimg";
+    uint64_t size = 400 << 20;
+
+    ASSERT_EQ(0, create_image_pp(rbd, ioctx, name, size, &order));
+    ASSERT_EQ(0, rbd.open(ioctx, image, name, NULL));
+
+    interval_set<uint64_t> exists;
+    vector<interval_set<uint64_t> > wrote;
+    vector<string> snap;
+    int n = 10;
+    for (int i=0; i<n; i++) {
+      interval_set<uint64_t> w;
+      scribble(image, 10, 8192000, &exists, &w);
+      cout << " i=" << i << " exists " << exists << " wrote " << w << std::endl;
+      string s = "snap" + stringify(i);
+      ASSERT_EQ(0, image.snap_create(s.c_str()));
+      wrote.push_back(w);
+      snap.push_back(s);
+    }
+
+    for (int i=0; i<n-1; i++) {
+      for (int j=i+1; j<n; j++) {
+       interval_set<uint64_t> diff, actual;
+       for (int k=i+1; k<=j; k++)
+         diff.union_of(wrote[k]);
+       cout << "from " << i << " to " << j << " diff " << diff << std::endl;
+
+       image.snap_set(snap[j].c_str());
+       ASSERT_EQ((int)size, image.diff_iterate(snap[i].c_str(), 0, size, iterate_cb, (void *)&actual));
+       cout << " actual was " << actual << std::endl;
+       if (!diff.subset_of(actual)) {
+         interval_set<uint64_t> i;
+         i.intersection_of(diff, actual);
+         interval_set<uint64_t> l = diff;
+         l.subtract(i);
+         cout << " ... diff - (actual*diff) = " << l << std::endl;     
+       }
+       ASSERT_TRUE(diff.subset_of(actual));
+      }
+    }
+
+  }
+  ioctx.close();
+  ASSERT_EQ(0, destroy_one_pool_pp(pool_name, rados));
+}