#include "librbd/DiffIterate.h"
#include "librbd/ImageCtx.h"
+#include "librbd/internal.h"
#include "include/rados/librados.hpp"
#include "include/interval_set.h"
+#include "common/errno.h"
+#include "common/Mutex.h"
+#include "common/Throttle.h"
#include "librados/snap_set_diff.h"
+#include <boost/tuple/tuple.hpp>
+#include <list>
+#include <map>
+#include <vector>
#define dout_subsys ceph_subsys_rbd
#undef dout_prefix
OBJECT_DIFF_STATE_HOLE = 2
};
+class DiffContext { // shared state for one diff run: throttles in-flight object ops, buffers each op's diffs, and replays them to the callback in request order
+public:
+ typedef boost::tuple<uint64_t, size_t, bool> Diff; // (offset, length, exists flag); handed to the callback in this order
+ typedef std::list<Diff> Diffs; // all diffs produced by a single request
+
+ bool whole_object; // when set, C_DiffObject reports full object extents instead of byte ranges
+ uint64_t from_snap_id; // start snapshot of the comparison
+ uint64_t end_snap_id; // end snapshot of the comparison
+ interval_set<uint64_t> parent_diff; // consulted by C_DiffObject when an object does not exist; filled in by the caller
+
+ DiffContext(ImageCtx &image_ctx, DiffIterate::Callback callback,
+ void *callback_arg, bool _whole_object, uint64_t _from_snap_id,
+ uint64_t _end_snap_id)
+ : whole_object(_whole_object), from_snap_id(_from_snap_id),
+ end_snap_id(_end_snap_id), m_lock("librbd::DiffContext::m_lock"),
+ m_image_ctx(image_ctx), m_callback(callback),
+ m_callback_arg(callback_arg), m_pending_ops(0), m_return_value(0),
+ m_next_request(0), m_waiting_request(0)
+ {
+ }
+
+ int invoke_callback() { // deliver buffered diffs for consecutive completed requests; returns 0, or the recorded error without delivering anything
+ Mutex::Locker locker(m_lock);
+ if (m_return_value < 0) {
+ return m_return_value;
+ }
+
+ std::map<uint64_t, Diffs>::iterator it;
+ while ((it = m_request_diffs.begin()) != m_request_diffs.end() &&
+ it->first == m_waiting_request) { // stop at the first gap so results are never delivered out of request order
+ Diffs diffs = it->second; // copy out, then erase, so the entry is gone before the lock is dropped below
+ m_request_diffs.erase(it);
+
+ for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) {
+ m_lock.Unlock(); // the callback runs without m_lock held
+ m_callback(d->get<0>(), d->get<1>(), d->get<2>(), m_callback_arg); // NOTE(review): any result from the callback is discarded here -- confirm whether Callback returns a status that should stop iteration
+ m_lock.Lock(); // re-acquire before touching shared state; the Locker above releases it on return
+ }
+ ++m_waiting_request;
+ }
+ return 0;
+ }
+
+ int wait_for_ret() { // block until no ops are pending, then return the recorded result (0 or the first negative error)
+ Mutex::Locker locker(m_lock);
+ while (m_pending_ops > 0) {
+ m_cond.Wait(m_lock);
+ }
+ return m_return_value;
+ }
+
+ uint64_t start_op() { // reserve a slot for a new op, blocking while the concurrency limit is reached; returns the op's request number
+ Mutex::Locker locker(m_lock);
+ while (m_pending_ops >= m_image_ctx.concurrent_management_ops) {
+ m_cond.Wait(m_lock);
+ }
+ ++m_pending_ops;
+ return m_next_request++; // request numbers are handed out sequentially starting at 0
+ }
+
+ void finish_op(uint64_t request_num, int r, const Diffs &diffs) { // record an op's diffs and result, release its slot, and wake a waiter
+ Mutex::Locker locker(m_lock);
+ m_request_diffs[request_num] = diffs; // stored even when r < 0
+
+ if (m_return_value == 0 && r < 0) { // only the first error is kept
+ m_return_value = r;
+ }
+
+ --m_pending_ops;
+ m_cond.Signal(); // start_op() and wait_for_ret() both wait on this condition
+ }
+
+private:
+ Mutex m_lock; // guards every member declared below it
+ Cond m_cond;
+
+ ImageCtx &m_image_ctx;
+ DiffIterate::Callback m_callback;
+ void *m_callback_arg;
+
+ uint32_t m_pending_ops; // ops started via start_op() and not yet finished
+ int m_return_value; // 0 until some op reports a negative result
+
+ uint64_t m_next_request; // next request number start_op() will hand out
+ uint64_t m_waiting_request; // request number whose diffs invoke_callback() will deliver next
+
+ std::map<uint64_t, Diffs> m_request_diffs; // completed-but-undelivered diffs keyed by request number
+};
+
+class C_DiffObject : public Context { // one asynchronous list_snaps request for a single object; converts the result into diffs and reports them to the DiffContext
+public:
+ C_DiffObject(ImageCtx &image_ctx, librados::IoCtx &head_ctx,
+ DiffContext &diff_context, const std::string &oid,
+ uint64_t offset, const std::vector<ObjectExtent> &object_extents)
+ : m_image_ctx(image_ctx), m_head_ctx(head_ctx),
+ m_diff_context(diff_context), m_oid(oid), m_offset(offset),
+ m_object_extents(object_extents), m_snap_ret(0)
+ {
+ m_request_num = m_diff_context.start_op(); // may block here until the DiffContext has a free slot
+ }
+
+ void send() { // issue the asynchronous list_snaps read for m_oid
+ librados::ObjectReadOperation op;
+ op.list_snaps(&m_snap_set, &m_snap_ret);
+
+ librados::AioCompletion *rados_completion =
+ librados::Rados::aio_create_completion(this, NULL, rados_ctx_cb); // presumably rados_ctx_cb completes this Context when the op finishes -- defined elsewhere, confirm
+ int r = m_head_ctx.aio_operate(m_oid, rados_completion, &op, NULL);
+ assert(r == 0);
+ rados_completion->release();
+ }
+
+protected:
+ virtual void finish(int r) { // turn the list_snaps outcome into diffs and hand them, with the final result code, to the DiffContext
+ CephContext *cct = m_image_ctx.cct;
+ if (r == 0 && m_snap_ret < 0) { // the operation succeeded overall but list_snaps itself reported an error
+ r = m_snap_ret;
+ }
+
+ DiffContext::Diffs diffs;
+ if (r == 0) {
+ ldout(cct, 20) << "object " << m_oid << ": list_snaps complete" << dendl;
+ compute_diffs(&diffs);
+ } else if (r == -ENOENT) { // a missing object is not an error; report parent overlap instead
+ ldout(cct, 20) << "object " << m_oid << ": list_snaps (not found)"
+ << dendl;
+ r = 0;
+ compute_parent_overlap(&diffs);
+ } else {
+ ldout(cct, 20) << "object " << m_oid << ": list_snaps failed: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ m_diff_context.finish_op(m_request_num, r, diffs); // always called, so the slot taken in the constructor is released on every path
+ }
+
+private:
+ ImageCtx &m_image_ctx;
+ librados::IoCtx &m_head_ctx;
+ DiffContext &m_diff_context;
+ uint64_t m_request_num; // sequence number obtained from DiffContext::start_op()
+ std::string m_oid;
+ uint64_t m_offset; // base added to extent offsets when producing reported offsets
+ std::vector<ObjectExtent> m_object_extents;
+
+ librados::snap_set_t m_snap_set; // filled in by the list_snaps operation
+ int m_snap_ret; // list_snaps result code, checked in finish()
+
+ void compute_diffs(DiffContext::Diffs *diffs) { // append the changed ranges of this object to *diffs; appends nothing when the snap-set diff is empty
+ CephContext *cct = m_image_ctx.cct;
+
+ // calc diff from from_snap_id -> end_snap_id
+ interval_set<uint64_t> diff;
+ bool end_exists;
+ calc_snap_set_diff(cct, m_snap_set, m_diff_context.from_snap_id,
+ m_diff_context.end_snap_id, &diff, &end_exists);
+ ldout(cct, 20) << " diff " << diff << " end_exists=" << end_exists
+ << dendl;
+ if (diff.empty()) {
+ return;
+ } else if (m_diff_context.whole_object) {
+ // provide the full object extents to the callback
+ for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
+ q != m_object_extents.end(); ++q) {
+ diffs->push_back(boost::make_tuple(m_offset + q->offset, q->length,
+ end_exists));
+ }
+ return;
+ }
+
+ for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
+ q != m_object_extents.end(); ++q) {
+ ldout(cct, 20) << "diff_iterate object " << m_oid << " extent "
+ << q->offset << "~" << q->length << " from "
+ << q->buffer_extents << dendl;
+ uint64_t opos = q->offset; // running position within the object, advanced by each buffer extent's length
+ for (vector<pair<uint64_t,uint64_t> >::iterator r =
+ q->buffer_extents.begin();
+ r != q->buffer_extents.end(); ++r) {
+ interval_set<uint64_t> overlap; // object extents
+ overlap.insert(opos, r->second);
+ overlap.intersection_of(diff);
+ ldout(m_image_ctx.cct, 20) << " opos " << opos
+ << " buf " << r->first << "~" << r->second
+ << " overlap " << overlap << dendl;
+ for (interval_set<uint64_t>::iterator s = overlap.begin();
+ s != overlap.end(); ++s) {
+ uint64_t su_off = s.get_start() - opos; // offset of this overlap within the current buffer extent
+ uint64_t logical_off = m_offset + r->first + su_off;
+ ldout(cct, 20) << " overlap extent " << s.get_start() << "~"
+ << s.get_len() << " logical " << logical_off << "~"
+ << s.get_len() << dendl;
+ diffs->push_back(boost::make_tuple(logical_off, s.get_len(),
+ end_exists));
+ }
+ opos += r->second;
+ }
+ assert(opos == q->offset + q->length); // the buffer extents must add up to exactly the object extent
+ }
+ }
+
+ void compute_parent_overlap(DiffContext::Diffs *diffs) { // for a missing object, append the parts of its extents that intersect parent_diff
+ if (m_diff_context.from_snap_id == 0 &&
+ !m_diff_context.parent_diff.empty()) {
+ // report parent diff instead
+ for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
+ q != m_object_extents.end(); ++q) {
+ for (vector<pair<uint64_t,uint64_t> >::iterator r =
+ q->buffer_extents.begin();
+ r != q->buffer_extents.end(); ++r) {
+ interval_set<uint64_t> o;
+ o.insert(m_offset + r->first, r->second);
+ o.intersection_of(m_diff_context.parent_diff);
+ ldout(m_image_ctx.cct, 20) << " reporting parent overlap " << o
+ << dendl;
+ for (interval_set<uint64_t>::iterator s = o.begin(); s != o.end();
+ ++s) {
+ diffs->push_back(boost::make_tuple(s.get_start(), s.get_len(),
+ true)); // parent-backed ranges are always reported with the flag set
+ }
+ }
+ }
+ }
+ }
+};
+
} // anonymous namespace
int DiffIterate::execute() { // reports extents that differ between the from-snapshot (if named) and the image's current snap_id through m_callback; returns 0 or a negative error
- librados::IoCtx head_ctx;
+ CephContext* cct = m_image_ctx.cct;
- m_image_ctx.md_lock.get_read();
- m_image_ctx.snap_lock.get_read();
- head_ctx.dup(m_image_ctx.data_ctx);
+ librados::IoCtx head_ctx;
librados::snap_t from_snap_id = 0;
+ librados::snap_t end_snap_id;
uint64_t from_size = 0;
- if (m_from_snap_name) {
- from_snap_id = m_image_ctx.get_snap_id(m_from_snap_name);
- from_size = m_image_ctx.get_image_size(from_snap_id);
+ uint64_t end_size;
+ { // md_lock and snap_lock are read-held only inside this scope, while the snap ids and sizes are captured
+ RWLock::RLocker md_locker(m_image_ctx.md_lock);
+ RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+ head_ctx.dup(m_image_ctx.data_ctx);
+ if (m_from_snap_name) {
+ from_snap_id = m_image_ctx.get_snap_id(m_from_snap_name);
+ from_size = m_image_ctx.get_image_size(from_snap_id);
+ }
+ end_snap_id = m_image_ctx.snap_id;
+ end_size = m_image_ctx.get_image_size(end_snap_id);
}
- librados::snap_t end_snap_id = m_image_ctx.snap_id;
- uint64_t end_size = m_image_ctx.get_image_size(end_snap_id);
- m_image_ctx.snap_lock.put_read();
- m_image_ctx.md_lock.put_read();
+
if (from_snap_id == CEPH_NOSNAP) {
return -ENOENT;
}
if (m_whole_object && (m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0) {
r = diff_object_map(from_snap_id, end_snap_id, &object_diff_state);
if (r < 0) {
- ldout(m_image_ctx.cct, 5) << "diff_iterate fast diff disabled" << dendl;
+ ldout(cct, 5) << "fast diff disabled" << dendl;
} else {
- ldout(m_image_ctx.cct, 5) << "diff_iterate fast diff enabled" << dendl;
+ ldout(cct, 5) << "fast diff enabled" << dendl;
fast_diff_enabled = true;
}
}
// we must list snaps via the head, not end snap
head_ctx.snap_set_read(CEPH_SNAPDIR);
- ldout(m_image_ctx.cct, 5) << "diff_iterate from " << from_snap_id << " to "
- << end_snap_id << " size from " << from_size
- << " to " << end_size << dendl;
-
- // FIXME: if end_size > from_size, we could read_iterate for the
- // final part, and skip the listsnaps op.
+ ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to "
+ << end_snap_id << " size from " << from_size
+ << " to " << end_size << dendl;
// check parent overlap only if we are comparing to the beginning of time
- interval_set<uint64_t> parent_diff;
+ DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg,
+ m_whole_object, from_snap_id, end_snap_id); // local object; the C_DiffObject requests created below hold a reference to it
if (m_include_parent && from_snap_id == 0) {
RWLock::RLocker l(m_image_ctx.snap_lock);
RWLock::RLocker l2(m_image_ctx.parent_lock);
m_image_ctx.get_parent_overlap(from_snap_id, &overlap);
r = 0;
if (m_image_ctx.parent && overlap > 0) {
- ldout(m_image_ctx.cct, 10) << " first getting parent diff" << dendl;
+ ldout(cct, 10) << " first getting parent diff" << dendl;
DiffIterate diff_parent(*m_image_ctx.parent, NULL, 0, overlap,
m_include_parent, m_whole_object,
&DiffIterate::simple_diff_cb,
&diff_context.parent_diff);
r = diff_parent.execute();
}
- if (r < 0)
+ if (r < 0) {
return r;
+ }
}
uint64_t period = m_image_ctx.get_stripe_period();
// map to extents
map<object_t,vector<ObjectExtent> > object_extents;
- Striper::file_to_extents(m_image_ctx.cct, m_image_ctx.format_string,
+ Striper::file_to_extents(cct, m_image_ctx.format_string,
&m_image_ctx.layout, off, read_len, 0,
object_extents, 0);
for (map<object_t,vector<ObjectExtent> >::iterator p =
object_extents.begin();
p != object_extents.end(); ++p) {
- ldout(m_image_ctx.cct, 20) << "diff_iterate object " << p->first << dendl;
+ ldout(cct, 20) << "object " << p->first << dendl;
if (fast_diff_enabled) {
const uint64_t object_no = p->second.front().objectno;
m_callback(off + q->offset, q->length, updated, m_callback_arg);
}
}
- continue;
- }
-
- librados::snap_set_t snap_set;
- r = head_ctx.list_snaps(p->first.name, &snap_set);
- if (r == -ENOENT) {
- if (from_snap_id == 0 && !parent_diff.empty()) {
- // report parent diff instead
- for (vector<ObjectExtent>::iterator q = p->second.begin();
- q != p->second.end(); ++q) {
- for (vector<pair<uint64_t,uint64_t> >::iterator r =
- q->buffer_extents.begin();
- r != q->buffer_extents.end(); ++r) {
- interval_set<uint64_t> o;
- o.insert(off + r->first, r->second);
- o.intersection_of(parent_diff);
- ldout(m_image_ctx.cct, 20) << " reporting parent overlap " << o
- << dendl;
- for (interval_set<uint64_t>::iterator s = o.begin(); s != o.end();
- ++s) {
- m_callback(s.get_start(), s.get_len(), true, m_callback_arg);
- }
- }
- }
- }
- continue;
- }
- if (r < 0)
- return r;
-
- // calc diff from from_snap_id -> to_snap_id
- interval_set<uint64_t> diff;
- bool end_exists;
- calc_snap_set_diff(m_image_ctx.cct, snap_set, from_snap_id, end_snap_id,
- &diff, &end_exists);
- ldout(m_image_ctx.cct, 20) << " diff " << diff << " end_exists="
- << end_exists << dendl;
- if (diff.empty()) {
- continue;
- } else if (m_whole_object) {
- // provide the full object extents to the callback
- for (vector<ObjectExtent>::iterator q = p->second.begin();
- q != p->second.end(); ++q) {
- m_callback(off + q->offset, q->length, end_exists, m_callback_arg);
- }
- continue;
- }
-
- for (vector<ObjectExtent>::iterator q = p->second.begin();
- q != p->second.end(); ++q) {
- ldout(m_image_ctx.cct, 20) << "diff_iterate object " << p->first
- << " extent " << q->offset << "~"
- << q->length << " from " << q->buffer_extents
- << dendl;
- uint64_t opos = q->offset;
- for (vector<pair<uint64_t,uint64_t> >::iterator r =
- q->buffer_extents.begin();
- r != q->buffer_extents.end(); ++r) {
- interval_set<uint64_t> overlap; // object extents
- overlap.insert(opos, r->second);
- overlap.intersection_of(diff);
- ldout(m_image_ctx.cct, 20) << " opos " << opos
- << " buf " << r->first << "~" << r->second
- << " overlap " << overlap << dendl;
- for (interval_set<uint64_t>::iterator s = overlap.begin();
- s != overlap.end(); ++s) {
- uint64_t su_off = s.get_start() - opos;
- uint64_t logical_off = off + r->first + su_off;
- ldout(m_image_ctx.cct, 20) << " overlap extent " << s.get_start()
- << "~" << s.get_len() << " logical "
- << logical_off << "~" << s.get_len() << dendl;
- m_callback(logical_off, s.get_len(), end_exists, m_callback_arg);
- }
- opos += r->second;
+ } else {
+ C_DiffObject *diff_object = new C_DiffObject(m_image_ctx, head_ctx, // NOTE(review): never deleted in this function -- presumably released by the Context completion path; confirm
+ diff_context,
+ p->first.name, off,
+ p->second);
+ diff_object->send();
+
+ r = diff_context.invoke_callback(); // deliver whatever results are already complete, in request order
+ if (r < 0) {
+ diff_context.wait_for_ret(); // let in-flight requests finish before diff_context goes out of scope
+ return r;
}
- assert(opos == q->offset + q->length);
}
}
off += read_len;
}
- return 0;
+ r = diff_context.wait_for_ret(); // wait for every outstanding request
+ if (r < 0) {
+ return r;
+ }
+
+ r = diff_context.invoke_callback(); // deliver the results that completed after the last in-loop delivery
+ return r;
}
int DiffIterate::diff_object_map(uint64_t from_snap_id, uint64_t to_snap_id,