return 0;
}
- int set_flags(librados::IoCtx *ioctx, const std::string &oid,
- uint64_t flags, uint64_t mask)
+ void set_flags(librados::ObjectWriteOperation *op, uint64_t flags,
+ uint64_t mask)
{
bufferlist inbl;
::encode(flags, inbl);
::encode(mask, inbl);
-
- bufferlist outbl;
- return ioctx->exec(oid, "rbd", "set_flags", inbl, outbl);
+ op->exec("rbd", "set_flags", inbl);
}
int remove_parent(librados::IoCtx *ioctx, const std::string &oid)
return ioctx->exec(oid, "rbd", "add_child", in, out);
}
- int remove_child(librados::IoCtx *ioctx, const std::string &oid,
- parent_spec pspec, const std::string &c_imageid)
+ void remove_child(librados::ObjectWriteOperation *op,
+ parent_spec pspec, const std::string &c_imageid)
{
- bufferlist in, out;
+ bufferlist in;
::encode(pspec.pool_id, in);
::encode(pspec.image_id, in);
::encode(pspec.snap_id, in);
::encode(c_imageid, in);
+ op->exec("rbd", "remove_child", in);
+ }
- return ioctx->exec(oid, "rbd", "remove_child", in, out);
+ int remove_child(librados::IoCtx *ioctx, const std::string &oid,
+ parent_spec pspec, const std::string &c_imageid)
+ {
+ librados::ObjectWriteOperation op;
+ remove_child(&op, pspec, c_imageid);
+ return ioctx->operate(oid, &op);
}
int get_children(librados::IoCtx *ioctx, const std::string &oid,
parent_spec pspec, uint64_t parent_overlap);
int get_flags(librados::IoCtx *ioctx, const std::string &oid,
snapid_t snap_id, uint64_t *flags);
- int set_flags(librados::IoCtx *ioctx, const std::string &oid,
- uint64_t flags, uint64_t mask);
+ void set_flags(librados::ObjectWriteOperation *op,
+ uint64_t flags, uint64_t mask);
int remove_parent(librados::IoCtx *ioctx, const std::string &oid);
void remove_parent(librados::ObjectWriteOperation *op);
int add_child(librados::IoCtx *ioctx, const std::string &oid,
parent_spec pspec, const std::string &c_imageid);
+ void remove_child(librados::ObjectWriteOperation *op,
+ parent_spec pspec, const std::string &c_imageid);
int remove_child(librados::IoCtx *ioctx, const std::string &oid,
parent_spec pspec, const std::string &c_imageid);
int get_children(librados::IoCtx *ioctx, const std::string &oid,
class FunctionContext : public Context {
public:
- FunctionContext(const boost::function<void()> &callback)
+ FunctionContext(const boost::function<void(int)> &callback)
: m_callback(callback)
{
}
virtual void finish(int r) {
- m_callback();
+ m_callback(r);
}
private:
- boost::function<void()> m_callback;
+ boost::function<void(int)> m_callback;
};
#undef mydout
#include "common/ceph_context.h"
#include "common/dout.h"
+#include "common/errno.h"
#include "common/Mutex.h"
#include "common/RWLock.h"
#include "librbd/AioCompletion.h"
#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
#include "librbd/internal.h"
#include "librbd/AioRequest.h"
#include "librbd/CopyupRequest.h"
-#include "librbd/ObjectMap.h"
+
+#include <boost/bind.hpp>
+#include <boost/optional.hpp>
#define dout_subsys ceph_subsys_rbd
#undef dout_prefix
namespace librbd {
AioRequest::AioRequest() :
- m_ictx(NULL), m_ioctx(NULL),
+ m_ictx(NULL),
m_object_no(0), m_object_off(0), m_object_len(0),
m_snap_id(CEPH_NOSNAP), m_completion(NULL), m_parent_completion(NULL),
m_hide_enoent(false) {}
const ::SnapContext &snapc, librados::snap_t snap_id,
Context *completion,
bool hide_enoent) :
- m_ictx(ictx), m_ioctx(&ictx->data_ctx), m_oid(oid), m_object_no(objectno),
+ m_ictx(ictx), m_oid(oid), m_object_no(objectno),
m_object_off(off), m_object_len(len), m_snap_id(snap_id),
m_completion(completion), m_parent_completion(NULL),
m_hide_enoent(hide_enoent) {
m_object_no,
m_image_extents);
m_ictx->copyup_list[m_object_no] = new_req;
- new_req->queue_read_from_parent();
+ new_req->queue_send();
}
}
}
}
op.set_op_flags2(m_op_flags);
- r = m_ioctx->aio_operate(m_oid, rados_completion, &op, flags, NULL);
-
+ r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &op, flags, NULL);
rados_completion->release();
return r;
}
bool hide_enoent)
: AioRequest(ictx, oid, object_no, object_off, len, snapc, snap_id,
completion, hide_enoent),
- m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val), m_entire_object(NULL)
+ m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val),
+ m_entire_object(NULL)
{
+ m_io_ctx.dup(ictx->data_ctx);
+ m_io_ctx.snap_set_read(CEPH_NOSNAP);
+
m_object_image_extents = objectx;
m_parent_overlap = object_overlap;
}
m_state = LIBRBD_AIO_WRITE_GUARD;
m_write.assert_exists();
ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl;
+ } else {
+ m_state = LIBRBD_AIO_WRITE_FLAT;
}
}
map<uint64_t, CopyupRequest*>::iterator it;
bool finished = true;
switch (m_state) {
+ case LIBRBD_AIO_WRITE_PRE:
+ ldout(m_ictx->cct, 20) << "WRITE_PRE" << dendl;
+ if (r < 0) {
+ return true;
+ }
+
+ send_write();
+ finished = false;
+ break;
+
+ case LIBRBD_AIO_WRITE_POST:
+ ldout(m_ictx->cct, 20) << "WRITE_POST" << dendl;
+ finished = true;
+ break;
+
case LIBRBD_AIO_WRITE_GUARD:
ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl;
m_entire_object = &(new_req->get_copyup_data());
m_ictx->copyup_list_lock.Unlock();
- new_req->read_from_parent();
+ new_req->send();
} else {
it->second->append_request(this);
m_entire_object = &it->second->get_copyup_data();
}
finished = false;
break;
- }
- if (r < 0) {
- ldout(m_ictx->cct, 20) << "error checking for object existence" << dendl;
+ } else if (r < 0) {
+ // pass the error code to the finish context
+ m_state = LIBRBD_AIO_WRITE_ERROR;
+ complete(r);
+ finished = false;
break;
}
+
+ finished = send_post();
break;
case LIBRBD_AIO_WRITE_COPYUP:
ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl;
m_state = LIBRBD_AIO_WRITE_GUARD;
- if (r < 0)
+ if (r < 0) {
return should_complete(r);
+ }
// Read data from waiting list safely. If this AioWrite created a
// CopyupRequest, m_read_data should be empty.
if (m_entire_object != NULL) {
- assert(m_ictx->copyup_list_lock.is_locked());
- assert(m_ictx->copyup_list.find(m_object_no) !=
- m_ictx->copyup_list.end());
assert(m_read_data.length() == 0);
m_read_data.append(*m_entire_object);
}
case LIBRBD_AIO_WRITE_FLAT:
ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl;
- // nothing to do
+
+ finished = send_post();
+ break;
+
+ case LIBRBD_AIO_WRITE_ERROR:
+ assert(r < 0);
+ lderr(m_ictx->cct) << "WRITE_ERROR: " << cpp_strerror(r)
+ << dendl;
break;
default:
}
int AbstractWrite::send() {
- ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " " << m_object_off << "~" << m_object_len << dendl;
+ ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " "
+ << m_object_off << "~" << m_object_len << dendl;
+
+ if (send_pre()) {
+ return 0;
+ } else {
+ send_write();
+ }
+ return 0;
+ }
+
+ bool AbstractWrite::send_pre() {
+ bool lost_exclusive_lock = false;
+ {
+ RWLock::RLocker l(m_ictx->owner_lock);
+ RWLock::RLocker l2(m_ictx->md_lock);
+ if (m_ictx->object_map == NULL) {
+ return false;
+ }
+
+ if (!m_ictx->image_watcher->is_lock_owner()) {
+ ldout(m_ictx->cct, 1) << "lost exclusive lock during write" << dendl;
+ lost_exclusive_lock = true;
+ } else {
+ ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " "
+ << m_object_off << "~" << m_object_len << dendl;
+
+ uint8_t new_state;
+ boost::optional<uint8_t> current_state;
+ pre_object_map_update(&new_state);
+
+ m_state = LIBRBD_AIO_WRITE_PRE;
+ FunctionContext *ctx = new FunctionContext(
+ boost::bind(&AioRequest::complete, this, _1));
+ m_ictx->object_map->aio_update(m_object_no, new_state,
+ current_state, ctx);
+ }
+ }
+
+ if (lost_exclusive_lock) {
+ complete(-ERESTART);
+ }
+ return true;
+ }
+
+ bool AbstractWrite::send_post() {
+ ldout(m_ictx->cct, 20) << "send_post " << this << " " << m_oid << " "
+ << m_object_off << "~" << m_object_len << dendl;
+
+ RWLock::RLocker l(m_ictx->owner_lock);
+ RWLock::RLocker l2(m_ictx->md_lock);
+ if (m_ictx->object_map == NULL || !post_object_map_update()) {
+ return true;
+ }
+
+ if (m_ictx->image_watcher->is_lock_supported() &&
+ !m_ictx->image_watcher->is_lock_owner()) {
+ // leave the object flagged as pending
+ ldout(m_ictx->cct, 1) << "lost exclusive lock during write" << dendl;
+ return true;
+ }
+
+ m_state = LIBRBD_AIO_WRITE_POST;
+ FunctionContext *ctx = new FunctionContext(
+ boost::bind(&AioRequest::complete, this, _1));
+ m_ictx->object_map->aio_update(m_object_no, OBJECT_NONEXISTENT,
+ OBJECT_PENDING, ctx);
+ return false;
+ }
+
+ void AbstractWrite::send_write() {
+ ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
+ << m_object_off << "~" << m_object_len << dendl;
+
+ guard_write();
+ add_write_ops(&m_write);
+ assert(m_write.size() != 0);
+
librados::AioCompletion *rados_completion =
librados::Rados::aio_create_completion(this, NULL, rados_req_cb);
- int r;
- assert(m_write.size());
- r = m_ioctx->aio_operate(m_oid, rados_completion, &m_write,
- m_snap_seq, m_snaps);
+ int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &m_write,
+ m_snap_seq, m_snaps);
+ assert(r == 0);
rados_completion->release();
- return r;
}
void AbstractWrite::send_copyup() {
ldout(m_ictx->cct, 20) << "send_copyup " << this << " " << m_oid << " " << m_object_off << "~" << m_object_len << dendl;
- if (!m_read_data.is_zero())
- m_copyup.exec("rbd", "copyup", m_read_data);
- add_copyup_ops();
+ librados::ObjectWriteOperation op;
+ if (!m_read_data.is_zero()) {
+ op.exec("rbd", "copyup", m_read_data);
+ }
+ add_write_ops(&op);
+ assert(op.size() != 0);
librados::AioCompletion *rados_completion =
librados::Rados::aio_create_completion(this, NULL, rados_req_cb);
- m_ictx->md_ctx.aio_operate(m_oid, rados_completion, &m_copyup,
+ m_ictx->md_ctx.aio_operate(m_oid, rados_completion, &op,
m_snap_seq, m_snaps);
rados_completion->release();
}
- void AioWrite::add_write_ops(librados::ObjectWriteOperation &wr) {
- wr.set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size());
- wr.write(m_object_off, m_write_data);
+ void AioWrite::add_write_ops(librados::ObjectWriteOperation *wr) {
+ wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size());
+ wr->write(m_object_off, m_write_data);
+ wr->set_op_flags2(m_op_flags);
}
}
#include "include/buffer.h"
#include "include/Context.h"
#include "include/rados/librados.hpp"
+#include "librbd/ObjectMap.h"
namespace librbd {
void read_from_parent(vector<pair<uint64_t,uint64_t> >& image_extents);
ImageCtx *m_ictx;
- librados::IoCtx *m_ioctx;
std::string m_oid;
uint64_t m_object_no, m_object_off, m_object_len;
librados::snap_t m_snap_id;
virtual ~AbstractWrite() {}
virtual bool should_complete(int r);
virtual int send();
- void guard_write();
bool has_parent() const {
return !m_object_image_extents.empty();
private:
/**
* Writes go through the following state machine to deal with
- * layering:
+ * layering and the object map:
*
- * need copyup
- * LIBRBD_AIO_WRITE_GUARD ---------------> LIBRBD_AIO_WRITE_COPYUP
- * | ^ |
- * v \------------------------------/
- * done
- * ^
- * |
- * LIBRBD_AIO_WRITE_FLAT
+ * <start>
+ * . |
+ * . |
+ * . \---> LIBRBD_AIO_WRITE_PRE
+ * . | |
+ * . . . . . . | . . . . | . . . . . . . . . . .
+ * . | -or- | .
+ * . | | v
+ * . | \----------------> LIBRBD_AIO_WRITE_FLAT . . .
+ * . | | .
+ * v v need copyup | .
+ * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP | .
+ * . | ^ | | .
+ * . | | | | .
+ * . | \---------------------------/ | .
+ * . | | .
+ * . \-------------------\ /-------------------/ .
+ * . | | .
+ * . LIBRBD_AIO_WRITE_POST .
+ * . | .
+ * . v .
+ * . . . . . . . . . . . . . . > <finish> < . . . . . . . . . . . . . .
*
- * Writes start in LIBRBD_AIO_WRITE_GUARD or _FLAT, depending on whether
- * there is a parent or not.
+ * The _PRE_REMOVE/_POST_REMOVE states are skipped if the object map
+ * is disabled. The write starts in _WRITE_GUARD or _FLAT depending on
+ * whether or not there is a parent overlap.
*/
enum write_state_d {
LIBRBD_AIO_WRITE_GUARD,
LIBRBD_AIO_WRITE_COPYUP,
- LIBRBD_AIO_WRITE_FLAT
+ LIBRBD_AIO_WRITE_FLAT,
+ LIBRBD_AIO_WRITE_PRE,
+ LIBRBD_AIO_WRITE_POST,
+ LIBRBD_AIO_WRITE_ERROR
};
protected:
- virtual void add_copyup_ops() = 0;
-
write_state_d m_state;
vector<pair<uint64_t,uint64_t> > m_object_image_extents;
uint64_t m_parent_overlap;
librados::ObjectWriteOperation m_write;
- librados::ObjectWriteOperation m_copyup;
uint64_t m_snap_seq;
ceph::bufferlist *m_entire_object;
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0;
+ virtual void guard_write();
+ virtual void pre_object_map_update(uint8_t *new_state) = 0;
+ virtual bool post_object_map_update() {
+ return false;
+ }
+
private:
+ librados::IoCtx m_io_ctx;
+
+ bool send_pre();
+ bool send_post();
+ void send_write();
void send_copyup();
};
objectx, object_overlap,
snapc, snap_id,
completion, false),
- m_write_data(data) {
- guard_write();
- add_write_ops(m_write);
+ m_write_data(data), m_op_flags(0) {
}
virtual ~AioWrite() {}
void set_op_flags(int op_flags) {
- m_write.set_op_flags2(op_flags);
+ m_op_flags = op_flags;
}
protected:
- virtual void add_copyup_ops() {
- add_write_ops(m_copyup);
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr);
+ virtual void pre_object_map_update(uint8_t *new_state) {
+ *new_state = OBJECT_EXISTS;
}
private:
- void add_write_ops(librados::ObjectWriteOperation &wr);
ceph::bufferlist m_write_data;
+ int m_op_flags;
};
class AioRemove : public AbstractWrite {
objectx, object_overlap,
snapc, snap_id, completion,
true) {
- if (has_parent())
- m_write.truncate(0);
- else
- m_write.remove();
}
virtual ~AioRemove() {}
protected:
- virtual void add_copyup_ops() {
- // removing an object never needs to copyup
- assert(0);
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+ if (has_parent()) {
+ m_object_state = OBJECT_EXISTS;
+ wr->truncate(0);
+ } else {
+ m_object_state = OBJECT_PENDING;
+ wr->remove();
+ }
+ }
+
+ virtual void pre_object_map_update(uint8_t *new_state) {
+ *new_state = m_object_state;
}
+
+ virtual bool post_object_map_update() {
+ if (m_object_state == OBJECT_EXISTS) {
+ return false;
+ }
+ return true;
+ }
+
+ virtual void guard_write() {
+ // do nothing to disable write guard
+ }
+
+ private:
+ uint8_t m_object_state;
};
class AioTruncate : public AbstractWrite {
objectx, object_overlap,
snapc, snap_id, completion,
true) {
- guard_write();
- m_write.truncate(object_off);
}
virtual ~AioTruncate() {}
protected:
- virtual void add_copyup_ops() {
- m_copyup.truncate(m_object_off);
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+ wr->truncate(m_object_off);
+ }
+
+ virtual void pre_object_map_update(uint8_t *new_state) {
+ *new_state = OBJECT_EXISTS;
}
};
objectx, object_overlap,
snapc, snap_id, completion,
true) {
- guard_write();
- m_write.zero(object_off, object_len);
}
virtual ~AioZero() {}
protected:
- virtual void add_copyup_ops() {
- m_copyup.zero(m_object_off, m_object_len);
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+ wr->zero(m_object_off, m_object_len);
+ }
+
+ virtual void pre_object_map_update(uint8_t *new_state) {
+ *new_state = OBJECT_EXISTS;
}
};
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/AsyncFlattenRequest.h"
+#include "librbd/AioRequest.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/ObjectMap.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::AsyncFlattenRequest: "
+
+namespace librbd {
+
+class AsyncFlattenObjectContext : public C_AsyncObjectThrottle {
+public:
+ AsyncFlattenObjectContext(AsyncObjectThrottle &throttle, ImageCtx *image_ctx,
+ uint64_t object_size, ::SnapContext snapc,
+ uint64_t object_no)
+ : C_AsyncObjectThrottle(throttle), m_image_ctx(*image_ctx),
+ m_object_size(object_size), m_snapc(snapc), m_object_no(object_no)
+ {
+ }
+
+ virtual int send() {
+ CephContext *cct = m_image_ctx.cct;
+
+ RWLock::RLocker l(m_image_ctx.owner_lock);
+ if (m_image_ctx.image_watcher->is_lock_supported() &&
+ !m_image_ctx.image_watcher->is_lock_owner()) {
+ ldout(cct, 1) << "lost exclusive lock during flatten" << dendl;
+ return -ERESTART;
+ }
+
+ RWLock::RLocker l2(m_image_ctx.md_lock);
+ uint64_t overlap;
+ {
+ RWLock::RLocker l3(m_image_ctx.parent_lock);
+ // stop early if the parent went away - it just means
+ // another flatten finished first, so this one is useless.
+ if (!m_image_ctx.parent) {
+ return 1;
+ }
+
+ // resize might have occurred while flatten is running
+ overlap = min(m_image_ctx.size, m_image_ctx.parent_md.overlap);
+ }
+
+ // map child object onto the parent
+ vector<pair<uint64_t,uint64_t> > objectx;
+ Striper::extent_to_file(cct, &m_image_ctx.layout, m_object_no,
+ 0, m_object_size, objectx);
+ uint64_t object_overlap = m_image_ctx.prune_parent_extents(objectx, overlap);
+ assert(object_overlap <= m_object_size);
+ if (object_overlap == 0) {
+ // resize shrunk image while flattening
+ return 1;
+ }
+
+ bufferlist bl;
+ string oid = m_image_ctx.get_object_name(m_object_no);
+ AioWrite *req = new AioWrite(&m_image_ctx, oid, m_object_no, 0, objectx,
+ object_overlap, bl, m_snapc, CEPH_NOSNAP,
+ this);
+ int r = req->send();
+ assert(r == 0);
+ return 0;
+ }
+
+private:
+ ImageCtx &m_image_ctx;
+ uint64_t m_object_size;
+ ::SnapContext m_snapc;
+ uint64_t m_object_no;
+};
+
+bool AsyncFlattenRequest::should_complete(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
+ if (r < 0 && !(r == -ENOENT && m_ignore_enoent) ) {
+ lderr(cct) << "flatten encountered an error: " << cpp_strerror(r) << dendl;
+ return true;
+ }
+
+ switch (m_state) {
+ case STATE_FLATTEN_OBJECTS:
+ ldout(cct, 5) << "FLATTEN_OBJECTS" << dendl;
+ return send_update_header();
+
+ case STATE_UPDATE_HEADER:
+ ldout(cct, 5) << "UPDATE_HEADER" << dendl;
+ return send_update_children();
+
+ case STATE_UPDATE_CHILDREN:
+ ldout(cct, 5) << "UPDATE_CHILDREN" << dendl;
+ return true;
+
+ default:
+ lderr(cct) << "invalid state: " << m_state << dendl;
+ assert(false);
+ break;
+ }
+ return false;
+}
+
+void AsyncFlattenRequest::send() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " send" << dendl;
+
+ m_state = STATE_FLATTEN_OBJECTS;
+ AsyncObjectThrottle::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<AsyncFlattenObjectContext>(),
+ boost::lambda::_1, &m_image_ctx, m_object_size, m_snapc,
+ boost::lambda::_2));
+ AsyncObjectThrottle *throttle = new AsyncObjectThrottle(
+ context_factory, create_callback_context(), m_prog_ctx, 0,
+ m_overlap_objects);
+ throttle->start_ops(cct->_conf->rbd_concurrent_management_ops);
+}
+
+bool AsyncFlattenRequest::send_update_header() {
+ CephContext *cct = m_image_ctx.cct;
+ bool lost_exclusive_lock = false;
+
+ m_state = STATE_UPDATE_HEADER;
+ {
+ RWLock::RLocker l(m_image_ctx.owner_lock);
+ if (m_image_ctx.image_watcher->is_lock_supported() &&
+ !m_image_ctx.image_watcher->is_lock_owner()) {
+ ldout(cct, 1) << "lost exclusive lock during header update" << dendl;
+ lost_exclusive_lock = true;
+ } else {
+ ldout(cct, 5) << this << " send_update_header" << dendl;
+
+ RWLock::RLocker l2(m_image_ctx.parent_lock);
+ // stop early if the parent went away - it just means
+ // another flatten finished first, so this one is useless.
+ if (!m_image_ctx.parent) {
+ ldout(cct, 5) << "image already flattened" << dendl;
+ return true;
+ }
+ m_ignore_enoent = true;
+ m_parent_spec = m_image_ctx.parent_md.spec;
+
+ // remove parent from this (base) image
+ librados::ObjectWriteOperation op;
+ if (m_image_ctx.image_watcher->is_lock_supported()) {
+ m_image_ctx.image_watcher->assert_header_locked(&op);
+ }
+ cls_client::remove_parent(&op);
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+ rados_completion, &op);
+ assert(r == 0);
+ rados_completion->release();
+ }
+ }
+
+ if (lost_exclusive_lock) {
+ complete(-ERESTART);
+ }
+ return false;
+}
+
+bool AsyncFlattenRequest::send_update_children() {
+ CephContext *cct = m_image_ctx.cct;
+ bool lost_exclusive_lock = false;
+
+ m_state = STATE_UPDATE_CHILDREN;
+ {
+ RWLock::RLocker l(m_image_ctx.owner_lock);
+ if (m_image_ctx.image_watcher->is_lock_supported() &&
+ !m_image_ctx.image_watcher->is_lock_owner()) {
+ ldout(cct, 1) << "lost exclusive lock during children update" << dendl;
+ lost_exclusive_lock = true;
+ } else {
+ // if there are no snaps, remove from the children object as well
+ // (if snapshots remain, they have their own parent info, and the child
+ // will be removed when the last snap goes away)
+ RWLock::RLocker l2(m_image_ctx.snap_lock);
+ if (!m_image_ctx.snaps.empty()) {
+ return true;
+ }
+
+ ldout(cct, 2) << "removing child from children list..." << dendl;
+ librados::ObjectWriteOperation op;
+ cls_client::remove_child(&op, m_parent_spec, m_image_ctx.id);
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(RBD_CHILDREN, rados_completion,
+ &op);
+ assert(r == 0);
+ rados_completion->release();
+ }
+ }
+
+ if (lost_exclusive_lock) {
+ complete(-ERESTART);
+ }
+ return false;
+}
+
+} // namespace librbd
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_ASYNC_FLATTEN_REQUEST_H
+#define CEPH_LIBRBD_ASYNC_FLATTEN_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+#include "librbd/parent_types.h"
+#include "common/snap_types.h"
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+class AsyncFlattenRequest : public AsyncRequest
+{
+public:
+ AsyncFlattenRequest(ImageCtx &image_ctx, Context *on_finish,
+ uint64_t object_size, uint64_t overlap_objects,
+ const ::SnapContext &snapc, ProgressContext &prog_ctx)
+ : AsyncRequest(image_ctx, on_finish), m_object_size(object_size),
+ m_overlap_objects(overlap_objects), m_snapc(snapc), m_prog_ctx(prog_ctx),
+ m_ignore_enoent(false)
+ {
+ }
+
+ virtual void send();
+
+protected:
+ virtual bool should_complete(int r);
+
+private:
+ /**
+ * Flatten goes through the following state machine to copyup objects
+ * from the parent image:
+ *
+ * <start>
+ * |
+ * v
+ * STATE_FLATTEN_OBJECTS ---> STATE_UPDATE_HEADER . . . . .
+ * . | .
+ * . | .
+ * . v .
+ * . STATE_UPDATE_CHILDREN .
+ * . | .
+ * . | .
+ * . \---> <finish> < . .
+ * . ^
+ * . .
+ * . . . . . . . . . . . . . . . . . . .
+ *
+ * The _UPDATE_CHILDREN state will be skipped if the image has one or
+ * more snapshots. The _UPDATE_HEADER state will be skipped if the
+ * image was concurrently flattened by another client.
+ */
+ enum State {
+ STATE_FLATTEN_OBJECTS,
+ STATE_UPDATE_HEADER,
+ STATE_UPDATE_CHILDREN
+ };
+
+ uint64_t m_object_size;
+ uint64_t m_overlap_objects;
+ ::SnapContext m_snapc;
+ ProgressContext &m_prog_ctx;
+ State m_state;
+
+ parent_spec m_parent_spec;
+ bool m_ignore_enoent;
+
+ bool send_update_header();
+ bool send_update_children();
+};
+
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_ASYNC_FLATTEN_REQUEST_H
{
}
-int AsyncObjectThrottle::start_ops(uint64_t max_concurrent) {
+void AsyncObjectThrottle::start_ops(uint64_t max_concurrent) {
bool complete;
{
Mutex::Locker l(m_lock);
for (uint64_t i = 0; i < max_concurrent; ++i) {
- int r = start_next_op();
- if (r < 0 && m_current_ops == 0) {
- return r;
+ start_next_op();
+ if (m_ret < 0 && m_current_ops == 0) {
+ break;
}
}
complete = (m_current_ops == 0);
m_ctx->complete(m_ret);
delete this;
}
- return 0;
}
void AsyncObjectThrottle::finish_op(int r) {
}
}
-int AsyncObjectThrottle::start_next_op() {
+void AsyncObjectThrottle::start_next_op() {
bool done = false;
while (!done) {
if (m_ret != 0 || m_object_no >= m_end_object_no) {
- return m_ret;
+ return;
}
uint64_t ono = m_object_no++;
if (r < 0) {
m_ret = r;
delete ctx;
- return m_ret;
+ return;
} else if (r > 0) {
// op completed immediately
delete ctx;
}
m_prog_ctx.update_progress(ono, m_end_object_no);
}
- return 0;
}
} // namespace librbd
ProgressContext &prog_ctx, uint64_t object_no,
uint64_t end_object_no);
- int start_ops(uint64_t max_concurrent);
+ void start_ops(uint64_t max_concurrent);
virtual void finish_op(int r);
private:
uint64_t m_current_ops;
int m_ret;
- int start_next_op();
+ void start_next_op();
};
} // namespace librbd
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include "librbd/AsyncRequest.h"
+#include "librbd/internal.h"
+#include <boost/bind.hpp>
+
+namespace librbd
+{
+
+librados::AioCompletion *AsyncRequest::create_callback_completion() {
+ return librados::Rados::aio_create_completion(create_callback_context(),
+ NULL, rados_ctx_cb);
+}
+
+Context *AsyncRequest::create_callback_context() {
+ return new FunctionContext(boost::bind(&AsyncRequest::complete, this, _1));
+}
+
+} // namespace librbd
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_ASYNC_REQUEST_H
+#define CEPH_LIBRBD_ASYNC_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/Context.h"
+#include "include/rados/librados.hpp"
+
+namespace librbd {
+
+class ImageCtx;
+
+class AsyncRequest
+{
+public:
+ AsyncRequest(ImageCtx &image_ctx, Context *on_finish)
+ : m_image_ctx(image_ctx), m_on_finish(on_finish)
+ {
+ }
+
+ virtual ~AsyncRequest() {}
+
+ void complete(int r) {
+ if (should_complete(r)) {
+ m_on_finish->complete(r);
+ delete this;
+ }
+ }
+
+ virtual void send() = 0;
+
+protected:
+ ImageCtx &m_image_ctx;
+ Context *m_on_finish;
+
+ librados::AioCompletion *create_callback_completion();
+ Context *create_callback_context();
+
+ virtual bool should_complete(int r) = 0;
+};
+
+class C_AsyncRequest : public Context
+{
+public:
+ C_AsyncRequest(AsyncRequest *req)
+ : m_req(req)
+ {
+ }
+
+protected:
+ virtual void finish(int r) {
+ m_req->complete(r);
+ }
+
+private:
+ AsyncRequest *m_req;
+};
+
+} // namespace librbd
+
+#endif //CEPH_LIBRBD_ASYNC_REQUEST_H
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include "librbd/AsyncResizeRequest.h"
+#include "librbd/AsyncTrimRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "common/dout.h"
+#include "common/errno.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::AsyncResizeRequest: "
+
+namespace librbd
+{
+
+bool AsyncResizeRequest::should_complete(int r)
+{
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "resize encountered an error: " << cpp_strerror(r) << dendl;
+ RWLock::WLocker l(m_image_ctx.md_lock);
+ if (m_image_ctx.size == m_new_size) {
+ m_image_ctx.size = m_original_size;
+ }
+ return true;
+ }
+
+ switch (m_state) {
+ case STATE_TRIM_IMAGE:
+ ldout(cct, 5) << "TRIM_IMAGE" << dendl;
+ send_grow_object_map();
+ break;
+
+ case STATE_GROW_OBJECT_MAP:
+ ldout(cct, 5) << "GROW_OBJECT_MAP" << dendl;
+ send_update_header();
+ break;
+
+ case STATE_UPDATE_HEADER:
+ ldout(cct, 5) << "UPDATE_HEADER" << dendl;
+ if (send_shrink_object_map()) {
+ return true;
+ }
+ break;
+
+ case STATE_SHRINK_OBJECT_MAP:
+ ldout(cct, 5) << "SHRINK_OBJECT_MAP" << dendl;
+ return true;
+
+ case STATE_FINISHED:
+ ldout(cct, 5) << "FINISHED" << dendl;
+ return true;
+
+ default:
+ lderr(cct) << "invalid state: " << m_state << dendl;
+ assert(false);
+ break;
+ }
+ return false;
+}
+
+void AsyncResizeRequest::send() {
+ CephContext *cct = m_image_ctx.cct;
+ if (m_original_size == m_new_size) {
+ ldout(cct, 2) << this << " no change in size (" << m_original_size
+ << " -> " << m_new_size << ")" << dendl;
+ m_state = STATE_FINISHED;
+ complete(0);
+ } else if (m_new_size > m_original_size) {
+ ldout(cct, 2) << this << " expanding image (" << m_original_size
+ << " -> " << m_new_size << ")" << dendl;
+ send_grow_object_map();
+ } else {
+ ldout(cct, 2) << this << " shrinking image (" << m_original_size
+ << " -> " << m_new_size << ")" << dendl;
+ send_trim_image();
+ }
+}
+
+void AsyncResizeRequest::send_trim_image() {
+ ldout(m_image_ctx.cct, 5) << this << " send_trim_image: "
+ << " original_size=" << m_original_size
+ << " new_size=" << m_new_size << dendl;
+ m_state = STATE_TRIM_IMAGE;
+
+ {
+ // update in-memory size to clip concurrent IO operations
+ RWLock::WLocker l(m_image_ctx.md_lock);
+ m_image_ctx.size = m_new_size;
+ }
+
+ AsyncTrimRequest *req = new AsyncTrimRequest(m_image_ctx,
+ create_callback_context(),
+ m_original_size, m_new_size,
+ m_prog_ctx);
+ req->send();
+}
+
+void AsyncResizeRequest::send_grow_object_map() {
+ bool lost_exclusive_lock = false;
+ bool object_map_enabled = true;
+ {
+ RWLock::RLocker l(m_image_ctx.owner_lock);
+ RWLock::RLocker l2(m_image_ctx.md_lock);
+ if (m_image_ctx.object_map == NULL) {
+ object_map_enabled = false;
+ } else {
+ ldout(m_image_ctx.cct, 5) << this << " send_grow_object_map: "
+ << " original_size=" << m_original_size
+ << " new_size=" << m_new_size << dendl;
+ m_state = STATE_GROW_OBJECT_MAP;
+
+ if (m_image_ctx.image_watcher->is_lock_supported() &&
+ !m_image_ctx.image_watcher->is_lock_owner()) {
+ ldout(m_image_ctx.cct, 1) << "lost exclusive lock during grow object map" << dendl;
+ lost_exclusive_lock = true;
+ } else {
+ m_image_ctx.object_map->aio_resize(m_new_size, OBJECT_NONEXISTENT,
+ create_callback_context());
+ object_map_enabled = true;
+ }
+ }
+ }
+
+ if (!object_map_enabled) {
+ send_update_header();
+ } else if (lost_exclusive_lock) {
+ // only complete when not holding locks
+ complete(-ERESTART);
+ }
+}
+
+bool AsyncResizeRequest::send_shrink_object_map() {
+ bool lost_exclusive_lock = false;
+ {
+ RWLock::RLocker l(m_image_ctx.owner_lock);
+ RWLock::RLocker l2(m_image_ctx.md_lock);
+ if (m_image_ctx.object_map == NULL ||
+ m_new_size > m_original_size) {
+ return true;
+ }
+
+ ldout(m_image_ctx.cct, 5) << this << " send_shrink_object_map: "
+ << " original_size=" << m_original_size
+ << " new_size=" << m_new_size << dendl;
+ m_state = STATE_SHRINK_OBJECT_MAP;
+
+ if (m_image_ctx.image_watcher->is_lock_supported() &&
+ !m_image_ctx.image_watcher->is_lock_owner()) {
+ ldout(m_image_ctx.cct, 1) << "lost exclusive lock during shrink object map" << dendl;
+ lost_exclusive_lock = true;
+ } else {
+ m_image_ctx.object_map->aio_resize(m_new_size, OBJECT_NONEXISTENT,
+ create_callback_context());
+ }
+ }
+
+ if (lost_exclusive_lock) {
+ // only complete when not holding locks
+ complete(-ERESTART);
+ }
+ return false;
+}
+
+void AsyncResizeRequest::send_update_header() {
+ bool lost_exclusive_lock = false;
+
+ ldout(m_image_ctx.cct, 5) << this << " send_update_header: "
+ << " original_size=" << m_original_size
+ << " new_size=" << m_new_size << dendl;
+ m_state = STATE_UPDATE_HEADER;
+
+ {
+ RWLock::RLocker l(m_image_ctx.owner_lock);
+ RWLock::WLocker l2(m_image_ctx.md_lock);
+ if (m_image_ctx.image_watcher->is_lock_supported() &&
+ !m_image_ctx.image_watcher->is_lock_owner()) {
+ ldout(m_image_ctx.cct, 1) << "lost exclusive lock during header update" << dendl;
+ lost_exclusive_lock = true;
+ } else {
+ m_image_ctx.size = m_new_size;
+
+ librados::ObjectWriteOperation op;
+ if (m_image_ctx.old_format) {
+ // rewrite header
+ bufferlist bl;
+ m_image_ctx.header.image_size = m_new_size;
+ bl.append((const char *)&m_image_ctx.header, sizeof(m_image_ctx.header));
+ op.write(0, bl);
+ } else {
+ if (m_image_ctx.image_watcher->is_lock_supported()) {
+ m_image_ctx.image_watcher->assert_header_locked(&op);
+ }
+ cls_client::set_size(&op, m_new_size);
+ }
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+ rados_completion, &op);
+ assert(r == 0);
+ rados_completion->release();
+ }
+ }
+
+ if (lost_exclusive_lock) {
+ // only complete when not holding locks
+ complete(-ERESTART);
+ }
+}
+
+} // namespace librbd
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_ASYNC_RESIZE_REQUEST_H
+#define CEPH_LIBRBD_ASYNC_RESIZE_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+
+namespace librbd
+{
+
+class ImageCtx;
+class ProgressContext;
+
+class AsyncResizeRequest : public AsyncRequest
+{
+public:
+ AsyncResizeRequest(ImageCtx &image_ctx, Context *on_finish,
+ uint64_t original_size, uint64_t new_size,
+ ProgressContext &prog_ctx)
+ : AsyncRequest(image_ctx, on_finish),
+ m_original_size(original_size), m_new_size(new_size),
+ m_prog_ctx(prog_ctx)
+ {
+ }
+
+ virtual void send();
+
+protected:
+ /**
+ * Resize goes through the following state machine to resize the image
+ * and update the object map:
+ *
+ * <start> ----> STATE_FINISHED --------------------------------\
+ * | . |
+ * | . . . . . . . . . . . . . . . . . . |
+ * | . |
+ * | v |
+ * |---> STATE_GROW_OBJECT_MAP ---> STATE_UPDATE_HEADER -------|
+ * | |
+ * | |
+ * \---> STATE_TRIM_IMAGE --------> STATE_UPDATE_HEADER . . . |
+ * | . |
+ * | . |
+ * v v v
+ * STATE_SHRINK_OBJECT_MAP ---> <finish>
+ *
+ * The _OBJECT_MAP states are skipped if the object map isn't enabled.
+ * The state machine will immediately transition to _FINISHED if there
+ * are no objects to trim.
+ */
+ enum State {
+ STATE_TRIM_IMAGE,
+ STATE_GROW_OBJECT_MAP,
+ STATE_UPDATE_HEADER,
+ STATE_SHRINK_OBJECT_MAP,
+ STATE_FINISHED
+ };
+
+ State m_state;
+ uint64_t m_original_size;
+ uint64_t m_new_size;
+ ProgressContext &m_prog_ctx;
+
+ virtual bool should_complete(int r);
+
+ void send_trim_image();
+ void send_grow_object_map();
+ bool send_shrink_object_map();
+ void send_update_header();
+
+};
+
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_ASYNC_RESIZE_REQUEST_H
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include "librbd/AsyncTrimRequest.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/AioRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "common/ContextCompletion.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "osdc/Striper.h"
+
+#include <boost/bind.hpp>
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+#include <boost/scope_exit.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::AsyncTrimRequest: "
+
+namespace librbd
+{
+
+class AsyncTrimObjectContext : public C_AsyncObjectThrottle {
+public:
+ AsyncTrimObjectContext(AsyncObjectThrottle &throttle, ImageCtx *image_ctx,
+ uint64_t object_no)
+ : C_AsyncObjectThrottle(throttle), m_image_ctx(*image_ctx),
+ m_object_no(object_no)
+ {
+ }
+
+ virtual int send() {
+ {
+ RWLock::RLocker l(m_image_ctx.md_lock);
+ if (m_image_ctx.object_map != NULL &&
+ !m_image_ctx.object_map->object_may_exist(m_object_no)) {
+ return 1;
+ }
+ }
+
+ RWLock::RLocker l(m_image_ctx.owner_lock);
+ if (m_image_ctx.image_watcher->is_lock_supported() &&
+ !m_image_ctx.image_watcher->is_lock_owner()) {
+ return -ERESTART;
+ }
+
+ string oid = m_image_ctx.get_object_name(m_object_no);
+ ldout(m_image_ctx.cct, 10) << "removing " << oid << dendl;
+
+ librados::AioCompletion *rados_completion =
+ librados::Rados::aio_create_completion(this, NULL, rados_ctx_cb);
+ int r = m_image_ctx.data_ctx.aio_remove(oid, rados_completion);
+ assert(r == 0);
+ rados_completion->release();
+ return 0;
+ }
+
+private:
+ ImageCtx &m_image_ctx;
+ uint64_t m_object_no;
+};
+
+AsyncTrimRequest::AsyncTrimRequest(ImageCtx &image_ctx, Context *on_finish,
+ uint64_t original_size, uint64_t new_size,
+ ProgressContext &prog_ctx)
+ : AsyncRequest(image_ctx, on_finish), m_new_size(new_size), m_prog_ctx(prog_ctx)
+{
+ uint64_t period = m_image_ctx.get_stripe_period();
+ uint64_t new_num_periods = ((m_new_size + period - 1) / period);
+ m_delete_off = MIN(new_num_periods * period, original_size);
+ // first object we can delete free and clear
+ m_delete_start = new_num_periods * m_image_ctx.get_stripe_count();
+ m_num_objects = Striper::get_num_objects(m_image_ctx.layout, original_size);
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " trim image " << original_size << " -> "
+ << m_new_size << " periods " << new_num_periods
+ << " discard to offset " << m_delete_off
+ << " delete objects " << m_delete_start
+ << " to " << m_num_objects << dendl;
+}
+
+
+bool AsyncTrimRequest::should_complete(int r)
+{
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " should_complete: r=" << r << dendl;
+ if (r < 0) {
+ lderr(cct) << "trim encountered an error: " << cpp_strerror(r) << dendl;
+ return true;
+ }
+
+ switch (m_state) {
+ case STATE_PRE_REMOVE:
+ ldout(cct, 5) << " PRE_REMOVE" << dendl;
+ send_remove_objects();
+ break;
+
+ case STATE_REMOVE_OBJECTS:
+ ldout(cct, 5) << " REMOVE_OBJECTS" << dendl;
+ if (send_post_remove()) {
+ return true;
+ }
+ break;
+
+ case STATE_POST_REMOVE:
+ ldout(cct, 5) << " POST_OBJECTS" << dendl;
+ if (send_clean_boundary()) {
+ return true;
+ }
+ break;
+
+ case STATE_CLEAN_BOUNDARY:
+ ldout(cct, 5) << "CLEAN_BOUNDARY" << dendl;
+ return true;
+
+ case STATE_FINISHED:
+ ldout(cct, 5) << "FINISHED" << dendl;
+ return true;
+
+ default:
+ lderr(cct) << "invalid state: " << m_state << dendl;
+ assert(false);
+ break;
+ }
+ return false;
+}
+
+void AsyncTrimRequest::send() {
+ if (m_delete_start < m_num_objects) {
+ send_pre_remove();
+ } else {
+ bool finished = send_clean_boundary();
+ if (finished) {
+ m_state = STATE_FINISHED;
+ complete(0);
+ }
+ }
+}
+
+void AsyncTrimRequest::send_remove_objects() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(m_image_ctx.cct, 5) << this << " send_remove_objects: "
+ << " delete_start=" << m_delete_start
+ << " num_objects=" << m_num_objects << dendl;
+ m_state = STATE_REMOVE_OBJECTS;
+
+ Context *ctx = create_callback_context();
+ AsyncObjectThrottle::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<AsyncTrimObjectContext>(),
+ boost::lambda::_1, &m_image_ctx, boost::lambda::_2));
+ AsyncObjectThrottle *throttle = new AsyncObjectThrottle(
+ context_factory, ctx, m_prog_ctx, m_delete_start, m_num_objects);
+ throttle->start_ops(cct->_conf->rbd_concurrent_management_ops);
+}
+
+void AsyncTrimRequest::send_pre_remove() {
+ bool lost_exclusive_lock = false;
+ {
+ RWLock::RLocker l(m_image_ctx.owner_lock);
+ RWLock::RLocker l2(m_image_ctx.md_lock);
+ if (m_image_ctx.object_map == NULL) {
+ send_remove_objects();
+ return;
+ }
+
+ ldout(m_image_ctx.cct, 5) << this << " send_pre_remove: "
+ << " delete_start=" << m_delete_start
+ << " num_objects=" << m_num_objects << dendl;
+ m_state = STATE_PRE_REMOVE;
+
+ if (!m_image_ctx.image_watcher->is_lock_owner()) {
+ ldout(m_image_ctx.cct, 1) << "lost exclusive lock during trim" << dendl;
+ lost_exclusive_lock = true;
+ } else {
+ // flag the objects as pending deletion
+ m_image_ctx.object_map->aio_update(
+ m_delete_start, m_num_objects, OBJECT_PENDING, OBJECT_EXISTS,
+ create_callback_context());
+ }
+ }
+
+ if (lost_exclusive_lock) {
+ complete(-ERESTART);
+ }
+}
+
+bool AsyncTrimRequest::send_post_remove() {
+ bool lost_exclusive_lock = false;
+ {
+ RWLock::RLocker l(m_image_ctx.owner_lock);
+ RWLock::RLocker l2(m_image_ctx.md_lock);
+ if (m_image_ctx.object_map == NULL) {
+ return send_clean_boundary();
+ }
+
+ ldout(m_image_ctx.cct, 5) << this << " send_post_remove: "
+ << " delete_start=" << m_delete_start
+ << " num_objects=" << m_num_objects << dendl;
+ m_state = STATE_POST_REMOVE;
+
+ if (!m_image_ctx.image_watcher->is_lock_owner()) {
+ ldout(m_image_ctx.cct, 1) << "lost exclusive lock during trim" << dendl;
+ } else {
+ // flag the pending objects as removed
+ m_image_ctx.object_map->aio_update(
+ m_delete_start, m_num_objects, OBJECT_NONEXISTENT, OBJECT_PENDING,
+ create_callback_context());
+ }
+ }
+
+ if (lost_exclusive_lock) {
+ complete(-ERESTART);
+ }
+ return false;
+}
+
+bool AsyncTrimRequest::send_clean_boundary() {
+ CephContext *cct = m_image_ctx.cct;
+ if (m_delete_off <= m_new_size) {
+ return true;
+ }
+
+ bool lost_exclusive_lock = false;
+ ContextCompletion *completion = NULL;
+ {
+ ldout(m_image_ctx.cct, 5) << this << " send_clean_boundary: "
+ << " delete_start=" << m_delete_start
+ << " num_objects=" << m_num_objects << dendl;
+ m_state = STATE_CLEAN_BOUNDARY;
+
+ RWLock::RLocker l(m_image_ctx.owner_lock);
+ if (m_image_ctx.image_watcher->is_lock_supported() &&
+ !m_image_ctx.image_watcher->is_lock_owner()) {
+ ldout(m_image_ctx.cct, 1) << "lost exclusive lock during trim" << dendl;
+ lost_exclusive_lock = true;
+ } else {
+ ::SnapContext snapc;
+ uint64_t parent_overlap;
+ {
+ RWLock::RLocker l2(m_image_ctx.snap_lock);
+ RWLock::RLocker l3(m_image_ctx.parent_lock);
+ snapc = m_image_ctx.snapc;
+ parent_overlap = m_image_ctx.parent_md.overlap;
+ }
+
+ // discard the weird boundary, if any
+ vector<ObjectExtent> extents;
+ Striper::file_to_extents(cct, m_image_ctx.format_string,
+ &m_image_ctx.layout, m_new_size,
+ m_delete_off - m_new_size, 0, extents);
+
+ completion = new ContextCompletion(create_callback_context(), true);
+ for (vector<ObjectExtent>::iterator p = extents.begin();
+ p != extents.end(); ++p) {
+ ldout(cct, 20) << " ex " << *p << dendl;
+ Context *req_comp = new C_ContextCompletion(*completion);
+
+ // reverse map this object extent onto the parent
+ vector<pair<uint64_t,uint64_t> > objectx;
+ Striper::extent_to_file(cct, &m_image_ctx.layout, p->objectno, 0,
+ m_image_ctx.layout.fl_object_size, objectx);
+ uint64_t object_overlap =
+ m_image_ctx.prune_parent_extents(objectx, parent_overlap);
+
+ AbstractWrite *req;
+ if (p->offset == 0) {
+ req = new AioRemove(&m_image_ctx, p->oid.name, p->objectno, objectx,
+ object_overlap, snapc, CEPH_NOSNAP, req_comp);
+ } else {
+ req = new AioTruncate(&m_image_ctx, p->oid.name, p->objectno, p->offset,
+ objectx, object_overlap, snapc, CEPH_NOSNAP,
+ req_comp);
+ }
+ int r = req->send();
+ if (r < 0) {
+ req_comp->complete(r);
+ delete req;
+ break;
+ }
+ }
+ }
+
+ }
+
+ if (lost_exclusive_lock) {
+ complete(-ERESTART);
+ } else if (completion != NULL) {
+ completion->finish_adding_requests();
+ }
+ return false;
+}
+
+} // namespace librbd
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_ASYNC_TRIM_REQUEST_H
+#define CEPH_LIBRBD_ASYNC_TRIM_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+
+namespace librbd
+{
+
+class ImageCtx;
+class ProgressContext;
+
+class AsyncTrimRequest : public AsyncRequest
+{
+public:
+ AsyncTrimRequest(ImageCtx &image_ctx, Context *on_finish,
+ uint64_t original_size, uint64_t new_size,
+ ProgressContext &prog_ctx);
+
+ virtual void send();
+
+protected:
+ /**
+ * Trim goes through the following state machine to remove whole objects,
+ * clean partially trimmed objects, and update the object map:
+ *
+ * <start> . . . . > STATE_FINISHED . . . . . . . . .
+ * | . .
+ * | . . . . . . . . . . . . .
+ * | . .
+ * v v .
+ * STATE_PRE_REMOVE ---> STATE_REMOVE_OBJECTS .
+ * | . . .
+ * /-----------------------/ . . . . . . . .
+ * | . . .
+ * v v v v
+ * STATE_POST_REMOVE --> STATE_CLEAN_BOUNDARY ---> <finish>
+ * . ^
+ * . .
+ * . . . . . . . . . . . . . . . . . . . . . . .
+ *
+ * The _PRE_REMOVE/_POST_REMOVE states are skipped if the object map
+ * isn't enabled. The _REMOVE_OBJECTS state is skipped if no whole objects
+ * are removed. The _CLEAN_BOUNDARY state is skipped if no boundary
+ * objects are cleaned. The state machine will immediately transition
+ * to _FINISHED state if there are no bytes to trim.
+ */
+
+ enum State {
+ STATE_PRE_REMOVE,
+ STATE_REMOVE_OBJECTS,
+ STATE_POST_REMOVE,
+ STATE_CLEAN_BOUNDARY,
+ STATE_FINISHED
+ };
+
+ virtual bool should_complete(int r);
+
+ State m_state;
+
+private:
+ uint64_t m_delete_start;
+ uint64_t m_num_objects;
+ uint64_t m_delete_off;
+ uint64_t m_new_size;
+ ProgressContext &m_prog_ctx;
+
+ void send_remove_objects();
+ void send_pre_remove();
+ bool send_post_remove();
+ bool send_clean_boundary();
+};
+
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_ASYNC_TRIM_REQUEST_H
#include "common/Mutex.h"
#include "librbd/AioCompletion.h"
-#include "librbd/ImageCtx.h"
-
#include "librbd/AioRequest.h"
#include "librbd/CopyupRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
#include "librbd/ObjectMap.h"
+#include <boost/bind.hpp>
+
#define dout_subsys ceph_subsys_rbd
#undef dout_prefix
#define dout_prefix *_dout << "librbd::CopyupRequest: "
uint64_t objectno,
vector<pair<uint64_t,uint64_t> >& image_extents)
: m_ictx(ictx), m_oid(oid), m_object_no(objectno),
- m_image_extents(image_extents)
+ m_image_extents(image_extents), m_state(STATE_READ_FROM_PARENT)
{
}
CopyupRequest::~CopyupRequest() {
- assert(m_ictx->copyup_list_lock.is_locked());
assert(m_pending_requests.empty());
-
- map<uint64_t, CopyupRequest*>::iterator it =
- m_ictx->copyup_list.find(m_object_no);
- assert(it != m_ictx->copyup_list.end());
- m_ictx->copyup_list.erase(it);
-
- if (m_ictx->copyup_list.empty()) {
- m_ictx->copyup_list_cond.Signal();
- }
}
ceph::bufferlist& CopyupRequest::get_copyup_data() {
m_pending_requests.push_back(req);
}
- void CopyupRequest::complete_all(int r) {
+ bool CopyupRequest::complete_requests(int r) {
+ if (m_pending_requests.empty()) {
+ return false;
+ }
+
while (!m_pending_requests.empty()) {
vector<AioRequest *>::iterator it = m_pending_requests.begin();
AioRequest *req = *it;
req->complete(r);
m_pending_requests.erase(it);
}
+ return true;
}
- void CopyupRequest::send_copyup(int r) {
+ void CopyupRequest::send_copyup() {
ldout(m_ictx->cct, 20) << __func__ << " " << this
- << ": oid " << m_oid
- << ", r " << r << dendl;
+ << ": oid " << m_oid << dendl;
m_ictx->snap_lock.get_read();
::SnapContext snapc = m_ictx->snapc;
std::vector<librados::snap_t> snaps;
snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end());
- {
- RWLock::RLocker l(m_ictx->md_lock);
- if (m_ictx->object_map != NULL) {
- r = m_ictx->object_map->update(m_object_no, OBJECT_EXISTS,
- boost::optional<uint8_t>());
- if (r < 0) {
- lderr(m_ictx->cct) << __func__ << " " << this
- << ": failed to update object map:"
- << cpp_strerror(r) << dendl;
- return;
- }
- }
- }
-
librados::ObjectWriteOperation copyup_op;
copyup_op.exec("rbd", "copyup", m_copyup_data);
comp->release();
}
- void CopyupRequest::read_from_parent()
+ void CopyupRequest::send()
{
+ m_state = STATE_READ_FROM_PARENT;
AioCompletion *comp = aio_create_completion_internal(
- this, &CopyupRequest::read_from_parent_cb);
+ create_callback_context(), rbd_ctx_cb);
+
ldout(m_ictx->cct, 20) << __func__ << " " << this
<< ": completion " << comp
<< ", oid " << m_oid
<< ", extents " << m_image_extents
<< dendl;
-
int r = aio_read(m_ictx->parent, m_image_extents, NULL, &m_copyup_data,
comp, 0);
if (r < 0) {
comp->release();
- delete this;
+
+ remove_from_list();
+ complete_requests(r);
}
}
- void CopyupRequest::queue_read_from_parent()
+ void CopyupRequest::queue_send()
{
// TODO: once the ObjectCacher allows reentrant read requests, the finisher
// should be eliminated
ldout(m_ictx->cct, 20) << __func__ << " " << this
<< ": oid " << m_oid << " "
<< ", extents " << m_image_extents << dendl;
- C_ReadFromParent *ctx = new C_ReadFromParent(this);
+ FunctionContext *ctx = new FunctionContext(
+ boost::bind(&CopyupRequest::send, this));
m_ictx->copyup_finisher->queue(ctx);
}
- void CopyupRequest::read_from_parent_cb(completion_t cb, void *arg)
+ void CopyupRequest::complete(int r)
{
- CopyupRequest *req = reinterpret_cast<CopyupRequest *>(arg);
- AioCompletion *comp = reinterpret_cast<AioCompletion *>(cb);
-
- ldout(req->m_ictx->cct, 20) << __func__ << " " << req
- << ": oid " << req->m_oid
- << ", extents " << req->m_image_extents << dendl;
-
- // If this entry is created by a read request, then copyup operation will
- // be performed asynchronously. Perform cleaning up from copyup callback.
- // If this entry is created by a write request, then copyup operation will
- // be performed synchronously by AioWrite. After extracting data, perform
- // cleaning up here
- Mutex::Locker l(req->m_ictx->copyup_list_lock);
- if (req->m_pending_requests.empty()) {
- req->send_copyup(comp->get_return_value());
- } else {
- req->complete_all(comp->get_return_value());
+ if (should_complete(r)) {
+ delete this;
}
- delete req;
+ }
+
+ bool CopyupRequest::should_complete(int r)
+ {
+ CephContext *cct = m_ictx->cct;
+ ldout(cct, 20) << __func__ << " "
+ << ": oid " << m_oid
+ << ", extents " << m_image_extents
+ << ", r " << r << dendl;
+
+ switch (m_state) {
+ case STATE_READ_FROM_PARENT:
+ ldout(cct, 20) << "READ_FROM_PARENT" << dendl;
+ remove_from_list();
+ if (complete_requests(r)) {
+ // pending write operation: it will handle object map / copyup
+ return true;
+ } else if (r < 0) {
+ // nothing to copyup
+ return true;
+ } else if (send_object_map()) {
+ return true;
+ }
+ break;
+
+ case STATE_OBJECT_MAP:
+ ldout(cct, 20) << "OBJECT_MAP" << dendl;
+ if (r == 0) {
+ send_copyup();
+ }
+ return true;
+
+ default:
+ lderr(cct) << "invalid state: " << m_state << dendl;
+ assert(false);
+ break;
+ }
+ return false;
+ }
+
+ void CopyupRequest::remove_from_list()
+ {
+ Mutex::Locker l(m_ictx->copyup_list_lock);
+
+ map<uint64_t, CopyupRequest*>::iterator it =
+ m_ictx->copyup_list.find(m_object_no);
+ assert(it != m_ictx->copyup_list.end());
+ m_ictx->copyup_list.erase(it);
+
+ if (m_ictx->copyup_list.empty()) {
+ m_ictx->copyup_list_cond.Signal();
+ }
+ }
+
+ bool CopyupRequest::send_object_map() {
+ bool object_map_enabled = true;
+ {
+ RWLock::RLocker l(m_ictx->owner_lock);
+ RWLock::RLocker l2(m_ictx->md_lock);
+ if (m_ictx->object_map == NULL) {
+ object_map_enabled = false;
+ } else if (!m_ictx->image_watcher->is_lock_owner()) {
+ ldout(m_ictx->cct, 20) << "exclusive lock not held for copy-on-read"
+ << dendl;
+ return true;
+ } else {
+ m_state = STATE_OBJECT_MAP;
+ m_ictx->object_map->aio_update(m_object_no, OBJECT_EXISTS,
+ boost::optional<uint8_t>(),
+ create_callback_context());
+ }
+ }
+
+ if (!object_map_enabled) {
+ send_copyup();
+ return true;
+ }
+ return false;
+ }
+
+ Context *CopyupRequest::create_callback_context()
+ {
+ return new FunctionContext(boost::bind(&CopyupRequest::complete, this, _1));
}
}
ceph::bufferlist& get_copyup_data();
void append_request(AioRequest *req);
- void read_from_parent();
- void queue_read_from_parent();
- private:
- class C_ReadFromParent : public Context {
- public:
- C_ReadFromParent(CopyupRequest *c) : m_req(c) {}
-
- virtual void finish(int r) {
- m_req->read_from_parent();
- }
+ void send();
+ void queue_send();
- private:
- CopyupRequest *m_req;
+ private:
+ /**
+ * Copyup requests go through the following state machine to read from the
+ * parent image, update the object map, and copyup the object:
+ *
+ * <start>
+ * |
+ * v
+ * STATE_READ_FROM_PARENT ---> STATE_OBJECT_MAP
+ * . |
+ * . . . . . . . . . . . . . |
+ * . |
+ * v v
+ * <finish>
+ * The _OBJECT_MAP state is skipped if the object map isn't enabled.
+ */
+ enum State {
+ STATE_READ_FROM_PARENT,
+ STATE_OBJECT_MAP
};
ImageCtx *m_ictx;
std::string m_oid;
uint64_t m_object_no;
vector<pair<uint64_t,uint64_t> > m_image_extents;
+ State m_state;
ceph::bufferlist m_copyup_data;
vector<AioRequest *> m_pending_requests;
- void complete_all(int r);
- void send_copyup(int r);
- static void read_from_parent_cb(completion_t cb, void *arg);
+ bool complete_requests(int r);
+
+ void complete(int r);
+ bool should_complete(int r);
+
+ void remove_from_list();
+
+ bool send_object_map();
+ void send_copyup();
+ Context *create_callback_context();
};
}
onfinish->complete(r);
}
- void ImageCtx::write_to_cache(object_t o, bufferlist& bl, size_t len,
+ void ImageCtx::write_to_cache(object_t o, const bufferlist& bl, size_t len,
uint64_t off, Context *onfinish) {
snap_lock.get_read();
ObjectCacher::OSDWrite *wr = object_cacher->prepare_write(snapc, bl,
uint64_t *overlap) const;
void aio_read_from_cache(object_t o, uint64_t object_no, bufferlist *bl,
size_t len, uint64_t off, Context *onfinish);
- void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off,
- Context *onfinish);
+ void write_to_cache(object_t o, const bufferlist& bl, size_t len,
+ uint64_t off, Context *onfinish);
int read_from_cache(object_t o, uint64_t object_no, bufferlist *bl,
size_t len, uint64_t off);
void user_flushed();
librbd_internal_la_SOURCES = \
librbd/AioCompletion.cc \
librbd/AioRequest.cc \
+ librbd/AsyncFlattenRequest.cc \
librbd/AsyncObjectThrottle.cc \
+ librbd/AsyncRequest.cc \
+ librbd/AsyncResizeRequest.cc \
+ librbd/AsyncTrimRequest.cc \
librbd/CopyupRequest.cc \
librbd/ImageCtx.cc \
librbd/ImageWatcher.cc \
noinst_HEADERS += \
librbd/AioCompletion.h \
librbd/AioRequest.h \
+ librbd/AsyncFlattenRequest.h \
librbd/AsyncObjectThrottle.h \
+ librbd/AsyncRequest.h \
+ librbd/AsyncResizeRequest.h \
+ librbd/AsyncTrimRequest.h \
librbd/CopyupRequest.h \
librbd/ImageCtx.h \
librbd/ImageWatcher.h \
// vim: ts=8 sw=2 smarttab
#include "librbd/ObjectMap.h"
#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
#include "librbd/internal.h"
#include "common/dout.h"
#include "common/errno.h"
ldout(cct, 20) << "refreshed object map: " << object_map.size()
<< dendl;
- uint64_t num_objs = Striper::get_num_objects(m_image_ctx.layout,
- m_image_ctx.get_current_size());
+ uint64_t num_objs = Striper::get_num_objects(
+ m_image_ctx.layout, m_image_ctx.get_image_size(m_image_ctx.snap_id));
if (object_map.size() != num_objs) {
// resize op might have been interrupted
lderr(cct) << "incorrect object map size: " << object_map.size()
}
return 0;
}
-int ObjectMap::resize(uint8_t default_object_state)
-{
- if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) {
- return 0;
- }
- RWLock::WLocker l(m_image_ctx.object_map_lock);
- CephContext *cct = m_image_ctx.cct;
- uint64_t num_objs = Striper::get_num_objects(m_image_ctx.layout,
- m_image_ctx.get_current_size());
- ldout(cct, 20) << "resizing object map: " << num_objs << dendl;
- librados::ObjectWriteOperation op;
- rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "");
- cls_client::object_map_resize(&op, num_objs, default_object_state);
- int r = m_image_ctx.data_ctx.operate(object_map_name(m_image_ctx.id), &op);
- if (r == -EBUSY) {
- lderr(cct) << "object map lock not owned by client" << dendl;
- return r;
- } else if (r < 0) {
- lderr(cct) << "error resizing object map: size=" << num_objs << ", "
- << "state=" << default_object_state << ", "
- << "error=" << cpp_strerror(r) << dendl;
- invalidate();
- return 0;
- }
- size_t orig_object_map_size = object_map.size();
- object_map.resize(num_objs);
- for (uint64_t i = orig_object_map_size; i < object_map.size(); ++i) {
- object_map[i] = default_object_state;
- }
- return 0;
+void ObjectMap::aio_resize(uint64_t new_size, uint8_t default_object_state,
+ Context *on_finish) {
+ assert((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) != 0);
+ assert(m_image_ctx.owner_lock.is_locked());
+ assert(m_image_ctx.image_watcher->is_lock_owner());
+
+ ResizeRequest *req = new ResizeRequest(
+ m_image_ctx, new_size, default_object_state, on_finish);
+ req->send();
}
-int ObjectMap::update(uint64_t object_no, uint8_t new_state,
- const boost::optional<uint8_t> ¤t_state)
+void ObjectMap::aio_update(uint64_t object_no, uint8_t new_state,
+ const boost::optional<uint8_t> ¤t_state,
+ Context *on_finish)
{
- return update(object_no, object_no + 1, new_state, current_state);
+ aio_update(object_no, object_no + 1, new_state, current_state, on_finish);
}
-int ObjectMap::update(uint64_t start_object_no,
- uint64_t end_object_no, uint8_t new_state,
- const boost::optional<uint8_t> ¤t_state)
+void ObjectMap::aio_update(uint64_t start_object_no, uint64_t end_object_no,
+ uint8_t new_state,
+ const boost::optional<uint8_t> ¤t_state,
+ Context *on_finish)
{
- if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) {
- return 0;
- }
-
- RWLock::WLocker l(m_image_ctx.object_map_lock);
- CephContext *cct = m_image_ctx.cct;
- assert(start_object_no <= end_object_no);
- assert(end_object_no <= object_map.size() ||
- (m_image_ctx.flags & RBD_FLAG_OBJECT_MAP_INVALID) != 0);
- if (end_object_no > object_map.size()) {
- ldout(cct, 20) << "skipping update of invalid object map" << dendl;
- return 0;
- }
+ assert((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) != 0);
+ assert(m_image_ctx.owner_lock.is_locked());
+ assert(m_image_ctx.image_watcher->is_lock_owner());
bool update_required = false;
- for (uint64_t object_no = start_object_no; object_no < end_object_no;
- ++object_no) {
- if ((!current_state || object_map[object_no] == *current_state) &&
- object_map[object_no] != new_state) {
- update_required = true;
- break;
+ {
+ RWLock::WLocker l(m_image_ctx.object_map_lock);
+ assert(start_object_no < end_object_no);
+
+ CephContext *cct = m_image_ctx.cct;
+ if (end_object_no > object_map.size()) {
+ ldout(cct, 20) << "skipping update of invalid object map" << dendl;
+ return;
+ }
+
+ for (uint64_t object_no = start_object_no; object_no < end_object_no;
+ ++object_no) {
+ if ((!current_state || object_map[object_no] == *current_state) &&
+ object_map[object_no] != new_state) {
+ update_required = true;
+ break;
+ }
+ }
+
+ if (update_required) {
+ UpdateRequest *req = new UpdateRequest(m_image_ctx, start_object_no,
+ end_object_no, new_state,
+ current_state, on_finish);
+ req->send();
}
}
if (!update_required) {
- return 0;
+ on_finish->complete(0);
}
+}
- ldout(cct, 20) << "updating object map: [" << start_object_no << ","
- << end_object_no << ") = "
- << static_cast<uint32_t>(new_state) << dendl;
+void ObjectMap::invalidate() {
+ CephContext *cct = m_image_ctx.cct;
+ lderr(cct) << this << " invalidating object map" << dendl;
+ m_image_ctx.flags |= RBD_FLAG_OBJECT_MAP_INVALID;
librados::ObjectWriteOperation op;
- rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "");
- cls_client::object_map_update(&op, start_object_no, end_object_no,
- new_state, current_state);
- int r = m_image_ctx.data_ctx.operate(object_map_name(m_image_ctx.id), &op);
- if (r == -EBUSY) {
- lderr(cct) << "object map lock not owned by client" << dendl;
- return r;
- } else if (r < 0) {
- lderr(cct) << "object map update failed: " << cpp_strerror(r) << dendl;
- invalidate();
- } else {
- for (uint64_t object_no = start_object_no; object_no < end_object_no;
- ++object_no) {
- if (!current_state || object_map[object_no] == *current_state) {
- object_map[object_no] = new_state;
+ cls_client::set_flags(&op, m_image_ctx.flags, RBD_FLAG_OBJECT_MAP_INVALID);
+
+ int r = m_image_ctx.md_ctx.operate(m_image_ctx.header_oid, &op);
+ if (r < 0) {
+ lderr(cct) << "failed to invalidate object map: " << cpp_strerror(r)
+ << dendl;
+ }
+}
+
+bool ObjectMap::Request::should_complete(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << this << " should_complete: r=" << r << dendl;
+
+ switch (m_state)
+ {
+ case STATE_REQUEST:
+ if (r == -EBUSY) {
+ lderr(cct) << "object map lock not owned by client" << dendl;
+ return true;
+ } else if (r < 0) {
+ lderr(cct) << "failed to update object map: " << cpp_strerror(r)
+ << dendl;
+ invalidate();
+ return false;
+ }
+
+ {
+ RWLock::RLocker l(m_image_ctx.md_lock);
+ RWLock::WLocker l2(m_image_ctx.object_map_lock);
+ ObjectMap *object_map = m_image_ctx.object_map;
+ if (object_map != NULL) {
+ finish(object_map);
}
}
+ return true;
+
+ case STATE_INVALIDATE:
+ ldout(cct, 20) << "INVALIDATE" << dendl;
+ if (r < 0) {
+ lderr(cct) << "failed to invalidate object map: " << cpp_strerror(r)
+ << dendl;
+ return true;
+ }
+ break;
+
+ default:
+ lderr(cct) << "invalid state: " << m_state << dendl;
+ assert(false);
+ break;
}
- return 0;
+ return false;
}
-void ObjectMap::invalidate()
-{
- // TODO: md_lock
+void ObjectMap::Request::invalidate() {
+ CephContext *cct = m_image_ctx.cct;
+ RWLock::WLocker l(m_image_ctx.md_lock);
+
+ lderr(cct) << this << " invalidating object map" << dendl;
+ m_state = STATE_INVALIDATE;
m_image_ctx.flags |= RBD_FLAG_OBJECT_MAP_INVALID;
- int r = cls_client::set_flags(&m_image_ctx.md_ctx,
- m_image_ctx.header_oid,
- m_image_ctx.flags,
- RBD_FLAG_OBJECT_MAP_INVALID);
- if (r < 0) {
- lderr(m_image_ctx.cct) << "Failed to invalidate object map: "
- << cpp_strerror(r) << dendl;
+
+ librados::ObjectWriteOperation op;
+ cls_client::set_flags(&op, m_image_ctx.flags, RBD_FLAG_OBJECT_MAP_INVALID);
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+ rados_completion, &op);
+ assert(r == 0);
+ rados_completion->release();
+}
+
+void ObjectMap::ResizeRequest::send() {
+ CephContext *cct = m_image_ctx.cct;
+
+ RWLock::WLocker l(m_image_ctx.object_map_lock);
+ m_num_objs = Striper::get_num_objects(m_image_ctx.layout, m_new_size);
+
+ ldout(cct, 5) << this << " resizing on-disk object map: " << m_num_objs << dendl;
+
+ librados::ObjectWriteOperation op;
+ rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "");
+ cls_client::object_map_resize(&op, m_num_objs, m_default_object_state);
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.data_ctx.aio_operate(object_map_name(m_image_ctx.id),
+ rados_completion, &op);
+ assert(r == 0);
+ rados_completion->release();
+}
+
+void ObjectMap::ResizeRequest::finish(ObjectMap *object_map) {
+ CephContext *cct = m_image_ctx.cct;
+
+ ldout(cct, 5) << this << " resizing in-memory object map: " << m_num_objs << dendl;
+ size_t orig_object_map_size = object_map->object_map.size();
+ object_map->object_map.resize(m_num_objs);
+ for (uint64_t i = orig_object_map_size; i < object_map->object_map.size(); ++i) {
+ object_map->object_map[i] = m_default_object_state;
+ }
+}
+
+void ObjectMap::UpdateRequest::send() {
+ CephContext *cct = m_image_ctx.cct;
+
+ ldout(cct, 20) << this << " updating on-disk object map: ["
+ << m_start_object_no << "," << m_end_object_no << ") = "
+ << static_cast<uint32_t>(m_new_state) << dendl;
+
+ librados::ObjectWriteOperation op;
+ rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "");
+ cls_client::object_map_update(&op, m_start_object_no, m_end_object_no,
+ m_new_state, m_current_state);
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.data_ctx.aio_operate(object_map_name(m_image_ctx.id),
+ rados_completion, &op);
+ assert(r == 0);
+ rados_completion->release();
+}
+
+void ObjectMap::UpdateRequest::finish(ObjectMap *object_map) {
+ CephContext *cct = m_image_ctx.cct;
+
+ ldout(cct, 20) << this << " updating in-memory object map" << dendl;
+ for (uint64_t object_no = m_start_object_no;
+ object_no < MIN(m_end_object_no, object_map->object_map.size());
+ ++object_no) {
+ if (!m_current_state ||
+ object_map->object_map[object_no] == *m_current_state) {
+ object_map->object_map[object_no] = m_new_state;
+ }
}
}
#define CEPH_LIBRBD_OBJECT_MAP_H
#include "include/int_types.h"
+#include "include/rados/librados.hpp"
#include "common/bit_vector.hpp"
+#include "librbd/AsyncRequest.h"
#include <boost/optional.hpp>
+class Context;
+
namespace librbd {
+static const uint8_t OBJECT_NONEXISTENT = 0;
+static const uint8_t OBJECT_EXISTS = 1;
+static const uint8_t OBJECT_PENDING = 2;
+
class ImageCtx;
class ObjectMap {
bool object_may_exist(uint64_t object_no) const;
- int refresh();
- int resize(uint8_t default_object_state);
+ void aio_resize(uint64_t new_size, uint8_t default_object_state,
+ Context *on_finish);
+ void aio_update(uint64_t object_no, uint8_t new_state,
+ const boost::optional<uint8_t> ¤t_state,
+ Context *on_finish);
+ void aio_update(uint64_t start_object_no, uint64_t end_object_no,
+ uint8_t new_state,
+ const boost::optional<uint8_t> ¤t_state,
+ Context *on_finish);
- int update(uint64_t object_no, uint8_t new_state,
- const boost::optional<uint8_t> ¤t_state);
- int update(uint64_t start_object_no, uint64_t end_object_no,
- uint8_t new_state, const boost::optional<uint8_t> ¤t_state);
+ int refresh();
private:
+ class Request : public AsyncRequest {
+ public:
+ Request(ImageCtx &image_ctx, Context *on_finish)
+ : AsyncRequest(image_ctx, on_finish), m_state(STATE_REQUEST)
+ {
+ }
+
+ protected:
+ virtual bool should_complete(int r);
+ virtual void finish(ObjectMap *object_map) = 0;
+
+ private:
+ /**
+ * <start> ---> STATE_REQUEST ---> <finish>
+ * | ^
+ * v |
+ * STATE_INVALIDATE -------/
+ */
+ enum State {
+ STATE_REQUEST,
+ STATE_INVALIDATE
+ };
+
+ State m_state;
+
+ void invalidate();
+ };
+
+ class ResizeRequest : public Request {
+ public:
+ ResizeRequest(ImageCtx &image_ctx, uint64_t new_size,
+ uint8_t default_object_state, Context *on_finish)
+ : Request(image_ctx, on_finish), m_num_objs(0), m_new_size(new_size),
+ m_default_object_state(default_object_state)
+ {
+ }
+
+ virtual void send();
+ protected:
+ virtual void finish(ObjectMap *object_map);
+ private:
+ uint64_t m_num_objs;
+ uint64_t m_new_size;
+ uint8_t m_default_object_state;
+ };
+
+ class UpdateRequest : public Request {
+ public:
+ UpdateRequest(ImageCtx &image_ctx, uint64_t start_object_no,
+ uint64_t end_object_no, uint8_t new_state,
+ const boost::optional<uint8_t> ¤t_state,
+ Context *on_finish)
+ : Request(image_ctx, on_finish), m_start_object_no(start_object_no),
+ m_end_object_no(end_object_no), m_new_state(new_state),
+ m_current_state(current_state)
+ {
+ }
+
+ virtual void send();
+ protected:
+ virtual void finish(ObjectMap *object_map);
+ private:
+ uint64_t m_start_object_no;
+ uint64_t m_end_object_no;
+ uint8_t m_new_state;
+ boost::optional<uint8_t> m_current_state;
+ };
+
ImageCtx &m_image_ctx;
ceph::BitVector<2> object_map;
#include "librbd/AioCompletion.h"
#include "librbd/AioRequest.h"
-#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/AsyncFlattenRequest.h"
+#include "librbd/AsyncResizeRequest.h"
+#include "librbd/AsyncTrimRequest.h"
#include "librbd/CopyupRequest.h"
#include "librbd/ImageCtx.h"
#include "librbd/ImageWatcher.h"
#include "librados/snap_set_diff.h"
#include <boost/bind.hpp>
-#include <boost/lambda/bind.hpp>
-#include <boost/lambda/construct.hpp>
#include <boost/scope_exit.hpp>
#include "include/assert.h"
assert(!ictx->image_watcher->is_lock_supported() ||
ictx->image_watcher->is_lock_owner());
- Mutex my_lock("librbd::trim_image::my_lock");
- Cond cond;
- bool done;
- int ret;
-
- CephContext *cct = ictx->cct;
- Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &ret);
- ret = async_trim_image(ictx, ctx, ictx->size, newsize, prog_ctx);
- if (ret < 0) {
- lderr(cct) << "warning: failed to remove object(s): "
- << cpp_strerror(ret) << dendl;
- delete ctx;
- return;
- }
-
- my_lock.Lock();
- while (!done) {
- cond.Wait(my_lock);
- }
- my_lock.Unlock();
+ C_SaferCond *ctx = new C_SaferCond();
+ AsyncTrimRequest *req = new AsyncTrimRequest(*ictx, ctx, ictx->size,
+ newsize, prog_ctx);
+ req->send();
- if (ret < 0) {
- lderr(cct) << "warning: failed to remove some object(s): "
- << cpp_strerror(ret) << dendl;
+ int r = ctx->wait();
+ if (r < 0) {
+ lderr(ictx->cct) << "warning: failed to remove some object(s): "
+ << cpp_strerror(r) << dendl;
}
}
int r;
do {
- Mutex my_lock("librbd::resize::my_lock");
- Cond cond;
- bool done;
+ C_SaferCond *ctx;
{
RWLock::RLocker l(ictx->owner_lock);
while (ictx->image_watcher->is_lock_supported()) {
ldout(ictx->cct, 5) << "resize timed out notifying lock owner" << dendl;
}
- Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &r);
+ ctx = new C_SaferCond();
r = async_resize(ictx, ctx, size, prog_ctx);
if (r < 0) {
delete ctx;
}
}
- my_lock.Lock();
- while (!done) {
- cond.Wait(my_lock);
- }
- my_lock.Unlock();
-
+ r = ctx->wait();
if (r == -ERESTART) {
ldout(ictx->cct, 5) << "resize interrupted: restarting" << dendl;
}
} while (r == -ERESTART);
+ ictx->perfcounter->inc(l_librbd_resize);
notify_change(ictx->md_ctx, ictx->header_oid, ictx);
ldout(cct, 2) << "resize finished" << dendl;
return r;
}
-
- class AsyncResizeFinishContext : public Context {
- public:
- AsyncResizeFinishContext(ImageCtx *ictx, Context *ctx)
- : m_ictx(ictx), m_ctx(ctx)
- {
- }
-
- virtual void finish(int r) {
- ldout(m_ictx->cct, 2) << "async_resize finished" << dendl;
- m_ictx->perfcounter->inc(l_librbd_resize);
- m_ctx->complete(r);
- }
-
- private:
- ImageCtx *m_ictx;
- Context *m_ctx;
- };
-
int async_resize(ImageCtx *ictx, Context *ctx, uint64_t size,
ProgressContext &prog_ctx)
{
}
}
- AsyncResizeFinishContext *finish_ctx =
- new AsyncResizeFinishContext(ictx, ctx);
- r = async_resize_helper(ictx, finish_ctx, original_size, size, prog_ctx);
- if (r < 0) {
- delete ctx;
- return r;
- }
+ async_resize_helper(ictx, ctx, original_size, size, prog_ctx);
return 0;
}
- class AsyncResizeHelperFinishContext : public Context {
- public:
- AsyncResizeHelperFinishContext(ImageCtx *ictx, Context *ctx,
- uint64_t original_size, uint64_t new_size)
- : m_ictx(ictx), m_ctx(ctx), m_original_size(original_size),
- m_new_size(new_size)
- {
- }
-
- virtual void finish(int r) {
- BOOST_SCOPE_EXIT((m_ctx) (m_ictx) (m_new_size) (m_original_size) (&r)) {
- ldout(m_ictx->cct, 2) << "async_resize_helper finished ("
- << r << ")" << dendl;
-
- if (r < 0) {
- RWLock::WLocker l(m_ictx->md_lock);
- if (m_ictx->size == m_new_size) {
- m_ictx->size = m_original_size;
- }
- m_ctx->complete(r);
- }
- } BOOST_SCOPE_EXIT_END
-
- if (r < 0) {
- return;
- }
-
- RWLock::RLocker l(m_ictx->owner_lock);
- if (m_ictx->image_watcher->is_lock_supported() &&
- !m_ictx->image_watcher->is_lock_owner()) {
- r = -ERESTART;
- return;
- }
-
- RWLock::WLocker l2(m_ictx->md_lock);
- m_ictx->size = m_new_size;
-
- librados::ObjectWriteOperation op;
- if (m_ictx->old_format) {
- // rewrite header
- bufferlist bl;
- m_ictx->header.image_size = m_new_size;
- bl.append((const char *)&m_ictx->header, sizeof(m_ictx->header));
- op.write(0, bl);
- } else {
- if (m_ictx->image_watcher->is_lock_supported()) {
- m_ictx->image_watcher->assert_header_locked(&op);
- }
- cls_client::set_size(&op, m_new_size);
- }
-
- librados::AioCompletion *rados_completion =
- librados::Rados::aio_create_completion(m_ctx, NULL, rados_ctx_cb);
- r = m_ictx->md_ctx.aio_operate(m_ictx->header_oid, rados_completion, &op);
- rados_completion->release();
- if (r < 0) {
- lderr(m_ictx->cct) << "error writing header: " << cpp_strerror(r)
- << dendl;
- return;
- }
-
- if (m_ictx->object_map != NULL) {
- m_ictx->size = m_new_size;
- m_ictx->object_map->resize(OBJECT_NONEXISTENT);
- }
- }
-
- private:
- ImageCtx *m_ictx;
- Context *m_ctx;
- uint64_t m_original_size;
- uint64_t m_new_size;
- };
-
- int async_resize_helper(ImageCtx *ictx, Context *ctx, uint64_t original_size,
- uint64_t new_size, ProgressContext& prog_ctx)
+ void async_resize_helper(ImageCtx *ictx, Context *ctx, uint64_t original_size,
+ uint64_t new_size, ProgressContext& prog_ctx)
{
- CephContext *cct = ictx->cct;
- if (original_size == new_size) {
- ldout(cct, 2) << "no change in size (" << original_size << " -> "
- << new_size << ")" << dendl;
- ctx->complete(0);
- return 0;
- }
-
- AsyncResizeHelperFinishContext *finish_ctx =
- new AsyncResizeHelperFinishContext(ictx, ctx, original_size, new_size);
- if (new_size > original_size) {
- ldout(cct, 2) << "expanding image " << original_size << " -> "
- << new_size << dendl;
- finish_ctx->complete(0);
- } else {
- ldout(cct, 2) << "shrinking image " << original_size << " -> "
- << new_size << dendl;
-
- {
- // update in-memory size to clip concurrent IO operations
- RWLock::WLocker l(ictx->md_lock);
- ictx->size = new_size;
- }
-
- int r = async_trim_image(ictx, finish_ctx, original_size, new_size,
- prog_ctx);
- if (r < 0) {
- delete finish_ctx;
- return r;
- }
- }
- return 0;
- }
-
- class AsyncTrimObjectContext : public C_AsyncObjectThrottle {
- public:
- AsyncTrimObjectContext(AsyncObjectThrottle &throttle, ImageCtx *ictx,
- uint64_t object_no)
- : C_AsyncObjectThrottle(throttle), m_ictx(ictx), m_object_no(object_no)
- {
- }
-
- virtual int send() {
- {
- RWLock::RLocker l(m_ictx->md_lock);
- if (m_ictx->object_map != NULL &&
- !m_ictx->object_map->object_may_exist(m_object_no)) {
- return 1;
- }
- }
-
- RWLock::RLocker l(m_ictx->owner_lock);
- if (m_ictx->image_watcher->is_lock_supported() &&
- !m_ictx->image_watcher->is_lock_owner()) {
- return -ERESTART;
- }
-
- string oid = m_ictx->get_object_name(m_object_no);
- librados::AioCompletion *rados_completion =
- librados::Rados::aio_create_completion(this, NULL, rados_ctx_cb);
- m_ictx->data_ctx.aio_remove(oid, rados_completion);
- rados_completion->release();
- return 0;
- }
-
- private:
- ImageCtx *m_ictx;
- uint64_t m_object_no;
- };
-
- class AsyncTrimFinishContext : public Context {
- public:
-
- AsyncTrimFinishContext(ImageCtx *ictx, Context *ctx, uint64_t delete_start,
- uint64_t num_objects, uint64_t delete_offset,
- uint64_t new_size)
- : m_ictx(ictx), m_ctx(ctx), m_delete_start(delete_start),
- m_num_objects(num_objects), m_delete_offset(delete_offset),
- m_new_size(new_size)
- {
- }
-
- virtual void finish(int r) {
- if (r < 0) {
- m_ctx->complete(r);
- return;
- }
-
- RWLock::RLocker l(m_ictx->owner_lock);
- if (m_ictx->image_watcher->is_lock_supported() &&
- !m_ictx->image_watcher->is_lock_owner()) {
- m_ctx->complete(-ERESTART);
- return;
- }
-
- RWLock::RLocker l2(m_ictx->md_lock);
- if (m_ictx->object_map != NULL) {
- m_ictx->object_map->update(m_delete_start, m_num_objects,
- OBJECT_NONEXISTENT, OBJECT_PENDING);
- }
-
- if (m_delete_offset <= m_new_size) {
- m_ctx->complete(r);
- return;
- }
-
- // discard the weird boundary, if any
- vector<ObjectExtent> extents;
- Striper::file_to_extents(m_ictx->cct, m_ictx->format_string,
- &m_ictx->layout, m_new_size,
- m_delete_offset - m_new_size, 0, extents);
-
- ContextCompletion *completion = new ContextCompletion(m_ctx, true);
- for (vector<ObjectExtent>::iterator p = extents.begin();
- p != extents.end(); ++p) {
- ldout(m_ictx->cct, 20) << " ex " << *p << dendl;
- Context *req_comp = new C_ContextCompletion(*completion);
- librados::AioCompletion *rados_completion =
- librados::Rados::aio_create_completion(req_comp, NULL, rados_ctx_cb);
-
- bool flag_nonexistent = false;
- if (p->offset == 0) {
- flag_nonexistent = true;
- if (m_ictx->object_map != NULL) {
- m_ictx->object_map->update(p->objectno, OBJECT_PENDING,
- OBJECT_EXISTS);
- }
- m_ictx->data_ctx.aio_remove(p->oid.name, rados_completion);
- } else {
- if (m_ictx->object_map != NULL) {
- m_ictx->object_map->update(p->objectno, OBJECT_EXISTS,
- boost::optional<uint8_t>());
- }
- librados::ObjectWriteOperation op;
- op.truncate(p->offset);
- m_ictx->data_ctx.aio_operate(p->oid.name, rados_completion, &op);
- }
- rados_completion->release();
-
- if (flag_nonexistent && m_ictx->object_map != NULL) {
- m_ictx->object_map->update(p->objectno, OBJECT_NONEXISTENT,
- OBJECT_PENDING);
- }
- }
- completion->finish_adding_requests();
- }
-
- private:
- ImageCtx *m_ictx;
- Context *m_ctx;
- uint64_t m_delete_start;
- uint64_t m_num_objects;
- uint64_t m_delete_offset;
- uint64_t m_new_size;
- };
-
- int async_trim_image(ImageCtx *ictx, Context *ctx, uint64_t original_size,
- uint64_t new_size, ProgressContext& prog_ctx)
- {
- CephContext *cct = (CephContext *)ictx->data_ctx.cct();
-
- uint64_t period = ictx->get_stripe_period();
- uint64_t new_num_periods = ((new_size + period - 1) / period);
- uint64_t delete_off = MIN(new_num_periods * period, original_size);
- // first object we can delete free and clear
- uint64_t delete_start = new_num_periods * ictx->get_stripe_count();
- uint64_t num_objects = Striper::get_num_objects(ictx->layout, original_size);
-
- ldout(cct, 10) << "trim_image " << original_size << " -> " << new_size
- << " periods " << new_num_periods
- << " discard to offset " << delete_off
- << " delete objects " << delete_start
- << " to " << (num_objects-1)
- << dendl;
-
- AsyncTrimFinishContext *finish_ctx =
- new AsyncTrimFinishContext(ictx, ctx, delete_start, num_objects,
- delete_off, new_size);
- if (delete_start < num_objects) {
- ldout(cct, 2) << "trim_image objects " << delete_start << " to "
- << (num_objects - 1) << dendl;
-
- {
- RWLock::RLocker l(ictx->md_lock);
- if (ictx->object_map != NULL) {
- ictx->object_map->update(delete_start, num_objects, OBJECT_PENDING,
- OBJECT_EXISTS);
- }
- }
-
- AsyncObjectThrottle::ContextFactory context_factory(
- boost::lambda::bind(boost::lambda::new_ptr<AsyncTrimObjectContext>(),
- boost::lambda::_1, ictx, boost::lambda::_2));
- AsyncObjectThrottle *throttle = new AsyncObjectThrottle(
- context_factory, finish_ctx, prog_ctx, delete_start, num_objects);
- int r = throttle->start_ops(cct->_conf->rbd_concurrent_management_ops);
- if (r < 0) {
- delete throttle;
- return r;
- }
- } else {
- finish_ctx->complete(0);
- }
- return 0;
+ assert(ictx->owner_lock.is_locked());
+ AsyncResizeRequest *req = new AsyncResizeRequest(*ictx, ctx, original_size,
+ new_size, prog_ctx);
+ req->send();
}
int snap_list(ImageCtx *ictx, vector<snap_info_t>& snaps)
}
}
- Mutex my_lock("librbd::snap_rollback::my_lock");
- Cond cond;
- bool done;
- Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &r);
-
ldout(cct, 2) << "resizing to snapshot size..." << dendl;
NoOpProgressContext no_op;
- r = async_resize_helper(ictx, ctx, original_size, new_size, no_op);
- if (r < 0) {
- delete ctx;
- return r;
- }
-
- my_lock.Lock();
- while (!done) {
- cond.Wait(my_lock);
- }
- my_lock.Unlock();
+ C_SaferCond *ctx = new C_SaferCond();
+ async_resize_helper(ictx, ctx, original_size, new_size, no_op);
+ r = ctx->wait();
if (r < 0) {
lderr(cct) << "Error resizing to snapshot size: "
<< cpp_strerror(r) << dendl;
ictx->shutdown_cache(); // implicitly flushes
} else {
flush(ictx);
- ictx->wait_for_pending_aio();
}
if (ictx->copyup_finisher != NULL) {
delete ictx;
}
- class AsyncFlattenObjectContext : public C_AsyncObjectThrottle {
- public:
- AsyncFlattenObjectContext(AsyncObjectThrottle &throttle, ImageCtx *ictx,
- uint64_t object_size, ::SnapContext snapc,
- uint64_t object_no)
- : C_AsyncObjectThrottle(throttle), m_ictx(ictx),
- m_object_size(object_size), m_snapc(snapc), m_object_no(object_no)
- {
- }
-
- virtual int send() {
- int r = ictx_check(m_ictx);
- if (r < 0) {
- return r;
- }
-
- RWLock::RLocker l(m_ictx->owner_lock);
- if (m_ictx->image_watcher->is_lock_supported() &&
- !m_ictx->image_watcher->is_lock_owner()) {
- return -ERESTART;
- }
-
- RWLock::RLocker l2(m_ictx->md_lock);
- uint64_t overlap;
- {
- RWLock::RLocker l3(m_ictx->parent_lock);
- // stop early if the parent went away - it just means
- // another flatten finished first, so this one is useless.
- if (!m_ictx->parent) {
- return 1;
- }
-
- // resize might have occurred while flatten is running
- overlap = min(m_ictx->size, m_ictx->parent_md.overlap);
- }
-
- // map child object onto the parent
- vector<pair<uint64_t,uint64_t> > objectx;
- Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, 0,
- m_object_size, objectx);
- uint64_t object_overlap = m_ictx->prune_parent_extents(objectx, overlap);
- assert(object_overlap <= m_object_size);
- if (object_overlap == 0) {
- // resize shrunk image while flattening
- return 1;
- }
-
- bufferlist bl;
- string oid = m_ictx->get_object_name(m_object_no);
- AioWrite *req = new AioWrite(m_ictx, oid, m_object_no, 0, objectx,
- object_overlap, bl, m_snapc, CEPH_NOSNAP,
- this);
- r = req->send();
- if (r < 0) {
- lderr(m_ictx->cct) << "failed to flatten object " << oid << dendl;
- delete req;
- return r;
- }
- return 0;
- }
-
- private:
- ImageCtx *m_ictx;
- uint64_t m_object_size;
- ::SnapContext m_snapc;
- uint64_t m_object_no;
-
- };
-
- class AsyncFlattenFinishContext : public Context {
- public:
- AsyncFlattenFinishContext(ImageCtx *ictx, Context *ctx,
- uint64_t overlap_objects)
- : m_ictx(ictx), m_ctx(ctx), m_overlap_objects(overlap_objects)
- {
- }
-
- virtual void finish(int r) {
- BOOST_SCOPE_EXIT((&m_ctx) (&r)) {
- m_ctx->complete(r);
- } BOOST_SCOPE_EXIT_END
-
- CephContext *cct = m_ictx->cct;
- if (r < 0) {
- lderr(cct) << "failed to flatten at least one object: "
- << cpp_strerror(r) << dendl;
- return;
- }
-
- RWLock::RLocker l(m_ictx->owner_lock);
- if (m_ictx->image_watcher->is_lock_supported() &&
- !m_ictx->image_watcher->is_lock_owner()) {
- r = -ERESTART;
- return;
- }
-
- // remove parent from this (base) image
- librados::ObjectWriteOperation op;
- if (m_ictx->image_watcher->is_lock_supported()) {
- m_ictx->image_watcher->assert_header_locked(&op);
- }
- cls_client::remove_parent(&op);
- r = m_ictx->md_ctx.operate(m_ictx->header_oid, &op);
- if (r < 0) {
- lderr(cct) << "error removing parent" << dendl;
- return;
- }
-
- // and if there are no snaps, remove from the children object as well
- // (if snapshots remain, they have their own parent info, and the child
- // will be removed when the last snap goes away)
- m_ictx->snap_lock.get_read();
- if (m_ictx->snaps.empty()) {
- ldout(cct, 2) << "removing child from children list..." << dendl;
- int r = cls_client::remove_child(&m_ictx->md_ctx, RBD_CHILDREN,
- m_ictx->parent_md.spec, m_ictx->id);
- if (r < 0) {
- lderr(cct) << "error removing child from children list" << dendl;
- m_ictx->snap_lock.put_read();
- return;
- }
- }
- m_ictx->snap_lock.put_read();
-
- ldout(cct, 20) << "finished flattening" << dendl;
- return;
- }
-
- private:
- ImageCtx *m_ictx;
- Context *m_ctx;
- uint64_t m_overlap_objects;
- };
-
// 'flatten' child image by copying all parent's blocks
int flatten(ImageCtx *ictx, ProgressContext &prog_ctx)
{
int r;
do {
- Mutex my_lock("librbd::flatten:my_lock");
- Cond cond;
- bool done;
+ C_SaferCond *ctx;
{
RWLock::RLocker l(ictx->owner_lock);
while (ictx->image_watcher->is_lock_supported()) {
ldout(ictx->cct, 5) << "flatten timed out notifying lock owner" << dendl;
}
- Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &r);
+ ctx = new C_SaferCond();
r = async_flatten(ictx, ctx, prog_ctx);
if (r < 0) {
delete ctx;
}
}
- my_lock.Lock();
- while (!done) {
- cond.Wait(my_lock);
- }
- my_lock.Unlock();
-
+ r = ctx->wait();
if (r == -ERESTART) {
ldout(ictx->cct, 5) << "flatten interrupted: restarting" << dendl;
}
overlap_objects = Striper::get_num_objects(ictx->layout, overlap);
}
- AsyncObjectThrottle::ContextFactory context_factory(
- boost::lambda::bind(boost::lambda::new_ptr<AsyncFlattenObjectContext>(),
- boost::lambda::_1, ictx, object_size, snapc,
- boost::lambda::_2));
- AsyncFlattenFinishContext *finish_ctx =
- new AsyncFlattenFinishContext(ictx, ctx, overlap_objects);
- AsyncObjectThrottle *throttle = new AsyncObjectThrottle(
- context_factory, finish_ctx, prog_ctx, 0, overlap_objects);
- r = throttle->start_ops(cct->_conf->rbd_concurrent_management_ops);
- if (r < 0) {
- delete throttle;
- return r;
- }
-
+ AsyncFlattenRequest *req =
+ new AsyncFlattenRequest(*ictx, ctx, object_size, overlap_objects,
+ snapc, prog_ctx);
+ req->send();
return 0;
}
r = ictx->flush_cache();
} else {
r = ictx->data_ctx.aio_flush();
+ ictx->wait_for_pending_aio();
}
if (r)
bl.append(buf + q->first, q->second);
}
- {
- RWLock::RLocker l(ictx->md_lock);
- if (ictx->object_map != NULL) {
- r = ictx->object_map->update(p->objectno, OBJECT_EXISTS,
- boost::optional<uint8_t>());
- if (r < 0) {
- goto done;
- }
- }
- }
-
C_AioWrite *req_comp = new C_AioWrite(cct, c);
if (ictx->object_cacher) {
c->add_request();
req->set_op_flags(op_flags);
r = req->send();
- if (r < 0)
+ if (r < 0) {
+ req->complete(r);
goto done;
+ }
}
}
done:
ictx->perfcounter->inc(l_librbd_aio_wr);
ictx->perfcounter->inc(l_librbd_aio_wr_bytes, mylen);
-
- /* FIXME: cleanup all the allocated stuff */
return r;
}
object_overlap = ictx->prune_parent_extents(objectx, overlap);
}
- RWLock::RLocker l(ictx->md_lock);
- bool flag_nonexistent = false;
if (p->offset == 0 && p->length == ictx->layout.fl_object_size) {
req = new AioRemove(ictx, p->oid.name, p->objectno, objectx, object_overlap,
snapc, snap_id, req_comp);
- if (!req->has_parent() && ictx->object_map != NULL) {
- ictx->object_map->update(p->objectno, OBJECT_PENDING, OBJECT_EXISTS);
- flag_nonexistent = true;
- }
} else if (p->offset + p->length == ictx->layout.fl_object_size) {
req = new AioTruncate(ictx, p->oid.name, p->objectno, p->offset, objectx, object_overlap,
snapc, snap_id, req_comp);
snapc, snap_id, req_comp);
}
- if (!flag_nonexistent && ictx->object_map != NULL) {
- ictx->object_map->update(p->objectno, OBJECT_EXISTS,
- boost::optional<uint8_t>());
- }
-
r = req->send();
- if (r < 0)
+ if (r < 0) {
+ req->complete(r);
goto done;
-
- if (flag_nonexistent && ictx->object_map != NULL) {
- ictx->object_map->update(p->objectno, OBJECT_NONEXISTENT,
- OBJECT_PENDING);
}
}
r = 0;
ictx->perfcounter->inc(l_librbd_aio_discard);
ictx->perfcounter->inc(l_librbd_aio_discard_bytes, len);
-
- /* FIXME: cleanup all the allocated stuff */
return r;
}
}
};
- static const uint8_t OBJECT_NONEXISTENT = 0;
- static const uint8_t OBJECT_EXISTS = 1;
- static const uint8_t OBJECT_PENDING = 2;
-
const std::string id_obj_name(const std::string &name);
const std::string header_name(const std::string &image_id);
const std::string old_header_name(const std::string &image_name);
int async_flatten(ImageCtx *ictx, Context *ctx, ProgressContext &prog_ctx);
int async_resize(ImageCtx *ictx, Context *ctx, uint64_t size,
ProgressContext &prog_ctx);
- int async_resize_helper(ImageCtx *ictx, Context *ctx, uint64_t original_size,
- uint64_t new_size, ProgressContext& prog_ctx);
- int async_trim_image(ImageCtx *ictx, Context *ctx, uint64_t original_size,
- uint64_t new_size, ProgressContext& prog_ctx);
+ void async_resize_helper(ImageCtx *ictx, Context *ctx, uint64_t original_size,
+ uint64_t new_size, ProgressContext& prog_ctx);
int aio_write(ImageCtx *ictx, uint64_t off, size_t len, const char *buf,
AioCompletion *c, int op_flags);
bufferlist bl;
utime_t mtime;
int flags;
- OSDWrite(const SnapContext& sc, bufferlist& b, utime_t mt, int f) : snapc(sc), bl(b), mtime(mt), flags(f) {}
+ OSDWrite(const SnapContext& sc, const bufferlist& b, utime_t mt, int f)
+ : snapc(sc), bl(b), mtime(mt), flags(f) {}
};
- OSDWrite *prepare_write(const SnapContext& sc, bufferlist &b, utime_t mt, int f) {
+ OSDWrite *prepare_write(const SnapContext& sc, const bufferlist &b,
+ utime_t mt, int f) {
return new OSDWrite(sc, b, mt, f);
}
ASSERT_EQ(0, get_flags(&ioctx, oid, CEPH_NOSNAP, &flags));
ASSERT_EQ(0U, flags);
- ASSERT_EQ(0, set_flags(&ioctx, oid, 3, 2));
+ librados::ObjectWriteOperation op1;
+ set_flags(&op1, 3, 2);
+ ASSERT_EQ(0, ioctx.operate(oid, &op1));
ASSERT_EQ(0, get_flags(&ioctx, oid, CEPH_NOSNAP, &flags));
ASSERT_EQ(2U, flags);
ASSERT_EQ(-ENOENT, get_flags(&ioctx, oid, snap_id, &flags));
ASSERT_EQ(0, snapshot_add(&ioctx, oid, snap_id, "snap"));
- ASSERT_EQ(0, set_flags(&ioctx, oid, 31, 4));
+ librados::ObjectWriteOperation op2;
+ set_flags(&op2, 31, 4);
+ ASSERT_EQ(0, ioctx.operate(oid, &op2));
ASSERT_EQ(0, get_flags(&ioctx, oid, CEPH_NOSNAP, &flags));
ASSERT_EQ(6U, flags);