librbd/LibrbdWriteback.cc \
librbd/WatchCtx.cc \
osdc/ObjectCacher.cc \
+ osdc/Filer.cc \
cls/lock/cls_lock_client.cc \
cls/lock/cls_lock_types.cc \
cls/lock/cls_lock_ops.cc
assert(pending_count);
int count = --pending_count;
if (!count) {
+ if (rval >= 0 && aio_type == AIO_TYPE_READ) {
+ // FIXME: make the destriper write directly into a buffer so
+ // that we avoid shuffling pointers and copying zeros around.
+ bufferlist bl;
+ destriper.assemble_result(bl, true);
+ assert(bl.length() == read_buf_len);
+ bl.copy(0, read_buf_len, read_buf);
+ ldout(cct, 20) << "AioCompletion::complete_request() copied resulting " << bl.length()
+ << " bytes to " << (void*)read_buf << dendl;
+ }
+
complete();
}
put_unlock();
// Completion callback for a single per-object read that belongs to an
// AioCompletion.  The '-' lines show the old behaviour (copying straight
// into m_out_buf via handle_sparse_read); the '+' lines show the new one
// (feeding the completion's destriper).
void C_AioRead::finish(int r)
{
- ldout(m_cct, 10) << "C_AioRead::finish() " << this << dendl;
+ ldout(m_cct, 10) << "C_AioRead::finish() " << this << " r = " << r << dendl;
// Both success and -ENOENT take the data path below.
if (r >= 0 || r == -ENOENT) { // this was a sparse_read operation
- ldout(m_cct, 10) << "ofs=" << m_req->offset()
- << " len=" << m_req->length() << dendl;
- r = handle_sparse_read(m_cct, m_req->data(), m_req->offset(),
- m_req->ext_map(), 0, m_req->length(),
- m_out_buf);
+ ldout(m_cct, 10) << " got " << m_req->m_ext_map
+ << " for " << m_req->m_buffer_extents
+ << " bl " << m_req->data().length() << dendl;
// Hand the object data, its extent map, the object offset and the
// buffer extents of this request to the completion's destriper.
+ m_completion->destriper.add_partial_sparse_result(m_req->data(),
+ m_req->m_ext_map, m_req->m_object_off,
+ m_req->m_buffer_extents);
// The result reported upward is the requested object length, which also
// replaces a -ENOENT that entered this branch.
+ r = m_req->m_object_len;
}
// Any other negative r is forwarded unchanged.
m_completion->complete_request(m_cct, r);
}
#include "librbd/ImageCtx.h"
#include "librbd/internal.h"
+#include "osdc/Filer.h"
+
namespace librbd {
class AioRead;
utime_t start_time;
aio_type_t aio_type;
+ Filer::StripedReadResult destriper;
+ char *read_buf;
+ size_t read_buf_len;
+
AioCompletion() : lock("AioCompletion::lock", true),
done(false), rval(0), complete_cb(NULL),
complete_arg(NULL), rbd_comp(NULL), pending_count(1),
ref(1), released(false), ictx(NULL),
- aio_type(AIO_TYPE_NONE) {
+ aio_type(AIO_TYPE_NONE),
+ read_buf(NULL), read_buf_len(0) {
}
~AioCompletion() {
}
class C_AioRead : public Context {
public:
- C_AioRead(CephContext *cct, AioCompletion *completion, char *out_buf)
- : m_cct(cct), m_completion(completion), m_req(NULL), m_out_buf(out_buf) {}
+ C_AioRead(CephContext *cct, AioCompletion *completion)
+ : m_cct(cct), m_completion(completion), m_req(NULL)
+ { }
virtual ~C_AioRead() {}
virtual void finish(int r);
void set_req(AioRead *req) {
CephContext *m_cct;
AioCompletion *m_completion;
AioRead *m_req;
- char *m_out_buf;
};
class C_AioWrite : public Context {
namespace librbd {
AioRequest::AioRequest() :
- m_ictx(NULL), m_image_ofs(0), m_block_ofs(0), m_len(0),
+ m_ictx(NULL),
m_snap_id(CEPH_NOSNAP), m_completion(NULL), m_parent_completion(NULL),
m_hide_enoent(false) {}
AioRequest::AioRequest(ImageCtx *ictx, const std::string &oid,
- uint64_t image_ofs, size_t len,
+ uint64_t objectno, uint64_t off, uint64_t len,
librados::snap_t snap_id,
Context *completion,
bool hide_enoent) {
m_ioctx.dup(ictx->data_ctx);
m_ioctx.snap_set_read(snap_id);
m_oid = oid;
- m_image_ofs = image_ofs;
- m_block_ofs = get_block_ofs(ictx->order, image_ofs);
- m_len = len;
+ m_object_no = objectno;
+ m_object_off = off;
+ m_object_len = len;
m_snap_id = snap_id;
m_completion = completion;
m_parent_completion = NULL;
}
}
// Start an asynchronous read of the given image extents from the parent
// image (m_ictx->parent).  The signature changes from a single
// (image_ofs, len) range to a vector of (offset, length) image extents.
- void AioRequest::read_from_parent(uint64_t image_ofs, size_t len)
+ void AioRequest::read_from_parent(vector<pair<uint64_t,uint64_t> >& image_extents)
{
ldout(m_ictx->cct, 20) << "read_from_parent this = " << this << dendl;
// Caller must already hold the parent lock.
assert(m_ictx->parent_lock.is_locked());
// The parent completion is created with this request and rbd_req_cb.
m_parent_completion = aio_create_completion_internal(this, rbd_req_cb);
// Data is read into the memory behind m_read_data.c_str(); the visible
// callers append a buffer to m_read_data before calling this function.
// NOTE(review): the return value of aio_read() is discarded here.
- aio_read(m_ictx->parent, image_ofs, len, m_read_data.c_str(),
+ aio_read(m_ictx->parent, image_extents, m_read_data.c_str(),
m_parent_completion);
}
+ /** read **/
+
bool AioRead::should_complete(int r)
{
ldout(m_ictx->cct, 20) << "read should_complete: r = " << r << dendl;
if (!m_tried_parent && r == -ENOENT) {
Mutex::Locker l(m_ictx->snap_lock);
Mutex::Locker l2(m_ictx->parent_lock);
- size_t len = m_ictx->parent_io_len(m_image_ofs, m_len, m_snap_id);
- if (len) {
+
+ // calculate reverse mapping onto the image
+ vector<pair<uint64_t,uint64_t> > image_extents;
+ Filer::extent_to_file(m_ictx->cct, &m_ictx->layout,
+ m_object_no, m_object_off, m_object_len,
+ image_extents);
+
+ uint64_t image_overlap = 0;
+ r = m_ictx->get_parent_overlap(m_snap_id, &image_overlap);
+ if (r < 0) {
+ assert(0 == "FIXME");
+ }
+ uint64_t object_overlap = m_ictx->prune_parent_extents(image_extents, image_overlap);
+ if (object_overlap) {
m_tried_parent = true;
- // zero the buffer so we have the full requested length result,
- // even if we actually read less due to overlap
- ceph::buffer::ptr bp(len);
- bp.zero();
+
+ ceph::buffer::ptr bp(object_overlap);
m_read_data.append(bp);
- // fill in single extent for sparse read callback
- m_ext_map[m_block_ofs] = len;
- read_from_parent(m_image_ofs, len);
+ if (object_overlap < m_object_len) {
+ ceph::buffer::ptr bp2(m_object_len - object_overlap);
+ bp2.zero();
+ m_read_data.append(bp2);
+ }
+
+ m_ext_map[m_object_off] = m_object_len; // the parent IO will read this extent
+ read_from_parent(image_extents);
return false;
}
}
int r;
if (m_sparse) {
r = m_ioctx.aio_sparse_read(m_oid, rados_completion, &m_ext_map,
- &m_read_data, m_len, m_block_ofs);
+ &m_read_data, m_object_len, m_object_off);
} else {
r = m_ioctx.aio_read(m_oid, rados_completion, &m_read_data,
- m_len, m_block_ofs);
+ m_object_len, m_object_off);
}
rados_completion->release();
return r;
}
- AbstractWrite::AbstractWrite() :
- m_state(LIBRBD_AIO_WRITE_FINAL), m_has_parent(false) {}
+ /** read **/
+
+ AbstractWrite::AbstractWrite() : m_state(LIBRBD_AIO_WRITE_FINAL) {}
AbstractWrite::AbstractWrite(ImageCtx *ictx, const std::string &oid,
- uint64_t image_ofs, size_t len,
- librados::snap_t snap_id, Context *completion,
- bool has_parent, const ::SnapContext &snapc,
+ uint64_t object_no, uint64_t object_off, uint64_t len,
+ vector<pair<uint64_t,uint64_t> >& objectx,
+ uint64_t object_overlap,
+ const ::SnapContext &snapc, librados::snap_t snap_id,
+ Context *completion,
bool hide_enoent)
- : AioRequest(ictx, oid, image_ofs, len, snap_id, completion, hide_enoent)
+ : AioRequest(ictx, oid, object_no, object_off, len, snap_id, completion, hide_enoent)
{
m_state = LIBRBD_AIO_WRITE_FINAL;
- m_has_parent = has_parent;
+
+ m_object_image_extents = objectx;
+ m_parent_overlap = object_overlap;
+
// TODO: find a way to make this less stupid
std::vector<librados::snap_t> snaps;
for (std::vector<snapid_t>::const_iterator it = snapc.snaps.begin();
void AbstractWrite::guard_write()
{
- if (m_has_parent) {
+ if (has_parent()) {
m_state = LIBRBD_AIO_WRITE_CHECK_EXISTS;
m_read.stat(NULL, NULL, NULL);
}
- ldout(m_ictx->cct, 20) << __func__ << " m_has_parent = " << m_has_parent
+ ldout(m_ictx->cct, 20) << __func__ << " has_parent = " << has_parent()
<< " m_state = " << m_state << " check exists = "
<< LIBRBD_AIO_WRITE_CHECK_EXISTS << dendl;
if (r == -ENOENT) {
Mutex::Locker l(m_ictx->snap_lock);
Mutex::Locker l2(m_ictx->parent_lock);
+
// copyup the entire object up to the overlap point
- uint64_t block_begin = m_image_ofs - m_block_ofs;
- size_t len = m_ictx->parent_io_len(block_begin,
- get_block_size(m_ictx->order),
- m_snap_id);
- if (len) {
- ldout(m_ictx->cct, 20) << "reading from parent" << dendl;
- m_state = LIBRBD_AIO_WRITE_COPYUP;
- ceph::buffer::ptr bp(len);
- m_read_data.append(bp);
- read_from_parent(block_begin, len);
- break;
- }
+ ldout(m_ictx->cct, 20) << "reading from parent " << m_object_image_extents << dendl;
+ assert(m_object_image_extents.size());
+
+ m_state = LIBRBD_AIO_WRITE_COPYUP;
+ ceph::buffer::ptr bp(m_parent_overlap);
+ m_read_data.append(bp);
+ read_from_parent(m_object_image_extents);
+ break;
}
ldout(m_ictx->cct, 20) << "no need to read from parent" << dendl;
m_state = LIBRBD_AIO_WRITE_FINAL;
{
public:
AioRequest();
- AioRequest(ImageCtx *ictx, const std::string &oid, uint64_t image_ofs,
- size_t len, librados::snap_t snap_id, Context *completion,
+ AioRequest(ImageCtx *ictx, const std::string &oid,
+ uint64_t objectno, uint64_t off, uint64_t len,
+ librados::snap_t snap_id, Context *completion,
bool hide_enoent);
virtual ~AioRequest();
- uint64_t offset()
- {
- return m_block_ofs;
- }
-
- size_t length()
- {
- return m_len;
- }
-
void complete(int r)
{
if (should_complete(r)) {
virtual int send() = 0;
protected:
- void read_from_parent(uint64_t image_ofs, size_t len);
+ void read_from_parent(vector<pair<uint64_t,uint64_t> >& image_extents);
ImageCtx *m_ictx;
librados::IoCtx m_ioctx;
std::string m_oid;
- uint64_t m_image_ofs;
- uint64_t m_block_ofs;
- size_t m_len;
+ uint64_t m_object_no, m_object_off, m_object_len;
librados::snap_t m_snap_id;
Context *m_completion;
AioCompletion *m_parent_completion;
class AioRead : public AioRequest {
public:
- AioRead(ImageCtx *ictx, const std::string &oid, uint64_t image_ofs,
- size_t len, librados::snap_t snap_id, bool sparse,
+ AioRead(ImageCtx *ictx, const std::string &oid,
+ uint64_t objectno, uint64_t offset, uint64_t len,
+ vector<pair<uint64_t,uint64_t> >& be,
+ librados::snap_t snap_id, bool sparse,
Context *completion)
- : AioRequest(ictx, oid, image_ofs, len, snap_id, completion, false),
+ : AioRequest(ictx, oid, objectno, offset, len, snap_id, completion, false),
+ m_buffer_extents(be),
m_tried_parent(false), m_sparse(sparse) {
m_ioctx.snap_set_read(m_snap_id);
}
ceph::bufferlist &data() {
return m_read_data;
}
- std::map<uint64_t, uint64_t> &ext_map() {
- return m_ext_map;
- }
+ std::map<uint64_t, uint64_t> m_ext_map;
+
+ friend class C_AioRead;
private:
- std::map<uint64_t, uint64_t> m_ext_map;
+ vector<pair<uint64_t,uint64_t> > m_buffer_extents;
bool m_tried_parent;
bool m_sparse;
};
class AbstractWrite : public AioRequest {
public:
AbstractWrite();
- AbstractWrite(ImageCtx *ictx, const std::string &oid, uint64_t image_ofs,
- size_t len, librados::snap_t snap_id, Context *completion,
- bool has_parent, const ::SnapContext &snapc, bool hide_enoent);
+ AbstractWrite(ImageCtx *ictx, const std::string &oid,
+ uint64_t object_no, uint64_t object_off, uint64_t len,
+ vector<pair<uint64_t,uint64_t> >& objectx, uint64_t object_overlap,
+ const ::SnapContext &snapc,
+ librados::snap_t snap_id,
+ Context *completion,
+ bool hide_enoent);
virtual ~AbstractWrite() {}
virtual bool should_complete(int r);
virtual int send();
void guard_write();
+ bool has_parent() const {
+ return !m_object_image_extents.empty();
+ }
+
private:
/**
* Writes go through the following state machine to
virtual void add_copyup_ops() = 0;
write_state_d m_state;
- bool m_has_parent;
+ vector<pair<uint64_t,uint64_t> > m_object_image_extents;
+ uint64_t m_parent_overlap;
librados::ObjectReadOperation m_read;
librados::ObjectWriteOperation m_write;
librados::ObjectWriteOperation m_copyup;
class AioWrite : public AbstractWrite {
public:
- AioWrite(ImageCtx *ictx, const std::string &oid, uint64_t image_ofs,
+ AioWrite(ImageCtx *ictx, const std::string &oid,
+ uint64_t object_no, uint64_t object_off,
+ vector<pair<uint64_t,uint64_t> >& objectx, uint64_t object_overlap,
const ceph::bufferlist &data, const ::SnapContext &snapc,
- librados::snap_t snap_id, bool has_parent, Context *completion)
- : AbstractWrite(ictx, oid, image_ofs, data.length(), snap_id, completion,
- has_parent, snapc, false),
+ librados::snap_t snap_id,
+ Context *completion)
+ : AbstractWrite(ictx, oid,
+ object_no, object_off, data.length(),
+ objectx, object_overlap,
+ snapc, snap_id,
+ completion, false),
m_write_data(data) {
guard_write();
- m_write.write(m_block_ofs, data);
+ m_write.write(m_object_off, data);
}
virtual ~AioWrite() {}
protected:
virtual void add_copyup_ops() {
- m_copyup.write(m_block_ofs, m_write_data);
+ m_copyup.write(m_object_off, m_write_data);
}
private:
class AioRemove : public AbstractWrite {
public:
- AioRemove(ImageCtx *ictx, const std::string &oid, uint64_t image_ofs,
+ AioRemove(ImageCtx *ictx, const std::string &oid,
+ uint64_t object_no,
+ vector<pair<uint64_t,uint64_t> >& objectx, uint64_t object_overlap,
const ::SnapContext &snapc, librados::snap_t snap_id,
- bool has_parent, Context *completion)
- : AbstractWrite(ictx, oid, image_ofs, 0, snap_id, completion,
- has_parent, snapc, true) {
- if (has_parent)
+ Context *completion)
+ : AbstractWrite(ictx, oid,
+ object_no, 0, 0,
+ objectx, object_overlap,
+ snapc, snap_id, completion,
+ true) {
+ if (has_parent())
m_write.truncate(0);
else
m_write.remove();
// Write request that truncates one data object at a given object offset.
// The constructor passes a length of 0 and `true` as the final
// (hide_enoent) argument to AbstractWrite, calls guard_write(), and queues
// the truncate on m_write.  The patch replaces the image-offset/has_parent
// parameters with an object number, object offset, the object's image
// extents (objectx) and its parent overlap.
class AioTruncate : public AbstractWrite {
public:
- AioTruncate(ImageCtx *ictx, const std::string &oid, uint64_t image_ofs,
+ AioTruncate(ImageCtx *ictx, const std::string &oid,
+ uint64_t object_no, uint64_t object_off,
+ vector<pair<uint64_t,uint64_t> >& objectx, uint64_t object_overlap,
const ::SnapContext &snapc, librados::snap_t snap_id,
- bool has_parent, Context *completion)
- : AbstractWrite(ictx, oid, image_ofs, 0, snap_id, completion,
- has_parent, snapc, true) {
+ Context *completion)
+ : AbstractWrite(ictx, oid,
+ object_no, object_off, 0,
+ objectx, object_overlap,
+ snapc, snap_id, completion,
+ true) {
guard_write();
// Uses the constructor argument directly; add_copyup_ops() below uses the
// m_object_off member that the AioRequest constructor sets from it.
- m_write.truncate(m_block_ofs);
+ m_write.truncate(object_off);
}
virtual ~AioTruncate() {}
protected:
// Queues the same truncate on the copyup operation.
virtual void add_copyup_ops() {
- m_copyup.truncate(m_block_ofs);
+ m_copyup.truncate(m_object_off);
}
};
// Write request that zeroes the range [object_off, object_off + object_len)
// within one data object.  The constructor passes `true` as the final
// (hide_enoent) argument to AbstractWrite, calls guard_write(), and queues
// the zero operation on m_write.  The patch replaces the
// image-offset/has_parent parameters with an object number, object extent,
// the object's image extents (objectx) and its parent overlap.
class AioZero : public AbstractWrite {
public:
- AioZero(ImageCtx *ictx, const std::string &oid, uint64_t image_ofs,
- size_t len, const ::SnapContext &snapc, librados::snap_t snap_id,
- bool has_parent, Context *completion)
- : AbstractWrite(ictx, oid, image_ofs, len, snap_id, completion,
- has_parent, snapc, true) {
+ AioZero(ImageCtx *ictx, const std::string &oid,
+ uint64_t object_no, uint64_t object_off, uint64_t object_len,
+ vector<pair<uint64_t,uint64_t> >& objectx, uint64_t object_overlap,
+ const ::SnapContext &snapc, librados::snap_t snap_id,
+ Context *completion)
+ : AbstractWrite(ictx, oid,
+ object_no, object_off, object_len,
+ objectx, object_overlap,
+ snapc, snap_id, completion,
+ true) {
guard_write();
// Uses the constructor arguments directly; add_copyup_ops() below uses the
// m_object_off / m_object_len members set by the AioRequest constructor.
- m_write.zero(m_block_ofs, len);
+ m_write.zero(object_off, object_len);
}
virtual ~AioZero() {}
protected:
// Queues the same zero operation on the copyup operation.
virtual void add_copyup_ops() {
- m_copyup.zero(m_block_ofs, m_len);
+ m_copyup.zero(m_object_off, m_object_len);
}
};
<< cpp_strerror(r) << dendl;
return r;
}
+
+ init_layout();
} else {
header_oid = old_header_name(name);
}
-
+ return 0;
+ }
+
// Populate `layout` from stripe_unit, stripe_count and order.  The patch
// moves this logic out of the preceding function into its own method.
+ void ImageCtx::init_layout()
+ {
// If either striping parameter is unset (0), fall back to one stripe whose
// unit is the full object size (1 << order).
if (stripe_unit == 0 || stripe_count == 0) {
stripe_unit = 1ull << order;
stripe_count = 1;
}
- // initialize layout
// Zero the whole structure first so fields not assigned below are 0.
memset(&layout, 0, sizeof(layout));
layout.fl_stripe_unit = stripe_unit;
layout.fl_stripe_count = stripe_count;
layout.fl_object_size = 1ull << order;
layout.fl_pg_pool = data_ctx.get_id(); // FIXME: pool id overflow?
- return 0;
+
+ ldout(cct, 10) << "init_layout stripe_unit " << stripe_unit
+ << " stripe_count " << stripe_count
+ << " object_size " << layout.fl_object_size << dendl;
}
void ImageCtx::perf_start(string name) {
<< parent_len << dendl;
return parent_len;
}
+
// Trim a list of (image offset, length) extents, in place, to the part that
// lies below `overlap`, and return the total length that remains.
//
// objectx: extents to trim; modified in place.
// overlap: image offset at which the parent overlap ends.
// returns: sum of the lengths of the extents left in objectx (0 if none).
//
// NOTE(review): only trailing entries are examined, so this presumably
// relies on objectx being ordered by ascending image offset; confirm
// against the callers that build it.
+ uint64_t ImageCtx::prune_parent_extents(vector<pair<uint64_t,uint64_t> >& objectx,
+ uint64_t overlap)
+ {
+ // drop extents completely beyond the overlap
+ while (!objectx.empty() && objectx.back().first >= overlap)
+ objectx.pop_back();
+
+ // trim final overlapping extent
// After the loop above back().first < overlap, so the subtraction below
// cannot underflow.
+ if (!objectx.empty() && objectx.back().first + objectx.back().second > overlap)
+ objectx.back().second = overlap - objectx.back().first;
+
+ uint64_t len = 0;
+ for (vector<pair<uint64_t,uint64_t> >::iterator p = objectx.begin();
+ p != objectx.end();
+ ++p)
+ len += p->second;
+ ldout(cct, 10) << "prune_parent_extents image overlap " << overlap
+ << ", object overlap " << len
+ << " from image extents " << objectx << dendl;
+ return len;
+ }
}
const char *snap, IoCtx& p);
~ImageCtx();
int init();
+ void init_layout();
void perf_start(std::string name);
void perf_stop();
int snap_set(std::string in_snap_name);
void unregister_watch();
size_t parent_io_len(uint64_t offset, size_t length,
librados::snap_t in_snap_id);
+ uint64_t prune_parent_extents(vector<pair<uint64_t,uint64_t> >& objectx,
+ uint64_t overlap);
+
};
}
{
C_Request *req_comp = new C_Request(m_ictx->cct, onfinish, &m_lock);
C_Read *read_comp = new C_Read(req_comp, pbl);
- uint64_t total_off = offset_of_object(oid.name, m_ictx->object_prefix,
- m_ictx->order) + off;
- AioRead *req = new AioRead(m_ictx, oid.name, total_off, len, snapid.val,
- false, read_comp);
+ uint64_t object_no = oid_to_object_no(oid.name, m_ictx->object_prefix);
+ vector<pair<uint64_t,uint64_t> > ex(1);
+ ex[0] = make_pair(off, len);
+ AioRead *req = new AioRead(m_ictx, oid.name,
+ object_no, off, len, ex,
+ snapid, false, read_comp);
read_comp->set_req(req);
req->send();
return ++m_tid;
m_ictx->snap_lock.Lock();
librados::snap_t snap_id = m_ictx->snap_id;
m_ictx->parent_lock.Lock();
- int64_t parent_pool_id = m_ictx->get_parent_pool_id(snap_id);
uint64_t overlap = 0;
m_ictx->get_parent_overlap(snap_id, &overlap);
m_ictx->parent_lock.Unlock();
m_ictx->snap_lock.Unlock();
- uint64_t total_off = offset_of_object(oid.name, m_ictx->object_prefix,
- m_ictx->order) + off;
- bool parent_exists = has_parent(parent_pool_id, total_off - off, overlap);
+ uint64_t object_no = oid_to_object_no(oid.name, m_ictx->object_prefix);
+
+ // reverse map this object extent onto the parent
+ vector<pair<uint64_t,uint64_t> > objectx;
+ Filer::extent_to_file(m_ictx->cct, &m_ictx->layout,
+ object_no, 0, m_ictx->layout.fl_object_size,
+ objectx);
+ uint64_t object_overlap = m_ictx->prune_parent_extents(objectx, overlap);
+
C_Request *req_comp = new C_Request(m_ictx->cct, oncommit, &m_lock);
- AioWrite *req = new AioWrite(m_ictx, oid.name, total_off, bl, snapc,
- snap_id, parent_exists, req_comp);
+ AioWrite *req = new AioWrite(m_ictx, oid.name,
+ object_no, off, objectx, object_overlap,
+ bl, snapc, snap_id,
+ req_comp);
req->send();
return ++m_tid;
}
return oss.str();
}
/**
 * Extract the object number from a data object name.
 *
 * The name is treated as "<object_prefix><one separator char><hex number>":
 * the prefix and a single separator character are skipped and the rest is
 * parsed as a hexadecimal integer.
 *
 * @param oid            data object name
 * @param object_prefix  object name prefix of the image
 * @return the parsed object number; 0 when no hex digits follow the
 *         prefix and separator (e.g. the name is too short or malformed)
 */
uint64_t oid_to_object_no(const string& oid, const string& object_prefix)
{
  istringstream iss(oid);
  // skip object prefix and separator
  iss.ignore(object_prefix.length() + 1);
  // Start from 0 so a failed extraction never leaves an indeterminate
  // value behind (pre-C++11 streams do not write the target on failure).
  uint64_t num = 0;
  iss >> std::hex >> num;
  return num;
}

/**
 * Compute the image byte offset at which a data object starts, assuming
 * objects of size (1 << order) laid out back to back by object number.
 *
 * @param oid            data object name
 * @param object_prefix  object name prefix of the image
 * @param order          log2 of the object size
 * @return object number parsed from oid multiplied by the object size
 */
uint64_t offset_of_object(const string &oid, const string &object_prefix,
                          uint8_t order)
{
  const uint64_t num = oid_to_object_no(oid, object_prefix);
  return num * (1ULL << order);
}
ictx->order = ictx->header.options.order;
ictx->size = ictx->header.image_size;
ictx->object_prefix = ictx->header.block_name;
+ ictx->init_layout();
} else {
do {
uint64_t incompatible_features;
if (r < 0)
return r;
- size_t total_write = 0;
- uint64_t start_block = get_block_num(ictx->order, off);
- uint64_t end_block = get_block_num(ictx->order, off + len - 1);
- uint64_t block_size = get_block_size(ictx->order);
+ r = check_io(ictx, off, len);
+ if (r < 0)
+ return r;
+
ictx->snap_lock.Lock();
snapid_t snap_id = ictx->snap_id;
::SnapContext snapc = ictx->snapc;
ictx->parent_lock.Lock();
- int64_t parent_pool_id = ictx->get_parent_pool_id(ictx->snap_id);
uint64_t overlap = 0;
ictx->get_parent_overlap(ictx->snap_id, &overlap);
ictx->parent_lock.Unlock();
ictx->snap_lock.Unlock();
- uint64_t left = len;
-
- r = check_io(ictx, off, len);
- if (r < 0)
- return r;
if (snap_id != CEPH_NOSNAP)
return -EROFS;
+ ldout(cct, 20) << " parent overlap " << overlap << dendl;
+
+ // map
+ vector<ObjectExtent> extents;
+ Filer::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout, off, len, extents);
+
+ size_t total_write = 0;
+
c->get();
c->init_time(ictx, AIO_TYPE_WRITE);
- for (uint64_t i = start_block; i <= end_block; i++) {
- string oid = get_block_oid(ictx->object_prefix, i, ictx->old_format);
- ldout(cct, 20) << "oid = '" << oid << "' i = " << i << dendl;
- uint64_t total_off = off + total_write;
- uint64_t block_ofs = get_block_ofs(ictx->order, total_off);
- uint64_t write_len = min(block_size - block_ofs, left);
+ for (vector<ObjectExtent>::iterator p = extents.begin(); p != extents.end(); ++p) {
+ ldout(cct, 20) << " oid " << p->oid << " " << p->offset << "~" << p->length
+ << " from " << p->buffer_extents << dendl;
+ // assemble extent
bufferlist bl;
- bl.append(buf + total_write, write_len);
+ for (vector<pair<uint64_t,uint64_t> >::iterator q = p->buffer_extents.begin();
+ q != p->buffer_extents.end();
+ ++q) {
+ bl.append(buf + q->first, q->second);
+ }
+
if (ictx->object_cacher) {
// may block
- ictx->write_to_cache(oid, bl, write_len, block_ofs);
+ ictx->write_to_cache(p->oid, bl, p->length, p->offset);
} else {
+ // reverse map this object extent onto the parent
+ vector<pair<uint64_t,uint64_t> > objectx;
+ Filer::extent_to_file(ictx->cct, &ictx->layout,
+ p->objectno, 0, ictx->layout.fl_object_size,
+ objectx);
+ uint64_t object_overlap = ictx->prune_parent_extents(objectx, overlap);
+
C_AioWrite *req_comp = new C_AioWrite(cct, c);
- bool parent_exists = has_parent(parent_pool_id, total_off - block_ofs, overlap);
- ldout(ictx->cct, 20) << "has_parent(pool=" << parent_pool_id
- << ", off=" << total_off
- << ", overlap=" << overlap << ") = "
- << parent_exists << dendl;
- AioWrite *req = new AioWrite(ictx, oid, total_off, bl, snapc, snap_id,
- parent_exists, req_comp);
+ AioWrite *req = new AioWrite(ictx, p->oid.name, p->objectno, p->offset,
+ objectx, object_overlap,
+ bl, snapc, snap_id, req_comp);
c->add_request();
r = req->send();
if (r < 0)
goto done;
}
- total_write += write_len;
- left -= write_len;
+ total_write += bl.length();
}
done:
c->finish_adding_requests();
if (r < 0)
return r;
+ r = check_io(ictx, off, len);
+ if (r < 0)
+ return r;
+
// TODO: check for snap
- size_t total_write = 0;
- uint64_t start_block = get_block_num(ictx->order, off);
- uint64_t end_block = get_block_num(ictx->order, off + len - 1);
- uint64_t block_size = get_block_size(ictx->order);
ictx->snap_lock.Lock();
snapid_t snap_id = ictx->snap_id;
::SnapContext snapc = ictx->snapc;
ictx->parent_lock.Lock();
- int64_t parent_pool_id = ictx->get_parent_pool_id(ictx->snap_id);
uint64_t overlap = 0;
ictx->get_parent_overlap(ictx->snap_id, &overlap);
ictx->parent_lock.Unlock();
ictx->snap_lock.Unlock();
- uint64_t left = len;
- r = check_io(ictx, off, len);
- if (r < 0)
- return r;
-
- vector<ObjectExtent> v;
- if (ictx->object_cacher)
- v.reserve(end_block - start_block + 1);
+ // map
+ vector<ObjectExtent> extents;
+ Filer::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout, off, len, extents);
c->get();
c->init_time(ictx, AIO_TYPE_DISCARD);
- for (uint64_t i = start_block; i <= end_block; i++) {
- string oid = get_block_oid(ictx->object_prefix, i, ictx->old_format);
- uint64_t total_off = off + total_write;
- uint64_t block_ofs = get_block_ofs(ictx->order, total_off);;
- uint64_t write_len = min(block_size - block_ofs, left);
-
- if (ictx->object_cacher) {
- v.push_back(ObjectExtent(oid, 0, block_ofs, write_len));
- v.back().oloc.pool = ictx->data_ctx.get_id();
- }
-
+ for (vector<ObjectExtent>::iterator p = extents.begin(); p != extents.end(); ++p) {
+ ldout(cct, 20) << " oid " << p->oid << " " << p->offset << "~" << p->length
+ << " from " << p->buffer_extents << dendl;
C_AioWrite *req_comp = new C_AioWrite(cct, c);
AbstractWrite *req;
c->add_request();
- bool parent_exists = has_parent(parent_pool_id, total_off - block_ofs, overlap);
- if (block_ofs == 0 && write_len == block_size) {
- req = new AioRemove(ictx, oid, total_off, snapc, snap_id,
- parent_exists, req_comp);
- } else if (block_ofs + write_len == block_size) {
- req = new AioTruncate(ictx, oid, total_off, snapc, snap_id,
- parent_exists, req_comp);
+ // reverse map this object extent onto the parent
+ vector<pair<uint64_t,uint64_t> > objectx;
+ uint64_t object_overlap = 0;
+ if (off < overlap) { // we might overlap...
+ Filer::extent_to_file(ictx->cct, &ictx->layout,
+ p->objectno, 0, ictx->layout.fl_object_size,
+ objectx);
+ object_overlap = ictx->prune_parent_extents(objectx, overlap);
+ }
+
+ if (p->offset == 0 && p->length == ictx->layout.fl_object_size) {
+ req = new AioRemove(ictx, p->oid.name, p->objectno, objectx, object_overlap,
+ snapc, snap_id, req_comp);
+ } else if (p->offset + p->length == ictx->layout.fl_object_size) {
+ req = new AioTruncate(ictx, p->oid.name, p->objectno, p->offset, objectx, object_overlap,
+ snapc, snap_id, req_comp);
} else {
- req = new AioZero(ictx, oid, total_off, write_len, snapc, snap_id,
- parent_exists, req_comp);
+ req = new AioZero(ictx, p->oid.name, p->objectno, p->offset, p->length,
+ objectx, object_overlap,
+ snapc, snap_id, req_comp);
}
r = req->send();
if (r < 0)
goto done;
- total_write += write_len;
- left -= write_len;
}
r = 0;
done:
if (ictx->object_cacher)
- ictx->object_cacher->discard_set(ictx->object_set, v);
+ ictx->object_cacher->discard_set(ictx->object_set, extents);
c->finish_adding_requests();
c->put();
char *buf,
AioCompletion *c)
{
- ldout(ictx->cct, 20) << "aio_read " << ictx << " off = " << off << " len = "
- << len << dendl;
+ vector<pair<uint64_t,uint64_t> > image_extents(1);
+ image_extents[0] = make_pair(off, len);
+ return aio_read(ictx, image_extents, buf, c);
+ }
- int r = ictx_check(ictx);
- if (r < 0)
- return r;
// Asynchronously read a list of (image offset, length) extents into `buf`.
// Each image extent is mapped to object extents with
// Filer::file_to_extents(); one AioRead is issued per object extent and the
// results are gathered through completion `c`.
//
// ictx:          image to read from
// image_extents: (offset, length) pairs in image address space
// buf:           destination buffer, stored in c->read_buf
// c:             completion tracking all per-object requests
// returns:       negative error from ictx_check()/check_io(), otherwise
//                `ret` (see the review notes at its declaration below)
+ int aio_read(ImageCtx *ictx, const vector<pair<uint64_t,uint64_t> >& image_extents,
+ char *buf,
+ AioCompletion *c)
+ {
+ ldout(ictx->cct, 20) << "aio_read " << ictx << " " << image_extents << dendl;
- r = check_io(ictx, off, len);
+ int r = ictx_check(ictx);
if (r < 0)
return r;
- int64_t ret;
- int total_read = 0;
- uint64_t start_block = get_block_num(ictx->order, off);
- uint64_t end_block = get_block_num(ictx->order, off + len - 1);
- uint64_t block_size = get_block_size(ictx->order);
// Sample the current snapshot id under snap_lock.
ictx->snap_lock.Lock();
snap_t snap_id = ictx->snap_id;
ictx->snap_lock.Unlock();
- uint64_t left = len;
+
+ // map
+ vector<ObjectExtent> extents;
+
// buffer_ofs is the running offset of each image extent within the
// destination buffer; after the loop it equals the total bytes requested.
+ uint64_t buffer_ofs = 0;
+ for (vector<pair<uint64_t,uint64_t> >::const_iterator p = image_extents.begin();
+ p != image_extents.end();
+ ++p) {
// Validation failures return here, before c->get() is taken below.
+ r = check_io(ictx, p->first, p->second);
+ if (r < 0)
+ return r;
+
+ Filer::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout,
+ p->first, p->second, extents, buffer_ofs);
+ buffer_ofs += p->second;
+ }
+
// NOTE(review): `ret` is declared without an initializer and is assigned
// only just before the `done:` label; the `goto done` in the loop below
// skips that assignment, so the final `return ret` yields an indeterminate
// value on that path.
+ int64_t ret;
+
+ c->read_buf = buf;
+ c->read_buf_len = buffer_ofs;
c->get();
c->init_time(ictx, AIO_TYPE_READ);
- for (uint64_t i = start_block; i <= end_block; i++) {
- string oid = get_block_oid(ictx->object_prefix, i, ictx->old_format);
- uint64_t block_ofs = get_block_ofs(ictx->order, off + total_read);
- uint64_t read_len = min(block_size - block_ofs, left);
-
- C_AioRead *req_comp = new C_AioRead(ictx->cct, c, buf + total_read);
- AioRead *req = new AioRead(ictx, oid, off + total_read,
- read_len, snap_id, true, req_comp);
+ for (vector<ObjectExtent>::iterator p = extents.begin(); p != extents.end(); ++p) {
+ ldout(ictx->cct, 20) << " oid " << p->oid << " " << p->offset << "~" << p->length
+ << " from " << p->buffer_extents << dendl;
+
+ C_AioRead *req_comp = new C_AioRead(ictx->cct, c);
+ AioRead *req = new AioRead(ictx, p->oid.name,
+ p->objectno, p->offset, p->length,
+ p->buffer_extents,
+ snap_id, true, req_comp);
req_comp->set_req(req);
c->add_request();
if (ictx->object_cacher) {
- req->ext_map()[block_ofs] = read_len;
// cache has already handled possible reading from parent, so
// this AioRead is just used to pass data to the
// AioCompletion. The AioRead isn't being used as a
// completion, so wrap the completion in a C_CacheRead to
// delete it
C_CacheRead *cache_comp = new C_CacheRead(req_comp, req);
- ictx->aio_read_from_cache(oid, &req->data(),
- read_len, block_ofs, cache_comp);
// Record one extent covering the whole requested object range before
// starting the cache read.
+ req->m_ext_map[p->offset] = p->length;
+ ictx->aio_read_from_cache(p->oid, &req->data(),
+ p->length, p->offset,
+ cache_comp);
} else {
r = req->send();
// NOTE(review): `r < 0` is redundant next to `r == -ENOENT`; as written
// only -ENOENT leaves the loop and other negative results from send()
// are ignored. Confirm whether `r != -ENOENT` was intended.
if (r < 0 && r == -ENOENT)
goto done;
}
}
-
- total_read += read_len;
- left -= read_len;
// NOTE(review): the closing braces shown around these removed lines do not
// balance with the openers visible in this hunk; check the full file.
}
- ret = total_read;
+ ret = buffer_ofs;
done:
c->finish_adding_requests();
c->put();
// The perf counters are bumped on the `goto done` path as well.
ictx->perfcounter->inc(l_librbd_aio_rd);
- ictx->perfcounter->inc(l_librbd_aio_rd_bytes, len);
+ ictx->perfcounter->inc(l_librbd_aio_rd_bytes, buffer_ofs);
return ret;
}
void image_info(const ImageCtx *ictx, image_info_t& info, size_t info_size);
std::string get_block_oid(const std::string &object_prefix, uint64_t num,
bool old_format);
+ uint64_t oid_to_object_no(const string& oid, const string& object_prefix);
uint64_t offset_of_object(const string &oid, const string &object_prefix,
uint8_t order);
uint64_t get_max_block(uint64_t size, uint8_t obj_order);
int aio_discard(ImageCtx *ictx, uint64_t off, uint64_t len, AioCompletion *c);
int aio_read(ImageCtx *ictx, uint64_t off, size_t len,
char *buf, AioCompletion *c);
+ int aio_read(ImageCtx *ictx, const vector<pair<uint64_t,uint64_t> >& image_extents,
+ char *buf,
+ AioCompletion *c);
int flush(ImageCtx *ictx);
int _flush(ImageCtx *ictx);