.set_default(true)
.set_description("skip discard (zero) of unaligned extents within an object"),
+ Option("rbd_discard_granularity_bytes", Option::TYPE_UINT,
+ Option::LEVEL_ADVANCED)
+ .set_default(64_K)
+ .set_min_max(4_K, 32_M)
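+ // the granularity must be a power of two so that discard extents can
+ // be aligned with p2roundup()/p2align() in
+ // ImageDiscardRequest::prune_object_extents()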
+ .set_validator([](std::string *value, std::string *error_message){
+ uint64_t f = strict_si_cast<uint64_t>(value->c_str(), error_message);
+ if (!error_message->empty()) {
+ return -EINVAL;
+ } else if (!isp2(f)) {
+ *error_message = "value must be a power of two";
+ return -EINVAL;
+ }
+ return 0;
+ })
+ .set_description("minimum aligned size of discard operations"),
+
Option("rbd_enable_alloc_hint", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
.set_description("when writing a object, it will issue a hint to osd backend to indicate the expected size object need"),
#define ASSIGN_OPTION(param, type) \
param = config.get_val<type>("rbd_"#param)
+ bool skip_partial_discard = true;
ASSIGN_OPTION(non_blocking_aio, bool);
ASSIGN_OPTION(cache, bool);
ASSIGN_OPTION(cache_writethrough_until_flush, bool);
ASSIGN_OPTION(mtime_update_interval, uint64_t);
ASSIGN_OPTION(atime_update_interval, uint64_t);
ASSIGN_OPTION(skip_partial_discard, bool);
+ ASSIGN_OPTION(discard_granularity_bytes, uint64_t);
ASSIGN_OPTION(blkin_trace_all, bool);
#undef ASSIGN_OPTION
if (sparse_read_threshold_bytes == 0) {
sparse_read_threshold_bytes = get_object_size();
}
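+ // the legacy rbd_skip_partial_discard=false setting maps to a granularity
+ // of zero, i.e. partial discards are passed through to the OSDs unchanged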
+ if (!skip_partial_discard) {
+ discard_granularity_bytes = 0;
+ }
io_work_queue->apply_qos_schedule_tick_min(
config.get_val<uint64_t>("rbd_qos_schedule_tick_min"));
uint64_t readahead_disable_after_bytes;
bool clone_copy_on_read;
bool enable_alloc_hint;
- bool skip_partial_discard;
+ uint32_t discard_granularity_bytes = 0;
bool blkin_trace_all;
uint64_t mirroring_replay_delay;
uint64_t mtime_update_interval;
virtual void aio_write(Extents&& image_extents, ceph::bufferlist&& bl,
int fadvise_flags, Context *on_finish) = 0;
virtual void aio_discard(uint64_t offset, uint64_t length,
- bool skip_partial_discard, Context *on_finish) = 0;
+ uint32_t discard_granularity_bytes,
+ Context *on_finish) = 0;
virtual void aio_flush(Context *on_finish) = 0;
virtual void aio_writesame(uint64_t offset, uint64_t length,
ceph::bufferlist&& bl,
template <typename I>
void ImageWriteback<I>::aio_discard(uint64_t offset, uint64_t length,
- bool skip_partial_discard,
- Context *on_finish) {
+ uint32_t discard_granularity_bytes,
+ Context *on_finish) {
CephContext *cct = m_image_ctx.cct;
ldout(cct, 20) << "offset=" << offset << ", "
<< "length=" << length << ", "
auto aio_comp = io::AioCompletion::create_and_start(on_finish, &m_image_ctx,
io::AIO_TYPE_DISCARD);
io::ImageDiscardRequest<I> req(m_image_ctx, aio_comp, {{offset, length}},
- skip_partial_discard, {});
+ discard_granularity_bytes, {});
req.set_bypass_image_cache();
req.send();
}
void aio_write(Extents &&image_extents, ceph::bufferlist&& bl,
int fadvise_flags, Context *on_finish);
void aio_discard(uint64_t offset, uint64_t length,
- bool skip_partial_discard, Context *on_finish);
+ uint32_t discard_granularity_bytes, Context *on_finish);
void aio_flush(Context *on_finish);
void aio_writesame(uint64_t offset, uint64_t length,
ceph::bufferlist&& bl,
template <typename I>
void PassthroughImageCache<I>::aio_discard(uint64_t offset, uint64_t length,
- bool skip_partial_discard, Context *on_finish) {
+ uint32_t discard_granularity_bytes,
+ Context *on_finish) {
CephContext *cct = m_image_ctx.cct;
ldout(cct, 20) << "offset=" << offset << ", "
<< "length=" << length << ", "
<< "on_finish=" << on_finish << dendl;
- m_image_writeback.aio_discard(offset, length, skip_partial_discard, on_finish);
+ m_image_writeback.aio_discard(offset, length, discard_granularity_bytes,
+ on_finish);
}
template <typename I>
void aio_write(Extents&& image_extents, ceph::bufferlist&& bl,
int fadvise_flags, Context *on_finish) override;
void aio_discard(uint64_t offset, uint64_t length,
- bool skip_partial_discard, Context *on_finish) override;
+ uint32_t discard_granularity_bytes,
+ Context *on_finish) override;
void aio_flush(Context *on_finish) override;
void aio_writesame(uint64_t offset, uint64_t length,
ceph::bufferlist&& bl,
void operator()(Discard& discard) const {
ImageRequest<I>::aio_discard(
&spec->m_image_ctx, spec->m_aio_comp, std::move(spec->m_image_extents),
- discard.skip_partial_discard, spec->m_parent_trace);
+ discard.discard_granularity_bytes, spec->m_parent_trace);
}
void operator()(Write& write) const {
};
struct Discard {
- bool skip_partial_discard;
+ uint32_t discard_granularity_bytes;
- Discard(bool skip_partial_discard)
- : skip_partial_discard(skip_partial_discard) {
+ Discard(uint32_t discard_granularity_bytes)
+ : discard_granularity_bytes(discard_granularity_bytes) {
}
};
static ImageDispatchSpec* create_discard_request(
ImageCtxT &image_ctx, AioCompletion *aio_comp, uint64_t off, uint64_t len,
- bool skip_partial_discard, const ZTracer::Trace &parent_trace) {
+ uint32_t discard_granularity_bytes, const ZTracer::Trace &parent_trace) {
return new ImageDispatchSpec(image_ctx, aio_comp, {{off, len}},
- Discard{skip_partial_discard}, 0,
- parent_trace);
+ Discard{discard_granularity_bytes},
+ 0, parent_trace);
}
static ImageDispatchSpec* create_write_request(
#include "common/perf_counters.h"
#include "common/WorkQueue.h"
#include "osdc/Striper.h"
+#include <algorithm>
#include <functional>
#define dout_subsys ceph_subsys_rbd
template <typename I>
void ImageRequest<I>::aio_discard(I *ictx, AioCompletion *c,
Extents &&image_extents,
- bool skip_partial_discard,
+ uint32_t discard_granularity_bytes,
const ZTracer::Trace &parent_trace) {
ImageDiscardRequest<I> req(*ictx, c, std::move(image_extents),
- skip_partial_discard, parent_trace);
+ discard_granularity_bytes, parent_trace);
req.send();
}
image_ctx.journal->is_journal_appending());
}
- int ret = validate_object_extents(object_extents);
+ int ret = prune_object_extents(&object_extents);
if (ret < 0) {
aio_comp->fail(ret);
return;
journal::EventEntry event_entry(
journal::AioDiscardEvent(extent.first,
extent.second,
- this->m_skip_partial_discard));
+ this->m_discard_granularity_bytes));
tid = image_ctx.journal->append_io_event(std::move(event_entry),
extent.first, extent.second,
synchronous, 0);
for (auto &extent : this->m_image_extents) {
C_AioRequest *req_comp = new C_AioRequest(aio_comp);
image_ctx.image_cache->aio_discard(extent.first, extent.second,
- this->m_skip_partial_discard, req_comp);
+ this->m_discard_granularity_bytes,
+ req_comp);
}
}
const ObjectExtent &object_extent, const ::SnapContext &snapc,
uint64_t journal_tid, Context *on_finish) {
I &image_ctx = this->m_image_ctx;
- int discard_flags = OBJECT_DISCARD_FLAG_DISABLE_CLONE_REMOVE;
- if (m_skip_partial_discard) {
- discard_flags |= OBJECT_DISCARD_FLAG_SKIP_PARTIAL;
- }
auto req = ObjectDispatchSpec::create_discard(
&image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.oid.name,
object_extent.objectno, object_extent.offset, object_extent.length, snapc,
- discard_flags, journal_tid, this->m_trace, on_finish);
+ OBJECT_DISCARD_FLAG_DISABLE_CLONE_REMOVE, journal_tid, this->m_trace,
+ on_finish);
return req;
}
image_ctx.perfcounter->inc(l_librbd_discard_bytes, length);
}
+template <typename I>
+int ImageDiscardRequest<I>::prune_object_extents(
+ ObjectExtents* object_extents) const {
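+ // a granularity of zero disables pruning: every object extent is
+ // discarded as-is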
+ if (m_discard_granularity_bytes == 0) {
+ return 0;
+ }
+
+ // Align the range to the discard_granularity_bytes boundary and skip
+ // any discards that are too small to free up any space.
+ //
+ // discard_granularity_bytes >= object_size && tail truncation
+ // is a special case for filestore
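+ //
+ // e.g. with a 64K granularity, [60K, 132K) is trimmed to [64K, 128K)
+ // while [4K, 68K) is pruned away entirely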
+ bool prune_required = false;
+ auto object_size = this->m_image_ctx.layout.object_size;
+ auto discard_granularity_bytes = std::min<uint64_t>(
+ m_discard_granularity_bytes, object_size);
+ auto xform_lambda =
+ [discard_granularity_bytes, object_size, &prune_required]
+ (ObjectExtent& object_extent) {
+ auto& offset = object_extent.offset;
+ auto& length = object_extent.length;
+ auto next_offset = offset + length;
+
+ if ((discard_granularity_bytes < object_size) ||
+ (next_offset < object_size)) {
+ static_assert(sizeof(offset) == sizeof(discard_granularity_bytes));
+ offset = p2roundup(offset, discard_granularity_bytes);
+ next_offset = p2align(next_offset, discard_granularity_bytes);
+ if (offset >= next_offset) {
+ prune_required = true;
+ length = 0;
+ } else {
+ length = next_offset - offset;
+ }
+ }
+ };
+ std::for_each(object_extents->begin(), object_extents->end(),
+ xform_lambda);
+
+ if (prune_required) {
+ // one or more object extents were skipped
+ auto remove_lambda =
+ [](const ObjectExtent& object_extent) {
+ return (object_extent.length == 0);
+ };
+ object_extents->erase(
+ std::remove_if(object_extents->begin(), object_extents->end(),
+ remove_lambda),
+ object_extents->end());
+ }
+ return 0;
+}
+
template <typename I>
void ImageFlushRequest<I>::send_request() {
I &image_ctx = this->m_image_ctx;
}
template <typename I>
-int ImageCompareAndWriteRequest<I>::validate_object_extents(
- const ObjectExtents &object_extents) const {
- if (object_extents.size() > 1)
+int ImageCompareAndWriteRequest<I>::prune_object_extents(
+ ObjectExtents* object_extents) const {
+ if (object_extents->size() > 1)
return -EINVAL;
I &image_ctx = this->m_image_ctx;
uint64_t sector_size = 512ULL;
uint64_t su = image_ctx.layout.stripe_unit;
- ObjectExtent object_extent = object_extents.front();
+ ObjectExtent object_extent = object_extents->front();
if (object_extent.offset % sector_size + object_extent.length > sector_size ||
(su != 0 && (object_extent.offset % su + object_extent.length > su)))
return -EINVAL;
Extents &&image_extents, bufferlist &&bl, int op_flags,
const ZTracer::Trace &parent_trace);
static void aio_discard(ImageCtxT *ictx, AioCompletion *c,
- Extents &&image_extents, bool skip_partial_discard,
- const ZTracer::Trace &parent_trace);
+ Extents &&image_extents,
+ uint32_t discard_granularity_bytes,
+ const ZTracer::Trace &parent_trace);
static void aio_flush(ImageCtxT *ictx, AioCompletion *c,
FlushSource flush_source,
const ZTracer::Trace &parent_trace);
void send_request() override;
- virtual int validate_object_extents(
- const ObjectExtents &object_extents) const {
+ virtual int prune_object_extents(ObjectExtents* object_extents) const {
return 0;
}
class ImageDiscardRequest : public AbstractImageWriteRequest<ImageCtxT> {
public:
ImageDiscardRequest(ImageCtxT &image_ctx, AioCompletion *aio_comp,
- Extents&& image_extents, bool skip_partial_discard,
- const ZTracer::Trace &parent_trace)
+ Extents&& image_extents,
+ uint32_t discard_granularity_bytes,
+ const ZTracer::Trace &parent_trace)
: AbstractImageWriteRequest<ImageCtxT>(
image_ctx, aio_comp, std::move(image_extents), "discard", parent_trace),
- m_skip_partial_discard(skip_partial_discard) {
+ m_discard_granularity_bytes(discard_granularity_bytes) {
}
protected:
uint64_t append_journal_event(bool synchronous) override;
void update_stats(size_t length) override;
+
+ int prune_object_extents(ObjectExtents* object_extents) const override;
+
private:
- bool m_skip_partial_discard;
+ uint32_t m_discard_granularity_bytes;
};
template <typename ImageCtxT = ImageCtx>
return "aio_compare_and_write";
}
- int validate_object_extents(
- const ObjectExtents &object_extents) const override;
+ int prune_object_extents(ObjectExtents* object_extents) const override;
private:
bufferlist m_cmp_bl;
template <typename I>
ssize_t ImageRequestWQ<I>::discard(uint64_t off, uint64_t len,
- bool skip_partial_discard) {
+ uint32_t discard_granularity_bytes) {
CephContext *cct = m_image_ctx.cct;
ldout(cct, 20) << "ictx=" << &m_image_ctx << ", off=" << off << ", "
<< "len = " << len << dendl;
C_SaferCond cond;
AioCompletion *c = AioCompletion::create(&cond);
- aio_discard(c, off, len, skip_partial_discard, false);
+ aio_discard(c, off, len, discard_granularity_bytes, false);
r = cond.wait();
if (r < 0) {
template <typename I>
void ImageRequestWQ<I>::aio_discard(AioCompletion *c, uint64_t off,
- uint64_t len, bool skip_partial_discard,
+ uint64_t len,
+ uint32_t discard_granularity_bytes,
bool native_async) {
CephContext *cct = m_image_ctx.cct;
FUNCTRACE(cct);
RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
if (m_image_ctx.non_blocking_aio || writes_blocked()) {
queue(ImageDispatchSpec<I>::create_discard_request(
- m_image_ctx, c, off, len, skip_partial_discard, trace));
+ m_image_ctx, c, off, len, discard_granularity_bytes, trace));
} else {
c->start_op();
ImageRequest<I>::aio_discard(&m_image_ctx, c, {{off, len}},
- skip_partial_discard, trace);
+ discard_granularity_bytes, trace);
finish_in_flight_io();
}
trace.event("finish");
ssize_t read(uint64_t off, uint64_t len, ReadResult &&read_result,
int op_flags);
ssize_t write(uint64_t off, uint64_t len, bufferlist &&bl, int op_flags);
- ssize_t discard(uint64_t off, uint64_t len, bool skip_partial_discard);
+ ssize_t discard(uint64_t off, uint64_t len,
+ uint32_t discard_granularity_bytes);
ssize_t writesame(uint64_t off, uint64_t len, bufferlist &&bl, int op_flags);
ssize_t compare_and_write(uint64_t off, uint64_t len,
bufferlist &&cmp_bl, bufferlist &&bl,
void aio_write(AioCompletion *c, uint64_t off, uint64_t len,
bufferlist &&bl, int op_flags, bool native_async=true);
void aio_discard(AioCompletion *c, uint64_t off, uint64_t len,
- bool skip_partial_discard, bool native_async=true);
+ uint32_t discard_granularity_bytes, bool native_async=true);
void aio_flush(AioCompletion *c, bool native_async=true);
void aio_writesame(AioCompletion *c, uint64_t off, uint64_t len,
bufferlist &&bl, int op_flags, bool native_async=true);
wr->set_op_flags2(m_op_flags);
}
-template <typename I>
-void ObjectDiscardRequest<I>::send() {
- I *image_ctx = this->m_ictx;
- auto cct = image_ctx->cct;
- if ((m_discard_flags & OBJECT_DISCARD_FLAG_SKIP_PARTIAL) != 0 &&
- this->m_object_off + this->m_object_len < image_ctx->layout.object_size) {
- ldout(cct, 20) << "oid " << this->m_oid << " " << this->m_object_off << "~"
- << this->m_object_len << ": skip partial discard" << dendl;
- this->async_finish(0);
- return;
- }
-
- AbstractObjectWriteRequest<I>::send();
-}
-
template <typename I>
void ObjectWriteSameRequest<I>::add_write_ops(
librados::ObjectWriteOperation *wr) {
return OBJECT_EXISTS;
}
- void send() override;
-
protected:
bool is_no_op_for_nonexistent_object() const override {
return (!this->has_parent());
enum {
OBJECT_DISCARD_FLAG_DISABLE_CLONE_REMOVE = 1UL << 0,
- OBJECT_DISCARD_FLAG_DISABLE_OBJECT_MAP_UPDATE = 1UL << 1,
- OBJECT_DISCARD_FLAG_SKIP_PARTIAL = 1UL << 2
+ OBJECT_DISCARD_FLAG_DISABLE_OBJECT_MAP_UPDATE = 1UL << 1
};
enum {
if (!clipped_io(event.offset, aio_comp)) {
io::ImageRequest<I>::aio_discard(&m_image_ctx, aio_comp,
{{event.offset, event.length}},
- event.skip_partial_discard, {});
+ event.discard_granularity_bytes, {});
}
if (flush_required) {
using ceph::encode;
encode(offset, bl);
encode(length, bl);
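+ // continue to encode the legacy skip_partial_discard flag so that
+ // pre-version-5 decoders can still interpret the event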
+ bool skip_partial_discard = (discard_granularity_bytes > 0);
encode(skip_partial_discard, bl);
+ encode(discard_granularity_bytes, bl);
}
void AioDiscardEvent::decode(__u8 version, bufferlist::const_iterator& it) {
using ceph::decode;
decode(offset, it);
decode(length, it);
+
+ bool skip_partial_discard = false;
if (version >= 4) {
decode(skip_partial_discard, it);
}
+
+ if (version >= 5) {
+ decode(discard_granularity_bytes, it);
+ } else {
+ if (skip_partial_discard) {
+ // use a size larger than the maximum object size, which will be
+ // truncated down to the object size during IO processing
+ discard_granularity_bytes = std::numeric_limits<uint32_t>::max();
+ } else {
+ discard_granularity_bytes = 0;
+ }
+ }
}
void AioDiscardEvent::dump(Formatter *f) const {
f->dump_unsigned("offset", offset);
f->dump_unsigned("length", length);
- f->dump_bool("skip_partial_discard", skip_partial_discard);
+ f->dump_unsigned("discard_granularity_bytes", discard_granularity_bytes);
}
uint32_t AioWriteEvent::get_fixed_size() {
}
void EventEntry::encode(bufferlist& bl) const {
- ENCODE_START(4, 1, bl);
+ ENCODE_START(5, 1, bl);
boost::apply_visitor(EncodeVisitor(bl), event);
ENCODE_FINISH(bl);
encode_metadata(bl);
void EventEntry::generate_test_instances(std::list<EventEntry *> &o) {
o.push_back(new EventEntry(AioDiscardEvent()));
- o.push_back(new EventEntry(AioDiscardEvent(123, 345, false), utime_t(1, 1)));
+ o.push_back(new EventEntry(AioDiscardEvent(123, 345, 4096), utime_t(1, 1)));
bufferlist bl;
bl.append(std::string(32, '1'));
struct AioDiscardEvent {
static const EventType TYPE = EVENT_TYPE_AIO_DISCARD;
- uint64_t offset;
- uint64_t length;
- bool skip_partial_discard;
+ uint64_t offset = 0;
+ uint64_t length = 0;
+ uint32_t discard_granularity_bytes = 0;
- AioDiscardEvent() : offset(0), length(0), skip_partial_discard(false) {
+ AioDiscardEvent() {
}
- AioDiscardEvent(uint64_t _offset, uint64_t _length, bool _skip_partial_discard)
- : offset(_offset), length(_length), skip_partial_discard(_skip_partial_discard) {
+ AioDiscardEvent(uint64_t _offset, uint64_t _length,
+ uint32_t discard_granularity_bytes)
+ : offset(_offset), length(_length),
+ discard_granularity_bytes(discard_granularity_bytes) {
}
void encode(bufferlist& bl) const;
tracepoint(librbd, discard_exit, -EINVAL);
return -EINVAL;
}
- int r = ictx->io_work_queue->discard(ofs, len, ictx->skip_partial_discard);
+ int r = ictx->io_work_queue->discard(
+ ofs, len, ictx->discard_granularity_bytes);
tracepoint(librbd, discard_exit, r);
return r;
}
bool discard_zero = ictx->config.get_val<bool>("rbd_discard_on_zeroed_write_same");
if (discard_zero && mem_is_zero(bl.c_str(), bl.length())) {
- int r = ictx->io_work_queue->discard(ofs, len, false);
+ int r = ictx->io_work_queue->discard(ofs, len, 0);
tracepoint(librbd, writesame_exit, r);
return r;
}
{
ImageCtx *ictx = (ImageCtx *)ctx;
tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, c->pc);
- ictx->io_work_queue->aio_discard(get_aio_completion(c), off, len, ictx->skip_partial_discard);
+ ictx->io_work_queue->aio_discard(
+ get_aio_completion(c), off, len, ictx->discard_granularity_bytes);
tracepoint(librbd, aio_discard_exit, 0);
return 0;
}
bool discard_zero = ictx->config.get_val<bool>("rbd_discard_on_zeroed_write_same");
if (discard_zero && mem_is_zero(bl.c_str(), bl.length())) {
- ictx->io_work_queue->aio_discard(get_aio_completion(c), off, len, false);
+ ictx->io_work_queue->aio_discard(get_aio_completion(c), off, len, 0);
tracepoint(librbd, aio_writesame_exit, 0);
return 0;
}
return -EINVAL;
}
- int r = ictx->io_work_queue->discard(ofs, len, ictx->skip_partial_discard);
+ int r = ictx->io_work_queue->discard(
+ ofs, len, ictx->discard_granularity_bytes);
tracepoint(librbd, discard_exit, r);
return r;
}
bool discard_zero = ictx->config.get_val<bool>("rbd_discard_on_zeroed_write_same");
if (discard_zero && mem_is_zero(buf, data_len)) {
- int r = ictx->io_work_queue->discard(ofs, len, false);
+ int r = ictx->io_work_queue->discard(ofs, len, 0);
tracepoint(librbd, writesame_exit, r);
return r;
}
librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c;
tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, comp->pc);
- ictx->io_work_queue->aio_discard(get_aio_completion(comp), off, len, ictx->skip_partial_discard);
+ ictx->io_work_queue->aio_discard(
+ get_aio_completion(comp), off, len, ictx->discard_granularity_bytes);
tracepoint(librbd, aio_discard_exit, 0);
return 0;
}
bool discard_zero = ictx->config.get_val<bool>("rbd_discard_on_zeroed_write_same");
if (discard_zero && mem_is_zero(buf, data_len)) {
- ictx->io_work_queue->aio_discard(get_aio_completion(comp), off, len, false);
+ ictx->io_work_queue->aio_discard(get_aio_completion(comp), off, len, 0);
tracepoint(librbd, aio_writesame_exit, 0);
return 0;
}
TEST_F(TestMockDeepCopyObjectCopyRequest, Trim) {
ASSERT_EQ(0, m_src_image_ctx->operations->metadata_set(
"conf_rbd_skip_partial_discard", "false"));
- m_src_image_ctx->skip_partial_discard = false;
+ m_src_image_ctx->discard_granularity_bytes = 0;
// scribble some data
interval_set<uint64_t> one;
uint64_t trim_offset = rand() % one.range_end();
ASSERT_LE(0, m_src_image_ctx->io_work_queue->discard(
trim_offset, one.range_end() - trim_offset,
- m_src_image_ctx->skip_partial_discard));
+ m_src_image_ctx->discard_granularity_bytes));
ASSERT_EQ(0, create_snap("copy"));
librbd::MockTestImageCtx mock_src_image_ctx(*m_src_image_ctx);
// remove the object
uint64_t object_size = 1 << m_src_image_ctx->order;
ASSERT_LE(0, m_src_image_ctx->io_work_queue->discard(
- 0, object_size, m_src_image_ctx->skip_partial_discard));
+ 0, object_size, m_src_image_ctx->discard_granularity_bytes));
ASSERT_EQ(0, create_snap("copy"));
librbd::MockTestImageCtx mock_src_image_ctx(*m_src_image_ctx);
librbd::MockTestImageCtx mock_dst_image_ctx(*m_dst_image_ctx);
}
}
+ void expect_object_discard_request(MockTestImageCtx &mock_image_ctx,
+ uint64_t object_no, uint64_t offset,
+ uint32_t length, int r) {
+ EXPECT_CALL(*mock_image_ctx.io_object_dispatcher, send(_))
+ .WillOnce(Invoke([&mock_image_ctx, object_no, offset, length, r]
+ (ObjectDispatchSpec* spec) {
+ auto* discard_spec = boost::get<ObjectDispatchSpec::DiscardRequest>(&spec->request);
+ ASSERT_TRUE(discard_spec != nullptr);
+ ASSERT_EQ(object_no, discard_spec->object_no);
+ ASSERT_EQ(offset, discard_spec->object_off);
+ ASSERT_EQ(length, discard_spec->object_len);
+
+ spec->dispatch_result = io::DISPATCH_RESULT_COMPLETE;
+ mock_image_ctx.image_ctx->op_work_queue->queue(&spec->dispatcher_ctx, r);
+ }));
+ }
void expect_object_request_send(MockTestImageCtx &mock_image_ctx,
int r) {
ASSERT_EQ(1, aio_comp_ctx_2.wait());
}
+TEST_F(TestMockIoImageRequest, PartialDiscard) {
+ librbd::ImageCtx *ictx;
+ ASSERT_EQ(0, open_image(m_image_name, &ictx));
+ ictx->discard_granularity_bytes = 0;
+
+ MockTestImageCtx mock_image_ctx(*ictx);
+ mock_image_ctx.journal = nullptr;
+
+ InSequence seq;
+ expect_get_modify_timestamp(mock_image_ctx, false);
+ expect_object_discard_request(mock_image_ctx, 0, 16, 63, 0);
+ expect_object_discard_request(mock_image_ctx, 0, 84, 100, 0);
+
+ C_SaferCond aio_comp_ctx;
+ AioCompletion *aio_comp = AioCompletion::create_and_start(
+ &aio_comp_ctx, ictx, AIO_TYPE_DISCARD);
+ MockImageDiscardRequest mock_aio_image_discard(
+ mock_image_ctx, aio_comp, {{16, 63}, {84, 100}},
+ ictx->discard_granularity_bytes, {});
+ {
+ RWLock::RLocker owner_locker(mock_image_ctx.owner_lock);
+ mock_aio_image_discard.send();
+ }
+ ASSERT_EQ(0, aio_comp_ctx.wait());
+}
+
+TEST_F(TestMockIoImageRequest, TailDiscard) {
+ librbd::ImageCtx *ictx;
+ ASSERT_EQ(0, open_image(m_image_name, &ictx));
+ ASSERT_EQ(0, resize(ictx, ictx->layout.object_size));
+ ictx->discard_granularity_bytes = 2 * ictx->layout.object_size;
+
+ MockTestImageCtx mock_image_ctx(*ictx);
+ mock_image_ctx.journal = nullptr;
+
+ InSequence seq;
+ expect_get_modify_timestamp(mock_image_ctx, false);
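+ // a discard that reaches the object tail must still be issued even though
+ // the granularity exceeds the object size (filestore truncate special case)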
+ expect_object_discard_request(
+ mock_image_ctx, 0, ictx->layout.object_size - 1024, 1024, 0);
+
+ C_SaferCond aio_comp_ctx;
+ AioCompletion *aio_comp = AioCompletion::create_and_start(
+ &aio_comp_ctx, ictx, AIO_TYPE_DISCARD);
+ MockImageDiscardRequest mock_aio_image_discard(
+ mock_image_ctx, aio_comp,
+ {{ictx->layout.object_size - 1024, 1024}},
+ ictx->discard_granularity_bytes, {});
+ {
+ RWLock::RLocker owner_locker(mock_image_ctx.owner_lock);
+ mock_aio_image_discard.send();
+ }
+ ASSERT_EQ(0, aio_comp_ctx.wait());
+}
+
+TEST_F(TestMockIoImageRequest, DiscardGranularity) {
+ librbd::ImageCtx *ictx;
+ ASSERT_EQ(0, open_image(m_image_name, &ictx));
+ ASSERT_EQ(0, resize(ictx, ictx->layout.object_size));
+ ictx->discard_granularity_bytes = 32;
+
+ MockTestImageCtx mock_image_ctx(*ictx);
+ mock_image_ctx.journal = nullptr;
+
+ InSequence seq;
+ expect_get_modify_timestamp(mock_image_ctx, false);
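+ // with a granularity of 32: {16,63} -> [32,64), {96,31} -> pruned,
+ // {84,100} -> [96,160), {object_size-33,33} -> [object_size-32,object_size)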
+ expect_object_discard_request(mock_image_ctx, 0, 32, 32, 0);
+ expect_object_discard_request(mock_image_ctx, 0, 96, 64, 0);
+ expect_object_discard_request(
+ mock_image_ctx, 0, ictx->layout.object_size - 32, 32, 0);
+
+ C_SaferCond aio_comp_ctx;
+ AioCompletion *aio_comp = AioCompletion::create_and_start(
+ &aio_comp_ctx, ictx, AIO_TYPE_DISCARD);
+ MockImageDiscardRequest mock_aio_image_discard(
+ mock_image_ctx, aio_comp,
+ {{16, 63}, {96, 31}, {84, 100}, {ictx->layout.object_size - 33, 33}},
+ ictx->discard_granularity_bytes, {});
+ {
+ RWLock::RLocker owner_locker(mock_image_ctx.owner_lock);
+ mock_aio_image_discard.send();
+ }
+ ASSERT_EQ(0, aio_comp_ctx.wait());
+}
+
TEST_F(TestMockIoImageRequest, AioWriteJournalAppendDisabled) {
REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
librbd::ImageCtx *ictx;
ASSERT_EQ(0, open_image(m_image_name, &ictx));
+ ictx->discard_granularity_bytes = 0;
MockTestImageCtx mock_image_ctx(*ictx);
MockTestJournal mock_journal;
C_SaferCond aio_comp_ctx;
AioCompletion *aio_comp = AioCompletion::create_and_start(
&aio_comp_ctx, ictx, AIO_TYPE_DISCARD);
- MockImageDiscardRequest mock_aio_image_discard(mock_image_ctx, aio_comp,
- {{0, 1}},
- ictx->skip_partial_discard,
- {});
+ MockImageDiscardRequest mock_aio_image_discard(
+ mock_image_ctx, aio_comp, {{0, 1}}, ictx->discard_granularity_bytes, {});
{
RWLock::RLocker owner_locker(mock_image_ctx.owner_lock);
mock_aio_image_discard.send();
ASSERT_EQ(0, ctx.wait());
}
-TEST_F(TestMockIoObjectRequest, SkipPartialDiscard) {
- librbd::ImageCtx *ictx;
- ASSERT_EQ(0, open_image(m_image_name, &ictx));
-
- MockTestImageCtx mock_image_ctx(*ictx);
- expect_get_object_size(mock_image_ctx);
-
- MockExclusiveLock mock_exclusive_lock;
- if (ictx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) {
- mock_image_ctx.exclusive_lock = &mock_exclusive_lock;
- expect_is_lock_owner(mock_exclusive_lock);
- }
-
- MockObjectMap mock_object_map;
- if (ictx->test_features(RBD_FEATURE_OBJECT_MAP)) {
- mock_image_ctx.object_map = &mock_object_map;
- }
-
- expect_op_work_queue(mock_image_ctx);
-
- InSequence seq;
- expect_get_parent_overlap(mock_image_ctx, CEPH_NOSNAP, 0, 0);
-
- C_SaferCond ctx;
- auto req = MockObjectDiscardRequest::create_discard(
- &mock_image_ctx, ictx->get_object_name(0), 0, 0, 1, mock_image_ctx.snapc,
- OBJECT_DISCARD_FLAG_SKIP_PARTIAL, {}, &ctx);
- req->send();
- ASSERT_EQ(0, ctx.wait());
-}
-
TEST_F(TestMockIoObjectRequest, WriteSame) {
librbd::ImageCtx *ictx;
ASSERT_EQ(0, open_image(m_image_name, &ictx));
C_SaferCond cond_ctx;
auto c = librbd::io::AioCompletion::create(&cond_ctx);
c->get();
- ictx->io_work_queue->aio_discard(c, 123, 234, ictx->skip_partial_discard);
+ ictx->io_work_queue->aio_discard(c, 123, 234,
+ ictx->discard_granularity_bytes);
ASSERT_EQ(0, c->wait_for_complete());
c->put();
// inject a discard operation into the journal
inject_into_journal(ictx,
- librbd::journal::AioDiscardEvent(0, payload.size(),
- ictx->skip_partial_discard));
+ librbd::journal::AioDiscardEvent(
+ 0, payload.size(), ictx->discard_granularity_bytes));
close_image(ictx);
// re-open the journal so that it replays the new entry
librbd::io::ReadResult{read_result}, 0);
ASSERT_EQ(0, aio_comp->wait_for_complete());
aio_comp->release();
- if (ictx->skip_partial_discard) {
+ if (ictx->discard_granularity_bytes > 0) {
ASSERT_EQ(payload, read_payload);
} else {
ASSERT_EQ(std::string(read_payload.size(), '\0'), read_payload);
// replay several envents and check the commit position
inject_into_journal(ictx,
- librbd::journal::AioDiscardEvent(0, payload.size(),
- ictx->skip_partial_discard));
+ librbd::journal::AioDiscardEvent(
+ 0, payload.size(), ictx->discard_granularity_bytes));
inject_into_journal(ictx,
- librbd::journal::AioDiscardEvent(0, payload.size(),
- ictx->skip_partial_discard));
+ librbd::journal::AioDiscardEvent(
+ 0, payload.size(), ictx->discard_granularity_bytes));
close_image(ictx);
ASSERT_EQ(0, open_image(m_image_name, &ictx));
// verify lock ordering constraints
aio_comp = new librbd::io::AioCompletion();
ictx->io_work_queue->aio_discard(aio_comp, 0, read_payload.size(),
- ictx->skip_partial_discard);
+ ictx->discard_granularity_bytes);
ASSERT_EQ(0, aio_comp->wait_for_complete());
aio_comp->release();
}
}
MOCK_METHOD3(aio_discard, void(AioCompletion *c, const Extents& image_extents,
- bool skip_partial_discard));
+ uint32_t discard_granularity_bytes));
static void aio_discard(MockReplayImageCtx *ictx, AioCompletion *c,
- Extents&& image_extents, bool skip_partial_discard,
+ Extents&& image_extents,
+ uint32_t discard_granularity_bytes,
const ZTracer::Trace &parent_trace) {
ceph_assert(s_instance != nullptr);
- s_instance->aio_discard(c, image_extents, skip_partial_discard);
+ s_instance->aio_discard(c, image_extents, discard_granularity_bytes);
}
MOCK_METHOD1(aio_flush, void(AioCompletion *c));
void expect_aio_discard(MockIoImageRequest &mock_io_image_request,
io::AioCompletion **aio_comp, uint64_t off,
- uint64_t len, bool skip_partial_discard) {
+ uint64_t len, uint32_t discard_granularity_bytes) {
EXPECT_CALL(mock_io_image_request, aio_discard(_, io::Extents{{off, len}},
- skip_partial_discard))
+ discard_granularity_bytes))
.WillOnce(SaveArg<0>(aio_comp));
}
io::AioCompletion *aio_comp;
C_SaferCond on_ready;
C_SaferCond on_safe;
- expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456, ictx->skip_partial_discard);
+ expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456,
+ ictx->discard_granularity_bytes);
when_process(mock_journal_replay,
- EventEntry{AioDiscardEvent(123, 456, ictx->skip_partial_discard)},
+ EventEntry{AioDiscardEvent(123, 456,
+ ictx->discard_granularity_bytes)},
&on_ready, &on_safe);
when_complete(mock_image_ctx, aio_comp, 0);
io::AioCompletion *aio_comp;
C_SaferCond on_ready;
C_SaferCond on_safe;
- expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456, ictx->skip_partial_discard);
+ expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456,
+ ictx->discard_granularity_bytes);
when_process(mock_journal_replay,
- EventEntry{AioDiscardEvent(123, 456, ictx->skip_partial_discard)},
+ EventEntry{AioDiscardEvent(123, 456,
+ ictx->discard_granularity_bytes)},
&on_ready, &on_safe);
when_complete(mock_image_ctx, aio_comp, -EINVAL);
io::AioCompletion *aio_comp;
io::AioCompletion *flush_comp = nullptr;
C_SaferCond on_ready;
- expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456, ictx->skip_partial_discard);
+ expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456,
+ ictx->discard_granularity_bytes);
if (i == io_count - 1) {
expect_aio_flush(mock_io_image_request, &flush_comp);
}
when_process(mock_journal_replay,
- EventEntry{AioDiscardEvent(123, 456, ictx->skip_partial_discard)},
+ EventEntry{AioDiscardEvent(123, 456,
+ ictx->discard_granularity_bytes)},
&on_ready, &on_safes[i]);
when_complete(mock_image_ctx, aio_comp, 0);
ASSERT_EQ(0, on_ready.wait());
io::AioCompletion *aio_comp = nullptr;
C_SaferCond on_ready;
C_SaferCond on_safe;
- expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456, ictx->skip_partial_discard);
+ expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456,
+ ictx->discard_granularity_bytes);
when_process(mock_journal_replay,
- EventEntry{AioDiscardEvent(123, 456, ictx->skip_partial_discard)},
+ EventEntry{AioDiscardEvent(123, 456,
+ ictx->discard_granularity_bytes)},
&on_ready, &on_safe);
when_complete(mock_image_ctx, aio_comp, 0);
io::AioCompletion *aio_comp;
C_SaferCond on_ready;
C_SaferCond on_safe;
- expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456, false);
+ expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456, 0);
when_process(mock_journal_replay,
- EventEntry{AioDiscardEvent(123, 456, false)},
+ EventEntry{AioDiscardEvent(123, 456, 0)},
&on_ready, &on_safe);
when_complete(mock_image_ctx, aio_comp, 0);
exclusive_lock(NULL), journal(NULL),
trace_endpoint(image_ctx.trace_endpoint),
sparse_read_threshold_bytes(image_ctx.sparse_read_threshold_bytes),
+ discard_granularity_bytes(image_ctx.discard_granularity_bytes),
mirroring_replay_delay(image_ctx.mirroring_replay_delay),
non_blocking_aio(image_ctx.non_blocking_aio),
blkin_trace_all(image_ctx.blkin_trace_all),
ZTracer::Endpoint trace_endpoint;
uint64_t sparse_read_threshold_bytes;
+ uint32_t discard_granularity_bytes;
int mirroring_replay_delay;
bool non_blocking_aio;
bool blkin_trace_all;
aio_write_mock(image_extents, bl, fadvise_flags, on_finish);
}
- MOCK_METHOD4(aio_discard, void(uint64_t, uint64_t, bool, Context *));
+ MOCK_METHOD4(aio_discard, void(uint64_t, uint64_t, uint32_t, Context *));
MOCK_METHOD1(aio_flush, void(Context *));
MOCK_METHOD5(aio_writesame_mock, void(uint64_t, uint64_t, ceph::bufferlist& bl,
int, Context *));
m_ictxs.insert(*ictx);
ASSERT_EQ(0, (*ictx)->state->open(flags));
- (*ictx)->skip_partial_discard = false;
+ (*ictx)->discard_granularity_bytes = 0;
}
void open_image(librados::IoCtx& io_ctx, const std::string &name,
librbd::ImageCtx **ictx) {
open_image(io_ctx, name, "", false, 0, ictx);
- ASSERT_EQ(0, (*ictx)->state->open(0));
- (*ictx)->skip_partial_discard = false;
}
void migration_prepare(librados::IoCtx& dst_io_ctx,
uint64_t len = 1 + rand() % max_size;
if (rand() % 4 == 0) {
- ASSERT_EQ((int)len, image_ctx->io_work_queue->discard(off, len, image_ctx->skip_partial_discard));
+ ASSERT_EQ((int)len,
+ image_ctx->io_work_queue->discard(
+ off, len, image_ctx->discard_granularity_bytes));
} else {
bufferlist bl;
bl.append(std::string(len, '1'));
ASSERT_EQ(0, create_snap(m_remote_image_ctx, "snap", nullptr));
- ASSERT_EQ((int)len - 2, m_remote_image_ctx->io_work_queue->discard(off + 1,
- len - 2, m_remote_image_ctx->skip_partial_discard));
+ ASSERT_EQ((int)len - 2,
+ m_remote_image_ctx->io_work_queue->discard(
+ off + 1, len - 2, m_remote_image_ctx->discard_granularity_bytes));
{
RWLock::RLocker owner_locker(m_remote_image_ctx->owner_lock);
ASSERT_EQ(0, flush(m_remote_image_ctx));
// process with delay
EXPECT_CALL(mock_replay_entry, get_data());
librbd::journal::EventEntry event_entry(
- librbd::journal::AioDiscardEvent(123, 345, false), ceph_clock_now());
+ librbd::journal::AioDiscardEvent(123, 345, 0), ceph_clock_now());
EXPECT_CALL(mock_local_replay, decode(_, _))
.WillOnce(DoAll(SetArgPointee<1>(event_entry),
Return(0)));