"rbd_journal_max_payload_bytes", false)(
"rbd_journal_max_concurrent_object_sets", false)(
"rbd_mirroring_resync_after_disconnect", false)(
- "rbd_mirroring_replay_delay", false);
+ "rbd_mirroring_replay_delay", false)(
+ "rbd_skip_partial_discard", false);
md_config_t local_config_t;
std::map<std::string, bufferlist> res;
ASSIGN_OPTION(journal_max_concurrent_object_sets);
ASSIGN_OPTION(mirroring_resync_after_disconnect);
ASSIGN_OPTION(mirroring_replay_delay);
+ ASSIGN_OPTION(skip_partial_discard);
}
ExclusiveLock<ImageCtx> *ImageCtx::create_exclusive_lock() {
int journal_max_concurrent_object_sets;
bool mirroring_resync_after_disconnect;
int mirroring_replay_delay;
+ bool skip_partial_discard;
LibrbdAdminSocketHook *asok_hook;
virtual void aio_write(Extents&& image_extents, ceph::bufferlist&& bl,
int fadvise_flags, Context *on_finish) = 0;
virtual void aio_discard(uint64_t offset, uint64_t length,
- Context *on_finish) = 0;
+ bool skip_partial_discard, Context *on_finish) = 0;
virtual void aio_flush(Context *on_finish) = 0;
virtual void aio_writesame(uint64_t offset, uint64_t length,
ceph::bufferlist&& bl,
template <typename I>
void ImageWriteback<I>::aio_discard(uint64_t offset, uint64_t length,
- Context *on_finish) {
+ bool skip_partial_discard, Context *on_finish) {
CephContext *cct = m_image_ctx.cct;
ldout(cct, 20) << "offset=" << offset << ", "
<< "length=" << length << ", "
auto aio_comp = io::AioCompletion::create_and_start(on_finish, &m_image_ctx,
io::AIO_TYPE_DISCARD);
- io::ImageDiscardRequest<I> req(m_image_ctx, aio_comp, offset, length);
+ io::ImageDiscardRequest<I> req(m_image_ctx, aio_comp, offset, length,
+ skip_partial_discard);
req.set_bypass_image_cache();
req.send();
}
int fadvise_flags, Context *on_finish);
void aio_write(Extents &&image_extents, ceph::bufferlist&& bl,
int fadvise_flags, Context *on_finish);
- void aio_discard(uint64_t offset, uint64_t length, Context *on_finish);
+ void aio_discard(uint64_t offset, uint64_t length,
+ bool skip_partial_discard, Context *on_finish);
void aio_flush(Context *on_finish);
void aio_writesame(uint64_t offset, uint64_t length,
ceph::bufferlist&& bl,
template <typename I>
void PassthroughImageCache<I>::aio_discard(uint64_t offset, uint64_t length,
- Context *on_finish) {
+ bool skip_partial_discard, Context *on_finish) {
CephContext *cct = m_image_ctx.cct;
ldout(cct, 20) << "offset=" << offset << ", "
<< "length=" << length << ", "
<< "on_finish=" << on_finish << dendl;
- m_image_writeback.aio_discard(offset, length, on_finish);
+ m_image_writeback.aio_discard(offset, length, skip_partial_discard, on_finish);
}
template <typename I>
void aio_write(Extents&& image_extents, ceph::bufferlist&& bl,
int fadvise_flags, Context *on_finish) override;
void aio_discard(uint64_t offset, uint64_t length,
- Context *on_finish) override;
+ bool skip_partial_discard, Context *on_finish) override;
void aio_flush(Context *on_finish) override;
virtual void aio_writesame(uint64_t offset, uint64_t length,
ceph::bufferlist&& bl,
template <typename I>
void ImageRequest<I>::aio_discard(I *ictx, AioCompletion *c,
- uint64_t off, uint64_t len) {
- ImageDiscardRequest<I> req(*ictx, c, off, len);
+ uint64_t off, uint64_t len,
+ bool skip_partial_discard) {
+ ImageDiscardRequest<I> req(*ictx, c, off, len, skip_partial_discard);
req.send();
}
assert(!this->m_image_extents.empty());
for (auto &extent : this->m_image_extents) {
journal::EventEntry event_entry(journal::AioDiscardEvent(extent.first,
- extent.second));
+ extent.second,
+ this->m_skip_partial_discard));
tid = image_ctx.journal->append_io_event(std::move(event_entry),
requests, extent.first,
extent.second, synchronous);
void ImageDiscardRequest<I>::prune_object_extents(ObjectExtents &object_extents) {
I &image_ctx = this->m_image_ctx;
CephContext *cct = image_ctx.cct;
- if (!cct->_conf->rbd_skip_partial_discard) {
+ if (!this->m_skip_partial_discard) {
return;
}
aio_comp->set_request_count(this->m_image_extents.size());
for (auto &extent : this->m_image_extents) {
C_AioRequest *req_comp = new C_AioRequest(aio_comp);
- image_ctx.image_cache->aio_discard(extent.first, extent.second, req_comp);
+ image_ctx.image_cache->aio_discard(extent.first, extent.second,
+ this->m_skip_partial_discard, req_comp);
}
}
static void aio_write(ImageCtxT *ictx, AioCompletion *c,
Extents &&image_extents, bufferlist &&bl, int op_flags);
static void aio_discard(ImageCtxT *ictx, AioCompletion *c, uint64_t off,
- uint64_t len);
+ uint64_t len, bool skip_partial_discard);
static void aio_flush(ImageCtxT *ictx, AioCompletion *c);
static void aio_writesame(ImageCtxT *ictx, AioCompletion *c, uint64_t off,
uint64_t len, bufferlist &&bl, int op_flags);
class ImageDiscardRequest : public AbstractImageWriteRequest<ImageCtxT> {
public:
ImageDiscardRequest(ImageCtxT &image_ctx, AioCompletion *aio_comp,
- uint64_t off, uint64_t len)
- : AbstractImageWriteRequest<ImageCtxT>(image_ctx, aio_comp, {{off, len}}) {
+ uint64_t off, uint64_t len, bool skip_partial_discard)
+ : AbstractImageWriteRequest<ImageCtxT>(image_ctx, aio_comp, {{off, len}}),
+ m_skip_partial_discard(skip_partial_discard) {
}
protected:
uint64_t append_journal_event(const ObjectRequests &requests,
bool synchronous) override;
void update_stats(size_t length) override;
+private:
+ bool m_skip_partial_discard;
};
template <typename ImageCtxT = ImageCtx>
return len;
}
-int ImageRequestWQ::discard(uint64_t off, uint64_t len) {
+int ImageRequestWQ::discard(uint64_t off, uint64_t len, bool skip_partial_discard) {
CephContext *cct = m_image_ctx.cct;
ldout(cct, 20) << "discard: ictx=" << &m_image_ctx << ", off=" << off << ", "
<< "len = " << len << dendl;
C_SaferCond cond;
AioCompletion *c = AioCompletion::create(&cond);
- aio_discard(c, off, len, false);
+ aio_discard(c, off, len, skip_partial_discard, false);
r = cond.wait();
if (r < 0) {
}
void ImageRequestWQ::aio_discard(AioCompletion *c, uint64_t off,
- uint64_t len, bool native_async) {
+ uint64_t len, bool skip_partial_discard,
+ bool native_async) {
c->init_time(&m_image_ctx, AIO_TYPE_DISCARD);
CephContext *cct = m_image_ctx.cct;
ldout(cct, 20) << "aio_discard: ictx=" << &m_image_ctx << ", "
RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
if (m_image_ctx.non_blocking_aio || writes_blocked()) {
- queue(new ImageDiscardRequest<>(m_image_ctx, c, off, len));
+ queue(new ImageDiscardRequest<>(m_image_ctx, c, off, len, skip_partial_discard));
} else {
c->start_op();
- ImageRequest<>::aio_discard(&m_image_ctx, c, off, len);
+ ImageRequest<>::aio_discard(&m_image_ctx, c, off, len, skip_partial_discard);
finish_in_flight_op();
}
}
ssize_t read(uint64_t off, uint64_t len, ReadResult &&read_result,
int op_flags);
ssize_t write(uint64_t off, uint64_t len, bufferlist &&bl, int op_flags);
- int discard(uint64_t off, uint64_t len);
+ int discard(uint64_t off, uint64_t len, bool skip_partial_discard);
ssize_t writesame(uint64_t off, uint64_t len, bufferlist &&bl, int op_flags);
void aio_read(AioCompletion *c, uint64_t off, uint64_t len,
void aio_write(AioCompletion *c, uint64_t off, uint64_t len,
bufferlist &&bl, int op_flags, bool native_async=true);
void aio_discard(AioCompletion *c, uint64_t off, uint64_t len,
- bool native_async=true);
+ bool skip_partial_discard, bool native_async=true);
void aio_flush(AioCompletion *c, bool native_async=true);
void aio_writesame(AioCompletion *c, uint64_t off, uint64_t len,
bufferlist &&bl, int op_flags, bool native_async=true);
io::AIO_TYPE_DISCARD,
&flush_required);
io::ImageRequest<I>::aio_discard(&m_image_ctx, aio_comp, event.offset,
- event.length);
+ event.length, event.skip_partial_discard);
if (flush_required) {
m_lock.Lock();
auto flush_comp = create_aio_flush_completion(nullptr);
void AioDiscardEvent::encode(bufferlist& bl) const {
::encode(offset, bl);
::encode(length, bl);
+ ::encode(skip_partial_discard, bl);
}
void AioDiscardEvent::decode(__u8 version, bufferlist::iterator& it) {
::decode(offset, it);
::decode(length, it);
+ if (version >= 5) {
+ ::decode(skip_partial_discard, it);
+ }
}
void AioDiscardEvent::dump(Formatter *f) const {
f->dump_unsigned("offset", offset);
f->dump_unsigned("length", length);
+ f->dump_bool("skip_partial_discard", skip_partial_discard);
}
uint32_t AioWriteEvent::get_fixed_size() {
}
void EventEntry::encode(bufferlist& bl) const {
- ENCODE_START(4, 1, bl);
+ ENCODE_START(5, 1, bl);
boost::apply_visitor(EncodeVisitor(bl), event);
ENCODE_FINISH(bl);
encode_metadata(bl);
void EventEntry::generate_test_instances(std::list<EventEntry *> &o) {
o.push_back(new EventEntry(AioDiscardEvent()));
- o.push_back(new EventEntry(AioDiscardEvent(123, 345), utime_t(1, 1)));
+ o.push_back(new EventEntry(AioDiscardEvent(123, 345, false), utime_t(1, 1)));
bufferlist bl;
bl.append(std::string(32, '1'));
uint64_t offset;
uint64_t length;
+ bool skip_partial_discard;
- AioDiscardEvent() : offset(0), length(0) {
+ AioDiscardEvent() : offset(0), length(0), skip_partial_discard(false) {
}
- AioDiscardEvent(uint64_t _offset, uint64_t _length)
- : offset(_offset), length(_length) {
+ AioDiscardEvent(uint64_t _offset, uint64_t _length, bool _skip_partial_discard)
+ : offset(_offset), length(_length), skip_partial_discard(_skip_partial_discard) {
}
void encode(bufferlist& bl) const;
tracepoint(librbd, discard_exit, -EINVAL);
return -EINVAL;
}
- int r = ictx->io_work_queue->discard(ofs, len);
+ int r = ictx->io_work_queue->discard(ofs, len, ictx->skip_partial_discard);
tracepoint(librbd, discard_exit, r);
return r;
}
}
if (mem_is_zero(bl.c_str(), bl.length())) {
- int r = ictx->io_work_queue->discard(ofs, len);
+ int r = ictx->io_work_queue->discard(ofs, len, false);
tracepoint(librbd, writesame_exit, r);
return r;
}
{
ImageCtx *ictx = (ImageCtx *)ctx;
tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, c->pc);
- ictx->io_work_queue->aio_discard(get_aio_completion(c), off, len);
+ ictx->io_work_queue->aio_discard(get_aio_completion(c), off, len, ictx->skip_partial_discard);
tracepoint(librbd, aio_discard_exit, 0);
return 0;
}
}
if (mem_is_zero(bl.c_str(), bl.length())) {
- ictx->io_work_queue->aio_discard(get_aio_completion(c), off, len);
+ ictx->io_work_queue->aio_discard(get_aio_completion(c), off, len, false);
tracepoint(librbd, aio_writesame_exit, 0);
return 0;
}
{
librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
tracepoint(librbd, discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, ofs, len);
- int r = ictx->io_work_queue->discard(ofs, len);
+ int r = ictx->io_work_queue->discard(ofs, len, ictx->skip_partial_discard);
tracepoint(librbd, discard_exit, r);
return r;
}
}
if (mem_is_zero(buf, data_len)) {
- int r = ictx->io_work_queue->discard(ofs, len);
+ int r = ictx->io_work_queue->discard(ofs, len, false);
tracepoint(librbd, writesame_exit, r);
return r;
}
librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c;
tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, comp->pc);
- ictx->io_work_queue->aio_discard(get_aio_completion(comp), off, len);
+ ictx->io_work_queue->aio_discard(get_aio_completion(comp), off, len, ictx->skip_partial_discard);
tracepoint(librbd, aio_discard_exit, 0);
return 0;
}
}
if (mem_is_zero(buf, data_len)) {
- ictx->io_work_queue->aio_discard(get_aio_completion(comp), off, len);
+ ictx->io_work_queue->aio_discard(get_aio_completion(comp), off, len, false);
tracepoint(librbd, aio_writesame_exit, 0);
return 0;
}
C_SaferCond aio_comp_ctx;
AioCompletion *aio_comp = AioCompletion::create_and_start(
&aio_comp_ctx, ictx, AIO_TYPE_DISCARD);
- MockImageDiscardRequest mock_aio_image_discard(mock_image_ctx, aio_comp, 0, 1);
+ MockImageDiscardRequest mock_aio_image_discard(mock_image_ctx, aio_comp,
+ 0, 1, ictx->skip_partial_discard);
{
RWLock::RLocker owner_locker(mock_image_ctx.owner_lock);
mock_aio_image_discard.send();
C_SaferCond cond_ctx;
auto c = librbd::io::AioCompletion::create(&cond_ctx);
c->get();
- ictx->io_work_queue->aio_discard(c, 123, 234);
+ ictx->io_work_queue->aio_discard(c, 123, 234, cct->_conf->rbd_skip_partial_discard);
ASSERT_EQ(0, c->wait_for_complete());
c->put();
// inject a discard operation into the journal
inject_into_journal(ictx,
- librbd::journal::AioDiscardEvent(0, payload.size()));
+ librbd::journal::AioDiscardEvent(0, payload.size(), ictx->skip_partial_discard));
close_image(ictx);
// re-open the journal so that it replays the new entry
// replay several envents and check the commit position
inject_into_journal(ictx,
- librbd::journal::AioDiscardEvent(0, payload.size()));
+ librbd::journal::AioDiscardEvent(0, payload.size(), ictx->cct->_conf->rbd_skip_partial_discard));
inject_into_journal(ictx,
- librbd::journal::AioDiscardEvent(0, payload.size()));
+ librbd::journal::AioDiscardEvent(0, payload.size(), ictx->cct->_conf->rbd_skip_partial_discard));
close_image(ictx);
ASSERT_EQ(0, open_image(m_image_name, &ictx));
// verify lock ordering constraints
aio_comp = new librbd::io::AioCompletion();
- ictx->io_work_queue->aio_discard(aio_comp, 0, read_payload.size());
+ ictx->io_work_queue->aio_discard(aio_comp, 0, read_payload.size(), ictx->cct->_conf->rbd_skip_partial_discard);
ASSERT_EQ(0, aio_comp->wait_for_complete());
aio_comp->release();
}
s_instance->aio_write(c, image_extents, bl, op_flags);
}
- MOCK_METHOD3(aio_discard, void(AioCompletion *c, uint64_t off, uint64_t len));
+ MOCK_METHOD4(aio_discard, void(AioCompletion *c, uint64_t off, uint64_t len,
+ bool skip_partial_discard));
static void aio_discard(MockReplayImageCtx *ictx, AioCompletion *c,
- uint64_t off, uint64_t len) {
+ uint64_t off, uint64_t len, bool skip_partial_discard) {
assert(s_instance != nullptr);
- s_instance->aio_discard(c, off, len);
+ s_instance->aio_discard(c, off, len, skip_partial_discard);
}
MOCK_METHOD1(aio_flush, void(AioCompletion *c));
void expect_aio_discard(MockIoImageRequest &mock_io_image_request,
io::AioCompletion **aio_comp, uint64_t off,
- uint64_t len) {
- EXPECT_CALL(mock_io_image_request, aio_discard(_, off, len))
+ uint64_t len, bool skip_partial_discard) {
+ EXPECT_CALL(mock_io_image_request, aio_discard(_, off, len, skip_partial_discard))
.WillOnce(SaveArg<0>(aio_comp));
}
io::AioCompletion *aio_comp;
C_SaferCond on_ready;
C_SaferCond on_safe;
- expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456);
+ expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456, ictx->skip_partial_discard);
when_process(mock_journal_replay,
- EventEntry{AioDiscardEvent(123, 456)},
+ EventEntry{AioDiscardEvent(123, 456, ictx->skip_partial_discard)},
&on_ready, &on_safe);
when_complete(mock_image_ctx, aio_comp, 0);
io::AioCompletion *aio_comp;
C_SaferCond on_ready;
C_SaferCond on_safe;
- expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456);
+ expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456, ictx->skip_partial_discard);
when_process(mock_journal_replay,
- EventEntry{AioDiscardEvent(123, 456)},
+ EventEntry{AioDiscardEvent(123, 456, ictx->skip_partial_discard)},
&on_ready, &on_safe);
when_complete(mock_image_ctx, aio_comp, -EINVAL);
io::AioCompletion *aio_comp;
io::AioCompletion *flush_comp = nullptr;
C_SaferCond on_ready;
- expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456);
+ expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456, ictx->skip_partial_discard);
if (i == io_count - 1) {
expect_aio_flush(mock_io_image_request, &flush_comp);
}
when_process(mock_journal_replay,
- EventEntry{AioDiscardEvent(123, 456)},
+ EventEntry{AioDiscardEvent(123, 456, ictx->skip_partial_discard)},
&on_ready, &on_safes[i]);
when_complete(mock_image_ctx, aio_comp, 0);
ASSERT_EQ(0, on_ready.wait());
io::AioCompletion *aio_comp = nullptr;
C_SaferCond on_ready;
C_SaferCond on_safe;
- expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456);
+ expect_aio_discard(mock_io_image_request, &aio_comp, 123, 456, ictx->skip_partial_discard);
when_process(mock_journal_replay,
- EventEntry{AioDiscardEvent(123, 456)},
+ EventEntry{AioDiscardEvent(123, 456, ictx->skip_partial_discard)},
&on_ready, &on_safe);
when_complete(mock_image_ctx, aio_comp, 0);
aio_write_mock(image_extents, bl, fadvise_flags, on_finish);
}
- MOCK_METHOD3(aio_discard, void(uint64_t, uint64_t, Context *));
+ MOCK_METHOD4(aio_discard, void(uint64_t, uint64_t, bool, Context *));
MOCK_METHOD1(aio_flush, void(Context *));
MOCK_METHOD5(aio_writesame_mock, void(uint64_t, uint64_t, ceph::bufferlist& bl,
int, Context *));
Context *ctx = new DummyContext();
auto c = librbd::io::AioCompletion::create(ctx);
c->get();
- ictx->io_work_queue->aio_discard(c, 0, 256);
+ ictx->io_work_queue->aio_discard(c, 0, 256, false);
bool is_owner;
ASSERT_EQ(0, librbd::is_exclusive_lock_owner(ictx, &is_owner));
read_bl.push_back(read_ptr);
ASSERT_EQ(static_cast<int>(m_image_size - 64),
- ictx2->io_work_queue->discard(32, m_image_size - 64));
+ ictx2->io_work_queue->discard(32, m_image_size - 64, false));
ASSERT_EQ(0, librbd::snap_set(ictx2, "snap1"));
{
// trim the object
uint64_t trim_offset = rand() % one.range_end();
ASSERT_LE(0, m_remote_image_ctx->io_work_queue->discard(
- trim_offset, one.range_end() - trim_offset));
+ trim_offset, one.range_end() - trim_offset, m_remote_image_ctx->skip_partial_discard));
ASSERT_EQ(0, create_snap("sync"));
librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
// remove the object
uint64_t object_size = 1 << m_remote_image_ctx->order;
- ASSERT_LE(0, m_remote_image_ctx->io_work_queue->discard(0, object_size));
+ ASSERT_LE(0, m_remote_image_ctx->io_work_queue->discard(0, object_size, m_remote_image_ctx->skip_partial_discard));
ASSERT_EQ(0, create_snap("sync"));
librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
uint64_t len = 1 + rand() % max_size;
if (rand() % 4 == 0) {
- ASSERT_EQ((int)len, image_ctx->io_work_queue->discard(off, len));
+ ASSERT_EQ((int)len, image_ctx->io_work_queue->discard(off, len, image_ctx->skip_partial_discard));
} else {
bufferlist bl;
bl.append(std::string(len, '1'));
ASSERT_EQ(0, create_snap(m_remote_image_ctx, "snap", nullptr));
ASSERT_EQ((int)len - 2, m_remote_image_ctx->io_work_queue->discard(off + 1,
- len - 2));
+ len - 2, m_remote_image_ctx->skip_partial_discard));
{
RWLock::RLocker owner_locker(m_remote_image_ctx->owner_lock);
ASSERT_EQ(0, m_remote_image_ctx->flush());
// process with delay
EXPECT_CALL(mock_replay_entry, get_data());
librbd::journal::EventEntry event_entry(
- librbd::journal::AioDiscardEvent(123, 345), ceph_clock_now());
+ librbd::journal::AioDiscardEvent(123, 345, false), ceph_clock_now());
EXPECT_CALL(mock_local_replay, decode(_, _))
.WillOnce(DoAll(SetArgPointee<1>(event_entry),
Return(0)));