From ba6c6d5fa8c5dea3959cbe55c4662f5adea5a30c Mon Sep 17 00:00:00 2001 From: Josef Johansson Date: Mon, 2 Jan 2023 14:12:53 +0100 Subject: [PATCH] librbd: Fix local rbd mirror journals growing forever This commit fixes commit 7ca1bab90f3 by pushing properly aligned discards back to m_image_extents, if corrected. If discards are misaligned (off 0, len 4608, gran=4096), they are corrected properly, but only in object_extents and not in m_image_extents. When journal_append_event is triggered it will only append from m_image_extents and does not now about the alignment fixes. In commit_io_events_extent it will log a message and return without completing the io since the larger misaligned area was sent to the journal. This will in turn break rbd journal mirroring since the local client will wait indefinately on the commit to be completed, which it never does. This does not effect rbd-mirror in any way, which may be confusing and dangerous since it's only rbd-mirror that updates ceph health, and not the local client. Setting `rbd_skip_partial_discard = false` under client will restore the pre 7ca1bab behaviour and thus not trigger the bug with journals growing. This will set `rbd_discard_granularity_bytes = 0` internally. This setting is only changed during startup of a client. Fixes: 7ca1bab90f3db3aaaa4cdbfc1f18e9f5cfbf5568 Fixes: https://tracker.ceph.com/issues/57396 Signed-off-by: Josef Johansson (cherry picked from commit 21a26a752843295ff946d1543c2f5f9fac764593) Conflicts: src/librbd/io/ImageRequest.cc [ commit b2c88820923e ("librbd: return area from extents_to_file()") not in quincy ] src/test/librbd/io/test_mock_ImageRequest.cc [ commit b9a2384cdc43 ("librbd: propagate area down to file_to_extents()") not in quincy ] --- qa/workunits/rbd/rbd-nbd.sh | 57 +++++++ src/librbd/io/ImageRequest.cc | 28 +++- src/test/librbd/io/test_mock_ImageRequest.cc | 148 +++++++++++++++++++ 3 files changed, 231 insertions(+), 2 deletions(-) diff --git a/qa/workunits/rbd/rbd-nbd.sh b/qa/workunits/rbd/rbd-nbd.sh index ffcb585df69e9..2229b39c92dc5 100755 --- a/qa/workunits/rbd/rbd-nbd.sh +++ b/qa/workunits/rbd/rbd-nbd.sh @@ -442,6 +442,63 @@ _sudo rbd device detach ${DEV} --device-type nbd expect_false get_pid ${POOL} DEV= +# test discard granularity with journaling +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +rbd config image set ${POOL}/${IMAGE} rbd_discard_granularity_bytes 4096 +rbd feature enable ${POOL}/${IMAGE} journaling +# since a discard will now be pruned to only whole blocks (0..4095, 4096..8191) +# let us test all the cases around those alignments. 512 is the smallest +# possible block blkdiscard allows us to use. Thus the test checks +# 512 before, on the alignment, 512 after. +_sudo blkdiscard --offset 0 --length $((4096-512)) ${DEV} +_sudo blkdiscard --offset 0 --length 4096 ${DEV} +_sudo blkdiscard --offset 0 --length $((4096+512)) ${DEV} +_sudo blkdiscard --offset 512 --length $((8192-1024)) ${DEV} +_sudo blkdiscard --offset 512 --length $((8192-512)) ${DEV} +_sudo blkdiscard --offset 512 --length 8192 ${DEV} +expected='Entry: tag_id=1, commit_tid=1 +{ + "event_type": "AioDiscard", + "offset": 0, + "length": 4096, + "discard_granularity_bytes": 4096, +Entry: tag_id=1, commit_tid=2 +{ + "event_type": "AioDiscard", + "offset": 0, + "length": 4096, + "discard_granularity_bytes": 4096, +Entry: tag_id=1, commit_tid=3 +{ + "event_type": "AioDiscard", + "offset": 4096, + "length": 4096, + "discard_granularity_bytes": 4096, +Entry: tag_id=1, commit_tid=4 +{ + "event_type": "AioDiscard", + "offset": 4096, + "length": 4096, + "discard_granularity_bytes": 4096,' +out=`rbd journal inspect --pool ${POOL} --image ${IMAGE} --verbose | \ + grep -A5 --no-group-separator Entry` +rbd config image rm ${POOL}/${IMAGE} rbd_discard_granularity_bytes +[ "${out}" == "${expected}" ] +# wait for commit log to be empty, 10 seconds should be well enough +tries=0 +queue_length=`rbd journal inspect --pool ${POOL} --image ${IMAGE} | awk '/entries inspected/ {print $1}'` +while [ ${tries} -lt 10 ] && [ ${queue_length} -gt 0 ]; do + rbd journal inspect --pool ${POOL} --image ${IMAGE} --verbose + sleep 1 + queue_length=`rbd journal inspect --pool ${POOL} --image ${IMAGE} | awk '/entries inspected/ {print $1}'` + tries=$((tries+1)) +done +rbd feature disable ${POOL}/${IMAGE} journaling +[ ${queue_length} -eq 0 ] +unmap_device ${DEV} ${PID} +DEV= + # test that rbd_op_threads setting takes effect EXPECTED=`ceph-conf --show-config-value librados_thread_count` DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` diff --git a/src/librbd/io/ImageRequest.cc b/src/librbd/io/ImageRequest.cc index d00292e52d114..9c87c9d8c5fce 100644 --- a/src/librbd/io/ImageRequest.cc +++ b/src/librbd/io/ImageRequest.cc @@ -455,6 +455,19 @@ void AbstractImageWriteRequest::send_request() { return; } + // reflect changes in object_extents back to m_image_extents + if (ret == 1) { + this->m_image_extents.clear(); + for (auto& object_extent : object_extents) { + io::Extents image_extents; + io::util::extent_to_file(&image_ctx, object_extent.object_no, + object_extent.offset, object_extent.length, + image_extents); + this->m_image_extents.insert(this->m_image_extents.end(), + image_extents.begin(), image_extents.end()); + } + } + aio_comp->set_request_count(object_extents.size()); if (!object_extents.empty()) { uint64_t journal_tid = 0; @@ -601,11 +614,12 @@ int ImageDiscardRequest::prune_object_extents( // discard_granularity_bytes >= object_size && tail truncation // is a special case for filestore bool prune_required = false; + bool length_modified = false; auto object_size = this->m_image_ctx.layout.object_size; auto discard_granularity_bytes = std::min(m_discard_granularity_bytes, object_size); auto xform_lambda = - [discard_granularity_bytes, object_size, &prune_required] + [discard_granularity_bytes, object_size, &prune_required, &length_modified] (LightweightObjectExtent& object_extent) { auto& offset = object_extent.offset; auto& length = object_extent.length; @@ -619,7 +633,11 @@ int ImageDiscardRequest::prune_object_extents( prune_required = true; length = 0; } else { - length = next_offset - offset; + auto new_length = next_offset - offset; + if (length != new_length) { + length_modified = true; + length = new_length; + } } } }; @@ -637,6 +655,12 @@ int ImageDiscardRequest::prune_object_extents( remove_lambda), object_extents->end()); } + + // object extents were modified, image extents needs updating + if (length_modified || prune_required) { + return 1; + } + return 0; } diff --git a/src/test/librbd/io/test_mock_ImageRequest.cc b/src/test/librbd/io/test_mock_ImageRequest.cc index cd26fb65e70ed..ded809cd3fc50 100644 --- a/src/test/librbd/io/test_mock_ImageRequest.cc +++ b/src/test/librbd/io/test_mock_ImageRequest.cc @@ -115,6 +115,16 @@ struct TestMockIoImageRequest : public TestMockFixture { } } + void expect_journal_append_io_event(MockTestJournal &mock_journal, uint64_t journal_tid, + uint64_t offset, size_t length) { + EXPECT_CALL(mock_journal, append_io_event_mock(_, offset, length, _, _)) + .WillOnce(Return(journal_tid)); + } + + void expect_journal_append_io_event_times(MockTestJournal &mock_journal, int n) { + EXPECT_CALL(mock_journal, append_io_event_mock(_, _, _, _, _)).Times(n); + } + void expect_object_discard_request(MockTestImageCtx &mock_image_ctx, uint64_t object_no, uint64_t offset, uint32_t length, int r) { @@ -132,6 +142,10 @@ struct TestMockIoImageRequest : public TestMockFixture { })); } + void expect_object_request_send_times(MockTestImageCtx &mock_image_ctx, int n) { + EXPECT_CALL(*mock_image_ctx.io_object_dispatcher, send(_)).Times(n); + } + void expect_object_request_send(MockTestImageCtx &mock_image_ctx, int r) { EXPECT_CALL(*mock_image_ctx.io_object_dispatcher, send(_)) @@ -362,6 +376,140 @@ TEST_F(TestMockIoImageRequest, DiscardGranularity) { ASSERT_EQ(0, aio_comp_ctx.wait()); } +TEST_F(TestMockIoImageRequest, PartialDiscardJournalAppendEnabled) { + REQUIRE_FEATURE(RBD_FEATURE_JOURNALING); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + ictx->discard_granularity_bytes = 0; + + MockTestImageCtx mock_image_ctx(*ictx); + MockTestJournal mock_journal; + mock_image_ctx.journal = &mock_journal; + + InSequence seq; + expect_get_modify_timestamp(mock_image_ctx, false); + expect_is_journal_appending(mock_journal, true); + expect_journal_append_io_event(mock_journal, 0, 16, 63); + expect_journal_append_io_event(mock_journal, 1, 84, 100); + expect_object_discard_request(mock_image_ctx, 0, 16, 63, 0); + expect_object_discard_request(mock_image_ctx, 0, 84, 100, 0); + + C_SaferCond aio_comp_ctx; + AioCompletion *aio_comp = AioCompletion::create_and_start( + &aio_comp_ctx, ictx, AIO_TYPE_DISCARD); + MockImageDiscardRequest mock_aio_image_discard( + mock_image_ctx, aio_comp, {{16, 63}, {84, 100}}, + ictx->discard_granularity_bytes, mock_image_ctx.get_data_io_context(), {}); + { + std::shared_lock owner_locker{mock_image_ctx.owner_lock}; + mock_aio_image_discard.send(); + } + ASSERT_EQ(0, aio_comp_ctx.wait()); +} + +TEST_F(TestMockIoImageRequest, TailDiscardJournalAppendEnabled) { + REQUIRE_FEATURE(RBD_FEATURE_JOURNALING); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + ASSERT_EQ(0, resize(ictx, ictx->layout.object_size)); + ictx->discard_granularity_bytes = 2 * ictx->layout.object_size; + + MockTestImageCtx mock_image_ctx(*ictx); + MockTestJournal mock_journal; + mock_image_ctx.journal = &mock_journal; + + InSequence seq; + expect_get_modify_timestamp(mock_image_ctx, false); + expect_is_journal_appending(mock_journal, true); + expect_journal_append_io_event( + mock_journal, 0, ictx->layout.object_size - 1024, 1024); + expect_object_discard_request( + mock_image_ctx, 0, ictx->layout.object_size - 1024, 1024, 0); + + C_SaferCond aio_comp_ctx; + AioCompletion *aio_comp = AioCompletion::create_and_start( + &aio_comp_ctx, ictx, AIO_TYPE_DISCARD); + MockImageDiscardRequest mock_aio_image_discard( + mock_image_ctx, aio_comp, {{ictx->layout.object_size - 1024, 1024}}, + ictx->discard_granularity_bytes, mock_image_ctx.get_data_io_context(), {}); + { + std::shared_lock owner_locker{mock_image_ctx.owner_lock}; + mock_aio_image_discard.send(); + } + ASSERT_EQ(0, aio_comp_ctx.wait()); +} + +TEST_F(TestMockIoImageRequest, EmptyDiscardJournalAppendEnabled) { + REQUIRE_FEATURE(RBD_FEATURE_JOURNALING); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + ASSERT_EQ(0, resize(ictx, ictx->layout.object_size)); + ictx->discard_granularity_bytes = 32; + + MockTestImageCtx mock_image_ctx(*ictx); + MockTestJournal mock_journal; + mock_image_ctx.journal = &mock_journal; + + InSequence seq; + expect_get_modify_timestamp(mock_image_ctx, false); + expect_is_journal_appending(mock_journal, true); + expect_journal_append_io_event_times(mock_journal, 0); + expect_object_request_send_times(mock_image_ctx, 0); + + C_SaferCond aio_comp_ctx; + AioCompletion *aio_comp = AioCompletion::create_and_start( + &aio_comp_ctx, ictx, AIO_TYPE_DISCARD); + MockImageDiscardRequest mock_aio_image_discard( + mock_image_ctx, aio_comp, {{96, 31}}, ictx->discard_granularity_bytes, + mock_image_ctx.get_data_io_context(), {}); + { + std::shared_lock owner_locker{mock_image_ctx.owner_lock}; + mock_aio_image_discard.send(); + } + ASSERT_EQ(0, aio_comp_ctx.wait()); +} + +TEST_F(TestMockIoImageRequest, DiscardGranularityJournalAppendEnabled) { + REQUIRE_FEATURE(RBD_FEATURE_JOURNALING); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + ASSERT_EQ(0, resize(ictx, ictx->layout.object_size)); + ictx->discard_granularity_bytes = 32; + + MockTestImageCtx mock_image_ctx(*ictx); + MockTestJournal mock_journal; + mock_image_ctx.journal = &mock_journal; + + InSequence seq; + expect_get_modify_timestamp(mock_image_ctx, false); + expect_is_journal_appending(mock_journal, true); + expect_journal_append_io_event(mock_journal, 0, 32, 32); + expect_journal_append_io_event(mock_journal, 1, 96, 64); + expect_journal_append_io_event( + mock_journal, 2, ictx->layout.object_size - 32, 32); + expect_object_discard_request(mock_image_ctx, 0, 32, 32, 0); + expect_object_discard_request(mock_image_ctx, 0, 96, 64, 0); + expect_object_discard_request( + mock_image_ctx, 0, ictx->layout.object_size - 32, 32, 0); + + C_SaferCond aio_comp_ctx; + AioCompletion *aio_comp = AioCompletion::create_and_start( + &aio_comp_ctx, ictx, AIO_TYPE_DISCARD); + MockImageDiscardRequest mock_aio_image_discard( + mock_image_ctx, aio_comp, + {{16, 63}, {96, 31}, {84, 100}, {ictx->layout.object_size - 33, 33}}, + ictx->discard_granularity_bytes, mock_image_ctx.get_data_io_context(), {}); + { + std::shared_lock owner_locker{mock_image_ctx.owner_lock}; + mock_aio_image_discard.send(); + } + ASSERT_EQ(0, aio_comp_ctx.wait()); +} + TEST_F(TestMockIoImageRequest, AioWriteJournalAppendDisabled) { REQUIRE_FEATURE(RBD_FEATURE_JOURNALING); -- 2.39.5