librbd: Fix local rbd mirror journals growing forever
author     Josef Johansson <josef@oderland.se>
           Mon, 2 Jan 2023 13:12:53 +0000 (14:12 +0100)
committer  Josef Johansson <josef@oderland.se>
           Fri, 20 Jan 2023 10:59:16 +0000 (11:59 +0100)
This commit fixes commit 7ca1bab90f3 by pushing properly aligned
discards back into m_image_extents whenever they were corrected.

If discards are misaligned (off=0, len=4608, gran=4096), they are
corrected properly, but only in object_extents and not in
m_image_extents.
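
A minimal sketch of the pruning arithmetic (illustrative only; the real
logic lives in ImageDiscardRequest<I>::prune_object_extents and uses
librbd's own helpers):

    // Assumed simplification: round the discard start up and the end down
    // to the granularity, e.g. (off=0, len=4608, gran=4096) -> (off=0, len=4096).
    #include <cstdint>
    #include <utility>

    std::pair<uint64_t, uint64_t> prune_to_granularity(uint64_t off,
                                                       uint64_t len,
                                                       uint64_t gran) {
      uint64_t start = (off + gran - 1) / gran * gran;  // round start up
      uint64_t end = (off + len) / gran * gran;         // round end down
      return end > start ? std::pair<uint64_t, uint64_t>{start, end - start}
                         : std::pair<uint64_t, uint64_t>{0, 0};
    }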

When journal_append_event is triggered, it appends only from
m_image_extents and does not know about the alignment fixes.
commit_io_events_extent will then log a message and return without
completing the IO, since the larger, misaligned area was sent to the
journal. This in turn breaks rbd journal mirroring, because the local
client waits indefinitely for the commit to be completed, which never
happens.
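
A simplified model of why the event never completes (illustrative types
only, not the actual Journal API):

    // The journal event records the unpruned image extent, while the object
    // layer only commits the pruned extent, so coverage never reaches the
    // recorded length and the journal entry is never trimmed.
    #include <cstdint>

    struct PendingIOEvent {
      uint64_t recorded_length;      // length stored in the journal event
      uint64_t committed_bytes = 0;  // bytes committed by object requests
      bool complete() const { return committed_bytes >= recorded_length; }
    };

    // (off=0, len=4608, gran=4096): the journal records 4608 bytes but the
    // object requests only ever commit 4096, so complete() stays false:
    //   PendingIOEvent ev{4608}; ev.committed_bytes += 4096;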

This does not affect rbd-mirror in any way, which can be confusing and
dangerous since it is only rbd-mirror that updates ceph health, not
the local client.

Setting `rbd_skip_partial_discard = false` on the client restores the
pre-7ca1bab behaviour and thus does not trigger the bug with journals
growing. This internally sets `rbd_discard_granularity_bytes = 0`. The
setting is only applied when a client starts up.
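
For example, a ceph.conf snippet applying the workaround on the librbd
client (placement under [client] as described above; a hypothetical
minimal excerpt, not a full configuration):

    [client]
        # restore the pre-7ca1bab discard behaviour; internally implies
        # rbd_discard_granularity_bytes = 0, read at client startup
        rbd_skip_partial_discard = false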

Fixes: 7ca1bab90f3db3aaaa4cdbfc1f18e9f5cfbf5568
Fixes: https://tracker.ceph.com/issues/57396
Signed-off-by: Josef Johansson <josef@oderland.se>
qa/workunits/rbd/rbd-nbd.sh
src/librbd/io/ImageRequest.cc
src/test/librbd/io/test_mock_ImageRequest.cc

index 1af9154e463bef52616abca81fb29d1aaa9781f2..51dafd41c09c52adab2821cd614338a77eeba6b2 100755 (executable)
@@ -442,4 +442,61 @@ _sudo rbd device detach ${DEV} --device-type nbd
 expect_false get_pid ${POOL}
 DEV=
 
+# test discard granularity with journaling
+DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}`
+get_pid ${POOL}
+rbd config image set ${POOL}/${IMAGE} rbd_discard_granularity_bytes 4096
+rbd feature enable ${POOL}/${IMAGE} journaling
+# since a discard will now be pruned to only whole blocks (0..4095, 4096..8191)
+# let us test all the cases around those alignments. 512 is the smallest
+# possible block blkdiscard allows us to use. Thus the test checks
+# 512 before, on the alignment, 512 after.
+_sudo blkdiscard --offset 0 --length $((4096-512)) ${DEV}
+_sudo blkdiscard --offset 0 --length 4096 ${DEV}
+_sudo blkdiscard --offset 0 --length $((4096+512)) ${DEV}
+_sudo blkdiscard --offset 512 --length $((8192-1024)) ${DEV}
+_sudo blkdiscard --offset 512 --length $((8192-512)) ${DEV}
+_sudo blkdiscard --offset 512 --length 8192 ${DEV}
+expected='Entry: tag_id=1, commit_tid=1
+{
+    "event_type": "AioDiscard",
+    "offset": 0,
+    "length": 4096,
+    "discard_granularity_bytes": 4096,
+Entry: tag_id=1, commit_tid=2
+{
+    "event_type": "AioDiscard",
+    "offset": 0,
+    "length": 4096,
+    "discard_granularity_bytes": 4096,
+Entry: tag_id=1, commit_tid=3
+{
+    "event_type": "AioDiscard",
+    "offset": 4096,
+    "length": 4096,
+    "discard_granularity_bytes": 4096,
+Entry: tag_id=1, commit_tid=4
+{
+    "event_type": "AioDiscard",
+    "offset": 4096,
+    "length": 4096,
+    "discard_granularity_bytes": 4096,'
+out=`rbd journal inspect --pool ${POOL} --image ${IMAGE} --verbose | \
+    grep -A5 --no-group-separator Entry`
+rbd config image rm ${POOL}/${IMAGE} rbd_discard_granularity_bytes
+[ "${out}" == "${expected}" ]
+# wait for commit log to be empty, 10 seconds should be well enough
+tries=0
+queue_length=`rbd journal inspect --pool ${POOL} --image ${IMAGE} | awk '/entries inspected/ {print $1}'`
+while [ ${tries} -lt 10 ] && [ ${queue_length} -gt 0 ]; do
+    rbd journal inspect --pool ${POOL} --image ${IMAGE} --verbose
+    sleep 1
+    queue_length=`rbd journal inspect --pool ${POOL} --image ${IMAGE} | awk '/entries inspected/ {print $1}'`
+    tries=$((tries+1))
+done
+rbd feature disable ${POOL}/${IMAGE} journaling
+[ ${queue_length} -eq 0 ]
+unmap_device ${DEV} ${PID}
+DEV=
+
 echo OK
index 0a032d25e9d83ab252feb07edc7397e7c1749558..ea6b7301a2a9fd08fd4868f6bae1952f4fe894a9 100644 (file)
@@ -461,6 +461,18 @@ void AbstractImageWriteRequest<I>::send_request() {
     return;
   }
 
+  // reflect changes in object_extents back to m_image_extents
+  if (ret == 1) {
+    this->m_image_extents.clear();
+    for (auto& object_extent : object_extents) {
+      auto [image_extents, _] = io::util::object_to_area_extents(
+          &image_ctx, object_extent.object_no,
+          {{object_extent.offset, object_extent.length}});
+      this->m_image_extents.insert(this->m_image_extents.end(),
+                                   image_extents.begin(), image_extents.end());
+    }
+  }
+
   aio_comp->set_request_count(object_extents.size());
   if (!object_extents.empty()) {
     uint64_t journal_tid = 0;
@@ -607,11 +619,12 @@ int ImageDiscardRequest<I>::prune_object_extents(
   // discard_granularity_bytes >= object_size && tail truncation
   // is a special case for filestore
   bool prune_required = false;
+  bool length_modified = false;
   auto object_size = this->m_image_ctx.layout.object_size;
   auto discard_granularity_bytes = std::min(m_discard_granularity_bytes,
                                             object_size);
   auto xform_lambda =
-    [discard_granularity_bytes, object_size, &prune_required]
+    [discard_granularity_bytes, object_size, &prune_required, &length_modified]
     (LightweightObjectExtent& object_extent) {
       auto& offset = object_extent.offset;
       auto& length = object_extent.length;
@@ -625,7 +638,11 @@ int ImageDiscardRequest<I>::prune_object_extents(
           prune_required = true;
           length = 0;
         } else {
-          length = next_offset - offset;
+          auto new_length = next_offset - offset;
+          if (length != new_length) {
+            length_modified = true;
+            length = new_length;
+          }
         }
       }
     };
@@ -643,6 +660,12 @@ int ImageDiscardRequest<I>::prune_object_extents(
                      remove_lambda),
       object_extents->end());
   }
+
+  // object extents were modified, image extents need updating
+  if (length_modified || prune_required) {
+    return 1;
+  }
+
   return 0;
 }
 
index e2ac825630ccce4dd750ee26c1873f2833a1022e..56d011560dd37a034bc8c8bf108cca324b16b47c 100644 (file)
@@ -119,6 +119,16 @@ struct TestMockIoImageRequest : public TestMockFixture {
     }
   }
 
+  void expect_journal_append_io_event(MockTestJournal &mock_journal, uint64_t journal_tid,
+                                      uint64_t offset, size_t length) {
+    EXPECT_CALL(mock_journal, append_io_event_mock(_, offset, length, _, _))
+      .WillOnce(Return(journal_tid));
+  }
+
+  void expect_journal_append_io_event_times(MockTestJournal &mock_journal, int n) {
+    EXPECT_CALL(mock_journal, append_io_event_mock(_, _, _, _, _)).Times(n);
+  }
+
   void expect_object_discard_request(MockTestImageCtx &mock_image_ctx,
                                      uint64_t object_no, uint64_t offset,
                                      uint32_t length, int r) {
@@ -136,6 +146,10 @@ struct TestMockIoImageRequest : public TestMockFixture {
                 }));
   }
 
+  void expect_object_request_send_times(MockTestImageCtx &mock_image_ctx, int n) {
+    EXPECT_CALL(*mock_image_ctx.io_object_dispatcher, send(_)).Times(n);
+  }
+
   void expect_object_request_send(MockTestImageCtx &mock_image_ctx,
                                   int r) {
     EXPECT_CALL(*mock_image_ctx.io_object_dispatcher, send(_))
@@ -367,6 +381,143 @@ TEST_F(TestMockIoImageRequest, DiscardGranularity) {
   ASSERT_EQ(0, aio_comp_ctx.wait());
 }
 
+TEST_F(TestMockIoImageRequest, PartialDiscardJournalAppendEnabled) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ictx->discard_granularity_bytes = 0;
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockTestJournal mock_journal;
+  mock_image_ctx.journal = &mock_journal;
+
+  InSequence seq;
+  expect_get_modify_timestamp(mock_image_ctx, false);
+  expect_is_journal_appending(mock_journal, true);
+  expect_journal_append_io_event(mock_journal, 0, 16, 63);
+  expect_journal_append_io_event(mock_journal, 1, 84, 100);
+  expect_object_discard_request(mock_image_ctx, 0, 16, 63, 0);
+  expect_object_discard_request(mock_image_ctx, 0, 84, 100, 0);
+
+  C_SaferCond aio_comp_ctx;
+  AioCompletion *aio_comp = AioCompletion::create_and_start(
+    &aio_comp_ctx, ictx, AIO_TYPE_DISCARD);
+  MockImageDiscardRequest mock_aio_image_discard(
+    mock_image_ctx, aio_comp, {{16, 63}, {84, 100}}, ImageArea::DATA,
+    ictx->discard_granularity_bytes, mock_image_ctx.get_data_io_context(), {});
+  {
+    std::shared_lock owner_locker{mock_image_ctx.owner_lock};
+    mock_aio_image_discard.send();
+  }
+  ASSERT_EQ(0, aio_comp_ctx.wait());
+}
+
+TEST_F(TestMockIoImageRequest, TailDiscardJournalAppendEnabled) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ASSERT_EQ(0, resize(ictx, ictx->layout.object_size));
+  ictx->discard_granularity_bytes = 2 * ictx->layout.object_size;
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockTestJournal mock_journal;
+  mock_image_ctx.journal = &mock_journal;
+
+  InSequence seq;
+  expect_get_modify_timestamp(mock_image_ctx, false);
+  expect_is_journal_appending(mock_journal, true);
+  expect_journal_append_io_event(
+    mock_journal, 0, ictx->layout.object_size - 1024, 1024);
+  expect_object_discard_request(
+    mock_image_ctx, 0, ictx->layout.object_size - 1024, 1024, 0);
+
+  C_SaferCond aio_comp_ctx;
+  AioCompletion *aio_comp = AioCompletion::create_and_start(
+    &aio_comp_ctx, ictx, AIO_TYPE_DISCARD);
+  MockImageDiscardRequest mock_aio_image_discard(
+    mock_image_ctx, aio_comp,
+    {{ictx->layout.object_size - 1024, 1024}}, ImageArea::DATA,
+    ictx->discard_granularity_bytes, mock_image_ctx.get_data_io_context(), {});
+  {
+    std::shared_lock owner_locker{mock_image_ctx.owner_lock};
+    mock_aio_image_discard.send();
+  }
+  ASSERT_EQ(0, aio_comp_ctx.wait());
+}
+
+TEST_F(TestMockIoImageRequest, EmptyDiscardJournalAppendEnabled) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ASSERT_EQ(0, resize(ictx, ictx->layout.object_size));
+  ictx->discard_granularity_bytes = 32;
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockTestJournal mock_journal;
+  mock_image_ctx.journal = &mock_journal;
+
+  InSequence seq;
+  expect_get_modify_timestamp(mock_image_ctx, false);
+  expect_is_journal_appending(mock_journal, true);
+  expect_journal_append_io_event_times(mock_journal, 0);
+  expect_object_request_send_times(mock_image_ctx, 0);
+
+  C_SaferCond aio_comp_ctx;
+  AioCompletion *aio_comp = AioCompletion::create_and_start(
+    &aio_comp_ctx, ictx, AIO_TYPE_DISCARD);
+  MockImageDiscardRequest mock_aio_image_discard(
+    mock_image_ctx, aio_comp, {{96, 31}},
+    ImageArea::DATA, ictx->discard_granularity_bytes,
+    mock_image_ctx.get_data_io_context(), {});
+  {
+    std::shared_lock owner_locker{mock_image_ctx.owner_lock};
+    mock_aio_image_discard.send();
+  }
+  ASSERT_EQ(0, aio_comp_ctx.wait());
+}
+
+TEST_F(TestMockIoImageRequest, DiscardGranularityJournalAppendEnabled) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ASSERT_EQ(0, resize(ictx, ictx->layout.object_size));
+  ictx->discard_granularity_bytes = 32;
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockTestJournal mock_journal;
+  mock_image_ctx.journal = &mock_journal;
+
+  InSequence seq;
+  expect_get_modify_timestamp(mock_image_ctx, false);
+  expect_is_journal_appending(mock_journal, true);
+  expect_journal_append_io_event(mock_journal, 0, 32, 32);
+  expect_journal_append_io_event(mock_journal, 1, 96, 64);
+  expect_journal_append_io_event(
+    mock_journal, 2, ictx->layout.object_size - 32, 32);
+  expect_object_discard_request(mock_image_ctx, 0, 32, 32, 0);
+  expect_object_discard_request(mock_image_ctx, 0, 96, 64, 0);
+  expect_object_discard_request(
+    mock_image_ctx, 0, ictx->layout.object_size - 32, 32, 0);
+
+  C_SaferCond aio_comp_ctx;
+  AioCompletion *aio_comp = AioCompletion::create_and_start(
+    &aio_comp_ctx, ictx, AIO_TYPE_DISCARD);
+  MockImageDiscardRequest mock_aio_image_discard(
+    mock_image_ctx, aio_comp,
+    {{16, 63}, {96, 31}, {84, 100}, {ictx->layout.object_size - 33, 33}},
+    ImageArea::DATA, ictx->discard_granularity_bytes,
+    mock_image_ctx.get_data_io_context(), {});
+  {
+    std::shared_lock owner_locker{mock_image_ctx.owner_lock};
+    mock_aio_image_discard.send();
+  }
+  ASSERT_EQ(0, aio_comp_ctx.wait());
+}
+
 TEST_F(TestMockIoImageRequest, AioWriteJournalAppendDisabled) {
   REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);