]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: Fix local rbd mirror journals growing forever
authorJosef Johansson <josef@oderland.se>
Mon, 2 Jan 2023 13:12:53 +0000 (14:12 +0100)
committerIlya Dryomov <idryomov@gmail.com>
Fri, 3 Mar 2023 18:12:10 +0000 (19:12 +0100)
This commit fixes commit 7ca1bab90f3 by pushing properly aligned
discards back to m_image_extents, if corrected.

If discards are misaligned (off 0, len 4608, gran=4096), they are
corrected properly, but only in object_extents and not in
m_image_extents.

When journal_append_event is triggered it will only append from
m_image_extents and does not now about the alignment fixes. In
commit_io_events_extent it will log a message and return without
completing the io since the larger misaligned area was sent to the journal.
This will in turn break rbd journal mirroring since the local client will wait
indefinately on the commit to be completed, which it never does.

This does not effect rbd-mirror in any way, which may be confusing and
dangerous since it's only rbd-mirror that updates ceph health, and not
the local client.

Setting `rbd_skip_partial_discard = false` under client will restore the
pre 7ca1bab behaviour and thus not trigger the bug with journals growing.
This will set `rbd_discard_granularity_bytes = 0` internally. This
setting is only changed during startup of a client.

Fixes: 7ca1bab90f3db3aaaa4cdbfc1f18e9f5cfbf5568
Fixes: https://tracker.ceph.com/issues/57396
Signed-off-by: Josef Johansson <josef@oderland.se>
(cherry picked from commit 21a26a752843295ff946d1543c2f5f9fac764593)

Conflicts:
src/librbd/io/ImageRequest.cc [ commit b2c88820923e ("librbd:
  return area from extents_to_file()") not in quincy ]
src/test/librbd/io/test_mock_ImageRequest.cc [ commit
  b9a2384cdc43 ("librbd: propagate area down to
  file_to_extents()") not in quincy ]

qa/workunits/rbd/rbd-nbd.sh
src/librbd/io/ImageRequest.cc
src/test/librbd/io/test_mock_ImageRequest.cc

index ffcb585df69e9308931f201b0dc426b3223c22ee..2229b39c92dc5726692f4f7b936a385ea408b5c8 100755 (executable)
@@ -442,6 +442,63 @@ _sudo rbd device detach ${DEV} --device-type nbd
 expect_false get_pid ${POOL}
 DEV=
 
+# test discard granularity with journaling
+DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}`
+get_pid ${POOL}
+rbd config image set ${POOL}/${IMAGE} rbd_discard_granularity_bytes 4096
+rbd feature enable ${POOL}/${IMAGE} journaling
+# since a discard will now be pruned to only whole blocks (0..4095, 4096..8191)
+# let us test all the cases around those alignments. 512 is the smallest
+# possible block blkdiscard allows us to use. Thus the test checks
+# 512 before, on the alignment, 512 after.
+_sudo blkdiscard --offset 0 --length $((4096-512)) ${DEV}
+_sudo blkdiscard --offset 0 --length 4096 ${DEV}
+_sudo blkdiscard --offset 0 --length $((4096+512)) ${DEV}
+_sudo blkdiscard --offset 512 --length $((8192-1024)) ${DEV}
+_sudo blkdiscard --offset 512 --length $((8192-512)) ${DEV}
+_sudo blkdiscard --offset 512 --length 8192 ${DEV}
+expected='Entry: tag_id=1, commit_tid=1
+{
+    "event_type": "AioDiscard",
+    "offset": 0,
+    "length": 4096,
+    "discard_granularity_bytes": 4096,
+Entry: tag_id=1, commit_tid=2
+{
+    "event_type": "AioDiscard",
+    "offset": 0,
+    "length": 4096,
+    "discard_granularity_bytes": 4096,
+Entry: tag_id=1, commit_tid=3
+{
+    "event_type": "AioDiscard",
+    "offset": 4096,
+    "length": 4096,
+    "discard_granularity_bytes": 4096,
+Entry: tag_id=1, commit_tid=4
+{
+    "event_type": "AioDiscard",
+    "offset": 4096,
+    "length": 4096,
+    "discard_granularity_bytes": 4096,'
+out=`rbd journal inspect --pool ${POOL} --image ${IMAGE} --verbose | \
+    grep -A5 --no-group-separator Entry`
+rbd config image rm ${POOL}/${IMAGE} rbd_discard_granularity_bytes
+[ "${out}" == "${expected}" ]
+# wait for commit log to be empty, 10 seconds should be well enough
+tries=0
+queue_length=`rbd journal inspect --pool ${POOL} --image ${IMAGE} | awk '/entries inspected/ {print $1}'`
+while [ ${tries} -lt 10 ] && [ ${queue_length} -gt 0 ]; do
+    rbd journal inspect --pool ${POOL} --image ${IMAGE} --verbose
+    sleep 1
+    queue_length=`rbd journal inspect --pool ${POOL} --image ${IMAGE} | awk '/entries inspected/ {print $1}'`
+    tries=$((tries+1))
+done
+rbd feature disable ${POOL}/${IMAGE} journaling
+[ ${queue_length} -eq 0 ]
+unmap_device ${DEV} ${PID}
+DEV=
+
 # test that rbd_op_threads setting takes effect
 EXPECTED=`ceph-conf --show-config-value librados_thread_count`
 DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}`
index d00292e52d114b29995b42023b30e3f9366b8242..9c87c9d8c5fceb1498332752176385d2ee9e27de 100644 (file)
@@ -455,6 +455,19 @@ void AbstractImageWriteRequest<I>::send_request() {
     return;
   }
 
+  // reflect changes in object_extents back to m_image_extents
+  if (ret == 1) {
+    this->m_image_extents.clear();
+    for (auto& object_extent : object_extents) {
+      io::Extents image_extents;
+      io::util::extent_to_file(&image_ctx, object_extent.object_no,
+                               object_extent.offset, object_extent.length,
+                               image_extents);
+      this->m_image_extents.insert(this->m_image_extents.end(),
+                                   image_extents.begin(), image_extents.end());
+    }
+  }
+
   aio_comp->set_request_count(object_extents.size());
   if (!object_extents.empty()) {
     uint64_t journal_tid = 0;
@@ -601,11 +614,12 @@ int ImageDiscardRequest<I>::prune_object_extents(
   // discard_granularity_bytes >= object_size && tail truncation
   // is a special case for filestore
   bool prune_required = false;
+  bool length_modified = false;
   auto object_size = this->m_image_ctx.layout.object_size;
   auto discard_granularity_bytes = std::min(m_discard_granularity_bytes,
                                             object_size);
   auto xform_lambda =
-    [discard_granularity_bytes, object_size, &prune_required]
+    [discard_granularity_bytes, object_size, &prune_required, &length_modified]
     (LightweightObjectExtent& object_extent) {
       auto& offset = object_extent.offset;
       auto& length = object_extent.length;
@@ -619,7 +633,11 @@ int ImageDiscardRequest<I>::prune_object_extents(
           prune_required = true;
           length = 0;
         } else {
-          length = next_offset - offset;
+          auto new_length = next_offset - offset;
+          if (length != new_length) {
+            length_modified = true;
+            length = new_length;
+          }
         }
       }
     };
@@ -637,6 +655,12 @@ int ImageDiscardRequest<I>::prune_object_extents(
                      remove_lambda),
       object_extents->end());
   }
+
+  // object extents were modified, image extents needs updating
+  if (length_modified || prune_required) {
+    return 1;
+  }
+
   return 0;
 }
 
index cd26fb65e70ed5973ac4fc9b357af88176d14163..ded809cd3fc50e46aa3c998b2a94f1fc82efb49a 100644 (file)
@@ -115,6 +115,16 @@ struct TestMockIoImageRequest : public TestMockFixture {
     }
   }
 
+  void expect_journal_append_io_event(MockTestJournal &mock_journal, uint64_t journal_tid,
+                                      uint64_t offset, size_t length) {
+    EXPECT_CALL(mock_journal, append_io_event_mock(_, offset, length, _, _))
+      .WillOnce(Return(journal_tid));
+  }
+
+  void expect_journal_append_io_event_times(MockTestJournal &mock_journal, int n) {
+    EXPECT_CALL(mock_journal, append_io_event_mock(_, _, _, _, _)).Times(n);
+  }
+
   void expect_object_discard_request(MockTestImageCtx &mock_image_ctx,
                                      uint64_t object_no, uint64_t offset,
                                      uint32_t length, int r) {
@@ -132,6 +142,10 @@ struct TestMockIoImageRequest : public TestMockFixture {
                 }));
   }
 
+  void expect_object_request_send_times(MockTestImageCtx &mock_image_ctx, int n) {
+    EXPECT_CALL(*mock_image_ctx.io_object_dispatcher, send(_)).Times(n);
+  }
+
   void expect_object_request_send(MockTestImageCtx &mock_image_ctx,
                                   int r) {
     EXPECT_CALL(*mock_image_ctx.io_object_dispatcher, send(_))
@@ -362,6 +376,140 @@ TEST_F(TestMockIoImageRequest, DiscardGranularity) {
   ASSERT_EQ(0, aio_comp_ctx.wait());
 }
 
+TEST_F(TestMockIoImageRequest, PartialDiscardJournalAppendEnabled) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ictx->discard_granularity_bytes = 0;
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockTestJournal mock_journal;
+  mock_image_ctx.journal = &mock_journal;
+
+  InSequence seq;
+  expect_get_modify_timestamp(mock_image_ctx, false);
+  expect_is_journal_appending(mock_journal, true);
+  expect_journal_append_io_event(mock_journal, 0, 16, 63);
+  expect_journal_append_io_event(mock_journal, 1, 84, 100);
+  expect_object_discard_request(mock_image_ctx, 0, 16, 63, 0);
+  expect_object_discard_request(mock_image_ctx, 0, 84, 100, 0);
+
+  C_SaferCond aio_comp_ctx;
+  AioCompletion *aio_comp = AioCompletion::create_and_start(
+    &aio_comp_ctx, ictx, AIO_TYPE_DISCARD);
+  MockImageDiscardRequest mock_aio_image_discard(
+    mock_image_ctx, aio_comp, {{16, 63}, {84, 100}},
+    ictx->discard_granularity_bytes, mock_image_ctx.get_data_io_context(), {});
+  {
+    std::shared_lock owner_locker{mock_image_ctx.owner_lock};
+    mock_aio_image_discard.send();
+  }
+  ASSERT_EQ(0, aio_comp_ctx.wait());
+}
+
+TEST_F(TestMockIoImageRequest, TailDiscardJournalAppendEnabled) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ASSERT_EQ(0, resize(ictx, ictx->layout.object_size));
+  ictx->discard_granularity_bytes = 2 * ictx->layout.object_size;
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockTestJournal mock_journal;
+  mock_image_ctx.journal = &mock_journal;
+
+  InSequence seq;
+  expect_get_modify_timestamp(mock_image_ctx, false);
+  expect_is_journal_appending(mock_journal, true);
+  expect_journal_append_io_event(
+    mock_journal, 0, ictx->layout.object_size - 1024, 1024);
+  expect_object_discard_request(
+    mock_image_ctx, 0, ictx->layout.object_size - 1024, 1024, 0);
+
+  C_SaferCond aio_comp_ctx;
+  AioCompletion *aio_comp = AioCompletion::create_and_start(
+    &aio_comp_ctx, ictx, AIO_TYPE_DISCARD);
+  MockImageDiscardRequest mock_aio_image_discard(
+    mock_image_ctx, aio_comp, {{ictx->layout.object_size - 1024, 1024}},
+    ictx->discard_granularity_bytes, mock_image_ctx.get_data_io_context(), {});
+  {
+    std::shared_lock owner_locker{mock_image_ctx.owner_lock};
+    mock_aio_image_discard.send();
+  }
+  ASSERT_EQ(0, aio_comp_ctx.wait());
+}
+
+TEST_F(TestMockIoImageRequest, EmptyDiscardJournalAppendEnabled) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ASSERT_EQ(0, resize(ictx, ictx->layout.object_size));
+  ictx->discard_granularity_bytes = 32;
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockTestJournal mock_journal;
+  mock_image_ctx.journal = &mock_journal;
+
+  InSequence seq;
+  expect_get_modify_timestamp(mock_image_ctx, false);
+  expect_is_journal_appending(mock_journal, true);
+  expect_journal_append_io_event_times(mock_journal, 0);
+  expect_object_request_send_times(mock_image_ctx, 0);
+
+  C_SaferCond aio_comp_ctx;
+  AioCompletion *aio_comp = AioCompletion::create_and_start(
+    &aio_comp_ctx, ictx, AIO_TYPE_DISCARD);
+  MockImageDiscardRequest mock_aio_image_discard(
+    mock_image_ctx, aio_comp, {{96, 31}}, ictx->discard_granularity_bytes,
+    mock_image_ctx.get_data_io_context(), {});
+  {
+    std::shared_lock owner_locker{mock_image_ctx.owner_lock};
+    mock_aio_image_discard.send();
+  }
+  ASSERT_EQ(0, aio_comp_ctx.wait());
+}
+
+TEST_F(TestMockIoImageRequest, DiscardGranularityJournalAppendEnabled) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ASSERT_EQ(0, resize(ictx, ictx->layout.object_size));
+  ictx->discard_granularity_bytes = 32;
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockTestJournal mock_journal;
+  mock_image_ctx.journal = &mock_journal;
+
+  InSequence seq;
+  expect_get_modify_timestamp(mock_image_ctx, false);
+  expect_is_journal_appending(mock_journal, true);
+  expect_journal_append_io_event(mock_journal, 0, 32, 32);
+  expect_journal_append_io_event(mock_journal, 1, 96, 64);
+  expect_journal_append_io_event(
+    mock_journal, 2, ictx->layout.object_size - 32, 32);
+  expect_object_discard_request(mock_image_ctx, 0, 32, 32, 0);
+  expect_object_discard_request(mock_image_ctx, 0, 96, 64, 0);
+  expect_object_discard_request(
+    mock_image_ctx, 0, ictx->layout.object_size - 32, 32, 0);
+
+  C_SaferCond aio_comp_ctx;
+  AioCompletion *aio_comp = AioCompletion::create_and_start(
+    &aio_comp_ctx, ictx, AIO_TYPE_DISCARD);
+  MockImageDiscardRequest mock_aio_image_discard(
+    mock_image_ctx, aio_comp,
+    {{16, 63}, {96, 31}, {84, 100}, {ictx->layout.object_size - 33, 33}},
+    ictx->discard_granularity_bytes, mock_image_ctx.get_data_io_context(), {});
+  {
+    std::shared_lock owner_locker{mock_image_ctx.owner_lock};
+    mock_aio_image_discard.send();
+  }
+  ASSERT_EQ(0, aio_comp_ctx.wait());
+}
+
 TEST_F(TestMockIoImageRequest, AioWriteJournalAppendDisabled) {
   REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);