git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: do deferred 'big' write only if blob continuity is broken.
author Igor Fedotov <ifedotov@suse.com>
Thu, 20 Feb 2020 13:27:12 +0000 (16:27 +0300)
committer Igor Fedotov <ifedotov@suse.com>
Wed, 1 Apr 2020 09:55:34 +0000 (12:55 +0300)
Deferring makes no sense if the affected blob's range is already
non-continuous or if a full overwrite takes place.

Signed-off-by: Igor Fedotov <ifedotov@suse.com>
src/os/bluestore/BlueStore.cc
src/test/objectstore/store_test.cc

index 03a7a963ac2b794b07a1f12824dc1cc7717b43a7..436472a87088cd1b8e2a319665584118e6367d62 100644 (file)
@@ -13328,61 +13328,76 @@ void BlueStore::_do_write_big(
             << std::dec << " write via deferred"
             << dendl;
 
-          bluestore_deferred_op_t *op = _get_deferred_op(txc);
-          op->op = bluestore_deferred_op_t::OP_WRITE;
+          PExtentVector extents;
           int r = b0->get_blob().map(
             b_off, l_aligned,
-            [&](uint64_t offset, uint64_t length) {
-              op->extents.emplace_back(bluestore_pextent_t(offset, length));
-              return 0;
+            [&](const bluestore_pextent_t& pext,
+                uint64_t offset,
+                uint64_t length) {
+              // apply deferred if overwrite breaks blob continuity only.
+              // if it totally overlaps some pextent - fallback to regular write
+              if (pext.offset < offset ||
+                pext.end() > offset + length) {
+                extents.emplace_back(bluestore_pextent_t(offset, length));
+                return 0;
+              }
+              return -1;
             });
-          ceph_assert(r == 0);
-
-          dout(20) << __func__ << "  reading head 0x" << std::hex << head_read
-            << " and tail 0x" << tail_read << std::dec << dendl;
-          if (head_read) {
-            int r = _do_read(c.get(), o, offset - head_read, head_read,
-              op->data, 0);
-            ceph_assert(r >= 0 && r <= (int)head_read);
-            size_t zlen = head_read - r;
-            if (zlen) {
-              op->data.append_zero(zlen);
-              logger->inc(l_bluestore_write_pad_bytes, zlen);
+          if (r < 0) {
+            dout(20) << __func__
+              << " deferring big fell back"
+              << dendl;
+          } else {
+            bluestore_deferred_op_t *op = _get_deferred_op(txc);
+            op->op = bluestore_deferred_op_t::OP_WRITE;
+            op->extents.swap(extents);
+
+            dout(20) << __func__ << "  reading head 0x" << std::hex << head_read
+              << " and tail 0x" << tail_read << std::dec << dendl;
+            if (head_read) {
+              int r = _do_read(c.get(), o, offset - head_read, head_read,
+                op->data, 0);
+              ceph_assert(r >= 0 && r <= (int)head_read);
+              size_t zlen = head_read - r;
+              if (zlen) {
+                op->data.append_zero(zlen);
+                logger->inc(l_bluestore_write_pad_bytes, zlen);
+              }
+              logger->inc(l_bluestore_write_penalty_read_ops);
             }
-            logger->inc(l_bluestore_write_penalty_read_ops);
-          }
-          blp.copy(l, op->data);
-
-          if (tail_read) {
-            bufferlist tail_bl;
-            int r = _do_read(c.get(), o, offset + l, tail_read,
-              tail_bl, 0);
-            ceph_assert(r >= 0 && r <= (int)tail_read);
-            size_t zlen = tail_read - r;
-            if (zlen) {
-              tail_bl.append_zero(zlen);
-              logger->inc(l_bluestore_write_pad_bytes, zlen);
+            blp.copy(l, op->data);
+
+            if (tail_read) {
+              bufferlist tail_bl;
+              int r = _do_read(c.get(), o, offset + l, tail_read,
+                tail_bl, 0);
+              ceph_assert(r >= 0 && r <= (int)tail_read);
+              size_t zlen = tail_read - r;
+              if (zlen) {
+                tail_bl.append_zero(zlen);
+                logger->inc(l_bluestore_write_pad_bytes, zlen);
+              }
+              op->data.claim_append(tail_bl);
+              logger->inc(l_bluestore_write_penalty_read_ops);
             }
-            op->data.claim_append(tail_bl);
-            logger->inc(l_bluestore_write_penalty_read_ops);
-          }
 
-          _buffer_cache_write(txc, b0, b_off, op->data,
-            wctx->buffered ? 0 : Buffer::FLAG_NOCACHE);
+            _buffer_cache_write(txc, b0, b_off, op->data,
+              wctx->buffered ? 0 : Buffer::FLAG_NOCACHE);
 
-          if (b0->get_blob().csum_type) {
-            b0->dirty_blob().calc_csum(b_off, op->data);
-          }
-          Extent *le = o->extent_map.set_lextent(c, offset,
-            offset - ep->blob_start(), l, b0, &wctx->old_extents);
-          txc->statfs_delta.stored() += le->length;
+            if (b0->get_blob().csum_type) {
+              b0->dirty_blob().calc_csum(b_off, op->data);
+            }
+            Extent *le = o->extent_map.set_lextent(c, offset,
+              offset - ep->blob_start(), l, b0, &wctx->old_extents);
+            txc->statfs_delta.stored() += le->length;
 
-          offset += l;
-          length -= l;
-          logger->inc(l_bluestore_write_big_blobs);
-          logger->inc(l_bluestore_write_big_deferred);
+            offset += l;
+            length -= l;
+            logger->inc(l_bluestore_write_big_blobs);
+            logger->inc(l_bluestore_write_big_deferred);
 
-          continue;
+            continue;
+          }
         }
       }
       o->extent_map.punch_hole(c, offset, l, &wctx->old_extents);
index eceecb00d37cef9c41c0e12615fa6a402915326d..90516760dfec617643835088d6187a14f506e095 100644 (file)
@@ -6753,6 +6753,34 @@ TEST_P(StoreTestSpecificAUSize, DeferredOnBigOverwrite) {
     ASSERT_TRUE(bl_eq(expected, bl));
   }
 
+  // overwrite at the end, 4K alignment
+  {
+    ObjectStore::Transaction t;
+    bufferlist bl;
+
+    bl.append(std::string(block_size, 'g'));
+    t.write(cid, hoid, block_size, bl.length(), bl, CEPH_OSD_OP_FLAG_FADVISE_NOCACHE);
+    r = queue_transaction(store, ch, std::move(t));
+    ASSERT_EQ(r, 0);
+  }
+  ASSERT_EQ(logger->get(l_bluestore_write_big), 4u);
+  ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 2u);
+
+  {
+    bufferlist bl, expected;
+    r = store->read(ch, hoid, 0, block_size, bl);
+    ASSERT_EQ(r, (int)block_size);
+    expected.append(string(block_size, 'b'));
+    ASSERT_TRUE(bl_eq(expected, bl));
+  }
+  {
+    bufferlist bl, expected;
+    r = store->read(ch, hoid, block_size, block_size, bl);
+    ASSERT_EQ(r, (int)block_size);
+    expected.append(string(block_size, 'g'));
+    ASSERT_TRUE(bl_eq(expected, bl));
+  }
+
   // overwrite at 4K, 12K alignment
   {
     ObjectStore::Transaction t;
@@ -6763,15 +6791,15 @@ TEST_P(StoreTestSpecificAUSize, DeferredOnBigOverwrite) {
     r = queue_transaction(store, ch, std::move(t));
     ASSERT_EQ(r, 0);
   }
-  ASSERT_EQ(logger->get(l_bluestore_write_big), 4u);
-  ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 2u);
+  ASSERT_EQ(logger->get(l_bluestore_write_big), 5u);
+  ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 3u);
 
   // makes sure deferred has been submitted
   // and do all the checks again
   sleep(g_conf().get_val<double>("bluestore_max_defer_interval") + 2);
 
-  ASSERT_EQ(logger->get(l_bluestore_write_big), 4u);
-  ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 2u);
+  ASSERT_EQ(logger->get(l_bluestore_write_big), 5u);
+  ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 3u);
 
   {
     bufferlist bl, expected;
@@ -6784,7 +6812,7 @@ TEST_P(StoreTestSpecificAUSize, DeferredOnBigOverwrite) {
     bufferlist bl, expected;
     r = store->read(ch, hoid, block_size, block_size, bl);
     ASSERT_EQ(r, (int)block_size);
-    expected.append(string(block_size, 'c'));
+    expected.append(string(block_size, 'g'));
     ASSERT_TRUE(bl_eq(expected, bl));
   }
   {
@@ -6836,8 +6864,8 @@ TEST_P(StoreTestSpecificAUSize, DeferredOnBigOverwrite) {
     r = queue_transaction(store, ch, std::move(t));
     ASSERT_EQ(r, 0);
   }
-  ASSERT_EQ(logger->get(l_bluestore_write_big), 5u);
-  ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 2u);
+  ASSERT_EQ(logger->get(l_bluestore_write_big), 6u);
+  ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 3u);
 
   {
     ObjectStore::Transaction t;
@@ -6879,8 +6907,8 @@ TEST_P(StoreTestSpecificAUSize, DeferredOnBigOverwrite) {
     r = queue_transaction(store, ch, std::move(t));
     ASSERT_EQ(r, 0);
   }
-  ASSERT_EQ(logger->get(l_bluestore_write_big), 6u);
-  ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 3u);
+  ASSERT_EQ(logger->get(l_bluestore_write_big), 7u);
+  ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 4u);
   {
     bufferlist bl, expected;
     r = store->read(ch, hoid, 0, block_size, bl);
@@ -6906,6 +6934,35 @@ TEST_P(StoreTestSpecificAUSize, DeferredOnBigOverwrite) {
   ASSERT_EQ(logger->get(l_bluestore_blobs), 1u);
   ASSERT_EQ(logger->get(l_bluestore_extents), 1u);
 
+  // check whether full overwrite bypass deferred
+  {
+    ObjectStore::Transaction t;
+    bufferlist bl;
+    bl.append(std::string(block_size * 2, 'h'));
+
+    t.write(cid, hoid, 0, bl.length(), bl, CEPH_OSD_OP_FLAG_FADVISE_NOCACHE);
+    r = queue_transaction(store, ch, std::move(t));
+    ASSERT_EQ(r, 0);
+  }
+  ASSERT_EQ(logger->get(l_bluestore_write_big), 8u);
+  ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 4u);
+
+  {
+    bufferlist bl, expected;
+    r = store->read(ch, hoid, 0, block_size * 2, bl);
+    ASSERT_EQ(r, (int)block_size * 2);
+    expected.append(string(block_size * 2, 'h'));
+    ASSERT_TRUE(bl_eq(expected, bl));
+  }
+
+  {
+    struct store_statfs_t statfs;
+    int r = store->statfs(&statfs);
+    ASSERT_EQ(r, 0);
+    ASSERT_EQ(statfs.data_stored, (unsigned)block_size * 2);
+    ASSERT_LE(statfs.allocated, (unsigned)block_size * 2);
+  }
+
   {
     ObjectStore::Transaction t;
     t.remove(cid, hoid);