From: Igor Fedotov Date: Thu, 20 Feb 2020 13:27:12 +0000 (+0300) Subject: os/bluestore: do deferred 'big' write if blob continuity is broken only. X-Git-Tag: v16.1.0~2618^2~5 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=00c0cd9ab7f4de46c171cbf34d95b5991ad2977b;p=ceph.git os/bluestore: do deferred 'big' write if blob continuity is broken only. Deferring makes no sense if the affected blob's range is already non-continuous or a full overwrite takes place. Signed-off-by: Igor Fedotov --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 03a7a963ac2b..436472a87088 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -13328,61 +13328,76 @@ void BlueStore::_do_write_big( << std::dec << " write via deferred" << dendl; - bluestore_deferred_op_t *op = _get_deferred_op(txc); - op->op = bluestore_deferred_op_t::OP_WRITE; + PExtentVector extents; int r = b0->get_blob().map( b_off, l_aligned, - [&](uint64_t offset, uint64_t length) { - op->extents.emplace_back(bluestore_pextent_t(offset, length)); - return 0; + [&](const bluestore_pextent_t& pext, + uint64_t offset, + uint64_t length) { + // apply deferred if overwrite breaks blob continuity only. 
+ // if it totally overlaps some pextent - fallback to regular write + if (pext.offset < offset || + pext.end() > offset + length) { + extents.emplace_back(bluestore_pextent_t(offset, length)); + return 0; + } + return -1; }); - ceph_assert(r == 0); - - dout(20) << __func__ << " reading head 0x" << std::hex << head_read - << " and tail 0x" << tail_read << std::dec << dendl; - if (head_read) { - int r = _do_read(c.get(), o, offset - head_read, head_read, - op->data, 0); - ceph_assert(r >= 0 && r <= (int)head_read); - size_t zlen = head_read - r; - if (zlen) { - op->data.append_zero(zlen); - logger->inc(l_bluestore_write_pad_bytes, zlen); + if (r < 0) { + dout(20) << __func__ + << " deferring big fell back" + << dendl; + } else { + bluestore_deferred_op_t *op = _get_deferred_op(txc); + op->op = bluestore_deferred_op_t::OP_WRITE; + op->extents.swap(extents); + + dout(20) << __func__ << " reading head 0x" << std::hex << head_read + << " and tail 0x" << tail_read << std::dec << dendl; + if (head_read) { + int r = _do_read(c.get(), o, offset - head_read, head_read, + op->data, 0); + ceph_assert(r >= 0 && r <= (int)head_read); + size_t zlen = head_read - r; + if (zlen) { + op->data.append_zero(zlen); + logger->inc(l_bluestore_write_pad_bytes, zlen); + } + logger->inc(l_bluestore_write_penalty_read_ops); } - logger->inc(l_bluestore_write_penalty_read_ops); - } - blp.copy(l, op->data); - - if (tail_read) { - bufferlist tail_bl; - int r = _do_read(c.get(), o, offset + l, tail_read, - tail_bl, 0); - ceph_assert(r >= 0 && r <= (int)tail_read); - size_t zlen = tail_read - r; - if (zlen) { - tail_bl.append_zero(zlen); - logger->inc(l_bluestore_write_pad_bytes, zlen); + blp.copy(l, op->data); + + if (tail_read) { + bufferlist tail_bl; + int r = _do_read(c.get(), o, offset + l, tail_read, + tail_bl, 0); + ceph_assert(r >= 0 && r <= (int)tail_read); + size_t zlen = tail_read - r; + if (zlen) { + tail_bl.append_zero(zlen); + logger->inc(l_bluestore_write_pad_bytes, zlen); + } + 
op->data.claim_append(tail_bl); + logger->inc(l_bluestore_write_penalty_read_ops); } - op->data.claim_append(tail_bl); - logger->inc(l_bluestore_write_penalty_read_ops); - } - _buffer_cache_write(txc, b0, b_off, op->data, - wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); + _buffer_cache_write(txc, b0, b_off, op->data, + wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); - if (b0->get_blob().csum_type) { - b0->dirty_blob().calc_csum(b_off, op->data); - } - Extent *le = o->extent_map.set_lextent(c, offset, - offset - ep->blob_start(), l, b0, &wctx->old_extents); - txc->statfs_delta.stored() += le->length; + if (b0->get_blob().csum_type) { + b0->dirty_blob().calc_csum(b_off, op->data); + } + Extent *le = o->extent_map.set_lextent(c, offset, + offset - ep->blob_start(), l, b0, &wctx->old_extents); + txc->statfs_delta.stored() += le->length; - offset += l; - length -= l; - logger->inc(l_bluestore_write_big_blobs); - logger->inc(l_bluestore_write_big_deferred); + offset += l; + length -= l; + logger->inc(l_bluestore_write_big_blobs); + logger->inc(l_bluestore_write_big_deferred); - continue; + continue; + } } } o->extent_map.punch_hole(c, offset, l, &wctx->old_extents); diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc index eceecb00d37c..90516760dfec 100644 --- a/src/test/objectstore/store_test.cc +++ b/src/test/objectstore/store_test.cc @@ -6753,6 +6753,34 @@ TEST_P(StoreTestSpecificAUSize, DeferredOnBigOverwrite) { ASSERT_TRUE(bl_eq(expected, bl)); } + // overwrite at the end, 4K alignment + { + ObjectStore::Transaction t; + bufferlist bl; + + bl.append(std::string(block_size, 'g')); + t.write(cid, hoid, block_size, bl.length(), bl, CEPH_OSD_OP_FLAG_FADVISE_NOCACHE); + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + ASSERT_EQ(logger->get(l_bluestore_write_big), 4u); + ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 2u); + + { + bufferlist bl, expected; + r = store->read(ch, hoid, 0, block_size, bl); + 
ASSERT_EQ(r, (int)block_size); + expected.append(string(block_size, 'b')); + ASSERT_TRUE(bl_eq(expected, bl)); + } + { + bufferlist bl, expected; + r = store->read(ch, hoid, block_size, block_size, bl); + ASSERT_EQ(r, (int)block_size); + expected.append(string(block_size, 'g')); + ASSERT_TRUE(bl_eq(expected, bl)); + } + // overwrite at 4K, 12K alignment { ObjectStore::Transaction t; @@ -6763,15 +6791,15 @@ TEST_P(StoreTestSpecificAUSize, DeferredOnBigOverwrite) { r = queue_transaction(store, ch, std::move(t)); ASSERT_EQ(r, 0); } - ASSERT_EQ(logger->get(l_bluestore_write_big), 4u); - ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 2u); + ASSERT_EQ(logger->get(l_bluestore_write_big), 5u); + ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 3u); // makes sure deferred has been submitted // and do all the checks again sleep(g_conf().get_val("bluestore_max_defer_interval") + 2); - ASSERT_EQ(logger->get(l_bluestore_write_big), 4u); - ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 2u); + ASSERT_EQ(logger->get(l_bluestore_write_big), 5u); + ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 3u); { bufferlist bl, expected; @@ -6784,7 +6812,7 @@ TEST_P(StoreTestSpecificAUSize, DeferredOnBigOverwrite) { bufferlist bl, expected; r = store->read(ch, hoid, block_size, block_size, bl); ASSERT_EQ(r, (int)block_size); - expected.append(string(block_size, 'c')); + expected.append(string(block_size, 'g')); ASSERT_TRUE(bl_eq(expected, bl)); } { @@ -6836,8 +6864,8 @@ TEST_P(StoreTestSpecificAUSize, DeferredOnBigOverwrite) { r = queue_transaction(store, ch, std::move(t)); ASSERT_EQ(r, 0); } - ASSERT_EQ(logger->get(l_bluestore_write_big), 5u); - ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 2u); + ASSERT_EQ(logger->get(l_bluestore_write_big), 6u); + ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 3u); { ObjectStore::Transaction t; @@ -6879,8 +6907,8 @@ TEST_P(StoreTestSpecificAUSize, DeferredOnBigOverwrite) { r = queue_transaction(store, ch, 
std::move(t)); ASSERT_EQ(r, 0); } - ASSERT_EQ(logger->get(l_bluestore_write_big), 6u); - ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 3u); + ASSERT_EQ(logger->get(l_bluestore_write_big), 7u); + ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 4u); { bufferlist bl, expected; r = store->read(ch, hoid, 0, block_size, bl); @@ -6906,6 +6934,35 @@ TEST_P(StoreTestSpecificAUSize, DeferredOnBigOverwrite) { ASSERT_EQ(logger->get(l_bluestore_blobs), 1u); ASSERT_EQ(logger->get(l_bluestore_extents), 1u); + // check whether full overwrite bypass deferred + { + ObjectStore::Transaction t; + bufferlist bl; + bl.append(std::string(block_size * 2, 'h')); + + t.write(cid, hoid, 0, bl.length(), bl, CEPH_OSD_OP_FLAG_FADVISE_NOCACHE); + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + ASSERT_EQ(logger->get(l_bluestore_write_big), 8u); + ASSERT_EQ(logger->get(l_bluestore_write_big_deferred), 4u); + + { + bufferlist bl, expected; + r = store->read(ch, hoid, 0, block_size * 2, bl); + ASSERT_EQ(r, (int)block_size * 2); + expected.append(string(block_size * 2, 'h')); + ASSERT_TRUE(bl_eq(expected, bl)); + } + + { + struct store_statfs_t statfs; + int r = store->statfs(&statfs); + ASSERT_EQ(r, 0); + ASSERT_EQ(statfs.data_stored, (unsigned)block_size * 2); + ASSERT_LE(statfs.allocated, (unsigned)block_size * 2); + } + { ObjectStore::Transaction t; t.remove(cid, hoid);