From: Igor Fedotov Date: Fri, 24 Apr 2020 22:03:25 +0000 (+0300) Subject: os/bluestore: fix improper blob usage while handling deferred big write. X-Git-Tag: v16.1.0~2456^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f47ccfa70abd6a1ff4eb3ed45015390e50ea6245;p=ceph.git os/bluestore: fix improper blob usage while handling deferred big write. The root cause is tail extent iterator invalidation when rewriting head one. Fixes: https://tracker.ceph.com/issues/45195 Signed-off-by: Igor Fedotov --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index de18468d4745..639f115d7a55 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -13336,14 +13336,17 @@ bool BlueStore::BigDeferredWriteContext::can_defer( res = blob_aligned_len() <= prefer_deferred_size && blob_aligned_len() <= ondisk && blob.is_allocated(b_off, blob_aligned_len()); + if (res) { + blob_ref = ep->blob; + blob_start = ep->blob_start(); + } } return res; } -bool BlueStore::BigDeferredWriteContext::apply_defer( - BlueStore::extent_map_t::iterator ep) +bool BlueStore::BigDeferredWriteContext::apply_defer() { - int r = ep->blob->get_blob().map( + int r = blob_ref->get_blob().map( b_off, blob_aligned_len(), [&](const bluestore_pextent_t& pext, uint64_t offset, @@ -13364,7 +13367,6 @@ void BlueStore::_do_write_big_apply_deferred( TransContext* txc, CollectionRef& c, OnodeRef o, - BlueStore::extent_map_t::iterator ep, BlueStore::BigDeferredWriteContext& dctx, bufferlist::iterator& blp, WriteContext* wctx) @@ -13402,14 +13404,14 @@ void BlueStore::_do_write_big_apply_deferred( bl.claim_append(tail_bl); logger->inc(l_bluestore_write_penalty_read_ops); } - auto b0 = ep->blob; + auto& b0 = dctx.blob_ref; _buffer_cache_write(txc, b0, dctx.b_off, bl, wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); b0->dirty_blob().calc_csum(dctx.b_off, bl); Extent* le = o->extent_map.set_lextent(c, dctx.off, - dctx.off - ep->blob_start(), dctx.used, b0, &wctx->old_extents); + dctx.off - dctx.blob_start, dctx.used, b0, &wctx->old_extents); // in fact this is a no-op for big writes but left here to maintain // uniformity and avoid missing after some refactor. @@ -13475,7 +13477,6 @@ void BlueStore::_do_write_big( false; auto offset_next = offset + head_info.used; auto remaining = l - head_info.used; - if (will_defer && remaining) { will_defer = false; if (remaining <= prefer_deferred_size_snapshot) { @@ -13488,31 +13489,30 @@ void BlueStore::_do_write_big( block_size, offset_next, remaining); - will_defer = will_defer && remaining == tail_info.used; } } if (will_defer) { - dout(20) << __func__ << " " << *(ep->blob) + dout(20) << __func__ << " " << *(head_info.blob_ref) << " deferring big " << std::hex << " (0x" << head_info.b_off << "~" << head_info.blob_aligned_len() << ")" << std::dec << " write via deferred" << dendl; if (remaining) { - dout(20) << __func__ << " " << *(ep_next->blob) + dout(20) << __func__ << " " << *(tail_info.blob_ref) << " deferring big " << std::hex << " (0x" << tail_info.b_off << "~" << tail_info.blob_aligned_len() << ")" << std::dec << " write via deferred" << dendl; } - will_defer = head_info.apply_defer(ep); + will_defer = head_info.apply_defer(); if (!will_defer) { dout(20) << __func__ << " deferring big fell back, head isn't continuous" << dendl; } else if (remaining) { - will_defer = tail_info.apply_defer(ep_next); + will_defer = tail_info.apply_defer(); if (!will_defer) { dout(20) << __func__ << " deferring big fell back, tail isn't continuous" @@ -13521,9 +13521,9 @@ void BlueStore::_do_write_big( } } if (will_defer) { - _do_write_big_apply_deferred(txc, c, o, ep, head_info, blp, wctx); + _do_write_big_apply_deferred(txc, c, o, head_info, blp, wctx); if (remaining) { - _do_write_big_apply_deferred(txc, c, o, ep_next, tail_info, + _do_write_big_apply_deferred(txc, c, o, tail_info, blp, wctx); } offset += l; diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index c9e831c7a4b1..cedbe53688d6 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -1945,6 +1945,8 @@ public: uint32_t used = 0; uint64_t head_read = 0; uint64_t tail_read = 0; + BlobRef blob_ref; + uint64_t blob_start = 0; PExtentVector res_extents; inline uint64_t blob_aligned_len() const { @@ -1956,7 +1958,7 @@ public: uint64_t block_size, uint64_t offset, uint64_t l); - bool apply_defer(BlueStore::extent_map_t::iterator ep); + bool apply_defer(); }; // -------------------------------------------------------- @@ -3090,7 +3092,6 @@ private: TransContext* txc, CollectionRef& c, OnodeRef o, - BlueStore::extent_map_t::iterator ep, BigDeferredWriteContext& dctx, bufferlist::iterator& blp, WriteContext* wctx); diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc index 9226ce302e57..3e643d586807 100644 --- a/src/test/objectstore/store_test.cc +++ b/src/test/objectstore/store_test.cc @@ -8815,6 +8815,90 @@ TEST_P(StoreTestSpecificAUSize, SpilloverFixed2Test) { ); } +TEST_P(StoreTestSpecificAUSize, Ticket45195Repro) { + if (string(GetParam()) != "bluestore") + return; + + SetVal(g_conf(), "bluestore_default_buffered_write", "true"); + SetVal(g_conf(), "bluestore_max_blob_size", "65536"); + SetVal(g_conf(), "bluestore_debug_enforce_settings", "hdd"); + SetVal(g_conf(), "bluestore_fsck_on_mount", "false"); + g_conf().apply_changes(nullptr); + + StartDeferred(0x1000); + + int r; + coll_t cid; + ghobject_t hoid(hobject_t(sobject_t("Object", CEPH_NOSNAP))); + auto ch = store->create_new_collection(cid); + { + ObjectStore::Transaction t; + t.create_collection(cid, 0); + cerr << "Creating collection " << cid << std::endl; + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + { + size_t large_object_size = 1 * 1024 * 1024; + size_t expected_write_size = 0x8000; + ObjectStore::Transaction t; + t.touch(cid, hoid); + t.set_alloc_hint(cid, hoid, large_object_size, expected_write_size, + CEPH_OSD_ALLOC_HINT_FLAG_SEQUENTIAL_READ | + CEPH_OSD_ALLOC_HINT_FLAG_APPEND_ONLY); + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + { + ObjectStore::Transaction t; + bufferlist bl, orig; + string s(0xc000, '0'); + bl.append(s); + t.write(cid, hoid, 0xb000, bl.length(), bl); + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + { + ObjectStore::Transaction t; + bufferlist bl, orig; + string s(0x10000, '1'); + bl.append(s); + t.write(cid, hoid, 0x16000, bl.length(), bl); + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + { + ObjectStore::Transaction t; + bufferlist bl, orig; + string s(0x4000, '1'); + bl.append(s); + t.write(cid, hoid, 0x1b000, bl.length(), bl); + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + bufferlist bl; + r = store->read(ch, hoid, 0xb000, 0xb000, bl); + ASSERT_EQ(r, 0xb000); + + store->umount(); + store->mount(); + + ch = store->open_collection(cid); + { + ObjectStore::Transaction t; + bufferlist bl, orig; + string s(0xf000, '3'); + bl.append(s); + t.write(cid, hoid, 0xf000, bl.length(), bl); + cerr << "write4" << std::endl; + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + + r = store->read(ch, hoid, 0xb000, 0x10000, bl); + ASSERT_EQ(r, 0x10000); +} + #endif // WITH_BLUESTORE int main(int argc, char **argv) {