From: Adam Kupczyk Date: Wed, 8 Nov 2023 13:37:15 +0000 (+0000) Subject: os/bluestore: New variant of bluestore_blob_t::release_extents X-Git-Tag: v20.0.0~1280^2~33 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e51ec4bdb145448d1a063f6f385e480b928e5a10;p=ceph.git os/bluestore: New variant of bluestore_blob_t::release_extents Created new variant of bluestore_blob_t::release_extents function. Now the function takes range [offset~length] as an argument, a simplification that allows it to have much better performance. Created comprehensive unit test, tests 40k random blobs. The unit test does not test for a potential case of having bluestore_blob_t.extents that are not allocation unit aligned. Signed-off-by: Adam Kupczyk --- diff --git a/src/os/bluestore/bluestore_types.cc b/src/os/bluestore/bluestore_types.cc index 3c8dc84810c3..f7ee4a5467cf 100644 --- a/src/os/bluestore/bluestore_types.cc +++ b/src/os/bluestore/bluestore_types.cc @@ -1080,6 +1080,149 @@ bool bluestore_blob_t::release_extents(bool all, return false; } +// Erases allocations from blob's extents and +// appends them to released_disk extents. +// For non-shared blobs it directly represents AUs to release. +// For shared blobs AUs need to be processed by SharedBlob's bluestore_extent_ref_map_t. 
+// (SharedBlob->persistent->ref_map) +// returns +// disk space size to release +uint32_t bluestore_blob_t::release_extents( + uint32_t offset, + uint32_t length, + PExtentVector* released_disk) +{ + uint32_t released_length = 0; + constexpr auto EMPTY = bluestore_pextent_t::INVALID_OFFSET; + if (offset == 0 && length == get_logical_length()) { + released_length = get_ondisk_length(); + released_disk->insert(released_disk->end(), extents.begin(), extents.end()); + extents.resize(1); + extents[0].offset = EMPTY; + extents[0].length = released_length; + return released_length; + } + bluestore_pextent_t* begin = &*extents.begin(); + bluestore_pextent_t* p = &*extents.begin(); + bluestore_pextent_t* end = &*extents.end(); //beware - it is fixed in place + + bluestore_pextent_t* empty = nullptr; + //skip offset + while (p->length <= offset) { + offset -= p->length; + empty = p->is_valid() ? nullptr : p; + ++p; + ceph_assert(p != end); // we assume that length > 0 + } + bluestore_pextent_t hold[2]; // by default initialized to zeros + uint32_t hold_size = 0; + uint32_t rem = length; + bluestore_pextent_t* anchor = p; + // copy_to_release + if (/*offset >= 0 &&*/ offset + length < p->length) { + //special case when in same extent + uint64_t p_offset = p->offset; + uint32_t p_length = p->length; + auto anchor_it = extents.begin() + (anchor - begin); + if (offset > 0) { + //anchor_it->offset = p_offset; //it is already there + anchor_it->length = offset; + ++anchor_it; + released_disk->emplace_back(p->offset + offset, length); + released_length += length; + anchor_it = extents.insert(anchor_it, 2, bluestore_pextent_t(EMPTY, length)); + ++anchor_it; + anchor_it->offset = p_offset + offset + length; + anchor_it->length = p_length - offset - length; + } else { + released_disk->emplace_back(p->offset, length); + released_length += length; + if (empty) { + empty->length += length; + } else { + anchor_it = extents.insert(anchor_it, 1, bluestore_pextent_t(EMPTY, length)); + 
++anchor_it; + } + anchor_it->offset = p_offset + length; + anchor_it->length = p_length - length; + } + } else { + // p->length > offset + // offset + length >= p->length + if (offset > 0) { + //activate hold, put pextent that we need; put new empty + ceph_assert(p->is_valid()); + hold[0].offset = p->offset; + hold[0].length = offset; + hold[1].offset = EMPTY; + hold[1].length = 0; + empty = &hold[1]; + hold_size = 2; + } else { + // offset == 0 + if (empty == nullptr) { + //we need empty, activate hold + hold[0].offset = EMPTY; + hold[0].length = 0; + empty = &hold[0]; + hold_size = 1; + } + } + // starts copying remainder + if (p->length - offset) { + released_disk->emplace_back(p->offset + offset, p->length - offset); + } + released_length += p->length - offset; + empty->length += p->length - offset; + rem -= (p->length - offset); + ++p; + while (rem > 0 && p->length <= rem) { + ceph_assert(p->is_valid()); + released_disk->emplace_back(p->offset, p->length); + released_length += p->length; + empty->length += p->length; + rem -= p->length; + ++p; + } + if (rem > 0) { + ceph_assert(p->is_valid()); + // this we release + released_disk->emplace_back(p->offset, rem); + released_length += rem; + empty->length += rem; + // this much remains + p->offset = p->offset + rem; + p->length = p->length - rem; + //no ++p here; we need this modified p remain part of PExtentVector + } else { + //amazing, clean cut + //if the extent here is empty, we try to meld it + if (p != end && !p->is_valid()) { + empty->length += p->length; + ++p; + } + } + // we erase 0) { + anchor_it = extents.insert(anchor_it, insert_element_cnt, bluestore_pextent_t(0, 0)); + } else { + anchor_it = extents.erase(anchor_it, anchor_it + (-insert_element_cnt)); + } + } + for (uint32_t i = 0; i < hold_size; i++) { + anchor_it->offset = hold[i].offset; + anchor_it->length = hold[i].length; + ++anchor_it; + } + } + return released_length; +} + + void bluestore_blob_t::split(uint32_t blob_offset, 
bluestore_blob_t& rb) { size_t left = blob_offset; diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h index 9d9c0271d6fb..2d1540c3382b 100644 --- a/src/os/bluestore/bluestore_types.h +++ b/src/os/bluestore/bluestore_types.h @@ -971,6 +971,18 @@ public: bool all, const PExtentVector& logical, PExtentVector* r); + + /// Remove blob's pextents. + /// [offset~length] - range to remove, in local blob space + /// released_disk - a vector of disk allocation units that are no longer in use; + /// appends to it + /// returns: + /// size of released disk + uint32_t release_extents( + uint32_t offset, + uint32_t length, + PExtentVector* released_disk + ); }; WRITE_CLASS_DENC_FEATURED(bluestore_blob_t) diff --git a/src/test/objectstore/test_bluestore_types.cc b/src/test/objectstore/test_bluestore_types.cc index f02da9df5c97..fd761a2f836c 100644 --- a/src/test/objectstore/test_bluestore_types.cc +++ b/src/test/objectstore/test_bluestore_types.cc @@ -2298,6 +2298,171 @@ TEST(bluestore_blob_t, wrong_map_bl_in_51682) { ASSERT_EQ(expected_pos, num_expected_entries); } } +class bluestore_blob_t_test : + public ::testing::Test, + public ::testing::WithParamInterface> +{ +}; + +TEST_P(bluestore_blob_t_test, release_extents) +{ + // how to construct valid release input + // 1. pre-release area (might be empty) + // 2. release area (cannot be empty anywhere), likely to continue extents + // 3. 
post-release area (might be empty) + // result: + // pre-release + empty_region + post-release + // + + // release area + + std::vector param = GetParam(); + ASSERT_EQ(param.size(), 8); + uint32_t alloc_unit = param[0]; + uint32_t test_region_range = param[1]; + uint32_t test_is_empty_nom = param[2]; + uint32_t test_is_empty_denom = param[3]; + uint32_t test_pmp_range = param[4]; + uint32_t test_pmp_iszero = param[5]; + uint32_t test_pmp_cont_nom = param[6]; + uint32_t test_pmp_cont_denom = param[7]; + + auto generate = [&](PExtentVector* cont, PExtentVector& v, uint32_t num_aus) { + bool prev_is_empty = false; + uint32_t illegal_pos = (uint32_t)-1; + while (num_aus > 0) { + uint32_t a = (rand() % test_region_range) + 1; + if (a > num_aus) a = num_aus; + if (prev_is_empty) { + prev_is_empty = false; + } else { + prev_is_empty = (rand() % test_is_empty_denom) < test_is_empty_nom; + } + if (prev_is_empty) { + v.emplace_back(bluestore_pextent_t::INVALID_OFFSET, a * alloc_unit); + } else { + if (cont && cont->size() > 0 && cont->back().is_valid()) { + v.emplace_back(cont->back().end(), a * alloc_unit); + cont = nullptr; + } else { + uint32_t pos; + do { + pos = (rand() % 1000000) * alloc_unit; + } while (pos == illegal_pos); + v.emplace_back(pos, a * alloc_unit); + illegal_pos = pos + a * alloc_unit; + } + } + num_aus -= a; + } + }; + auto generate_nonempty = [&](PExtentVector* cont, PExtentVector& v, uint32_t num_aus) { + uint32_t illegal_pos = (uint32_t)-1; + while (num_aus > 0) { + uint32_t a = (rand() % test_region_range) + 1; + if (a > num_aus) a = num_aus; + if (cont && cont->size() > 0 && cont->back().is_valid()) { + v.emplace_back(cont->back().end(), a * alloc_unit); + cont = nullptr; + } else { + uint32_t pos; + do { + pos = (rand() % 1000000) * alloc_unit; + } while (pos == illegal_pos); + v.emplace_back(pos, a * alloc_unit); + illegal_pos = pos + a * alloc_unit; + } + num_aus -= a; + } + }; + auto append = [&](PExtentVector& dest, const PExtentVector& src) { 
+ for (auto s: src) { + if (dest.size() > 0 && + ((dest.back().is_valid() && dest.back().end() == s.offset) || + (!dest.back().is_valid() && !s.is_valid()) ) ) { + dest.back().length += s.length; + } else { + dest.push_back(s); + } + } + }; + + for (int i = 0; i < 10000; i++) { + PExtentVector pre; + PExtentVector mid; + PExtentVector post; + + uint32_t aus1 = std::rand() % (test_pmp_range + test_pmp_iszero); + uint32_t punch_offset = 0; + if (aus1 > test_pmp_iszero) { + aus1 -= test_pmp_iszero; + generate(nullptr, pre, aus1); + punch_offset = aus1 * alloc_unit; + } else { + aus1 = 0; + } + uint32_t aus2 = (std::rand() % test_pmp_range) + 1; + uint32_t punch_length = aus2 * alloc_unit; + bool cont2 = std::rand() % test_pmp_cont_denom < test_pmp_cont_nom; + generate_nonempty(cont2 ? &pre: nullptr, mid, aus2); + uint32_t aus3 = std::rand() % (test_pmp_range + test_pmp_iszero); + bool cont3 = std::rand() % test_pmp_cont_denom < test_pmp_cont_nom; + if (aus3 > test_pmp_iszero) { + aus3 -= test_pmp_iszero; + generate(cont3 ? 
&mid: nullptr, post, aus3); + } else { + aus3 = 0; + } + uint32_t total_length = (aus1 + aus2 + aus3) * alloc_unit; + + PExtentVector input; + input.insert(input.end(), pre.begin(), pre.end()); + append(input, mid); + append(input, post); + PExtentVector output; + output.insert(output.end(), pre.begin(), pre.end()); + PExtentVector empty; + empty.emplace_back(bluestore_pextent_t::INVALID_OFFSET, punch_length); + append(output, empty); + append(output, post); + + bluestore_blob_t blob; + blob.allocated(0, total_length, input); + PExtentVector result; +// std::cout << "inp=" << blob.get_extents() << std::endl; +// std::cout << "punch=0x" << std::hex << punch_offset << "~" << punch_length +// << std::dec << std::endl; + //PExtentVector punch; + //punch.emplace_back(punch_offset, punch_length); + //blob.release_extents(false, punch, &result); + blob.release_extents(punch_offset, punch_length, &result); +// std::cout << "rel=" << result << std::endl; +// std::cout << "out=" << blob.get_extents() << std::endl; +// std::cout << std::endl; + ASSERT_EQ(result, mid); + ASSERT_EQ(blob.get_extents(), output); + } +} + +/* + uint32_t alloc_unit = 4096; + uint32_t test_region_range = 5; + uint32_t test_is_empty_nom = 1; + uint32_t test_is_empty_denom = 3; + uint32_t test_pmp_range = 10; + uint32_t test_pmp_iszero = 3; + uint32_t test_pmp_cont_nom = 2; + uint32_t test_pmp_cont_denom = 4; + */ +INSTANTIATE_TEST_SUITE_P( + ObjectStore, + bluestore_blob_t_test, + ::testing::Values( + std::vector({4096, 5, 1, 3, 10, 3, 2, 4}), + std::vector({4096, 10, 2, 3, 30, 10, 3, 5}), + std::vector({8192, 10, 2, 5, 30, 10, 4, 5}), + std::vector({32768, 15, 1, 6, 60, 30, 3, 4}) + ) +); //--------------------------------------------------------------------------------- static int verify_extent(const extent_t &ext, const extent_t *ext_arr,