From: Radoslaw Zarzynski Date: Wed, 13 Sep 2023 19:45:47 +0000 (+0200) Subject: Merge pull request #53178 from aclamk/wip-aclamk-bs-esb-5-enabled X-Git-Tag: v19.0.0~486 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=f417f38beef37f8a069b82ca082a543f5099f533;p=ceph.git Merge pull request #53178 from aclamk/wip-aclamk-bs-esb-5-enabled bluestore: Elastic Shared Blobs 5 - enabled, final Reviewed-by: Igor Fedotov Reviewed-by: Radoslaw Zarzynski logical_offset) { + // a hole in logical mapping, mark it + res.emplace_back(-1ULL, ep->logical_offset - l_pos, 0, 0); + } + l_pos = ep->logical_offset + ep->length; + const bluestore_blob_t& bblob = ep->blob->get_blob(); + uint32_t chunk_size = bblob.get_chunk_size(onode->c->store->block_size); + uint32_t length_left = ep->length; + + bluestore_extent_ref_map_t* ref_map = nullptr; + if (bblob.is_shared()) { + ceph_assert(ep->blob->shared_blob->is_loaded()); + bluestore_shared_blob_t* bsblob = ep->blob->shared_blob->persistent; + ref_map = &bsblob->ref_map; + } + + unsigned csum_i = 0; + size_t csum_cnt; + uint32_t length; + if (bblob.has_csum()) { + csum_cnt = bblob.get_csum_count(); + uint32_t csum_chunk_size = bblob.get_csum_chunk_size(); + uint64_t csum_offset_align = p2align(ep->blob_offset, csum_chunk_size); + csum_i = csum_offset_align / csum_chunk_size; + // size of first chunk + length = p2align(ep->blob_offset + csum_chunk_size, csum_chunk_size) - ep->blob_offset; + length = std::min(length_left, length); + if (csum_chunk_size < chunk_size) { + chunk_size = csum_chunk_size; + } + } else { + length = p2align(ep->blob_offset + chunk_size, chunk_size) - ep->blob_offset; + length = std::min(length_left, length); + } + + uint32_t bo = ep->blob_offset; + while (length_left > 0) { + uint64_t csum_val = 0; + if (bblob.has_csum()) { + ceph_assert(csum_cnt > csum_i); + csum_val = bblob.get_csum_item(csum_i); + ++csum_i; + } + //extract AU from extents + uint64_t disk_extent_left; // length till the end of disk 
extent + uint64_t disk_offset = bblob.calc_offset(bo, &disk_extent_left); + bluestore_extent_ref_map_t::debug_len_cnt l_c = {0, std::numeric_limits::max()}; /* NOTE(review): std::numeric_limits has no template argument here, presumably dropped during text extraction; confirm against upstream */ + if (bblob.is_shared()) { + l_c = ref_map->debug_peek(disk_offset); + if (l_c.len < length) { + length = l_c.len; + } + } + res.emplace_back(disk_offset, length, csum_val, l_c.cnt); + bo += length; + length_left -= length; + length = chunk_size; + }; + } + return res; + } + + /* Streams a debug_au_vector_t as "[", then the entries separated by a single space, then "]". A regular entry is printed as "0x", hex disk_offset, "~", hex disk_length, "(", decimal ref_cnts cast to int32_t, "):", hex chksum. An entry whose disk_offset equals -1ULL (the value the builder above stores for a hole in the logical mapping) is printed as "0x~" followed by the hex disk_length only. The stream is switched back to std::dec before returning. */ + std::ostream& operator<<(std::ostream& out, const BlueStore::ExtentMap::debug_au_vector_t& auv) + { + out << "["; + for (size_t i = 0; i < auv.size(); ++i) { + if (i != 0) { + out << " "; + } + out << "0x" << std::hex; + if (auv[i].disk_offset != -1ULL) { + out << auv[i].disk_offset << "~" << auv[i].disk_length + << "(" << std::dec << int32_t(auv[i].ref_cnts) + << "):" << std::hex << auv[i].chksum; + } else { + out << "~" << auv[i].disk_length << std::dec; + } + } + out << "]" << std::dec; + return out; + } + // Onode +// +// Mapping blobs over Onode's logical offsets. +// +// Blob is always continuous. Blobs may overlap. +// Non-mapped regions are "0" when read. +// 1 2 3 +// 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef +// +// +// blob.a starts at 0x0 length 0xe +// blob.b starts at 0xf length 0xb +// blob.c starts at 0x23 length 0x1b +// blob.d starts at 0x06 length 0x12 +// blob.e starts at 0x2d length 0xf +// +// Blobs can have non-encoded parts: +// 1 2 3 +// 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef +// aaaaaa......aaabbbbb...bbbb ccccccccccccccc..........cc +// dddddd........ddd .....eeeeeeeeee +// "." - non-encoded parts of blob (holes) +// +// Mapping logical to blob: +// extent_map maps {Onode's logical offset, length}=>{Blob, in-blob offset} +// {0x0, 0x6}=>{blob.a, 0x0} +// {0x6, 0x6}=>{blob.d, 0x0} +// {0xc, 0x3}=>{blob.a, 0xc} +// {0xf, 0x5}=>{blob.b, 0x0} +// {0x14, 0x3}=>{blob.d, 0xe} +// {0x17, 0x4}=>{blob.b, 0x8} +// a hole here +// {0x23, 0xe}=>{blob.c, 0x0} +// and so on...
+// +// Compressed blobs do not have non-encoded parts. +// Same example as above but all blobs are compressed: +// 1 2 3 +// 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef +// aaaaaaAAAAAAaaabbbbbBBBbbbb cccccccccccccccCCCCCCCCCCcc +// ddddddDDDDDDDDddd EEEEEeeeeeeeeee +// A-E: parts of blobs that are never used. +// This can happen when a compressed blob is overwritten partially. +// The target ranges are no longer used, but are left there because they are necessary +// for successful decompression. +// +// In compressed blobs PExtentVector and csum refer to actually occupied disk space. +// Blob's logical length is larger than occupied disk space. +// Mapping from extent_map always uses offsets of decompressed data. #undef dout_prefix #define dout_prefix *_dout << "bluestore.onode(" << this << ")." << __func__ << " "