From: Adam Kupczyk Date: Tue, 19 Mar 2024 21:18:18 +0000 (+0000) Subject: os/bluestore/writer: Split do_write, add handling of compressed X-Git-Tag: v20.3.0~9^2~7 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=533505d147f6aec503cfe330b8e2fc0f67b6f929;p=ceph.git os/bluestore/writer: Split do_write, add handling of compressed Split do_write into do_write and do_write_with_blobs. The original is used when only uncompressed data is written. The new one accepts a stream of data formatted into blobs; the blobs can be compressed or uncompressed. Add _blob_create_full_compressed. Fix _do_put_new_blobs to handle compressed blobs. Signed-off-by: Adam Kupczyk --- diff --git a/src/os/bluestore/Writer.cc b/src/os/bluestore/Writer.cc index eeecbf0153016..6cb7597c284dd 100644 --- a/src/os/bluestore/Writer.cc +++ b/src/os/bluestore/Writer.cc @@ -156,6 +156,15 @@ inline void bluestore_blob_use_tracker_t::init_and_ref( } } +inline void bluestore_blob_use_tracker_t::init_and_ref_compressed( + uint32_t logical_length) +{ + au_size = logical_length; + num_au = 0; + alloc_au = 0; + total_bytes = logical_length; +} + inline void bluestore_blob_t::allocated_full( uint32_t length, PExtentVector&& allocs) @@ -527,8 +536,6 @@ BlueStore::BlobRef BlueStore::Writer::_blob_create_full( uint32_t blob_length = disk_data.length(); ceph_assert(p2phase(blob_length, bstore->min_alloc_size) == 0); BlobRef blob = onode->c->new_blob(); - - //uint32_t in_blob_end = disk_data.length(); bluestore_blob_t &bblob = blob->dirty_blob(); uint32_t tracked_unit = min_alloc_size; uint32_t csum_order = // conv 8 -> 32 so "<<" does not overflow @@ -538,7 +545,6 @@ BlueStore::BlobRef BlueStore::Writer::_blob_create_full( bblob.calc_csum(0, disk_data); tracked_unit = std::max(1u << csum_order, min_alloc_size); } - //std::cout << "blob_length=" << blob_length << std::endl; blob->dirty_blob_use_tracker().init_and_ref(blob_length, tracked_unit); PExtentVector blob_allocs; _get_disk_space(blob_length,
blob_allocs); @@ -610,6 +616,37 @@ void BlueStore::Writer::_maybe_meld_with_prev_extent(exmp_it it) } } +BlueStore::BlobRef BlueStore::Writer::_blob_create_full_compressed( + bufferlist& disk_data, + uint32_t compressed_length, + bufferlist& object_data) +{ + uint32_t disk_length = disk_data.length(); + uint32_t object_length = object_data.length(); + ceph_assert(p2phase(disk_length, bstore->min_alloc_size) == 0); + BlobRef blob = onode->c->new_blob(); + + bluestore_blob_t &bblob = blob->dirty_blob(); + uint32_t csum_order = // conv 8 -> 32 so "<<" does not overflow + std::min(wctx->csum_order, std::countr_zero(disk_length)); + if (wctx->csum_type != Checksummer::CSUM_NONE) { + bblob.init_csum(wctx->csum_type, csum_order, disk_length); + bblob.calc_csum(0, disk_data); + } + bblob.set_compressed(object_length, compressed_length); + blob->dirty_blob_use_tracker().init_and_ref_compressed(object_length); + PExtentVector blob_allocs; + _get_disk_space(disk_length, blob_allocs); + _schedule_io(blob_allocs, disk_data); //have to do before move() + //todo: we are setting blob's logical length twice + bblob.allocated_full(object_length, std::move(blob_allocs)); + //no unused in compressed //bblob.mark_used(0, disk_length); + statfs_delta.compressed_allocated() += disk_length; + statfs_delta.compressed_original() += object_length; + statfs_delta.compressed() += compressed_length; + return blob; +} + /** * Note from developer * This module tries to keep naming convention: @@ -1041,7 +1078,13 @@ void BlueStore::Writer::_do_put_new_blobs( logical_offset = ref_end; } else { // compressed - ceph_assert(false); + BlobRef new_blob = _blob_create_full_compressed( + bd_it->disk_data, bd_it->compressed_length, bd_it->object_data); + le = new Extent( + logical_offset, 0, bd_it->real_length, new_blob); + dout(20) << __func__ << " new compressed extent+blob " << le->print(pp_mode) << dendl; + emap.insert(*le); + logical_offset += bd_it->real_length; } 
bstore->logger->inc(l_bluestore_write_big); bstore->logger->inc(l_bluestore_write_big_bytes, le->length); @@ -1389,6 +1432,15 @@ void BlueStore::Writer::do_write( if (ref_end < onode->onode.size) { ref_end = std::min(data_end, onode->onode.size); } + do_write_with_blobs(location, data_end, ref_end, bd); +} + +void BlueStore::Writer::do_write_with_blobs( + uint32_t location, + uint32_t data_end, + uint32_t ref_end, + blob_vec& bd) +{ dout(20) << "blobs to put:" << blob_data_printer(bd, location) << dendl; statfs_delta.stored() += ref_end - location; exmp_it after_punch_it = @@ -1399,9 +1451,15 @@ void BlueStore::Writer::do_write( // todo: if we align to disk block before splitting, we could do it in one go uint32_t pos = location; for (auto& b : bd) { - bstore->_buffer_cache_write(this->txc, onode, pos, b.disk_data, - wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); - pos += b.disk_data.length(); + if (b.is_compressed()) { + bstore->_buffer_cache_write(this->txc, onode, pos, b.object_data, + wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); + pos += b.object_data.length(); + } else { + bstore->_buffer_cache_write(this->txc, onode, pos, b.disk_data, + wctx->buffered ? 
0 : Buffer::FLAG_NOCACHE); + pos += b.disk_data.length(); + } } ceph_assert(pos == data_end); @@ -1415,10 +1473,13 @@ void BlueStore::Writer::do_write( uint32_t location_tmp = location; for (auto& i : bd) { uint32_t location_end = location_tmp + i.real_length; - need_size += p2roundup(location_end, au_size) - p2align(location_tmp, au_size); + if (i.is_compressed()) { + need_size += p2roundup(i.disk_data.length(), au_size); + } else { + need_size += p2roundup(location_end, au_size) - p2align(location_tmp, au_size); + } location_tmp = location_end; } - _defer_or_allocate(need_size); _do_put_blobs(location, data_end, ref_end, bd, after_punch_it); } else { diff --git a/src/os/bluestore/Writer.h b/src/os/bluestore/Writer.h index 3962aa02dc94c..3c2449399415e 100644 --- a/src/os/bluestore/Writer.h +++ b/src/os/bluestore/Writer.h @@ -60,6 +60,13 @@ public: bufferlist& data ); + void do_write_with_blobs( + uint32_t location, + uint32_t data_end, + uint32_t ref_end, + blob_vec& blobs + ); + void debug_iterate_buffers( std::function data_callback ); @@ -176,6 +183,11 @@ private: BlobRef _blob_create_full( bufferlist& disk_data); + BlobRef _blob_create_full_compressed( + bufferlist& disk_data, + uint32_t compressed_length, + bufferlist& object_data); + void _try_reuse_allocated_l( exmp_it after_punch_it, // hint, we could have found it ourselves uint32_t& logical_offset, // will fix value if something consumed diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h index 83e7dacb52625..3fcb99d55ab23 100644 --- a/src/os/bluestore/bluestore_types.h +++ b/src/os/bluestore/bluestore_types.h @@ -393,6 +393,9 @@ struct bluestore_blob_use_tracker_t { uint32_t full_length, uint32_t tracked_chunk); + inline void init_and_ref_compressed( + uint32_t logical_length); + void get( uint32_t offset, uint32_t len);