From: Adam Kupczyk Date: Thu, 13 Oct 2022 14:25:42 +0000 (+0000) Subject: os/bluestore: fix no deferred writing X-Git-Tag: v17.2.6~280^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e4adf4a23e595951ddb777b4f7efd80c0e453910;p=ceph.git os/bluestore: fix no deferred writing Fixes: https://tracker.ceph.com/issues/56488 Signed-off-by: Adam Kupczyk (cherry picked from commit 9a57b631730674c2c4d94383002813ad2b7e868d) --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 3239a825846d..2e1e35ecc226 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -15448,6 +15448,18 @@ int BlueStore::_do_alloc_write( // compress (as needed) and calc needed space uint64_t need = 0; + uint64_t data_size = 0; + // 'need' is the amount of space that must be provided by the allocator. + // 'data_size' is the size of the data that will be transferred to disk. + // Note that data_size is always <= need. This happens when: + // - the write to the blob was unaligned, and there is free space + // - the data has been compressed + // + // We make one decision and apply it to all blobs. + // All blobs will be deferred or none will. + // We assume that the allocator does its best to provide contiguous space, + // and the condition is: (data_size < prefer_deferred_size). 
+ auto max_bsize = std::max(wctx->target_blob_size, min_alloc_size); for (auto& wi : wctx->writes) { if (c && wi.blob_length > min_alloc_size) { @@ -15494,6 +15506,7 @@ int BlueStore::_do_alloc_write( txc->statfs_delta.compressed_allocated() += result_len; logger->inc(l_bluestore_compress_success_count); need += result_len; + data_size += result_len; } else { rejected = true; } @@ -15506,6 +15519,7 @@ int BlueStore::_do_alloc_write( << dendl; logger->inc(l_bluestore_compress_rejected_count); need += wi.blob_length; + data_size += wi.bl.length(); } else { rejected = true; } @@ -15520,6 +15534,7 @@ int BlueStore::_do_alloc_write( << std::dec << dendl; logger->inc(l_bluestore_compress_rejected_count); need += wi.blob_length; + data_size += wi.bl.length(); } log_latency("compress@_do_alloc_write", l_bluestore_compress_lat, @@ -15527,10 +15542,11 @@ int BlueStore::_do_alloc_write( cct->_conf->bluestore_log_op_age ); } else { need += wi.blob_length; + data_size += wi.bl.length(); } } PExtentVector prealloc; - prealloc.reserve(2 * wctx->writes.size());; + prealloc.reserve(2 * wctx->writes.size()); int64_t prealloc_left = 0; prealloc_left = alloc->allocate( need, min_alloc_size, need, @@ -15548,10 +15564,10 @@ int BlueStore::_do_alloc_write( } _collect_allocation_stats(need, min_alloc_size, prealloc); - dout(20) << __func__ << " prealloc " << prealloc << dendl; + dout(20) << __func__ << std::hex << " need=0x" << need << " data=0x" << data_size + << " prealloc " << prealloc << dendl; auto prealloc_pos = prealloc.begin(); ceph_assert(prealloc_pos != prealloc.end()); - uint64_t prealloc_pos_length = prealloc_pos->length; for (auto& wi : wctx->writes) { bluestore_blob_t& dblob = wi.b->dirty_blob(); @@ -15614,20 +15630,15 @@ int BlueStore::_do_alloc_write( PExtentVector extents; int64_t left = final_length; - bool has_chunk2defer = false; auto prefer_deferred_size_snapshot = prefer_deferred_size.load(); while (left > 0) { ceph_assert(prealloc_left > 0); - has_chunk2defer |= 
(prealloc_pos_length < prefer_deferred_size_snapshot); if (prealloc_pos->length <= left) { prealloc_left -= prealloc_pos->length; left -= prealloc_pos->length; txc->statfs_delta.allocated() += prealloc_pos->length; extents.push_back(*prealloc_pos); ++prealloc_pos; - if (prealloc_pos != prealloc.end()) { - prealloc_pos_length = prealloc_pos->length; - } } else { extents.emplace_back(prealloc_pos->offset, left); prealloc_pos->offset += left; @@ -15673,7 +15684,7 @@ int BlueStore::_do_alloc_write( // queue io if (!g_conf()->bluestore_debug_omit_block_device_write) { - if (has_chunk2defer && l->length() < prefer_deferred_size_snapshot) { + if (data_size < prefer_deferred_size_snapshot) { dout(20) << __func__ << " deferring 0x" << std::hex << l->length() << std::dec << " write via deferred" << dendl; bluestore_deferred_op_t *op = _get_deferred_op(txc, l->length());