From: Laura Flores Date: Fri, 24 Dec 2021 01:15:33 +0000 (+0000) Subject: os/bluestore: detect unnecessary zeros in _do_write_small() X-Git-Tag: v17.1.0~159^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=15cd3550320c9c6095dad0be021a9a87177deb46;p=ceph.git os/bluestore: detect unnecessary zeros in _do_write_small() Bluestore's `_do_write()` method handles writing data from bufferlists. Currently, it writes data from bufferlists without checking for unnecessary zeros. The lack zero detection may negatively impact performance. In _do_write_small(), we check if a bufferlist is made up of zeros and avoid writing it if so. Two new counters, `l_bluestore_write_small_skipped` and l_bluestore_write_small_skipped_bytes`, have been introduced to help us count how many zero blocks and bytes from _do_write_small() have been skipped. Signed-off-by: Laura Flores --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 38b9e332be82..8cf3bef1165c 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5090,6 +5090,13 @@ void BlueStore::_init_logger() NULL, PerfCountersBuilder::PRIO_DEBUGONLY, unit_t(UNIT_BYTES)); + + b.add_u64_counter(l_bluestore_write_small_skipped, + "write_small_skipped", + "Small writes into existing or sparse small blobs skipped due to zero detection"); + b.add_u64_counter(l_bluestore_write_small_skipped_bytes, + "write_small_skipped_bytes", + "Small writes into existing or sparse small blobs skipped due to zero detection (bytes)"); //**************************************** // compressions stats @@ -14268,12 +14275,24 @@ void BlueStore::_do_write_small( // temporarily just pad them to min_alloc_size and write them to a new place // on every update. if (bdev->is_smr()) { - BlobRef b = c->new_blob(); uint64_t b_off = p2phase(offset, alloc_len); uint64_t b_off0 = b_off; - _pad_zeros(&bl, &b_off0, min_alloc_size); o->extent_map.punch_hole(c, offset, length, &wctx->old_extents); - wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length, false, true); + + // Zero detection -- small block + if (!bl.is_zero()) { + BlobRef b = c->new_blob(); + _pad_zeros(&bl, &b_off0, min_alloc_size); + wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length, false, true); + } else { // if (bl.is_zero()) + dout(20) << __func__ << " skip small zero block " << std::hex + << " (0x" << b_off0 << "~" << bl.length() << ")" + << " (0x" << b_off << "~" << length << ")" + << std::dec << dendl; + logger->inc(l_bluestore_write_small_skipped); + logger->inc(l_bluestore_write_small_skipped_bytes, length); + } + return; } #endif @@ -14497,17 +14516,29 @@ void BlueStore::_do_write_small( // due to existent extents uint64_t b_off = offset - bstart; uint64_t b_off0 = b_off; - _pad_zeros(&bl, &b_off0, chunk_size); + o->extent_map.punch_hole(c, offset, length, &wctx->old_extents); - dout(20) << __func__ << " reuse blob " << *b << std::hex - << " (0x" << b_off0 << "~" << bl.length() << ")" - << " (0x" << b_off << "~" << length << ")" - << std::dec << dendl; + // Zero detection -- small block + if (!bl.is_zero()) { + _pad_zeros(&bl, &b_off0, chunk_size); + + dout(20) << __func__ << " reuse blob " << *b << std::hex + << " (0x" << b_off0 << "~" << bl.length() << ")" + << " (0x" << b_off << "~" << length << ")" + << std::dec << dendl; + + wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length, + false, false); + logger->inc(l_bluestore_write_small_unused); + } else { // if (bl.is_zero()) + dout(20) << __func__ << " skip small zero block " << std::hex + << " (0x" << b_off0 << "~" << bl.length() << ")" + << " (0x" << b_off << "~" << length << ")" + << std::dec << dendl; + logger->inc(l_bluestore_write_small_skipped); + logger->inc(l_bluestore_write_small_skipped_bytes, length); + } - o->extent_map.punch_hole(c, offset, length, &wctx->old_extents); - wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length, - false, false); - logger->inc(l_bluestore_write_small_unused); return; } } @@ -14545,20 +14576,32 @@ void BlueStore::_do_write_small( offset0 + alloc_len, min_alloc_size)) { - uint64_t chunk_size = b->get_blob().get_chunk_size(block_size); uint64_t b_off = offset - bstart; uint64_t b_off0 = b_off; - _pad_zeros(&bl, &b_off0, chunk_size); + o->extent_map.punch_hole(c, offset, length, &wctx->old_extents); + + // Zero detection -- small block + if (!bl.is_zero()) { + uint64_t chunk_size = b->get_blob().get_chunk_size(block_size); + _pad_zeros(&bl, &b_off0, chunk_size); - dout(20) << __func__ << " reuse blob " << *b << std::hex - << " (0x" << b_off0 << "~" << bl.length() << ")" - << " (0x" << b_off << "~" << length << ")" - << std::dec << dendl; + dout(20) << __func__ << " reuse blob " << *b << std::hex + << " (0x" << b_off0 << "~" << bl.length() << ")" + << " (0x" << b_off << "~" << length << ")" + << std::dec << dendl; + + wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length, + false, false); + logger->inc(l_bluestore_write_small_unused); + } else { // if (bl.is_zero()) + dout(20) << __func__ << " skip small zero block " << std::hex + << " (0x" << b_off0 << "~" << bl.length() << ")" + << " (0x" << b_off << "~" << length << ")" + << std::dec << dendl; + logger->inc(l_bluestore_write_small_skipped); + logger->inc(l_bluestore_write_small_skipped_bytes, length); + } - o->extent_map.punch_hole(c, offset, length, &wctx->old_extents); - wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length, - false, false); - logger->inc(l_bluestore_write_small_unused); return; } } @@ -14589,16 +14632,27 @@ void BlueStore::_do_write_small( << std::hex << offset << "~" << length << std::dec << dendl; } - // new blob. - BlobRef b = c->new_blob(); uint64_t b_off = p2phase(offset, alloc_len); uint64_t b_off0 = b_off; - _pad_zeros(&bl, &b_off0, block_size); o->extent_map.punch_hole(c, offset, length, &wctx->old_extents); - wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length, - min_alloc_size != block_size, // use 'unused' bitmap when alloc granularity - // doesn't match disk one only - true); + + // Zero detection -- small block + if (!bl.is_zero()) { + // new blob. + BlobRef b = c->new_blob(); + _pad_zeros(&bl, &b_off0, block_size); + wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length, + min_alloc_size != block_size, // use 'unused' bitmap when alloc granularity + // doesn't match disk one only + true); + } else { // if (bl.is_zero()) + dout(20) << __func__ << " skip small zero block " << std::hex + << " (0x" << b_off0 << "~" << bl.length() << ")" + << " (0x" << b_off << "~" << length << ")" + << std::dec << dendl; + logger->inc(l_bluestore_write_small_skipped); + logger->inc(l_bluestore_write_small_skipped_bytes, length); + } return; } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 3b9118f03efb..d6ab76019656 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -141,6 +141,9 @@ enum { l_bluestore_issued_deferred_write_bytes, l_bluestore_submitted_deferred_writes, l_bluestore_submitted_deferred_write_bytes, + + l_bluestore_write_small_skipped, + l_bluestore_write_small_skipped_bytes, //**************************************** // compressions stats @@ -3257,7 +3260,6 @@ private: uint64_t loffs_end, uint64_t min_alloc_size); }; - void _do_write_small( TransContext *txc, CollectionRef &c,