]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: detect unnecessary zeros in _do_write_small()
authorLaura Flores <lflores@redhat.com>
Fri, 24 Dec 2021 01:15:33 +0000 (01:15 +0000)
committerLaura Flores <lflores@redhat.com>
Fri, 24 Dec 2021 01:15:33 +0000 (01:15 +0000)
Bluestore's `_do_write()` method handles writing data from bufferlists. Currently, it writes data from bufferlists without checking for unnecessary zeros. The lack zero detection may negatively impact performance.

In _do_write_small(), we check if a bufferlist is made up of zeros and avoid writing it if so.
Two new counters, `l_bluestore_write_small_skipped` and l_bluestore_write_small_skipped_bytes`, have been introduced to help us count how many zero blocks and bytes from _do_write_small() have been skipped.

Signed-off-by: Laura Flores <lflores@redhat.com>
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h

index 38b9e332be82821de8d79a3839feffc72b276bbd..8cf3bef1165cf8102ee648d4526ee4e70d7a6da1 100644 (file)
@@ -5090,6 +5090,13 @@ void BlueStore::_init_logger()
                    NULL,
                    PerfCountersBuilder::PRIO_DEBUGONLY,
                    unit_t(UNIT_BYTES));
+
+  b.add_u64_counter(l_bluestore_write_small_skipped,
+      "write_small_skipped",
+      "Small writes into existing or sparse small blobs skipped due to zero detection");
+  b.add_u64_counter(l_bluestore_write_small_skipped_bytes,
+      "write_small_skipped_bytes",
+      "Small writes into existing or sparse small blobs skipped due to zero detection (bytes)");
   //****************************************
 
   // compressions stats
@@ -14268,12 +14275,24 @@ void BlueStore::_do_write_small(
   // temporarily just pad them to min_alloc_size and write them to a new place
   // on every update.
   if (bdev->is_smr()) {
-    BlobRef b = c->new_blob();
     uint64_t b_off = p2phase<uint64_t>(offset, alloc_len);
     uint64_t b_off0 = b_off;
-    _pad_zeros(&bl, &b_off0, min_alloc_size);
     o->extent_map.punch_hole(c, offset, length, &wctx->old_extents);
-    wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length, false, true);
+
+    // Zero detection -- small block
+    if (!bl.is_zero()) {
+      BlobRef b = c->new_blob();
+      _pad_zeros(&bl, &b_off0, min_alloc_size);
+      wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length, false, true);
+    } else { // if (bl.is_zero())
+      dout(20) << __func__ << " skip small zero block " << std::hex
+        << " (0x" << b_off0 << "~" << bl.length() << ")"
+        << " (0x" << b_off << "~" << length << ")"
+        << std::dec << dendl;
+      logger->inc(l_bluestore_write_small_skipped);
+      logger->inc(l_bluestore_write_small_skipped_bytes, length);
+    }
+
     return;
   }
 #endif
@@ -14497,17 +14516,29 @@ void BlueStore::_do_write_small(
            // due to existent extents
            uint64_t b_off = offset - bstart;
            uint64_t b_off0 = b_off;
-           _pad_zeros(&bl, &b_off0, chunk_size);
+           o->extent_map.punch_hole(c, offset, length, &wctx->old_extents);
 
-           dout(20) << __func__ << " reuse blob " << *b << std::hex
-                    << " (0x" << b_off0 << "~" << bl.length() << ")"
-                    << " (0x" << b_off << "~" << length << ")"
-                    << std::dec << dendl;
+           // Zero detection -- small block
+           if (!bl.is_zero()) {
+             _pad_zeros(&bl, &b_off0, chunk_size);
+
+             dout(20) << __func__ << " reuse blob " << *b << std::hex
+                      << " (0x" << b_off0 << "~" << bl.length() << ")"
+                      << " (0x" << b_off << "~" << length << ")"
+                      << std::dec << dendl;
+
+             wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length,
+                 false, false);
+             logger->inc(l_bluestore_write_small_unused);
+           } else { // if (bl.is_zero())
+             dout(20) << __func__ << " skip small zero block " << std::hex
+                << " (0x" << b_off0 << "~" << bl.length() << ")"
+                << " (0x" << b_off << "~" << length << ")"
+                << std::dec << dendl;
+             logger->inc(l_bluestore_write_small_skipped);
+             logger->inc(l_bluestore_write_small_skipped_bytes, length);
+           }
 
-           o->extent_map.punch_hole(c, offset, length, &wctx->old_extents);
-           wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length,
-                       false, false);
-           logger->inc(l_bluestore_write_small_unused);
            return;
          }
        }
@@ -14545,20 +14576,32 @@ void BlueStore::_do_write_small(
                                offset0 + alloc_len, 
                                min_alloc_size)) {
 
-         uint64_t chunk_size = b->get_blob().get_chunk_size(block_size);
          uint64_t b_off = offset - bstart;
          uint64_t b_off0 = b_off;
-         _pad_zeros(&bl, &b_off0, chunk_size);
+         o->extent_map.punch_hole(c, offset, length, &wctx->old_extents);
+
+         // Zero detection -- small block
+         if (!bl.is_zero()) {
+           uint64_t chunk_size = b->get_blob().get_chunk_size(block_size);
+           _pad_zeros(&bl, &b_off0, chunk_size);
 
-         dout(20) << __func__ << " reuse blob " << *b << std::hex
-                   << " (0x" << b_off0 << "~" << bl.length() << ")"
-                   << " (0x" << b_off << "~" << length << ")"
-                   << std::dec << dendl;
+           dout(20) << __func__ << " reuse blob " << *b << std::hex
+             << " (0x" << b_off0 << "~" << bl.length() << ")"
+             << " (0x" << b_off << "~" << length << ")"
+             << std::dec << dendl;
+
+           wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length,
+               false, false);
+           logger->inc(l_bluestore_write_small_unused);
+         } else { // if (bl.is_zero())
+           dout(20) << __func__ << " skip small zero block " << std::hex
+             << " (0x" << b_off0 << "~" << bl.length() << ")"
+             << " (0x" << b_off << "~" << length << ")"
+             << std::dec << dendl;
+           logger->inc(l_bluestore_write_small_skipped);
+           logger->inc(l_bluestore_write_small_skipped_bytes, length);
+         }
 
-         o->extent_map.punch_hole(c, offset, length, &wctx->old_extents);
-         wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length,
-                     false, false);
-         logger->inc(l_bluestore_write_small_unused);
          return;
        }
       } 
@@ -14589,16 +14632,27 @@ void BlueStore::_do_write_small(
               << std::hex << offset << "~" << length
              << std::dec << dendl;
   }
-  // new blob.
-  BlobRef b = c->new_blob();
   uint64_t b_off = p2phase<uint64_t>(offset, alloc_len);
   uint64_t b_off0 = b_off;
-  _pad_zeros(&bl, &b_off0, block_size);
   o->extent_map.punch_hole(c, offset, length, &wctx->old_extents);
-  wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length,
-    min_alloc_size != block_size, // use 'unused' bitmap when alloc granularity
-                                  // doesn't match disk one only
-    true);
+
+  // Zero detection -- small block
+  if (!bl.is_zero()) {
+    // new blob.
+    BlobRef b = c->new_blob();
+    _pad_zeros(&bl, &b_off0, block_size);
+    wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length,
+       min_alloc_size != block_size, // use 'unused' bitmap when alloc granularity
+                                      // doesn't match disk one only
+       true);
+  } else { // if (bl.is_zero())
+    dout(20) << __func__ << " skip small zero block " << std::hex
+      << " (0x" << b_off0 << "~" << bl.length() << ")"
+      << " (0x" << b_off << "~" << length << ")"
+      << std::dec << dendl;
+    logger->inc(l_bluestore_write_small_skipped);
+    logger->inc(l_bluestore_write_small_skipped_bytes, length);
+  }
 
   return;
 }
index 3b9118f03efbe0bb091e57250392a52c3053d207..d6ab760196563eb30a6a537af52c865ad2d18b0e 100644 (file)
@@ -141,6 +141,9 @@ enum {
   l_bluestore_issued_deferred_write_bytes,
   l_bluestore_submitted_deferred_writes,
   l_bluestore_submitted_deferred_write_bytes,
+
+  l_bluestore_write_small_skipped,
+  l_bluestore_write_small_skipped_bytes,
   //****************************************
 
   // compressions stats
@@ -3257,7 +3260,6 @@ private:
       uint64_t loffs_end,
       uint64_t min_alloc_size);
   };
-
   void _do_write_small(
     TransContext *txc,
     CollectionRef &c,