]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: revert preferred csum behavior
author Mark Nelson <mnelson@redhat.com>
Sun, 17 Jul 2016 11:39:41 +0000 (06:39 -0500)
committer Mark Nelson <mnelson@redhat.com>
Mon, 18 Jul 2016 02:33:59 +0000 (21:33 -0500)
This passes "ceph_test_objectstore --gtest_filter=*/2".
This restores 4K random read performance to previous levels when objects
were previously written out using large IOs (4MB in this case):

pre-patch: 26MB/s
post-patch: 610MB/s

Closes #10320

Signed-off-by: Mark Nelson <mnelson@redhat.com>
src/os/bluestore/BlueStore.cc
src/os/bluestore/bluestore_types.cc

index cd2a9a12adcae788bf01018b0a8b83996ac5208b..afcba07ba7504e5dd9a3ab211716a5690ebdb5bd 100644 (file)
@@ -6164,7 +6164,11 @@ int BlueStore::_do_write(
     dout(20) << __func__ << " will do buffered write" << dendl;
     wctx.buffered = true;
   }
-  wctx.csum_order = MAX(block_size_order, o->onode.get_preferred_csum_order());
+
+  // FIXME: Using the MAX of the block_size_order and preferred_csum_order
+  // results in poor small random read performance when data was initially 
+  // written out in large chunks.  Reverting to previous behavior for now.
+  wctx.csum_order = block_size_order;
 
   // compression parameters
   unsigned alloc_hints = o->onode.alloc_hint_flags;
index 49b12c762773f2f7c43b2165faa5d04e5f3e70b7..0c8808ae7d3ef8215cf47bcb2fcdb3de5c245537 100644 (file)
@@ -852,6 +852,8 @@ void bluestore_onode_t::generate_test_instances(list<bluestore_onode_t*>& o)
   // FIXME
 }
 
+// FIXME: Using this to compute wctx.csum_order can lead to poor small
+// random read performance when the initial writes are large.
 size_t bluestore_onode_t::get_preferred_csum_order() const
 {
   uint32_t t = expected_write_size;