From: Mark Nelson Date: Sun, 17 Jul 2016 11:39:41 +0000 (-0500) Subject: os/bluestore: revert preferred csum behavior X-Git-Tag: ses5-milestone5~380 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=cab254e924ab4393c5418e98b9ce775b56869568;p=ceph.git os/bluestore: revert preferred csum behavior This passes "ceph_test_objectstore --gtest_filter=*/2". This restores 4K random read performance to previous levels when objects were previously written out using large IOs (4MB in this case): pre-patch: 26MB/s post-patch: 610MB/s Closes #10320 Signed-off-by: Mark Nelson --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index cd2a9a12adca..afcba07ba750 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -6164,7 +6164,11 @@ int BlueStore::_do_write( dout(20) << __func__ << " will do buffered write" << dendl; wctx.buffered = true; } - wctx.csum_order = MAX(block_size_order, o->onode.get_preferred_csum_order()); + + // FIXME: Using the MAX of the block_size_order and preferred_csum_order + // results in poor small random read performance when data was initially + // written out in large chunks. Reverting to previous behavior for now. + wctx.csum_order = block_size_order; // compression parameters unsigned alloc_hints = o->onode.alloc_hint_flags; diff --git a/src/os/bluestore/bluestore_types.cc b/src/os/bluestore/bluestore_types.cc index 49b12c762773..0c8808ae7d3e 100644 --- a/src/os/bluestore/bluestore_types.cc +++ b/src/os/bluestore/bluestore_types.cc @@ -852,6 +852,8 @@ void bluestore_onode_t::generate_test_instances(list& o) // FIXME } +// FIXME: Using this to compute the ctx.csum_order can lead to poor small +// random read performance when initial writes are large. size_t bluestore_onode_t::get_preferred_csum_order() const { uint32_t t = expected_write_size;