From f254486ad99089eba5381be704ce528a7f6fb99c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 30 Mar 2016 11:17:12 -0400 Subject: [PATCH] os/bluestore: _do_write: fix _do_zero_tail_extent to handle shared extents Also add a test case to reproduce this case. Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.cc | 87 ++++++++++++++++-------------- src/os/bluestore/BlueStore.h | 4 +- src/test/objectstore/store_test.cc | 81 ++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 42 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index ec8a852b1ef1b..6f5710c1588c9 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5417,6 +5417,11 @@ int BlueStore::_do_write( uint64_t cow_rmw_head = 0; uint64_t cow_rmw_tail = 0; + if (orig_offset > o->onode.size) { + // zero tail of previous existing extent? + _do_zero_tail_extent(txc, c, o, orig_offset); + } + r = _do_allocate(txc, c, o, orig_offset, orig_length, fadvise_flags, true, &cow_rmw_head, &cow_rmw_tail); if (r < 0) { @@ -5426,17 +5431,6 @@ int BlueStore::_do_write( bp = o->onode.seek_extent(orig_offset); - if (orig_offset > o->onode.size) { - // zero tail of previous existing extent? - map::iterator pp = - o->onode.find_extent(o->onode.size); - if (pp != o->onode.block_map.end() && - pp != bp) { - assert(pp->first < bp->first); - _do_zero_tail_extent(txc, o, orig_offset, pp); - } - } - for (uint64_t offset = orig_offset; offset < orig_offset + orig_length; offset += length) { @@ -5749,50 +5743,63 @@ int BlueStore::_zero(TransContext *txc, void BlueStore::_do_zero_tail_extent( TransContext *txc, + CollectionRef& c, OnodeRef& o, - uint64_t offset, - map::iterator pp) + uint64_t offset) { const uint64_t block_size = bdev->get_block_size(); const uint64_t block_mask = ~(block_size - 1); + map::iterator bp, pp; + bp = o->onode.seek_extent(offset); + pp = o->onode.find_extent(o->onode.size); + dout(10) << __func__ << " offset " << offset << " extent " << pp->first << ": " << pp->second << dendl; assert(offset > o->onode.size); - assert(pp != o->onode.block_map.end()); // we assume the caller will handle any partial block they start with offset &= block_mask; if (offset <= o->onode.size) return; - uint64_t end_block = ROUND_UP_TO(o->onode.size, block_size); - - assert(!pp->second.has_flag(bluestore_extent_t::FLAG_SHARED)); - - if (end_block > o->onode.size) { - // end was in a partial block, do wal r/m/w. - bluestore_wal_op_t *op = _get_wal_op(txc, o); - op->op = bluestore_wal_op_t::OP_ZERO; - uint64_t x_off = o->onode.size; - uint64_t x_len = end_block - x_off; - op->extent.offset = pp->second.offset + x_off - pp->first; - op->extent.length = x_len; - dout(10) << __func__ << " wal zero tail partial block " - << x_off << "~" << x_len << " at " << op->extent - << dendl; - assert(!pp->second.has_flag(bluestore_extent_t::FLAG_COW_HEAD)); - assert(!pp->second.has_flag(bluestore_extent_t::FLAG_COW_TAIL)); - } - if (offset > end_block) { - // end was block-aligned. zero the rest of the extent now. - uint64_t x_off = end_block - pp->first; - uint64_t x_len = pp->second.length - x_off; - if (x_len > 0) { - dout(10) << __func__ << " zero tail " << x_off << "~" << x_len - << " of tail extent " << pp->first << ": " << pp->second + if (pp != o->onode.block_map.end() && + pp != bp) { + if (pp->second.has_flag(bluestore_extent_t::FLAG_SHARED)) { + dout(10) << __func__ << " shared tail extent; doing _do_write_zero" << dendl; - bdev->aio_zero(pp->second.offset + x_off, x_len, &txc->ioc); + uint64_t old_size = o->onode.size; + uint64_t end = pp->first + pp->second.length; + uint64_t zlen = end - old_size; + _do_write_zero(txc, c, o, old_size, zlen); + } else { + uint64_t end_block = ROUND_UP_TO(o->onode.size, block_size); + + if (end_block > o->onode.size) { + // end was in a partial block, do wal r/m/w. + bluestore_wal_op_t *op = _get_wal_op(txc, o); + op->op = bluestore_wal_op_t::OP_ZERO; + uint64_t x_off = o->onode.size; + uint64_t x_len = end_block - x_off; + op->extent.offset = pp->second.offset + x_off - pp->first; + op->extent.length = x_len; + dout(10) << __func__ << " wal zero tail partial block " + << x_off << "~" << x_len << " at " << op->extent + << dendl; + assert(!pp->second.has_flag(bluestore_extent_t::FLAG_COW_HEAD)); + assert(!pp->second.has_flag(bluestore_extent_t::FLAG_COW_TAIL)); + } + if (offset > end_block) { + // end was block-aligned. zero the rest of the extent now. + uint64_t x_off = end_block - pp->first; + uint64_t x_len = pp->second.length - x_off; + if (x_len > 0) { + dout(10) << __func__ << " zero tail " << x_off << "~" << x_len + << " of tail extent " << pp->first << ": " << pp->second + << dendl; + bdev->aio_zero(pp->second.offset + x_off, x_len, &txc->ioc); + } + } } } } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index b32279c531739..828dd914ff165 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -900,9 +900,9 @@ private: uint64_t offset, uint64_t length); void _do_zero_tail_extent( TransContext *txc, + CollectionRef& c, OnodeRef& o, - uint64_t offset, - map::iterator pp); + uint64_t offset); int _do_zero(TransContext *txc, CollectionRef& c, OnodeRef& o, diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc index 59dc408124062..d6e9203da6c03 100644 --- a/src/test/objectstore/store_test.cc +++ b/src/test/objectstore/store_test.cc @@ -723,6 +723,87 @@ TEST_P(StoreTest, AppendWalVsTailCache) { g_ceph_context->_conf->apply_changes(NULL); } +TEST_P(StoreTest, AppendZeroTrailingSharedBlock) { + ObjectStore::Sequencer osr("test"); + int r; + coll_t cid; + ghobject_t a(hobject_t(sobject_t("fooo", CEPH_NOSNAP))); + ghobject_t b = a; + b.hobj.snap = 1; + { + ObjectStore::Transaction t; + t.create_collection(cid, 0); + cerr << "Creating collection " << cid << std::endl; + r = store->apply_transaction(&osr, std::move(t)); + ASSERT_EQ(r, 0); + } + unsigned min_alloc = g_conf->bluestore_min_alloc_size; + unsigned size = min_alloc / 3; + bufferptr bpa(size); + memset(bpa.c_str(), 1, bpa.length()); + bufferlist bla; + bla.append(bpa); + // make sure there is some trailing gunk in the last block + { + bufferlist bt; + bt.append(bla); + bt.append("BADBADBADBAD"); + ObjectStore::Transaction t; + t.write(cid, a, 0, bt.length(), bt, 0); + r = store->apply_transaction(&osr, std::move(t)); + ASSERT_EQ(r, 0); + } + { + ObjectStore::Transaction t; + t.truncate(cid, a, size); + r = store->apply_transaction(&osr, std::move(t)); + ASSERT_EQ(r, 0); + } + + // clone + { + ObjectStore::Transaction t; + t.clone(cid, a, b); + r = store->apply_transaction(&osr, std::move(t)); + ASSERT_EQ(r, 0); + } + + // append with implicit zeroing + bufferptr bpb(size); + memset(bpb.c_str(), 2, bpb.length()); + bufferlist blb; + blb.append(bpb); + { + ObjectStore::Transaction t; + t.write(cid, a, min_alloc * 3, blb.length(), blb, 0); + r = store->apply_transaction(&osr, std::move(t)); + ASSERT_EQ(r, 0); + } + bufferlist final; + final.append(bla); + bufferlist zeros; + zeros.append_zero(min_alloc * 3 - size); + final.append(zeros); + final.append(blb); + bufferlist actual; + { + ASSERT_EQ((int)final.length(), + store->read(cid, a, 0, final.length(), actual)); + final.hexdump(cout); + actual.hexdump(cout); + ASSERT_TRUE(final.contents_equal(actual)); + } + { + ObjectStore::Transaction t; + t.remove(cid, a); + t.remove(cid, b); + t.remove_collection(cid); + cerr << "Cleaning" << std::endl; + r = store->apply_transaction(&osr, std::move(t)); + ASSERT_EQ(r, 0); + } +} + TEST_P(StoreTest, SmallSequentialUnaligned) { ObjectStore::Sequencer osr("test"); int r; -- 2.39.5