uint64_t cow_rmw_head = 0;
uint64_t cow_rmw_tail = 0;
+ if (orig_offset > o->onode.size) {
+ // zero tail of previous existing extent?
+ _do_zero_tail_extent(txc, c, o, orig_offset);
+ }
+
r = _do_allocate(txc, c, o, orig_offset, orig_length, fadvise_flags, true,
&cow_rmw_head, &cow_rmw_tail);
if (r < 0) {
bp = o->onode.seek_extent(orig_offset);
- if (orig_offset > o->onode.size) {
- // zero tail of previous existing extent?
- map<uint64_t, bluestore_extent_t>::iterator pp =
- o->onode.find_extent(o->onode.size);
- if (pp != o->onode.block_map.end() &&
- pp != bp) {
- assert(pp->first < bp->first);
- _do_zero_tail_extent(txc, o, orig_offset, pp);
- }
- }
-
for (uint64_t offset = orig_offset;
offset < orig_offset + orig_length;
offset += length) {
void BlueStore::_do_zero_tail_extent(
TransContext *txc,
+ CollectionRef& c,
OnodeRef& o,
- uint64_t offset,
- map<uint64_t, bluestore_extent_t>::iterator pp)
+ uint64_t offset)
{
const uint64_t block_size = bdev->get_block_size();
const uint64_t block_mask = ~(block_size - 1);
+ map<uint64_t, bluestore_extent_t>::iterator bp, pp;
+ bp = o->onode.seek_extent(offset);
+ pp = o->onode.find_extent(o->onode.size);
+
dout(10) << __func__ << " offset " << offset << " extent "
<< pp->first << ": " << pp->second << dendl;
assert(offset > o->onode.size);
- assert(pp != o->onode.block_map.end());
// we assume the caller will handle any partial block they start with
offset &= block_mask;
if (offset <= o->onode.size)
return;
- uint64_t end_block = ROUND_UP_TO(o->onode.size, block_size);
-
- assert(!pp->second.has_flag(bluestore_extent_t::FLAG_SHARED));
-
- if (end_block > o->onode.size) {
- // end was in a partial block, do wal r/m/w.
- bluestore_wal_op_t *op = _get_wal_op(txc, o);
- op->op = bluestore_wal_op_t::OP_ZERO;
- uint64_t x_off = o->onode.size;
- uint64_t x_len = end_block - x_off;
- op->extent.offset = pp->second.offset + x_off - pp->first;
- op->extent.length = x_len;
- dout(10) << __func__ << " wal zero tail partial block "
- << x_off << "~" << x_len << " at " << op->extent
- << dendl;
- assert(!pp->second.has_flag(bluestore_extent_t::FLAG_COW_HEAD));
- assert(!pp->second.has_flag(bluestore_extent_t::FLAG_COW_TAIL));
- }
- if (offset > end_block) {
- // end was block-aligned. zero the rest of the extent now.
- uint64_t x_off = end_block - pp->first;
- uint64_t x_len = pp->second.length - x_off;
- if (x_len > 0) {
- dout(10) << __func__ << " zero tail " << x_off << "~" << x_len
- << " of tail extent " << pp->first << ": " << pp->second
+ if (pp != o->onode.block_map.end() &&
+ pp != bp) {
+ if (pp->second.has_flag(bluestore_extent_t::FLAG_SHARED)) {
+ dout(10) << __func__ << " shared tail extent; doing _do_write_zero"
<< dendl;
- bdev->aio_zero(pp->second.offset + x_off, x_len, &txc->ioc);
+ uint64_t old_size = o->onode.size;
+ uint64_t end = pp->first + pp->second.length;
+ uint64_t zlen = end - old_size;
+ _do_write_zero(txc, c, o, old_size, zlen);
+ } else {
+ uint64_t end_block = ROUND_UP_TO(o->onode.size, block_size);
+
+ if (end_block > o->onode.size) {
+ // end was in a partial block, do wal r/m/w.
+ bluestore_wal_op_t *op = _get_wal_op(txc, o);
+ op->op = bluestore_wal_op_t::OP_ZERO;
+ uint64_t x_off = o->onode.size;
+ uint64_t x_len = end_block - x_off;
+ op->extent.offset = pp->second.offset + x_off - pp->first;
+ op->extent.length = x_len;
+ dout(10) << __func__ << " wal zero tail partial block "
+ << x_off << "~" << x_len << " at " << op->extent
+ << dendl;
+ assert(!pp->second.has_flag(bluestore_extent_t::FLAG_COW_HEAD));
+ assert(!pp->second.has_flag(bluestore_extent_t::FLAG_COW_TAIL));
+ }
+ if (offset > end_block) {
+ // end was block-aligned. zero the rest of the extent now.
+ uint64_t x_off = end_block - pp->first;
+ uint64_t x_len = pp->second.length - x_off;
+ if (x_len > 0) {
+ dout(10) << __func__ << " zero tail " << x_off << "~" << x_len
+ << " of tail extent " << pp->first << ": " << pp->second
+ << dendl;
+ bdev->aio_zero(pp->second.offset + x_off, x_len, &txc->ioc);
+ }
+ }
}
}
}
g_ceph_context->_conf->apply_changes(NULL);
}
+TEST_P(StoreTest, AppendZeroTrailingSharedBlock) {
+ ObjectStore::Sequencer osr("test");
+ int r;
+ coll_t cid;
+ ghobject_t a(hobject_t(sobject_t("fooo", CEPH_NOSNAP)));
+ ghobject_t b = a;
+ b.hobj.snap = 1;
+ {
+ ObjectStore::Transaction t;
+ t.create_collection(cid, 0);
+ cerr << "Creating collection " << cid << std::endl;
+ r = store->apply_transaction(&osr, std::move(t));
+ ASSERT_EQ(r, 0);
+ }
+ unsigned min_alloc = g_conf->bluestore_min_alloc_size;
+ unsigned size = min_alloc / 3;
+ bufferptr bpa(size);
+ memset(bpa.c_str(), 1, bpa.length());
+ bufferlist bla;
+ bla.append(bpa);
+ // make sure there is some trailing gunk in the last block
+ {
+ bufferlist bt;
+ bt.append(bla);
+ bt.append("BADBADBADBAD");
+ ObjectStore::Transaction t;
+ t.write(cid, a, 0, bt.length(), bt, 0);
+ r = store->apply_transaction(&osr, std::move(t));
+ ASSERT_EQ(r, 0);
+ }
+ {
+ ObjectStore::Transaction t;
+ t.truncate(cid, a, size);
+ r = store->apply_transaction(&osr, std::move(t));
+ ASSERT_EQ(r, 0);
+ }
+
+ // clone
+ {
+ ObjectStore::Transaction t;
+ t.clone(cid, a, b);
+ r = store->apply_transaction(&osr, std::move(t));
+ ASSERT_EQ(r, 0);
+ }
+
+ // append with implicit zeroing
+ bufferptr bpb(size);
+ memset(bpb.c_str(), 2, bpb.length());
+ bufferlist blb;
+ blb.append(bpb);
+ {
+ ObjectStore::Transaction t;
+ t.write(cid, a, min_alloc * 3, blb.length(), blb, 0);
+ r = store->apply_transaction(&osr, std::move(t));
+ ASSERT_EQ(r, 0);
+ }
+ bufferlist final;
+ final.append(bla);
+ bufferlist zeros;
+ zeros.append_zero(min_alloc * 3 - size);
+ final.append(zeros);
+ final.append(blb);
+ bufferlist actual;
+ {
+ ASSERT_EQ((int)final.length(),
+ store->read(cid, a, 0, final.length(), actual));
+ final.hexdump(cout);
+ actual.hexdump(cout);
+ ASSERT_TRUE(final.contents_equal(actual));
+ }
+ {
+ ObjectStore::Transaction t;
+ t.remove(cid, a);
+ t.remove(cid, b);
+ t.remove_collection(cid);
+ cerr << "Cleaning" << std::endl;
+ r = store->apply_transaction(&osr, std::move(t));
+ ASSERT_EQ(r, 0);
+ }
+}
+
TEST_P(StoreTest, SmallSequentialUnaligned) {
ObjectStore::Sequencer osr("test");
int r;