uint64_t start_offset,
uint64_t length,
const BlueStore::ExtentMap& extent_map,
- const BlueStore::extent_map_t& old_extents,
+ const BlueStore::old_extent_map_t& old_extents,
uint64_t min_alloc_size)
{
uint64_t end_offset = start_offset + length;
for (auto it = old_extents.begin(); it != old_extents.end(); ++it) {
- Blob* b = it->blob.get();
+ Blob* b = it->e.blob.get();
if (b->get_blob().is_compressed()) {
// update gc_start_offset/gc_end_offset if needed
- gc_start_offset = min(gc_start_offset, (uint64_t)it->blob_start());
- gc_end_offset = max(gc_end_offset, (uint64_t)it->blob_end());
+ gc_start_offset = min(gc_start_offset, (uint64_t)it->e.blob_start());
+ gc_end_offset = max(gc_end_offset, (uint64_t)it->e.blob_end());
- auto o = it->logical_offset;
- auto l = it->length;
+ auto o = it->e.logical_offset;
+ auto l = it->e.length;
uint64_t ref_bytes = b->get_referenced_bytes();
- assert(l <= ref_bytes);
// micro optimization to bypass blobs that have no more references
- if (ref_bytes != l) {
+ if (ref_bytes != 0) {
dout(30) << __func__ << " affected_blob:" << *b
<< " unref 0x" << std::hex << o << "~" << l
<< std::dec << dendl;
- // emplace () call below returns previously existed entry from
- // the map if any instead of inserting a new one. Hence we need
- // to decrement referenced_bytes counter after such an insert.
- BlobInfo& bi =
- affected_blobs.emplace(b, BlobInfo(ref_bytes)).first->second;
- bi.referenced_bytes -= l;
+ affected_blobs.emplace(b, BlobInfo(ref_bytes)).first->second;
}
}
}
<< " " << *e.blob;
}
+// OldExtent
+// Factory for OldExtent: builds an entry describing a logical extent
+// [lo, lo+l) (at blob offset o) that is being removed from an ExtentMap.
+// The blob reference for that range is dropped here, immediately, so that:
+//  * any physical extents freed by the deref are captured in oe->r for the
+//    caller (_wctx_finish) to release later, and
+//  * blob_empty records whether this removal left the blob with zero
+//    referenced bytes — needed to adjust the compressed-payload statistic
+//    exactly once per blob.
+BlueStore::OldExtent* BlueStore::OldExtent::create(CollectionRef c,
+  uint32_t lo,
+  uint32_t o,
+  uint32_t l,
+  BlobRef& b) {
+  OldExtent* oe = new OldExtent(lo, o, l, b);
+  // put_ref may emit newly-unreferenced physical extents into oe->r
+  b->put_ref(c.get(), o, l, &(oe->r));
+  oe->blob_empty = b->get_referenced_bytes() == 0;
+  return oe;
+}
+
// ExtentMap
#undef dout_prefix
}
void BlueStore::ExtentMap::punch_hole(
+ CollectionRef &c,
uint64_t offset,
uint64_t length,
- extent_map_t *old_extents)
+ old_extent_map_t *old_extents)
{
auto p = seek_lextent(offset);
uint64_t end = offset + length;
if (p->logical_end() > end) {
// split and deref middle
uint64_t front = offset - p->logical_offset;
- old_extents->insert(
- *new Extent(offset, p->blob_offset + front, length, p->blob));
+ OldExtent* oe = OldExtent::create(c, offset, p->blob_offset + front,
+ length, p->blob);
+ old_extents->push_back(*oe);
add(end,
p->blob_offset + front + length,
p->length - front - length,
// deref tail
assert(p->logical_end() > offset); // else seek_lextent bug
uint64_t keep = offset - p->logical_offset;
- old_extents->insert(*new Extent(offset, p->blob_offset + keep,
- p->length - keep, p->blob));
+ OldExtent* oe = OldExtent::create(c, offset, p->blob_offset + keep,
+ p->length - keep, p->blob);
+ old_extents->push_back(*oe);
p->length = keep;
++p;
continue;
}
if (p->logical_offset + p->length <= end) {
// deref whole lextent
- old_extents->insert(*new Extent(p->logical_offset, p->blob_offset,
- p->length, p->blob));
+ OldExtent* oe = OldExtent::create(c, p->logical_offset, p->blob_offset,
+ p->length, p->blob);
+ old_extents->push_back(*oe);
rm(p++);
continue;
}
// deref head
uint64_t keep = p->logical_end() - end;
- old_extents->insert(*new Extent(p->logical_offset, p->blob_offset,
- p->length - keep, p->blob));
+ OldExtent* oe = OldExtent::create(c, p->logical_offset, p->blob_offset,
+ p->length - keep, p->blob);
+ old_extents->push_back(*oe);
+
add(end, p->blob_offset + p->length - keep, keep, p->blob);
rm(p);
break;
}
BlueStore::Extent *BlueStore::ExtentMap::set_lextent(
+ CollectionRef &c,
uint64_t logical_offset,
uint64_t blob_offset, uint64_t length, BlobRef b,
- extent_map_t *old_extents)
+ old_extent_map_t *old_extents)
{
- if (old_extents) {
- punch_hole(logical_offset, length, old_extents);
- }
-
// We need to have completely initialized Blob to increment its ref counters.
assert(b->get_blob().get_logical_length() != 0);
+
+ // Do get_ref prior to punch_hole to prevent putting a reused blob into the
+ // old_extents list if we overwrite the blob totally.
+ // This might happen during a WAL overwrite.
b->get_ref(onode->c, blob_offset, length);
+ if (old_extents) {
+ punch_hole(c, logical_offset, length, old_extents);
+ }
+
Extent *le = new Extent(logical_offset, blob_offset, length, b);
extent_map.insert(*le);
if (spans_shard(logical_offset, length)) {
}
b->dirty_blob().calc_csum(b_off, padded);
dout(20) << __func__ << " lex old " << *ep << dendl;
- Extent *le = o->extent_map.set_lextent(offset, b_off + head_pad, length,
+ Extent *le = o->extent_map.set_lextent(c, offset, b_off + head_pad, length,
b,
&wctx->old_extents);
b->dirty_blob().mark_used(le->blob_offset, le->length);
dout(20) << __func__ << " deferred write 0x" << std::hex << b_off << "~"
<< b_len << std::dec << " of mutable " << *b
<< " at " << op->extents << dendl;
- Extent *le = o->extent_map.set_lextent(offset, offset - bstart, length,
- b, &wctx->old_extents);
+ Extent *le = o->extent_map.set_lextent(c, offset, offset - bstart, length,
+ b, &wctx->old_extents);
b->dirty_blob().mark_used(le->blob_offset, le->length);
txc->statfs_delta.stored() += le->length;
dout(20) << __func__ << " lex " << *le << dendl;
<< " (" << b_off << "~" << length << ")"
<< std::dec << dendl;
- o->extent_map.punch_hole(offset, length, &wctx->old_extents);
+ o->extent_map.punch_hole(c, offset, length, &wctx->old_extents);
wctx->write(offset, b, alloc_len, b_off0, padded, b_off, length, false, false);
logger->inc(l_bluestore_write_small_unused);
return;
uint64_t b_off = P2PHASE(offset, alloc_len);
uint64_t b_off0 = b_off;
_pad_zeros(&bl, &b_off0, block_size);
- o->extent_map.punch_hole(offset, length, &wctx->old_extents);
+ o->extent_map.punch_hole(c, offset, length, &wctx->old_extents);
wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length, true, true);
logger->inc(l_bluestore_write_small_new);
<< dendl;
logger->inc(l_bluestore_write_big);
logger->inc(l_bluestore_write_big_bytes, length);
- o->extent_map.punch_hole(offset, length, &wctx->old_extents);
+ o->extent_map.punch_hole(c, offset, length, &wctx->old_extents);
auto max_bsize = MAX(wctx->target_blob_size, min_alloc_size);
while (length > 0) {
bool new_blob = false;
}
}
- Extent *le = o->extent_map.set_lextent(wi.logical_offset,
+ Extent *le = o->extent_map.set_lextent(coll, wi.logical_offset,
b_off + (wi.b_off0 - wi.b_off),
wi.length0,
wi.b,
while (oep != wctx->old_extents.end()) {
auto &lo = *oep;
oep = wctx->old_extents.erase(oep);
- dout(20) << __func__ << " lex_old " << lo << dendl;
- BlobRef b = lo.blob;
+ dout(20) << __func__ << " lex_old " << lo.e << dendl;
+ BlobRef b = lo.e.blob;
const bluestore_blob_t& blob = b->get_blob();
- PExtentVector r;
- if (b->put_ref(c.get(), lo.blob_offset, lo.length, &r)) {
- if (blob.is_compressed()) {
+ if (blob.is_compressed()) {
+ if (lo.blob_empty) {
txc->statfs_delta.compressed() -= blob.get_compressed_payload_length();
}
+ txc->statfs_delta.compressed_original() -= lo.e.length;
}
- txc->statfs_delta.stored() -= lo.length;
- if (blob.is_compressed()) {
- txc->statfs_delta.compressed_original() -= lo.length;
- }
+ auto& r = lo.r;
+ txc->statfs_delta.stored() -= lo.e.length;
if (!r.empty()) {
dout(20) << __func__ << " blob release " << r << dendl;
if (blob.is_shared()) {
WriteContext wctx;
o->extent_map.fault_range(db, offset, length);
- o->extent_map.punch_hole(offset, length, &wctx.old_extents);
+ o->extent_map.punch_hole(c, offset, length, &wctx.old_extents);
o->extent_map.dirty_range(txc->t, offset, length);
_wctx_finish(txc, c, o, &wctx);
WriteContext wctx;
uint64_t length = o->onode.size - offset;
o->extent_map.fault_range(db, offset, length);
- o->extent_map.punch_hole(offset, length, &wctx.old_extents);
+ o->extent_map.punch_hole(c, offset, length, &wctx.old_extents);
o->extent_map.dirty_range(txc->t, offset, length);
_wctx_finish(txc, c, o, &wctx);
};
typedef boost::intrusive::set<Extent> extent_map_t;
+
friend ostream& operator<<(ostream& out, const Extent& e);
+ // An extent that has been removed (dereferenced) from an ExtentMap but whose
+ // bookkeeping is still pending: _wctx_finish consumes these to update stats
+ // and release the physical extents freed by the deref.
+ struct OldExtent {
+ // hook for intrusive membership in old_extent_map_t below
+ boost::intrusive::list_member_hook<> old_extent_item;
+ Extent e; // the removed logical extent (offset/length/blob)
+ PExtentVector r; // physical extents freed when the blob ref was dropped
+ bool blob_empty; // flag to track the last removed extent that makes blob
+ // empty - required to update compression stat properly
+ // Note: plain constructor does NOT drop the blob reference; use create()
+ // for that.
+ OldExtent(uint32_t lo, uint32_t o, uint32_t l, BlobRef& b)
+ : e(lo, o, l, b), blob_empty(false) {
+ }
+ // Builds an OldExtent and releases the blob ref for [o, o+l), filling r
+ // and blob_empty (defined in the .cc file).
+ static OldExtent* create(CollectionRef c,
+ uint32_t lo,
+ uint32_t o,
+ uint32_t l,
+ BlobRef& b);
+ };
+ // Intrusive list of OldExtents awaiting cleanup in _wctx_finish.
+ typedef boost::intrusive::list<
+ OldExtent,
+ boost::intrusive::member_hook<
+ OldExtent,
+ boost::intrusive::list_member_hook<>,
+ &OldExtent::old_extent_item> > old_extent_map_t;
+
struct Onode;
/// a sharded extent map, mapping offsets to lextents to blobs
int compress_extent_map(uint64_t offset, uint64_t length);
/// punch a logical hole. add lextents to deref to target list.
- void punch_hole(uint64_t offset, uint64_t length,
- extent_map_t *old_extents);
+ void punch_hole(CollectionRef &c,
+ uint64_t offset, uint64_t length,
+ old_extent_map_t *old_extents);
/// put new lextent into lextent_map overwriting existing ones if
/// any and update references accordingly
- Extent *set_lextent(uint64_t logical_offset,
+ Extent *set_lextent(CollectionRef &c,
+ uint64_t logical_offset,
uint64_t offset, uint64_t length,
BlobRef b,
- extent_map_t *old_extents);
+ old_extent_map_t *old_extents);
/// split a blob (and referring extents)
BlobRef split_blob(BlobRef lb, uint32_t blob_offset, uint32_t pos);
uint64_t offset,
uint64_t length,
const ExtentMap& extent_map,
- const extent_map_t& old_extents,
+ const old_extent_map_t& old_extents,
uint64_t min_alloc_size);
/// return a collection of extents to perform GC on
uint64_t target_blob_size = 0; ///< target (max) blob size
unsigned csum_order = 0; ///< target checksum chunk order
- extent_map_t old_extents; ///< must deref these blobs
+ old_extent_map_t old_extents; ///< must deref these blobs
struct write_item {
uint64_t logical_offset; ///< write logical offset
BlueStore::Onode onode(coll.get(), ghobject_t(), "");
BlueStore::ExtentMap em(&onode);
- BlueStore::extent_map_t old_extents;
+ BlueStore::old_extent_map_t old_extents;
/*
em.extent_map.insert(*new BlueStore::Extent(4096, 0, 10, b3));
b3->get_ref(coll.get(), 0, 10);
- old_extents.push_back(*new BlueStore::Extent(300, 300, 10, b1));
- b1->get_ref(coll.get(), 300, 10);
+ old_extents.push_back(*new BlueStore::OldExtent(300, 300, 10, b1));
saving = gc.estimate(300, 100, em, old_extents, 4096);
ASSERT_EQ(saving, 1);
BlueStore::Onode onode(coll.get(), ghobject_t(), "");
BlueStore::ExtentMap em(&onode);
- BlueStore::extent_map_t old_extents;
+ BlueStore::old_extent_map_t old_extents;
BlueStore::GarbageCollector gc(g_ceph_context);
int64_t saving;
BlueStore::BlobRef b1(new BlueStore::Blob);
em.extent_map.insert(*new BlueStore::Extent(0x3f000, 0x3f000, 0x1000, b1));
b1->get_ref(coll.get(), 0x3f000, 0x1000);
- old_extents.push_back(*new BlueStore::Extent(0x8000, 0x8000, 0x8000, b1));
- b1->get_ref(coll.get(), 0x8000, 0x8000);
+ old_extents.push_back(*new BlueStore::OldExtent(0x8000, 0x8000, 0x8000, b1));
old_extents.push_back(
- *new BlueStore::Extent(0x10000, 0x10000, 0x20000, b1));
- b1->get_ref(coll.get(), 0x10000, 0x20000);
- old_extents.push_back(*new BlueStore::Extent(0x30000, 0x30000, 0xf000, b1));
- b1->get_ref(coll.get(), 0x30000, 0xf000);
+ *new BlueStore::OldExtent(0x10000, 0x10000, 0x20000, b1));
+ old_extents.push_back(*new BlueStore::OldExtent(0x30000, 0x30000, 0xf000, b1));
saving = gc.estimate(0x30000, 0xf000, em, old_extents, 0x10000);
ASSERT_EQ(saving, 2);
*new BlueStore::Extent(0x3000, 0, 0x4000, b2)); // new extent
b2->get_ref(coll.get(), 0, 0x4000);
- old_extents.push_back(*new BlueStore::Extent(0x3000, 0x3000, 0x1000, b1));
- b1->get_ref(coll.get(), 0x3000, 0x1000);
+ old_extents.push_back(*new BlueStore::OldExtent(0x3000, 0x3000, 0x1000, b1));
saving = gc.estimate(0x3000, 0x4000, em, old_extents, 0x1000);
ASSERT_EQ(saving, 0);
BlueStore::Onode onode(coll.get(), ghobject_t(), "");
BlueStore::ExtentMap em(&onode);
- BlueStore::extent_map_t old_extents;
+ BlueStore::old_extent_map_t old_extents;
BlueStore::GarbageCollector gc(g_ceph_context);
int64_t saving;
BlueStore::BlobRef b0(new BlueStore::Blob);
em.extent_map.insert(*new BlueStore::Extent(0x3f000, 0x1f000, 0x1000, b1));
b1->get_ref(coll.get(), 0x1f000, 0x1000);
- old_extents.push_back(*new BlueStore::Extent(0x8000, 0x8000, 0x8000, b0));
- b0->get_ref(coll.get(), 0x8000, 0x8000);
+ old_extents.push_back(*new BlueStore::OldExtent(0x8000, 0x8000, 0x8000, b0));
old_extents.push_back(
- *new BlueStore::Extent(0x10000, 0x10000, 0x10000, b0));
- b0->get_ref(coll.get(), 0x10000, 0x10000);
+ *new BlueStore::OldExtent(0x10000, 0x10000, 0x10000, b0));
old_extents.push_back(
- *new BlueStore::Extent(0x20000, 0x00000, 0x1f000, b1));
- b1->get_ref(coll.get(), 0x0, 0x1f000);
+ *new BlueStore::OldExtent(0x20000, 0x00000, 0x1f000, b1));
saving = gc.estimate(0x30000, 0xf000, em, old_extents, 0x10000);
ASSERT_EQ(saving, 2);