}
+// Release a reference on [offset, offset+length) of this shared blob.
+// Extents freed by the drop are appended to *r.  If 'maybe_unshared' is
+// non-null, this blob is inserted into it when the underlying ref_map
+// reports that every remaining reference count may now be 1 (i.e. the
+// blob is a candidate for unsharing).
void BlueStore::SharedBlob::put_ref(uint64_t offset, uint32_t length,
- PExtentVector *r)
+ PExtentVector *r,
+ set<SharedBlob*> *maybe_unshared)
{
assert(persistent);
+ bool maybe = false;
+ // only ask ref_map to compute the "all refs == 1" condition when the
+ // caller actually wants the answer (it costs a map scan on success)
+ persistent->ref_map.put(offset, length, r, maybe_unshared ? &maybe : nullptr);
+ if (maybe_unshared && maybe) {
+ maybe_unshared->insert(this);
+ }
}
// Blob
+// Mark blob b as shared under shared-blob id 'sbid'.  This hunk only
+// reorders statements (the assert and dirty_blob() move below the initial
+// debug log); observable behavior is unchanged.
void BlueStore::Collection::make_blob_shared(uint64_t sbid, BlobRef b)
{
- assert(!b->shared_blob->is_loaded());
-
ldout(store->cct, 10) << __func__ << " " << *b << dendl;
- bluestore_blob_t& blob = b->dirty_blob();
+ assert(!b->shared_blob->is_loaded());
// update blob
+ bluestore_blob_t& blob = b->dirty_blob();
blob.set_flag(bluestore_blob_t::FLAG_SHARED);
// update shared blob
ldout(store->cct, 20) << __func__ << " now " << *b << dendl;
}
+// Revert a loaded SharedBlob to the unshared/unloaded state: remove it
+// from the collection's shared_blob_set, free its persistent ref map and
+// zero its unloaded sbid.  Returns the old sbid so the caller can delete
+// the corresponding PREFIX_SHARED_BLOB key from the kv store (see the
+// removal path, which calls get_shared_blob_key + rmkey with this value).
+uint64_t BlueStore::Collection::make_blob_unshared(SharedBlob *sb)
+{
+ ldout(store->cct, 10) << __func__ << " " << *sb << dendl;
+ assert(sb->is_loaded());
+
+ uint64_t sbid = sb->get_sbid();
+ shared_blob_set.remove(sb);
+ sb->loaded = false;
+ delete sb->persistent;
+ // NOTE(review): 'persistent' is deleted but not nulled here, leaving a
+ // dangling pointer; safe only if nothing dereferences it once
+ // loaded == false -- confirm, or set sb->persistent = nullptr.
+ sb->sbid_unloaded = 0;
+ ldout(store->cct, 20) << __func__ << " now " << *sb << dendl;
+ return sbid;
+}
+
BlueStore::OnodeRef BlueStore::Collection::get_onode(
const ghobject_t& oid,
bool create)
TransContext *txc,
CollectionRef& c,
OnodeRef o,
- WriteContext *wctx)
+ WriteContext *wctx,
+ // optional out-set: shared blobs whose refs may have all dropped to 1
+ // while this write context's old extents were released (presumably
+ // this is _wctx_finish, per the matching header declaration change)
+ set<SharedBlob*> *maybe_unshared_blobs)
{
auto oep = wctx->old_extents.begin();
while (oep != wctx->old_extents.end()) {
PExtentVector final;
c->load_shared_blob(b->shared_blob);
for (auto e : r) {
- b->shared_blob->put_ref(e.offset, e.length, &final);
+ b->shared_blob->put_ref(
+ e.offset, e.length, &final,
+ // skip candidate collection while this Blob still holds
+ // references of its own (presumably Blob::is_referenced()
+ // checks the local use count -- confirm); such a blob
+ // clearly cannot be unshared yet
+ b->is_referenced() ? nullptr : maybe_unshared_blobs);
}
dout(20) << __func__ << " shared_blob release " << final
<< " from " << *b->shared_blob << dendl;
}
+// Truncate o to 'offset'.  The new optional 'maybe_unshared_blobs' set is
+// threaded through to _wctx_finish so the caller (object removal) can
+// learn which shared blobs became unshare candidates.
void BlueStore::_do_truncate(
- TransContext *txc, CollectionRef& c, OnodeRef o, uint64_t offset)
+ TransContext *txc, CollectionRef& c, OnodeRef o, uint64_t offset,
+ set<SharedBlob*> *maybe_unshared_blobs)
{
dout(15) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << std::dec << dendl;
_dump_onode(o, 30);
if (offset == o->onode.size)
- return ;
+ return;
if (offset < o->onode.size) {
WriteContext wctx;
uint64_t length = o->onode.size - offset;
o->extent_map.fault_range(db, offset, length);
o->extent_map.punch_hole(c, offset, length, &wctx.old_extents);
- o->extent_map.dirty_range(txc->t, offset, length);
+ o->extent_map.dirty_range(offset, length);
+ // NOTE(review): dirty_range loses its 'txc->t' argument here; the
+ // matching ExtentMap signature change is not visible in this patch --
+ // confirm it lands together with this hunk.
- _wctx_finish(txc, c, o, &wctx);
+ _wctx_finish(txc, c, o, &wctx, maybe_unshared_blobs);
// if we have shards past EOF, ask for a reshard
if (!o->onode.extent_map_shards.empty() &&
CollectionRef& c,
OnodeRef o)
{
- _do_truncate(txc, c, o, 0);
+ // truncate to zero, collecting shared blobs that may have become
+ // single-referenced as this object's extents were released
+ set<SharedBlob*> maybe_unshared_blobs;
+ _do_truncate(txc, c, o, 0, &maybe_unshared_blobs);
if (o->onode.has_omap()) {
o->flush();
_do_omap_clear(txc, o->onode.nid);
o->extent_map.clear();
o->onode = bluestore_onode_t();
_debug_obj_on_delete(o->oid);
+
+ // Only generated objects (clones) can leave their head object as the
+ // sole remaining referrer of a shared blob, so only then is unsharing
+ // worth attempting.
+ if (!o->oid.is_no_gen() &&
+ !maybe_unshared_blobs.empty()) {
+ // see if we can unshare blobs still referenced by the head
+ dout(10) << __func__ << " gen and maybe_unshared_blobs "
+ << maybe_unshared_blobs << dendl;
+ ghobject_t nogen = o->oid;
+ nogen.generation = ghobject_t::NO_GEN;
+ // only an in-cache, existing head is considered; a cache miss simply
+ // skips the optimization
+ OnodeRef h = c->onode_map.lookup(nogen);
+ if (h && h->exists) {
+ dout(20) << __func__ << " checking for unshareable blobs on " << h
+ << " " << h->oid << dendl;
+ // Build, per candidate shared blob, the ref map the head ALONE
+ // would account for.  If it equals the persistent ref map, every
+ // remaining reference comes from the head and the blob can be
+ // unshared.
+ map<SharedBlob*,bluestore_extent_ref_map_t> expect;
+ for (auto& e : h->extent_map.extent_map) {
+ const bluestore_blob_t& b = e.blob->get_blob();
+ SharedBlob *sb = e.blob->shared_blob.get();
+ if (b.is_shared() &&
+ sb->loaded &&
+ maybe_unshared_blobs.count(sb)) {
+ // translate the logical blob extent into physical ranges and
+ // accumulate the expected refs
+ b.map(e.blob_offset, e.length, [&](uint64_t off, uint64_t len) {
+ expect[sb].get(off, len);
+ return 0;
+ });
+ }
+ }
+ vector<SharedBlob*> unshared_blobs;
+ unshared_blobs.reserve(maybe_unshared_blobs.size());
+ for (auto& p : expect) {
+ dout(20) << " ? " << *p.first << " vs " << p.second << dendl;
+ if (p.first->persistent->ref_map == p.second) {
+ SharedBlob *sb = p.first;
+ dout(20) << __func__ << " unsharing " << *sb << dendl;
+ unshared_blobs.push_back(sb);
+ // drop any pending write of the shared blob from this txn and
+ // remove its persistent record
+ txc->unshare_blob(sb);
+ uint64_t sbid = c->make_blob_unshared(sb);
+ string key;
+ get_shared_blob_key(sbid, &key);
+ txc->t->rmkey(PREFIX_SHARED_BLOB, key);
+ }
+ }
+
+ // second pass: clear FLAG_SHARED on the head's blobs that we just
+ // unshared, tracking the dirtied logical range (b_start starts at
+ // the OBJECT_MAX_SIZE sentinel)
+ uint32_t b_start = OBJECT_MAX_SIZE;
+ uint32_t b_end = 0;
+ for (auto& e : h->extent_map.extent_map) {
+ const bluestore_blob_t& b = e.blob->get_blob();
+ SharedBlob *sb = e.blob->shared_blob.get();
+ if (b.is_shared() &&
+ std::find(unshared_blobs.begin(), unshared_blobs.end(),
+ sb) != unshared_blobs.end()) {
+ dout(20) << __func__ << " unsharing " << *e.blob << dendl;
+ bluestore_blob_t& blob = e.blob->dirty_blob();
+ blob.clear_flag(bluestore_blob_t::FLAG_SHARED);
+ if (e.logical_offset < b_start) {
+ b_start = e.logical_offset;
+ }
+ if (e.logical_end() > b_end) {
+ b_end = e.logical_end();
+ }
+ }
+ }
+ if (!unshared_blobs.empty()) {
+ // NOTE(review): other dirty_range call sites in this patch pass
+ // (offset, length), but b_end here is an END offset -- verify
+ // this shouldn't be (b_start, b_end - b_start); passing the end
+ // as a length over-dirties the range.
+ h->extent_map.dirty_range(b_start, b_end);
+ txc->write_onode(h);
+ }
+ }
+ }
return 0;
}
/// put logical references, and get back any released extents
void put_ref(uint64_t offset, uint32_t length,
- PExtentVector *r);
+ // maybe_unshared_blobs: optional out-set; this blob is added when all
+ // remaining refs may be 1 (unshare candidate)
+ PExtentVector *r, set<SharedBlob*> *maybe_unshared_blobs);
friend bool operator==(const SharedBlob &l, const SharedBlob &r) {
return l.get_sbid() == r.get_sbid();
void open_shared_blob(uint64_t sbid, BlobRef b);
void load_shared_blob(SharedBlobRef sb);
void make_blob_shared(uint64_t sbid, BlobRef b);
+ /// revert a shared blob to the unshared state; returns its old sbid
+ uint64_t make_blob_unshared(SharedBlob *sb);
BlobRef new_blob() {
BlobRef b = new Blob();
void write_shared_blob(SharedBlobRef &sb) {
shared_blobs.insert(sb);
}
+ /// drop a pending shared-blob write from this txn (blob is being unshared)
+ void unshare_blob(SharedBlob *sb) {
+ shared_blobs.erase(sb);
+ }
+
/// note we logically modified object (when onode itself is unmodified)
void note_modified_object(OnodeRef &o) {
// onode itself isn't written, though
TransContext *txc,
CollectionRef& c,
OnodeRef o,
- WriteContext *wctx);
+ WriteContext *wctx,
+ // NOTE(review): prefer 'nullptr' over '0' for pointer defaults
+ set<SharedBlob*> *maybe_unshared_blobs=0);
int _do_transaction(Transaction *t,
TransContext *txc,
void _do_truncate(TransContext *txc,
CollectionRef& c,
OnodeRef o,
- uint64_t offset);
+ uint64_t offset,
+ // optional out-set of unshare candidates, filled via _wctx_finish
+ set<SharedBlob*> *maybe_unshared_blobs=0);
void _truncate(TransContext *txc,
CollectionRef& c,
OnodeRef& o,
+// Drop one reference on [offset, offset+length); freed ranges are appended
+// to *release.  New optional out-param 'maybe_unshared': set to true iff,
+// after this put, every record remaining in ref_map has refs == 1.  The
+// flag is advisory ("maybe"): inline checks below conservatively clear it
+// whenever a refs != 1 record is observed along the touched path, and a
+// full-map scan at 'out' confirms the positive case.
void bluestore_extent_ref_map_t::put(
uint64_t offset, uint32_t length,
- PExtentVector *release)
+ PExtentVector *release,
+ bool *maybe_unshared)
{
//NB: existing entries in 'release' container must be preserved!
-
+ // optimistic: assume all remaining refs are 1 until proven otherwise
+ bool unshared = true;
auto p = ref_map.lower_bound(offset);
if (p == ref_map.end() || p->first > offset) {
if (p == ref_map.begin()) {
if (p->first < offset) {
uint64_t left = p->first + p->second.length - offset;
p->second.length = offset - p->first;
+ if (p->second.refs != 1) {
+ unshared = false;
+ }
p = ref_map.insert(map<uint64_t,record_t>::value_type(
offset, record_t(left, p->second.refs))).first;
}
while (length > 0) {
assert(p->first == offset);
if (length < p->second.length) {
+ if (p->second.refs != 1) {
+ unshared = false;
+ }
ref_map.insert(make_pair(offset + length,
record_t(p->second.length - length,
p->second.refs)));
if (p->second.refs > 1) {
p->second.length = length;
--p->second.refs;
+ if (p->second.refs != 1) {
+ unshared = false;
+ }
_maybe_merge_left(p);
} else {
if (release)
release->push_back(bluestore_pextent_t(p->first, length));
ref_map.erase(p);
}
- return;
+ // goto replaces the early return so the maybe_unshared report at
+ // 'out' still runs on this path
+ goto out;
}
offset += p->second.length;
length -= p->second.length;
if (p->second.refs > 1) {
--p->second.refs;
+ if (p->second.refs != 1) {
+ unshared = false;
+ }
_maybe_merge_left(p);
++p;
} else {
if (p != ref_map.end())
_maybe_merge_left(p);
//_check();
+out:
+ if (maybe_unshared) {
+ if (unshared) {
+ // we haven't seen a ref != 1 yet; check the whole map.
+ for (auto& p : ref_map) {
+ if (p.second.refs != 1) {
+ unshared = false;
+ break;
+ }
+ }
+ }
+ *maybe_unshared = unshared;
+ }
}
bool bluestore_extent_ref_map_t::contains(uint64_t offset, uint32_t length) const
{
bluestore_extent_ref_map_t m;
PExtentVector r;
+ bool maybe_unshared = false;
+ // release the only ref on [10,40): whole range freed, map empty, and the
+ // all-refs==1 condition holds trivially
m.get(10, 30);
- m.put(10, 30, &r);
- cout << m << " " << r << std::endl;
+ maybe_unshared = true;
+ m.put(10, 30, &r, &maybe_unshared);
+ cout << m << " " << r << " " << (int)maybe_unshared << std::endl;
ASSERT_EQ(0u, m.ref_map.size());
ASSERT_EQ(1u, r.size());
ASSERT_EQ(10u, r[0].offset);
ASSERT_EQ(30u, r[0].length);
+ ASSERT_TRUE(maybe_unshared);
r.clear();
+ // overlapping get leaves [20,30) at refs=1 after the put; every surviving
+ // record has refs==1, so maybe_unshared stays true
m.get(10, 30);
m.get(20, 10);
- m.put(10, 30, &r);
- cout << m << " " << r << std::endl;
+ maybe_unshared = true;
+ m.put(10, 30, &r, &maybe_unshared);
+ cout << m << " " << r << " " << (int)maybe_unshared << std::endl;
ASSERT_EQ(1u, m.ref_map.size());
ASSERT_EQ(10u, m.ref_map[20].length);
ASSERT_EQ(1u, m.ref_map[20].refs);
ASSERT_EQ(10u, r[0].length);
ASSERT_EQ(30u, r[1].offset);
ASSERT_EQ(10u, r[1].length);
+ ASSERT_TRUE(maybe_unshared);
r.clear();
+ // double ref on [30,40): after put(20,15) a refs=2 region survives past
+ // offset 35, so maybe_unshared must come back false
m.get(30, 10);
m.get(30, 10);
- m.put(20, 15, &r);
- cout << m << " " << r << std::endl;
+ maybe_unshared = true;
+ m.put(20, 15, &r, &maybe_unshared);
+ cout << m << " " << r << " " << (int)maybe_unshared << std::endl;
ASSERT_EQ(2u, m.ref_map.size());
ASSERT_EQ(5u, m.ref_map[30].length);
ASSERT_EQ(1u, m.ref_map[30].refs);
ASSERT_EQ(1u, r.size());
ASSERT_EQ(20u, r[0].offset);
ASSERT_EQ(10u, r[0].length);
+ ASSERT_FALSE(maybe_unshared);
r.clear();
+ // [38,40) still carries refs=2 after this put -> still false
- m.put(33, 5, &r);
- cout << m << " " << r << std::endl;
+ maybe_unshared = true;
+ m.put(33, 5, &r, &maybe_unshared);
+ cout << m << " " << r << " " << (int)maybe_unshared << std::endl;
ASSERT_EQ(3u, m.ref_map.size());
ASSERT_EQ(3u, m.ref_map[30].length);
ASSERT_EQ(1u, m.ref_map[30].refs);
ASSERT_EQ(1u, r.size());
ASSERT_EQ(33u, r[0].offset);
ASSERT_EQ(2u, r[0].length);
+ ASSERT_FALSE(maybe_unshared);
+ // dropping the last double-ref leaves every record at refs=1 -> true
+ r.clear();
+ maybe_unshared = true;
+ m.put(38, 2, &r, &maybe_unshared);
+ cout << m << " " << r << " " << (int)maybe_unshared << std::endl;
+ ASSERT_TRUE(maybe_unshared);
}
TEST(bluestore_extent_ref_map_t, contains)