From 282947e29d1e9f2791745c35bb411cbadc0705d1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 19 May 2016 11:57:43 -0400 Subject: [PATCH] os/bluestore/bluestore_types: blob_t: add tracking for released extents We reference count which parts of the blob are used (by lextents), but currently we only release our space back to the system when all references go away. That is a problem if the blob is large (say, 4MB), and we, say, truncate off most (but not all) of it. Unfortunately, we can't simply deallocate anything that doesn't have a reference, because the logical refs are on byte boundaries, and allocation happens in larger units (min_alloc_size). A one byte logical punch_hole might be responsible for the release of a larger block of storage. To resolve this, we keep track of which portions of the blob have been released by poisoning the offset in the extents vector. We expect that this vector will be almost always short, so we do not bother with a indexed structure, since iterating a blob offset to determine if it is still allocated is likely faster. Signed-off-by: Sage Weil --- src/os/bluestore/bluestore_types.cc | 110 +++++++ src/os/bluestore/bluestore_types.h | 26 ++ src/test/objectstore/test_bluestore_types.cc | 304 +++++++++++++++++++ 3 files changed, 440 insertions(+) diff --git a/src/os/bluestore/bluestore_types.cc b/src/os/bluestore/bluestore_types.cc index bb425352ed410..f3de90eb70904 100644 --- a/src/os/bluestore/bluestore_types.cc +++ b/src/os/bluestore/bluestore_types.cc @@ -539,6 +539,116 @@ ostream& operator<<(ostream& out, const bluestore_blob_t& o) return out; } +void bluestore_blob_t::put_ref( + uint64_t offset, + uint64_t length, + uint64_t min_release_size, + vector *r) +{ + vector logical; + ref_map.put(offset, length, &logical); + + r->clear(); + + // common case: all of it? + if (ref_map.empty()) { + uint64_t pos = 0; + for (auto& e : extents) { + if (e.is_valid()) { + r->push_back(e); + } + pos += e.length; + } + extents.resize(1); + extents[0].offset = bluestore_pextent_t::INVALID_OFFSET; + extents[0].length = this->length; + return; + } + + // search from logical releases + for (auto le : logical) { + uint64_t r_off = le.offset; + auto p = ref_map.ref_map.lower_bound(le.offset); + if (p != ref_map.ref_map.begin()) { + --p; + r_off = p->first + p->second.length; + ++p; + } else { + r_off = 0; + } + uint64_t end; + if (p == ref_map.ref_map.end()) { + end = this->length; + } else { + end = p->first; + } + r_off = ROUND_UP_TO(r_off, min_release_size); + end -= end % min_release_size; + if (r_off >= end) { + continue; + } + uint64_t r_len = end - r_off; + + // cut it out of extents + struct vecbuilder { + vector v; + uint64_t invalid = 0; + + void add_invalid(uint64_t length) { + invalid += length; + } + void flush() { + if (invalid) { + v.emplace_back(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, + invalid)); + invalid = 0; + } + } + void add(uint64_t offset, uint64_t length) { + if (offset == bluestore_pextent_t::INVALID_OFFSET) { + add_invalid(length); + } else { + flush(); + v.emplace_back(bluestore_pextent_t(offset, length)); + } + } + } vb; + + assert(r_len > 0); + auto q = extents.begin(); + assert(q != extents.end()); + while (r_off >= q->length) { + vb.add(q->offset, q->length); + r_off -= q->length; + ++q; + assert(q != extents.end()); + } + while (r_len > 0) { + uint64_t l = MIN(r_len, q->length - r_off); + if (q->is_valid()) { + r->push_back(bluestore_pextent_t(q->offset + r_off, l)); + } + if (r_off) { + vb.add(q->offset, r_off); + } + vb.add_invalid(l); + if (r_off + l < q->length) { + vb.add(q->offset + r_off + l, q->length - (r_off + l)); + } + r_len -= l; + r_off = 0; + ++q; + assert(q != extents.end() || r_len == 0); + } + while (q != extents.end()) { + vb.add(q->offset, q->length); + ++q; + } + vb.flush(); + extents.swap(vb.v); + } +} + void bluestore_blob_t::calc_csum(uint64_t b_off, const bufferlist& bl) { switch (csum_type) { diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h index 580b5394350a5..3382c6254af30 100644 --- a/src/os/bluestore/bluestore_types.h +++ b/src/os/bluestore/bluestore_types.h @@ -319,6 +319,28 @@ struct bluestore_blob_t { return !ref_map.intersects(offset, length); } + /// return true if the entire range is allocated (mapped to extents on disk) + bool is_allocated(uint64_t b_off, uint64_t b_len) const { + auto p = extents.begin(); + assert(p != extents.end()); + while (b_off >= p->length) { + b_off -= p->length; + ++p; + assert(p != extents.end()); + } + b_len += b_off; + while (b_len) { + if (!p->is_valid()) { + return false; + } + if (p->length >= b_len) { + return true; + } + b_len -= p->length; + } + assert(0 == "we should not get here"); + } + /// return true if the logical range has never been used bool is_unused(uint64_t offset, uint64_t length) const { return unused.contains(offset, length); @@ -340,6 +362,10 @@ struct bluestore_blob_t { unused.subtract(t); } + /// put logical references, and get back any released extents + void put_ref(uint64_t offset, uint64_t length, uint64_t min_alloc_size, + vector *r); + void map(uint64_t x_off, uint64_t x_len, std::function f) { auto p = extents.begin(); diff --git a/src/test/objectstore/test_bluestore_types.cc b/src/test/objectstore/test_bluestore_types.cc index 422cf9c6131e6..fa2840eee82e4 100644 --- a/src/test/objectstore/test_bluestore_types.cc +++ b/src/test/objectstore/test_bluestore_types.cc @@ -189,6 +189,310 @@ TEST(bluestore_extent_ref_map_t, intersects) ASSERT_FALSE(m.intersects(55, 1)); } +TEST(bluestore_blob_t, put_ref) +{ + unsigned mas = 4096; + unsigned mrs = 8192; + + { + bluestore_blob_t b; + vector r; + b.length = mas * 2; + b.extents.push_back(bluestore_pextent_t(0, mas*2)); + b.ref_map.get(0, mas*2); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + b.put_ref(0, mas*2, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_FALSE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(0, mas)); + ASSERT_FALSE(b.is_allocated(mas, 0)); + ASSERT_FALSE(b.extents[0].is_valid()); + ASSERT_EQ(mas*2, b.extents[0].length); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 2; + b.extents.push_back(bluestore_pextent_t(123, mas*2)); + b.ref_map.get(0, mas*2); + b.put_ref(0, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(123u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_FALSE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.extents[0].is_valid()); + ASSERT_EQ(mas*2, b.extents[0].length); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 4; + b.extents.push_back(bluestore_pextent_t(1, mas)); + b.extents.push_back(bluestore_pextent_t(2, mas)); + b.extents.push_back(bluestore_pextent_t(3, mas)); + b.extents.push_back(bluestore_pextent_t(4, mas)); + b.ref_map.get(0, mas*4); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*4)); + ASSERT_TRUE(b.is_allocated(mas, mas)); + b.put_ref(mas*2, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(mas*2, mas)); + ASSERT_TRUE(b.is_allocated(0, mas*4)); + b.put_ref(mas*3, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(2u, r.size()); + ASSERT_EQ(3u, r[0].offset); + ASSERT_EQ(mas, r[0].length); + ASSERT_EQ(4u, r[1].offset); + ASSERT_EQ(mas, r[1].length); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(mas*2, mas*2)); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_TRUE(b.extents[1].is_valid()); + ASSERT_FALSE(b.extents[2].is_valid()); + ASSERT_EQ(3u, b.extents.size()); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 6; + b.extents.push_back(bluestore_pextent_t(1, mas)); + b.extents.push_back(bluestore_pextent_t(2, mas)); + b.extents.push_back(bluestore_pextent_t(3, mas)); + b.extents.push_back(bluestore_pextent_t(4, mas)); + b.extents.push_back(bluestore_pextent_t(5, mas)); + b.extents.push_back(bluestore_pextent_t(6, mas)); + b.ref_map.get(0, mas*6); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*6)); + b.put_ref(mas*2, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*6)); + b.put_ref(mas*3, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(2u, r.size()); + ASSERT_EQ(3u, r[0].offset); + ASSERT_EQ(mas, r[0].length); + ASSERT_EQ(4u, r[1].offset); + ASSERT_EQ(mas, r[1].length); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(mas*2, mas*2)); + ASSERT_TRUE(b.is_allocated(mas*4, mas*2)); + ASSERT_EQ(5u, b.extents.size()); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_TRUE(b.extents[1].is_valid()); + ASSERT_FALSE(b.extents[2].is_valid()); + ASSERT_TRUE(b.extents[3].is_valid()); + ASSERT_TRUE(b.extents[4].is_valid()); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 6; + b.extents.push_back(bluestore_pextent_t(1, mas * 6)); + b.ref_map.get(0, mas*6); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*6)); + b.put_ref(mas*2, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*6)); + b.put_ref(mas*3, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0x2001u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(mas*2, mas*2)); + ASSERT_TRUE(b.is_allocated(mas*4, mas*2)); + ASSERT_EQ(3u, b.extents.size()); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_FALSE(b.extents[1].is_valid()); + ASSERT_TRUE(b.extents[2].is_valid()); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 12; + b.extents.push_back(bluestore_pextent_t(1, mas * 4)); + b.extents.push_back(bluestore_pextent_t(2, mas * 4)); + b.extents.push_back(bluestore_pextent_t(3, mas * 4)); + b.ref_map.get(0, mas*12); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*12)); + b.put_ref(mas*9, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*12)); + b.put_ref(mas*2, mas*7, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(3u, r.size()); + ASSERT_EQ(0x2001u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(0x2u, r[1].offset); + ASSERT_EQ(mas*4, r[1].length); + ASSERT_EQ(0x3u, r[2].offset); + ASSERT_EQ(mas*2, r[2].length); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(mas*2, mas*8)); + ASSERT_TRUE(b.is_allocated(mas*10, mas*2)); + ASSERT_EQ(3u, b.extents.size()); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_FALSE(b.extents[1].is_valid()); + ASSERT_TRUE(b.extents[2].is_valid()); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 12; + b.extents.push_back(bluestore_pextent_t(1, mas * 4)); + b.extents.push_back(bluestore_pextent_t(2, mas * 4)); + b.extents.push_back(bluestore_pextent_t(3, mas * 4)); + b.ref_map.get(0, mas*12); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*12)); + b.put_ref(mas*9, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*12)); + b.put_ref(mas*2, mas*7, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(3u, r.size()); + ASSERT_EQ(0x2001u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(0x2u, r[1].offset); + ASSERT_EQ(mas*4, r[1].length); + ASSERT_EQ(0x3u, r[2].offset); + ASSERT_EQ(mas*2, r[2].length); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(mas*2, mas*8)); + ASSERT_TRUE(b.is_allocated(mas*10, mas*2)); + ASSERT_EQ(3u, b.extents.size()); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_FALSE(b.extents[1].is_valid()); + ASSERT_TRUE(b.extents[2].is_valid()); + b.put_ref(0, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0x1u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(2u, b.extents.size()); + ASSERT_FALSE(b.extents[0].is_valid()); + ASSERT_TRUE(b.extents[1].is_valid()); + b.put_ref(mas*10, mas*2, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0x2003u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(1u, b.extents.size()); + ASSERT_FALSE(b.extents[0].is_valid()); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 12; + b.extents.push_back(bluestore_pextent_t(1, mas * 4)); + b.extents.push_back(bluestore_pextent_t(2, mas * 4)); + b.extents.push_back(bluestore_pextent_t(3, mas * 4)); + b.ref_map.get(0, mas*12); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*12)); + b.put_ref(mas*9, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*12)); + b.put_ref(mas*2, mas*7, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(3u, r.size()); + ASSERT_EQ(0x2001u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(0x2u, r[1].offset); + ASSERT_EQ(mas*4, r[1].length); + ASSERT_EQ(0x3u, r[2].offset); + ASSERT_EQ(mas*2, r[2].length); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(mas*2, mas*8)); + ASSERT_TRUE(b.is_allocated(mas*10, mas*2)); + ASSERT_EQ(3u, b.extents.size()); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_FALSE(b.extents[1].is_valid()); + ASSERT_TRUE(b.extents[2].is_valid()); + b.put_ref(mas*10, mas*2, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0x2003u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(2u, b.extents.size()); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_FALSE(b.extents[1].is_valid()); + b.put_ref(0, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0x1u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(1u, b.extents.size()); + ASSERT_FALSE(b.extents[0].is_valid()); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 8; + b.extents.push_back(bluestore_pextent_t(1, mas * 8)); + b.ref_map.get(0, mas*8); + b.put_ref(0, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*8)); + b.put_ref(mas*7, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*8)); + b.put_ref(mas*2, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, 8)); + b.put_ref(mas*3, mas*4, mrs, &r); + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0x2001u, r[0].offset); + ASSERT_EQ(mas*6, r[0].length); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(mas*2, mas*6)); + ASSERT_EQ(2u, b.extents.size()); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_FALSE(b.extents[1].is_valid()); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0x1u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(1u, b.extents.size()); + ASSERT_FALSE(b.extents[0].is_valid()); + } +} + TEST(bluestore_blob_t, calc_csum) { bufferlist bl; -- 2.39.5