From: Sage Weil Date: Thu, 19 May 2016 15:57:43 +0000 (-0400) Subject: os/bluestore/bluestore_types: blob_t: add tracking for released extents X-Git-Tag: v11.0.0~359^2~53 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=282947e29d1e9f2791745c35bb411cbadc0705d1;p=ceph.git os/bluestore/bluestore_types: blob_t: add tracking for released extents We reference count which parts of the blob are used (by lextents), but currently we only release our space back to the system when all references go away. That is a problem if the blob is large (say, 4MB), and we, say, truncate off most (but not all) of it. Unfortunately, we can't simply deallocate anything that doesn't have a reference, because the logical refs are on byte boundaries, and allocation happens in larger units (min_alloc_size). A one byte logical punch_hole might be responsible for the release of a larger block of storage. To resolve this, we keep track of which portions of the blob have been released by poisoning the offset in the extents vector. We expect that this vector will be almost always short, so we do not bother with a indexed structure, since iterating a blob offset to determine if it is still allocated is likely faster. Signed-off-by: Sage Weil --- diff --git a/src/os/bluestore/bluestore_types.cc b/src/os/bluestore/bluestore_types.cc index bb425352ed4..f3de90eb709 100644 --- a/src/os/bluestore/bluestore_types.cc +++ b/src/os/bluestore/bluestore_types.cc @@ -539,6 +539,116 @@ ostream& operator<<(ostream& out, const bluestore_blob_t& o) return out; } +void bluestore_blob_t::put_ref( + uint64_t offset, + uint64_t length, + uint64_t min_release_size, + vector *r) +{ + vector logical; + ref_map.put(offset, length, &logical); + + r->clear(); + + // common case: all of it? + if (ref_map.empty()) { + uint64_t pos = 0; + for (auto& e : extents) { + if (e.is_valid()) { + r->push_back(e); + } + pos += e.length; + } + extents.resize(1); + extents[0].offset = bluestore_pextent_t::INVALID_OFFSET; + extents[0].length = this->length; + return; + } + + // search from logical releases + for (auto le : logical) { + uint64_t r_off = le.offset; + auto p = ref_map.ref_map.lower_bound(le.offset); + if (p != ref_map.ref_map.begin()) { + --p; + r_off = p->first + p->second.length; + ++p; + } else { + r_off = 0; + } + uint64_t end; + if (p == ref_map.ref_map.end()) { + end = this->length; + } else { + end = p->first; + } + r_off = ROUND_UP_TO(r_off, min_release_size); + end -= end % min_release_size; + if (r_off >= end) { + continue; + } + uint64_t r_len = end - r_off; + + // cut it out of extents + struct vecbuilder { + vector v; + uint64_t invalid = 0; + + void add_invalid(uint64_t length) { + invalid += length; + } + void flush() { + if (invalid) { + v.emplace_back(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, + invalid)); + invalid = 0; + } + } + void add(uint64_t offset, uint64_t length) { + if (offset == bluestore_pextent_t::INVALID_OFFSET) { + add_invalid(length); + } else { + flush(); + v.emplace_back(bluestore_pextent_t(offset, length)); + } + } + } vb; + + assert(r_len > 0); + auto q = extents.begin(); + assert(q != extents.end()); + while (r_off >= q->length) { + vb.add(q->offset, q->length); + r_off -= q->length; + ++q; + assert(q != extents.end()); + } + while (r_len > 0) { + uint64_t l = MIN(r_len, q->length - r_off); + if (q->is_valid()) { + r->push_back(bluestore_pextent_t(q->offset + r_off, l)); + } + if (r_off) { + vb.add(q->offset, r_off); + } + vb.add_invalid(l); + if (r_off + l < q->length) { + vb.add(q->offset + r_off + l, q->length - (r_off + l)); + } + r_len -= l; + r_off = 0; + ++q; + assert(q != extents.end() || r_len == 0); + } + while (q != extents.end()) { + vb.add(q->offset, q->length); + ++q; + } + vb.flush(); + extents.swap(vb.v); + } +} + void bluestore_blob_t::calc_csum(uint64_t b_off, const bufferlist& bl) { switch (csum_type) { diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h index 580b5394350..3382c6254af 100644 --- a/src/os/bluestore/bluestore_types.h +++ b/src/os/bluestore/bluestore_types.h @@ -319,6 +319,28 @@ struct bluestore_blob_t { return !ref_map.intersects(offset, length); } + /// return true if the entire range is allocated (mapped to extents on disk) + bool is_allocated(uint64_t b_off, uint64_t b_len) const { + auto p = extents.begin(); + assert(p != extents.end()); + while (b_off >= p->length) { + b_off -= p->length; + ++p; + assert(p != extents.end()); + } + b_len += b_off; + while (b_len) { + if (!p->is_valid()) { + return false; + } + if (p->length >= b_len) { + return true; + } + b_len -= p->length; + } + assert(0 == "we should not get here"); + } + /// return true if the logical range has never been used bool is_unused(uint64_t offset, uint64_t length) const { return unused.contains(offset, length); @@ -340,6 +362,10 @@ struct bluestore_blob_t { unused.subtract(t); } + /// put logical references, and get back any released extents + void put_ref(uint64_t offset, uint64_t length, uint64_t min_alloc_size, + vector *r); + void map(uint64_t x_off, uint64_t x_len, std::function f) { auto p = extents.begin(); diff --git a/src/test/objectstore/test_bluestore_types.cc b/src/test/objectstore/test_bluestore_types.cc index 422cf9c6131..fa2840eee82 100644 --- a/src/test/objectstore/test_bluestore_types.cc +++ b/src/test/objectstore/test_bluestore_types.cc @@ -189,6 +189,310 @@ TEST(bluestore_extent_ref_map_t, intersects) ASSERT_FALSE(m.intersects(55, 1)); } +TEST(bluestore_blob_t, put_ref) +{ + unsigned mas = 4096; + unsigned mrs = 8192; + + { + bluestore_blob_t b; + vector r; + b.length = mas * 2; + b.extents.push_back(bluestore_pextent_t(0, mas*2)); + b.ref_map.get(0, mas*2); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + b.put_ref(0, mas*2, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_FALSE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(0, mas)); + ASSERT_FALSE(b.is_allocated(mas, 0)); + ASSERT_FALSE(b.extents[0].is_valid()); + ASSERT_EQ(mas*2, b.extents[0].length); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 2; + b.extents.push_back(bluestore_pextent_t(123, mas*2)); + b.ref_map.get(0, mas*2); + b.put_ref(0, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(123u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_FALSE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.extents[0].is_valid()); + ASSERT_EQ(mas*2, b.extents[0].length); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 4; + b.extents.push_back(bluestore_pextent_t(1, mas)); + b.extents.push_back(bluestore_pextent_t(2, mas)); + b.extents.push_back(bluestore_pextent_t(3, mas)); + b.extents.push_back(bluestore_pextent_t(4, mas)); + b.ref_map.get(0, mas*4); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*4)); + ASSERT_TRUE(b.is_allocated(mas, mas)); + b.put_ref(mas*2, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(mas*2, mas)); + ASSERT_TRUE(b.is_allocated(0, mas*4)); + b.put_ref(mas*3, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(2u, r.size()); + ASSERT_EQ(3u, r[0].offset); + ASSERT_EQ(mas, r[0].length); + ASSERT_EQ(4u, r[1].offset); + ASSERT_EQ(mas, r[1].length); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(mas*2, mas*2)); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_TRUE(b.extents[1].is_valid()); + ASSERT_FALSE(b.extents[2].is_valid()); + ASSERT_EQ(3u, b.extents.size()); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 6; + b.extents.push_back(bluestore_pextent_t(1, mas)); + b.extents.push_back(bluestore_pextent_t(2, mas)); + b.extents.push_back(bluestore_pextent_t(3, mas)); + b.extents.push_back(bluestore_pextent_t(4, mas)); + b.extents.push_back(bluestore_pextent_t(5, mas)); + b.extents.push_back(bluestore_pextent_t(6, mas)); + b.ref_map.get(0, mas*6); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*6)); + b.put_ref(mas*2, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*6)); + b.put_ref(mas*3, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(2u, r.size()); + ASSERT_EQ(3u, r[0].offset); + ASSERT_EQ(mas, r[0].length); + ASSERT_EQ(4u, r[1].offset); + ASSERT_EQ(mas, r[1].length); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(mas*2, mas*2)); + ASSERT_TRUE(b.is_allocated(mas*4, mas*2)); + ASSERT_EQ(5u, b.extents.size()); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_TRUE(b.extents[1].is_valid()); + ASSERT_FALSE(b.extents[2].is_valid()); + ASSERT_TRUE(b.extents[3].is_valid()); + ASSERT_TRUE(b.extents[4].is_valid()); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 6; + b.extents.push_back(bluestore_pextent_t(1, mas * 6)); + b.ref_map.get(0, mas*6); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*6)); + b.put_ref(mas*2, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*6)); + b.put_ref(mas*3, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0x2001u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(mas*2, mas*2)); + ASSERT_TRUE(b.is_allocated(mas*4, mas*2)); + ASSERT_EQ(3u, b.extents.size()); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_FALSE(b.extents[1].is_valid()); + ASSERT_TRUE(b.extents[2].is_valid()); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 12; + b.extents.push_back(bluestore_pextent_t(1, mas * 4)); + b.extents.push_back(bluestore_pextent_t(2, mas * 4)); + b.extents.push_back(bluestore_pextent_t(3, mas * 4)); + b.ref_map.get(0, mas*12); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*12)); + b.put_ref(mas*9, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*12)); + b.put_ref(mas*2, mas*7, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(3u, r.size()); + ASSERT_EQ(0x2001u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(0x2u, r[1].offset); + ASSERT_EQ(mas*4, r[1].length); + ASSERT_EQ(0x3u, r[2].offset); + ASSERT_EQ(mas*2, r[2].length); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(mas*2, mas*8)); + ASSERT_TRUE(b.is_allocated(mas*10, mas*2)); + ASSERT_EQ(3u, b.extents.size()); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_FALSE(b.extents[1].is_valid()); + ASSERT_TRUE(b.extents[2].is_valid()); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 12; + b.extents.push_back(bluestore_pextent_t(1, mas * 4)); + b.extents.push_back(bluestore_pextent_t(2, mas * 4)); + b.extents.push_back(bluestore_pextent_t(3, mas * 4)); + b.ref_map.get(0, mas*12); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*12)); + b.put_ref(mas*9, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*12)); + b.put_ref(mas*2, mas*7, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(3u, r.size()); + ASSERT_EQ(0x2001u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(0x2u, r[1].offset); + ASSERT_EQ(mas*4, r[1].length); + ASSERT_EQ(0x3u, r[2].offset); + ASSERT_EQ(mas*2, r[2].length); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(mas*2, mas*8)); + ASSERT_TRUE(b.is_allocated(mas*10, mas*2)); + ASSERT_EQ(3u, b.extents.size()); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_FALSE(b.extents[1].is_valid()); + ASSERT_TRUE(b.extents[2].is_valid()); + b.put_ref(0, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0x1u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(2u, b.extents.size()); + ASSERT_FALSE(b.extents[0].is_valid()); + ASSERT_TRUE(b.extents[1].is_valid()); + b.put_ref(mas*10, mas*2, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0x2003u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(1u, b.extents.size()); + ASSERT_FALSE(b.extents[0].is_valid()); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 12; + b.extents.push_back(bluestore_pextent_t(1, mas * 4)); + b.extents.push_back(bluestore_pextent_t(2, mas * 4)); + b.extents.push_back(bluestore_pextent_t(3, mas * 4)); + b.ref_map.get(0, mas*12); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*12)); + b.put_ref(mas*9, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*12)); + b.put_ref(mas*2, mas*7, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(3u, r.size()); + ASSERT_EQ(0x2001u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(0x2u, r[1].offset); + ASSERT_EQ(mas*4, r[1].length); + ASSERT_EQ(0x3u, r[2].offset); + ASSERT_EQ(mas*2, r[2].length); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(mas*2, mas*8)); + ASSERT_TRUE(b.is_allocated(mas*10, mas*2)); + ASSERT_EQ(3u, b.extents.size()); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_FALSE(b.extents[1].is_valid()); + ASSERT_TRUE(b.extents[2].is_valid()); + b.put_ref(mas*10, mas*2, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0x2003u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(2u, b.extents.size()); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_FALSE(b.extents[1].is_valid()); + b.put_ref(0, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0x1u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(1u, b.extents.size()); + ASSERT_FALSE(b.extents[0].is_valid()); + } + { + bluestore_blob_t b; + vector r; + b.length = mas * 8; + b.extents.push_back(bluestore_pextent_t(1, mas * 8)); + b.ref_map.get(0, mas*8); + b.put_ref(0, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*8)); + b.put_ref(mas*7, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, mas*8)); + b.put_ref(mas*2, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(0u, r.size()); + ASSERT_TRUE(b.is_allocated(0, 8)); + b.put_ref(mas*3, mas*4, mrs, &r); + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0x2001u, r[0].offset); + ASSERT_EQ(mas*6, r[0].length); + ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_FALSE(b.is_allocated(mas*2, mas*6)); + ASSERT_EQ(2u, b.extents.size()); + ASSERT_TRUE(b.extents[0].is_valid()); + ASSERT_FALSE(b.extents[1].is_valid()); + b.put_ref(mas, mas, mrs, &r); + cout << "r " << r << " " << b << std::endl; + ASSERT_EQ(1u, r.size()); + ASSERT_EQ(0x1u, r[0].offset); + ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(1u, b.extents.size()); + ASSERT_FALSE(b.extents[0].is_valid()); + } +} + TEST(bluestore_blob_t, calc_csum) { bufferlist bl;