From 9ee134e2950d9770427886c6edab4c3fb59b8d37 Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Fri, 24 Jun 2016 22:40:51 +0300 Subject: [PATCH] os/bluestore: replace bluestore_blob_t::unused from interval to bitmap Signed-off-by: Igor Fedotov --- src/os/bluestore/BlueStore.cc | 16 +++--- src/os/bluestore/bluestore_types.cc | 34 +++++++----- src/os/bluestore/bluestore_types.h | 83 +++++++++++++++++++++-------- 3 files changed, 89 insertions(+), 44 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 58383596a7bf..e3854b51091f 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5664,10 +5664,10 @@ void BlueStore::_do_write_small( // direct write into unused blocks of an existing mutable blob? uint64_t b_off = offset - head_pad - bstart; uint64_t b_len = length + head_pad + tail_pad; - if (b->blob.get_ondisk_length() >= b_off + b_len && - b->blob.is_unused(b_off, b_len) && - b->blob.is_allocated(b_off, b_len) && - (b_off % chunk_size == 0 && b_len % chunk_size == 0)) { + if ((b_off % chunk_size == 0 && b_len % chunk_size == 0) && + b->blob.get_ondisk_length() >= b_off + b_len && + b->blob.is_unused(b_off, b_len, min_alloc_size) && + b->blob.is_allocated(b_off, b_len)) { dout(20) << __func__ << " write to unused 0x" << std::hex << b_off << "~" << b_len << " pad 0x" << head_pad << " + 0x" << tail_pad @@ -5688,7 +5688,7 @@ void BlueStore::_do_write_small( bluestore_lextent_t& lex = o->onode.extent_map[offset] = bluestore_lextent_t(blob, b_off + head_pad, length); b->blob.ref_map.get(lex.offset, lex.length); - b->blob.mark_used(lex.offset, lex.length); + b->blob.mark_used(lex.offset, lex.length, min_alloc_size); txc->statfs_delta.stored() += lex.length; dout(20) << __func__ << " lex 0x" << std::hex << offset << std::dec << ": " << lex << dendl; @@ -5762,7 +5762,7 @@ void BlueStore::_do_write_small( bluestore_lextent_t& lex = o->onode.extent_map[offset] = bluestore_lextent_t(blob, offset - bstart, length); b->blob.ref_map.get(lex.offset, lex.length); - b->blob.mark_used(lex.offset, lex.length); + b->blob.mark_used(lex.offset, lex.length, min_alloc_size); txc->statfs_delta.stored() += lex.length; dout(20) << __func__ << " lex 0x" << std::hex << offset << std::dec << ": " << lex << dendl; @@ -5937,9 +5937,9 @@ int BlueStore::_do_alloc_write( auto b_off = wi.b_off; auto b_len = wi.bl.length(); if (b_off) - b->blob.add_unused(0, b_off); + b->blob.add_unused(0, b_off, min_alloc_size); if (b_off + b_len < wi.blob_length) - b->blob.add_unused(b_off + b_len, wi.blob_length - (b_off + b_len)); + b->blob.add_unused(b_off + b_len, wi.blob_length - (b_off + b_len), min_alloc_size); } // queue io diff --git a/src/os/bluestore/bluestore_types.cc b/src/os/bluestore/bluestore_types.cc index 3400946f147b..6f6e74868bba 100644 --- a/src/os/bluestore/bluestore_types.cc +++ b/src/os/bluestore/bluestore_types.cc @@ -418,6 +418,12 @@ string bluestore_blob_t::get_flags_string(unsigned flags) s += '+'; s += "csum"; } + if (flags & FLAG_HAS_UNUSED) { + if (s.length()) + s += '+'; + s += "has_unused"; + } + return s; } @@ -435,7 +441,9 @@ void bluestore_blob_t::encode(bufferlist& bl) const small_encode_buf_lowz(csum_data, bl); } ::encode(ref_map, bl); - ::encode(unused, bl); + if (has_unused()) { + ::encode( unused_uint_t(unused.to_ullong()), bl); + } ENCODE_FINISH(bl); } @@ -458,7 +466,11 @@ void bluestore_blob_t::decode(bufferlist::iterator& p) csum_chunk_order = 0; } ::decode(ref_map, p); - ::decode(unused, p); + if (has_unused()) { + unused_uint_t val; + ::decode(val, p); + unused = unused_t(val); + } DECODE_FINISH(p); } @@ -479,14 +491,7 @@ void bluestore_blob_t::dump(Formatter *f) const for (unsigned i = 0; i < n; ++i) f->dump_unsigned("csum", get_csum_item(i)); f->close_section(); - f->open_array_section("unused"); - for (auto p = unused.begin(); p != unused.end(); ++p) { - f->open_object_section("range"); - f->dump_unsigned("offset", p.get_start()); - f->dump_unsigned("length", p.get_len()); - f->close_section(); - } - f->close_section(); + f->dump_unsigned("unused", unused.to_ullong()); } void bluestore_blob_t::generate_test_instances(list& ls) @@ -499,8 +504,9 @@ void bluestore_blob_t::generate_test_instances(list& ls) ls.back()->init_csum(CSUM_XXHASH32, 16, 65536); ls.back()->csum_data = buffer::claim_malloc(4, strdup("abcd")); ls.back()->ref_map.get(3, 5); - ls.back()->add_unused(0, 3); - ls.back()->add_unused(8, 8); + ls.back()->add_unused(0, 3, 4096); + ls.back()->add_unused(8, 8, 4096); + ls.back()->add_unused(80, 8192-1, 4096); ls.back()->extents.emplace_back(bluestore_pextent_t(0x40100000, 0x10000)); ls.back()->extents.emplace_back( bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x1000)); @@ -521,8 +527,8 @@ ostream& operator<<(ostream& out, const bluestore_blob_t& o) if (!o.ref_map.empty()) { out << " " << o.ref_map; } - if (!o.unused.empty()) - out << " unused=0x" << std::hex << o.unused << std::dec; + if (o.has_unused()) + out << " unused=0x" << std::hex << o.unused.to_ullong() << std::dec; out << ")"; return out; } diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h index b8c404a29a82..3d8d14074014 100644 --- a/src/os/bluestore/bluestore_types.h +++ b/src/os/bluestore/bluestore_types.h @@ -16,6 +16,7 @@ #define CEPH_OSD_BLUESTORE_BLUESTORE_TYPES_H #include +#include #include "include/types.h" #include "include/interval_set.h" #include "include/utime.h" @@ -150,9 +151,10 @@ static inline bool operator!=(const bluestore_extent_ref_map_t& l, /// blob: a piece of data on disk struct bluestore_blob_t { enum { - FLAG_MUTABLE = 1, ///< blob can be overwritten or split - FLAG_COMPRESSED = 2, ///< blob is compressed - FLAG_CSUM = 4, ///< blob as checksums + FLAG_MUTABLE = 1, ///< blob can be overwritten or split + FLAG_COMPRESSED = 2, ///< blob is compressed + FLAG_CSUM = 4, ///< blob has checksums + FLAG_HAS_UNUSED = 8, ///< blob has unused map }; static string get_flags_string(unsigned flags); @@ -192,16 +194,19 @@ struct bluestore_blob_t { return -EINVAL; } - vector extents; ///< raw data position on device - uint32_t compressed_length = 0; ///< compressed length if any - uint32_t flags = 0; ///< FLAG_* + vector extents;///< raw data position on device + uint32_t compressed_length = 0; ///< compressed length if any + uint32_t flags = 0; ///< FLAG_* - uint8_t csum_type = CSUM_NONE; ///< CSUM_* - uint8_t csum_chunk_order = 0; ///< csum block size is 1< unused; ///< portion that has never been written to - bufferptr csum_data; ///< opaque vector of csum data + bufferptr csum_data; ///< opaque vector of csum data + + typedef uint16_t unused_uint_t; + typedef std::bitset unused_t; + unused_t unused; ///< portion that has never been written to bluestore_blob_t(uint32_t f = 0) : flags(f) {} @@ -236,6 +241,9 @@ struct bluestore_blob_t { bool has_csum() const { return has_flag(FLAG_CSUM); } + bool has_unused() const { + return has_flag(FLAG_HAS_UNUSED); + } /// return chunk (i.e. min readable block) size for the blob uint64_t get_chunk_size(uint64_t dev_block_size) { @@ -288,24 +296,55 @@ struct bluestore_blob_t { } /// return true if the logical range has never been used - bool is_unused(uint64_t offset, uint64_t length) const { - return unused.contains(offset, length); + bool is_unused(uint64_t offset, uint64_t length, uint64_t min_alloc_size) const { + if (!has_unused()) { + return false; + } + assert((min_alloc_size % unused.size()) == 0); + assert(offset + length <= min_alloc_size); + uint64_t chunk_size = min_alloc_size / unused.size(); + uint64_t start = offset / chunk_size; + uint64_t end = ROUND_UP_TO(offset + length, chunk_size) / chunk_size; + assert(end <= unused.size()); + auto i = start; + while (i < end && unused[i]) { + i++; + } + return i >= end; } /// mark a range that has never been used - void add_unused(uint64_t offset, uint64_t length) { - unused.insert(offset, length); + void add_unused(uint64_t offset, uint64_t length, uint64_t min_alloc_size) { + assert((min_alloc_size % unused.size()) == 0); + assert(offset + length <= min_alloc_size); + uint64_t chunk_size = min_alloc_size / unused.size(); + uint64_t start = ROUND_UP_TO(offset, chunk_size) / chunk_size; + uint64_t end = (offset + length) / chunk_size; + assert(end <= unused.size()); + for (auto i = start; i < end; ++i) { + unused[i] = 1; + } + if (start != end) { + set_flag(FLAG_HAS_UNUSED); + } } /// indicate that a range has (now) been used. - void mark_used(uint64_t offset, uint64_t length) { - if (unused.empty()) - return; - interval_set t; - t.insert(offset, length); - t.intersection_of(unused); - if (!t.empty()) - unused.subtract(t); + void mark_used(uint64_t offset, uint64_t length, uint64_t min_alloc_size) { + if (has_unused()) { + assert((min_alloc_size % unused.size()) == 0); + assert(offset + length <= min_alloc_size); + uint64_t chunk_size = min_alloc_size / unused.size(); + uint64_t start = offset / chunk_size; + uint64_t end = ROUND_UP_TO(offset + length, chunk_size) / chunk_size; + assert(end <= unused.size()); + for (auto i = start; i < end; ++i) { + unused[i] = 0; + } + if (unused.none()) { + clear_flag(FLAG_HAS_UNUSED); + } + } } /// put logical references, and get back any released extents -- 2.47.3