// direct write into unused blocks of an existing mutable blob?
uint64_t b_off = offset - head_pad - bstart;
uint64_t b_len = length + head_pad + tail_pad;
- if (b->blob.get_ondisk_length() >= b_off + b_len &&
- b->blob.is_unused(b_off, b_len) &&
- b->blob.is_allocated(b_off, b_len) &&
- (b_off % chunk_size == 0 && b_len % chunk_size == 0)) {
+ if ((b_off % chunk_size == 0 && b_len % chunk_size == 0) &&
+ b->blob.get_ondisk_length() >= b_off + b_len &&
+ b->blob.is_unused(b_off, b_len, min_alloc_size) &&
+ b->blob.is_allocated(b_off, b_len)) {
dout(20) << __func__ << " write to unused 0x" << std::hex
<< b_off << "~" << b_len
<< " pad 0x" << head_pad << " + 0x" << tail_pad
bluestore_lextent_t& lex = o->onode.extent_map[offset] =
bluestore_lextent_t(blob, b_off + head_pad, length);
b->blob.ref_map.get(lex.offset, lex.length);
- b->blob.mark_used(lex.offset, lex.length);
+ b->blob.mark_used(lex.offset, lex.length, min_alloc_size);
txc->statfs_delta.stored() += lex.length;
dout(20) << __func__ << " lex 0x" << std::hex << offset << std::dec
<< ": " << lex << dendl;
bluestore_lextent_t& lex = o->onode.extent_map[offset] =
bluestore_lextent_t(blob, offset - bstart, length);
b->blob.ref_map.get(lex.offset, lex.length);
- b->blob.mark_used(lex.offset, lex.length);
+ b->blob.mark_used(lex.offset, lex.length, min_alloc_size);
txc->statfs_delta.stored() += lex.length;
dout(20) << __func__ << " lex 0x" << std::hex << offset
<< std::dec << ": " << lex << dendl;
auto b_off = wi.b_off;
auto b_len = wi.bl.length();
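+ // mark the untouched head and tail of the blob (before and after this write) as unused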
if (b_off)
- b->blob.add_unused(0, b_off);
+ b->blob.add_unused(0, b_off, min_alloc_size);
if (b_off + b_len < wi.blob_length)
- b->blob.add_unused(b_off + b_len, wi.blob_length - (b_off + b_len));
+ b->blob.add_unused(b_off + b_len, wi.blob_length - (b_off + b_len), min_alloc_size);
}
// queue io
s += '+';
s += "csum";
}
+ if (flags & FLAG_HAS_UNUSED) {
+ if (s.length())
+ s += '+';
+ s += "has_unused";
+ }
+
return s;
}
small_encode_buf_lowz(csum_data, bl);
}
::encode(ref_map, bl);
- ::encode(unused, bl);
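+ // persist the bitmap as a plain integer, and only when FLAG_HAS_UNUSED is set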
+ if (has_unused()) {
+ ::encode(unused_uint_t(unused.to_ullong()), bl);
+ }
ENCODE_FINISH(bl);
}
csum_chunk_order = 0;
}
::decode(ref_map, p);
- ::decode(unused, p);
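+ // the bitmap is encoded only when FLAG_HAS_UNUSED is set, so read it back
+ // under the same condition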
+ if (has_unused()) {
+ unused_uint_t val;
+ ::decode(val, p);
+ unused = unused_t(val);
+ }
DECODE_FINISH(p);
}
for (unsigned i = 0; i < n; ++i)
f->dump_unsigned("csum", get_csum_item(i));
f->close_section();
- f->open_array_section("unused");
- for (auto p = unused.begin(); p != unused.end(); ++p) {
- f->open_object_section("range");
- f->dump_unsigned("offset", p.get_start());
- f->dump_unsigned("length", p.get_len());
- f->close_section();
- }
- f->close_section();
+ f->dump_unsigned("unused", unused.to_ullong());
}
void bluestore_blob_t::generate_test_instances(list<bluestore_blob_t*>& ls)
ls.back()->init_csum(CSUM_XXHASH32, 16, 65536);
ls.back()->csum_data = buffer::claim_malloc(4, strdup("abcd"));
ls.back()->ref_map.get(3, 5);
- ls.back()->add_unused(0, 3);
- ls.back()->add_unused(8, 8);
+ ls.back()->add_unused(0, 3, 4096);
+ ls.back()->add_unused(8, 8, 4096);
+ ls.back()->add_unused(80, 4096-80, 4096);
ls.back()->extents.emplace_back(bluestore_pextent_t(0x40100000, 0x10000));
ls.back()->extents.emplace_back(
bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x1000));
if (!o.ref_map.empty()) {
out << " " << o.ref_map;
}
- if (!o.unused.empty())
- out << " unused=0x" << std::hex << o.unused << std::dec;
+ if (o.has_unused())
+ out << " unused=0x" << std::hex << o.unused.to_ullong() << std::dec;
out << ")";
return out;
}
#define CEPH_OSD_BLUESTORE_BLUESTORE_TYPES_H
#include <ostream>
+#include <bitset>
#include "include/types.h"
#include "include/interval_set.h"
#include "include/utime.h"
/// blob: a piece of data on disk
struct bluestore_blob_t {
enum {
- FLAG_MUTABLE = 1, ///< blob can be overwritten or split
- FLAG_COMPRESSED = 2, ///< blob is compressed
- FLAG_CSUM = 4, ///< blob as checksums
+ FLAG_MUTABLE = 1, ///< blob can be overwritten or split
+ FLAG_COMPRESSED = 2, ///< blob is compressed
+ FLAG_CSUM = 4, ///< blob has checksums
+ FLAG_HAS_UNUSED = 8, ///< blob has unused map
};
static string get_flags_string(unsigned flags);
return -EINVAL;
}
- vector<bluestore_pextent_t> extents; ///< raw data position on device
- uint32_t compressed_length = 0; ///< compressed length if any
- uint32_t flags = 0; ///< FLAG_*
+ vector<bluestore_pextent_t> extents;///< raw data position on device
+ uint32_t compressed_length = 0; ///< compressed length if any
+ uint32_t flags = 0; ///< FLAG_*
- uint8_t csum_type = CSUM_NONE; ///< CSUM_*
- uint8_t csum_chunk_order = 0; ///< csum block size is 1<<block_order bytes
+ uint8_t csum_type = CSUM_NONE; ///< CSUM_*
+ uint8_t csum_chunk_order = 0; ///< csum block size is 1<<block_order bytes
bluestore_extent_ref_map_t ref_map; ///< references (empty when in onode)
- interval_set<uint32_t> unused; ///< portion that has never been written to
- bufferptr csum_data; ///< opaque vector of csum data
+ bufferptr csum_data; ///< opaque vector of csum data
+
+ typedef uint16_t unused_uint_t;
+ typedef std::bitset<sizeof(unused_uint_t) * 8> unused_t;
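+ // each bit covers min_alloc_size / (8 * sizeof(unused_uint_t)) bytes and is
+ // set while that chunk has never been written; only meaningful when
+ // FLAG_HAS_UNUSED is set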
+ unused_t unused; ///< portion that has never been written to
bluestore_blob_t(uint32_t f = 0) : flags(f) {}
bool has_csum() const {
return has_flag(FLAG_CSUM);
}
+ bool has_unused() const {
+ return has_flag(FLAG_HAS_UNUSED);
+ }
/// return chunk (i.e. min readable block) size for the blob
uint64_t get_chunk_size(uint64_t dev_block_size) {
}
/// return true if the logical range has never been used
- bool is_unused(uint64_t offset, uint64_t length) const {
- return unused.contains(offset, length);
+ bool is_unused(uint64_t offset, uint64_t length, uint64_t min_alloc_size) const {
+ if (!has_unused()) {
+ return false;
+ }
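+ // map the byte range onto bitmap chunks; the range counts as unused only
+ // if every chunk it overlaps is still flagged unused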
+ assert((min_alloc_size % unused.size()) == 0);
+ assert(offset + length <= min_alloc_size);
+ uint64_t chunk_size = min_alloc_size / unused.size();
+ uint64_t start = offset / chunk_size;
+ uint64_t end = ROUND_UP_TO(offset + length, chunk_size) / chunk_size;
+ assert(end <= unused.size());
+ auto i = start;
+ while (i < end && unused[i]) {
+ i++;
+ }
+ return i >= end;
}
/// mark a range that has never been used
- void add_unused(uint64_t offset, uint64_t length) {
- unused.insert(offset, length);
+ void add_unused(uint64_t offset, uint64_t length, uint64_t min_alloc_size) {
+ assert((min_alloc_size % unused.size()) == 0);
+ assert(offset + length <= min_alloc_size);
+ uint64_t chunk_size = min_alloc_size / unused.size();
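+ // only chunks fully contained in [offset, offset + length) may be marked
+ // unused, so round the start up and the end down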
+ uint64_t start = ROUND_UP_TO(offset, chunk_size) / chunk_size;
+ uint64_t end = (offset + length) / chunk_size;
+ assert(end <= unused.size());
+ for (auto i = start; i < end; ++i) {
+ unused[i] = 1;
+ }
+ if (start != end) {
+ set_flag(FLAG_HAS_UNUSED);
+ }
}
/// indicate that a range has (now) been used.
- void mark_used(uint64_t offset, uint64_t length) {
- if (unused.empty())
- return;
- interval_set<uint32_t> t;
- t.insert(offset, length);
- t.intersection_of(unused);
- if (!t.empty())
- unused.subtract(t);
+ void mark_used(uint64_t offset, uint64_t length, uint64_t min_alloc_size) {
+ if (has_unused()) {
+ assert((min_alloc_size % unused.size()) == 0);
+ assert(offset + length <= min_alloc_size);
+ uint64_t chunk_size = min_alloc_size / unused.size();
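+ // a write dirties every chunk it touches, so round outward and clear those bits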
+ uint64_t start = offset / chunk_size;
+ uint64_t end = ROUND_UP_TO(offset + length, chunk_size) / chunk_size;
+ assert(end <= unused.size());
+ for (auto i = start; i < end; ++i) {
+ unused[i] = 0;
+ }
+ if (unused.none()) {
+ clear_flag(FLAG_HAS_UNUSED);
+ }
+ }
}
/// put logical references, and get back any released extents