void BlueStore::ExtentMap::reshard(
KeyValueDB *db,
- KeyValueDB::Transaction t)
+ KeyValueDB::Transaction t,
+ uint32_t segment_size)
{
auto cct = onode->c->store->cct; // used by dout
dout(10) << __func__ << " 0x[" << std::hex << needs_reshard_begin << ","
- << needs_reshard_end << ")" << std::dec
+ << needs_reshard_end << ") segment 0x" << segment_size << std::dec
<< " of " << onode->onode.extent_map_shards.size()
<< " shards on " << onode->oid << dendl;
for (auto& p : spanning_blob_map) {
}
fault_range(db, needs_reshard_begin, (needs_reshard_end - needs_reshard_begin));
+ // When the reshard range runs to OBJECT_MAX_SIZE, clamp the range used for
+ // estimation below to the last mapped byte, so the per-segment encoded-size
+ // estimate reflects actual data rather than the object's maximum size.
+ uint64_t data_reshard_end = needs_reshard_end;
+ if (needs_reshard_end == OBJECT_MAX_SIZE && !extent_map.empty()) {
+ data_reshard_end = extent_map.rbegin()->blob_end();
+ }
// we may need to fault in a larger interval later must have all
// referring extents for spanning blobs loaded in order to have
dout(20) << __func__ << " extent_avg " << extent_avg << ", target " << target
<< ", slop " << slop << dendl;
+ // next_boundary: first segment-aligned offset at which a shard cut may be
+ // considered (only meaningful when segment_size != 0).
+ // encoded_segment_estimate: expected encoded bytes per segment, scaled
+ // linearly from the total encoded bytes over the whole estimation range.
+ // NOTE(review): assumes data_reshard_end > needs_reshard_begin (division by
+ // zero otherwise) and that bytes * segment_size does not overflow the
+ // intermediate type — TODO confirm against callers/invariants.
+ uint32_t next_boundary = segment_size;
+ uint32_t encoded_segment_estimate = bytes * segment_size / (data_reshard_end - needs_reshard_begin);
+
// reshard
unsigned estimate = 0;
unsigned offset = needs_reshard_begin;
}
dout(30) << " extent " << *extent << dendl;
- // disfavor shard boundaries that span a blob
- bool would_span = (extent->logical_offset < max_blob_end) || extent->blob_offset;
- if (estimate &&
- estimate + extent_avg > target + (would_span ? slop : 0)) {
+ bool make_shard_here = false;
+ if (segment_size != 0) { //onode data has strict boundaries
+ if (extent->blob_start() >= next_boundary) {
+ // beginning of the extent is a place that might be a shard boundary
+ // we want to decide whether to continue streaming to the current shard
+ // or move to the next one
+ // Cut when the current shard already meets the target, or when adding
+ // one more segment's estimated encoded bytes would push it past 150%
+ // of the target.
+ if ((estimate >= target /*we have enough already*/) ||
+ (estimate + encoded_segment_estimate >= (target * 3 / 2))
+ /*we will be too large if we wait for next segment*/) {
+ make_shard_here = true;
+ }
+ // Advance the candidate cut point past this extent's blob, rounded up
+ // to the next segment boundary, so a blob never straddles a cut.
+ next_boundary = p2roundup(extent->blob_end(), segment_size);
+ }
+ } else {
+ // Legacy path (no segmenting): original averaging heuristic.
+ // disfavor shard boundaries that span a blob
+ bool would_span = (extent->logical_offset < max_blob_end) || (extent->blob_offset != 0);
+ if ((estimate > 0)
+ && (estimate + extent_avg > target + (would_span ? slop : 0))) {
+ make_shard_here = true;
+ }
+ }
+ if (make_shard_here) {
// new shard
if (offset == needs_reshard_begin) {
new_shard_info.emplace_back(bluestore_onode_t::shard_info());
"bluestore_warn_on_legacy_statfs"s,
"bluestore_warn_on_no_per_pool_omap"s,
"bluestore_warn_on_no_per_pg_omap"s,
- "bluestore_max_defer_interval"s
+ "bluestore_max_defer_interval"s,
+ "bluestore_onode_segment_size"s
};
}
_set_compression();
}
}
+ // Pick up runtime changes to the onode segment size. NOTE(review): the
+ // member appears to be atomic (it is read via segment_size.load() in the
+ // write path) — confirm the store here is the atomic's assignment.
+ if (changed.count("bluestore_onode_segment_size")) {
+ segment_size = (cct->_conf.get_val<Option::size_t>("bluestore_onode_segment_size"));
+ }
if (changed.count("bluestore_max_blob_size") ||
changed.count("bluestore_max_blob_size_ssd") ||
changed.count("bluestore_max_blob_size_hdd")) {
def_compressor_alg = Compressor::COMP_ALG_NONE;
alg_name = "(none)";
}
-
dout(10) << __func__ << " mode " << Compressor::get_comp_mode_name(comp_mode)
<< " alg " << alg_name
<< " min_blob " << comp_min_blob_size
<< " max_blob " << comp_max_blob_size
+ << " segment_size " << segment_size
<< dendl;
}
if (head_length) {
_do_write_small(txc, c, o, head_offset, head_length, p, wctx);
}
-
- _do_write_big(txc, c, o, middle_offset, middle_length, p, wctx);
+ uint32_t segment_size = this->segment_size.load();
+ if (segment_size) {
+ // split data to chunks
+ uint64_t write_offset = middle_offset;
+ while (write_offset < middle_offset + middle_length) {
+ // p2roundup(write_offset + 1, segment_size) yields the next segment
+ // boundary strictly above write_offset — guaranteeing forward progress
+ // even when write_offset is already aligned — clamped to the write end.
+ uint64_t segment_end = std::min(
+ p2roundup<uint64_t>(write_offset + 1, segment_size),
+ middle_offset + middle_length);
+ _do_write_big(txc, c, o, write_offset, segment_end - write_offset, p, wctx);
+ write_offset = segment_end;
+ }
+ } else {
+ // No segmenting configured: write the whole middle range in one call.
+ _do_write_big(txc, c, o, middle_offset, middle_length, p, wctx);
+ }
if (tail_length) {
_do_write_small(txc, c, o, tail_offset, tail_length, p, wctx);
// if we have compression, skip to write_v1
return _do_write(txc, c, o, offset, length, bl, fadvise_flags);
}
+ // Keep blobs from straddling segment boundaries by capping the target blob
+ // size at the configured segment size.
+ if (segment_size != 0 && wctx.target_blob_size > segment_size) {
+ wctx.target_blob_size = segment_size;
+ }
if (bl.length() != length) {
bl.splice(length, bl.length() - length);
}
// finalize extent_map shards
o->extent_map.update(txn, false);
if (o->extent_map.needs_reshard()) {
- o->extent_map.reshard(db, txn);
+ // pass the configured segment size so shard cuts honor segment boundaries
+ o->extent_map.reshard(db, txn, segment_size);
o->extent_map.update(txn, true);
if (o->extent_map.needs_reshard()) {
dout(20) << __func__ << " warning: still wants reshard, check options?"