ldout(c->store->cct, 10) << __func__ << " done " << txc << dendl;
}
/// Streaming fragmentation counter.
///
/// Feed it ranges with note(); each range is treated as the half-open
/// interval [offset, offset + length).  A range that begins or ends exactly
/// at a remembered endpoint is glued onto that segment, a range that touches
/// remembered endpoints on both sides joins two segments into one, and any
/// other range starts a new segment.
///
/// frag_score is the current number of disjoint segments.
///
/// Only the outer endpoints of each segment are remembered, and start and
/// end points live in the same set.  A range that repeats or overlaps an
/// earlier one is therefore not recognised as such; callers should feed
/// non-overlapping ranges.
struct FragMetric {
  /// Outer endpoints (both starts and ends) of the segments seen so far.
  std::unordered_set<uint64_t> endpoints;
  /// Number of disjoint segments currently tracked.
  uint64_t frag_score = 0;

  FragMetric() = default;

  /// Record one mapped range.
  ///
  /// @param offset  start of the range
  /// @param length  size of the range; a zero-length range is ignored
  inline void note(uint64_t offset, uint64_t length) {
    // An empty range has identical start and end.  Letting it through would
    // make a single endpoint look like both neighbours at once, which can
    // erase a live endpoint and decrement (even wrap) frag_score.
    if (length == 0) {
      return;
    }

    const uint64_t end = offset + length;
    const bool merge_left = endpoints.count(offset) != 0;
    const bool merge_right = endpoints.count(end) != 0;

    if (merge_left && merge_right) {
      // Fills the gap between two segments: both inner endpoints vanish
      // and two segments become one.
      endpoints.erase(offset);
      endpoints.erase(end);
      frag_score--;
    } else if (merge_left) {
      // Extends an existing segment to the right.
      endpoints.erase(offset);
      endpoints.insert(end);
    } else if (merge_right) {
      // Extends an existing segment to the left.
      endpoints.erase(end);
      endpoints.insert(offset);
    } else {
      // Touches nothing known: a brand new segment.
      endpoints.insert(offset);
      endpoints.insert(end);
      frag_score++;
    }
  }
};
+
+int BlueStore::Onode::get_fragmentation_score()
+{
+ FragMetric frag;
+
+ std::unordered_set<BlobRef> visited_compressed_blobs;
+
+ for (const auto& e : extent_map.extent_map) {
+ if (e.blob->get_blob().is_compressed()) {
+ if (visited_compressed_blobs.insert(e.blob).second) {
+ e.blob->get_blob().map(
+ 0, e.blob->get_blob().get_ondisk_length(),
+ [&](uint64_t offset, uint64_t length) {
+ frag.note(offset, length);
+ return 0;
+ }
+ );
+ }
+ } else {
+ e.blob->get_blob().map(
+ e.blob_offset,
+ e.length,
+ [&](uint64_t phys_offset, uint64_t len) {
+ frag.note(phys_offset, len);
+ return 0;
+ }
+ );
+ }
+ }
+ return frag.frag_score;
+}
+
// =======================================================
// WriteContext
return 0;
}
+void BlueStore::_measure_runtime_frag(
+ Collection *c,
+ const blobs2read_t& blobs2read)
+{
+ auto start = mono_clock::now();
+ FragMetric frag;
+ for (auto& p : blobs2read) {
+ const BlobRef& bptr = p.first;
+ const regions2read_t& r2r = p.second;
+ for (auto req : r2r) {
+ bptr->get_blob().map(
+ req.r_off, req.r_len,
+ [&](uint64_t offset, uint64_t length) {
+ frag.note(offset, length);
+ return 0;
+ });
+ }
+ }
+ if (frag.frag_score > 0) {
+ c->runtime_read_samples.fetch_add(1, std::memory_order_relaxed);
+ c->runtime_frag_count.fetch_add(frag.frag_score, std::memory_order_relaxed);
+ }
+}
+
+void BlueStore::_measure_static_frag(
+ Collection *c,
+ const OnodeRef& o)
+{
+ auto read_samples = c->object_read_samples.load(std::memory_order_relaxed);
+ auto frag_score = o->get_fragmentation_score();
+ if (read_samples == 0) {
+ c->static_frag_score.store(frag_score, std::memory_order_relaxed);
+ c->object_read_samples.store(1, std::memory_order_relaxed);
+ } else {
+ c->static_frag_score.fetch_add(frag_score, std::memory_order_relaxed);
+ c->object_read_samples.fetch_add(1, std::memory_order_relaxed);
+ }
+}
+
int BlueStore::_do_read(
Collection *c,
OnodeRef& o,
);
}
+ if (cct->_conf->bluestore_frag_runtime) {
+ _measure_runtime_frag(c, blobs2read);
+ }
+
+ if ((op_flags & CEPH_OSD_OP_FLAG_SCRUB) && cct->_conf->bluestore_frag_static) {
+ if (!o->extent_map.extent_map.empty()) {
+ o->extent_map.fault_range(db, 0, OBJECT_MAX_SIZE);
+ auto it = o->extent_map.extent_map.begin();
+ uint64_t first_extent_offset = it->logical_offset;
+ if (offset <= first_extent_offset && first_extent_offset < offset + length) {
+ _measure_static_frag(c, o);
+ }
+ }
+ }
+
bool csum_error = false;
r = _generate_read_result_bl(o, offset, length, ready_regions,
compressed_blob_bls, blobs2read,
// we always issue aio for reading, so errors other than EIO are not allowed
if (r < 0)
return r;
+ if (cct->_conf->bluestore_frag_runtime) {
+ _measure_runtime_frag(c, std::get<2>(raw_results[i]));
+ }
}
auto num_ios = m.size();
);
}
+ if ((op_flags & CEPH_OSD_OP_FLAG_SCRUB) && cct->_conf->bluestore_frag_static) {
+ if (!o->extent_map.extent_map.empty()) {
+ o->extent_map.fault_range(db, 0, OBJECT_MAX_SIZE);
+ auto it = o->extent_map.extent_map.begin();
+ uint64_t first_extent_offset = it->logical_offset;
+ for (auto& p : m) {
+ uint64_t off = p.first;
+ uint64_t len = p.second;
+
+ if (off <= first_extent_offset &&
+ first_extent_offset < off + len) {
+ _measure_static_frag(c, o);
+ break;
+ }
+ }
+ }
+ }
+
ceph_assert(raw_results.size() == (size_t)m.num_intervals());
i = 0;
for (auto p = m.begin(); p != m.end(); p++, i++) {