From: Bill Scales Date: Wed, 28 Aug 2024 07:59:13 +0000 (+0000) Subject: osd: further EC partial stripe read fixes X-Git-Tag: v20.0.0~288^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ccf1229bba076a93f15d58dd6587a448d59507da;p=ceph.git osd: further EC partial stripe read fixes * More fixes for multi-region reads * Enable partial stripe reads that span stripes Signed-off-by: Bill Scales --- diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index fa2570aba42..3c06268995e 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -1540,20 +1540,6 @@ int ECBackend::objects_read_sync( return -EOPNOTSUPP; } -static bool should_partial_read( - const ECUtil::stripe_info_t& sinfo, - uint64_t off, - uint32_t len, - bool fast_read) -{ - // Don't partial read if we are doing a fast_read - if (fast_read) { - return false; - } - // Same stripe only - return sinfo.offset_length_is_same_stripe(off, len); -} - void ECBackend::objects_read_async( const hobject_t &hoid, const list tmp; - if (!cct->_conf->osd_ec_partial_reads || - !should_partial_read(sinfo, read.offset, read.size, fast_read)) { + if (!cct->_conf->osd_ec_partial_reads || fast_read) { tmp = sinfo.offset_len_to_stripe_bounds(make_pair(read.offset, read.size)); } else { tmp = sinfo.offset_len_to_chunk_bounds(make_pair(read.offset, read.size)); @@ -1626,18 +1611,18 @@ void ECBackend::objects_read_async( uint64_t offset = read.first.offset; uint64_t length = read.first.size; auto range = got.emap.get_containing_range(offset, length); + uint64_t range_offset = range.first.get_off(); + uint64_t range_length = range.first.get_len(); ceph_assert(range.first != range.second); - ceph_assert(range.first.get_off() <= offset); + ceph_assert(range_offset <= offset); ldpp_dout(dpp, 20) << "offset: " << offset << dendl; - ldpp_dout(dpp, 20) << "range offset: " << range.first.get_off() << dendl; + ldpp_dout(dpp, 20) << "range offset: " << range_offset << dendl; ldpp_dout(dpp, 20) << "length: " << length << dendl; - ldpp_dout(dpp, 20) << "range length: " << range.first.get_len() << dendl; - ceph_assert( - (offset + length) <= - (range.first.get_off() + range.first.get_len())); + ldpp_dout(dpp, 20) << "range length: " << range_length << dendl; + ceph_assert(offset + length <= range_offset + range_length); read.second.first->substr_of( range.first.get_val(), - offset - range.first.get_off(), + offset - range_offset, length); if (read.second.second) { read.second.second->complete(length); diff --git a/src/osd/ECCommon.cc b/src/osd/ECCommon.cc index 609ac3141ae..99f9a90eea6 100644 --- a/src/osd/ECCommon.cc +++ b/src/osd/ECCommon.cc @@ -531,12 +531,8 @@ struct ClientReadCompleter : ECCommon::ReadCompleter { ceph_assert(res.errors.empty()); for (auto &&read: to_read) { const auto bounds = make_pair(read.offset, read.size); - // the configurable serves only the preservation of old behavior - // which will be dropped. ReadPipeline is actually able to handle - // reads aligned to chunk size. - const auto aligned = g_conf()->osd_ec_partial_reads \ - ? read_pipeline.sinfo.offset_len_to_chunk_bounds(bounds) - : read_pipeline.sinfo.offset_len_to_stripe_bounds(bounds); + const auto aligned = + read_pipeline.sinfo.offset_len_to_chunk_bounds(bounds); ceph_assert(res.returned.front().get<0>() == aligned.first); ceph_assert(res.returned.front().get<1>() == aligned.second); map to_decode; @@ -563,13 +559,29 @@ struct ClientReadCompleter : ECCommon::ReadCompleter { goto out; } bufferlist trimmed; - auto off = read.offset - aligned.first; - auto len = std::min(read.size, bl.length() - off); + // If partial stripe reads are disabled aligned_offset_in_stripe will + // be 0 which will mean trim_offset is 0. When partial reads are enabled + // the shards read (wanted_to_read) is a union of the requirements for + // each stripe, each range being read may need to trim unneeded shards + uint64_t aligned_offset_in_stripe = aligned.first - + read_pipeline.sinfo.logical_to_prev_stripe_offset(aligned.first); + uint64_t chunk_size = read_pipeline.sinfo.get_chunk_size(); + uint64_t trim_offset = 0; + for (auto shard : wanted_to_read) { + if (shard * chunk_size < aligned_offset_in_stripe) { + trim_offset += chunk_size; + } else { + break; + } + } + auto off = read.offset + trim_offset - aligned.first; dout(20) << __func__ << " bl.length()=" << bl.length() - << " len=" << len << " read.size=" << read.size - << " off=" << off << " read.offset=" << read.offset - << dendl; - trimmed.substr_of(bl, off, len); + << " off=" << off + << " read.offset=" << read.offset + << " read.size=" << read.size + << " trim_offset="<< trim_offset << dendl; + ceph_assert(read.size <= bl.length() - off); + trimmed.substr_of(bl, off, read.size); result.insert( read.offset, trimmed.length(), std::move(trimmed)); res.returned.pop_front(); diff --git a/src/osd/ECUtil.cc b/src/osd/ECUtil.cc index 6d9477a99af..e7e6faffdf6 100644 --- a/src/osd/ECUtil.cc +++ b/src/osd/ECUtil.cc @@ -13,9 +13,10 @@ using ceph::Formatter; std::pair ECUtil::stripe_info_t::chunk_aligned_offset_len_to_chunk( std::pair in) const { + pair tmp = offset_len_to_stripe_bounds(in); return std::make_pair( - chunk_aligned_logical_offset_to_chunk_offset(in.first), - chunk_aligned_logical_size_to_chunk_size(in.second)); + chunk_aligned_logical_offset_to_chunk_offset(tmp.first), + chunk_aligned_logical_size_to_chunk_size(tmp.second)); } int ECUtil::decode( diff --git a/src/test/osd/TestECBackend.cc b/src/test/osd/TestECBackend.cc index f93ed7ff67a..1421eedfe4f 100644 --- a/src/test/osd/TestECBackend.cc +++ b/src/test/osd/TestECBackend.cc @@ -54,22 +54,34 @@ TEST(ECUtil, stripe_info_t) ASSERT_EQ(s.aligned_chunk_offset_to_logical_offset(2*s.get_chunk_size()), 2*s.get_stripe_width()); + // Stripe 1 + 1 chunk for 10 stripes needs to read 11 stripes starting + // from 1 because there is a partial stripe at the start and end ASSERT_EQ(s.chunk_aligned_offset_len_to_chunk( make_pair(swidth+s.get_chunk_size(), 10*swidth)), - make_pair(s.get_chunk_size(), 10*s.get_chunk_size())); + make_pair(s.get_chunk_size(), 11*s.get_chunk_size())); + // Stripe 1 + 0 chunks for 10 stripes needs to read 10 stripes starting + // from 1 because there are no partial stripes ASSERT_EQ(s.chunk_aligned_offset_len_to_chunk(make_pair(swidth, 10*swidth)), make_pair(s.get_chunk_size(), 10*s.get_chunk_size())); - // round down offset if it's under stripe width + // Stripe 0 + 1 chunk for 10 stripes needs to read 11 stripes starting + // from 0 because there is a partial stripe at the start and end ASSERT_EQ(s.chunk_aligned_offset_len_to_chunk(make_pair(s.get_chunk_size(), 10*swidth)), - make_pair(0, 10*s.get_chunk_size())); + make_pair(0, 11*s.get_chunk_size())); - // round up size if above stripe + // Stripe 0 + 1 chunk for (10 stripes + 1 chunk) needs to read 11 stripes + // starting from 0 because there is a partial stripe at the start and end ASSERT_EQ(s.chunk_aligned_offset_len_to_chunk(make_pair(s.get_chunk_size(), 10*swidth + s.get_chunk_size())), make_pair(0, 11*s.get_chunk_size())); + // Stripe 0 + 2 chunks for (10 stripes + 2 chunks) needs to read 11 stripes + // starting from 0 because there is a partial stripe at the start + ASSERT_EQ(s.chunk_aligned_offset_len_to_chunk(make_pair(2*s.get_chunk_size(), + 10*swidth + 2*s.get_chunk_size())), + make_pair(0, 11*s.get_chunk_size())); + ASSERT_EQ(s.offset_len_to_stripe_bounds(make_pair(swidth-10, (uint64_t)20)), make_pair((uint64_t)0, 2*swidth)); }