From a41402a0102a3aa59c2a136116e9fee83b17f1ef Mon Sep 17 00:00:00 2001
From: Alex Ainscow
Date: Wed, 11 Jun 2025 16:30:40 +0100
Subject: [PATCH] osd: Fix attribute recovery in rare recovery scenario

When recovering attributes, we read them from the first potential
primary; if that read fails, we attempt to read them from another
potential primary. The problem is that the code which calculates which
shards to read for a recovery only takes into account *data* and not
where the attributes are. As such, if the second read only requires a
non-primary shard, the attribute read fails and the OSD panics.

The fix is to detect this scenario and schedule an empty read to a
shard that holds the attributes, which the attribute-read code can then
use to recover them.

The code was also incorrectly interpreting a failed attribute read
during recovery as meaning a "fast_read", and no attribute recovery
would occur in that case.

Signed-off-by: Alex Ainscow
(cherry picked from commit 98eae78f7629295800cb7dbb252cac7d0feff680)
---
 src/osd/ECCommon.cc | 35 ++++++++++++++++++++++++++++++++---
 src/osd/ECCommon.h  |  2 +-
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/src/osd/ECCommon.cc b/src/osd/ECCommon.cc
index 45f1a1c6909..cdc7f1d7831 100644
--- a/src/osd/ECCommon.cc
+++ b/src/osd/ECCommon.cc
@@ -307,17 +307,18 @@ int ECCommon::ReadPipeline::get_remaining_shards(
     read_result_t &read_result,
     read_request_t &read_request,
     const bool for_recovery,
-    const bool fast_read) {
-  shard_id_map<pg_shard_t> shards(sinfo.get_k_plus_m());
+    bool want_attrs) {
   shard_id_set error_shards;
   for (auto &shard: std::views::keys(read_result.errors)) {
     error_shards.insert(shard);
   }
 
+  /* fast-reads should already have scheduled reads to everything, so
+   * this function is irrelevant. */
   const int r = get_min_avail_to_read_shards(
     hoid,
     for_recovery,
-    fast_read,
+    false,
     read_request,
     error_shards);
 
@@ -327,6 +328,8 @@ int ECCommon::ReadPipeline::get_remaining_shards(
     return -EIO;
   }
 
+  bool need_attr_request = want_attrs;
+
   // Rather than repeating whole read, we can remove everything we already have.
   for (auto iter = read_request.shard_reads.begin();
        iter != read_request.shard_reads.end();) {
@@ -343,10 +346,36 @@ int ECCommon::ReadPipeline::get_remaining_shards(
     if (do_erase) {
       iter = read_request.shard_reads.erase(iter);
     } else {
+      if (need_attr_request && !sinfo.is_nonprimary_shard(shard_id)) {
+        // We have a suitable candidate for attr requests.
+        need_attr_request = false;
+      }
       ++iter;
     }
   }
 
+  if (need_attr_request) {
+    // This happens if we got an error from the shard we were requesting the
+    // attributes from and the recovery does not require any non-primary
+    // shards. The example seen in test was a 2+1 EC being recovered. Shards 0
+    // and 2 were being requested and read as part of recovery. The shard
+    // reading the attributes failed. The recovery required shard 1, but that
+    // does not have valid attributes on it, so the attribute read failed.
+    // This is a pretty obscure case, so no need to optimise that much. Do an
+    // empty read!
+    shard_id_set have;
+    shard_id_map<pg_shard_t> pg_shards(sinfo.get_k_plus_m());
+    get_all_avail_shards(hoid, have, pg_shards, for_recovery, error_shards);
+    for (auto shard : have) {
+      if (!sinfo.is_nonprimary_shard(shard)) {
+        shard_read_t shard_read;
+        shard_read.pg_shard = pg_shards[shard];
+        read_request.shard_reads.insert(shard, shard_read);
+        break;
+      }
+    }
+  }
+
   return read_request.shard_reads.empty()?1:0;
 }
 
diff --git a/src/osd/ECCommon.h b/src/osd/ECCommon.h
index 85e368f3b77..a685a50291e 100644
--- a/src/osd/ECCommon.h
+++ b/src/osd/ECCommon.h
@@ -378,7 +378,7 @@ struct ECCommon {
       read_result_t &read_result,
       read_request_t &read_request,
       bool for_recovery,
-      bool fast_read);
+      bool want_attrs);
 
     void get_all_avail_shards(
       const hobject_t &hoid,
-- 
2.39.5
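
For readers skimming the patch, below is a minimal standalone C++ sketch of the idea behind
the new need_attr_request fallback. It is not Ceph code and does not use the Ceph types:
shard_id, shard_read, is_nonprimary_shard and ensure_attr_source are names invented for the
illustration, and the assumption that attributes live only on shards 0 and 2 of a 2+1 profile
follows the example given in the commit message.

// Standalone sketch (not Ceph code) of the need_attr_request fallback.
#include <iostream>
#include <map>
#include <set>

using shard_id = int;

struct shard_read {
  bool empty_read = false;  // true when the read exists only to fetch attributes
};

// Assumption for this sketch: in a 2+1 profile the attributes live on the
// "primary" shards 0 and 2, so shard 1 is the only non-primary shard.
bool is_nonprimary_shard(shard_id s) { return s == 1; }

// After the data-only retry plan is built, make sure at least one shard that
// holds attributes is still being read; otherwise add an empty read to one.
void ensure_attr_source(std::map<shard_id, shard_read> &plan,
                        const std::set<shard_id> &available) {
  for (const auto &[shard, read] : plan) {
    (void)read;
    if (!is_nonprimary_shard(shard)) {
      return;  // an attribute-capable shard is already part of the plan
    }
  }
  for (shard_id shard : available) {
    if (!is_nonprimary_shard(shard)) {
      plan[shard] = shard_read{/*empty_read=*/true};  // zero-length read, attrs only
      return;
    }
  }
}

int main() {
  // Scenario from the commit message: the read carrying the attribute request
  // failed, shard 2 was already read, so the retry only needs shard 1.
  std::map<shard_id, shard_read> retry_plan = {{1, shard_read{}}};
  std::set<shard_id> available = {1, 2};  // the errored shard is excluded

  ensure_attr_source(retry_plan, available);

  for (const auto &[shard, read] : retry_plan) {
    std::cout << "read shard " << shard
              << (read.empty_read ? " (attributes only)" : "") << "\n";
  }
  return 0;
}

Compiled with any C++17 compiler, the sketch prints a data read for shard 1 plus an
attributes-only read for shard 2, which mirrors the extra empty read the patch schedules.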