osd: Fix attribute recover in rare recovery scenario

author Alex Ainscow <aainscow@uk.ibm.com>

Wed, 11 Jun 2025 15:30:40 +0000 (16:30 +0100)

committer Jon <jonathan.bailey1@ibm.com>

Fri, 3 Oct 2025 13:31:23 +0000 (14:31 +0100)
author Alex Ainscow <aainscow@uk.ibm.com>
Wed, 11 Jun 2025 15:30:40 +0000 (16:30 +0100)
committer Jon <jonathan.bailey1@ibm.com>
Fri, 3 Oct 2025 13:31:23 +0000 (14:31 +0100)
diff --git a/src/osd/ECCommon.cc b/src/osd/ECCommon.cc

index 9fb14156f55791b0dfdb217f8a35790e8e798595..51e2e19eae82585ad9cd8e48641f31c39946b1c2 100644 (file)
--- a/src/osd/ECCommon.cc
+++ b/src/osd/ECCommon.cc
@@ -322,17 +322,18 @@ int ECCommon::ReadPipeline::get_remaining_shards(
      read_result_t &read_result,
      read_request_t &read_request,
      const bool for_recovery,
-    const bool fast_read) {
-  shard_id_map<pg_shard_t> shards(sinfo.get_k_plus_m());
+    bool want_attrs) {
    set<pg_shard_t> error_shards;
    for (auto &shard: std::views::keys(read_result.errors)) {
      error_shards.insert(shard);
    }
  
+  /* fast-reads should already have scheduled reads to everything, so
+   * this function is irrelevant. */
    const int r = get_min_avail_to_read_shards(
      hoid,
      for_recovery,
-    fast_read,
+    false,
      read_request,
      error_shards);
  
@@ -342,6 +343,8 @@ int ECCommon::ReadPipeline::get_remaining_shards(
      return -EIO;
    }
  
+  bool need_attr_request = want_attrs;
+
    // Rather than repeating whole read, we can remove everything we already have.
    for (auto iter = read_request.shard_reads.begin();
         iter != read_request.shard_reads.end();) {
@@ -358,10 +361,36 @@ int ECCommon::ReadPipeline::get_remaining_shards(
      if (do_erase) {
        iter = read_request.shard_reads.erase(iter);
      } else {
+      if (need_attr_request && !sinfo.is_nonprimary_shard(shard_id)) {
+        // We have a suitable candidate for attr requests.
+        need_attr_request = false;
+      }
        ++iter;
      }
    }
  
+  if (need_attr_request) {
+    // This happens if we got an error from the shard where we were requesting
+    // the attributes from and the recovery does not require any non primary
+    // shards. The example seen in test was a 2+1 EC being recovered. shards 0
+    // and 2 were being requested and read as part of recovery. Shard was reading
+    // the attributes and failed. The recovery required shard 1, but that does
+    // not have valid attributes on it, so the attribute read failed.
+    // This is a pretty obscure case, so no need to optimise that much. Do an
+    // empty read!
+    shard_id_set have;
+    shard_id_map<pg_shard_t> pg_shards(sinfo.get_k_plus_m());
+    get_all_avail_shards(hoid, have, pg_shards, for_recovery, error_shards);
+    for (auto shard : have) {
+      if (!sinfo.is_nonprimary_shard(shard)) {
+        shard_read_t shard_read;
+        shard_read.pg_shard = pg_shards[shard];
+        read_request.shard_reads.insert(shard, shard_read_t());
+        break;
+      }
+    }
+  }
+
    return read_request.shard_reads.empty()?1:0;
  }
  
diff --git a/src/osd/ECCommon.h b/src/osd/ECCommon.h

index 85e368f3b77ce8e22d602cf36f2c6ddcfe6cd191..a685a50291eccd9563a8b912c0208a1a5e0873b6 100644 (file)
--- a/src/osd/ECCommon.h
+++ b/src/osd/ECCommon.h
@@ -378,7 +378,7 @@ struct ECCommon {
          read_result_t &read_result,
          read_request_t &read_request,
          bool for_recovery,
-        bool fast_read);
+        bool want_attrs);
  
      void get_all_avail_shards(
          const hobject_t &hoid,
author	Alex Ainscow <aainscow@uk.ibm.com>
	Wed, 11 Jun 2025 15:30:40 +0000 (16:30 +0100)
committer	Jon <jonathan.bailey1@ibm.com>
	Fri, 3 Oct 2025 13:31:23 +0000 (14:31 +0100)
src/osd/ECCommon.cc		patch \| blob \| history
src/osd/ECCommon.h		patch \| blob \| history