osd: retry to read object attrs at EC recovery

author cuixf <cuixiaofei@sangfor.com.cn>

Thu, 24 May 2018 00:01:25 +0000 (20:01 -0400)

committer ningtao63358 <ningtao@sangfor.com>

Fri, 1 Jun 2018 10:26:56 +0000 (06:26 -0400)
author cuixf <cuixiaofei@sangfor.com.cn>
Thu, 24 May 2018 00:01:25 +0000 (20:01 -0400)
committer ningtao63358 <ningtao@sangfor.com>
Fri, 1 Jun 2018 10:26:56 +0000 (06:26 -0400)
diff --git a/qa/standalone/erasure-code/test-erasure-eio.sh b/qa/standalone/erasure-code/test-erasure-eio.sh

index 309df13340cf74ac4ef18dba3098f054fb01dee8..a37258b2fd86327fa4049efb398ebc26284f6d22 100755 (executable)
--- a/qa/standalone/erasure-code/test-erasure-eio.sh
+++ b/qa/standalone/erasure-code/test-erasure-eio.sh
@@ -353,6 +353,37 @@ function TEST_rados_get_with_subreadall_eio_shard_1() {
      delete_erasure_coded_pool $poolname
  }
  
+# Test recovery when reading the object attrs fails
+function TEST_ec_object_attr_read_error() {
+    local dir=$1
+    local objname=myobject
+
+    setup_osds 7 || return 1
+
+    local poolname=pool-jerasure
+    create_erasure_coded_pool $poolname 3 2 || return 1
+
+    local primary_osd=$(get_primary $poolname $objname)
+    # Kill primary OSD
+    kill_daemons $dir TERM osd.${primary_osd} >&2 < /dev/null || return 1
+
+    # Write data
+    rados_put $dir $poolname $objname || return 1
+
+    # Inject eio on shard 1, which is the shard the attrs are read from
+    inject_eio ec mdata $poolname $objname $dir 1 || return 1
+
+    # Restart OSD
+    run_osd $dir ${primary_osd} || return 1
+
+    # Cluster should recover this object
+    wait_for_clean || return 1
+
+    rados_get $dir $poolname myobject || return 1
+
+    delete_erasure_coded_pool $poolname
+}
+
  # Test recovery the first k copies aren't all available
  function TEST_ec_single_recovery_error() {
      local dir=$1
diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc

index 50eb7583579a02178fde26342086555fd3dbbb59..151e35e41416857ce74d990ccd87fa1ff0c9b9ab 100644 (file)
--- a/src/osd/ECBackend.cc
+++ b/src/osd/ECBackend.cc
@@ -1059,6 +1059,8 @@ error:
         *i, ghobject_t::NO_GEN, shard),
        reply->attrs_read[*i]);
      if (r < 0) {
+      // If the read failed, we should not return the attrs either.
+      reply->attrs_read.erase(*i);
        reply->buffers_read.erase(*i);
        reply->errors[*i] = r;
      }
@@ -2342,13 +2344,21 @@ int ECBackend::send_all_remaining_reads(
    GenContext<pair<RecoveryMessages *, read_result_t& > &> *c =
      rop.to_read.find(hoid)->second.cb;
  
+  // (Note cuixf) If we need the attrs and the earlier read failed, try to read them again.
+  bool want_attrs =
+    rop.to_read.find(hoid)->second.want_attrs &&
+    (!rop.complete[hoid].attrs || rop.complete[hoid].attrs->empty());
+  if (want_attrs) {
+    dout(10) << __func__ << " want attrs again" << dendl;
+  }
+
    rop.to_read.erase(hoid);
    rop.to_read.insert(make_pair(
        hoid,
        read_request_t(
         offsets,
         shards,
-       false,
+       want_attrs,
         c)));
    do_read_op(rop);
    return 0;
author	cuixf <cuixiaofei@sangfor.com.cn>
	Thu, 24 May 2018 00:01:25 +0000 (20:01 -0400)
committer	ningtao63358 <ningtao@sangfor.com>
	Fri, 1 Jun 2018 10:26:56 +0000 (06:26 -0400)
qa/standalone/erasure-code/test-erasure-eio.sh		patch \| blob \| history
src/osd/ECBackend.cc		patch \| blob \| history