osd: add EIO injection on EC pool for mdata and data error

author Zhi Z Zhang <zhangz@yahoo-inc.com>

Tue, 10 Feb 2015 10:52:39 +0000 (10:52 +0000)

committer Zhi Z Zhang <zhangz@yahoo-inc.com>

Tue, 10 Feb 2015 10:52:39 +0000 (10:52 +0000)
author Zhi Z Zhang <zhangz@yahoo-inc.com>
Tue, 10 Feb 2015 10:52:39 +0000 (10:52 +0000)
committer Zhi Z Zhang <zhangz@yahoo-inc.com>
Tue, 10 Feb 2015 10:52:39 +0000 (10:52 +0000)
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc

index 27296bcefdec6ea319b5deac32af5d8cbdb7a1f5..9451fbfc28d8088d9bf5a8fa3761cb40741ed200 100644 (file)
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -2041,7 +2041,8 @@ void OSD::final_init()
      "injectdataerr",
      "injectdataerr " \
      "name=pool,type=CephString " \
-    "name=objname,type=CephObjectname",
+    "name=objname,type=CephObjectname " \
+    "name=shardid,type=CephInt,req=false,range=0|255",
      test_ops_hook,
      "inject data error into omap");
    assert(r == 0);
@@ -2050,7 +2051,8 @@ void OSD::final_init()
      "injectmdataerr",
      "injectmdataerr " \
      "name=pool,type=CephString " \
-    "name=objname,type=CephObjectname",
+    "name=objname,type=CephObjectname " \
+    "name=shardid,type=CephInt,req=false,range=0|255",
      test_ops_hook,
      "inject metadata error");
    assert(r == 0);
@@ -3939,8 +3941,8 @@ void OSD::check_ops_in_flight()
  //   setomapheader <pool-id> [namespace/]<obj-name> <header>
  //   getomap <pool> [namespace/]<obj-name>
  //   truncobj <pool-id> [namespace/]<obj-name> <newlen>
-//   injectmdataerr [namespace/]<obj-name>
-//   injectdataerr [namespace/]<obj-name>
+//   injectmdataerr <pool> [namespace/]<obj-name> [shardid]
+//   injectdataerr <pool> [namespace/]<obj-name> [shardid]
  void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
       std::string command, cmdmap_t& cmdmap, ostream &ss)
  {
@@ -3983,13 +3985,19 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
        ss << "Invalid namespace/objname";
        return;
      }
+
+    int64_t shardid;
+    cmd_getval(service->cct, cmdmap, "shardid", shardid, int64_t(shard_id_t::NO_SHARD));
+    hobject_t obj(object_t(objname), string(""), CEPH_NOSNAP, rawpg.ps(), pool, nspace);
+    ghobject_t gobj(obj, ghobject_t::NO_GEN, shard_id_t(uint8_t(shardid)));
+    spg_t pgid(curmap->raw_pg_to_pg(rawpg), shard_id_t(shardid));
      if (curmap->pg_is_ec(rawpg)) {
-      ss << "Must not call on ec pool";
-      return;
+        if ((command != "injectdataerr") && (command != "injectmdataerr")) {
+            ss << "Must not call on ec pool, except injectdataerr or injectmdataerr";
+            return;
+        }
      }
-    spg_t pgid = spg_t(curmap->raw_pg_to_pg(rawpg), shard_id_t::NO_SHARD);
  
-    hobject_t obj(object_t(objname), string(""), CEPH_NOSNAP, rawpg.ps(), pool, nspace);
      ObjectStore::Transaction t;
  
      if (command == "setomapval") {
@@ -4055,10 +4063,10 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
        else
         ss << "ok";
      } else if (command == "injectdataerr") {
-      store->inject_data_error(obj);
+      store->inject_data_error(gobj);
        ss << "ok";
      } else if (command == "injectmdataerr") {
-      store->inject_mdata_error(obj);
+      store->inject_mdata_error(gobj);
        ss << "ok";
      }
      return;
diff --git a/src/test/erasure-code/test-erasure-code.sh b/src/test/erasure-code/test-erasure-code.sh

index 04652d02e85a90cd806a34837ac32046a129e43d..2c13d95750f5613a15a1657cbb9487bcc9e1d019 100755 (executable)
--- a/src/test/erasure-code/test-erasure-code.sh
+++ b/src/test/erasure-code/test-erasure-code.sh
@@ -42,7 +42,7 @@ function run() {
      FUNCTIONS=${FUNCTIONS:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
      for TEST_function in $FUNCTIONS ; do
          if ! $TEST_function $dir ; then
-            cat $dir/a/log
+            #cat $dir/a/log
              return 1
          fi
      done
@@ -68,6 +68,8 @@ function delete_pool() {
  function rados_put_get() {
      local dir=$1
      local poolname=$2
+    local objname=${3:-SOMETHING}
+
  
      for marker in AAA BBB CCCC DDDD ; do
          printf "%*s" 1024 $marker
@@ -76,8 +78,8 @@ function rados_put_get() {
      #
      # get and put an object, compare they are equal
      #
-    ./rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1
-    ./rados --pool $poolname get SOMETHING $dir/COPY || return 1
+    ./rados --pool $poolname put $objname $dir/ORIGINAL || return 1
+    ./rados --pool $poolname get $objname $dir/COPY || return 1
      diff $dir/ORIGINAL $dir/COPY || return 1
      rm $dir/COPY
  
@@ -86,11 +88,11 @@ function rados_put_get() {
      # check the object can still be retrieved, which implies
      # recovery
      #
-    local -a initial_osds=($(get_osds $poolname SOMETHING))
+    local -a initial_osds=($(get_osds $poolname $objname))
      local last=$((${#initial_osds[@]} - 1))
      ./ceph osd out ${initial_osds[$last]} || return 1
-    ! get_osds $poolname SOMETHING | grep '\<'${initial_osds[$last]}'\>' || return 1
-    ./rados --pool $poolname get SOMETHING $dir/COPY || return 1
+    ! get_osds $poolname $objname | grep '\<'${initial_osds[$last]}'\>' || return 1
+    ./rados --pool $poolname get $objname $dir/COPY || return 1
      diff $dir/ORIGINAL $dir/COPY || return 1
      ./ceph osd in ${initial_osds[$last]} || return 1
  
@@ -272,6 +274,147 @@ function TEST_chunk_mapping() {
      ./ceph osd erasure-code-profile rm remap-profile
  }
  
+# this test case aims to reproduce the original OSD crash when hitting EIO,
+# see https://github.com/ceph/ceph/pull/2952
+# but the crashing behavior seems to have changed as of the latest giant, so
+# this test case has been modified accordingly
+#
+function TEST_rados_get_dataeio_no_subreadall_jerasure() {
+    local dir=$1
+
+    # check if osd_pool_erasure_code_subread_all is enabled or not
+    # turn it off if it is enabled
+    #
+    local subread=1
+    CEPH_ARGS='' ./ceph --admin-daemon $dir/ceph-osd.0.asok config get \
+        "osd_pool_erasure_code_subread_all" | grep "true"
+    if (( $? == 0 )); then
+        subread=0
+        for id in $(seq 0 10) ; do
+            kill_osd_daemon $dir $id || return 1
+            run_osd $dir $id "--osd_pool_erasure_code_subread_all=false" || return 1
+        done
+    fi
+     
+    local poolname=pool-jerasure
+    local profile=profile-jerasure
+    ./ceph osd erasure-code-profile set $profile \
+        plugin=jerasure \
+        k=4 m=2 \
+        ruleset-failure-domain=osd || return 1
+    ./ceph osd pool create $poolname 12 12 erasure $profile \
+        || return 1
+
+    # inject eio on primary OSD (0), then peer OSD (1)
+    # OSD with eio injection will crash at reading object
+    #
+    for shardid in 0 1; do
+        local objname=obj-eio-$$-$shardid
+        local -a initial_osds=($(get_osds $poolname $objname))
+        local last=$((${#initial_osds[@]} - 1))
+
+         CEPH_ARGS='' ./ceph --admin-daemon $dir/ceph-osd.${initial_osds[$shardid]}.asok config set \
+            filestore_debug_inject_read_err true || return 1
+         CEPH_ARGS='' ./ceph --admin-daemon $dir/ceph-osd.${initial_osds[$shardid]}.asok injectdataerr \
+            $poolname $objname $shardid || return 1
+         rados_put_get $dir $poolname $objname || return 1
+         check_osd_status ${initial_osds[$shardid]} "down" || return 1
+
+         # recreate crashed OSD with the same id since I don't know how to restart it :(
+         if (( $subread == 0 )); then
+            #if (( $shardid != 0 )); then
+            #    run_osd $dir ${initial_osds[0]} "--osd_pool_erasure_code_subread_all=false" || return 1
+            #fi
+            run_osd $dir ${initial_osds[$shardid]} "--osd_pool_erasure_code_subread_all=false" || return 1
+         else
+            #if (( $shardid != 0 )); then
+            #    run_osd $dir ${initial_osds[0]} || return 1
+            #fi
+            run_osd $dir ${initial_osds[$shardid]} || return 1
+         fi
+    done
+
+    delete_pool $poolname
+    ./ceph osd erasure-code-profile rm $profile
+}
+
+# this test case is intended to test the fix in https://github.com/ceph/ceph/pull/2952
+# it can exercise both client read and recovery read on EIO,
+# but at the moment the above pull request ONLY resolves client read on EIO,
+# so this case will fail in function *rados_put_get* when one OSD is marked out.
+# the case is therefore disabled for now, until the crashes on both client read
+# and recovery read on EIO are fixed
+#
+#function TEST_rados_get_dataeio_subreadall_jerasure() {
+#    local dir=$1
+#
+#    # check if osd_pool_erasure_code_subread_all is enabled or not
+#    # turn it on if it is disabled
+#    # skip this case if osd_pool_erasure_code_subread_all is not supported
+#    #
+#    CEPH_ARGS='' ./ceph --admin-daemon $dir/ceph-osd.0.asok config get \
+#        "osd_pool_erasure_code_subread_all" | grep "error"
+#    if (( $? == 0 )); then
+#        echo "Skip this case because osd_pool_erasure_code_subread_all is not supported"
+#        return 0
+#    fi
+#
+#    # make sure osd_pool_erasure_code_subread_all is true on every OSD
+#    #
+#    for id in $(seq 0 10) ; do
+#        CEPH_ARGS='' ./ceph --admin-daemon $dir/ceph-osd.$id.asok config get \
+#            "osd_pool_erasure_code_subread_all" | grep "true"
+#        if (( $? != 0 )); then
+#            kill_osd_daemon $dir $id || return 1
+#            run_osd $dir $id "--osd_pool_erasure_code_subread_all=true" || return 1
+#        fi
+#    done
+#     
+#    local poolname=pool-jerasure
+#    local profile=profile-jerasure
+#    ./ceph osd erasure-code-profile set $profile \
+#        plugin=jerasure \
+#        k=4 m=2 \
+#        ruleset-failure-domain=osd || return 1
+#    ./ceph osd pool create $poolname 12 12 erasure $profile \
+#        || return 1
+#
+#    # inject eio on primary OSD (0), then peer OSD (1)
+#    # primary OSD will not crash at reading object but pg will be marked as inconsistent
+#    #
+#    for shardid in 0 1; do
+#        local objname=obj-eio-$$-$shardid
+#        local -a initial_osds=($(get_osds $poolname $objname))
+#        local last=$((${#initial_osds[@]} - 1))
+#        local pg=$(get_pg $poolname $objname)
+#
+#        CEPH_ARGS='' ./ceph --admin-daemon $dir/ceph-osd.${initial_osds[$shardid]}.asok config set \
+#            filestore_debug_inject_read_err true || return 1
+#        CEPH_ARGS='' ./ceph --admin-daemon $dir/ceph-osd.${initial_osds[$shardid]}.asok injectdataerr \
+#            $poolname $objname $shardid || return 1
+#        rados_put_get $dir $poolname $objname || return 1
+#        check_osd_status ${initial_osds[0]} "up" || return 1
+#
+#        # the reason to skip this check when current shardid != 0 is that the first k chunks returned is not
+#        # always containing current shardid, so this pg may not be marked as inconsistent
+#        # However, primary OSD (when shardid == 0) is always the faster one normally, so we can check pg status
+#        if (( $shardid == 0 )); then
+#            check_pg_status $pg "inconsistent" || return 1
+#        fi
+#
+#        # recreate crashed OSD with the same id since I don't know how to restart it :(
+#        if (( $shardid != 0 )); then
+#            kill_osd_daemon $dir ${initial_osds[0]} || return 1
+#            run_osd $dir ${initial_osds[0]} "--osd_pool_erasure_code_subread_all=true" || return 1
+#        fi
+#        kill_osd_daemon $dir ${initial_osds[$shardid]} || return 1
+#        run_osd $dir ${initial_osds[$shardid]} "--osd_pool_erasure_code_subread_all=true" || return 1
+#    done
+#
+#    delete_pool $poolname
+#    ./ceph osd erasure-code-profile rm $profile
+#}
+
  main test-erasure-code
  
  # Local Variables:
diff --git a/src/test/osd/osd-test-helpers.sh b/src/test/osd/osd-test-helpers.sh

index a1cef91038d1a5e1ccac3abba3af0e15e943fcf7..34e07443951c98fd241f88a19c644993c5719f91 100644 (file)
--- a/src/test/osd/osd-test-helpers.sh
+++ b/src/test/osd/osd-test-helpers.sh
@@ -87,3 +87,49 @@ function get_pg() {
      ./ceph osd map $poolname $objectname | \
         perl -p -e 's/.*\((.*?)\) -> up.*/$1/'
  }
+
+function kill_osd_daemon() {
+    local dir=$1
+    local osdid=$2
+    pidfile=$(find $dir | grep pidfile | grep "osd-$osdid\.")
+    pid=$(cat $pidfile)
+    for try in 0 1 1 1 2 3 ; do
+             alive=0
+               kill -9 $pid 2> /dev/null || break
+             alive=1
+        sleep $try
+    done
+    return $alive
+}
+
+function check_osd_status() {
+    local osdid=$1
+    local st=$2
+       
+    status=1   
+    for ((i=0; i < 30; i++)); do
+        if ! ceph osd dump | grep "osd.$osdid $st"; then
+            sleep 1
+        else
+            status=0
+            break
+        fi
+    done
+    return $status
+}
+
+function check_pg_status() {
+    local pgid=$1
+    local st=$2
+
+    status=1   
+    for ((i=0; i < 30; i++)); do
+        if ! ceph pg dump | grep "$pgid" | grep "$st"; then
+            sleep 1
+        else
+            status=0
+            break
+        fi
+    done
+    return $status
+}
author	Zhi Z Zhang <zhangz@yahoo-inc.com>
	Tue, 10 Feb 2015 10:52:39 +0000 (10:52 +0000)
committer	Zhi Z Zhang <zhangz@yahoo-inc.com>
	Tue, 10 Feb 2015 10:52:39 +0000 (10:52 +0000)
src/osd/OSD.cc		patch \| blob \| history
src/test/erasure-code/test-erasure-code.sh		patch \| blob \| history
src/test/osd/osd-test-helpers.sh		patch \| blob \| history