"injectdataerr",
"injectdataerr " \
"name=pool,type=CephString " \
- "name=objname,type=CephObjectname",
+ "name=objname,type=CephObjectname " \
+ "name=shardid,type=CephInt,req=false,range=0|255",
test_ops_hook,
"inject data error into omap");
assert(r == 0);
"injectmdataerr",
"injectmdataerr " \
"name=pool,type=CephString " \
- "name=objname,type=CephObjectname",
+ "name=objname,type=CephObjectname " \
+ "name=shardid,type=CephInt,req=false,range=0|255",
test_ops_hook,
"inject metadata error");
assert(r == 0);
// setomapheader <pool-id> [namespace/]<obj-name> <header>
// getomap <pool> [namespace/]<obj-name>
// truncobj <pool-id> [namespace/]<obj-name> <newlen>
-// injectmdataerr [namespace/]<obj-name>
-// injectdataerr [namespace/]<obj-name>
+// injectmdataerr [namespace/]<obj-name> [shardid]
+// injectdataerr [namespace/]<obj-name> [shardid]
void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
std::string command, cmdmap_t& cmdmap, ostream &ss)
{
ss << "Invalid namespace/objname";
return;
}
+
+ int64_t shardid;
+ cmd_getval(service->cct, cmdmap, "shardid", shardid, int64_t(shard_id_t::NO_SHARD));
+ hobject_t obj(object_t(objname), string(""), CEPH_NOSNAP, rawpg.ps(), pool, nspace);
+ ghobject_t gobj(obj, ghobject_t::NO_GEN, shard_id_t(uint8_t(shardid)));
+ spg_t pgid(curmap->raw_pg_to_pg(rawpg), shard_id_t(shardid));
if (curmap->pg_is_ec(rawpg)) {
- ss << "Must not call on ec pool";
- return;
+ if ((command != "injectdataerr") && (command != "injectmdataerr")) {
+ ss << "Must not call on ec pool, except injectdataerr or injectmdataerr";
+ return;
+ }
}
- spg_t pgid = spg_t(curmap->raw_pg_to_pg(rawpg), shard_id_t::NO_SHARD);
- hobject_t obj(object_t(objname), string(""), CEPH_NOSNAP, rawpg.ps(), pool, nspace);
ObjectStore::Transaction t;
if (command == "setomapval") {
else
ss << "ok";
} else if (command == "injectdataerr") {
- store->inject_data_error(obj);
+ store->inject_data_error(gobj);
ss << "ok";
} else if (command == "injectmdataerr") {
- store->inject_mdata_error(obj);
+ store->inject_mdata_error(gobj);
ss << "ok";
}
return;
FUNCTIONS=${FUNCTIONS:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for TEST_function in $FUNCTIONS ; do
if ! $TEST_function $dir ; then
- cat $dir/a/log
+ #cat $dir/a/log
return 1
fi
done
function rados_put_get() {
local dir=$1
local poolname=$2
+ local objname=${3:-SOMETHING}
+
for marker in AAA BBB CCCC DDDD ; do
printf "%*s" 1024 $marker
#
# get and put an object, compare they are equal
#
- ./rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1
- ./rados --pool $poolname get SOMETHING $dir/COPY || return 1
+ ./rados --pool $poolname put $objname $dir/ORIGINAL || return 1
+ ./rados --pool $poolname get $objname $dir/COPY || return 1
diff $dir/ORIGINAL $dir/COPY || return 1
rm $dir/COPY
# check the object can still be retrieved, which implies
# recovery
#
- local -a initial_osds=($(get_osds $poolname SOMETHING))
+ local -a initial_osds=($(get_osds $poolname $objname))
local last=$((${#initial_osds[@]} - 1))
./ceph osd out ${initial_osds[$last]} || return 1
- ! get_osds $poolname SOMETHING | grep '\<'${initial_osds[$last]}'\>' || return 1
- ./rados --pool $poolname get SOMETHING $dir/COPY || return 1
+ ! get_osds $poolname $objname | grep '\<'${initial_osds[$last]}'\>' || return 1
+ ./rados --pool $poolname get $objname $dir/COPY || return 1
diff $dir/ORIGINAL $dir/COPY || return 1
./ceph osd in ${initial_osds[$last]} || return 1
./ceph osd erasure-code-profile rm remap-profile
}
+# this test case is aimed at reproducing the original OSD crash when hitting EIO
+# see https://github.com/ceph/ceph/pull/2952
+# but the original crashing behavior seems to have changed as of the latest giant,
+# so this test case has been modified accordingly
+#
+function TEST_rados_get_dataeio_no_subreadall_jerasure() {
+    local dir=$1
+
+    # Extra argument handed to run_osd every time this test (re)starts an OSD.
+    # It stays empty unless osd_pool_erasure_code_subread_all is reported as
+    # "true" by osd.0, in which case OSDs 0..10 are restarted with the option
+    # forced off. The variable is expanded unquoted on purpose: an empty
+    # value must add no argument at all to the run_osd command line.
+    #
+    local subread_off_arg=
+    if CEPH_ARGS='' ./ceph --admin-daemon $dir/ceph-osd.0.asok config get \
+        "osd_pool_erasure_code_subread_all" | grep "true" ; then
+        subread_off_arg="--osd_pool_erasure_code_subread_all=false"
+        for id in $(seq 0 10) ; do
+            kill_osd_daemon $dir $id || return 1
+            run_osd $dir $id $subread_off_arg || return 1
+        done
+    fi
+
+    local poolname=pool-jerasure
+    local profile=profile-jerasure
+    ./ceph osd erasure-code-profile set $profile \
+        plugin=jerasure \
+        k=4 m=2 \
+        ruleset-failure-domain=osd || return 1
+    ./ceph osd pool create $poolname 12 12 erasure $profile \
+        || return 1
+
+    # inject eio on primary OSD (0), then peer OSD (1)
+    # OSD with eio injection will crash at reading object
+    #
+    for shardid in 0 1; do
+        local objname=obj-eio-$$-$shardid
+        local -a initial_osds=($(get_osds $poolname $objname))
+        # OSD holding the shard under test, and its admin socket
+        local eio_osd=${initial_osds[$shardid]}
+        local eio_asok=$dir/ceph-osd.$eio_osd.asok
+
+        CEPH_ARGS='' ./ceph --admin-daemon $eio_asok config set \
+            filestore_debug_inject_read_err true || return 1
+        CEPH_ARGS='' ./ceph --admin-daemon $eio_asok injectdataerr \
+            $poolname $objname $shardid || return 1
+        rados_put_get $dir $poolname $objname || return 1
+        check_osd_status $eio_osd "down" || return 1
+
+        # recreate the crashed OSD with the same id (and the same subread
+        # setting as above) before moving on to the next shard
+        run_osd $dir $eio_osd $subread_off_arg || return 1
+    done
+
+    delete_pool $poolname
+    ./ceph osd erasure-code-profile rm $profile
+}
+
+# this test case is aimed at testing the fix from https://github.com/ceph/ceph/pull/2952
+# it can exercise both client read and recovery read on EIO
+# but at this moment, the above pull request ONLY resolves client read on EIO
+# so this case will fail in function *rados_put_get* when one OSD is marked out
+# so this case is disabled for now, until the crashes on both client read and
+# recovery read on EIO are fixed
+#
+#function TEST_rados_get_dataeio_subreadall_jerasure() {
+# local dir=$1
+#
+# # check if osd_pool_erasure_code_subread_all is enabled or not
+# # turn it on if it is disabled
+# # skip this case if osd_pool_erasure_code_subread_all is not supported
+# #
+# CEPH_ARGS='' ./ceph --admin-daemon $dir/ceph-osd.0.asok config get \
+# "osd_pool_erasure_code_subread_all" | grep "error"
+# if (( $? == 0 )); then
+# echo "Skip this case because osd_pool_erasure_code_subread_all is not supported"
+# return 0
+# fi
+#
+# # make sure osd_pool_erasure_code_subread_all is true on every OSD
+# #
+# for id in $(seq 0 10) ; do
+# CEPH_ARGS='' ./ceph --admin-daemon $dir/ceph-osd.$id.asok config get \
+# "osd_pool_erasure_code_subread_all" | grep "true"
+# if (( $? != 0 )); then
+# kill_osd_daemon $dir $id || return 1
+# run_osd $dir $id "--osd_pool_erasure_code_subread_all=true" || return 1
+# fi
+# done
+#
+# local poolname=pool-jerasure
+# local profile=profile-jerasure
+# ./ceph osd erasure-code-profile set $profile \
+# plugin=jerasure \
+# k=4 m=2 \
+# ruleset-failure-domain=osd || return 1
+# ./ceph osd pool create $poolname 12 12 erasure $profile \
+# || return 1
+#
+# # inject eio on primary OSD (0), then peer OSD (1)
+# # primary OSD will not crash at reading object but pg will be marked as inconsistent
+# #
+# for shardid in 0 1; do
+# local objname=obj-eio-$$-$shardid
+# local -a initial_osds=($(get_osds $poolname $objname))
+# local last=$((${#initial_osds[@]} - 1))
+# local pg=$(get_pg $poolname $objname)
+#
+# CEPH_ARGS='' ./ceph --admin-daemon $dir/ceph-osd.${initial_osds[$shardid]}.asok config set \
+# filestore_debug_inject_read_err true || return 1
+# CEPH_ARGS='' ./ceph --admin-daemon $dir/ceph-osd.${initial_osds[$shardid]}.asok injectdataerr \
+# $poolname $objname $shardid || return 1
+# rados_put_get $dir $poolname $objname || return 1
+# check_osd_status ${initial_osds[0]} "up" || return 1
+#
+# # the reason to skip this check when current shardid != 0 is that the first k chunks returned is not
+# # always containing current shardid, so this pg may not be marked as inconsistent
+# # However, primary OSD (when shardid == 0) is always the faster one normally, so we can check pg status
+# if (( $shardid == 0 )); then
+# check_pg_status $pg "inconsistent" || return 1
+# fi
+#
+# # recreate crashed OSD with the same id since I don't know how to restart it :(
+# if (( $shardid != 0 )); then
+# kill_osd_daemon $dir ${initial_osds[0]} || return 1
+# run_osd $dir ${initial_osds[0]} "--osd_pool_erasure_code_subread_all=true" || return 1
+# fi
+# kill_osd_daemon $dir ${initial_osds[$shardid]} || return 1
+# run_osd $dir ${initial_osds[$shardid]} "--osd_pool_erasure_code_subread_all=true" || return 1
+# done
+#
+# delete_pool $poolname
+# ./ceph osd erasure-code-profile rm $profile
+#}
+
main test-erasure-code
# Local Variables: