# Parameters:
# $1: the test directory
# $2: [in/out] an array of configuration values
+# $3: test name to appear in the output
#
# Argument 2 might look like this:
# (note: the dict contains all the elements required by standard_scrub_cluster,
# - pool_default_size: size of the pool
# - objects_per_pg: average number of objects per PG
# - object_size: size of each object in bytes
+# - stride: deep-scrub stride (bytes); if 0 - use default
# - modify_as_prim_cnt: number of objects to corrupt their Primary version
# - modify_as_repl_cnt: number of objects to corrupt one of their replicas
# - manipulations: a list of functions to perform on the objects
# ['msg']="Test message"
# ['objects_per_pg']="100"
# ['object_size']="256"
+# ['stride']="1048576"
# ['modify_as_prim_cnt']="10" # elements to corrupt their Primary version
# ['modify_as_repl_cnt']="10" # elements to corrupt one of their replicas
# ['manipulations']="delete_oi rm_object" the list of manipulations to perform
{
local dir=$1
local -n alargs=$2
+ local test_name=$3
local OSDS=${alargs['osds_num']}
local PGS=${alargs['pgs_in_pool']}
local OBJECT_SIZE=${alargs['object_size']:-256}
local modify_as_prim_cnt=${alargs['modify_as_prim_cnt']}
local modify_as_repl_cnt=${alargs['modify_as_repl_cnt']}
+ local stride_override=${alargs['stride']:-0}
local objects=$(($PGS * $OBJS_PER_PG))
# the total number of corrupted objects cannot exceed the number of objects
set -x
# Create some objects
- #create_objects_2 "$dir" "$poolname" "$objects" 256 4 || return 1
create_objects "$dir" "$poolname" "$objects" "$OBJECT_SIZE" 4 || return 1
echo "Pre-wait-forclean: $(date +%T.%N)"
wait_for_clean || return 1
ceph osd pool stats
+ if (( stride_override > 0 )); then
+ echo "Setting osd_deep_scrub_stride to $stride_override"
+ ceph tell osd.* config set osd_deep_scrub_stride "$stride_override"
+ fi
sleep 5
ceph pg dump pgs
+ # the actual stride (either a default or the override value)
+ (( extr_dbg >= 1 )) && echo "Getting actual osd_deep_scrub_stride: " && ceph tell osd.0 config show | jq -r '.osd_deep_scrub_stride'
+ local actual_stride=$(ceph tell osd.0 config show | jq -r '.osd_deep_scrub_stride')
echo "Pre dict creation: $(date +%T.%N)"
local start_dict=$(date +%s%N)
# disable rescheduling of the queue due to 'no-scrub' flags
ceph tell osd.* config set osd_scrub_backoff_ratio 0.9999
-
# -------------------------- step 2: corruption of objects --------------------------
kill_daemons $dir TERM osd || return 1
ceph tell osd.* config set osd_scrub_chunk_max 5
ceph tell osd.* config set osd_shallow_scrub_chunk_max 5
ceph tell osd.* config set osd_scrub_backoff_ratio 0.9999
+ if (( stride_override > 0 )); then
+ echo "Setting osd_deep_scrub_stride to $stride_override"
+ ceph tell osd.* config set osd_deep_scrub_stride "$stride_override"
+ fi
+ sleep 5
+ ceph pg dump pgs
+ # the actual stride (either a default or the override value)
+ (( extr_dbg >= 1 )) && echo "Getting actual osd_deep_scrub_stride: " && ceph tell osd.0 config show | jq -r '.osd_deep_scrub_stride'
# no auto-repair
ceph tell osd.* config set osd_scrub_auto_repair false
sleep 2
ceph pg dump pgs
- printf 'MSR NAUT %3d %3d %3d %3d/%3d %6d\n' "$OSDS" "$PGS" "$OBJS_PER_PG" \
- "$modify_as_prim_cnt" "$modify_as_repl_cnt" "$duration"
+ printf 'MSR @%15s@ NAUT %3d %3d %3d %3d/%3d %7d %7d/%7d %6d\n' "$test_name" "$OSDS" "$PGS" "$OBJS_PER_PG" \
+ "$modify_as_prim_cnt" "$modify_as_repl_cnt" "$OBJECT_SIZE" "$actual_stride" \
+ "$stride_override" "$duration"
for pg in "${!pg_pr[@]}"; do
echo "list-inconsistent for PG $pg"
rados -p $poolname list-inconsistent-obj $pg --format=json-pretty | jq '.' | wc -l
sleep 5
ceph pg dump pgs
- printf 'MSR REPR %3d %3d %3d %3d/%3d %6d\n' "$OSDS" "$PGS" "$OBJS_PER_PG" \
- "$modify_as_prim_cnt" "$modify_as_repl_cnt" "$duration"
+ printf 'MSR @%15s@ REPR %3d %3d %3d %3d/%3d %7d %7d/%7d %6d\n' "$test_name" "$OSDS" "$PGS" "$OBJS_PER_PG" \
+ "$modify_as_prim_cnt" "$modify_as_repl_cnt" "$OBJECT_SIZE" "$actual_stride" \
+ "$stride_override" "$duration"
dump_scrub_counters "$dir" "$OSDS" "After repair"
# -- collecting some data after the repair
wait_for_pool_scrubbed "$dir" "$poolid" saved_last_stamp
end_time=$(date +%s%N)
duration=$(( (end_time - start_time)/1000000 ))
- printf 'MSR REDE %3d %3d %3d %3d/%3d %6d\n' "$OSDS" "$PGS" "$OBJS_PER_PG" \
- "$modify_as_prim_cnt" "$modify_as_repl_cnt" "$duration"
+ printf 'MSR @%15s@ REDE %3d %3d %3d %3d/%3d %7d %7d/%7d %6d\n' "$test_name" "$OSDS" "$PGS" "$OBJS_PER_PG" \
+ "$modify_as_prim_cnt" "$modify_as_repl_cnt" "$OBJECT_SIZE" "$actual_stride" \
+ "$stride_override" "$duration"
sleep 3
ceph pg dump pgs
dump_scrub_counters "$dir" "$OSDS" "Final"
function TEST_time_measurements_basic_1()
{
# the following is enough to trigger the 'auto-repair operator-repair' bug
- local -A cls_conf=(
+ local -A cluster_conf=(
['osds_num']="3"
['pgs_in_pool']="8"
['pool_name']="test"
['modify_as_repl_cnt']="24" # elements to corrupt one of their replicas
['manipulations']="delete_oi"
)
- corrupt_and_measure "$1" cls_conf || return 1
+ corrupt_and_measure "$1" cluster_conf "basic_1" || return 1
}
function TEST_rmobj_1()
{
- local -A cls_conf=(
+ local -A cluster_conf=(
['osds_num']="3"
['pgs_in_pool']="4"
['pool_name']="test"
['modify_as_repl_cnt']="2"
['manipulations']="rm_object"
)
- corrupt_and_measure "$1" cls_conf || return 1
+ corrupt_and_measure "$1" cluster_conf "rmobj_1" || return 1
}
function TEST_time_measurements_basic_2()
{
- local -A cls_conf=(
+ local -A cluster_conf=(
['osds_num']="3"
['pgs_in_pool']="4"
['pool_name']="test"
['modify_as_repl_cnt']="0"
['manipulations']="delete_oi"
)
- corrupt_and_measure "$1" cls_conf || return 1
+ corrupt_and_measure "$1" cluster_conf "basic_2" || return 1
}
function TEST_time_measurements_basic_2b()
{
- local -A cls_conf=(
+ local -A cluster_conf=(
['osds_num']="3"
['pgs_in_pool']="8"
['pool_name']="test"
['modify_as_repl_cnt']="10"
['manipulations']="delete_oi"
)
- corrupt_and_measure "$1" cls_conf || return 1
+ corrupt_and_measure "$1" cluster_conf "basic_2b" || return 1
}
function TEST_time_measurements_basic_2c()
{
- local -A cls_conf=(
+ local -A cluster_conf=(
['osds_num']="3"
['pgs_in_pool']="4"
['pool_name']="test"
['modify_as_repl_cnt']="10"
['manipulations']="delete_oi"
)
- corrupt_and_measure "$1" cls_conf || return 1
+ corrupt_and_measure "$1" cluster_conf "basic_2c" || return 1
}
function TEST_time_measurements_basic_3()
{
- local -A cls_conf=(
+ local -A cluster_conf=(
['osds_num']="4"
['pgs_in_pool']="16"
['pool_name']="test"
['modify_as_repl_cnt']="10"
['manipulations']="delete_oi"
)
- corrupt_and_measure "$1" cls_conf || return 1
+ corrupt_and_measure "$1" cluster_conf "basic_3" || return 1
}
function TEST_time_large_objects()
{
- local -A cls_conf=(
+ local -A cluster_conf=(
['osds_num']="3"
['pgs_in_pool']="8"
['pool_name']="test"
['modify_as_repl_cnt']="10"
['manipulations']="rm_object"
)
- corrupt_and_measure "$1" cls_conf || return 1
+ corrupt_and_measure "$1" cluster_conf "large_obj" || return 1
}
-
function TEST_time_large_objects_2()
{
- local -A cls_conf=(
+ local -A cluster_conf=(
['osds_num']="3"
['pgs_in_pool']="8"
['pool_name']="test"
['modify_as_repl_cnt']="20"
['manipulations']="delete_oi"
)
- corrupt_and_measure "$1" cls_conf || return 1
+ corrupt_and_measure "$1" cluster_conf "large_obj_2" || return 1
+}
+
+function TEST_time_large_objects_2b()
+{
+ local -A cluster_conf=(
+ ['osds_num']="3"
+ ['pgs_in_pool']="8"
+ ['pool_name']="test"
+ ['pool_default_size']="3"
+ ['msg']="large_objs"
+ ['objects_per_pg']="8"
+ ['object_size']="4194300" # 4MB - 4bytes
+ ['stride']="4194304" # 4MB
+ ['modify_as_prim_cnt']="20"
+ ['modify_as_repl_cnt']="20"
+ ['manipulations']="delete_oi"
+ )
+ corrupt_and_measure "$1" cluster_conf "large_obj_2b" || return 1
+}
+
+
+function TEST_time_large_objects_2bs1M()
+{
+ local -A cluster_conf=(
+ ['osds_num']="3"
+ ['pgs_in_pool']="8"
+ ['pool_name']="test"
+ ['pool_default_size']="3"
+ ['msg']="large_objs"
+ ['objects_per_pg']="8"
+ ['object_size']="4194300" # 4MB - 4bytes
+ ['stride']="1048576" # 1MB
+ ['modify_as_prim_cnt']="20"
+ ['modify_as_repl_cnt']="20"
+ ['manipulations']="delete_oi"
+ )
+ corrupt_and_measure "$1" cluster_conf "large_obj_2bs1M" || return 1
+}
+
+function TEST_time_large_objects_3()
+{
+ // stride size unmodified from the default configuration value
+ local -A cluster_conf=(
+ ['osds_num']="3"
+ ['pgs_in_pool']="8"
+ ['pool_name']="test"
+ ['pool_default_size']="3"
+ ['msg']="large_objs"
+ ['objects_per_pg']="16"
+ ['object_size']="12582912" # 12MB
+ ['modify_as_prim_cnt']="20"
+ ['modify_as_repl_cnt']="20"
+ ['manipulations']="delete_oi"
+ )
+ corrupt_and_measure "$1" cluster_conf "large_obj_3" || return 1
}
main osd-scrub-dump "$@"