From: Loic Dachary Date: Tue, 3 Jun 2014 17:27:26 +0000 (+0200) Subject: erasure-code: remap chunks if not sequential X-Git-Tag: v0.85~38^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5c2d2320c0a58d192f264dc2884189acb5b7e044;p=ceph.git erasure-code: remap chunks if not sequential If the remap vector is not empty, use it to figure out the sequence of data chunks. http://tracker.ceph.com/issues/9025 Fixes: #9025 Signed-off-by: Loic Dachary --- diff --git a/src/erasure-code/ErasureCode.cc b/src/erasure-code/ErasureCode.cc index 44da1b32746b..e8cab9a6146d 100644 --- a/src/erasure-code/ErasureCode.cc +++ b/src/erasure-code/ErasureCode.cc @@ -83,11 +83,16 @@ int ErasureCode::encode(const set &want_to_encode, if (err) return err; unsigned blocksize = get_chunk_size(in.length()); + map sorted_encoded; for (unsigned int i = 0; i < k + m; i++) { - bufferlist &chunk = (*encoded)[i]; + bufferlist &chunk = sorted_encoded[i]; chunk.substr_of(out, i * blocksize, blocksize); } - encode_chunks(want_to_encode, encoded); + encode_chunks(want_to_encode, &sorted_encoded); + for (unsigned int i = 0; i < k + m; i++) { + int chunk = chunk_mapping.size() > 0 ? chunk_mapping[i] : i; + (*encoded)[chunk].claim(sorted_encoded[i]); + } for (unsigned int i = 0; i < k + m; i++) { if (want_to_encode.count(i) == 0) encoded->erase(i); @@ -219,12 +224,18 @@ int ErasureCode::decode_concat(const map &chunks, bufferlist *decoded) { set want_to_read; - for (unsigned int i = 0; i < get_data_chunk_count(); i++) - want_to_read.insert(i); + + for (unsigned int i = 0; i < get_data_chunk_count(); i++) { + int chunk = chunk_mapping.size() > i ? 
chunk_mapping[i] : i; + want_to_read.insert(chunk); + } map decoded_map; int r = decode(want_to_read, chunks, &decoded_map); - if (r == 0) - for (unsigned int i = 0; i < get_data_chunk_count(); i++) - decoded->claim_append(decoded_map[i]); + if (r == 0) { + for (unsigned int i = 0; i < get_data_chunk_count(); i++) { + int chunk = chunk_mapping.size() > i ? chunk_mapping[i] : i; + decoded->claim_append(decoded_map[chunk]); + } + } return r; } diff --git a/src/erasure-code/jerasure/ErasureCodeJerasure.cc b/src/erasure-code/jerasure/ErasureCodeJerasure.cc index 834a2d5d8845..4bd6aeb21458 100644 --- a/src/erasure-code/jerasure/ErasureCodeJerasure.cc +++ b/src/erasure-code/jerasure/ErasureCodeJerasure.cc @@ -72,6 +72,13 @@ int ErasureCodeJerasure::parse(const map &parameters, err |= to_int("k", parameters, &k, DEFAULT_K, ss); err |= to_int("m", parameters, &m, DEFAULT_M, ss); err |= to_int("w", parameters, &w, DEFAULT_W, ss); + if (chunk_mapping.size() > 0 && (int)chunk_mapping.size() != k + m) { + *ss << "mapping " << parameters.find("mapping")->second + << " maps " << chunk_mapping.size() << " chunks instead of" + << " the expected " << k + m << " and will be ignored" << std::endl; + chunk_mapping.clear(); + err = -EINVAL; + } return err; } diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index d58a17f3a12b..a87b5b4ec1df 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -1656,9 +1656,11 @@ void ECBackend::objects_read_async( sinfo.offset_len_to_stripe_bounds(i->first)); } + const vector &chunk_mapping = ec_impl->get_chunk_mapping(); set want_to_read; for (int i = 0; i < (int)ec_impl->get_data_chunk_count(); ++i) { - want_to_read.insert(i); + int chunk = (int)chunk_mapping.size() > i ? 
chunk_mapping[i] : i; + want_to_read.insert(chunk); } set shards; int r = get_min_avail_to_read_shards( diff --git a/src/test/erasure-code/test-erasure-code.sh b/src/test/erasure-code/test-erasure-code.sh index b9caef362adb..e27c5e5232e1 100755 --- a/src/test/erasure-code/test-erasure-code.sh +++ b/src/test/erasure-code/test-erasure-code.sh @@ -30,7 +30,7 @@ function run() { for id in $(seq 0 4) ; do run_osd $dir $id || return 1 done - create_erasure_coded_pool || return 1 + create_erasure_coded_pool ecpool || return 1 FUNCTIONS=${FUNCTIONS:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} for TEST_function in $FUNCTIONS ; do if ! $TEST_function $dir ; then @@ -38,13 +38,16 @@ function run() { return 1 fi done + delete_pool ecpool || return 1 teardown $dir || return 1 } function create_erasure_coded_pool() { + local poolname=$1 + ./ceph osd erasure-code-profile set myprofile \ ruleset-failure-domain=osd || return 1 - ./ceph osd pool create ecpool 12 12 erasure myprofile \ + ./ceph osd pool create $poolname 12 12 erasure myprofile \ || return 1 } @@ -126,6 +129,77 @@ function TEST_alignment_constraints() { rm $dir/ORIGINAL } +function get_osds() { + local poolname=$1 + local objectname=$2 + + ./ceph osd map $poolname $objectname | \ + perl -p -e 's/.*up \(\[(.*?)\].*/$1/; s/,/ /g' +} + +function chunk_size() { + local stripe_width=$(./ceph-conf --show-config-value osd_pool_erasure_code_stripe_width) + eval local $(./ceph osd erasure-code-profile get default | grep k=) + echo $(($stripe_width / $k)) +} + +# +# By default an object will be split in two (k=2) with the first part +# of the object in the first OSD of the up set and the second part in +# the next OSD in the up set. This layout is defined by the mapping +# parameter and this function helps verify that the first and second +# part of the object are located in the OSD where they should be. 
+# +function verify_chunk_mapping() { + local dir=$1 + local poolname=$2 + local first=$3 + local second=$4 + + local payload=$(printf '%*s' $(chunk_size) FIRST$poolname ; printf '%*s' $(chunk_size) SECOND$poolname) + echo -n "$payload" > $dir/ORIGINAL + + ./rados --pool $poolname put SOMETHING$poolname $dir/ORIGINAL || return 1 + ./rados --pool $poolname get SOMETHING$poolname $dir/COPY || return 1 + local -a osds=($(get_osds $poolname SOMETHING$poolname)) + for (( i = 0; i < ${#osds[@]}; i++ )) ; do + ./ceph daemon osd.${osds[$i]} flush_journal + done + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY + + local -a osds=($(get_osds $poolname SOMETHING$poolname)) + grep --quiet --recursive --text FIRST$poolname $dir/${osds[$first]} || return 1 + grep --quiet --recursive --text SECOND$poolname $dir/${osds[$second]} || return 1 +} + +function TEST_chunk_mapping() { + local dir=$1 + + # + # mapping=DD_ is the default: + # first OSD (i.e. 0) in the up set has the first part of the object + # second OSD (i.e. 1) in the up set has the second part of the object + # + verify_chunk_mapping $dir ecpool 0 1 || return 1 + + ./ceph osd erasure-code-profile set remap-profile \ + ruleset-failure-domain=osd \ + mapping='_DD' || return 1 + ./ceph osd erasure-code-profile get remap-profile + ./ceph osd pool create remap-pool 12 12 erasure remap-profile \ + || return 1 + + # + # mapping=_DD + # second OSD (i.e. 1) in the up set has the first part of the object + # third OSD (i.e. 2) in the up set has the second part of the object + # + verify_chunk_mapping $dir remap-pool 1 2 || return 1 + + delete_pool remap-pool +} + main test-erasure-code # Local Variables: