From 7cd136aa7462afff405cf21787825285dd249d06 Mon Sep 17 00:00:00 2001 From: Jamie Pryde Date: Mon, 28 Oct 2024 14:37:46 +0000 Subject: [PATCH] erasure-code: Add parity delta write optimization to ISA This commit adds FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION to the list of optimizations supported by the ISA plugin, and encode_delta and apply_delta functions to ISA. Stubs for these functions are added to the other plugins. Two new tests have been added to TestErasureCodePlugins.cc to test the encode_delta and apply_delta functions. The first new test updates a single parity chunk using a single delta. The second new test updates all parity chunks with a delta for every data chunk. Signed-off-by: Jamie Pryde Signed-off-by: Alex Ainscow --- src/erasure-code/ErasureCode.h | 166 ++++++++-------- src/erasure-code/ErasureCodeInterface.h | 50 +++++ src/erasure-code/isa/ErasureCodeIsa.cc | 97 +++++++--- src/erasure-code/isa/ErasureCodeIsa.h | 17 +- .../erasure-code/TestErasureCodePlugins.cc | 183 +++++++++++++++++- 5 files changed, 410 insertions(+), 103 deletions(-) diff --git a/src/erasure-code/ErasureCode.h b/src/erasure-code/ErasureCode.h index 2ae40b6368604..4810db4c5ed49 100644 --- a/src/erasure-code/ErasureCode.h +++ b/src/erasure-code/ErasureCode.h @@ -20,111 +20,125 @@ /*! @file ErasureCode.h @brief Base class for erasure code plugins implementors - */ + */ #include "ErasureCodeInterface.h" + #include "include/ceph_assert.h" namespace ceph { - class ErasureCode : public ErasureCodeInterface { - public: - static const unsigned SIMD_ALIGN; +class ErasureCode : public ErasureCodeInterface { +public: + static const unsigned SIMD_ALIGN; + + std::vector chunk_mapping; + ErasureCodeProfile _profile; - std::vector chunk_mapping; - ErasureCodeProfile _profile; + // for CRUSH rule + std::string rule_root; + std::string rule_failure_domain; + std::string rule_device_class; + int rule_osds_per_failure_domain = -1; + int rule_num_failure_domains = -1; - // for CRUSH rule - std::string rule_root; - std::string rule_failure_domain; - std::string rule_device_class; - int rule_osds_per_failure_domain = -1; - int rule_num_failure_domains = -1; + ~ErasureCode() override {} - ~ErasureCode() override {} + int init(ceph::ErasureCodeProfile &profile, std::ostream *ss) override; - int init(ceph::ErasureCodeProfile &profile, std::ostream *ss) override; + const ErasureCodeProfile &get_profile() const override { + return _profile; + } - const ErasureCodeProfile &get_profile() const override { - return _profile; - } + int create_rule(const std::string &name, + CrushWrapper &crush, + std::ostream *ss) const override; - int create_rule(const std::string &name, - CrushWrapper &crush, - std::ostream *ss) const override; + int sanity_check_k_m(int k, int m, std::ostream *ss); - int sanity_check_k_m(int k, int m, std::ostream *ss); + unsigned int get_coding_chunk_count() const override { + return get_chunk_count() - get_data_chunk_count(); + } - unsigned int get_coding_chunk_count() const override { - return get_chunk_count() - get_data_chunk_count(); - } + virtual int get_sub_chunk_count() override { + return 1; + } - virtual int get_sub_chunk_count() override { - return 1; - } + virtual int _minimum_to_decode(const std::set &want_to_read, + const std::set &available_chunks, + std::set *minimum); - virtual int _minimum_to_decode(const std::set &want_to_read, - const std::set &available_chunks, - std::set *minimum); + int minimum_to_decode(const std::set &want_to_read, + const std::set &available, + std::map>> *minimum) override; - int minimum_to_decode(const std::set &want_to_read, - const std::set &available, - std::map>> *minimum) override; + int minimum_to_decode_with_cost(const std::set &want_to_read, + const std::map &available, + std::set *minimum) override; - int minimum_to_decode_with_cost(const std::set &want_to_read, - const std::map &available, - std::set *minimum) override; + int encode_prepare(const bufferlist &raw, + std::map &encoded) const; - int encode_prepare(const bufferlist &raw, - std::map &encoded) const; + int encode(const std::set &want_to_encode, + const bufferlist &in, + std::map *encoded) override; - int encode(const std::set &want_to_encode, - const bufferlist &in, - std::map *encoded) override; + int decode(const std::set &want_to_read, + const std::map &chunks, + std::map *decoded, int chunk_size) override; - int decode(const std::set &want_to_read, - const std::map &chunks, - std::map *decoded, int chunk_size) override; + virtual int _decode(const std::set &want_to_read, + const std::map &chunks, + std::map *decoded); - virtual int _decode(const std::set &want_to_read, - const std::map &chunks, - std::map *decoded); + const std::vector &get_chunk_mapping() const override; - const std::vector &get_chunk_mapping() const override; + int to_mapping(const ErasureCodeProfile &profile, + std::ostream *ss); - int to_mapping(const ErasureCodeProfile &profile, - std::ostream *ss); + static int to_int(const std::string &name, + ErasureCodeProfile &profile, + int *value, + const std::string &default_value, + std::ostream *ss); - static int to_int(const std::string &name, - ErasureCodeProfile &profile, - int *value, - const std::string &default_value, - std::ostream *ss); + static int to_bool(const std::string &name, + ErasureCodeProfile &profile, + bool *value, + const std::string &default_value, + std::ostream *ss); - static int to_bool(const std::string &name, + static int to_string(const std::string &name, ErasureCodeProfile &profile, - bool *value, + std::string *value, const std::string &default_value, std::ostream *ss); - static int to_string(const std::string &name, - ErasureCodeProfile &profile, - std::string *value, - const std::string &default_value, - std::ostream *ss); - - int decode_concat(const std::set& want_to_read, - const std::map &chunks, - bufferlist *decoded) override; - int decode_concat(const std::map &chunks, - bufferlist *decoded) override; - - protected: - int parse(const ErasureCodeProfile &profile, - std::ostream *ss); - - private: - int chunk_index(unsigned int i) const; - }; + int decode_concat(const std::set& want_to_read, + const std::map &chunks, + bufferlist *decoded) override; + int decode_concat(const std::map &chunks, + bufferlist *decoded) override; + + void encode_delta(const bufferptr &old_data, + const bufferptr &new_data, + bufferptr *delta_maybe_in_place) override + { + ceph_abort("Not yet supported by this plugin"); + } + + void apply_delta(const shard_id_map &in, + shard_id_map &out) override + { + ceph_abort("Not yet supported by this plugin"); + } + + protected: + int parse(const ErasureCodeProfile &profile, + std::ostream *ss); + + private: + int chunk_index(unsigned int i) const; +}; } #endif diff --git a/src/erasure-code/ErasureCodeInterface.h b/src/erasure-code/ErasureCodeInterface.h index 8420df40f3e22..f57c0a1d41f3f 100644 --- a/src/erasure-code/ErasureCodeInterface.h +++ b/src/erasure-code/ErasureCodeInterface.h @@ -379,6 +379,56 @@ namespace ceph { virtual int encode_chunks(const std::set &want_to_encode, std::map *encoded) = 0; + /** + * Calculate the delta between the old_data and new_data buffers using xor, + * (or plugin-specific implementation) and returns the result in the + * delta_maybe_in_place buffer. + * + * Assumes old_data, new_data and delta_maybe_in_place are all buffers of + * the same length. + * + * Optionally, the delta_maybe_in_place and old_data parameters can be the + * same buffer. For some plugins making these the same buffer is slightly + * faster, as it avoids a memcpy. Reduced allocations in the caller may + * also provide a performance advantage. + * + * @param [in] old_data first buffer to xor + * @param [in] new_data second buffer to xor + * @delta_maybe_in_place [out] delta buffer to write the delta of + * old_data and new_data. This can optionally be a + * pointer to old_data. + */ + virtual void encode_delta(const bufferptr &old_data, + const bufferptr &new_data, + bufferptr *delta_maybe_in_place) = 0; + + /** + * Applies one or more deltas to one or more coding + * chunks. + * + * Assumes all buffers in the in and out maps are the same length. + * + * The in map should contain deltas of data chunks to be applied to + * the coding chunks. The delta for a specific data chunk must have + * the correct integer key in the map. e.g. if k=2 m=2 and a delta for k[1] + * is being applied, then the delta should have key 1 in the in map. + * + * The in map should also contain the coding chunks that the delta will + * be applied to. The coding chunks must also have the correct integer key in the + * map. e.g. if k=2 m=2 and the delta for k[1] is to be applied to m[1], then + * the coding chunk should have key 3 in the in map. + * + * If a coding buffer is present in the in map, then it must also be present in the + * out map with the same key. + * + * + * @param [in] old_data first buffer to xor + * @param [in] new_data second buffer to xor + * @param [out] delta buffer containing the delta of old_data and new_data + */ + virtual void apply_delta(const std::map &in, + std::map &out) = 0; + /** * Decode the **chunks** and store at least **want_to_read** * chunks in **decoded**. diff --git a/src/erasure-code/isa/ErasureCodeIsa.cc b/src/erasure-code/isa/ErasureCodeIsa.cc index 7c28bbb0a6acb..0d18bcb8b290c 100644 --- a/src/erasure-code/isa/ErasureCodeIsa.cc +++ b/src/erasure-code/isa/ErasureCodeIsa.cc @@ -117,33 +117,40 @@ int ErasureCodeIsa::decode_chunks(const set &want_to_read, // ----------------------------------------------------------------------------- void -ErasureCodeIsa::isa_xor(char **data, char **coding, int blocksize) +ErasureCodeIsa::isa_xor(char **data, char *coding, int blocksize, int data_vectors) { - // If addresses are aligned to 32 bytes, then we can use xor_gen() - // Otherwise, use byte_xor() - int i; - bool src_aligned = true; + ceph_assert(data_vectors <= MAX_K); + char * xor_bufs[MAX_K + 1]; + for (int i = 0; i < data_vectors; i++) { + xor_bufs[i] = data[i]; + } + xor_bufs[data_vectors] = coding; - for (i = 0; i < k; i++) { - src_aligned &= is_aligned(data[i], EC_ISA_ADDRESS_ALIGNMENT); - } + // If addresses are aligned to 32 bytes, then we can use xor_gen() + // Otherwise, use byte_xor() + bool aligned = true; + for (int i = 0; i <= data_vectors; i++) { + aligned &= is_aligned(xor_bufs[i], EC_ISA_ADDRESS_ALIGNMENT); + } - if (src_aligned && is_aligned(coding[0], EC_ISA_ADDRESS_ALIGNMENT)) { - xor_gen(k+1, blocksize, (void**) data); - } - else { - memcpy(coding[0], data[0], blocksize); - for (i = 1; i < k; i++) { - byte_xor(data[i], coding[0], data[i]+blocksize); - } - } + if (aligned) { + xor_gen(data_vectors + 1, blocksize, (void**) xor_bufs); + } + else { + byte_xor(data_vectors, blocksize, xor_bufs); + } } void -ErasureCodeIsa::byte_xor(char *data, char *coding, char *data_end) +ErasureCodeIsa::byte_xor(int data_vects, int blocksize, char **array) { - while (data < data_end) - *coding++ ^= *data++; + for (int i = 0; i < blocksize; i++) { + char parity = array[0][i]; + for (int j = 1; j < data_vects; j++ ) { + parity ^= array[j][i]; + } + array[data_vects][i] = parity; + } } // ----------------------------------------------------------------------------- @@ -154,7 +161,7 @@ ErasureCodeIsaDefault::isa_encode(char **data, int blocksize) { if (m == 1) { - isa_xor(data, coding, blocksize); + isa_xor(data, coding[0], blocksize, k); } else { ec_encode_data(blocksize, k, m, encode_tbls, (unsigned char**) data, (unsigned char**) coding); @@ -175,6 +182,52 @@ ErasureCodeIsaDefault::erasure_contains(int *erasures, int i) // ----------------------------------------------------------------------------- +void +ErasureCodeIsaDefault::encode_delta(const bufferptr &old_data, + const bufferptr &new_data, + bufferptr *delta) +{ + constexpr int data_vectors = 2; + char * data[data_vectors]; + data[0] = const_cast(old_data.c_str()); + data[1] = const_cast(new_data.c_str()); + char * coding = delta->c_str(); + + isa_xor(data, coding, delta->length(), data_vectors); +} + +// ----------------------------------------------------------------------------- + +void +ErasureCodeIsaDefault::apply_delta(const shard_id_map &in, + shard_id_map &out) +{ + auto first = in.begin(); + const unsigned blocksize = first->second.length(); + + for (auto const& [datashard, databuf] : in) { + if (datashard < k) { + for (auto const& [codingshard, codingbuf] : out) { + if (codingshard >= k) { + ceph_assert(codingbuf.length() == blocksize); + if (m==1) { + constexpr int data_vectors = 2; + char * data[data_vectors]; + data[0] = const_cast(databuf.c_str()); + data[1] = const_cast(codingbuf.c_str()); + char * coding = const_cast(codingbuf.c_str()); + isa_xor(data, coding, blocksize, data_vectors); + } + else { + unsigned char* data = reinterpret_cast(const_cast(databuf.c_str())); + unsigned char* coding = reinterpret_cast(const_cast(codingbuf.c_str())); + ec_encode_data_update(blocksize, k, 1, static_cast(datashard), encode_tbls + (32 * k * (static_cast(codingshard) - k)), data, &coding); + } + } + } + } + } +} // ----------------------------------------------------------------------------- @@ -262,7 +315,7 @@ ErasureCodeIsaDefault::isa_decode(int *erasures, ((matrixtype == kVandermonde) && (nerrs == 1) && (erasures[0] < (k + 1)))) { // single parity decoding dout(20) << "isa_decode: reconstruct using xor_gen [" << erasures[0] << "]" << dendl; - isa_xor(recover_buf, &recover_buf[k], blocksize); + isa_xor(recover_buf, recover_buf[k], blocksize, k); return 0; } diff --git a/src/erasure-code/isa/ErasureCodeIsa.h b/src/erasure-code/isa/ErasureCodeIsa.h index 21ab3c645f553..4e1fe24e323cd 100644 --- a/src/erasure-code/isa/ErasureCodeIsa.h +++ b/src/erasure-code/isa/ErasureCodeIsa.h @@ -42,6 +42,8 @@ public: kVandermonde = 0, kCauchy = 1 }; + static constexpr int MAX_K = 32; + int k; int m; int w; @@ -67,7 +69,8 @@ public: uint64_t get_supported_optimizations() const override { return FLAG_EC_PLUGIN_PARTIAL_READ_OPTIMIZATION | FLAG_EC_PLUGIN_PARTIAL_WRITE_OPTIMIZATION | - FLAG_EC_PLUGIN_ZERO_INPUT_ZERO_OUTPUT_OPTIMIZATION; + FLAG_EC_PLUGIN_ZERO_INPUT_ZERO_OUTPUT_OPTIMIZATION | + FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION; } unsigned int @@ -93,15 +96,14 @@ public: int init(ceph::ErasureCodeProfile &profile, std::ostream *ss) override; - void isa_xor(char **data, char **coding, int blocksize); + void isa_xor(char **data, char *coding, int blocksize, int data_vectors); - void byte_xor(char *data, char *coding, char *data_end); + void byte_xor(int data_vects, int blocksize, char **array); virtual void isa_encode(char **data, char **coding, int blocksize) = 0; - virtual int isa_decode(int *erasures, char **data, char **coding, @@ -156,6 +158,13 @@ public: char **coding, int blocksize) override; + void encode_delta(const ceph::bufferptr &old_data, + const ceph::bufferptr &new_data, + ceph::bufferptr *delta) override; + + void apply_delta(const shard_id_map &in, + shard_id_map &out); + unsigned get_alignment() const override; size_t get_minimum_granularity() override diff --git a/src/test/erasure-code/TestErasureCodePlugins.cc b/src/test/erasure-code/TestErasureCodePlugins.cc index 0efdaa7555481..43d5bb41ac8d5 100644 --- a/src/test/erasure-code/TestErasureCodePlugins.cc +++ b/src/test/erasure-code/TestErasureCodePlugins.cc @@ -139,7 +139,7 @@ TEST_P(PluginTest,PartialWrite) // Create buffer 2 that has a different middle // chunk for each shard // - // Create buffer 4 that just has the 1 different + // Create buffer 3 that just has the 1 different // middle chunk for each shard // // encoded the 3 buffers. Check if the first and @@ -256,6 +256,187 @@ TEST_P(PluginTest,ZeroInZeroOut) " but test indicates support is possible for this configuration"; } } +TEST_P(PluginTest,ParityDelta_SingleDeltaSingleParity) +{ + // Test erasure code plugin can perform parity delta writes + // to a single parity chunk using a single delta. + // + // 1. Create a buffer of random chunks and do a full stripe write. + // 2. Generate a new chunk to replace one of the original data chunks. + // 3. Test that EncodeDelta generates the expected delta when given the + // original data chunk and the new data chunk. + // 4. Do a second full write with the new chunk. + // 5. Test that ApplyDelta correctly applies the delta to the original parity chunk + // and returns the same new parity chunk as the second full write. + initialize(); + if (!(erasure_code->get_supported_optimizations() & + ErasureCodeInterface::FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION)) { + GTEST_SKIP() << "Plugin does not support parity delta optimization"; + } + set want_to_encode; + for (unsigned int i = 0 ; i < get_k_plus_m(); i++) { + want_to_encode.insert(i); + } + bufferlist old_bl; + for (unsigned int i = 0; i < get_k(); i++) { + generate_chunk(old_bl); + } + map old_encoded; + erasure_code->encode(want_to_encode, old_bl, &old_encoded); + + bufferlist new_chunk_bl; + generate_chunk(new_chunk_bl); + + random_device rand; + mt19937 gen(rand()); + uniform_int_distribution<> chunk_range(0, get_k()-1); + unsigned int random_chunk = chunk_range(gen); + + ceph::bufferptr old_data = buffer::create_aligned(chunk_size, 4096); + old_bl.begin(random_chunk * chunk_size).copy(chunk_size, old_data.c_str()); + ceph::bufferptr new_data = new_chunk_bl.front(); + ceph::bufferptr delta = buffer::create_aligned(chunk_size, 4096); + ceph::bufferptr expected_delta = buffer::create_aligned(chunk_size, 4096); + + for (int i = 0; i < chunk_size; i++) { + expected_delta.c_str()[i] = old_data.c_str()[i] ^ new_data.c_str()[i]; + } + + erasure_code->encode_delta(old_data, new_data, &delta); + + bool delta_matches = true; + for (int i = 0; i < chunk_size; i++) { + if (expected_delta.c_str()[i] != delta.c_str()[i]) { + delta_matches = false; + } + } + EXPECT_EQ(delta_matches, true); + + uniform_int_distribution<> parity_range(get_k(), get_k_plus_m()-1); + unsigned int random_parity = parity_range(gen); + ceph::bufferptr old_parity = buffer::create_aligned(chunk_size, 4096); + old_encoded[random_parity].begin(0).copy(chunk_size, old_parity.c_str()); + + map new_encoded; + bufferlist new_bl; + for (auto i = old_encoded.begin(); i != old_encoded.end(); i++) { + if ((unsigned int)i->first >= get_k()) { + continue; + } + if ((unsigned int)i->first == random_chunk) { + new_bl.append(new_data); + } + else { + new_bl.append(i->second); + } + } + + erasure_code->encode(want_to_encode, new_bl, &new_encoded); + ceph::bufferptr expected_parity = buffer::create_aligned(chunk_size, 4096); + new_encoded[random_parity].begin().copy_deep(chunk_size, expected_parity); + + map in_map; + in_map[random_chunk] = delta; + in_map[random_parity] = old_parity; + map out_map; + out_map[random_parity] = old_parity; + erasure_code->apply_delta((const map)in_map, out_map); + + bool parity_matches = true; + for (int i = 0; i < chunk_size; i++) { + if (out_map[random_parity].c_str()[i] != expected_parity.c_str()[i]) { + parity_matches = false; + } + } + EXPECT_EQ(parity_matches, true); +} +TEST_P(PluginTest,ParityDelta_MultipleDeltaMultipleParity) +{ + // Test erasure code plugin can perform parity delta writes + // to all parity chunks with deltas for all data chunks. + // + // 1. Create a buffer of random chunks and do a full write. + // 2. Create a second buffer of random chunks and do a full write. + // 3. Calculate the deltas between all of the chunks using xor. + // 4. Test that EncodeDelta generates the expected delta when given the + // original data chunks and the new data chunks. + // 5. Create an in map that contains every data delta and every parity chunk + // from the first full write. Test that ApplyDelta applies every delta to + // every parity, and returns an out map containing the same parity + // chunks that were generated by the second full stripe write. + initialize(); + if (!(erasure_code->get_supported_optimizations() & + ErasureCodeInterface::FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION)) { + GTEST_SKIP() << "Plugin does not support parity delta optimization"; + } + set want_to_encode; + for (unsigned int i = 0 ; i < get_k_plus_m(); i++) { + want_to_encode.insert(i); + } + + bufferlist old_bl; + for (unsigned int i = 0; i < get_k(); i++) { + generate_chunk(old_bl); + } + map old_encoded; + erasure_code->encode(want_to_encode, old_bl, &old_encoded); + + bufferlist new_bl; + for (unsigned int i = 0; i < get_k(); i++) { + generate_chunk(new_bl); + } + map new_encoded; + erasure_code->encode(want_to_encode, new_bl, &new_encoded); + + ceph::bufferptr old_data = buffer::create_aligned(chunk_size*get_k(), 4096); + ceph::bufferptr new_data = buffer::create_aligned(chunk_size*get_k(), 4096); + ceph::bufferptr delta = buffer::create_aligned(chunk_size*get_k(), 4096); + ceph::bufferptr expected_delta = buffer::create_aligned(chunk_size*get_k(), 4096); + + old_bl.begin().copy(chunk_size*get_k(), old_data.c_str()); + new_bl.begin().copy(chunk_size*get_k(), new_data.c_str()); + + for (unsigned int i = 0; i < chunk_size*get_k() ; i++) { + expected_delta.c_str()[i] = old_bl.c_str()[i] ^ new_bl.c_str()[i]; + } + + erasure_code->encode_delta(old_data, new_data, &delta); + + bool delta_matches = true; + for (unsigned int i = 0; i < chunk_size * get_k(); i++) { + if (expected_delta.c_str()[i] != delta.c_str()[i]) { + delta_matches = false; + } + } + EXPECT_EQ(delta_matches, true); + + map in_map; + map out_map; + for (unsigned int i = 0; i < get_k(); i++) { + ceph::bufferptr tmp = buffer::create_aligned(chunk_size, 4096); + delta.copy_out(chunk_size * i, chunk_size, tmp.c_str()); + in_map[i] = tmp; + } + for (unsigned int i = get_k(); i < get_k_plus_m(); i++) { + ceph::bufferptr tmp = buffer::create_aligned(chunk_size, 4096); + old_encoded[i].begin().copy(chunk_size, tmp.c_str()); + in_map[i] = tmp; + out_map[i] = tmp; + } + + erasure_code->apply_delta((const map)in_map, out_map); + + bool parity_matches = true; + + for (unsigned int i = get_k(); i < get_k_plus_m(); i++) { + for (int j = 0; j < chunk_size; j++) { + if (out_map[i].c_str()[j] != new_encoded[i].c_str()[j]) { + parity_matches = false; + } + } + } + EXPECT_EQ(parity_matches, true); +} INSTANTIATE_TEST_SUITE_P( PluginTests, PluginTest, -- 2.39.5