]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
erasure-code: implement alignment on chunk sizes 1890/head
authorLoic Dachary <loic@dachary.org>
Tue, 27 May 2014 16:40:45 +0000 (18:40 +0200)
committerLoic Dachary <loic@dachary.org>
Mon, 4 Aug 2014 08:54:21 +0000 (10:54 +0200)
jerasure expects chunk sizes to be aligned on the largest vector size
that SSE instructions could use, when available
(LARGEST_VECTOR_WORDSIZE == 16 bytes).

For techniques derived from Cauchy, encoding and decoding are done by
subdividing the chunk into packets of packetsize bytes. The operations
are done w * packetsize bytes at a time. It follows that each chunk must
have a size that is a multiple of w * packetsize bytes.

For techniques derived from Vandermonde, it is enough for a chunk to be
a multiple of w * LARGEST_VECTOR_WORDSIZE.

ErasureCodeJerasure::get_alignment returns an alignment constraint on
the object size: the object is padded until its size is a multiple of
the returned value. The resulting object size then has to match the
chunk constraints described above, although they have no relationship
with K. For Cauchy, this leads to excessive padding, making it
impossible to choose sensible parameters when the object size is small.

When the per_chunk_alignment data member is true, the semantics of
ErasureCodeJerasure::get_alignment change: it returns an alignment
constraint on the chunk size instead, i.e. each chunk is padded until
its size is a multiple of the returned value. The
ErasureCodeJerasure::get_chunk_size method is modified to use the new
semantics when appropriate.

The jerasure-per-chunk-alignment parameter is parsed to set
per_chunk_alignment for the Vandermonde and Cauchy techniques.

The memory address of a chunk is implicitly aligned to a page boundary
because it is allocated with buffer::create_page_aligned.

http://tracker.ceph.com/issues/8475 Fixes: #8475

Signed-off-by: Loic Dachary <loic@dachary.org>
src/erasure-code/jerasure/ErasureCodeJerasure.cc
src/erasure-code/jerasure/ErasureCodeJerasure.h
src/test/erasure-code/TestErasureCodeJerasure.cc

index 3fa1f6cdb446c0515dec615d09fe9fa9f3a666df..8b982ce3dc20604b140c601bd7f6bdd14935d273 100644 (file)
@@ -28,7 +28,6 @@ extern "C" {
 #include "liberation.h"
 }
 
-// FIXME(loic) this may be too conservative, check back with feedback from Andreas 
 #define LARGEST_VECTOR_WORDSIZE 16
 
 #define dout_subsys ceph_subsys_osd
@@ -65,10 +64,26 @@ void ErasureCodeJerasure::init(const map<string,string> &parameters)
 unsigned int ErasureCodeJerasure::get_chunk_size(unsigned int object_size) const
 {
   unsigned alignment = get_alignment();
-  unsigned tail = object_size % alignment;
-  unsigned padded_length = object_size + ( tail ?  ( alignment - tail ) : 0 );
-  assert(padded_length % k == 0);
-  return padded_length / k;
+  if (per_chunk_alignment) {
+    unsigned chunk_size = object_size / k;
+    if (object_size % k)
+      chunk_size++;
+    dout(20) << "get_chunk_size: chunk_size " << chunk_size
+            << " must be modulo " << alignment << dendl; 
+    assert(alignment <= chunk_size);
+    unsigned modulo = chunk_size % alignment;
+    if (modulo) {
+      dout(10) << "get_chunk_size: " << chunk_size
+              << " padded to " << chunk_size + alignment - modulo << dendl;
+      chunk_size += alignment - modulo;
+    }
+    return chunk_size;
+  } else {
+    unsigned tail = object_size % alignment;
+    unsigned padded_length = object_size + ( tail ?  ( alignment - tail ) : 0 );
+    assert(padded_length % k == 0);
+    return padded_length / k;
+  }
 }
 
 int ErasureCodeJerasure::minimum_to_decode(const set<int> &want_to_read,
@@ -205,6 +220,19 @@ int ErasureCodeJerasure::to_int(const std::string &name,
   return r;
 }
 
+bool ErasureCodeJerasure::to_bool(const std::string &name,
+                                 const map<std::string,std::string> &parameters,
+                                 bool default_value)
+{
+  if (parameters.find(name) == parameters.end() ||
+      parameters.find(name)->second.size() == 0) {
+    dout(10) << name << " defaults to " << default_value << dendl;
+    return default_value;
+  }
+  const std::string value = parameters.find(name)->second;
+  return (value == "yes") || (value == "1") || (value == "true");
+}
+
 bool ErasureCodeJerasure::is_prime(int value)
 {
   int prime55[] = {
@@ -241,11 +269,14 @@ int ErasureCodeJerasureReedSolomonVandermonde::jerasure_decode(int *erasures,
 
 unsigned ErasureCodeJerasureReedSolomonVandermonde::get_alignment() const
 {
-  unsigned alignment = k*w*sizeof(int);
-  if ( ((w*sizeof(int))%LARGEST_VECTOR_WORDSIZE) )
-    alignment = k*w*LARGEST_VECTOR_WORDSIZE;
-  return alignment;
-
+  if (per_chunk_alignment) {
+    return w * LARGEST_VECTOR_WORDSIZE;
+  } else {
+    unsigned alignment = k*w*sizeof(int);
+    if ( ((w*sizeof(int))%LARGEST_VECTOR_WORDSIZE) )
+      alignment = k*w*LARGEST_VECTOR_WORDSIZE;
+    return alignment;
+  }
 }
 
 void ErasureCodeJerasureReedSolomonVandermonde::parse(const map<std::string,std::string> &parameters)
@@ -258,6 +289,7 @@ void ErasureCodeJerasureReedSolomonVandermonde::parse(const map<std::string,std:
         << " must be one of {8, 16, 32} : revert to " << DEFAULT_W << dendl;
     w = DEFAULT_W;
   }
+  per_chunk_alignment = to_bool("jerasure-per-chunk-alignment", parameters, false);
 }
 
 void ErasureCodeJerasureReedSolomonVandermonde::prepare()
@@ -285,10 +317,14 @@ int ErasureCodeJerasureReedSolomonRAID6::jerasure_decode(int *erasures,
 
 unsigned ErasureCodeJerasureReedSolomonRAID6::get_alignment() const
 {
-  unsigned alignment = k*w*sizeof(int);
-  if ( ((w*sizeof(int))%LARGEST_VECTOR_WORDSIZE) )
-    alignment = k*w*LARGEST_VECTOR_WORDSIZE;
-  return alignment;
+  if (per_chunk_alignment) {
+    return w * LARGEST_VECTOR_WORDSIZE;
+  } else {
+    unsigned alignment = k*w*sizeof(int);
+    if ( ((w*sizeof(int))%LARGEST_VECTOR_WORDSIZE) )
+      alignment = k*w*LARGEST_VECTOR_WORDSIZE;
+    return alignment;
+  }
 }
 
 void ErasureCodeJerasureReedSolomonRAID6::parse(const map<std::string,std::string> &parameters)
@@ -330,10 +366,18 @@ int ErasureCodeJerasureCauchy::jerasure_decode(int *erasures,
 
 unsigned ErasureCodeJerasureCauchy::get_alignment() const
 {
-  unsigned alignment = k*w*packetsize*sizeof(int);
-  if ( ((w*packetsize*sizeof(int))%LARGEST_VECTOR_WORDSIZE) )
-    alignment = k*w*packetsize*LARGEST_VECTOR_WORDSIZE;
-  return alignment;
+  if (per_chunk_alignment) {
+    unsigned alignment = w * packetsize;
+    unsigned modulo = alignment % LARGEST_VECTOR_WORDSIZE;
+    if (modulo)
+      alignment += LARGEST_VECTOR_WORDSIZE - modulo;
+    return alignment;
+  } else {
+    unsigned alignment = k*w*packetsize*sizeof(int);
+    if ( ((w*packetsize*sizeof(int))%LARGEST_VECTOR_WORDSIZE) )
+      alignment = k*w*packetsize*LARGEST_VECTOR_WORDSIZE;
+    return alignment;
+  }  
 }
 
 void ErasureCodeJerasureCauchy::parse(const map<std::string,std::string> &parameters)
@@ -348,6 +392,7 @@ void ErasureCodeJerasureCauchy::parse(const map<std::string,std::string> &parame
     w = DEFAULT_W;
   }
   packetsize = to_int("packetsize", parameters, DEFAULT_PACKETSIZE);
+  per_chunk_alignment = to_bool("jerasure-per-chunk-alignment", parameters, false);
 }
 
 void ErasureCodeJerasureCauchy::prepare_schedule(int *matrix)
index f5255497a80179958485cb4efa20e9f54e3bdc73..97257550b09ab8934d83d8f741238e2b5da5856b 100644 (file)
@@ -27,11 +27,13 @@ public:
   const char *technique;
   string ruleset_root;
   string ruleset_failure_domain;
+  bool per_chunk_alignment;
 
   ErasureCodeJerasure(const char *_technique) :
     technique(_technique),
     ruleset_root("default"),
-    ruleset_failure_domain("host")
+    ruleset_failure_domain("host"),
+    per_chunk_alignment(false)
   {}
 
   virtual ~ErasureCodeJerasure() {}
@@ -80,6 +82,9 @@ public:
   static int to_int(const std::string &name,
                     const map<std::string,std::string> &parameters,
                     int default_value);
+  static bool to_bool(const std::string &name,
+                     const map<std::string,std::string> &parameters,
+                     bool default_value);
   static bool is_prime(int value);
 };
 
index 2ec8f184df114fa91f9b2753acffe8c0eabb76df..5a164bbfab77ed11dd20c014f168aa21892c7ca9 100644 (file)
@@ -42,70 +42,77 @@ TYPED_TEST_CASE(ErasureCodeTest, JerasureTypes);
 
 TYPED_TEST(ErasureCodeTest, encode_decode)
 {
-  TypeParam jerasure;
-  map<std::string,std::string> parameters;
-  parameters["k"] = "2";
-  parameters["m"] = "2";
-  parameters["w"] = "7";
-  parameters["packetsize"] = "8";
-  jerasure.init(parameters);
+  const char *per_chunk_alignments[] = { "false", "true" };
+  for (int per_chunk_alignment = 0 ;
+       per_chunk_alignment < 2;
+       per_chunk_alignment++) {
+    TypeParam jerasure;
+    map<std::string,std::string> parameters;
+    parameters["k"] = "2";
+    parameters["m"] = "2";
+    parameters["w"] = "7";
+    parameters["packetsize"] = "8";
+    parameters["jerasure-per-chunk-alignment"] =
+      per_chunk_alignments[per_chunk_alignment];
+    jerasure.init(parameters);
 
 #define LARGE_ENOUGH 2048
-  bufferptr in_ptr(buffer::create_page_aligned(LARGE_ENOUGH));
-  in_ptr.zero();
-  in_ptr.set_length(0);
-  const char *payload =
-    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
-    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
-    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
-    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
-    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
-  in_ptr.append(payload, strlen(payload));
-  bufferlist in;
-  in.push_front(in_ptr);
-  int want_to_encode[] = { 0, 1, 2, 3 };
-  map<int, bufferlist> encoded;
-  EXPECT_EQ(0, jerasure.encode(set<int>(want_to_encode, want_to_encode+4),
-                              in,
-                              &encoded));
-  EXPECT_EQ(4u, encoded.size());
-  unsigned length =  encoded[0].length();
-  EXPECT_EQ(0, strncmp(encoded[0].c_str(), in.c_str(), length));
-  EXPECT_EQ(0, strncmp(encoded[1].c_str(), in.c_str() + length,
-                      in.length() - length));
-
-
-  // all chunks are available
-  {
-    int want_to_decode[] = { 0, 1 };
-    map<int, bufferlist> decoded;
-    EXPECT_EQ(0, jerasure.decode(set<int>(want_to_decode, want_to_decode+2),
-                                encoded,
-                                &decoded));
-    EXPECT_EQ(2u, decoded.size()); 
-    EXPECT_EQ(length, decoded[0].length());
-    EXPECT_EQ(0, strncmp(decoded[0].c_str(), in.c_str(), length));
-    EXPECT_EQ(0, strncmp(decoded[1].c_str(), in.c_str() + length,
+    bufferptr in_ptr(buffer::create_page_aligned(LARGE_ENOUGH));
+    in_ptr.zero();
+    in_ptr.set_length(0);
+    const char *payload =
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+    in_ptr.append(payload, strlen(payload));
+    bufferlist in;
+    in.push_front(in_ptr);
+    int want_to_encode[] = { 0, 1, 2, 3 };
+    map<int, bufferlist> encoded;
+    EXPECT_EQ(0, jerasure.encode(set<int>(want_to_encode, want_to_encode+4),
+                                in,
+                                &encoded));
+    EXPECT_EQ(4u, encoded.size());
+    unsigned length =  encoded[0].length();
+    EXPECT_EQ(0, strncmp(encoded[0].c_str(), in.c_str(), length));
+    EXPECT_EQ(0, strncmp(encoded[1].c_str(), in.c_str() + length,
                         in.length() - length));
-  }
 
-  // two chunks are missing 
-  {
-    map<int, bufferlist> degraded = encoded;
-    degraded.erase(0);
-    degraded.erase(1);
-    EXPECT_EQ(2u, degraded.size());
-    int want_to_decode[] = { 0, 1 };
-    map<int, bufferlist> decoded;
-    EXPECT_EQ(0, jerasure.decode(set<int>(want_to_decode, want_to_decode+2),
-                                degraded,
-                                &decoded));
-    // always decode all, regardless of want_to_decode
-    EXPECT_EQ(4u, decoded.size()); 
-    EXPECT_EQ(length, decoded[0].length());
-    EXPECT_EQ(0, strncmp(decoded[0].c_str(), in.c_str(), length));
-    EXPECT_EQ(0, strncmp(decoded[1].c_str(), in.c_str() + length,
-                        in.length() - length));
+
+    // all chunks are available
+    {
+      int want_to_decode[] = { 0, 1 };
+      map<int, bufferlist> decoded;
+      EXPECT_EQ(0, jerasure.decode(set<int>(want_to_decode, want_to_decode+2),
+                                  encoded,
+                                  &decoded));
+      EXPECT_EQ(2u, decoded.size()); 
+      EXPECT_EQ(length, decoded[0].length());
+      EXPECT_EQ(0, strncmp(decoded[0].c_str(), in.c_str(), length));
+      EXPECT_EQ(0, strncmp(decoded[1].c_str(), in.c_str() + length,
+                          in.length() - length));
+    }
+
+    // two chunks are missing 
+    {
+      map<int, bufferlist> degraded = encoded;
+      degraded.erase(0);
+      degraded.erase(1);
+      EXPECT_EQ(2u, degraded.size());
+      int want_to_decode[] = { 0, 1 };
+      map<int, bufferlist> decoded;
+      EXPECT_EQ(0, jerasure.decode(set<int>(want_to_decode, want_to_decode+2),
+                                  degraded,
+                                  &decoded));
+      // always decode all, regardless of want_to_decode
+      EXPECT_EQ(4u, decoded.size()); 
+      EXPECT_EQ(length, decoded[0].length());
+      EXPECT_EQ(0, strncmp(decoded[0].c_str(), in.c_str(), length));
+      EXPECT_EQ(0, strncmp(decoded[1].c_str(), in.c_str() + length,
+                          in.length() - length));
+    }
   }
 }
 
@@ -216,7 +223,7 @@ TEST(ErasureCodeTest, encode)
   parameters["w"] = "8";
   jerasure.init(parameters);
 
-  unsigned alignment = jerasure.get_alignment();
+  unsigned aligned_object_size = jerasure.get_alignment() * 2;
   {
     //
     // When the input bufferlist needs to be padded because
@@ -225,17 +232,16 @@ TEST(ErasureCodeTest, encode)
     bufferlist in;
     map<int,bufferlist> encoded;
     int want_to_encode[] = { 0, 1, 2, 3 };
-    int trail_length = 10;
-    in.append(string(alignment + trail_length, 'X'));
+    int trail_length = 1;
+    in.append(string(aligned_object_size + trail_length, 'X'));
     EXPECT_EQ(0, jerasure.encode(set<int>(want_to_encode, want_to_encode+4),
                                 in,
                                 &encoded));
     EXPECT_EQ(4u, encoded.size());
-    for(int i = 0; i < 4; i++)
-      EXPECT_EQ(alignment, encoded[i].length());
     char *last_chunk = encoded[1].c_str();
+    int length =encoded[1].length();
     EXPECT_EQ('X', last_chunk[0]);
-    EXPECT_EQ('\0', last_chunk[trail_length]);
+    EXPECT_EQ('\0', last_chunk[length - trail_length]);
   }
 
   {
@@ -251,11 +257,10 @@ TEST(ErasureCodeTest, encode)
     map<int,bufferlist> encoded;
     set<int> want_to_encode;
     want_to_encode.insert(0);
-    int trail_length = 10;
-    in.append(string(alignment + trail_length, 'X'));
+    int trail_length = 1;
+    in.append(string(aligned_object_size + trail_length, 'X'));
     EXPECT_EQ(0, jerasure.encode(want_to_encode, in, &encoded));
     EXPECT_EQ(1u, encoded.size());
-    EXPECT_EQ(alignment, encoded[0].length());
   }
 }