]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
erasure-code/isa: Use isa/raid's xor_gen() instead of the region_xor() optimisation 58594/head
authorJamie Pryde <jamiepry@uk.ibm.com>
Tue, 9 Jul 2024 19:05:58 +0000 (19:05 +0000)
committerJamie Pryde <jamiepry@uk.ibm.com>
Thu, 18 Jul 2024 14:04:36 +0000 (15:04 +0100)
Signed-off-by: Jamie Pryde <jamiepry@uk.ibm.com>
src/erasure-code/isa/CMakeLists.txt
src/erasure-code/isa/ErasureCodeIsa.cc
src/erasure-code/isa/ErasureCodeIsa.h
src/erasure-code/isa/xor_op.cc [deleted file]
src/erasure-code/isa/xor_op.h [deleted file]
src/test/erasure-code/TestErasureCodeIsa.cc

index 2486692b843f2bad72ea1758600ea24c828516f0..2ca398ffcb1a1b734cc6a613bdaa9b0bbdf7615e 100644 (file)
@@ -54,10 +54,19 @@ if(HAVE_NASM_X64_AVX2)
     ${isal_src_dir}/erasure_code/gf_4vect_mad_avx512.asm
     ${isal_src_dir}/erasure_code/gf_vect_dot_prod_avx512.asm
     ${isal_src_dir}/erasure_code/gf_vect_mad_avx512.asm
+    ${isal_src_dir}/raid/raid_base.c
+    ${isal_src_dir}/raid/raid_multibinary.asm
+    ${isal_src_dir}/raid/xor_check_sse.asm
+    ${isal_src_dir}/raid/xor_gen_sse.asm
+    ${isal_src_dir}/raid/xor_gen_avx.asm
+    ${isal_src_dir}/raid/xor_gen_avx512.asm
+    ${isal_src_dir}/raid/pq_check_sse.asm
+    ${isal_src_dir}/raid/pq_gen_sse.asm
+    ${isal_src_dir}/raid/pq_gen_avx.asm
+    ${isal_src_dir}/raid/pq_gen_avx2.asm
     ErasureCodeIsa.cc
     ErasureCodeIsaTableCache.cc
     ErasureCodePluginIsa.cc
-    xor_op.cc
   )
 elseif(HAVE_ARMV8_SIMD)
   set(isa_srcs
@@ -77,13 +86,20 @@ elseif(HAVE_ARMV8_SIMD)
     ${isal_src_dir}/erasure_code/aarch64/gf_vect_mad_neon.S
     ${isal_src_dir}/erasure_code/aarch64/gf_vect_mul_neon.S
     ${isal_src_dir}/erasure_code/aarch64/ec_multibinary_arm.S
+    ${isal_src_dir}/raid/raid_base.c
+    ${isal_src_dir}/raid/aarch64/raid_aarch64_dispatcher.c
+    ${isal_src_dir}/raid/aarch64/raid_multibinary_arm.S
+    ${isal_src_dir}/raid/aarch64/xor_check_neon.S
+    ${isal_src_dir}/raid/aarch64/xor_gen_neon.S
+    ${isal_src_dir}/raid/aarch64/pq_check_neon.S
+    ${isal_src_dir}/raid/aarch64/pq_gen_neon.S
     ErasureCodeIsa.cc
     ErasureCodeIsaTableCache.cc
     ErasureCodePluginIsa.cc
-    xor_op.cc
   )
   set_source_files_properties(
     ${isal_src_dir}/erasure_code/aarch64/ec_multibinary_arm.S
+    ${isal_src_dir}/raid/aarch64/raid_multibinary_arm.S
     PROPERTIES COMPILE_FLAGS "-D__ASSEMBLY__"
   )
 endif()
index 305667e72e9f3f339740edacc880594415bee398..1548139756bb167d24241fed01847c1ea6d0ae9b 100644 (file)
@@ -18,7 +18,6 @@
 // -----------------------------------------------------------------------------
 #include "common/debug.h"
 #include "ErasureCodeIsa.h"
-#include "xor_op.h"
 #include "include/ceph_assert.h"
 using namespace std;
 using namespace ceph;
@@ -26,6 +25,7 @@ using namespace ceph;
 // -----------------------------------------------------------------------------
 extern "C" {
 #include "isa-l/include/erasure_code.h"
+#include "isa-l/include/raid.h"
 }
 // -----------------------------------------------------------------------------
 #define dout_context g_ceph_context
@@ -121,10 +121,9 @@ ErasureCodeIsaDefault::isa_encode(char **data,
                                   char **coding,
                                   int blocksize)
 {
-
   if (m == 1)
     // single parity stripe
-    region_xor((unsigned char**) data, (unsigned char*) coding[0], k, blocksize);
+    xor_gen(k+m, blocksize, (void**) data);
   else
     ec_encode_data(blocksize, k, m, encode_tbls,
                    (unsigned char**) data, (unsigned char**) coding);
@@ -157,61 +156,81 @@ ErasureCodeIsaDefault::isa_decode(int *erasures,
   int nerrs = 0;
   int i, r, s;
 
+  unsigned char *recover_source[k];
+  unsigned char *recover_target[m];
+  unsigned char *recover_buf[k+1];
+
   // count the errors
   for (int l = 0; erasures[l] != -1; l++) {
     nerrs++;
   }
 
-  unsigned char *recover_source[k];
-  unsigned char *recover_target[m];
-
-  memset(recover_source, 0, sizeof (recover_source));
-  memset(recover_target, 0, sizeof (recover_target));
+  if (nerrs > m)
+    return -1;
 
-  // ---------------------------------------------
-  // Assign source and target buffers
-  // ---------------------------------------------
-  for (i = 0, s = 0, r = 0; ((r < k) || (s < nerrs)) && (i < (k + m)); i++) {
-    if (!erasure_contains(erasures, i)) {
-      if (r < k) {
+  // -----------------------------------
+  // Assign source and target buffers.
+  // -----------------------------------
+  if ((m == 1) || 
+      ((matrixtype == kVandermonde) && (nerrs == 1) && (erasures[0] < (k + 1)))) {
+    // We need a single buffer to use the xor_gen() optimisation.
+    // The last index must point to the erasure, and index that contained
+    // the erasure must point to the parity.
+    memset(recover_buf, 0, sizeof (recover_buf));
+    bool parity_set = false;
+    for (i = 0; i < (k + 1); i++) {
+      if (erasure_contains(erasures, i)) {
+          if (i < k) {
+            recover_buf[i] = (unsigned char*) coding[0];
+            recover_buf[k] = (unsigned char*) data[i];
+            parity_set = true;
+          } else {
+            recover_buf[i] = (unsigned char*) coding[0];
+          }
+      } else {
         if (i < k) {
-          recover_source[r] = (unsigned char*) data[i];
+          recover_buf[i] = (unsigned char*) data[i];
         } else {
-          recover_source[r] = (unsigned char*) coding[i - k];
+          if (!parity_set) {
+            recover_buf[i] = (unsigned char*) coding[0];
+          }
         }
-        r++;
       }
-    } else {
-      if (s < m) {
-        if (i < k) {
-          recover_target[s] = (unsigned char*) data[i];
-        } else {
-          recover_target[s] = (unsigned char*) coding[i - k];
+    }
+  }
+  else {
+    // We need source and target buffers to use ec_encode_data().
+    // The erasure must be moved to the target buffer.
+    memset(recover_source, 0, sizeof (recover_source));
+    memset(recover_target, 0, sizeof (recover_target));
+    for (i = 0, s = 0, r = 0; ((r < k) || (s < nerrs)) && (i < (k + m)); i++) {
+      if (!erasure_contains(erasures, i)) {
+        if (r < k) {
+          if (i < k) {
+            recover_source[r] = (unsigned char*) data[i];
+          } else {
+            recover_source[r] = (unsigned char*) coding[i - k];
+          }
+          r++;
+        }
+      } else {
+        if (s < m) {
+          if (i < k) {
+            recover_target[s] = (unsigned char*) data[i];
+          } else {
+            recover_target[s] = (unsigned char*) coding[i - k];
+          }
+          s++;
         }
-        s++;
       }
     }
   }
 
-  if (m == 1) {
+  if ((m == 1) || 
+      ((matrixtype == kVandermonde) && (nerrs == 1) && (erasures[0] < (k + 1)))) {
     // single parity decoding
-    ceph_assert(1 == nerrs);
-    dout(20) << "isa_decode: reconstruct using region xor [" <<
-      erasures[0] << "]" << dendl;
-    region_xor(recover_source, recover_target[0], k, blocksize);
-    return 0;
-  }
-
-
-  if ((matrixtype == kVandermonde) &&
-      (nerrs == 1) &&
-      (erasures[0] < (k + 1))) {
-    // use xor decoding if a data chunk is missing or the first coding chunk
-    dout(20) << "isa_decode: reconstruct using region xor [" <<
-      erasures[0] << "]" << dendl;
-    ceph_assert(1 == s);
-    ceph_assert(k == r);
-    region_xor(recover_source, recover_target[0], k, blocksize);
+    dout(20) << "isa_decode: reconstruct using xor_gen [" << erasures[0] << "]" << dendl;
+    xor_gen(k+1, blocksize, (void **) recover_buf);
     return 0;
   }
 
@@ -221,9 +240,6 @@ ErasureCodeIsaDefault::isa_decode(int *erasures,
 
   int decode_index[k];
 
-  if (nerrs > m)
-    return -1;
-
   std::string erasure_signature; // describes a matrix configuration for caching
 
   // ---------------------------------------------
index 06c51bbc907b725f488b0ae21f36c8a091889922..85f1cd9cb469196830399bd831a1d23648e15bde 100644 (file)
@@ -30,6 +30,8 @@
 #include "ErasureCodeIsaTableCache.h"
 // -----------------------------------------------------------------------------
 
+#define EC_ISA_ADDRESS_ALIGNMENT 32u
+
 class ErasureCodeIsa : public ceph::ErasureCode {
 public:
 
diff --git a/src/erasure-code/isa/xor_op.cc b/src/erasure-code/isa/xor_op.cc
deleted file mode 100644 (file)
index 33f1335..0000000
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2014 CERN (Switzerland)
- *                                                                                                                                                                                                            * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>                                                                                                                                            *
- *  This library is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU Lesser General Public
- *  License as published by the Free Software Foundation; either
- *  version 2.1 of the License, or (at your option) any later version.
- *
- */
-
-// -----------------------------------------------------------------------------
-#include "xor_op.h"
-#include <stdio.h>
-#include <string.h>
-#include "arch/intel.h"
-#include "arch/arm.h"
-
-#if defined(__aarch64__) && defined(__ARM_NEON)
-  #include <arm_neon.h>
-#endif
-
-#include "include/ceph_assert.h"
-
-// -----------------------------------------------------------------------------
-
-
-// -----------------------------------------------------------------------------
-
-void
-// -----------------------------------------------------------------------------
-byte_xor(unsigned char* cw, unsigned char* dw, unsigned char* ew)
-// -----------------------------------------------------------------------------
-{
-  while (cw < ew)
-    *dw++ ^= *cw++;
-}
-
-// -----------------------------------------------------------------------------
-
-void
-// -----------------------------------------------------------------------------
-vector_xor(vector_op_t* cw,
-           vector_op_t* dw,
-           vector_op_t* ew)
-// -----------------------------------------------------------------------------
-{
-  ceph_assert(is_aligned(cw, EC_ISA_VECTOR_OP_WORDSIZE));
-  ceph_assert(is_aligned(dw, EC_ISA_VECTOR_OP_WORDSIZE));
-  ceph_assert(is_aligned(ew, EC_ISA_VECTOR_OP_WORDSIZE));
-  while (cw < ew) {
-    *dw++ ^= *cw++;
-  }
-}
-
-
-// -----------------------------------------------------------------------------
-
-void
-// -----------------------------------------------------------------------------
-region_xor(unsigned char** src,
-           unsigned char* parity,
-           int src_size,
-           unsigned size)
-{
-  if (!size) {
-    // nothing to do
-    return;
-  }
-
-  if (!src_size) {
-    // nothing to do
-    return;
-  }
-
-  if (src_size == 1) {
-    // just copy source to parity
-    memcpy(parity, src[0], size);
-    return;
-  }
-
-  unsigned size_left = size;
-
-  // ----------------------------------------------------------
-  // region or vector XOR operations require aligned addresses
-  // ----------------------------------------------------------
-
-  bool src_aligned = true;
-  for (int i = 0; i < src_size; i++) {
-    src_aligned &= is_aligned(src[i], EC_ISA_VECTOR_OP_WORDSIZE);
-  }
-
-  if (src_aligned &&
-      is_aligned(parity, EC_ISA_VECTOR_OP_WORDSIZE)) {
-
-#ifdef __x86_64__
-    if (ceph_arch_intel_sse2) {
-      // -----------------------------
-      // use SSE2 region xor function
-      // -----------------------------
-      unsigned region_size =
-        (size / EC_ISA_VECTOR_SSE2_WORDSIZE) * EC_ISA_VECTOR_SSE2_WORDSIZE;
-
-      size_left -= region_size;
-      // 64-byte region xor
-      region_sse2_xor((char**) src, (char*) parity, src_size, region_size);
-    } else
-#elif defined (__aarch64__) && defined(__ARM_NEON)
-    if (ceph_arch_neon) {
-      // -----------------------------
-      // use NEON region xor function
-      // -----------------------------
-      unsigned region_size = 
-        (size / EC_ISA_VECTOR_NEON_WORDSIZE) * EC_ISA_VECTOR_NEON_WORDSIZE;
-      size_left -= region_size;
-      region_neon_xor((char**) src, (char *) parity, src_size, region_size);
-    } else
-#endif
-    {
-      // --------------------------------------------
-      // use region xor based on vector xor operation
-      // --------------------------------------------
-      unsigned vector_words = size / EC_ISA_VECTOR_OP_WORDSIZE;
-      unsigned vector_size = vector_words * EC_ISA_VECTOR_OP_WORDSIZE;
-      memcpy(parity, src[0], vector_size);
-
-      size_left -= vector_size;
-      vector_op_t* p_vec = (vector_op_t*) parity;
-      for (int i = 1; i < src_size; i++) {
-        vector_op_t* s_vec = (vector_op_t*) src[i];
-        vector_op_t* e_vec = s_vec + vector_words;
-        vector_xor(s_vec, p_vec, e_vec);
-      }
-    }
-  }
-
-  if (size_left) {
-    // --------------------------------------------------
-    // xor the not aligned part with byte-wise region xor
-    // --------------------------------------------------
-    memcpy(parity + size - size_left, src[0] + size - size_left, size_left);
-    for (int i = 1; i < src_size; i++) {
-      byte_xor(src[i] + size - size_left, parity + size - size_left, src[i] + size);
-    }
-  }
-}
-
-// -----------------------------------------------------------------------------
-
-void
-// -----------------------------------------------------------------------------
-region_sse2_xor(char** src,
-                char* parity,
-                int src_size,
-                unsigned size)
-// -----------------------------------------------------------------------------
-{
-#ifdef __x86_64__
-  ceph_assert(!(size % EC_ISA_VECTOR_SSE2_WORDSIZE));
-  unsigned char* p;
-  int d, l;
-  unsigned i;
-  unsigned char* vbuf[256];
-
-  for (int v = 0; v < src_size; v++) {
-    vbuf[v] = (unsigned char*) src[v];
-  }
-
-  l = src_size;
-  p = (unsigned char*) parity;
-
-  for (i = 0; i < size; i += EC_ISA_VECTOR_SSE2_WORDSIZE) {
-    asm volatile("movdqa %0,%%xmm0" : : "m" (vbuf[0][i]));
-    asm volatile("movdqa %0,%%xmm1" : : "m" (vbuf[0][i + 16]));
-    asm volatile("movdqa %0,%%xmm2" : : "m" (vbuf[0][i + 32]));
-    asm volatile("movdqa %0,%%xmm3" : : "m" (vbuf[0][i + 48]));
-
-    for (d = 1; d < l; d++) {
-      asm volatile("movdqa %0,%%xmm4" : : "m" (vbuf[d][i]));
-      asm volatile("movdqa %0,%%xmm5" : : "m" (vbuf[d][i + 16]));
-      asm volatile("movdqa %0,%%xmm6" : : "m" (vbuf[d][i + 32]));
-      asm volatile("movdqa %0,%%xmm7" : : "m" (vbuf[d][i + 48]));
-      asm volatile("pxor %xmm4,%xmm0");
-      asm volatile("pxor %xmm5,%xmm1");
-      asm volatile("pxor %xmm6,%xmm2");
-      asm volatile("pxor %xmm7,%xmm3");
-    }
-    asm volatile("movntdq %%xmm0,%0" : "=m" (p[i]));
-    asm volatile("movntdq %%xmm1,%0" : "=m" (p[i + 16]));
-    asm volatile("movntdq %%xmm2,%0" : "=m" (p[i + 32]));
-    asm volatile("movntdq %%xmm3,%0" : "=m" (p[i + 48]));
-  }
-
-  asm volatile("sfence" : : : "memory");
-#endif // __x86_64__
-  return;
-}
-
-#if defined(__aarch64__) && defined(__ARM_NEON)
-void
-// -----------------------------------------------------------------------------
-region_neon_xor(char **src,
-                char *parity,
-                int src_size,
-                unsigned size)
-// -----------------------------------------------------------------------------
-{
-  ceph_assert(!(size % EC_ISA_VECTOR_NEON_WORDSIZE));
-  unsigned char *p = (unsigned char *)parity;
-  unsigned char *vbuf[256] = { NULL };
-  for (int v = 0; v < src_size; v++) {
-    vbuf[v] = (unsigned char *)src[v];
-  }
-
-  // ----------------------------------------------------------------------------------------
-  // NEON load instructions can load 128bits of data each time, and there are 2 load channels
-  // ----------------------------------------------------------------------------------------
-  for (unsigned i = 0; i < size; i += EC_ISA_VECTOR_NEON_WORDSIZE) {
-    uint64x2_t d0_1 = vld1q_u64((uint64_t *)(&(vbuf[0][i])));
-    uint64x2_t d0_2 = vld1q_u64((uint64_t *)(&(vbuf[0][i + 16])));
-
-    for (int d = 1; d < src_size; d++) {
-      uint64x2_t di_1 = vld1q_u64((uint64_t *)(&(vbuf[d][i])));
-      uint64x2_t di_2 = vld1q_u64((uint64_t *)(&(vbuf[d][i + 16])));
-
-      d0_1 = veorq_u64(d0_1, di_1);
-      d0_2 = veorq_u64(d0_2, di_2);
-    }
-
-    vst1q_u64((uint64_t *)p, d0_1);
-    vst1q_u64((uint64_t *)(p + 16), d0_2);
-    p += EC_ISA_VECTOR_NEON_WORDSIZE;
-  }
-  return;
-}
-#endif // __aarch64__ && __ARM_NEON
diff --git a/src/erasure-code/isa/xor_op.h b/src/erasure-code/isa/xor_op.h
deleted file mode 100644 (file)
index 86b1645..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2014 CERN (Switzerland)
- *                                                                                                                                                                                                           \
- * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>                                                                                                                                           \
- *
- *  This library is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU Lesser General Public
- *  License as published by the Free Software Foundation; either
- *  version 2.1 of the License, or (at your option) any later version.
- *
- */
-
-#ifndef EC_ISA_XOR_OP_H
-#define EC_ISA_XOR_OP_H
-
-// -----------------------------------------------------------------------------
-#include <assert.h>
-#include <stdint.h>
-// -----------------------------------------------------------------------------
-
-// -------------------------------------------------------------------------
-// declaration of 64/128-bit vector operations depending on availability
-// -------------------------------------------------------------------------
-// -------------------------------------------------------------------------
-
-#define EC_ISA_ADDRESS_ALIGNMENT 32u
-#define EC_ISA_VECTOR_SSE2_WORDSIZE 64u
-#define EC_ISA_VECTOR_NEON_WORDSIZE 32u
-#if __GNUC__ > 4 || \
-  ( (__GNUC__ == 4) && (__GNUC_MINOR__ >= 4) ) ||\
-  (__clang__ == 1 )
-#ifdef EC_ISA_VECTOR_OP_DEBUG
-#pragma message "* using 128-bit vector operations in " __FILE__
-#endif
-
-// -------------------------------------------------------------------------
-// use 128-bit pointer
-// -------------------------------------------------------------------------
-typedef long vector_op_t __attribute__((vector_size(16)));
-#define EC_ISA_VECTOR_OP_WORDSIZE 16
-#else
-// -------------------------------------------------------------------------
-// use 64-bit pointer
-// -------------------------------------------------------------------------
-typedef unsigned long long vector_op_t;
-#define EC_ISA_VECTOR_OP_WORDSIZE 8
-#endif
-
-
-// -------------------------------------------------------------------------
-// check if a pointer is aligend to byte_count
-// -------------------------------------------------------------------------
-#define is_aligned(POINTER, BYTE_COUNT) \
-  (((uintptr_t)(const void *)(POINTER)) % (BYTE_COUNT) == 0)
-
-// -------------------------------------------------------------------------
-// compute byte-wise XOR of cw and dw block, ew contains the end address of cw
-// -------------------------------------------------------------------------
-void
-byte_xor(unsigned char* cw, unsigned char* dw, unsigned char* ew);
-
-// -------------------------------------------------------------------------
-// compute word-wise XOR of cw and dw block, ew contains the end address of cw
-// -------------------------------------------------------------------------
-void
-vector_xor(vector_op_t* cw, vector_op_t* dw, vector_op_t* ew);
-
-// -------------------------------------------------------------------------
-// compute region XOR like parity = src[0] ^ src[1] ... ^ src[src_size-]
-// -------------------------------------------------------------------------
-void
-region_xor(unsigned char** src, unsigned char* parity, int src_size, unsigned size);
-
-// -------------------------------------------------------------------------
-// compute region XOR like parity = src[0] ^ src[1] ... ^ src[src_size-]
-// using SSE2 64-byte operations
-// -------------------------------------------------------------------------
-void
-region_sse2_xor(char** src /* array of 64-byte aligned source pointer to xor */,
-                char* parity /* 64-byte aligned output pointer containing the parity */,
-                int src_size /* size of the source pointer array */,
-                unsigned size /* size of the region to xor */);
-
-#if defined(__aarch64__) && defined(__ARM_NEON)
-// -------------------------------------------------------------------------
-// compute region XOR like parity = src[0] ^ src[1] ... ^ src[src_size-1]
-// using NEON 32-byte operations
-// -------------------------------------------------------------------------
-void
-region_neon_xor(char** src    /* array of 64-byte aligned source pointer to xor */,
-                char* parity  /* 32-byte aligned output pointer containing the parity */,
-                int src_size  /* size of the source pointer array */,
-                unsigned size /* size of the region to xor */);
-#endif // __aarch64__ && __ARM_NEON
-#endif // EC_ISA_XOR_OP_H
index bbd4441fc7290826bb4f878624b4f9d81682a532..5235c10caba8a1acd39d649015d9087d9ecf7551 100644 (file)
@@ -21,7 +21,6 @@
 #include "crush/CrushWrapper.h"
 #include "include/stringify.h"
 #include "erasure-code/isa/ErasureCodeIsa.h"
-#include "erasure-code/isa/xor_op.h"
 #include "global/global_context.h"
 #include "common/config.h"
 #include "gtest/gtest.h"