From 9ba65996f12c5ad472d8aa8278d875b431fdfc68 Mon Sep 17 00:00:00 2001 From: Andreas Peters Date: Wed, 18 Dec 2013 14:47:58 +0100 Subject: [PATCH] EC-JERASURE: rewrite region-xor function using vector operations to get ~ x1.5 speedups for erasure code and guarantee proper 64-bit/128-bit buffer alignment --- .../ErasureCodeJerasure.cc | 22 ++++++-- src/osd/ErasureCodePluginJerasure/galois.c | 45 +++++++++------- src/osd/ErasureCodePluginJerasure/vectorop.h | 51 +++++++++++++++++++ 3 files changed, 96 insertions(+), 22 deletions(-) create mode 100644 src/osd/ErasureCodePluginJerasure/vectorop.h diff --git a/src/osd/ErasureCodePluginJerasure/ErasureCodeJerasure.cc b/src/osd/ErasureCodePluginJerasure/ErasureCodeJerasure.cc index fe656e58ee00..84c82c97151f 100644 --- a/src/osd/ErasureCodePluginJerasure/ErasureCodeJerasure.cc +++ b/src/osd/ErasureCodePluginJerasure/ErasureCodeJerasure.cc @@ -18,6 +18,7 @@ #include <algorithm> #include "common/debug.h" #include "ErasureCodeJerasure.h" +#include "vectorop.h" extern "C" { #include "jerasure.h" #include "reed_sol.h" @@ -192,7 +193,11 @@ int ErasureCodeJerasureReedSolomonVandermonde::jerasure_decode(int *erasures, unsigned ErasureCodeJerasureReedSolomonVandermonde::get_alignment() { - return k*w*sizeof(int); + unsigned alignment = k*w*sizeof(int); + if ( ((w*sizeof(int))%LARGEST_VECTOR_WORDSIZE) ) + alignment = k*w*LARGEST_VECTOR_WORDSIZE; + return alignment; + } void ErasureCodeJerasureReedSolomonVandermonde::parse(const map<std::string,std::string> &parameters) @@ -232,7 +237,10 @@ int ErasureCodeJerasureReedSolomonRAID6::jerasure_decode(int *erasures, unsigned ErasureCodeJerasureReedSolomonRAID6::get_alignment() { - return k*w*sizeof(int); + unsigned alignment = k*w*sizeof(int); + if ( ((w*sizeof(int))%LARGEST_VECTOR_WORDSIZE) ) + alignment = k*w*LARGEST_VECTOR_WORDSIZE; + return alignment; } void ErasureCodeJerasureReedSolomonRAID6::parse(const map<std::string,std::string> &parameters) @@ -274,7 +282,10 @@ int ErasureCodeJerasureCauchy::jerasure_decode(int *erasures, unsigned 
ErasureCodeJerasureCauchy::get_alignment() { - return k*w*packetsize*sizeof(int); + unsigned alignment = k*w*packetsize*sizeof(int); + if ( ((w*packetsize*sizeof(int))%LARGEST_VECTOR_WORDSIZE) ) + alignment = k*w*packetsize*LARGEST_VECTOR_WORDSIZE; + return alignment; } void ErasureCodeJerasureCauchy::parse(const map<std::string,std::string> &parameters) @@ -341,7 +352,10 @@ int ErasureCodeJerasureLiberation::jerasure_decode(int *erasures, unsigned ErasureCodeJerasureLiberation::get_alignment() { - return k*w*packetsize*sizeof(int); + unsigned alignment = k*w*packetsize*sizeof(int); + if ( ((w*packetsize*sizeof(int))%LARGEST_VECTOR_WORDSIZE) ) + alignment = k*w*packetsize*LARGEST_VECTOR_WORDSIZE; + return alignment; } void ErasureCodeJerasureLiberation::parse(const map<std::string,std::string> &parameters) diff --git a/src/osd/ErasureCodePluginJerasure/galois.c b/src/osd/ErasureCodePluginJerasure/galois.c index 0de6fbd334cf..f1ec347fadc0 100755 --- a/src/osd/ErasureCodePluginJerasure/galois.c +++ b/src/osd/ErasureCodePluginJerasure/galois.c @@ -49,8 +49,10 @@ POSSIBILITY OF SUCH DAMAGE. 
#include <stdio.h> #include <stdlib.h> #include <string.h> +#include <assert.h> #include "galois.h" +#include "vectorop.h" #define NONE (10) #define TABLE (11) @@ -737,31 +739,38 @@ void galois_w32_region_multiply(char *region, /* Region to multiply */ } } return; - } + void galois_region_xor( char *r1, /* Region 1 */ char *r2, /* Region 2 */ char *r3, /* Sum region (r3 = r1 ^ r2) -- can be r1 or r2 */ int nbytes) /* Number of bytes in region */ { - long *l1; - long *l2; - long *l3; - long *ltop; - char *ctop; - - ctop = r1 + nbytes; - ltop = (long *) ctop; - l1 = (long *) r1; - l2 = (long *) r2; - l3 = (long *) r3; - - while (l1 < ltop) { - *l3 = ((*l1) ^ (*l2)); - l1++; - l2++; - l3++; + if (nbytes%VECTOR_WORDSIZE) { + assert(!((long long)r1%sizeof(long))); + assert(!((long long)r2%sizeof(long))); + assert(!((long long)r3%sizeof(long))); + long* l1 = (long*)r1; + long* l2 = (long*)r2; + long* l3 = (long*)r3; + char *ctop = r1 + nbytes; + long* ltop = (long*)ctop; + while (l1 < ltop) { + *l3++ = ((*l1++) ^ (*l2++)); + } + } else { + assert(!((long long)r1%VECTOR_WORDSIZE)); + assert(!((long long)r2%VECTOR_WORDSIZE)); + assert(!((long long)r3%VECTOR_WORDSIZE)); + vector_op_t* l1 = (vector_op_t*)r1; + vector_op_t* l2 = (vector_op_t*)r2; + vector_op_t* l3 = (vector_op_t*)r3; + char *ctop = r1 + nbytes; + vector_op_t* ltop = (vector_op_t*)ctop; + while (l1 < ltop) { + *l3++ = ((*l1++) ^ (*l2++)); + } } } diff --git a/src/osd/ErasureCodePluginJerasure/vectorop.h b/src/osd/ErasureCodePluginJerasure/vectorop.h new file mode 100644 index 000000000000..5135f82d8774 --- /dev/null +++ b/src/osd/ErasureCodePluginJerasure/vectorop.h @@ -0,0 +1,51 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 CERN/Switzerland + * + * + * Authors: Andreas-Joachim Peters + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * 
License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + */ + +#ifndef CEPH_VECTOROP_H +#define CEPH_VECTOROP_H + +// ------------------------------------------------------------------------- +// constant used in the block alignment function to allow for vector ops +// ------------------------------------------------------------------------- +#define LARGEST_VECTOR_WORDSIZE 16 + +// ------------------------------------------------------------------------- +// switch to 128-bit XOR operations if possible +// ------------------------------------------------------------------------- +#if __GNUC__ > 4 || \ + (__GNUC__ == 4 && (__GNUC_MINOR__ >= 4) ) || \ + (__clang__ == 1 ) + +#ifdef VECTOROP_DEBUG +#pragma message "* using 128-bit vector operations in " __FILE__ +#endif +// ------------------------------------------------------------------------- +// use 128-bit pointer +// ------------------------------------------------------------------------- +typedef long vector_op_t __attribute__ ((vector_size (16))); +#define VECTOR_WORDSIZE 16 +#else +// ------------------------------------------------------------------------- +// use 64-bit pointer +// ------------------------------------------------------------------------- +typedef unsigned long long vector_op_t; +#define VECTOR_WORDSIZE 8 +#endif + +#endif /* CEPH_VECTOROP_H */ + -- 2.47.3