From 4b39d33c4f41f8707213804e80d26830a4f87bbd Mon Sep 17 00:00:00 2001 From: Piotr Kubaj Date: Thu, 18 Feb 2021 14:40:23 +0100 Subject: [PATCH] crc32c: fix build on ppc64le with clang Ceph uses assembly files from https://github.com/antonblanchard/crc32-vpmsum Update them to the newest versions, which fixes build with Clang. Signed-off-by: Piotr Kubaj --- src/common/crc32c_ppc_asm.S | 34 ++- src/common/crc32c_ppc_fast_zero_asm.S | 85 ++++-- src/common/ppc-asm.h | 381 ++++++++++++++++++++++++++ src/common/ppc-opcode.h | 34 +++ 4 files changed, 507 insertions(+), 27 deletions(-) create mode 100644 src/common/ppc-asm.h diff --git a/src/common/crc32c_ppc_asm.S b/src/common/crc32c_ppc_asm.S index 1dc6dd1cf31..096d98591ba 100644 --- a/src/common/crc32c_ppc_asm.S +++ b/src/common/crc32c_ppc_asm.S @@ -17,16 +17,25 @@ * http://en.wikipedia.org/wiki/Barrett_reduction * * Copyright (C) 2015 Anton Blanchard , IBM - * Copyright (C) 2017 International Business Machines Corp. - * All rights reserved. * * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
+ * modify it under the terms of either: + * + * a) the GNU General Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) + * any later version, or + * b) the Apache License, Version 2.0 */ + +#if defined (__clang__) +#ifndef __ALTIVEC__ +#define __ALTIVEC__ +#endif +#include "ppc-asm.h" +#else #include <ppc-asm.h> -#include "common/ppc-opcode.h" +#endif +#include "ppc-opcode.h" #undef toc @@ -45,8 +54,11 @@ /* byte reverse permute constant */ .octa 0x0F0E0D0C0B0A09080706050403020100 -#define __ASSEMBLY__ +#ifdef CRC32_CONSTANTS_HEADER +#include CRC32_CONSTANTS_HEADER +#else #include "crc32c_ppc_constants.h" +#endif .text @@ -80,8 +92,12 @@ #define VPERM(A, B, C, D) #endif +#ifndef CRC32_FUNCTION_ASM +#define CRC32_FUNCTION_ASM __crc32_vpmsum +#endif + /* unsigned int __crc32_vpmsum(unsigned int crc, void *p, unsigned long len) */ -FUNC_START(__crc32_vpmsum) +FUNC_START(CRC32_FUNCTION_ASM) std r31,-8(r1) std r30,-16(r1) std r29,-24(r1) @@ -768,4 +784,4 @@ FUNC_START(__crc32_vpmsum) mr r3,r10 b .Lout -FUNC_END(__crc32_vpmsum) +FUNC_END(CRC32_FUNCTION_ASM) diff --git a/src/common/crc32c_ppc_fast_zero_asm.S b/src/common/crc32c_ppc_fast_zero_asm.S index a53df1deead..cff9cce7fd4 100644 --- a/src/common/crc32c_ppc_fast_zero_asm.S +++ b/src/common/crc32c_ppc_fast_zero_asm.S @@ -1,6 +1,6 @@ /* * Use the fixed point version of Barrett reduction to compute a mod n - * over GF(2) for given n using POWER8 instructions. We use k = 32.
* * http://en.wikipedia.org/wiki/Barrett_reduction * @@ -14,33 +14,39 @@ * any later version, or * b) the Apache License, Version 2.0 */ -#include -#include "common/ppc-opcode.h" - -#undef toc -#ifndef r1 -#define r1 1 +#if defined (__clang__) +#ifndef __ALTIVEC__ +#define __ALTIVEC__ #endif - -#ifndef r2 -#define r2 2 +#include "ppc-asm.h" +#else +#include #endif +#include "ppc-opcode.h" .section .data .balign 16 - -.barrett_fz_constants: +.constants: /* Barrett constant m - (4^32)/n */ - .octa 0x0000000000000000000000011f91caf6 /* x^64 div p(x) */ + .octa 0x00000000000000000000000104d101df + /* Barrett constant n */ - .octa 0x0000000000000000000000011edc6f41 + .octa 0x00000000000000000000000104c11db7 + +.bit_reflected_constants: + /* 33 bit reflected Barrett constant m - (4^32)/n */ + .octa 0x000000000000000000000001f7011641 + + /* 33 bit reflected Barrett constant n */ + .octa 0x000000000000000000000001db710641 + + .text -.text /* unsigned int barrett_reduction(unsigned long val) */ FUNC_START(barrett_reduction) - addis r4,r2,.barrett_fz_constants@toc@ha - addi r4,r4,.barrett_fz_constants@toc@l + lis r4,.constants@ha + la r4,.constants@l(r4) li r5,16 vxor v1,v1,v1 /* zero v1 */ @@ -74,4 +80,47 @@ FUNC_START(barrett_reduction) blr FUNC_END(barrett_reduction) - + +/* unsigned int barrett_reduction_reflected(unsigned long val) */ +FUNC_START(barrett_reduction_reflected) + lis r4,.bit_reflected_constants@ha + la r4,.bit_reflected_constants@l(r4) + + li r5,16 + vxor v1,v1,v1 /* zero v1 */ + + /* Get a into v0 */ + MTVRD(v0, r3) + vsldoi v0,v1,v0,8 /* shift into bottom 64 bits, this is a */ + + /* Load constants */ + lvx v2,0,r4 /* m */ + lvx v3,r5,r4 /* n */ + + vspltisw v5,-1 /* all ones */ + vsldoi v6,v1,v5,4 /* bitmask with low 32 bits set */ + + /* + * Now for the Barrett reduction algorithm. 
Instead of bit reflecting + * our data (which is expensive to do), we bit reflect our constants + * and our algorithm, which means the intermediate data in our vector + * registers goes from 0-63 instead of 63-0. We can reflect the + * algorithm because we don't carry in mod 2 arithmetic. + */ + vand v4,v0,v6 /* bottom 32 bits of a */ + VPMSUMD(v4,v4,v2) /* ma */ + vand v4,v4,v6 /* bottom 32bits of ma */ + VPMSUMD(v4,v4,v3) /* qn */ + vxor v0,v0,v4 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Since we are bit reflected, the result (ie the low 32 bits) is in the + * high 32 bits. We just need to shift it left 4 bytes + * V0 [ 0 1 X 3 ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,v1,4 /* shift result into top 64 bits of v0 */ + MFVRD(r3, v0) + + blr +FUNC_END(barrett_reduction_reflected) diff --git a/src/common/ppc-asm.h b/src/common/ppc-asm.h new file mode 100644 index 00000000000..be34cf8febf --- /dev/null +++ b/src/common/ppc-asm.h @@ -0,0 +1,381 @@ +/* PowerPC asm definitions for GNU C. + +Copyright (C) 2002-2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +/* Under winnt, 1) gas supports the following as names and 2) in particular + defining "toc" breaks the FUNC_START macro as ".toc" becomes ".2" */ + +#define r0 0 +#define sp 1 +#define toc 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + +#define cr0 0 +#define cr1 1 +#define cr2 2 +#define cr3 3 +#define cr4 4 +#define cr5 5 +#define cr6 6 +#define cr7 7 + +#define f0 0 +#define f1 1 +#define f2 2 +#define f3 3 +#define f4 4 +#define f5 5 +#define f6 6 +#define f7 7 +#define f8 8 +#define f9 9 +#define f10 10 +#define f11 11 +#define f12 12 +#define f13 13 +#define f14 14 +#define f15 15 +#define f16 16 +#define f17 17 +#define f18 18 +#define f19 19 +#define f20 20 +#define f21 21 +#define f22 22 +#define f23 23 +#define f24 24 +#define f25 25 +#define f26 26 +#define f27 27 +#define f28 28 +#define f29 29 +#define f30 30 +#define f31 31 + +#ifdef __VSX__ +#define f32 32 +#define f33 33 +#define f34 34 +#define f35 35 +#define f36 36 +#define f37 37 +#define f38 38 +#define f39 39 +#define f40 40 +#define f41 41 +#define f42 42 +#define f43 43 +#define f44 44 +#define f45 45 +#define f46 46 +#define f47 47 +#define f48 48 +#define f49 49 +#define f50 30 +#define f51 51 +#define f52 52 +#define f53 53 +#define f54 54 +#define f55 55 +#define f56 56 +#define f57 57 +#define f58 58 +#define f59 59 +#define f60 60 +#define f61 61 +#define f62 62 +#define f63 63 +#endif + +#ifdef __ALTIVEC__ +#define v0 0 +#define v1 1 +#define v2 2 +#define v3 3 +#define v4 4 +#define v5 5 +#define v6 6 +#define v7 7 +#define v8 8 +#define v9 9 +#define v10 10 +#define v11 11 
+#define v12 12 +#define v13 13 +#define v14 14 +#define v15 15 +#define v16 16 +#define v17 17 +#define v18 18 +#define v19 19 +#define v20 20 +#define v21 21 +#define v22 22 +#define v23 23 +#define v24 24 +#define v25 25 +#define v26 26 +#define v27 27 +#define v28 28 +#define v29 29 +#define v30 30 +#define v31 31 +#endif + +#ifdef __VSX__ +#define vs0 0 +#define vs1 1 +#define vs2 2 +#define vs3 3 +#define vs4 4 +#define vs5 5 +#define vs6 6 +#define vs7 7 +#define vs8 8 +#define vs9 9 +#define vs10 10 +#define vs11 11 +#define vs12 12 +#define vs13 13 +#define vs14 14 +#define vs15 15 +#define vs16 16 +#define vs17 17 +#define vs18 18 +#define vs19 19 +#define vs20 20 +#define vs21 21 +#define vs22 22 +#define vs23 23 +#define vs24 24 +#define vs25 25 +#define vs26 26 +#define vs27 27 +#define vs28 28 +#define vs29 29 +#define vs30 30 +#define vs31 31 +#define vs32 32 +#define vs33 33 +#define vs34 34 +#define vs35 35 +#define vs36 36 +#define vs37 37 +#define vs38 38 +#define vs39 39 +#define vs40 40 +#define vs41 41 +#define vs42 42 +#define vs43 43 +#define vs44 44 +#define vs45 45 +#define vs46 46 +#define vs47 47 +#define vs48 48 +#define vs49 49 +#define vs50 50 +#define vs51 51 +#define vs52 52 +#define vs53 53 +#define vs54 54 +#define vs55 55 +#define vs56 56 +#define vs57 57 +#define vs58 58 +#define vs59 59 +#define vs60 60 +#define vs61 61 +#define vs62 62 +#define vs63 63 +#endif + +/* + * Macros to glue together two tokens. + */ + +#ifdef __STDC__ +#define XGLUE(a,b) a##b +#else +#define XGLUE(a,b) a/**/b +#endif + +#define GLUE(a,b) XGLUE(a,b) + +/* + * Macros to begin and end a function written in assembler. If -mcall-aixdesc + * or -mcall-nt, create a function descriptor with the given name, and create + * the real function with one or two leading periods respectively. + */ + +#if defined(__powerpc64__) && _CALL_ELF == 2 + +/* Defining "toc" above breaks @toc in assembler code.
*/ +#undef toc + +#define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name) +#define JUMP_TARGET(name) FUNC_NAME(name) +#define FUNC_START(name) \ + .type FUNC_NAME(name),@function; \ + .globl FUNC_NAME(name); \ +FUNC_NAME(name): \ +0: addis 2,12,(.TOC.-0b)@ha; \ + addi 2,2,(.TOC.-0b)@l; \ + .localentry FUNC_NAME(name),.-FUNC_NAME(name) + +#define HIDDEN_FUNC(name) \ + FUNC_START(name) \ + .hidden FUNC_NAME(name); + +#define FUNC_END(name) \ + .size FUNC_NAME(name),.-FUNC_NAME(name) + +#elif defined (__powerpc64__) + +#define FUNC_NAME(name) GLUE(.,name) +#define JUMP_TARGET(name) FUNC_NAME(name) +#define FUNC_START(name) \ + .section ".opd","aw"; \ +name: \ + .quad GLUE(.,name); \ + .quad .TOC.@tocbase; \ + .quad 0; \ + .previous; \ + .type GLUE(.,name),@function; \ + .globl name; \ + .globl GLUE(.,name); \ +GLUE(.,name): + +#define HIDDEN_FUNC(name) \ + FUNC_START(name) \ + .hidden name; \ + .hidden GLUE(.,name); + +#define FUNC_END(name) \ +GLUE(.L,name): \ + .size GLUE(.,name),GLUE(.L,name)-GLUE(.,name) + +#elif defined(_CALL_AIXDESC) + +#ifdef _RELOCATABLE +#define DESC_SECTION ".got2" +#else +#define DESC_SECTION ".got1" +#endif + +#define FUNC_NAME(name) GLUE(.,name) +#define JUMP_TARGET(name) FUNC_NAME(name) +#define FUNC_START(name) \ + .section DESC_SECTION,"aw"; \ +name: \ + .long GLUE(.,name); \ + .long _GLOBAL_OFFSET_TABLE_; \ + .long 0; \ + .previous; \ + .type GLUE(.,name),@function; \ + .globl name; \ + .globl GLUE(.,name); \ +GLUE(.,name): + +#define HIDDEN_FUNC(name) \ + FUNC_START(name) \ + .hidden name; \ + .hidden GLUE(.,name); + +#define FUNC_END(name) \ +GLUE(.L,name): \ + .size GLUE(.,name),GLUE(.L,name)-GLUE(.,name) + +#else + +#define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name) +#if defined __PIC__ || defined __pic__ +#define JUMP_TARGET(name) FUNC_NAME(name@plt) +#else +#define JUMP_TARGET(name) FUNC_NAME(name) +#endif +#define FUNC_START(name) \ + .type FUNC_NAME(name),@function; \ + .globl FUNC_NAME(name); \ +FUNC_NAME(name): + 
+#define HIDDEN_FUNC(name) \ + FUNC_START(name) \ + .hidden FUNC_NAME(name); + +#define FUNC_END(name) \ +GLUE(.L,name): \ + .size FUNC_NAME(name),GLUE(.L,name)-FUNC_NAME(name) +#endif + +#ifdef IN_GCC +/* For HAVE_GAS_CFI_DIRECTIVE. */ +#include "auto-host.h" + +#ifdef HAVE_GAS_CFI_DIRECTIVE +# define CFI_STARTPROC .cfi_startproc +# define CFI_ENDPROC .cfi_endproc +# define CFI_OFFSET(reg, off) .cfi_offset reg, off +# define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg +# define CFI_RESTORE(reg) .cfi_restore reg +#else +# define CFI_STARTPROC +# define CFI_ENDPROC +# define CFI_OFFSET(reg, off) +# define CFI_DEF_CFA_REGISTER(reg) +# define CFI_RESTORE(reg) +#endif +#endif + +#if defined __linux__ + .section .note.GNU-stack + .previous +#endif diff --git a/src/common/ppc-opcode.h b/src/common/ppc-opcode.h index aabb45c3890..16d5b2e37d7 100644 --- a/src/common/ppc-opcode.h +++ b/src/common/ppc-opcode.h @@ -1,3 +1,37 @@ +/* + * Copyright (C) 2015 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of either: + * + * a) the GNU General Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) + * any later version, or + * b) the Apache License, Version 2.0 + */ +#ifndef __OPCODES_H +#define __OPCODES_H + +#define __PPC_RA(a) (((a) & 0x1f) << 16) +#define __PPC_RB(b) (((b) & 0x1f) << 11) +#define __PPC_XA(a) ((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3)) +#define __PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4)) +#define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5)) +#define __PPC_XT(s) __PPC_XS(s) +#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b)) +#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b)) + +#define PPC_INST_VPMSUMW 0x10000488 +#define PPC_INST_VPMSUMD 0x100004c8 +#define PPC_INST_MFVSRD 0x7c000066 +#define PPC_INST_MTVSRD 0x7c000166 + +#define VPMSUMW(t, a, b) .long 
PPC_INST_VPMSUMW | VSX_XX3((t), a, b) +#define VPMSUMD(t, a, b) .long PPC_INST_VPMSUMD | VSX_XX3((t), a, b) +#define MFVRD(a, t) .long PPC_INST_MFVSRD | VSX_XX1((t)+32, a, 0) +#define MTVRD(t, a) .long PPC_INST_MTVSRD | VSX_XX1((t)+32, a, 0) + +#endif /* Copyright (C) 2017 International Business Machines Corp. * All rights reserved. * -- 2.39.5