CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_ECHAINIV=y
CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_ANSI_CPRNG=y
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_GHASH_ARM64_CE=y
-CONFIG_CRYPTO_SHA3_ARM64=m
CONFIG_CRYPTO_SM3_ARM64_CE=m
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
CONFIG_CRYPTO_AES_ARM64_BS=m
Architecture: arm64 using:
- NEON (Advanced SIMD) extensions
-config CRYPTO_SHA3_ARM64
- tristate "Hash functions: SHA-3 (ARMv8.2 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
- select CRYPTO_HASH
- select CRYPTO_SHA3
- help
- SHA-3 secure hash algorithms (FIPS 202)
-
- Architecture: arm64 using:
- - ARMv8.2 Crypto Extensions
-
config CRYPTO_SM3_NEON
tristate "Hash functions: SM3 (NEON)"
depends on KERNEL_MODE_NEON
# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
#
-obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o
-sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
-
obj-$(CONFIG_CRYPTO_SM3_NEON) += sm3-neon.o
sm3-neon-y := sm3-neon-glue.o sm3-neon-core.o
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
- *
- * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
- .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
- .set .Lv\b\().2d, \b
- .set .Lv\b\().16b, \b
- .endr
-
- /*
- * ARMv8.2 Crypto Extensions instructions
- */
- .macro eor3, rd, rn, rm, ra
- .inst 0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
- .endm
-
- .macro rax1, rd, rn, rm
- .inst 0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
- .endm
-
- .macro bcax, rd, rn, rm, ra
- .inst 0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
- .endm
-
- .macro xar, rd, rn, rm, imm6
- .inst 0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
- .endm
-
- /*
- * size_t sha3_ce_transform(struct sha3_state *state, const u8 *data,
- * size_t nblocks, size_t block_size)
- *
- * block_size is assumed to be one of 72 (SHA3-512), 104 (SHA3-384), 136
- * (SHA3-256 and SHAKE256), 144 (SHA3-224), or 168 (SHAKE128).
- */
- .text
-SYM_FUNC_START(sha3_ce_transform)
- /* load state */
- add x8, x0, #32
- ld1 { v0.1d- v3.1d}, [x0]
- ld1 { v4.1d- v7.1d}, [x8], #32
- ld1 { v8.1d-v11.1d}, [x8], #32
- ld1 {v12.1d-v15.1d}, [x8], #32
- ld1 {v16.1d-v19.1d}, [x8], #32
- ld1 {v20.1d-v23.1d}, [x8], #32
- ld1 {v24.1d}, [x8]
-
-0: sub x2, x2, #1
- mov w8, #24
- adr_l x9, .Lsha3_rcon
-
- /* load input */
- ld1 {v25.8b-v28.8b}, [x1], #32
- ld1 {v29.8b}, [x1], #8
- eor v0.8b, v0.8b, v25.8b
- eor v1.8b, v1.8b, v26.8b
- eor v2.8b, v2.8b, v27.8b
- eor v3.8b, v3.8b, v28.8b
- eor v4.8b, v4.8b, v29.8b
-
- ld1 {v25.8b-v28.8b}, [x1], #32
- eor v5.8b, v5.8b, v25.8b
- eor v6.8b, v6.8b, v26.8b
- eor v7.8b, v7.8b, v27.8b
- eor v8.8b, v8.8b, v28.8b
- cmp x3, #72
- b.eq 3f /* SHA3-512 (block_size=72)? */
-
- ld1 {v25.8b-v28.8b}, [x1], #32
- eor v9.8b, v9.8b, v25.8b
- eor v10.8b, v10.8b, v26.8b
- eor v11.8b, v11.8b, v27.8b
- eor v12.8b, v12.8b, v28.8b
- cmp x3, #104
- b.eq 3f /* SHA3-384 (block_size=104)? */
-
- ld1 {v25.8b-v28.8b}, [x1], #32
- eor v13.8b, v13.8b, v25.8b
- eor v14.8b, v14.8b, v26.8b
- eor v15.8b, v15.8b, v27.8b
- eor v16.8b, v16.8b, v28.8b
- cmp x3, #144
- b.lt 3f /* SHA3-256 or SHAKE256 (block_size=136)? */
- b.eq 2f /* SHA3-224 (block_size=144)? */
-
- /* SHAKE128 (block_size=168) */
- ld1 {v25.8b-v28.8b}, [x1], #32
- eor v17.8b, v17.8b, v25.8b
- eor v18.8b, v18.8b, v26.8b
- eor v19.8b, v19.8b, v27.8b
- eor v20.8b, v20.8b, v28.8b
- b 3f
-2:
- /* SHA3-224 (block_size=144) */
- ld1 {v25.8b}, [x1], #8
- eor v17.8b, v17.8b, v25.8b
-
-3: sub w8, w8, #1
-
- eor3 v29.16b, v4.16b, v9.16b, v14.16b
- eor3 v26.16b, v1.16b, v6.16b, v11.16b
- eor3 v28.16b, v3.16b, v8.16b, v13.16b
- eor3 v25.16b, v0.16b, v5.16b, v10.16b
- eor3 v27.16b, v2.16b, v7.16b, v12.16b
- eor3 v29.16b, v29.16b, v19.16b, v24.16b
- eor3 v26.16b, v26.16b, v16.16b, v21.16b
- eor3 v28.16b, v28.16b, v18.16b, v23.16b
- eor3 v25.16b, v25.16b, v15.16b, v20.16b
- eor3 v27.16b, v27.16b, v17.16b, v22.16b
-
- rax1 v30.2d, v29.2d, v26.2d // bc[0]
- rax1 v26.2d, v26.2d, v28.2d // bc[2]
- rax1 v28.2d, v28.2d, v25.2d // bc[4]
- rax1 v25.2d, v25.2d, v27.2d // bc[1]
- rax1 v27.2d, v27.2d, v29.2d // bc[3]
-
- eor v0.16b, v0.16b, v30.16b
- xar v29.2d, v1.2d, v25.2d, (64 - 1)
- xar v1.2d, v6.2d, v25.2d, (64 - 44)
- xar v6.2d, v9.2d, v28.2d, (64 - 20)
- xar v9.2d, v22.2d, v26.2d, (64 - 61)
- xar v22.2d, v14.2d, v28.2d, (64 - 39)
- xar v14.2d, v20.2d, v30.2d, (64 - 18)
- xar v31.2d, v2.2d, v26.2d, (64 - 62)
- xar v2.2d, v12.2d, v26.2d, (64 - 43)
- xar v12.2d, v13.2d, v27.2d, (64 - 25)
- xar v13.2d, v19.2d, v28.2d, (64 - 8)
- xar v19.2d, v23.2d, v27.2d, (64 - 56)
- xar v23.2d, v15.2d, v30.2d, (64 - 41)
- xar v15.2d, v4.2d, v28.2d, (64 - 27)
- xar v28.2d, v24.2d, v28.2d, (64 - 14)
- xar v24.2d, v21.2d, v25.2d, (64 - 2)
- xar v8.2d, v8.2d, v27.2d, (64 - 55)
- xar v4.2d, v16.2d, v25.2d, (64 - 45)
- xar v16.2d, v5.2d, v30.2d, (64 - 36)
- xar v5.2d, v3.2d, v27.2d, (64 - 28)
- xar v27.2d, v18.2d, v27.2d, (64 - 21)
- xar v3.2d, v17.2d, v26.2d, (64 - 15)
- xar v25.2d, v11.2d, v25.2d, (64 - 10)
- xar v26.2d, v7.2d, v26.2d, (64 - 6)
- xar v30.2d, v10.2d, v30.2d, (64 - 3)
-
- bcax v20.16b, v31.16b, v22.16b, v8.16b
- bcax v21.16b, v8.16b, v23.16b, v22.16b
- bcax v22.16b, v22.16b, v24.16b, v23.16b
- bcax v23.16b, v23.16b, v31.16b, v24.16b
- bcax v24.16b, v24.16b, v8.16b, v31.16b
-
- ld1r {v31.2d}, [x9], #8
-
- bcax v17.16b, v25.16b, v19.16b, v3.16b
- bcax v18.16b, v3.16b, v15.16b, v19.16b
- bcax v19.16b, v19.16b, v16.16b, v15.16b
- bcax v15.16b, v15.16b, v25.16b, v16.16b
- bcax v16.16b, v16.16b, v3.16b, v25.16b
-
- bcax v10.16b, v29.16b, v12.16b, v26.16b
- bcax v11.16b, v26.16b, v13.16b, v12.16b
- bcax v12.16b, v12.16b, v14.16b, v13.16b
- bcax v13.16b, v13.16b, v29.16b, v14.16b
- bcax v14.16b, v14.16b, v26.16b, v29.16b
-
- bcax v7.16b, v30.16b, v9.16b, v4.16b
- bcax v8.16b, v4.16b, v5.16b, v9.16b
- bcax v9.16b, v9.16b, v6.16b, v5.16b
- bcax v5.16b, v5.16b, v30.16b, v6.16b
- bcax v6.16b, v6.16b, v4.16b, v30.16b
-
- bcax v3.16b, v27.16b, v0.16b, v28.16b
- bcax v4.16b, v28.16b, v1.16b, v0.16b
- bcax v0.16b, v0.16b, v2.16b, v1.16b
- bcax v1.16b, v1.16b, v27.16b, v2.16b
- bcax v2.16b, v2.16b, v28.16b, v27.16b
-
- eor v0.16b, v0.16b, v31.16b
-
- cbnz w8, 3b
- cond_yield 4f, x8, x9
- cbnz x2, 0b
-
- /* save state */
-4: st1 { v0.1d- v3.1d}, [x0], #32
- st1 { v4.1d- v7.1d}, [x0], #32
- st1 { v8.1d-v11.1d}, [x0], #32
- st1 {v12.1d-v15.1d}, [x0], #32
- st1 {v16.1d-v19.1d}, [x0], #32
- st1 {v20.1d-v23.1d}, [x0], #32
- st1 {v24.1d}, [x0]
- mov x0, x2
- ret
-SYM_FUNC_END(sha3_ce_transform)
-
- .section ".rodata", "a"
- .align 8
-.Lsha3_rcon:
- .quad 0x0000000000000001, 0x0000000000008082, 0x800000000000808a
- .quad 0x8000000080008000, 0x000000000000808b, 0x0000000080000001
- .quad 0x8000000080008081, 0x8000000000008009, 0x000000000000008a
- .quad 0x0000000000000088, 0x0000000080008009, 0x000000008000000a
- .quad 0x000000008000808b, 0x800000000000008b, 0x8000000000008089
- .quad 0x8000000000008003, 0x8000000000008002, 0x8000000000000080
- .quad 0x000000000000800a, 0x800000008000000a, 0x8000000080008081
- .quad 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * sha3-ce-glue.c - core SHA-3 transform using v8.2 Crypto Extensions
- *
- * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/internal/hash.h>
-#include <crypto/sha3.h>
-#include <linux/cpufeature.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/unaligned.h>
-
-MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("sha3-224");
-MODULE_ALIAS_CRYPTO("sha3-256");
-MODULE_ALIAS_CRYPTO("sha3-384");
-MODULE_ALIAS_CRYPTO("sha3-512");
-
-asmlinkage size_t sha3_ce_transform(struct sha3_state *state, const u8 *data,
- size_t nblocks, size_t block_size);
-
-static int arm64_sha3_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha3_state *sctx = shash_desc_ctx(desc);
- struct crypto_shash *tfm = desc->tfm;
- unsigned int bs;
- int blocks;
-
- bs = crypto_shash_blocksize(tfm);
- blocks = len / bs;
- len -= blocks * bs;
- do {
- int rem;
-
- kernel_neon_begin();
- rem = sha3_ce_transform(sctx, data, blocks, bs);
- kernel_neon_end();
- data += (blocks - rem) * bs;
- blocks = rem;
- } while (blocks);
- return len;
-}
-
-static int sha3_finup(struct shash_desc *desc, const u8 *src, unsigned int len,
- u8 *out)
-{
- struct sha3_state *sctx = shash_desc_ctx(desc);
- struct crypto_shash *tfm = desc->tfm;
- __le64 *digest = (__le64 *)out;
- u8 block[SHA3_224_BLOCK_SIZE];
- unsigned int bs, ds;
- int i;
-
- ds = crypto_shash_digestsize(tfm);
- bs = crypto_shash_blocksize(tfm);
- memcpy(block, src, len);
-
- block[len++] = 0x06;
- memset(block + len, 0, bs - len);
- block[bs - 1] |= 0x80;
-
- kernel_neon_begin();
- sha3_ce_transform(sctx, block, 1, bs);
- kernel_neon_end();
- memzero_explicit(block , sizeof(block));
-
- for (i = 0; i < ds / 8; i++)
- put_unaligned_le64(sctx->st[i], digest++);
-
- if (ds & 4)
- put_unaligned_le32(sctx->st[i], (__le32 *)digest);
-
- return 0;
-}
-
-static struct shash_alg algs[] = { {
- .digestsize = SHA3_224_DIGEST_SIZE,
- .init = crypto_sha3_init,
- .update = arm64_sha3_update,
- .finup = sha3_finup,
- .descsize = SHA3_STATE_SIZE,
- .base.cra_name = "sha3-224",
- .base.cra_driver_name = "sha3-224-ce",
- .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .base.cra_blocksize = SHA3_224_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 200,
-}, {
- .digestsize = SHA3_256_DIGEST_SIZE,
- .init = crypto_sha3_init,
- .update = arm64_sha3_update,
- .finup = sha3_finup,
- .descsize = SHA3_STATE_SIZE,
- .base.cra_name = "sha3-256",
- .base.cra_driver_name = "sha3-256-ce",
- .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .base.cra_blocksize = SHA3_256_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 200,
-}, {
- .digestsize = SHA3_384_DIGEST_SIZE,
- .init = crypto_sha3_init,
- .update = arm64_sha3_update,
- .finup = sha3_finup,
- .descsize = SHA3_STATE_SIZE,
- .base.cra_name = "sha3-384",
- .base.cra_driver_name = "sha3-384-ce",
- .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .base.cra_blocksize = SHA3_384_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 200,
-}, {
- .digestsize = SHA3_512_DIGEST_SIZE,
- .init = crypto_sha3_init,
- .update = arm64_sha3_update,
- .finup = sha3_finup,
- .descsize = SHA3_STATE_SIZE,
- .base.cra_name = "sha3-512",
- .base.cra_driver_name = "sha3-512-ce",
- .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .base.cra_blocksize = SHA3_512_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 200,
-} };
-
-static int __init sha3_neon_mod_init(void)
-{
- return crypto_register_shashes(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit sha3_neon_mod_fini(void)
-{
- crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
-}
-
-module_cpu_feature_match(SHA3, sha3_neon_mod_init);
-module_exit(sha3_neon_mod_fini);
The SHA3 library functions. Select this if your module uses any of
the functions from <crypto/sha3.h>.
+config CRYPTO_LIB_SHA3_ARCH
+ bool
+ depends on CRYPTO_LIB_SHA3 && !UML
+ default y if ARM64 && KERNEL_MODE_NEON
+
config CRYPTO_LIB_SM3
tristate
obj-$(CONFIG_CRYPTO_LIB_SHA3) += libsha3.o
libsha3-y := sha3.o
+ifeq ($(CONFIG_CRYPTO_LIB_SHA3_ARCH),y)
+CFLAGS_sha3.o += -I$(src)/$(SRCARCH)
+libsha3-$(CONFIG_ARM64) += arm64/sha3-ce-core.o
+endif # CONFIG_CRYPTO_LIB_SHA3_ARCH
+
################################################################################
obj-$(CONFIG_MPILIB) += mpi/
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
+ *
+ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ .set .Lv\b\().2d, \b
+ .set .Lv\b\().16b, \b
+ .endr
+
+ /*
+ * ARMv8.2 Crypto Extensions instructions
+ */
+ .macro eor3, rd, rn, rm, ra
+ .inst 0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
+ .endm
+
+ .macro rax1, rd, rn, rm
+ .inst 0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
+ .endm
+
+ .macro bcax, rd, rn, rm, ra
+ .inst 0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
+ .endm
+
+ .macro xar, rd, rn, rm, imm6
+ .inst 0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
+ .endm
+
+ /*
+ * size_t sha3_ce_transform(struct sha3_state *state, const u8 *data,
+ * size_t nblocks, size_t block_size)
+ *
+ * block_size is assumed to be one of 72 (SHA3-512), 104 (SHA3-384), 136
+ * (SHA3-256 and SHAKE256), 144 (SHA3-224), or 168 (SHAKE128).
+ */
+ .text
+SYM_FUNC_START(sha3_ce_transform)
+ /* load state */
+ add x8, x0, #32
+ ld1 { v0.1d- v3.1d}, [x0]
+ ld1 { v4.1d- v7.1d}, [x8], #32
+ ld1 { v8.1d-v11.1d}, [x8], #32
+ ld1 {v12.1d-v15.1d}, [x8], #32
+ ld1 {v16.1d-v19.1d}, [x8], #32
+ ld1 {v20.1d-v23.1d}, [x8], #32
+ ld1 {v24.1d}, [x8]
+
+0: sub x2, x2, #1
+ mov w8, #24
+ adr_l x9, .Lsha3_rcon
+
+ /* load input */
+ ld1 {v25.8b-v28.8b}, [x1], #32
+ ld1 {v29.8b}, [x1], #8
+ eor v0.8b, v0.8b, v25.8b
+ eor v1.8b, v1.8b, v26.8b
+ eor v2.8b, v2.8b, v27.8b
+ eor v3.8b, v3.8b, v28.8b
+ eor v4.8b, v4.8b, v29.8b
+
+ ld1 {v25.8b-v28.8b}, [x1], #32
+ eor v5.8b, v5.8b, v25.8b
+ eor v6.8b, v6.8b, v26.8b
+ eor v7.8b, v7.8b, v27.8b
+ eor v8.8b, v8.8b, v28.8b
+ cmp x3, #72
+ b.eq 3f /* SHA3-512 (block_size=72)? */
+
+ ld1 {v25.8b-v28.8b}, [x1], #32
+ eor v9.8b, v9.8b, v25.8b
+ eor v10.8b, v10.8b, v26.8b
+ eor v11.8b, v11.8b, v27.8b
+ eor v12.8b, v12.8b, v28.8b
+ cmp x3, #104
+ b.eq 3f /* SHA3-384 (block_size=104)? */
+
+ ld1 {v25.8b-v28.8b}, [x1], #32
+ eor v13.8b, v13.8b, v25.8b
+ eor v14.8b, v14.8b, v26.8b
+ eor v15.8b, v15.8b, v27.8b
+ eor v16.8b, v16.8b, v28.8b
+ cmp x3, #144
+ b.lt 3f /* SHA3-256 or SHAKE256 (block_size=136)? */
+ b.eq 2f /* SHA3-224 (block_size=144)? */
+
+ /* SHAKE128 (block_size=168) */
+ ld1 {v25.8b-v28.8b}, [x1], #32
+ eor v17.8b, v17.8b, v25.8b
+ eor v18.8b, v18.8b, v26.8b
+ eor v19.8b, v19.8b, v27.8b
+ eor v20.8b, v20.8b, v28.8b
+ b 3f
+2:
+ /* SHA3-224 (block_size=144) */
+ ld1 {v25.8b}, [x1], #8
+ eor v17.8b, v17.8b, v25.8b
+
+3: sub w8, w8, #1
+
+ eor3 v29.16b, v4.16b, v9.16b, v14.16b
+ eor3 v26.16b, v1.16b, v6.16b, v11.16b
+ eor3 v28.16b, v3.16b, v8.16b, v13.16b
+ eor3 v25.16b, v0.16b, v5.16b, v10.16b
+ eor3 v27.16b, v2.16b, v7.16b, v12.16b
+ eor3 v29.16b, v29.16b, v19.16b, v24.16b
+ eor3 v26.16b, v26.16b, v16.16b, v21.16b
+ eor3 v28.16b, v28.16b, v18.16b, v23.16b
+ eor3 v25.16b, v25.16b, v15.16b, v20.16b
+ eor3 v27.16b, v27.16b, v17.16b, v22.16b
+
+ rax1 v30.2d, v29.2d, v26.2d // bc[0]
+ rax1 v26.2d, v26.2d, v28.2d // bc[2]
+ rax1 v28.2d, v28.2d, v25.2d // bc[4]
+ rax1 v25.2d, v25.2d, v27.2d // bc[1]
+ rax1 v27.2d, v27.2d, v29.2d // bc[3]
+
+ eor v0.16b, v0.16b, v30.16b
+ xar v29.2d, v1.2d, v25.2d, (64 - 1)
+ xar v1.2d, v6.2d, v25.2d, (64 - 44)
+ xar v6.2d, v9.2d, v28.2d, (64 - 20)
+ xar v9.2d, v22.2d, v26.2d, (64 - 61)
+ xar v22.2d, v14.2d, v28.2d, (64 - 39)
+ xar v14.2d, v20.2d, v30.2d, (64 - 18)
+ xar v31.2d, v2.2d, v26.2d, (64 - 62)
+ xar v2.2d, v12.2d, v26.2d, (64 - 43)
+ xar v12.2d, v13.2d, v27.2d, (64 - 25)
+ xar v13.2d, v19.2d, v28.2d, (64 - 8)
+ xar v19.2d, v23.2d, v27.2d, (64 - 56)
+ xar v23.2d, v15.2d, v30.2d, (64 - 41)
+ xar v15.2d, v4.2d, v28.2d, (64 - 27)
+ xar v28.2d, v24.2d, v28.2d, (64 - 14)
+ xar v24.2d, v21.2d, v25.2d, (64 - 2)
+ xar v8.2d, v8.2d, v27.2d, (64 - 55)
+ xar v4.2d, v16.2d, v25.2d, (64 - 45)
+ xar v16.2d, v5.2d, v30.2d, (64 - 36)
+ xar v5.2d, v3.2d, v27.2d, (64 - 28)
+ xar v27.2d, v18.2d, v27.2d, (64 - 21)
+ xar v3.2d, v17.2d, v26.2d, (64 - 15)
+ xar v25.2d, v11.2d, v25.2d, (64 - 10)
+ xar v26.2d, v7.2d, v26.2d, (64 - 6)
+ xar v30.2d, v10.2d, v30.2d, (64 - 3)
+
+ bcax v20.16b, v31.16b, v22.16b, v8.16b
+ bcax v21.16b, v8.16b, v23.16b, v22.16b
+ bcax v22.16b, v22.16b, v24.16b, v23.16b
+ bcax v23.16b, v23.16b, v31.16b, v24.16b
+ bcax v24.16b, v24.16b, v8.16b, v31.16b
+
+ ld1r {v31.2d}, [x9], #8
+
+ bcax v17.16b, v25.16b, v19.16b, v3.16b
+ bcax v18.16b, v3.16b, v15.16b, v19.16b
+ bcax v19.16b, v19.16b, v16.16b, v15.16b
+ bcax v15.16b, v15.16b, v25.16b, v16.16b
+ bcax v16.16b, v16.16b, v3.16b, v25.16b
+
+ bcax v10.16b, v29.16b, v12.16b, v26.16b
+ bcax v11.16b, v26.16b, v13.16b, v12.16b
+ bcax v12.16b, v12.16b, v14.16b, v13.16b
+ bcax v13.16b, v13.16b, v29.16b, v14.16b
+ bcax v14.16b, v14.16b, v26.16b, v29.16b
+
+ bcax v7.16b, v30.16b, v9.16b, v4.16b
+ bcax v8.16b, v4.16b, v5.16b, v9.16b
+ bcax v9.16b, v9.16b, v6.16b, v5.16b
+ bcax v5.16b, v5.16b, v30.16b, v6.16b
+ bcax v6.16b, v6.16b, v4.16b, v30.16b
+
+ bcax v3.16b, v27.16b, v0.16b, v28.16b
+ bcax v4.16b, v28.16b, v1.16b, v0.16b
+ bcax v0.16b, v0.16b, v2.16b, v1.16b
+ bcax v1.16b, v1.16b, v27.16b, v2.16b
+ bcax v2.16b, v2.16b, v28.16b, v27.16b
+
+ eor v0.16b, v0.16b, v31.16b
+
+ cbnz w8, 3b
+ cond_yield 4f, x8, x9
+ cbnz x2, 0b
+
+ /* save state */
+4: st1 { v0.1d- v3.1d}, [x0], #32
+ st1 { v4.1d- v7.1d}, [x0], #32
+ st1 { v8.1d-v11.1d}, [x0], #32
+ st1 {v12.1d-v15.1d}, [x0], #32
+ st1 {v16.1d-v19.1d}, [x0], #32
+ st1 {v20.1d-v23.1d}, [x0], #32
+ st1 {v24.1d}, [x0]
+ mov x0, x2
+ ret
+SYM_FUNC_END(sha3_ce_transform)
+
+ .section ".rodata", "a"
+ .align 8
+.Lsha3_rcon:
+ .quad 0x0000000000000001, 0x0000000000008082, 0x800000000000808a
+ .quad 0x8000000080008000, 0x000000000000808b, 0x0000000080000001
+ .quad 0x8000000080008081, 0x8000000000008009, 0x000000000000008a
+ .quad 0x0000000000000088, 0x0000000080008009, 0x000000008000000a
+ .quad 0x000000008000808b, 0x800000000000008b, 0x8000000000008089
+ .quad 0x8000000000008003, 0x8000000000008002, 0x8000000000000080
+ .quad 0x000000000000800a, 0x800000008000000a, 0x8000000080008081
+ .quad 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <linux/cpufeature.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha3);
+
+asmlinkage size_t sha3_ce_transform(struct sha3_state *state, const u8 *data,
+ size_t nblocks, size_t block_size);
+
+static void sha3_absorb_blocks(struct sha3_state *state, const u8 *data,
+ size_t nblocks, size_t block_size)
+{
+ if (static_branch_likely(&have_sha3) && likely(may_use_simd())) {
+ do {
+ size_t rem;
+
+ kernel_neon_begin();
+ rem = sha3_ce_transform(state, data, nblocks,
+ block_size);
+ kernel_neon_end();
+ data += (nblocks - rem) * block_size;
+ nblocks = rem;
+ } while (nblocks);
+ } else {
+ sha3_absorb_blocks_generic(state, data, nblocks, block_size);
+ }
+}
+
+static void sha3_keccakf(struct sha3_state *state)
+{
+ if (static_branch_likely(&have_sha3) && likely(may_use_simd())) {
+ /*
+ * Passing zeroes into sha3_ce_transform() gives the plain
+ * Keccak-f permutation, which is what we want here. Any
+ * supported block size may be used. Use SHA3_512_BLOCK_SIZE
+ * since it's the shortest.
+ */
+ static const u8 zeroes[SHA3_512_BLOCK_SIZE];
+
+ kernel_neon_begin();
+ sha3_ce_transform(state, zeroes, 1, sizeof(zeroes));
+ kernel_neon_end();
+ } else {
+ sha3_keccakf_generic(state);
+ }
+}
+
+#define sha3_mod_init_arch sha3_mod_init_arch
+static void sha3_mod_init_arch(void)
+{
+ if (cpu_have_named_feature(SHA3))
+ static_branch_enable(&have_sha3);
+}