From c4856c14c510afc2800d55deeba2a4537943906f Mon Sep 17 00:00:00 2001
From: Maamoun TK
Date: Sun, 29 May 2022 03:34:44 +0200
Subject: [PATCH 0929/1000] [PowerPC] Implement Poly1305 single block update based on radix 2^64

---
 Makefile.in                           |   2 +-
 configure.ac                          |  15 +-
 fat-ppc.c                             |  54 ++++++
 fat-setup.h                           |   6 +
 poly1305-internal.c                   |  22 +++
 powerpc64/fat/poly1305-internal-2.asm |  39 +++++
 powerpc64/p9/poly1305-internal.asm    | 238 ++++++++++++++++++++++++++
 7 files changed, 373 insertions(+), 3 deletions(-)
 create mode 100644 powerpc64/fat/poly1305-internal-2.asm
 create mode 100644 powerpc64/p9/poly1305-internal.asm

diff --git a/Makefile.in b/Makefile.in
index 65911e2a..11c88114 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -607,7 +607,7 @@ distdir: $(DISTFILES)
 		x86_64 x86_64/aesni x86_64/sha_ni x86_64/pclmul x86_64/fat \
 		arm arm/neon arm/v6 arm/fat \
 		arm64 arm64/crypto arm64/fat \
-		powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/fat \
+		powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/p9 powerpc64/fat \
 		s390x s390x/vf s390x/msa s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 s390x/fat ; do \
 	  mkdir "$(distdir)/$$d" ; \
 	  find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' -o -name README ')' \
diff --git a/configure.ac b/configure.ac
index 73c6fc21..b68b9e23 100644
--- a/configure.ac
+++ b/configure.ac
@@ -105,6 +105,10 @@ AC_ARG_ENABLE(power-altivec,
   AC_HELP_STRING([--enable-power-altivec], [Enable POWER altivec and vsx extensions. (default=no)]),,
   [enable_altivec=no])
 
+AC_ARG_ENABLE(power9,
+  AC_HELP_STRING([--enable-power9], [Enable POWER ISA v3.0. (default=no)]),,
+  [enable_power9=no])
+
 AC_ARG_ENABLE(s390x-vf,
   AC_HELP_STRING([--enable-s390x-vf], [Enable vector facility on z/Architecture. (default=no)]),,
   [enable_s390x_vf=no])
@@ -539,9 +543,12 @@ if test "x$enable_assembler" = xyes ; then
 	if test "x$enable_fat" = xyes ; then
 	  asm_path="powerpc64/fat $asm_path"
 	  OPT_NETTLE_SOURCES="fat-ppc.c $OPT_NETTLE_SOURCES"
-	  FAT_TEST_LIST="none crypto_ext altivec"
+	  FAT_TEST_LIST="none crypto_ext altivec power9"
 	else
-	  if test "$enable_power_crypto_ext" = yes ; then
+	  if test "$enable_power9" = yes ; then
+	    asm_path="powerpc64/p9 $asm_path"
+	  fi
+	  if test "$enable_power_crypto_ext" = yes ; then
 	    asm_path="powerpc64/p8 $asm_path"
 	  fi
 	  if test "$enable_power_altivec" = yes ; then
@@ -605,6 +612,7 @@ asm_nettle_optional_list="cpuid.asm cpu-facility.asm \
   aes256-encrypt-2.asm aes256-decrypt-2.asm \
   cbc-aes128-encrypt-2.asm cbc-aes192-encrypt-2.asm cbc-aes256-encrypt-2.asm \
   chacha-2core.asm chacha-3core.asm chacha-4core.asm chacha-core-internal-2.asm \
+  poly1305-internal-2.asm \
   ghash-set-key-2.asm ghash-update-2.asm \
   salsa20-2core.asm salsa20-core-internal-2.asm \
   sha1-compress-2.asm sha256-compress-2.asm \
@@ -751,6 +759,9 @@ AH_VERBATIM([HAVE_NATIVE],
 #undef HAVE_NATIVE_ecc_secp384r1_redc
 #undef HAVE_NATIVE_ecc_secp521r1_modp
 #undef HAVE_NATIVE_ecc_secp521r1_redc
+#undef HAVE_NATIVE_poly1305_set_key
+#undef HAVE_NATIVE_poly1305_block
+#undef HAVE_NATIVE_poly1305_digest
 #undef HAVE_NATIVE_ghash_set_key
 #undef HAVE_NATIVE_ghash_update
 #undef HAVE_NATIVE_salsa20_core
diff --git a/fat-ppc.c b/fat-ppc.c
index bf622cf5..7569e44d 100644
--- a/fat-ppc.c
+++ b/fat-ppc.c
@@ -65,6 +65,7 @@
 #include "aes-internal.h"
 #include "chacha-internal.h"
 #include "ghash-internal.h"
+#include "poly1305.h"
 #include "fat-setup.h"
 
 /* Defines from arch/powerpc/include/uapi/asm/cputable.h in Linux kernel */
@@ -77,11 +78,15 @@
 #ifndef PPC_FEATURE2_VEC_CRYPTO
 #define PPC_FEATURE2_VEC_CRYPTO 0x02000000
 #endif
+#ifndef PPC_FEATURE2_ARCH_3_00
+#define PPC_FEATURE2_ARCH_3_00 0x00800000 /* Fallback when not already defined; tested against hwcap2 below. */
+#endif
 
 struct ppc_features
 {
   int have_crypto_ext;
   int have_altivec;
+  int have_power9; /* Non-zero selects the _ppc64 poly1305 variants in fat_init. */
 };
 
 #define MATCH(s, slen, literal, llen) \
@@ -93,6 +98,7 @@ get_ppc_features (struct ppc_features *features)
   const char *s;
   features->have_crypto_ext = 0;
   features->have_altivec = 0;
+  features->have_power9 = 0;
 
   s = secure_getenv (ENV_OVERRIDE);
   if (s)
@@ -105,6 +111,8 @@ get_ppc_features (struct ppc_features *features)
 	  features->have_crypto_ext = 1;
 	else if (MATCH(s, length, "altivec", 7))
 	  features->have_altivec = 1;
+	else if (MATCH(s, length, "power9", 6)) /* Keyword taken from the ENV_OVERRIDE string. */
+	  features->have_power9 = 1;
 	if (!sep)
 	  break;
 	s = sep + 1;
@@ -136,6 +144,9 @@ get_ppc_features (struct ppc_features *features)
       features->have_crypto_ext
 	= ((hwcap2 & PPC_FEATURE2_VEC_CRYPTO) == PPC_FEATURE2_VEC_CRYPTO);
 
+      features->have_power9
+	= ((hwcap2 & PPC_FEATURE2_ARCH_3_00) == PPC_FEATURE2_ARCH_3_00);
+
       /* We also need VSX instructions, mainly for load and store. */
       features->have_altivec
 	= ((hwcap & (PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_VSX))
@@ -172,6 +183,18 @@ DECLARE_FAT_FUNC(nettle_chacha_crypt32, chacha_crypt_func)
 DECLARE_FAT_FUNC_VAR(chacha_crypt32, chacha_crypt_func, 1core)
 DECLARE_FAT_FUNC_VAR(chacha_crypt32, chacha_crypt_func, 3core)
 
+DECLARE_FAT_FUNC(_nettle_poly1305_set_key, poly1305_set_key_func)
+DECLARE_FAT_FUNC_VAR(poly1305_set_key, poly1305_set_key_func, c)
+DECLARE_FAT_FUNC_VAR(poly1305_set_key, poly1305_set_key_func, ppc64)
+
+DECLARE_FAT_FUNC(_nettle_poly1305_block, poly1305_block_func)
+DECLARE_FAT_FUNC_VAR(poly1305_block, poly1305_block_func, c)
+DECLARE_FAT_FUNC_VAR(poly1305_block, poly1305_block_func, ppc64)
+
+DECLARE_FAT_FUNC(_nettle_poly1305_digest, poly1305_digest_func)
+DECLARE_FAT_FUNC_VAR(poly1305_digest, poly1305_digest_func, c)
+DECLARE_FAT_FUNC_VAR(poly1305_digest, poly1305_digest_func, ppc64)
+
 static void CONSTRUCTOR
 fat_init (void)
 {
@@ -220,6 +243,21 @@ fat_init (void)
       nettle_chacha_crypt_vec = _nettle_chacha_crypt_1core;
       nettle_chacha_crypt32_vec = _nettle_chacha_crypt32_1core;
     }
+
+  if (features.have_power9) /* All three poly1305 entry points are switched together. */
+    {
+      if (verbose)
+	fprintf (stderr, "libnettle: enabling arch 3.00 code.\n");
+      _nettle_poly1305_set_key_vec = _nettle_poly1305_set_key_ppc64;
+      _nettle_poly1305_block_vec = _nettle_poly1305_block_ppc64;
+      _nettle_poly1305_digest_vec = _nettle_poly1305_digest_ppc64;
+    }
+  else /* No arch 3.00 support detected: use the C implementations. */
+    {
+      _nettle_poly1305_set_key_vec = _nettle_poly1305_set_key_c;
+      _nettle_poly1305_block_vec = _nettle_poly1305_block_c;
+      _nettle_poly1305_digest_vec = _nettle_poly1305_digest_c;
+    }
 }
 
 DEFINE_FAT_FUNC(_nettle_aes_encrypt, void,
@@ -261,3 +299,19 @@ DEFINE_FAT_FUNC(nettle_chacha_crypt32, void,
 		 uint8_t *dst,
 		 const uint8_t *src),
 		(ctx, length, dst, src))
+
+DEFINE_FAT_FUNC(_nettle_poly1305_set_key, void,
+		(struct poly1305_ctx *ctx,
+		 const uint8_t *key),
+		(ctx, key))
+
+DEFINE_FAT_FUNC(_nettle_poly1305_block, void,
+		(struct poly1305_ctx *ctx,
+		 const uint8_t *m,
+		 unsigned high),
+		(ctx, m, high))
+
+DEFINE_FAT_FUNC(_nettle_poly1305_digest, void,
+		(struct poly1305_ctx *ctx,
+		 union nettle_block16 *s),
+		(ctx, s))
diff --git a/fat-setup.h b/fat-setup.h
index e77cce02..ad3c10f0 100644
--- a/fat-setup.h
+++ b/fat-setup.h
@@ -196,6 +196,12 @@ typedef void chacha_crypt_func(struct chacha_ctx *ctx,
 			       uint8_t *dst,
 			       const uint8_t *src);
 
+struct poly1305_ctx; /* Forward declaration; the typedefs below only use pointers to it. */
+typedef void poly1305_set_key_func(struct poly1305_ctx *ctx, const uint8_t *key);
+typedef void poly1305_digest_func(struct poly1305_ctx *ctx, union nettle_block16 *s);
+typedef void poly1305_block_func(struct poly1305_ctx *ctx, const uint8_t *m,
+				 unsigned high);
+
 struct aes128_ctx;
 typedef void aes128_set_key_func (struct aes128_ctx *ctx, const uint8_t *key);
 typedef void aes128_invert_key_func (struct aes128_ctx *dst, const struct aes128_ctx *src);
diff --git a/poly1305-internal.c b/poly1305-internal.c
index 490fdf71..380b934e 100644
--- a/poly1305-internal.c
+++ b/poly1305-internal.c
@@ -85,6 +85,28 @@
 #define h3 h.h32[3]
 #define h4 hh
 
+/* For fat builds: when a native version is configured, the C functions defined below are renamed with a _c suffix. */
+#if HAVE_NATIVE_poly1305_set_key
+void
+_nettle_poly1305_set_key_c(struct poly1305_ctx *ctx,
+			   const uint8_t key[16]);
+# define _nettle_poly1305_set_key _nettle_poly1305_set_key_c
+#endif
+
+#if HAVE_NATIVE_poly1305_block
+void
+_nettle_poly1305_block_c(struct poly1305_ctx *ctx, const uint8_t *m,
+			 unsigned t4);
+# define _nettle_poly1305_block _nettle_poly1305_block_c
+#endif
+
+#if HAVE_NATIVE_poly1305_digest
+void
+_nettle_poly1305_digest_c(struct poly1305_ctx *ctx,
+			  union nettle_block16 *s);
+# define _nettle_poly1305_digest _nettle_poly1305_digest_c
+#endif
+
 void
 _nettle_poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16])
 {
diff --git a/powerpc64/fat/poly1305-internal-2.asm b/powerpc64/fat/poly1305-internal-2.asm
new file mode 100644
index 00000000..177a4563
--- /dev/null
+++ b/powerpc64/fat/poly1305-internal-2.asm
@@ -0,0 +1,39 @@
+C powerpc64/fat/poly1305-internal-2.asm
+
+ifelse(`
+   Copyright (C) 2022 Mamone Tarsha
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program. If
+   not, see http://www.gnu.org/licenses/.
+')
+
+dnl picked up by configure
+dnl PROLOGUE(_nettle_poly1305_set_key)
+dnl PROLOGUE(_nettle_poly1305_block)
+dnl PROLOGUE(_nettle_poly1305_digest)
+dnl fat_transform maps NAME to NAME_ppc64; presumably applied to the symbols of the included file - confirm in asm.m4
+define(`fat_transform', `$1_ppc64')
+include_src(`powerpc64/p9/poly1305-internal.asm')
diff --git a/powerpc64/p9/poly1305-internal.asm b/powerpc64/p9/poly1305-internal.asm
new file mode 100644
index 00000000..238d6397
--- /dev/null
+++ b/powerpc64/p9/poly1305-internal.asm
@@ -0,0 +1,238 @@
+C powerpc64/p9/poly1305-internal.asm
+
+ifelse(`
+   Copyright (C) 2013, 2022 Niels Möller
+   Copyright (C) 2022 Mamone Tarsha
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program. If
+   not, see http://www.gnu.org/licenses/.
+')
+
+C Register usage:
+
+define(`SP', `r1')
+define(`TOCP', `r2')
+
+C Arguments
+define(`CTX', `r3')
+define(`M', `r4')
+define(`M128', `r5')
+
+C Working state
+define(`H0', `r6')
+define(`H1', `r7')
+define(`H2', `r8')
+define(`T0', `r9')
+define(`T1', `r10')
+define(`T2', `r8')
+define(`T2A', `r9')
+define(`T2S', `r10')
+define(`IDX', `r6')
+define(`RZ', `r7')
+
+define(`ZERO', `v0')
+define(`F0', `v1')
+define(`F1', `v2')
+define(`F0S', `v3')
+define(`T', `v4')
+
+define(`R', `v5')
+define(`S', `v6')
+
+define(`T00', `v7')
+define(`T10', `v8')
+define(`T11', `v9')
+define(`MU0', `v10')
+define(`MU1', `v11')
+define(`TMP', `v12')
+
+.text
+
+C void _nettle_poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16])
+define(`FUNC_ALIGN', `5')
+PROLOGUE(_nettle_poly1305_set_key)
+	li	r9, 0				C r9 = 0, stored to the state words below
+	addis	r5, TOCP, .key_mask@got@ha
+	ld	r5, .key_mask@got@l(r5)
+	ld	r8, 0(r5)			C r8 = 0x0FFFFFFC0FFFFFFC
+	ori	r7, r8, 3			C r7 = 0x0FFFFFFC0FFFFFFF
+
+	C Load R_0 and R_1
+IF_LE(`
+	ld	r5, 0(r4)
+	ld	r6, 8(r4)
+')
+IF_BE(`
+	ldbrx	r5, 0, r4
+	addi	r4, r4, 8
+	ldbrx	r6, 0, r4
+')
+	and	r5, r5, r7			C R_0 &= 0x0FFFFFFC0FFFFFFF
+	and	r6, r6, r8			C R_1 &= 0x0FFFFFFC0FFFFFFC
+
+	srdi	r10, r6, 2			C r10 = R_1 >> 2
+	sldi	r7, r5, 2
+	sldi	r8, r10, 2
+	add	r7, r7, r5			C r7 = 5 * R_0
+	add	r8, r8, r10			C r8 = 5 * r10
+
+	C Store key
+	std	r5, 0(r3)
+	std	r6, 8(r3)
+	std	r7, 16(r3)
+	std	r8, 24(r3)
+	C Reset state
+	std	r9, 32(r3)
+	std	r9, 40(r3)
+	std	r9, 48(r3)
+
+	blr
+EPILOGUE(_nettle_poly1305_set_key)
+
+C void _nettle_poly1305_block(struct poly1305_ctx *ctx, const uint8_t *m, unsigned m128)
+define(`FUNC_ALIGN', `5')
+PROLOGUE(_nettle_poly1305_block)
+	ld	H0, 32(CTX)
+	ld	H1, 40(CTX)
+	ld	H2, 48(CTX)
+IF_LE(`
+	ld	T0, 0(M)
+	ld	T1, 8(M)
+')
+IF_BE(`
+	ldbrx	T0, 0, M
+	addi	M, M, 8
+	ldbrx	T1, 0, M
+')
+
+	addc	T0, T0, H0			C T0 = low message word + H0
+	adde	T1, T1, H1			C T1 = high message word + H1 + carry
+	adde	T2, M128, H2			C T2 = M128 + H2 + carry
+
+	mtvsrdd	VSR(T), T0, T1
+
+	li	IDX, 16
+	lxvd2x	VSR(R), 0, CTX			C 16 bytes at ctx + 0, written by set_key
+	lxvd2x	VSR(S), IDX, CTX		C 16 bytes at ctx + 16, written by set_key
+
+	andi.	T2A, T2, 3			C T2A = T2 & 3
+	srdi	T2S, T2, 2			C T2S = T2 >> 2
+
+	li	RZ, 0
+	vxor	ZERO, ZERO, ZERO
+
+	xxpermdi	VSR(MU0), VSR(R), VSR(S), 0b01
+	xxswapd	VSR(MU1), VSR(R)
+
+	mtvsrdd	VSR(T11), 0, T2A
+	mtvsrdd	VSR(T00), T2S, RZ
+	mtvsrdd	VSR(T10), 0, T2
+
+	vmsumudm	F0, T, MU0, ZERO
+	vmsumudm	F1, T, MU1, ZERO
+	vmsumudm	TMP, T11, MU1, ZERO
+
+	vmsumudm	F0, T00, S, F0
+	vmsumudm	F1, T10, MU0, F1
+
+	xxmrgld	VSR(TMP), VSR(TMP), VSR(ZERO)
+	xxswapd	VSR(F0S), VSR(F0)
+	vadduqm	F1, F1, TMP
+	stxsd	F0S, 32(CTX)			C 8 bytes to ctx + 32, read back as H0
+
+	li	IDX, 40
+	xxmrghd	VSR(F0), VSR(ZERO), VSR(F0)
+	vadduqm	F1, F1, F0
+	xxswapd	VSR(F1), VSR(F1)
+	stxvd2x	VSR(F1), IDX, CTX		C 16 bytes to ctx + 40, read back as H1 and H2
+
+	blr
+EPILOGUE(_nettle_poly1305_block)
+
+C void _nettle_poly1305_digest(struct poly1305_ctx *ctx, union nettle_block16 *s)
+define(`FUNC_ALIGN', `5')
+PROLOGUE(_nettle_poly1305_digest)
+	C Load current state
+	ld	r5, 32(r3)
+	ld	r6, 40(r3)
+	ld	r7, 48(r3)
+
+	C Fold high part of H2
+	li	r10, 0
+	srdi	r9, r7, 2
+	sldi	r8, r9, 2
+	add	r8, r8, r9			C r8 = 5 * r9
+	andi.	r7, r7, 3
+	addc	r5, r5, r8
+	adde	r6, r6, r10
+	adde	r7, r7, r10
+
+	C Add 5 to state, save result if it carries
+	li	r8, 5
+	li	r9, 0
+	li	r10, -4
+	addc	r8, r8, r5
+	adde	r9, r9, r6
+	adde.	r10, r10, r7			C r10 is zero iff r7 + carry is 4
+	iseleq	r5, r8, r5
+	iseleq	r6, r9, r6
+
+	C Load digest
+IF_LE(`
+	ld	r7, 0(r4)
+	ld	r8, 8(r4)
+')
+IF_BE(`
+	li	r10, 8
+	ldbrx	r7, 0, r4
+	ldbrx	r8, r10, r4
+')
+
+	C Add hash to digest
+	addc	r5, r5, r7
+	adde	r6, r6, r8
+
+	C Store digest
+IF_LE(`
+	std	r5, 0(r4)
+	std	r6, 8(r4)
+')
+IF_BE(`
+	stdbrx	r5, 0, r4
+	stdbrx	r6, r10, r4
+')
+	C Reset hash
+	li	r9, 0
+	std	r9, 32(r3)
+	std	r9, 40(r3)
+	std	r9, 48(r3)
+
+	blr
+EPILOGUE(_nettle_poly1305_digest)
+
+.data
+.align 3
+.key_mask:
+.quad 0x0FFFFFFC0FFFFFFC
-- 
2.38.0