From 0eb60860f6a4b69a01c8dcdbc39192d3f575e726 Mon Sep 17 00:00:00 2001 From: ccli8 Date: Mon, 13 Nov 2017 17:10:45 +0800 Subject: [PATCH] Support ECP H/W accelerator --- .../TARGET_NUMAKER_PFM_M487/mbedtls_device.h | 12 + .../TARGET_M480/ecp/ecp_internal_alt.c | 828 ++++++++++++++++++ .../TARGET_M480/crypto/crypto-misc.c | 78 +- .../TARGET_M480/crypto/crypto-misc.h | 8 + .../TARGET_NUC472/crypto/crypto-misc.c | 34 +- 5 files changed, 946 insertions(+), 14 deletions(-) create mode 100644 features/mbedtls/targets/TARGET_NUVOTON/TARGET_M480/ecp/ecp_internal_alt.c diff --git a/features/mbedtls/targets/TARGET_NUVOTON/TARGET_M480/TARGET_NUMAKER_PFM_M487/mbedtls_device.h b/features/mbedtls/targets/TARGET_NUVOTON/TARGET_M480/TARGET_NUMAKER_PFM_M487/mbedtls_device.h index 1043cab9fd..902236ded9 100644 --- a/features/mbedtls/targets/TARGET_NUVOTON/TARGET_M480/TARGET_NUMAKER_PFM_M487/mbedtls_device.h +++ b/features/mbedtls/targets/TARGET_NUVOTON/TARGET_M480/TARGET_NUMAKER_PFM_M487/mbedtls_device.h @@ -24,4 +24,16 @@ #define MBEDTLS_AES_ALT +#define MBEDTLS_ECP_INTERNAL_ALT +/* Support for Weierstrass curves with Jacobi representation */ +//#define MBEDTLS_ECP_RANDOMIZE_JAC_ALT +#define MBEDTLS_ECP_ADD_MIXED_ALT +#define MBEDTLS_ECP_DOUBLE_JAC_ALT +#define MBEDTLS_ECP_NORMALIZE_JAC_ALT +#define MBEDTLS_ECP_NORMALIZE_JAC_MANY_ALT +/* Support for curves with Montgomery arithmetic */ +//#define MBEDTLS_ECP_DOUBLE_ADD_MXZ_ALT +//#define MBEDTLS_ECP_RANDOMIZE_MXZ_ALT +//#define MBEDTLS_ECP_NORMALIZE_MXZ_ALT + #endif /* MBEDTLS_DEVICE_H */ diff --git a/features/mbedtls/targets/TARGET_NUVOTON/TARGET_M480/ecp/ecp_internal_alt.c b/features/mbedtls/targets/TARGET_NUVOTON/TARGET_M480/ecp/ecp_internal_alt.c new file mode 100644 index 0000000000..a5a65f0203 --- /dev/null +++ b/features/mbedtls/targets/TARGET_NUVOTON/TARGET_M480/ecp/ecp_internal_alt.c @@ -0,0 +1,828 @@ +/* mbed Microcontroller Library + * Copyright (c) 2016-2018 Nuvoton + * + * Licensed under the Apache License, Version 2.0 
(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if !defined(MBEDTLS_CONFIG_FILE) +#include "mbedtls/config.h" +#else +#include MBEDTLS_CONFIG_FILE +#endif + +#if defined(MBEDTLS_ECP_C) + +#include "mbedtls/ecp.h" +#include "mbedtls/threading.h" + +#include <string.h> + +#if !defined(MBEDTLS_ECP_ALT) + +#if defined(MBEDTLS_PLATFORM_C) +#include "mbedtls/platform.h" +#else +#include <stdlib.h> +#include <stdio.h> +#define mbedtls_printf printf +#define mbedtls_calloc calloc +#define mbedtls_free free +#endif + +#if defined(MBEDTLS_ECP_INTERNAL_ALT) + +/* FIXME: We shouldn't define ECP_SHORTWEIERSTRASS here. 
It is expected ECP_SHORTWEIERSTRASS + * would be defined in mbedtls/ecp.h from ecp.c for our inclusion */ +#define ECP_SHORTWEIERSTRASS + +#include "mbedtls/ecp_internal.h" +#include "mbed_toolchain.h" +#include "mbed_assert.h" +#include "mbed_error.h" +#include "nu_bitutil.h" +#include "crypto-misc.h" + +/* Max key size (in bits) supported; compared against grp->pbits / pbits */ +#define NU_ECC_MAXKEYBITS 571 +/* Max number of 32-bit words in an ECC big-number; also the register count passed to the eccreg read/write helpers */ +#define NU_ECC_BIGNUM_MAXWORD 18 +/* Number of 32-bit words per MPI limb */ +#define wiL (sizeof (mbedtls_mpi_uint) / sizeof (uint32_t)) +/* Min MPI limbs for ECC big-number */ +#define NU_ECC_BIGNUM_MINLIMB (NU_ECC_BIGNUM_MAXWORD / wiL) + +/* + * Convert a number of 32-bit words to the number of limbs needed to hold them (rounded up) + * Divide first in order to avoid potential overflows + */ +#define WORDS_TO_LIMBS(i) ( (i) / wiL + ( (i) % wiL != 0 ) ) + + +#define ECCOP_POINT_MUL (0x0UL << CRPT_ECC_CTL_ECCOP_Pos) +#define ECCOP_MODULE (0x1UL << CRPT_ECC_CTL_ECCOP_Pos) +#define ECCOP_POINT_ADD (0x2UL << CRPT_ECC_CTL_ECCOP_Pos) +#define ECCOP_POINT_DOUBLE (0x3UL << CRPT_ECC_CTL_ECCOP_Pos) + +#define MODOP_DIV (0x0UL << CRPT_ECC_CTL_MODOP_Pos) +#define MODOP_MUL (0x1UL << CRPT_ECC_CTL_MODOP_Pos) +#define MODOP_ADD (0x2UL << CRPT_ECC_CTL_MODOP_Pos) +#define MODOP_SUB (0x3UL << CRPT_ECC_CTL_MODOP_Pos) + +/** + * \brief Check if MPI has been normalized, i.e. 0 <= N < P + * + * \param N Input MPI to check (an MPI lvalue, not a pointer; its address is taken) + * \param P Prime modulus (an MPI lvalue, not a pointer; its address is taken) + * + * \return 0 if not normalized, + * 1 if normalized + */ +#define MBEDTLS_INTERNAL_MPI_IS_NORM(N, P) \ + ((mbedtls_mpi_cmp_int(&N, 0) >= 0) && (mbedtls_mpi_cmp_mpi(&N, &P) < 0)) + + +/** + * \brief Normalize MPI if it is not normalized yet + * + * \param R Address of the pointer variable that receives the normalized MPI (&N1 or &N2) + * \param N1 Input MPI to normalize (an MPI lvalue; used as-is when already within [0, P - 1]) + * \param N2 Output MPI which holds N1 mod P if N1 is not normalized yet + * \param P Prime modulus + * + * \note Expands MBEDTLS_MPI_CHK around mbedtls_mpi_mod_mpi, so it can only be used inside a + * function that follows the ret/cleanup convention of that macro. + */ +#define MBEDTLS_INTERNAL_MPI_NORM(R, N1, N2, P) \ + do { \ + if (MBEDTLS_INTERNAL_MPI_IS_NORM(N1, P)) { \ + *R = &N1; \ + } else { \ + 
MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&N2, &N1, &P)); \ + *R = &N2; \ + } \ + } while(0) + +/** + * \brief Configure ECCOP operation and wait for its completion + * + * \param grp ECP group + * \param R Destination point + * \param m Integer by which to multiply P (ECCOP_POINT_MUL only) + * \param P Point to multiply by m, first summand, or point to double + * \param n Reserved: not referenced by the implementation for any of the ECCOP codes below + * \param Q Second summand (ECCOP_POINT_ADD only) + * \param eccop ECCOP code. Could be ECCOP_POINT_MUL/ADD/DOUBLE + * Dependent on passed-in eccop, only partial parameters among m/P/n/Q are needed and checked. + * ECCOP_POINT_MUL R = m*P + * ECCOP_POINT_ADD R = P + Q + * ECCOP_POINT_DOUBLE R = 2*P + * + * \return 0 if successful + * + * \note P/Q must be normalized (= affine). R would be normalized. + * + * \note m could be negative; it is reduced modulo grp->N before being written to the accelerator. + * + * \note ECC accelerator doesn't support R = 0, and we need to detect it additionally. + * For R = P + Q or R = 2*P, we can detect all R = 0 cases. + * For R = m*P, we can detect all R = 0 cases only if grp->N (order) is a prime. + * + */ +int mbedtls_internal_run_eccop(const mbedtls_ecp_group *grp, + mbedtls_ecp_point *R, + const mbedtls_mpi *m, + const mbedtls_ecp_point *P, + const mbedtls_mpi *n, + const mbedtls_ecp_point *Q, + uint32_t eccop); + +/** + * \brief Configure MODOP operation and wait for its completion + * + * \param r Destination MPI + * \param o1 Input MPI for first operand of MODOP + * \param o2 Input MPI for second operand of MODOP + * \param p Prime modulus + * \param pbits Number of bits of p + * \param modop MODOP code. Could be MODOP_ADD/SUB/MUL/DIV + * MODOP_ADD r = o1 + o2 mod p + * MODOP_SUB r = o1 - o2 mod p + * MODOP_MUL r = o1 * o2 mod p + * MODOP_DIV r = o1 / o2 mod p + * + * \return 0 if successful + * + * \note o1/o2 must be normalized (within [0, p - 1]). r would be normalized. 
+ */ +int mbedtls_internal_run_modop(mbedtls_mpi *r, + const mbedtls_mpi *o1, + const mbedtls_mpi *o2, + const mbedtls_mpi *p, + uint32_t pbits, + uint32_t modop); + +/** + * \brief Import X from ECC registers, little endian + * + * \param X Destination MPI + * \param eccreg Start of input ECC registers + * \param eccreg_num Number of input ECC registers (32-bit words) + * + * \return 0 if successful, + * MBEDTLS_ERR_MPI_BAD_INPUT_DATA if X is NULL + * + * \note Destination MPI is always non-negative. + */ +static int mbedtls_internal_mpi_read_eccreg( mbedtls_mpi *X, const volatile uint32_t *eccreg, size_t eccreg_num ); + +/** + * \brief Export X into ECC registers, little endian + * + * \param X Source MPI + * \param eccreg Start of ECC output registers + * \param eccreg_num Number of ECC output registers (32-bit words) + * + * \return 0 if successful, + * MBEDTLS_ERR_MPI_BAD_INPUT_DATA if X is NULL, + * MBEDTLS_ERR_MPI_NEGATIVE_VALUE if X is negative, + * MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL if X needs more than eccreg_num words + * + * \note Source MPI cannot be negative. + * \note Fills the remaining MSB ECC registers with zeros if X doesn't cover all. + */ +static int mbedtls_internal_mpi_write_eccreg( const mbedtls_mpi *X, volatile uint32_t *eccreg, size_t eccreg_num ); + +unsigned char mbedtls_internal_ecp_grp_capable( const mbedtls_ecp_group *grp ) +{ + /* Support only short Weierstrass type: the group is reported capable (1) only when both G.X and G.Y are set + * + * ECP type is checked by referring to mbed-os/features/mbedtls/src/ecp.c > ecp_get_type + */ + if (grp->G.X.p == NULL || grp->G.Y.p == NULL) { + return 0; + } + + return 1; +} + +int mbedtls_internal_ecp_init( const mbedtls_ecp_group *grp ) +{ + /* TODO: Replace the busy-wait with a mechanism that releases the CPU */ + /* Acquire ownership of ECC accelerator; spins until crypto_ecc_acquire() succeeds */ + while (! 
crypto_ecc_acquire()); + + /* Init crypto module and enable ECC interrupt */ + crypto_init(); + ECC_ENABLE_INT(); + + return 0; +} + +void mbedtls_internal_ecp_free( const mbedtls_ecp_group *grp ) +{ + /* Disable ECC interrupt */ + ECC_DISABLE_INT(); + /* Uninit crypto module */ + crypto_uninit(); + + /* Release ownership of ECC accelerator taken in mbedtls_internal_ecp_init */ + crypto_ecc_release(); +} + +#if defined(ECP_SHORTWEIERSTRASS) + +#if defined(MBEDTLS_ECP_ADD_MIXED_ALT) +/** + * \brief Addition: R = P + Q, mixed affine-Jacobian coordinates. + * + * The coordinates of Q must be normalized (= affine), + * but those of P don't need to. This implementation returns R + * normalized (Z is 0 or 1), as produced by mbedtls_internal_run_eccop. + * + * We accept Q->Z being unset (saving memory in tables) as + * meaning 1. + * + * \param grp Pointer to the group representing the curve. + * + * \param R Pointer to a point structure to hold the result. + * + * \param P Pointer to the first summand, given with Jacobian + * coordinates + * + * \param Q Pointer to the second summand, given with affine + * coordinates. + * + * \return 0 if successful. + */ +int mbedtls_internal_ecp_add_mixed( const mbedtls_ecp_group *grp, + mbedtls_ecp_point *R, const mbedtls_ecp_point *P, + const mbedtls_ecp_point *Q ) +{ + int ret; + mbedtls_ecp_point P_, Q_; + + mbedtls_ecp_point_init(&P_); + mbedtls_ecp_point_init(&Q_); + + /* P_ = normalized copy of P (P itself is const and may be in Jacobian form) */ + MBEDTLS_MPI_CHK(mbedtls_ecp_copy(&P_, P)); + MBEDTLS_MPI_CHK(mbedtls_internal_ecp_normalize_jac(grp, &P_)); + + /* Q_ = normalized Q */ + MBEDTLS_MPI_CHK(mbedtls_ecp_copy(&Q_, Q)); + /* NOTE: We accept Q->Z being unset (saving memory in tables) as meaning 1. 
+ * + * Q->Z.p == NULL ==> Q_.Z = 1 + * Q->Z.p != NULL && mbedtls_mpi_cmp_int(&Q->Z, 0) == 0 ==> Q_.Z = 0 + * Q->Z.p != NULL && mbedtls_mpi_cmp_int(&Q->Z, 0) != 0 ==> Q_.Z = 1 + */ + if (Q->Z.p != NULL && mbedtls_mpi_cmp_int(&Q->Z, 0) == 0) { + MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&Q_.Z, 0)); + } else { + MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&Q_.Z, 1)); + } + + /* Run ECC point addition: R = P + Q */ + MBEDTLS_MPI_CHK(mbedtls_internal_run_eccop(grp, R, NULL, &P_, NULL, &Q_, ECCOP_POINT_ADD)); + +cleanup: + + mbedtls_ecp_point_free(&Q_); + mbedtls_ecp_point_free(&P_); + + return ret; +} + +#endif + +#if defined(MBEDTLS_ECP_DOUBLE_JAC_ALT) +/** + * \brief Point doubling R = 2 P, Jacobian coordinates. + * + * \param grp Pointer to the group representing the curve. + * + * \param R Pointer to a point structure to hold the result. + * + * \param P Pointer to the point that has to be doubled, given with + * Jacobian coordinates. + * + * \return 0 if successful. + */ +int mbedtls_internal_ecp_double_jac( const mbedtls_ecp_group *grp, + mbedtls_ecp_point *R, const mbedtls_ecp_point *P ) +{ + int ret; + mbedtls_ecp_point P_; + + mbedtls_ecp_point_init(&P_); + + /* P_ = normalized copy of P (P itself is const and may be in Jacobian form) */ + MBEDTLS_MPI_CHK(mbedtls_ecp_copy(&P_, P)); + MBEDTLS_MPI_CHK(mbedtls_internal_ecp_normalize_jac(grp, &P_)); + + /* Run ECC point doubling: R = 2*P */ + MBEDTLS_MPI_CHK(mbedtls_internal_run_eccop(grp, R, NULL, &P_, NULL, NULL, ECCOP_POINT_DOUBLE)); + +cleanup: + + mbedtls_ecp_point_free(&P_); + + return ret; +} +#endif + +/** + * \brief Point multiplication R = m*P, Jacobian coordinates. + * + * \param grp Pointer to the group representing the curve. + * + * \param R Pointer to a point structure to hold the result. + * + * \param m Pointer to MPI by which to multiply P + * + * \param P Pointer to the point that has to be multiplied by m, given with + * Jacobian coordinates. + * + * \return 0 if successful. + * + * \note Currently mbedTLS doesn't open R = m*P API like this. 
+ * It is expected because ECC accelerator can improve it by 30~40 times. + */ +int mbedtls_internal_ecp_mul_jac(mbedtls_ecp_group *grp, + mbedtls_ecp_point *R, + const mbedtls_mpi *m, + const mbedtls_ecp_point *P) +{ + int ret; + mbedtls_ecp_point P_; + + mbedtls_ecp_point_init(&P_); + + /* P_ = normalized copy of P (P itself is const and may be in Jacobian form) */ + MBEDTLS_MPI_CHK(mbedtls_ecp_copy(&P_, P)); + MBEDTLS_MPI_CHK(mbedtls_internal_ecp_normalize_jac(grp, &P_)); + + /* Run ECC point multiplication: R = m*P */ + MBEDTLS_MPI_CHK(mbedtls_internal_run_eccop(grp, R, m, &P_, NULL, NULL, ECCOP_POINT_MUL)); + +cleanup: + + mbedtls_ecp_point_free(&P_); + + return ret; +} + +#if defined(MBEDTLS_ECP_NORMALIZE_JAC_ALT) +/** + * \brief Normalize jacobian coordinates so that Z == 0 || Z == 1. + * + * \param grp Pointer to the group representing the curve. + * + * \param pt pointer to the point to be normalised. This is an + * input/output parameter. + * + * \return 0 if successful, MBEDTLS_ERR_ECP_BAD_INPUT_DATA if grp or pt is NULL, or the error code of the first MPI/MODOP step that failed. + */ +int mbedtls_internal_ecp_normalize_jac( const mbedtls_ecp_group *grp, + mbedtls_ecp_point *pt ) +{ + if (grp == NULL || pt == NULL) { + return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + } + + /* Is a zero point, nothing to do + * + * Z = 0 + */ + if (mbedtls_mpi_cmp_int(&pt->Z, 0) == 0) { + return 0; + } + + /* Is a non-zero point which has been normalized, nothing to do + * + * Z = 1 + * 0 <= X < P + * 0 <= Y < P + */ + if (mbedtls_mpi_cmp_int(&pt->Z, 1) == 0 && + mbedtls_mpi_cmp_int(&pt->X, 0) >= 0 && + mbedtls_mpi_cmp_mpi(&pt->X, &grp->P) < 0 && + mbedtls_mpi_cmp_int(&pt->Y, 0) >= 0 && + mbedtls_mpi_cmp_mpi(&pt->Y, &grp->P) < 0) { + return 0; + } + + int ret; + mbedtls_mpi N, Zi, ZZi; + const mbedtls_mpi *Np; + + mbedtls_mpi_init(&N); + mbedtls_mpi_init(&Zi); + mbedtls_mpi_init(&ZZi); + + /* Use MBEDTLS_INTERNAL_MPI_NORM(&Np, N1, N, P) to get normalized MPI + * + * N: Holds normalized MPI if the passed-in MPI N1 is not + * Np: Pointer to normalized MPI, which could be N1 or N + */ + + /* Zi = 1 / Z; the lset result is checked so that a failure is not silently turned into Zi = 0 */ + MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&Zi, 1)); + MBEDTLS_INTERNAL_MPI_NORM(&Np, pt->Z, N, 
grp->P); + MBEDTLS_MPI_CHK(mbedtls_internal_run_modop(&Zi, &Zi, Np, &grp->P, grp->pbits, MODOP_DIV)); + + /* ZZi = 1 / Z^2 = Zi * Zi */ + MBEDTLS_MPI_CHK(mbedtls_internal_run_modop(&ZZi, &Zi, &Zi, &grp->P, grp->pbits, MODOP_MUL)); + + /* X = X / Z^2 = X * ZZi */ + MBEDTLS_INTERNAL_MPI_NORM(&Np, pt->X, N, grp->P); + MBEDTLS_MPI_CHK(mbedtls_internal_run_modop(&pt->X, Np, &ZZi, &grp->P, grp->pbits, MODOP_MUL)); + + /* Y = Y / Z^3 = Y * ZZi * Zi */ + MBEDTLS_INTERNAL_MPI_NORM(&Np, pt->Y, N, grp->P); + MBEDTLS_MPI_CHK(mbedtls_internal_run_modop(&pt->Y, Np, &ZZi, &grp->P, grp->pbits, MODOP_MUL)); + MBEDTLS_MPI_CHK(mbedtls_internal_run_modop(&pt->Y, &pt->Y, &Zi, &grp->P, grp->pbits, MODOP_MUL)); + + /* Z = 1 */ + MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&pt->Z, 1)); + +cleanup: + + mbedtls_mpi_free(&ZZi); + mbedtls_mpi_free(&Zi); + mbedtls_mpi_free(&N); + + return ret; +} +#endif + +#if defined(MBEDTLS_ECP_NORMALIZE_JAC_MANY_ALT) +/** + * \brief Normalize jacobian coordinates of an array of (pointers to) + * points. + * + * \param grp Pointer to the group representing the curve. + * + * \param T Array of pointers to the points to normalise. + * + * \param t_len Number of elements in the array. + * + * \return 0 if successful. 
+ */ +int mbedtls_internal_ecp_normalize_jac_many(const mbedtls_ecp_group *grp, + mbedtls_ecp_point *T[], size_t t_len) +{ + if (T == NULL || t_len == 0) { + return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + } + + int ret; + + mbedtls_ecp_point **ecp_point = T; + mbedtls_ecp_point **ecp_point_end = T + t_len; + + for (; ecp_point != ecp_point_end; ecp_point ++) { + MBEDTLS_MPI_CHK(mbedtls_internal_ecp_normalize_jac(grp, *ecp_point)); + } + +cleanup: + + return ret; +} +#endif + +int mbedtls_internal_run_eccop(const mbedtls_ecp_group *grp, + mbedtls_ecp_point *R, + const mbedtls_mpi *m, + const mbedtls_ecp_point *P, + const mbedtls_mpi *n, + const mbedtls_ecp_point *Q, + uint32_t eccop) +{ + /* Check necessary arguments for all ECC operations */ + if (grp == NULL || R == NULL) { + return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + } + + /* Check grp->P is positive */ + if (mbedtls_mpi_cmp_int(&grp->P, 0) <= 0) { + return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + } + + /* Check supported maximum key bits */ + if (grp->pbits > NU_ECC_MAXKEYBITS) { + return MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE; + } + + int ret; + mbedtls_mpi N_; + const mbedtls_mpi *Np; + + mbedtls_mpi_init(&N_); + + /* Use MBEDTLS_INTERNAL_MPI_NORM(&Np, N1, N_, P) to get normalized MPI + * + * N_: Holds normalized MPI if the passed-in MPI N1 is not + * Np: Pointer to normalized MPI, which could be N1 or N_ + */ + + /* Check necessary arguments and handle special cases for specified ECC operation + * + * ECCOP_POINT_MUL R = m*P + * ECCOP_POINT_ADD R = P + Q + * ECCOP_POINT_DOUBLE R = 2*P + * + * ECC accelerator doesn't support R = 0, and we need to detect it. 
+ */ + if (eccop == ECCOP_POINT_MUL) { + /* R = m*P */ + if (m == NULL || P == NULL) { + ret = MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + goto cleanup; + } + + /* R = 0*P = 0 or R = P = 0 */ + if (mbedtls_mpi_cmp_int(m, 0) == 0 || mbedtls_mpi_cmp_int(&P->Z, 0) == 0) { + ret = mbedtls_ecp_set_zero(R); + goto cleanup; + } + + /* R = 1*P: no accelerator run needed, just return normalized P */ + if (mbedtls_mpi_cmp_int(m, 1) == 0) { + MBEDTLS_MPI_CHK(mbedtls_ecp_copy(R, P)); + MBEDTLS_MPI_CHK(mbedtls_internal_ecp_normalize_jac(grp, R)); + goto cleanup; + } + + /* R = m*P = (multiple of order)*G = 0 */ + /* NOTE: If grp->N (order) is a prime, we could detect R = 0 for all m*P cases + * by just checking if m is a multiple of grp->N. Otherwise some R = 0 cases are not caught here. */ + /* TODO: Find an approach to detecting R = 0 for all m*P cases */ + MBEDTLS_INTERNAL_MPI_NORM(&Np, *m, N_, grp->N); + if (mbedtls_mpi_cmp_int(Np, 0) == 0) { + MBEDTLS_MPI_CHK(mbedtls_ecp_set_zero(R)); + goto cleanup; + } + + } else if (eccop == ECCOP_POINT_ADD) { + /* R = P + Q */ + if (P == NULL || Q == NULL) { + ret = MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + goto cleanup; + } + + /* R = 0 + Q = Q */ + if (mbedtls_mpi_cmp_int(&P->Z, 0) == 0) { + MBEDTLS_MPI_CHK(mbedtls_ecp_copy(R, Q)); + MBEDTLS_MPI_CHK(mbedtls_internal_ecp_normalize_jac(grp, R)); + goto cleanup; + } + + /* R = P + 0 = P */ + if (mbedtls_mpi_cmp_int(&Q->Z, 0) == 0) { + MBEDTLS_MPI_CHK(mbedtls_ecp_copy(R, P)); + MBEDTLS_MPI_CHK(mbedtls_internal_ecp_normalize_jac(grp, R)); + goto cleanup; + } + + /* R = P + Q = P + (-P) = 0, detected as Y(P) + Y(Q) == 0 mod p. NOTE(review): X(P) == X(Q) is not compared, so two distinct points with opposite Y would also be reported as zero, and P == Q is not redirected to point doubling - confirm whether callers or the accelerator rely on either */ + MBEDTLS_MPI_CHK(mbedtls_internal_run_modop(&N_, &P->Y, &Q->Y, &grp->P, grp->pbits, MODOP_ADD)); + if (mbedtls_mpi_cmp_int(&N_, 0) == 0) { + MBEDTLS_MPI_CHK(mbedtls_ecp_set_zero(R)); + goto cleanup; + } + } else if (eccop == ECCOP_POINT_DOUBLE) { + /* R = 2*P */ + if (P == NULL) { + ret = MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + goto cleanup; + } + + /* R = 2*0 = 0 */ + if (mbedtls_mpi_cmp_int(&P->Z, 0) == 0) { + MBEDTLS_MPI_CHK(mbedtls_ecp_set_zero(R)); + goto cleanup; + } + + /* R = 2*P = P + P = P + 
(-P) = 0 */ + MBEDTLS_MPI_CHK(mbedtls_internal_run_modop(&N_, &P->Y, &P->Y, &grp->P, grp->pbits, MODOP_ADD)); + if (mbedtls_mpi_cmp_int(&N_, 0) == 0) { + MBEDTLS_MPI_CHK(mbedtls_ecp_set_zero(R)); + goto cleanup; + } + } else { + ret = MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + goto cleanup; + } + + /* Configure ECC curve coefficients A/B */ + /* Special case: grp->A left unset (A.p == NULL) is taken as A = -3, written to the accelerator as -3 mod P */ + if (grp->A.p == NULL) { + MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&N_, -3)); + MBEDTLS_INTERNAL_MPI_NORM(&Np, N_, N_, grp->P); + } else { + MBEDTLS_INTERNAL_MPI_NORM(&Np, grp->A, N_, grp->P); + } + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_write_eccreg(Np, (uint32_t *) CRPT->ECC_A, NU_ECC_BIGNUM_MAXWORD)); + MBEDTLS_INTERNAL_MPI_NORM(&Np, grp->B, N_, grp->P); + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_write_eccreg(Np, (uint32_t *) CRPT->ECC_B, NU_ECC_BIGNUM_MAXWORD)); + + /* Configure ECC prime modulus */ + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_write_eccreg(&grp->P, (uint32_t *) CRPT->ECC_N, NU_ECC_BIGNUM_MAXWORD)); + + /* Configure ECC scalar for point multiplication + * + * Normalize m to within [1, order - 1] which ECCOP_POINT_MUL supports + * Special cases with R = 0 have already been handled above. 
+ */ + if (eccop == ECCOP_POINT_MUL) { + MBEDTLS_INTERNAL_MPI_NORM(&Np, *m, N_, grp->N); + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_write_eccreg(Np, (uint32_t *) CRPT->ECC_K, NU_ECC_BIGNUM_MAXWORD)); + } + + /* Configure ECC point (X1, Y1) from P */ + MBEDTLS_INTERNAL_MPI_NORM(&Np, P->X, N_, grp->P); + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_write_eccreg(Np, (uint32_t *) CRPT->ECC_X1, NU_ECC_BIGNUM_MAXWORD)); + MBEDTLS_INTERNAL_MPI_NORM(&Np, P->Y, N_, grp->P); + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_write_eccreg(Np, (uint32_t *) CRPT->ECC_Y1, NU_ECC_BIGNUM_MAXWORD)); + + /* Configure second ECC point (X2, Y2) from Q, needed only for point addition */ + if (eccop == ECCOP_POINT_ADD) { + MBEDTLS_INTERNAL_MPI_NORM(&Np, Q->X, N_, grp->P); + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_write_eccreg(Np, (uint32_t *) CRPT->ECC_X2, NU_ECC_BIGNUM_MAXWORD)); + MBEDTLS_INTERNAL_MPI_NORM(&Np, Q->Y, N_, grp->P); + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_write_eccreg(Np, (uint32_t *) CRPT->ECC_Y2, NU_ECC_BIGNUM_MAXWORD)); + } + + crypto_ecc_prestart(); + CRPT->ECC_CTL = (grp->pbits << CRPT_ECC_CTL_CURVEM_Pos) | eccop | CRPT_ECC_CTL_FSEL_Msk | CRPT_ECC_CTL_START_Msk; + bool ecc_done = crypto_ecc_wait(); + + /* FIXME: Better error code for ECC accelerator error (currently a bare -1 when crypto_ecc_wait() returns false) */ + MBEDTLS_MPI_CHK(ecc_done ? 0 : -1); + + /* (X1, Y1) hold the normalized result. 
*/ + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_read_eccreg(&R->X, (uint32_t *) CRPT->ECC_X1, NU_ECC_BIGNUM_MAXWORD)); + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_read_eccreg(&R->Y, (uint32_t *) CRPT->ECC_Y1, NU_ECC_BIGNUM_MAXWORD)); + MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&R->Z, 1)); + +cleanup: + + mbedtls_mpi_free(&N_); + + return ret; +} + +int mbedtls_internal_run_modop(mbedtls_mpi *r, + const mbedtls_mpi *o1, + const mbedtls_mpi *o2, + const mbedtls_mpi *p, + uint32_t pbits, + uint32_t modop) +{ + if (r == NULL || + o1 == NULL || + o2 == NULL || + p == NULL) { + return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + } + + /* Check o1/o2 are not negative */ + if (mbedtls_mpi_cmp_int(o1, 0) < 0 || + mbedtls_mpi_cmp_int(o2, 0) < 0) { + return MBEDTLS_ERR_MPI_NEGATIVE_VALUE; + } + + /* Check p is positive */ + if (mbedtls_mpi_cmp_int(p, 0) <= 0) { + return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + } + + /* Check supported maximum key bits */ + if (pbits > NU_ECC_MAXKEYBITS) { + return MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE; + } + + /* Check modop is one of the supported MODOP codes */ + if (modop != MODOP_DIV && + modop != MODOP_MUL && + modop != MODOP_ADD && + modop != MODOP_SUB) { + return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + } + + int ret; + + mbedtls_mpi N_; + const mbedtls_mpi *Np; + + mbedtls_mpi_init(&N_); + + /* Use MBEDTLS_INTERNAL_MPI_NORM(&Np, N1, N_, P) to get normalized MPI + * + * N_: Holds normalized MPI if the passed-in MPI N1 is not + * Np: Pointer to normalized MPI, which could be N1 or N_ + * + * Operand placement: MUL/ADD/SUB load o1 into X1 and o2 into Y1; DIV loads them swapped (o2 into X1, o1 into Y1). + */ + + if (modop == MODOP_MUL || + modop == MODOP_ADD || + modop == MODOP_SUB) { + MBEDTLS_INTERNAL_MPI_NORM(&Np, *o1, N_, *p); + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_write_eccreg(Np, (uint32_t *) CRPT->ECC_X1, NU_ECC_BIGNUM_MAXWORD)); + MBEDTLS_INTERNAL_MPI_NORM(&Np, *o2, N_, *p); + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_write_eccreg(Np, (uint32_t *) CRPT->ECC_Y1, NU_ECC_BIGNUM_MAXWORD)); + } else if (modop == MODOP_DIV) { + MBEDTLS_INTERNAL_MPI_NORM(&Np, *o2, N_, *p); + 
MBEDTLS_MPI_CHK(mbedtls_internal_mpi_write_eccreg(Np, (uint32_t *) CRPT->ECC_X1, NU_ECC_BIGNUM_MAXWORD)); + MBEDTLS_INTERNAL_MPI_NORM(&Np, *o1, N_, *p); + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_write_eccreg(Np, (uint32_t *) CRPT->ECC_Y1, NU_ECC_BIGNUM_MAXWORD)); + } else { + MBEDTLS_MPI_CHK(MBEDTLS_ERR_ECP_BAD_INPUT_DATA); + } + + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_write_eccreg(p, (uint32_t *) CRPT->ECC_N, NU_ECC_BIGNUM_MAXWORD)); + + crypto_ecc_prestart(); + CRPT->ECC_CTL = (pbits << CRPT_ECC_CTL_CURVEM_Pos) | (ECCOP_MODULE | modop) | CRPT_ECC_CTL_FSEL_Msk | CRPT_ECC_CTL_START_Msk; + bool ecc_done = crypto_ecc_wait(); + + /* FIXME: Better error code for ECC accelerator error (currently a bare -1 when crypto_ecc_wait() returns false) */ + MBEDTLS_MPI_CHK(ecc_done ? 0 : -1); + + /* X1 holds the result. */ + MBEDTLS_MPI_CHK(mbedtls_internal_mpi_read_eccreg(r, (uint32_t *) CRPT->ECC_X1, NU_ECC_BIGNUM_MAXWORD)); + +cleanup: + + mbedtls_mpi_free(&N_); + + return ret; +} + +#endif // ECP_SHORTWEIERSTRASS + +static int mbedtls_internal_mpi_read_eccreg(mbedtls_mpi *x, const volatile uint32_t *eccreg, size_t eccreg_num) +{ + if (x == NULL) { + return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; + } + + int ret; + size_t i, n; + + for (n = eccreg_num; n > 0; n --) { + if (eccreg[n - 1] != 0) { + break; + } + } + + MBEDTLS_MPI_CHK(mbedtls_mpi_lset(x, 0)); + MBEDTLS_MPI_CHK(mbedtls_mpi_grow(x, WORDS_TO_LIMBS(n))); + + for (i = 0; i < n; i ++) { + x->p[i / wiL] |= ((mbedtls_mpi_uint) eccreg[i]) << ((i % wiL) << 5); + } + +cleanup: + + return ret; +} + +static int mbedtls_internal_mpi_write_eccreg( const mbedtls_mpi *x, volatile uint32_t *eccreg, size_t eccreg_num ) +{ + if (x == NULL) { + return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; + } + + if (mbedtls_mpi_cmp_int(x, 0) < 0) { + return MBEDTLS_ERR_MPI_NEGATIVE_VALUE; + } + + size_t i, n; + + /* How many words needed? 
*/ + n = (mbedtls_mpi_size(x) + sizeof (uint32_t) - 1) / sizeof (uint32_t); + + if (eccreg_num < n) { + return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; + } + + /* Fill non-zero part, least significant word first */ + for (i = 0; i < n; i ++) { + eccreg[i] = (uint32_t) (x->p[i / wiL] >> ((i % wiL) << 5)); + } + + /* Zeroize remaining part (the eccreg_num - n most significant registers) + * + * crypto_zeroize32() is declared as never optimized out, so we can safely set H/W registers to 0 via it. + */ + crypto_zeroize32((uint32_t *) eccreg + i, eccreg_num - i); + + return 0; +} + +#endif /* MBEDTLS_ECP_INTERNAL_ALT */ +#endif /* ! MBEDTLS_ECP_ALT */ +#endif /* MBEDTLS_ECP_C */ diff --git a/targets/TARGET_NUVOTON/TARGET_M480/crypto/crypto-misc.c b/targets/TARGET_NUVOTON/TARGET_M480/crypto/crypto-misc.c index 808bfd38b8..0b06aadb17 100644 --- a/targets/TARGET_NUVOTON/TARGET_M480/crypto/crypto-misc.c +++ b/targets/TARGET_NUVOTON/TARGET_M480/crypto/crypto-misc.c @@ -30,6 +30,8 @@ static uint16_t crypto_aes_avail = 1; static uint16_t crypto_des_avail = 1; /* Track if SHA H/W is available */ static uint16_t crypto_sha_avail = 1; +/* Track if ECC H/W is available */ +static uint16_t crypto_ecc_avail = 1; /* Crypto (AES, DES, SHA, etc.) init counter. Crypto's keeps active as it is non-zero. 
*/ static uint16_t crypto_init_counter = 0U; @@ -37,12 +39,18 @@ static uint16_t crypto_init_counter = 0U; static bool crypto_submodule_acquire(uint16_t *submodule_avail); static void crypto_submodule_release(uint16_t *submodule_avail); +/* Crypto done flags, OR-ed into the crypto_xxx_done trackers by CRYPTO_IRQHandler and tested by crypto_submodule_wait */ +#define CRYPTO_DONE_OK BIT0 /* Done with OK */ +#define CRYPTO_DONE_ERR BIT1 /* Done with error */ + /* Track if PRNG H/W operation is done */ static volatile uint16_t crypto_prng_done; /* Track if AES H/W operation is done */ static volatile uint16_t crypto_aes_done; /* Track if DES H/W operation is done */ static volatile uint16_t crypto_des_done; +/* Track if ECC H/W operation is done (CRYPTO_DONE_OK or CRYPTO_DONE_ERR) */ +static volatile uint16_t crypto_ecc_done; static void crypto_submodule_prestart(volatile uint16_t *submodule_done); static bool crypto_submodule_wait(volatile uint16_t *submodule_done); @@ -102,6 +110,15 @@ void crypto_zeroize(void *v, size_t n) } } +/* Clear n 32-bit words starting at v to zero. Stores go through a volatile pointer so that they are never optimized out by the compiler */ +void crypto_zeroize32(uint32_t *v, size_t n) +{ + volatile uint32_t *p = (uint32_t*) v; + while (n--) { + *p++ = 0; + } +} + bool crypto_aes_acquire(void) { return crypto_submodule_acquire(&crypto_aes_avail); @@ -132,6 +149,16 @@ void crypto_sha_release(void) crypto_submodule_release(&crypto_sha_avail); } +bool crypto_ecc_acquire(void) +{ + return crypto_submodule_acquire(&crypto_ecc_avail); +} + +void crypto_ecc_release(void) +{ + crypto_submodule_release(&crypto_ecc_avail); +} + void crypto_prng_prestart(void) { crypto_submodule_prestart(&crypto_prng_done); @@ -162,6 +189,16 @@ bool crypto_des_wait(void) return crypto_submodule_wait(&crypto_des_done); } +void crypto_ecc_prestart(void) +{ + crypto_submodule_prestart(&crypto_ecc_done); +} + +bool crypto_ecc_wait(void) +{ + return crypto_submodule_wait(&crypto_ecc_done); +} + bool crypto_dma_buff_compat(const void *buff, size_t buff_size, size_t size_aligned_to) { uint32_t buff_ = (uint32_t) buff; @@ -236,20 +273,47 @@ static bool 
crypto_submodule_wait(volatile uint16_t *submodule_done) /* Ensure while loop above and subsequent code are not reordered */ __DSB(); - return true; + if ((*submodule_done & CRYPTO_DONE_OK)) { + /* Done with OK */ + return true; + } else if ((*submodule_done & CRYPTO_DONE_ERR)) { + /* Done with error */ + return false; + } + + return false; } /* Crypto interrupt handler */ void CRYPTO_IRQHandler() { - if (PRNG_GET_INT_FLAG()) { - crypto_prng_done = 1; + uint32_t intsts; + + if ((intsts = PRNG_GET_INT_FLAG())) { + /* Done with OK */ + crypto_prng_done |= CRYPTO_DONE_OK; + /* Clear interrupt flag */ PRNG_CLR_INT_FLAG(); - } else if (AES_GET_INT_FLAG()) { - crypto_aes_done = 1; + } else if ((intsts = AES_GET_INT_FLAG())) { + /* Done with OK */ + crypto_aes_done |= CRYPTO_DONE_OK; + /* Clear interrupt flag */ AES_CLR_INT_FLAG(); - } else if (TDES_GET_INT_FLAG()) { - crypto_des_done = 1; + } else if ((intsts = TDES_GET_INT_FLAG())) { + /* Done with OK */ + crypto_des_done |= CRYPTO_DONE_OK; + /* Clear interrupt flag */ TDES_CLR_INT_FLAG(); + } else if ((intsts = ECC_GET_INT_FLAG())) { + /* Check interrupt flags: ECCIF is tested first and reports OK; ECCEIF is only examined when ECCIF is clear and reports error */ + if (intsts & CRPT_INTSTS_ECCIF_Msk) { + /* Done with OK */ + crypto_ecc_done |= CRYPTO_DONE_OK; + } else if (intsts & CRPT_INTSTS_ECCEIF_Msk) { + /* Done with error */ + crypto_ecc_done |= CRYPTO_DONE_ERR; + } + /* Clear interrupt flag */ + ECC_CLR_INT_FLAG(); } } diff --git a/targets/TARGET_NUVOTON/TARGET_M480/crypto/crypto-misc.h b/targets/TARGET_NUVOTON/TARGET_M480/crypto/crypto-misc.h index f2cc89797f..9aa1ff8121 100644 --- a/targets/TARGET_NUVOTON/TARGET_M480/crypto/crypto-misc.h +++ b/targets/TARGET_NUVOTON/TARGET_M480/crypto/crypto-misc.h @@ -30,6 +30,7 @@ void crypto_uninit(void); /* Clear buffer to zero * Implementation that should never be optimized out by the compiler */ void crypto_zeroize(void *v, size_t n); +void crypto_zeroize32(uint32_t *v, size_t n); /* Acquire/release ownership of AES H/W */ /* NOTE: If "acquire" succeeds, "release" must be 
done to pair it. */ @@ -46,6 +47,11 @@ void crypto_des_release(void); bool crypto_sha_acquire(void); void crypto_sha_release(void); +/* Acquire/release ownership of ECC H/W */ +/* NOTE: If "acquire" succeeds, "release" must be done to pair it. */ +bool crypto_ecc_acquire(void); +void crypto_ecc_release(void); + /* Flow control between crypto/xxx start and crypto/xxx ISR * * crypto_xxx_prestart/crypto_xxx_wait encapsulate control flow between crypto/xxx start and crypto/xxx ISR. @@ -67,6 +73,8 @@ void crypto_aes_prestart(void); bool crypto_aes_wait(void); void crypto_des_prestart(void); bool crypto_des_wait(void); +void crypto_ecc_prestart(void); +bool crypto_ecc_wait(void); /* Check if buffer can be used for crypto DMA. It has the following requirements: diff --git a/targets/TARGET_NUVOTON/TARGET_NUC472/crypto/crypto-misc.c b/targets/TARGET_NUVOTON/TARGET_NUC472/crypto/crypto-misc.c index 808bfd38b8..5705558da8 100644 --- a/targets/TARGET_NUVOTON/TARGET_NUC472/crypto/crypto-misc.c +++ b/targets/TARGET_NUVOTON/TARGET_NUC472/crypto/crypto-misc.c @@ -37,6 +37,10 @@ static uint16_t crypto_init_counter = 0U; static bool crypto_submodule_acquire(uint16_t *submodule_avail); static void crypto_submodule_release(uint16_t *submodule_avail); +/* Crypto done flags, OR-ed into the crypto_xxx_done trackers by CRYPTO_IRQHandler and tested by crypto_submodule_wait */ +#define CRYPTO_DONE_OK BIT0 /* Done with OK */ +#define CRYPTO_DONE_ERR BIT1 /* Done with error */ + /* Track if PRNG H/W operation is done */ static volatile uint16_t crypto_prng_done; /* Track if AES H/W operation is done */ @@ -236,20 +240,36 @@ static bool crypto_submodule_wait(volatile uint16_t *submodule_done) /* Ensure while loop above and subsequent code are not reordered */ __DSB(); - return true; + if ((*submodule_done & CRYPTO_DONE_OK)) { + /* Done with OK */ + return true; + } else if ((*submodule_done & CRYPTO_DONE_ERR)) { + /* Done with error */ + return false; + } + + return false; } /* Crypto interrupt handler */ void CRYPTO_IRQHandler() { - if (PRNG_GET_INT_FLAG()) { - crypto_prng_done = 1; + 
uint32_t intsts; + + if ((intsts = PRNG_GET_INT_FLAG())) { + /* Done with OK */ + crypto_prng_done |= CRYPTO_DONE_OK; + /* Clear interrupt flag */ PRNG_CLR_INT_FLAG(); - } else if (AES_GET_INT_FLAG()) { - crypto_aes_done = 1; + } else if ((intsts = AES_GET_INT_FLAG())) { + /* Done with OK */ + crypto_aes_done |= CRYPTO_DONE_OK; + /* Clear interrupt flag */ AES_CLR_INT_FLAG(); - } else if (TDES_GET_INT_FLAG()) { - crypto_des_done = 1; + } else if ((intsts = TDES_GET_INT_FLAG())) { + /* Done with OK */ + crypto_des_done |= CRYPTO_DONE_OK; + /* Clear interrupt flag */ TDES_CLR_INT_FLAG(); } }