diff --git a/TESTS/mbed_platform/wait_ns/main.cpp b/TESTS/mbed_platform/wait_ns/main.cpp new file mode 100644 index 0000000000..99daca770c --- /dev/null +++ b/TESTS/mbed_platform/wait_ns/main.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2018, ARM Limited, All Rights Reserved + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mbed.h" +#include "greentea-client/test_env.h" +#include "unity.h" +#include "utest.h" +#include "platform/mbed_wait_api.h" +#include "hal/us_ticker_api.h" +#include "hal/lp_ticker_api.h" + +using namespace utest::v1; + +/* This test is created based on the test for Timer class. + * Since low power timer is less accurate than regular + * timer we need to adjust delta. + */ + +/* + * Define tolerance as follows: + * Timer might be +/-5% out; wait_ns is permitted 40% slow, but not fast, so + * the measured time must lie between 95% and 145% of requested. Unity only + * takes a symmetric error, so we assert MIDPOINT +/- DELTA (1.20 +/- 0.25). + * + * Would be nice to have tighter upper tolerance, but in practice we've seen + * a few devices unable to sustain theoretical throughput - flash wait states? + */ +#define TOLERANCE_MIN 0.95f +#define TOLERANCE_MAX 1.45f +#define MIDPOINT ((TOLERANCE_MIN+TOLERANCE_MAX)/2) +#define DELTA (MIDPOINT-TOLERANCE_MIN) + +/* This test verifies if wait_ns's wait time + * is accurate, according to a timer. + * + * Given timer is created. 
+ * When timer (CompareTimer) is used to measure a wait_val_ms ms delay. + * Then the results are valid (within acceptable range). + */ +template<int wait_val_ms, class CompareTimer> +void test_wait_ns_time_measurement() +{ + CompareTimer timer; + + float wait_val_s = (float)wait_val_ms / 1000; + + /* Start the timer. */ + timer.start(); + + /* Wait wait_val_ms ms - arithmetic inside wait_ns will overflow if + * asked for too large a delay, so break it up into 1ms calls. + */ + for (int i = 0; i < wait_val_ms; i++) { + wait_ns(1000000); + } + + /* Stop the timer. */ + timer.stop(); + + /* Check results - wait_val_ms ms should have elapsed. */ + TEST_ASSERT_FLOAT_WITHIN(DELTA * wait_val_s, MIDPOINT * wait_val_s, timer.read()); +} + +utest::v1::status_t test_setup(const size_t number_of_cases) +{ + GREENTEA_SETUP(15, "default_auto"); + return verbose_test_setup_handler(number_of_cases); +} + +Case cases[] = { +#if DEVICE_LPTICKER + Case("Test: wait_ns - compare with lp_timer 1s", test_wait_ns_time_measurement<1000, LowPowerTimer>), +#endif + Case("Test: wait_ns - compare with us_timer 1s", test_wait_ns_time_measurement<1000, Timer>) +}; + +Specification specification(test_setup, cases); + +int main() +{ + return !Harness::run(specification); +} diff --git a/platform/mbed_wait_api.h b/platform/mbed_wait_api.h index 9402d6050a..d0463e5da8 100644 --- a/platform/mbed_wait_api.h +++ b/platform/mbed_wait_api.h @@ -78,11 +78,43 @@ void wait_ms(int ms); * * @note * This function always spins to get the exact number of microseconds. - * If RTOS is present, this will affect power (by preventing deep sleep) and - * multithread performance. Therefore, spinning for millisecond wait is not recommended. + * This will affect power and multithread performance. Therefore, spinning for + * millisecond wait is not recommended, and wait_ms() should + * be used instead. + * + * @note You may call this function from ISR context, but large delays may + * impact system stability - interrupt handlers should take less than + * 50us. */ void wait_us(int us); +/** Waits a number of nanoseconds. 
+ * + * This function spins the CPU to produce a small delay. It should normally + * only be used for delays of 10us (10000ns) or less. As it is calculated + * based on the expected execution time of a software loop, it may well run + * slower than requested based on activity from other threads and interrupts. + * If greater precision is required, this can be called from inside a critical + * section. + * + * @param ns the number of nanoseconds to wait + * + * @note + * wait_us() will likely give more precise time than wait_ns for large-enough + * delays, as it is based on a timer, but its set-up time may be excessive + * for the smallest microsecond counts, at which point wait_ns() is better. + * + * @note + * Any delay larger than a millisecond (1000000ns) is liable to cause + * overflow in the internal loop calculation. You shouldn't normally be + * using this for such large delays anyway in real code, but be aware if + * calibrating. Make repeated calls for longer test runs. + * + * @note You may call this function from ISR context. + * @note Only defined for CPU cores whose loop timing is known; undefined elsewhere. + */ +void wait_ns(unsigned int ns); + #ifdef __cplusplus } #endif diff --git a/platform/mbed_wait_api_no_rtos.c b/platform/mbed_wait_api_no_rtos.c index d03840e866..6c9523361c 100644 --- a/platform/mbed_wait_api_no_rtos.c +++ b/platform/mbed_wait_api_no_rtos.c @@ -15,11 +15,14 @@ * limitations under the License. */ +#include "cmsis.h" +#include "platform/mbed_toolchain.h" +#include "platform/mbed_wait_api.h" + // This implementation of the wait functions will be compiled only // if the RTOS is not present. 
#ifndef MBED_CONF_RTOS_PRESENT -#include "platform/mbed_wait_api.h" #include "hal/us_ticker_api.h" void wait(float s) @@ -41,3 +44,64 @@ void wait_us(int us) #endif // #ifndef MBED_CONF_RTOS_PRESENT +// This wait_ns is used by both RTOS and non-RTOS builds +// LOOP_SCALER is the delay loop's cycles per iteration x 1000 (ns per us) +#ifdef __CORTEX_M +#if (__CORTEX_M == 0 && !defined __CM0PLUS_REV) || __CORTEX_M == 1 +// Cortex-M0 and Cortex-M1 take 6 cycles per iteration - SUBS = 1, 2xNOP = 2, BCS = 3 +#define LOOP_SCALER 6000 +#elif (__CORTEX_M == 0 && defined __CM0PLUS_REV) || __CORTEX_M == 3 || __CORTEX_M == 4 || \ + __CORTEX_M == 23 || __CORTEX_M == 33 +// Cortex-M0+, M3, M4, M23 and M33 take 5 cycles per iteration - SUBS = 1, 2xNOP = 2, BCS = 2 +// TODO - check M33 +#define LOOP_SCALER 5000 +#elif __CORTEX_M == 7 +// Cortex-M7 manages to dual-issue for 2 cycles per iteration (SUB,NOP) = 1, (NOP,BCS) = 1 +// (The NOPs were added to stabilise this - with just the SUB and BCS, it seems that the +// M7 sometimes takes 1 cycle, sometimes 2, possibly depending on alignment) +#define LOOP_SCALER 2000 +#endif +#elif defined __CORTEX_A +#if __CORTEX_A == 9 +// Cortex-A9 is dual-issue, so let's assume same performance as Cortex-M7. +// TODO - test. +#define LOOP_SCALER 2000 +#endif +#endif + +/* We only define the function if we've identified the CPU. If we haven't, + * we leave it undefined rather than failing the build with an immediate + * #error. This leaves the door open to non-ARM builds, and to people + * providing substitutes for other CPUs; a build is then only affected if + * wait_ns() is actually needed. + */ +#ifdef LOOP_SCALER + +/* Timing seems to depend on alignment, and toolchains do not support aligning + * functions well. So sidestep that by hand-assembling the code. Also avoids + * the hassle of handling multiple toolchains with different assembler + * syntax. 
+ */ +MBED_ALIGN(8) +static const uint16_t delay_loop_code[] = { + 0x1E40, // SUBS R0,R0,#1 + 0xBF00, // NOP + 0xBF00, // NOP + 0xD2FB, // BCS .-3 (0x00 would be .+2, so 0xFB = -5 = .-3) + 0x4770 // BX LR +}; + +/* Take the address of the code, set LSB to indicate Thumb, and cast to a prototyped void(uint32_t) function pointer (loop count goes in R0) */ +#define delay_loop ((void(*)(uint32_t)) ((uintptr_t) delay_loop_code | 1)) + +void wait_ns(unsigned int ns) +{ + uint32_t cycles_per_us = SystemCoreClock / 1000000; + // Note that this very calculation, plus call overhead, will take multiple + // cycles. Could well be 100ns on its own... So round down here, startup is + // worth at least one loop iteration. + uint32_t count = (cycles_per_us * ns) / LOOP_SCALER; + + delay_loop(count); +} +#endif // LOOP_SCALER diff --git a/targets/TARGET_NUVOTON/TARGET_M2351/device/M2351.h b/targets/TARGET_NUVOTON/TARGET_M2351/device/M2351.h index b53a38c018..b61abfd967 100644 --- a/targets/TARGET_NUVOTON/TARGET_M2351/device/M2351.h +++ b/targets/TARGET_NUVOTON/TARGET_M2351/device/M2351.h @@ -192,7 +192,7 @@ typedef enum IRQn /*@}*/ /* end of group CMSIS */ -#include "core_armv8mbl.h" /* Processor and core peripherals */ +#include "core_cm23.h" /* Processor and core peripherals */ #include "system_M2351.h" /* System Header */ /**