wait_us optimization

As the timer code became more generic, coping with initialization on
demand, and variable width and speed us_ticker_api implementations,
wait_us has gradually gotten slower and slower.

Some platforms have reportedly seen overhead of wait_us() increase from
10µs to 30µs. These changes should fully reverse that drop, and even
make it better than ever.

Add fast paths for platforms that provide compile-time information about
us_ticker. Speed and code size are improved further if:

* Timer has >= 2^32 microsecond range, or better still is 32-bit 1MHz.
* Platform implements us_ticker_read() as a macro
* Timer is initialised at boot, rather than first use

The last option (initialisation at boot) is the default for STM, as this
has always been the case for those targets.
pull/10609/head
Kevin Bracey 2019-05-17 16:03:11 +03:00
parent 5a8f795976
commit 57310729d4
10 changed files with 176 additions and 28 deletions

View File

@ -34,6 +34,11 @@ void us_ticker_info_test()
TEST_ASSERT(p_ticker_info->frequency >= 250000);
TEST_ASSERT(p_ticker_info->frequency <= 8000000);
TEST_ASSERT(p_ticker_info->bits >= 16);
#ifdef US_TICKER_PERIOD_NUM
TEST_ASSERT_UINT32_WITHIN(1, 1000000 * US_TICKER_PERIOD_DEN / US_TICKER_PERIOD_NUM, p_ticker_info->frequency);
TEST_ASSERT_EQUAL_UINT32(US_TICKER_MASK, ((uint64_t)1 << p_ticker_info->bits) - 1);
#endif
}
utest::v1::status_t test_setup(const size_t number_of_cases)

View File

@ -16,6 +16,7 @@
*/
#include <stddef.h>
#include "platform/mbed_atomic.h"
#include "hal/us_ticker_api.h"
#if DEVICE_USTICKER
@ -24,15 +25,54 @@ static ticker_event_queue_t events = { 0 };
static ticker_irq_handler_type irq_handler = ticker_irq_handler;
#if MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT
// When the ticker is initialized at boot we want it to be always-on,
// so this stand-in for the interface's free hook deliberately does
// nothing, which blocks any attempt to free the timer. init() is
// still passed through, as that needs to reset pending interrupts.
static void block_us_ticker_free(void)
{
}
#else // MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT
bool _us_ticker_initialized;
// If we are not initializing at boot, we want to track
// whether the timer has been initialized. This permits
// a fast path for wait_us.
// Init hook used when the ticker is not initialized at boot: runs
// us_ticker_init() and then atomically sets _us_ticker_initialized.
// The flag is only set once us_ticker_init() has returned.
static void note_us_ticker_init(void)
{
    us_ticker_init();
    core_util_atomic_store_bool(&_us_ticker_initialized, true);
}
// Free hook used when the ticker is not initialized at boot: atomically
// clears _us_ticker_initialized and then runs us_ticker_free(). The flag
// is cleared first, so it never reads true once the free has started.
static void note_us_ticker_free(void)
{
    core_util_atomic_store_bool(&_us_ticker_initialized, false);
    us_ticker_free();
}
#endif // MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT
// Ticker interface for the microsecond ticker: binds each generic
// ticker hook to the corresponding us_ticker HAL function. The init
// and free hooks are selected by MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT.
static const ticker_interface_t us_interface = {
#if MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT
// Boot-time initialization: no initialized flag is kept, so call the HAL directly.
.init = us_ticker_init,
#else
// On-demand initialization: the wrapper also sets _us_ticker_initialized.
.init = note_us_ticker_init,
#endif
.read = us_ticker_read,
.disable_interrupt = us_ticker_disable_interrupt,
.clear_interrupt = us_ticker_clear_interrupt,
.set_interrupt = us_ticker_set_interrupt,
.fire_interrupt = us_ticker_fire_interrupt,
.get_info = us_ticker_get_info,
// NOTE(review): .free is set again by the conditional entries just below;
// with designated initializers the later entry is the one that takes
// effect, so this line looks redundant (possibly a leftover) - confirm
// and remove if so.
.free = us_ticker_free,
#if MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT
// Boot-time initialization: the ticker stays on, so free is a no-op.
.free = block_us_ticker_free,
#else
// On-demand initialization: the wrapper clears _us_ticker_initialized before freeing.
.free = note_us_ticker_free,
#endif
.runs_in_deep_sleep = false,
};

View File

@ -41,6 +41,21 @@ extern "C" {
*
* @see hal_us_ticker_tests
*
* # Compile-time optimization macros
*
* To permit compile-time optimization, particularly of wait_us, the following macros should
* be defined by a target's device.h:
*
* US_TICKER_PERIOD_NUM, US_TICKER_PERIOD_DEN: These denote the ratio (numerator, denominator)
* of the ticker period to a microsecond. For example, an 8MHz ticker would have NUM = 1, DEN = 8;
* a 1MHz ticker would have NUM = 1, DEN = 1; a 250kHz ticker would have NUM = 4, DEN = 1.
* Both numerator and denominator must be 16 bits or less.
*
* US_TICKER_MASK: The value mask for the ticker, e.g. 0x07FFFFFF for a 27-bit ticker.
*
* If any are defined, all 3 must be defined, and the macros are checked for consistency with
* us_ticker_get_info by test ::us_ticker_info_test.
* @{
*/
@ -74,6 +89,7 @@ extern "C" {
* Verified by ::ticker_fire_now_test
* * The ticker operations ticker_read, ticker_clear_interrupt, ticker_set_interrupt and ticker_fire_interrupt
* take less than 20us to complete - Verified by ::ticker_speed_test
* * The ticker operations ticker_init and ticker_read are atomic.
*
* # Undefined behavior
* * Calling any function other than ticker_init before the initialization of the ticker
@ -210,7 +226,7 @@ void us_ticker_free(void);
* }
* @endcode
*/
uint32_t us_ticker_read(void);
uint32_t (us_ticker_read)(void);
/** Set interrupt for specified timestamp
*

View File

@ -483,6 +483,9 @@ extern "C" FILEHANDLE PREFIX(_open)(const char *name, int openflags)
if (!mbed_sdk_inited) {
mbed_copy_nvic();
mbed_sdk_init();
#if DEVICE_USTICKER && MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT
us_ticker_init();
#endif
mbed_sdk_inited = 1;
}
#endif

View File

@ -19,6 +19,7 @@
#include <stdlib.h>
#include <stdint.h>
#include "cmsis.h"
#include "hal/us_ticker_api.h"
/* This startup is for mbed 2 baremetal. There is no config for RTOS for mbed 2,
* therefore we protect this file with MBED_CONF_RTOS_PRESENT
@ -82,6 +83,9 @@ void _platform_post_stackheap_init(void)
{
mbed_copy_nvic();
mbed_sdk_init();
#if DEVICE_USTICKER && MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT
us_ticker_init();
#endif
}
#elif defined (__GNUC__)
@ -92,6 +96,9 @@ void software_init_hook(void)
{
mbed_copy_nvic();
mbed_sdk_init();
#if DEVICE_USTICKER && MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT
us_ticker_init();
#endif
software_init_hook_rtos();
}
@ -107,6 +114,9 @@ int __wrap_main(void)
/* Calls mbed_copy_nvic() and, when both DEVICE_USTICKER and
 * MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT are set, us_ticker_init().
 * Always returns 1.
 *
 * NOTE(review): the other boot hooks that gained the same us_ticker_init()
 * call invoke it right after mbed_sdk_init(); no mbed_sdk_init() call
 * precedes it in this function - confirm the ticker can safely be
 * initialised at this point on this toolchain path.
 */
int __low_level_init(void)
{
mbed_copy_nvic();
#if DEVICE_USTICKER && MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT
us_ticker_init();
#endif
return 1;
}

View File

@ -25,6 +25,9 @@
#ifndef MBED_WAIT_API_H
#define MBED_WAIT_API_H
#include "platform/mbed_atomic.h"
#include "device.h"
#ifdef __cplusplus
extern "C" {
#endif
@ -115,6 +118,48 @@ void wait_us(int us);
*/
void wait_ns(unsigned int ns);
/* Optimize if we know the rate */
#if DEVICE_USTICKER && defined US_TICKER_PERIOD_NUM
void _wait_us_ticks(uint32_t ticks);
void _wait_us_generic(unsigned int us);
/* Further optimization if we know us_ticker is always running */
#if MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT
#define _us_ticker_is_initialized true
#else
extern bool _us_ticker_initialized;
#define _us_ticker_is_initialized core_util_atomic_load_bool(&_us_ticker_initialized)
#endif
#if US_TICKER_PERIOD_DEN == 1 && (US_TICKER_MASK * US_TICKER_PERIOD_NUM) >= 0xFFFFFFFF
/* Ticker is wide and slow enough to have full 32-bit range - can always use it directly */
#define _us_is_small_enough(us) true
#else
/* Threshold is determined by specification of us_ticker_api.h - smallest possible
* time range for the us_ticker is 16-bit 8MHz, which gives 8192us. This also leaves
* headroom for the multiplication in 32 bits.
*/
#define _us_is_small_enough(us) ((us) < 8192)
#endif
/* Speed optimisation for small wait_us. Care taken to preserve binary compatibility.
 *
 * Fast path: when the delay passes _us_is_small_enough() and the ticker is
 * known to be initialised, convert microseconds to ticker ticks (rounding up)
 * and spin on the raw ticker via _wait_us_ticks(). Otherwise defer to
 * _wait_us_generic().
 *
 * @param us  Delay in microseconds.
 */
inline void _wait_us_inline(unsigned int us)
{
    if (_us_is_small_enough(us) && _us_ticker_is_initialized) {
        /* The multiplication cannot overflow 32 bits here: either
         * US_TICKER_PERIOD_DEN is 1, or us < 8192 and the denominator is
         * required to fit in 16 bits.
         */
        const uint32_t scaled = us * US_TICKER_PERIOD_DEN;
        /* Round up via the remainder instead of adding (NUM - 1) before the
         * division: that addition wraps for values near UINT32_MAX when
         * NUM > 1, which would turn a very long wait into a near-zero one.
         */
        const uint32_t ticks = scaled / US_TICKER_PERIOD_NUM + (scaled % US_TICKER_PERIOD_NUM != 0);
        _wait_us_ticks(ticks);
    } else {
        _wait_us_generic(us);
    }
}
#define wait_us(us) _wait_us_inline(us)
#endif // Known-rate, initialised timer
#ifdef __cplusplus
}
#endif

View File

@ -19,13 +19,14 @@
#include "platform/mbed_toolchain.h"
#include "platform/mbed_wait_api.h"
#include "hal/lp_ticker_api.h"
#include "hal/us_ticker_api.h"
#include "hal/ticker_api.h"
// This implementation of the wait functions will be compiled only
// if the RTOS is not present.
#ifndef MBED_CONF_RTOS_PRESENT
#include "hal/lp_ticker_api.h"
#include "hal/us_ticker_api.h"
void wait(float s)
{
wait_ms(s * 1000.0f);
@ -42,24 +43,55 @@ void wait_ms(int ms)
#endif
}
#endif // #ifndef MBED_CONF_RTOS_PRESENT
// This wait_us is used by both RTOS and non-RTOS builds
/* The actual time delay may be 1 less usec */
#if DEVICE_USTICKER
#if defined US_TICKER_PERIOD_NUM
/* Real definition for binary compatibility with binaries not using the new macro */
/* The parentheses around the name stop a function-like wait_us macro from
 * being expanded on the definition line, so an actual wait_us symbol is
 * emitted. The call in the body is not parenthesized, so where wait_us is
 * defined as a macro it expands there and the body forwards to whatever the
 * macro maps to.
 */
void (wait_us)(int us)
{
wait_us(us);
}
/* External definition for the inline function */
extern void _wait_us_inline(unsigned int us);
/* Busy-wait until at least `ticks` raw us_ticker ticks have elapsed.
 *
 * The elapsed count is taken modulo the ticker width by masking the
 * difference with US_TICKER_MASK, so a counter wrap during the wait is
 * accounted for.
 *
 * @param ticks  Number of ticker ticks to wait for.
 */
void _wait_us_ticks(uint32_t ticks)
{
    const uint32_t t0 = us_ticker_read();
    for (;;) {
        const uint32_t elapsed = (us_ticker_read() - t0) & US_TICKER_MASK;
        if (!(elapsed < ticks)) {
            break;
        }
    }
}
void _wait_us_generic(unsigned int us)
#else
void wait_us(int us)
#endif
{
    /* Generic busy-wait that goes through the full ticker layer, which
     * allows for initialization, scaling and widening of the timer.
     * Spins until the ticker reading has advanced by `us` from the
     * reading taken on entry.
     */
    const ticker_data_t *const us_ticker = get_us_ticker_data();
    const uint32_t began = ticker_read(us_ticker);
    const uint32_t wanted = (uint32_t)us;
    while ((ticker_read(us_ticker) - began) < wanted) {
        /* spin */
    }
}
#else // DEVICE_USTICKER
// fallback to wait_ns for targets without usticker
void wait_us(int us)
{
#if DEVICE_USTICKER
const ticker_data_t *const ticker = get_us_ticker_data();
uint32_t start = ticker_read(ticker);
while ((ticker_read(ticker) - start) < (uint32_t)us);
#else // fallback to wait_ns for targets without usticker
while (us > 1000) {
us -= 1000;
wait_ns(1000000);
while (us > 1024) {
us -= 1024;
wait_ns(1024000);
}
if (us > 0) {
wait_ns(us * 1000);
}
#endif // DEVICE_USTICKER
}
#endif // #ifndef MBED_CONF_RTOS_PRESENT
#endif // DEVICE_USTICKER
// This wait_ns is used by both RTOS and non-RTOS builds

View File

@ -63,17 +63,5 @@ void wait_ms(int ms)
}
}
/* Busy-wait for `us` microseconds on the us ticker.
 *
 * The actual delay may be 1 us shorter than requested. Requests above
 * 10000 us raise a warning, since wait_us blocks deep sleep and wait_ms
 * is recommended for long delays.
 */
void wait_us(int us)
{
    if (us > 10000) {
        MBED_WARNING(MBED_MAKE_ERROR(MBED_MODULE_PLATFORM, MBED_ERROR_UNKNOWN),
                     "wait_us blocks deep sleep, wait_ms recommended for long delays\n");
    }

    const ticker_data_t *const us_ticker = get_us_ticker_data();
    const uint32_t began = ticker_read(us_ticker);
    const uint32_t wanted = (uint32_t)us;
    while ((ticker_read(us_ticker) - began) < wanted) {
        /* spin */
    }
}
#endif // #if MBED_CONF_RTOS_PRESENT

View File

@ -58,6 +58,7 @@
#include <stdlib.h>
#include "cmsis.h"
#include "hal/us_ticker_api.h"
#include "mbed_toolchain.h"
#include "mbed_boot.h"
#include "mbed_error.h"
@ -75,6 +76,9 @@ void mbed_init(void)
mbed_mpu_manager_init();
mbed_cpy_nvic();
mbed_sdk_init();
#if DEVICE_USTICKER && MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT
us_ticker_init();
#endif
mbed_rtos_init();
}

View File

@ -47,6 +47,10 @@
"tickless-from-us-ticker": {
"help": "Run tickless from the microsecond ticker rather than the low power ticker. Running tickless off of the microsecond ticker improves interrupt latency on targets which use lpticker_delay_ticks",
"value": false
},
"init-us-ticker-at-boot": {
"help": "Initialize the microsecond ticker at boot rather than on first use, and leave it initialized. This speeds up wait_us in particular.",
"value": false
}
}
},
@ -1885,7 +1889,8 @@
},
"overrides": {
"deep-sleep-latency": 3,
"tickless-from-us-ticker": true
"tickless-from-us-ticker": true,
"init-us-ticker-at-boot": true
},
"device_has": [
"USTICKER",