mirror of https://github.com/ARMmbed/mbed-os.git
Enable RPi Pico's optimized ROM floating point routines (#202)
* Enable RPi Pico's optimized ROM floating point routines * Add license headerpull/15494/head
parent
79c56f3155
commit
c1effb15ec
|
@ -30,11 +30,83 @@ file(GENERATE
|
|||
CONTENT "${header_content}"
|
||||
)
|
||||
|
||||
# Ask the linker to wrap FUNCNAME for everything that links against TARGET.
# The LINKER: prefix lets CMake emit the toolchain-specific spelling, which is
# -Wl,--wrap=FUNCNAME for gcc.
function(pico_wrap_function TARGET FUNCNAME)
    set(_wrap_flag "LINKER:--wrap=${FUNCNAME}")
    target_link_options(${TARGET} INTERFACE "${_wrap_flag}")
endfunction()
|
||||
|
||||
# Wrap every single-precision floating point entry point listed below so that
# the implementations supplied by the Pico SDK are linked in place of the
# toolchain ones.
# The symbol list mirrors wrap_float_functions() in the Pico SDK's
# src/rp2_common/pico_float/CMakeLists.txt; keep the two in sync.
function(wrap_float_functions TARGET)
    set(_wrapped_symbols
        # AEABI arithmetic helpers
        __aeabi_fadd
        __aeabi_fdiv
        __aeabi_fmul
        __aeabi_frsub
        __aeabi_fsub
        # AEABI comparison helpers
        __aeabi_cfcmpeq
        __aeabi_cfrcmple
        __aeabi_cfcmple
        __aeabi_fcmpeq
        __aeabi_fcmplt
        __aeabi_fcmple
        __aeabi_fcmpge
        __aeabi_fcmpgt
        __aeabi_fcmpun
        # AEABI conversion helpers
        __aeabi_i2f
        __aeabi_l2f
        __aeabi_ui2f
        __aeabi_ul2f
        __aeabi_f2iz
        __aeabi_f2lz
        __aeabi_f2uiz
        __aeabi_f2ulz
        __aeabi_f2d
        # libm single-precision functions
        sqrtf
        cosf
        sinf
        tanf
        atan2f
        expf
        logf

        ldexpf
        copysignf
        truncf
        floorf
        ceilf
        roundf
        sincosf # gnu
        asinf
        acosf
        atanf
        sinhf
        coshf
        tanhf
        asinhf
        acoshf
        atanhf
        exp2f
        log2f
        exp10f
        log10f
        powf
        powintf #gnu
        hypotf
        cbrtf
        fmodf
        dremf
        remainderf
        remquof
        expm1f
        log1pf
        fmaf
    )

    # One --wrap link option per symbol, added in list order.
    foreach(_symbol IN LISTS _wrapped_symbols)
        pico_wrap_function(${TARGET} ${_symbol})
    endforeach()
endfunction()
|
||||
|
||||
# Now, add includes and headers from the Pico SDK
|
||||
target_include_directories(mbed-raspberrypi
|
||||
INTERFACE
|
||||
.
|
||||
pico-sdk/src/rp2_common/hardware_adc/include
|
||||
pico-sdk/src/rp2_common/hardware_divider/include
|
||||
pico-sdk/src/rp2_common/hardware_gpio/include
|
||||
pico-sdk/src/rp2_common/hardware_resets/include
|
||||
pico-sdk/src/rp2_common/hardware_pwm/include
|
||||
|
@ -54,6 +126,7 @@ target_include_directories(mbed-raspberrypi
|
|||
pico-sdk/src/rp2_common/pico_platform/include
|
||||
pico-sdk/src/rp2_common/pico_fix/rp2040_usb_device_enumeration/include/
|
||||
pico-sdk/src/rp2_common/pico_bootrom/include
|
||||
pico-sdk/src/rp2_common/pico_float/include
|
||||
pico-sdk/src/rp2_common/hardware_claim/include
|
||||
pico-sdk/src/common/pico_sync/include
|
||||
pico-sdk/src/common/pico_time/include
|
||||
|
@ -89,6 +162,11 @@ target_sources(mbed-raspberrypi
|
|||
pico-sdk/src/common/pico_time/time.c
|
||||
pico-sdk/src/common/pico_sync/lock_core.c
|
||||
pico-sdk/src/rp2_common/cmsis/stub/CMSIS/Device/RaspberryPi/RP2040/Source/system_RP2040.c
|
||||
pico-sdk/src/rp2_common/pico_float/float_aeabi.S
|
||||
pico-sdk/src/rp2_common/pico_float/float_init_rom.c
|
||||
pico-sdk/src/rp2_common/pico_float/float_math.c
|
||||
pico-sdk/src/rp2_common/pico_float/float_v1_rom_shim.S
|
||||
pico-sdk/src/rp2_common/hardware_divider/divider.S
|
||||
)
|
||||
|
||||
target_compile_definitions(mbed-raspberrypi
|
||||
|
@ -110,4 +188,7 @@ target_sources(mbed-rp2040
|
|||
pico-sdk/src/rp2_common/pico_fix/rp2040_usb_device_enumeration/rp2040_usb_device_enumeration.c
|
||||
)
|
||||
|
||||
# Enable usage of the RPi Pico optimized floating point routines
|
||||
wrap_float_functions(mbed-rp2040)
|
||||
|
||||
add_subdirectory(TARGET_RP2040 EXCLUDE_FROM_ALL)
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
#include "pico/asm_helper.S"
|
||||
#include "hardware/regs/addressmap.h"
|
||||
#include "hardware/regs/sio.h"
|
||||
|
||||
pico_default_asm_setup
|
||||
|
||||
// tag::hw_div_s32[]
|
||||
// Signed 32-bit divide using the SIO hardware divider.
// In:  r0 = dividend, r1 = divisor (the two arguments of the C prototype).
// Writes both operands to the signed operand registers, then branches to the
// shared tail hw_divider_divmod_return with r3 = SIO_BASE; the tail waits for
// the result and returns it in r0/r1.
regular_func_with_section hw_divider_divmod_s32
|
||||
ldr r3, =(SIO_BASE)
|
||||
str r0, [r3, #SIO_DIV_SDIVIDEND_OFFSET]
|
||||
str r1, [r3, #SIO_DIV_SDIVISOR_OFFSET]
|
||||
b hw_divider_divmod_return
|
||||
// end::hw_div_s32[]
|
||||
|
||||
// tag::hw_div_u32[]
|
||||
// Unsigned 32-bit divide using the SIO hardware divider.
// In:  r0 = dividend, r1 = divisor (the two arguments of the C prototype).
// Writes both operands to the unsigned operand registers, then branches to the
// shared tail hw_divider_divmod_return with r3 = SIO_BASE; the tail waits for
// the result and returns it in r0/r1.
regular_func_with_section hw_divider_divmod_u32
|
||||
ldr r3, =(SIO_BASE)
|
||||
str r0, [r3, #SIO_DIV_UDIVIDEND_OFFSET]
|
||||
str r1, [r3, #SIO_DIV_UDIVISOR_OFFSET]
|
||||
b hw_divider_divmod_return
|
||||
// end::hw_div_u32[]
|
||||
|
||||
// Common delay and return section for s32 and u32
|
||||
// Shared tail for hw_divider_divmod_s32 / hw_divider_divmod_u32.
// In:  r3 = SIO_BASE, operands already written to the divider.
// Out: r0 = quotient, r1 = remainder (together the 64-bit return value).
.section .text.hw_divider_divmod_return
|
||||
hw_divider_divmod_return:
|
||||
// Branching here is 2 cycles, delay another 6
|
||||
// (three taken branches to the next instruction, 2 cycles each)
b 1f
|
||||
1: b 1f
|
||||
1: b 1f
|
||||
1: // return 64 bit value so we can efficiently return both (note quotient must be read last)
|
||||
ldr r1, [r3, #SIO_DIV_REMAINDER_OFFSET]
|
||||
ldr r0, [r3, #SIO_DIV_QUOTIENT_OFFSET]
|
||||
bx lr
|
||||
|
||||
// Copy the divider registers of the calling core to memory.
// In:  r0 = destination pointer; four consecutive words are written in the
//      order dividend, divisor, remainder, quotient (r0 is advanced by stmia).
// Uses r1-r3 as scratch.
regular_func_with_section hw_divider_save_state
|
||||
ldr r3, =SIO_BASE
|
||||
ldr r1, [r3, #SIO_DIV_UDIVIDEND_OFFSET]
|
||||
ldr r2, [r3, #SIO_DIV_UDIVISOR_OFFSET]
|
||||
stmia r0!, {r1-r2}
|
||||
// The 8 cycles needed to guarantee that the result is ready is ensured by the preceding
|
||||
// code of 7 cycles together with any branch to it taking at least 2 cycles.
|
||||
// Remainder is read first: the quotient register must be read last.
ldr r1, [r3, #SIO_DIV_REMAINDER_OFFSET]
|
||||
ldr r2, [r3, #SIO_DIV_QUOTIENT_OFFSET]
|
||||
stmia r0!, {r1-r2}
|
||||
bx lr
|
||||
|
||||
// Load a previously saved divider state back into the divider of this core.
// In:  r0 = source pointer; four consecutive words are read in the order
//      dividend, divisor, remainder, quotient (the layout written by
//      hw_divider_save_state) and stored to the matching divider registers.
// Uses r1-r3 as scratch.
regular_func_with_section hw_divider_restore_state
|
||||
ldr r3, =SIO_BASE
|
||||
ldmia r0!, {r1-r2}
|
||||
// operands go through the unsigned operand registers
str r1, [r3, #SIO_DIV_UDIVIDEND_OFFSET]
|
||||
str r2, [r3, #SIO_DIV_UDIVISOR_OFFSET]
|
||||
ldmia r0!, {r1-r2}
|
||||
// results are written back remainder first, quotient last
str r1, [r3, #SIO_DIV_REMAINDER_OFFSET]
|
||||
str r2, [r3, #SIO_DIV_QUOTIENT_OFFSET]
|
||||
bx lr
|
|
@ -0,0 +1,404 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#ifndef _HARDWARE_DIVIDER_H
|
||||
#define _HARDWARE_DIVIDER_H
|
||||
|
||||
#include "pico.h"
|
||||
#include "hardware/structs/sio.h"
|
||||
|
||||
/** \file hardware/divider.h
|
||||
* \defgroup hardware_divider hardware_divider
|
||||
*
|
||||
* Low-level hardware-divider access
|
||||
*
|
||||
* The SIO contains an 8-cycle signed/unsigned divide/modulo circuit, per core. Calculation is started by writing a dividend
|
||||
* and divisor to the two argument registers, DIVIDEND and DIVISOR. The divider calculates the quotient / and remainder % of
|
||||
* this division over the next 8 cycles, and on the 9th cycle the results can be read from the two result registers
|
||||
* DIV_QUOTIENT and DIV_REMAINDER. A 'ready' bit in register DIV_CSR can be polled to wait for the calculation to
|
||||
* complete, or software can insert a fixed 8-cycle delay
|
||||
*
|
||||
* This header provides low level macros and inline functions for accessing the hardware dividers directly,
|
||||
* and perhaps most usefully performing asynchronous divides. These functions however do not follow the regular
|
||||
* SDK conventions for saving/restoring the divider state, so are not generally safe to call from interrupt handlers
|
||||
*
|
||||
* The pico_divider library provides a more user friendly set of APIs over the divider (and support for
|
||||
* 64 bit divides), and of course by default regular C language integer divisions are redirected through that library, meaning
|
||||
* you can just use C level `/` and `%` operators and gain the benefits of the fast hardware divider.
|
||||
*
|
||||
* @see pico_divider
|
||||
*
|
||||
* \subsection divider_example Example
|
||||
* \addtogroup hardware_divider
|
||||
* \include hello_divider.c
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef uint64_t divmod_result_t;
|
||||
|
||||
/*! \brief Start a signed asynchronous divide
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Start a divide of the specified signed parameters. You should wait for 8 cycles (__div_pause()) or wait for the ready bit to be set
|
||||
* (hw_divider_wait_ready()) prior to reading the results.
|
||||
*
|
||||
* \param a The dividend
|
||||
* \param b The divisor
|
||||
*/
|
||||
static inline void hw_divider_divmod_s32_start(int32_t a, int32_t b) {
|
||||
check_hw_layout( sio_hw_t, div_sdividend, SIO_DIV_SDIVIDEND_OFFSET);
|
||||
sio_hw->div_sdividend = (uint32_t)a;
|
||||
sio_hw->div_sdivisor = (uint32_t)b;
|
||||
}
|
||||
|
||||
/*! \brief Start an unsigned asynchronous divide
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Start a divide of the specified unsigned parameters. You should wait for 8 cycles (__div_pause()) or wait for the ready bit to be set
|
||||
* (hw_divider_wait_ready()) prior to reading the results.
|
||||
*
|
||||
* \param a The dividend
|
||||
* \param b The divisor
|
||||
*/
|
||||
static inline void hw_divider_divmod_u32_start(uint32_t a, uint32_t b) {
|
||||
check_hw_layout(
|
||||
sio_hw_t, div_udividend, SIO_DIV_UDIVIDEND_OFFSET);
|
||||
sio_hw->div_udividend = a;
|
||||
sio_hw->div_udivisor = b;
|
||||
}
|
||||
|
||||
/*! \brief Wait for a divide to complete
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Busy-waits, re-reading the divider CSR register until its READY bit is set.
|
||||
*/
|
||||
static inline void hw_divider_wait_ready(void) {
|
||||
// READY must be bit 0: the "lsrs ..., #1" below shifts exactly that bit into the carry flag
|
||||
static_assert(SIO_DIV_CSR_READY_BITS == 1, "");
|
||||
|
||||
// we use one less register and instruction than gcc which uses a TST instruction
|
||||
|
||||
// loop: load CSR, shift READY into carry, repeat while carry is clear (not ready yet)
uint32_t tmp; // allow compiler to pick scratch register
|
||||
pico_default_asm_volatile (
|
||||
"hw_divider_result_loop_%=:"
|
||||
"ldr %0, [%1, %2]\n\t"
|
||||
"lsrs %0, %0, #1\n\t"
|
||||
"bcc hw_divider_result_loop_%=\n\t"
|
||||
: "=&l" (tmp)
|
||||
: "l" (sio_hw), "I" (SIO_DIV_CSR_OFFSET)
|
||||
:
|
||||
);
|
||||
}
|
||||
|
||||
/*! \brief Return result of HW divide, nowait
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* \note This is UNSAFE in that the calculation may not have been completed.
|
||||
*
|
||||
* \return Current result. Most significant 32 bits are the remainder, lower 32 bits are the quotient.
|
||||
*/
|
||||
static inline divmod_result_t hw_divider_result_nowait(void) {
|
||||
// as ugly as this looks it is actually quite efficient
|
||||
divmod_result_t rc = ((divmod_result_t) sio_hw->div_remainder) << 32u;
|
||||
rc |= sio_hw->div_quotient;
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*! \brief Return result of last asynchronous HW divide
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* This function waits for the result to be ready by calling hw_divider_wait_ready().
|
||||
*
|
||||
* \return Current result. Most significant 32 bits are the remainder, lower 32 bits are the quotient.
|
||||
*/
|
||||
static inline divmod_result_t hw_divider_result_wait(void) {
|
||||
hw_divider_wait_ready();
|
||||
return hw_divider_result_nowait();
|
||||
}
|
||||
|
||||
/*! \brief Return result of last asynchronous HW divide, unsigned quotient only
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* This function waits for the result to be ready by calling hw_divider_wait_ready().
|
||||
*
|
||||
* \return Current unsigned quotient result.
|
||||
*/
|
||||
static inline uint32_t hw_divider_u32_quotient_wait(void) {
|
||||
hw_divider_wait_ready();
|
||||
return sio_hw->div_quotient;
|
||||
}
|
||||
|
||||
/*! \brief Return result of last asynchronous HW divide, signed quotient only
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* This function waits for the result to be ready by calling hw_divider_wait_ready().
|
||||
*
|
||||
* \return Current signed quotient result.
|
||||
*/
|
||||
static inline int32_t hw_divider_s32_quotient_wait(void) {
|
||||
hw_divider_wait_ready();
|
||||
return (int32_t)sio_hw->div_quotient;
|
||||
}
|
||||
|
||||
/*! \brief Return result of last asynchronous HW divide, unsigned remainder only
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* This function waits for the result to be ready by calling hw_divider_wait_ready().
|
||||
*
|
||||
* \return Current unsigned remainder result.
|
||||
*/
|
||||
static inline uint32_t hw_divider_u32_remainder_wait(void) {
|
||||
hw_divider_wait_ready();
|
||||
uint32_t rc = sio_hw->div_remainder;
|
||||
sio_hw->div_quotient; // must read quotient to cooperate with other SDK code
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*! \brief Return result of last asynchronous HW divide, signed remainder only
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* This function waits for the result to be ready by calling hw_divider_wait_ready().
|
||||
*
|
||||
* \return Current remainder results.
|
||||
*/
|
||||
static inline int32_t hw_divider_s32_remainder_wait(void) {
|
||||
hw_divider_wait_ready();
|
||||
int32_t rc = (int32_t)sio_hw->div_remainder;
|
||||
sio_hw->div_quotient; // must read quotient to cooperate with other SDK code
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*! \brief Do a signed HW divide and wait for result
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Divide \p a by \p b, wait for calculation to complete, return result as a pair of 32-bit quotient/remainder values.
|
||||
*
|
||||
* \param a The dividend
|
||||
* \param b The divisor
|
||||
* \return Packed result: remainder in the most significant 32 bits, quotient in the lower 32 bits; unpack with to_quotient_s32() and to_remainder_s32().
|
||||
*/
|
||||
divmod_result_t hw_divider_divmod_s32(int32_t a, int32_t b);
|
||||
|
||||
/*! \brief Do an unsigned HW divide and wait for result
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Divide \p a by \p b, wait for calculation to complete, return result as a pair of 32-bit quotient/remainder values.
|
||||
*
|
||||
* \param a The dividend
|
||||
* \param b The divisor
|
||||
* \return Packed result: remainder in the most significant 32 bits, quotient in the lower 32 bits; unpack with to_quotient_u32() and to_remainder_u32().
|
||||
*/
|
||||
divmod_result_t hw_divider_divmod_u32(uint32_t a, uint32_t b);
|
||||
|
||||
/*! \brief Efficient extraction of unsigned quotient from 32p32 fixed point
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* \param r A pair of 32-bit quotient/remainder values.
|
||||
* \return Unsigned quotient
|
||||
*/
|
||||
inline static uint32_t to_quotient_u32(divmod_result_t r) {
|
||||
return (uint32_t) r;
|
||||
}
|
||||
|
||||
/*! \brief Efficient extraction of signed quotient from 32p32 fixed point
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* \param r A pair of 32-bit quotient/remainder values.
|
||||
* \return Unsigned quotient
|
||||
*/
|
||||
inline static int32_t to_quotient_s32(divmod_result_t r) {
|
||||
return (int32_t)(uint32_t)r;
|
||||
}
|
||||
|
||||
/*! \brief Efficient extraction of unsigned remainder from 32p32 fixed point
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* \param r A pair of 32-bit quotient/remainder values.
|
||||
* \return Unsigned remainder
|
||||
*
|
||||
* \note On Arm this is just a 32 bit register move or a nop
|
||||
*/
|
||||
inline static uint32_t to_remainder_u32(divmod_result_t r) {
|
||||
return (uint32_t)(r >> 32u);
|
||||
}
|
||||
|
||||
/*! \brief Efficient extraction of signed remainder from 32p32 fixed point
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* \param r A pair of 32-bit quotient/remainder values.
|
||||
* \return Signed remainder
|
||||
*
|
||||
* \note On arm this is just a 32 bit register move or a nop
|
||||
*/
|
||||
inline static int32_t to_remainder_s32(divmod_result_t r) {
|
||||
return (int32_t)(r >> 32u);
|
||||
}
|
||||
|
||||
/*! \brief Do an unsigned HW divide, wait for result, return quotient
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Divide \p a by \p b, wait for calculation to complete, return quotient.
|
||||
*
|
||||
* \param a The dividend
|
||||
* \param b The divisor
|
||||
* \return Quotient results of the divide
|
||||
*/
|
||||
static inline uint32_t hw_divider_u32_quotient(uint32_t a, uint32_t b) {
|
||||
return to_quotient_u32(hw_divider_divmod_u32(a, b));
|
||||
}
|
||||
|
||||
/*! \brief Do an unsigned HW divide, wait for result, return remainder
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Divide \p a by \p b, wait for calculation to complete, return remainder.
|
||||
*
|
||||
* \param a The dividend
|
||||
* \param b The divisor
|
||||
* \return Remainder results of the divide
|
||||
*/
|
||||
static inline uint32_t hw_divider_u32_remainder(uint32_t a, uint32_t b) {
|
||||
return to_remainder_u32(hw_divider_divmod_u32(a, b));
|
||||
}
|
||||
|
||||
/*! \brief Do a signed HW divide, wait for result, return quotient
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Divide \p a by \p b, wait for calculation to complete, return quotient.
|
||||
*
|
||||
* \param a The dividend
|
||||
* \param b The divisor
|
||||
* \return Quotient results of the divide
|
||||
*/
|
||||
static inline int32_t hw_divider_quotient_s32(int32_t a, int32_t b) {
|
||||
return to_quotient_s32(hw_divider_divmod_s32(a, b));
|
||||
}
|
||||
|
||||
/*! \brief Do a signed HW divide, wait for result, return remainder
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Divide \p a by \p b, wait for calculation to complete, return remainder.
|
||||
*
|
||||
* \param a The dividend
|
||||
* \param b The divisor
|
||||
* \return Remainder results of the divide
|
||||
*/
|
||||
static inline int32_t hw_divider_remainder_s32(int32_t a, int32_t b) {
|
||||
return to_remainder_s32(hw_divider_divmod_s32(a, b));
|
||||
}
|
||||
|
||||
/*! \brief Pause for exact amount of time needed for an asynchronous divide to complete
|
||||
* \ingroup hardware_divider
|
||||
*
* The delay is built from four branches, each to the immediately following label.
* NOTE(review): presumably 2 cycles per taken branch, i.e. the 8 cycles the divider needs - confirm against the core timing.
*/
|
||||
static inline void hw_divider_pause(void) {
|
||||
// "%=" gives every expansion of this asm block its own unique label names
pico_default_asm_volatile(
|
||||
"b _1_%=\n"
|
||||
"_1_%=:\n"
|
||||
"b _2_%=\n"
|
||||
"_2_%=:\n"
|
||||
"b _3_%=\n"
|
||||
"_3_%=:\n"
|
||||
"b _4_%=\n"
|
||||
"_4_%=:\n"
|
||||
:::);
|
||||
}
|
||||
|
||||
/*! \brief Do a hardware unsigned HW divide, wait for result, return quotient
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Divide \p a by \p b, wait for calculation to complete, return quotient.
|
||||
*
|
||||
* \param a The dividend
|
||||
* \param b The divisor
|
||||
* \return Quotient result of the divide
|
||||
*/
|
||||
static inline uint32_t hw_divider_u32_quotient_inlined(uint32_t a, uint32_t b) {
|
||||
hw_divider_divmod_u32_start(a, b);
|
||||
hw_divider_pause();
|
||||
return sio_hw->div_quotient;
|
||||
}
|
||||
|
||||
/*! \brief Do a hardware unsigned HW divide, wait for result, return remainder
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Divide \p a by \p b, wait for calculation to complete, return remainder.
|
||||
*
|
||||
* \param a The dividend
|
||||
* \param b The divisor
|
||||
* \return Remainder result of the divide
|
||||
*/
|
||||
static inline uint32_t hw_divider_u32_remainder_inlined(uint32_t a, uint32_t b) {
|
||||
hw_divider_divmod_u32_start(a, b);
|
||||
hw_divider_pause();
|
||||
uint32_t rc = sio_hw->div_remainder;
|
||||
sio_hw->div_quotient; // must read quotient to cooperate with other SDK code
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*! \brief Do a hardware signed HW divide, wait for result, return quotient
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Divide \p a by \p b, wait for calculation to complete, return quotient.
|
||||
*
|
||||
* \param a The dividend
|
||||
* \param b The divisor
|
||||
* \return Quotient result of the divide
|
||||
*/
|
||||
static inline int32_t hw_divider_s32_quotient_inlined(int32_t a, int32_t b) {
|
||||
hw_divider_divmod_s32_start(a, b);
|
||||
hw_divider_pause();
|
||||
return (int32_t)sio_hw->div_quotient;
|
||||
}
|
||||
|
||||
/*! \brief Do a hardware signed HW divide, wait for result, return remainder
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Divide \p a by \p b, wait for calculation to complete, return remainder.
|
||||
*
|
||||
* \param a The dividend
|
||||
* \param b The divisor
|
||||
* \return Remainder result of the divide
|
||||
*/
|
||||
static inline int32_t hw_divider_s32_remainder_inlined(int32_t a, int32_t b) {
|
||||
hw_divider_divmod_s32_start(a, b);
|
||||
hw_divider_pause();
|
||||
int32_t rc = (int32_t)sio_hw->div_remainder;
|
||||
sio_hw->div_quotient; // must read quotient to cooperate with other SDK code
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Snapshot of one core's hardware divider registers, as filled in by
 * hw_divider_save_state() and consumed by hw_divider_restore_state().
 * The assembly implementation stores the four words in the order
 * dividend, divisor, remainder, quotient. */
typedef struct {
|
||||
uint32_t values[4];
|
||||
} hw_divider_state_t;
|
||||
|
||||
/*! \brief Save the calling core's hardware divider state
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Copy the current core's hardware divider state into the provided structure. This method
|
||||
* waits for the divider results to be stable, then copies them to memory.
|
||||
* They can be restored via hw_divider_restore_state()
|
||||
*
|
||||
* \param dest the location to store the divider state; all four words of the structure are written
|
||||
*/
|
||||
void hw_divider_save_state(hw_divider_state_t *dest);
|
||||
|
||||
/*! \brief Load a saved hardware divider state into the current core's hardware divider
|
||||
* \ingroup hardware_divider
|
||||
*
|
||||
* Copy the passed hardware divider state into the hardware divider.
|
||||
*
|
||||
* \param src the location to load the divider state from, as previously filled in by hw_divider_save_state()
|
||||
*/
|
||||
|
||||
void hw_divider_restore_state(hw_divider_state_t *src);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _HARDWARE_DIVIDER_H
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
// Note this file is always included by another, so does not do pico_default_asm_setup
|
||||
#include "hardware/regs/addressmap.h"
|
||||
#include "hardware/regs/sio.h"
|
||||
|
||||
#if SIO_DIV_CSR_READY_LSB == 0
|
||||
.equ SIO_DIV_CSR_READY_SHIFT_FOR_CARRY, 1
|
||||
#else
|
||||
need to change SHIFT above
|
||||
#endif
|
||||
#if SIO_DIV_CSR_DIRTY_LSB == 1
|
||||
.equ SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY, 2
|
||||
#else
|
||||
need to change SHIFT above
|
||||
#endif
|
||||
|
||||
// SIO_BASE ptr in r2; pushes r4-r7, lr to stack
|
||||
.macro save_div_state_and_lr
|
||||
// originally we did this, however a) it uses r3, and b) the push and dividend/divisor
|
||||
// readout takes 8 cycles, c) any IRQ which uses the divider will necessarily put the
|
||||
// data back, which will immediately make it ready
|
||||
//
|
||||
// // ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
|
||||
// // // wait for results as we can't save signed-ness of operation
|
||||
// // 1:
|
||||
// // lsrs r3, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY
|
||||
// // bcc 1b
|
||||
|
||||
// 6 cycle push + 2 ldr ensures the 8 cycle delay before remainder and quotient are ready
|
||||
push {r4, r5, r6, r7, lr}
|
||||
// note we must read quotient last, and since it isn't the last reg, we'll not use ldmia!
|
||||
ldr r4, [r2, #SIO_DIV_UDIVIDEND_OFFSET]
|
||||
ldr r5, [r2, #SIO_DIV_UDIVISOR_OFFSET]
|
||||
ldr r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
|
||||
ldr r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
|
||||
.endm
|
||||
|
||||
// restores divider state from r4-r7, then pops them and pc
|
||||
.macro restore_div_state_and_return
|
||||
// writing udividend (r4), udivisor (r5), remainder (r7), quotient (r6) in that order
|
||||
//
|
||||
// it is worth considering what happens if we are interrupted
|
||||
//
|
||||
// after writing r4: we are DIRTY and !READY
|
||||
// ... interruptor using div will complete based on incorrect inputs, but dividend at least will be
|
||||
// saved/restored correctly and we'll restore the rest ourselves
|
||||
// after writing r4, r5: we are DIRTY and !READY
|
||||
// ... interruptor using div will complete based on possibly wrongly signed inputs, but dividend, divisor
|
||||
// at least will be saved/restored correctly and we'll restore the rest ourselves
|
||||
// after writing r4, r5, r6: we are DIRTY and READY
|
||||
// ... interruptor using div will use the dividend, divisor, quotient registers as is (what we just restored ourselves),
|
||||
// and we'll restore the remainder after the fact
|
||||
|
||||
// note we do not use STM; not because it can be restarted due to an interrupt (which is harmless), but because this is 1 cycle IO space
|
||||
// and so 4 separate stores are cheaper (and we don't have to adjust r2)
|
||||
// note also, that we must restore via UDIVI* rather than SDIVI* to prevent the quotient/remainder being negated on read based
|
||||
// on the signs of the inputs
|
||||
str r4, [r2, #SIO_DIV_UDIVIDEND_OFFSET]
|
||||
str r5, [r2, #SIO_DIV_UDIVISOR_OFFSET]
|
||||
str r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
|
||||
str r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
|
||||
pop {r4, r5, r6, r7, pc}
|
||||
.endm
|
|
@ -0,0 +1,769 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#include "pico/asm_helper.S"
|
||||
#include "pico/bootrom/sf_table.h"
|
||||
#include "hardware/divider_helper.S"
|
||||
|
||||
__pre_init __aeabi_float_init, 00020
|
||||
|
||||
pico_default_asm_setup
|
||||
|
||||
.macro float_section name
|
||||
#if PICO_FLOAT_IN_RAM
|
||||
.section RAM_SECTION_NAME(\name), "ax"
|
||||
#else
|
||||
.section SECTION_NAME(\name), "ax"
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro float_wrapper_section func
|
||||
float_section WRAPPER_FUNC_NAME(\func)
|
||||
.endm
|
||||
|
||||
.macro _float_wrapper_func x
|
||||
wrapper_func \x
|
||||
.endm
|
||||
|
||||
// Opens a wrapper function for a one-operand float routine.
// When PICO_FLOAT_PROPAGATE_NANS is set, the wrapper first calls
// __check_nan_f1: the return address of the caller is parked in ip so the
// check can return straight to the caller, and lr is restored from ip when
// the check comes back here instead.
.macro wrapper_func_f1 x
|
||||
_float_wrapper_func \x
|
||||
#if PICO_FLOAT_PROPAGATE_NANS
|
||||
// keep the return address of the caller in ip across the bl
mov ip, lr
|
||||
bl __check_nan_f1
|
||||
mov lr, ip
|
||||
#endif
|
||||
.endm
|
||||
|
||||
// Opens a wrapper function for a two-operand float routine.
// When PICO_FLOAT_PROPAGATE_NANS is set, the wrapper first calls
// __check_nan_f2: the return address of the caller is parked in ip so the
// check can return straight to the caller, and lr is restored from ip when
// the check comes back here instead.
.macro wrapper_func_f2 x
|
||||
_float_wrapper_func \x
|
||||
#if PICO_FLOAT_PROPAGATE_NANS
|
||||
// keep the return address of the caller in ip across the bl
mov ip, lr
|
||||
bl __check_nan_f2
|
||||
mov lr, ip
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.section .text
|
||||
|
||||
#if PICO_FLOAT_PROPAGATE_NANS
|
||||
// NaN check for a one-operand wrapper.
// In:  r0 = float operand, ip = return address of the original caller,
//      lr = return address inside the wrapper.
// If r0 is a NaN, returns directly to the original caller (bx ip) with r0
// untouched; otherwise returns to the wrapper (bx lr). Clobbers r2, r3.
.thumb_func
|
||||
__check_nan_f1:
|
||||
// r3 = 1 << 24, i.e. one unit of the exponent field once the sign is shifted out
movs r3, #1
|
||||
lsls r3, #24
|
||||
// r2 = operand without its sign bit; the add carries out only when the
// exponent is all ones and leaves the shifted mantissa as the result
lsls r2, r0, #1
|
||||
adds r2, r3
|
||||
// carry set and result non-zero: exponent all ones with non-zero mantissa, a NaN
bhi 1f
|
||||
bx lr
|
||||
1:
|
||||
bx ip
|
||||
|
||||
// NaN check for a two-operand wrapper.
// In:  r0, r1 = float operands, ip = return address of the original caller,
//      lr = return address inside the wrapper.
// If r0 is a NaN it is returned directly to the original caller (bx ip);
// otherwise, if r1 is a NaN, r1 is copied to r0 and returned the same way.
// With no NaN operand, returns to the wrapper (bx lr). Clobbers r2, r3.
.thumb_func
|
||||
__check_nan_f2:
|
||||
// r3 = 1 << 24, i.e. one unit of the exponent field once the sign is shifted out
movs r3, #1
|
||||
lsls r3, #24
|
||||
// first operand: carry set and non-zero result means exponent all ones
// with a non-zero mantissa, a NaN
lsls r2, r0, #1
|
||||
adds r2, r3
|
||||
bhi 1f
|
||||
// second operand, same test
lsls r2, r1, #1
|
||||
adds r2, r3
|
||||
bhi 2f
|
||||
bx lr
|
||||
2:
|
||||
// the NaN is in r1: make it the return value
mov r0, r1
|
||||
1:
|
||||
bx ip
|
||||
#endif
|
||||
|
||||
// Tail-calls the routine whose address is stored at byte offset
// SF_TABLE_OFFSET in sf_table. lr is left untouched, so the routine returns
// to the caller of the wrapper. Clobbers r3.
.macro table_tail_call SF_TABLE_OFFSET
|
||||
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
|
||||
#ifndef NDEBUG
|
||||
// debug builds with ROM V1 support clear ip before the call
// NOTE(review): presumably so a shim entered from a non-shimmable call site sees ip == 0 - confirm against float_v1_rom_shim.S
movs r3, #0
|
||||
mov ip, r3
|
||||
#endif
|
||||
#endif
|
||||
// r3 = sf_table[SF_TABLE_OFFSET], then jump to it
ldr r3, =sf_table
|
||||
ldr r3, [r3, #\SF_TABLE_OFFSET]
|
||||
bx r3
|
||||
.endm
|
||||
|
||||
// Like table_tail_call, but with ROM V1 support enabled it also emits a small
// data record right after the branch (the table offset byte, 0xdf, and the
// address of the shim routine) and sets ip from pc before jumping.
// NOTE(review): in Thumb state pc reads as the current instruction + 4, which
// here would be the address of that record - confirm how the V1 shim uses ip.
// Clobbers r3 (and ip when ROM V1 support is enabled).
.macro shimmable_table_tail_call SF_TABLE_OFFSET shim
|
||||
// r3 = sf_table[SF_TABLE_OFFSET]
ldr r3, =sf_table
|
||||
ldr r3, [r3, #\SF_TABLE_OFFSET]
|
||||
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
|
||||
mov ip, pc
|
||||
#endif
|
||||
bx r3
|
||||
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
|
||||
// inline record, never executed: control leaves through the bx above
.byte \SF_TABLE_OFFSET, 0xdf
|
||||
.word \shim
|
||||
#endif
|
||||
.endm
|
||||
|
||||
|
||||
// note generally each function is in a separate section unless there is fall thru or branching between them
|
||||
// note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool
|
||||
|
||||
// note functions are word aligned except where they are an odd number of linear instructions
|
||||
|
||||
// float FUNC_NAME(__aeabi_fadd)(float, float) single-precision addition
|
||||
float_wrapper_section __aeabi_farithmetic
|
||||
// float FUNC_NAME(__aeabi_frsub)(float x, float y) single-precision reverse subtraction, y - x
|
||||
|
||||
// frsub first because it is the only one that needs alignment
|
||||
.align 2
|
||||
wrapper_func __aeabi_frsub
|
||||
eors r0, r1
|
||||
eors r1, r0
|
||||
eors r0, r1
|
||||
// fall thru
|
||||
|
||||
// float FUNC_NAME(__aeabi_fsub)(float x, float y) single-precision subtraction, x - y
|
||||
wrapper_func_f2 __aeabi_fsub
|
||||
#if PICO_FLOAT_PROPAGATE_NANS
|
||||
// we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
|
||||
mov r2, r0
|
||||
eors r2, r1
|
||||
bmi 1f // different signs
|
||||
push {r0, r1, lr}
|
||||
bl 1f
|
||||
b fdiv_fsub_nan_helper
|
||||
1:
|
||||
#endif
|
||||
table_tail_call SF_TABLE_FSUB
|
||||
|
||||
wrapper_func_f2 __aeabi_fadd
|
||||
table_tail_call SF_TABLE_FADD
|
||||
|
||||
// float FUNC_NAME(__aeabi_fdiv)(float n, float d) single-precision division, n / d
|
||||
wrapper_func_f2 __aeabi_fdiv
|
||||
#if PICO_FLOAT_PROPAGATE_NANS
|
||||
push {r0, r1, lr}
|
||||
bl 1f
|
||||
b fdiv_fsub_nan_helper
|
||||
1:
|
||||
#endif
|
||||
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
|
||||
ldr r2, =(SIO_BASE)
|
||||
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
|
||||
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
|
||||
bcs fdiv_save_state
|
||||
#else
|
||||
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
|
||||
push {r4, lr}
|
||||
mrs r4, PRIMASK
|
||||
cpsid i
|
||||
bl fdiv_shim_call
|
||||
msr PRIMASK, r4
|
||||
pop {r4, pc}
|
||||
#endif
|
||||
fdiv_shim_call:
|
||||
table_tail_call SF_TABLE_FDIV
|
||||
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
fdiv_save_state:
|
||||
save_div_state_and_lr
|
||||
bl fdiv_shim_call
|
||||
ldr r2, =(SIO_BASE)
|
||||
restore_div_state_and_return
|
||||
#endif
|
||||
|
||||
fdiv_fsub_nan_helper:
|
||||
#if PICO_FLOAT_PROPAGATE_NANS
|
||||
pop {r1, r2}
|
||||
|
||||
// check for infinite op infinite (or rather check for infinite result with both
|
||||
// operands being infinite)
|
||||
lsls r3, r0, #1
|
||||
asrs r3, r3, #24
|
||||
adds r3, #1
|
||||
beq 2f
|
||||
pop {pc}
|
||||
2:
|
||||
lsls r1, #1
|
||||
asrs r1, r1, #24
|
||||
lsls r2, #1
|
||||
asrs r2, r2, #24
|
||||
ands r1, r2
|
||||
adds r1, #1
|
||||
bne 3f
|
||||
// infinite to nan
|
||||
movs r1, #1
|
||||
lsls r1, #22
|
||||
orrs r0, r1
|
||||
3:
|
||||
pop {pc}
|
||||
#endif
|
||||
|
||||
// float FUNC_NAME(__aeabi_fmul)(float, float) single-precision multiplication
|
||||
wrapper_func_f2 __aeabi_fmul
#if PICO_FLOAT_PROPAGATE_NANS
    // Keep the operands, run the table multiply as a subroutine (label 1 below),
    // then post-process the result held in r0.
    push {r0, r1, lr}
    bl 1f
    pop {r1, r2}                    // r1, r2 = original operands

    // check for multiplication of infinite by zero (or rather check for infinite result with either
    // operand 0)
    lsls r3, r0, #1
    asrs r3, r3, #24                // r3 == -1 only when the result exponent field is 0xff
    adds r3, #1
    beq 2f
    pop {pc}
2:
    // NOTE(review): the test is a bitwise AND of the two operand patterns being zero
    ands r1, r1, r2
    bne 3f
    // infinite to nan (set mantissa bit 22)
    movs r1, #1
    lsls r1, r1, #22
    orrs r0, r0, r1
3:
    pop {pc}
1:
#endif
    table_tail_call SF_TABLE_FMUL
|
||||
|
||||
// void FUNC_NAME(__aeabi_cfrcmple)(float, float) reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
|
||||
float_wrapper_section __aeabi_cfcmple
.align 2
wrapper_func __aeabi_cfrcmple
    push {r0-r2, lr}
    // exchange r0 and r1 (three-XOR swap) and fall into the forward comparison
    eors r0, r0, r1
    eors r1, r1, r0
    eors r0, r0, r1
    b __aeabi_cfcmple_guts

// NOTE these share an implementation as we have no excepting NaNs.
// void FUNC_NAME(__aeabi_cfcmple)(float, float) 3-way (<, =, ?>) compare [1], result in PSR ZC flags
// void FUNC_NAME(__aeabi_cfcmpeq)(float, float) non-excepting equality comparison [1], result in PSR ZC flags
.align 2
wrapper_func __aeabi_cfcmple
wrapper_func __aeabi_cfcmpeq
    push {r0-r2, lr}                // r0-r2 are restored on exit; only the flags carry the answer

__aeabi_cfcmple_guts:
    // classify x (r0) by its exponent field
    lsls r2, r0, #1
    lsrs r2, r2, #24
    beq 1f                          // exponent 0: zero or denormal
    cmp r2, #0xff
    bne 2f                          // ordinary number: use as is
    lsls r2, r0, #9
    bhi 3f                          // exponent 0xff with non-zero mantissa: exit with C set, Z clear
1:
    lsrs r0, r0, #23 @ clear mantissa if denormal or infinite
    lsls r0, r0, #23
2:
    // same classification for y (r1)
    lsls r2, r1, #1
    lsrs r2, r2, #24
    beq 1f
    cmp r2, #0xff
    bne 2f
    lsls r2, r1, #9
    bhi 3f
1:
    lsrs r1, r1, #23 @ clear mantissa if denormal or infinite
    lsls r1, r1, #23
2:
    movs r2, #1 @ initialise result
    eors r1, r1, r0
    bmi 2f @ opposite signs? then can proceed on basis of sign of x
    eors r1, r1, r0 @ restore y
    bpl 1f
    cmp r1, r0                      // both negative: compare the patterns in reverse
    pop {r0-r2, pc}
1:
    cmp r0, r1                      // both non-negative: compare the patterns directly
    pop {r0-r2, pc}
2:
    orrs r1, r1, r0 @ handle 0/-0
    adds r1, r1, r1 @ note this always sets C
    beq 3f                          // both operands are zeros of opposite sign: Z and C set
    mvns r0, r0 @ carry inverse of r0 sign
    adds r0, r0, r0
3:
    pop {r0-r2, pc}
|
||||
|
||||
|
||||
// int FUNC_NAME(__aeabi_fcmpeq)(float, float) result (1, 0) denotes (=, ?<>) [2], use for C == and !=
|
||||
float_wrapper_section __aeabi_fcmpeq
.align 2
wrapper_func __aeabi_fcmpeq
    push {lr}
    bl __aeabi_cfcmpeq              // flag-returning compare; Z set means equal
    beq 1f
    movs r0, #0                     // not equal (or unordered)
    pop {pc}
1:
    movs r0, #1                     // equal
    pop {pc}
|
||||
|
||||
// int FUNC_NAME(__aeabi_fcmplt)(float, float) result (1, 0) denotes (<, ?>=) [2], use for C <
|
||||
float_wrapper_section __aeabi_fcmplt
.align 2
wrapper_func __aeabi_fcmplt
    push {lr}
    bl __aeabi_cfcmple              // flag-returning compare of r0 against r1
    // r0 - r0 - !C: zero when carry is set, all ones (non-zero, i.e. true) when carry is clear
    sbcs r0, r0, r0
    pop {pc}
|
||||
|
||||
// int FUNC_NAME(__aeabi_fcmple)(float, float) result (1, 0) denotes (<=, ?>) [2], use for C <=
|
||||
float_wrapper_section __aeabi_fcmple
.align 2
wrapper_func __aeabi_fcmple
    push {lr}
    bl __aeabi_cfcmple              // flag-returning compare of r0 against r1
    bls 1f                          // carry clear or zero set: less than or equal
    movs r0, #0
    pop {pc}
1:
    movs r0, #1
    pop {pc}
|
||||
|
||||
// int FUNC_NAME(__aeabi_fcmpge)(float, float) result (1, 0) denotes (>=, ?<) [2], use for C >=
|
||||
float_wrapper_section __aeabi_fcmpge
.align 2
wrapper_func __aeabi_fcmpge
    push {lr}
    // because of NaNs it is better to reverse the args than the result
    bl __aeabi_cfrcmple             // compares with the operands exchanged
    bls 1f                          // second operand <= first operand
    movs r0, #0
    pop {pc}
1:
    movs r0, #1
    pop {pc}
|
||||
|
||||
// int FUNC_NAME(__aeabi_fcmpgt)(float, float) result (1, 0) denotes (>, ?<=) [2], use for C >
|
||||
float_wrapper_section __aeabi_fcmpgt
wrapper_func __aeabi_fcmpgt
    push {lr}
    // because of NaNs it is better to reverse the args than the result
    bl __aeabi_cfrcmple             // compares with the operands exchanged
    // zero when carry is set, all ones (non-zero, i.e. true) when carry is clear
    sbcs r0, r0, r0
    pop {pc}
|
||||
|
||||
// int FUNC_NAME(__aeabi_fcmpun)(float, float) result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
|
||||
float_wrapper_section __aeabi_fcmpun
wrapper_func __aeabi_fcmpun
    movs r3, #1
    lsls r3, r3, #24                // r3 = 0x01000000
    // (x << 1) + 0x01000000 carries out with a non-zero sum only when
    // (x << 1) > 0xff000000, i.e. exponent 0xff with a non-zero mantissa
    lsls r2, r0, #1
    adds r2, r2, r3
    bhi 1f
    lsls r2, r1, #1                 // same test on the second operand
    adds r2, r2, r3
    bhi 1f
    movs r0, #0                     // neither operand is a NaN
    bx lr
1:
    movs r0, #1                     // at least one operand is a NaN
    bx lr
|
||||
|
||||
|
||||
// float FUNC_NAME(__aeabi_ui2f)(unsigned) unsigned to float (single precision) conversion
|
||||
float_wrapper_section __aeabi_ui2f
wrapper_func __aeabi_ui2f
    subs r1, r1, r1                 // r1 = 0: the sign word handed to the shared conversion
    cmp r0, #0
    bne __aeabi_i2f_main            // non-zero input: share the signed conversion body
    mov r0, r1                      // zero input converts to +0.0
    bx lr
|
||||
|
||||
float_wrapper_section __aeabi_i2f
// float FUNC_NAME(__aeabi_i2f)(int) integer to float (single precision) conversion
wrapper_func __aeabi_i2f
    lsrs r1, r0, #31
    lsls r1, r1, #31                // r1 = sign bit of the input in bit 31
    bpl 1f
    negs r0, r0                     // negative input: continue with the magnitude
1:
    cmp r0, #0
    beq 7f                          // zero converts to +0.0
__aeabi_i2f_main:
    // entered with r0 = non-zero magnitude, r1 = sign word (also used by __aeabi_ui2f)
    mov ip, lr
    push {r0, r1}
    ldr r3, =sf_clz_func
    ldr r3, [r3]
    blx r3                          // r0 = result of the function stored in sf_clz_func
    pop {r1, r2}                    // r1 = magnitude, r2 = sign word
    lsls r1, r1, r0                 // normalise: shift the magnitude left by the returned count
    subs r0, #158
    negs r0, r0                     // r0 = 158 - shift count (biased exponent)

    adds r1, #0x80 @ rounding
    bcs 5f @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits)

    lsls r3, r1, #24 @ check bottom 8 bits of r1
    beq 6f @ in rounding-tie case?
    lsls r1, r1, #1 @ remove leading 1
3:
    lsrs r1, r1, #9 @ align mantissa
    lsls r0, r0, #23 @ align exponent
    orrs r0, r0, r2                 // merge the sign word (r2) with the exponent
4:
    orrs r0, r0, r1                 // merge the mantissa (r1)
1:
    bx ip
5:
    adds r0, #1 @ correct exponent offset
    b 3b
6:
    lsrs r1, r1, #9 @ ensure even result
    lsls r1, r1, #10
    b 3b
7:
    bx lr
|
||||
|
||||
|
||||
// int FUNC_NAME(__aeabi_f2iz)(float) float (single precision) to integer C-style conversion [3]
|
||||
float_wrapper_section __aeabi_f2iz
wrapper_func __aeabi_f2iz
regular_func float2int_z
    // Truncating float -> int32 conversion done inline:
    //   |x| < 1                  -> 0
    //   exponent field >= 158    -> saturate by sign (0x7fffffff / 0x80000000)
    //   otherwise                -> shift the 24-bit significand into place and apply the sign
    lsls r1, r0, #1
    lsrs r2, r1, #24                // r2 = biased exponent
    movs r3, #0x80
    lsls r3, r3, #24                // r3 = 0x80000000
    cmp r2, #126
    ble 1f
    subs r2, #158
    bge 2f
    asrs r1, r0, #31                // r1 = 0 for positive input, -1 for negative
    lsls r0, r0, #9
    lsrs r0, r0, #1
    orrs r0, r0, r3                 // significand with the implicit 1 at bit 31
    negs r2, r2                     // shift count = 158 - exponent (1..31)
    lsrs r0, r0, r2
    lsls r1, r1, #1
    adds r1, #1                     // r1 = +1 or -1
    muls r0, r1
    bx lr
1:
    movs r0, #0
    bx lr
2:
    lsrs r0, r0, #31                // 0 for positive input, 1 for negative
    adds r0, r0, r3
    subs r0, #1                     // 0x7fffffff or 0x80000000
    bx lr
|
||||
|
||||
float_section float2int
|
||||
regular_func float2int
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim
|
||||
|
||||
float_section float2fix
|
||||
regular_func float2fix
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim
|
||||
|
||||
float_section float2ufix
|
||||
regular_func float2ufix
|
||||
table_tail_call SF_TABLE_FLOAT2UFIX
|
||||
|
||||
// unsigned FUNC_NAME(__aeabi_f2uiz)(float) float (single precision) to unsigned C-style conversion [3]
|
||||
float_wrapper_section __aeabi_f2uiz
|
||||
wrapper_func __aeabi_f2uiz
|
||||
table_tail_call SF_TABLE_FLOAT2UINT
|
||||
|
||||
float_section fix2float
|
||||
regular_func fix2float
|
||||
table_tail_call SF_TABLE_FIX2FLOAT
|
||||
|
||||
float_section ufix2float
|
||||
regular_func ufix2float
|
||||
table_tail_call SF_TABLE_UFIX2FLOAT
|
||||
|
||||
float_section fix642float
|
||||
regular_func fix642float
|
||||
shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642float_shim
|
||||
|
||||
float_section ufix642float
|
||||
regular_func ufix642float
|
||||
shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642float_shim
|
||||
|
||||
// float FUNC_NAME(__aeabi_l2f)(long long) long long to float (single precision) conversion
|
||||
float_wrapper_section __aeabi_l2f
1:
    // value fits in 32 bits: hand over to the 32-bit conversion
    ldr r2, =__aeabi_i2f
    bx r2
wrapper_func __aeabi_l2f
    asrs r2, r0, #31                // sign extension of the low word
    cmp r1, r2
    beq 1b                          // high word is only sign extension
    shimmable_table_tail_call SF_TABLE_INT642FLOAT int642float_shim
|
||||
|
||||
// float FUNC_NAME(__aeabi_ul2f)(unsigned long long) unsigned long long to float (single precision) conversion
|
||||
float_wrapper_section __aeabi_ul2f
1:
    // high word is zero: hand over to the 32-bit unsigned conversion
    ldr r2, =__aeabi_ui2f
    bx r2
wrapper_func __aeabi_ul2f
    cmp r1, #0
    beq 1b
    shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642float_shim
|
||||
|
||||
// long long FUNC_NAME(__aeabi_f2lz)(float) float (single precision) to long long C-style conversion [3]
|
||||
float_wrapper_section __aeabi_f2lz
wrapper_func __aeabi_f2lz
regular_func float2int64_z
    cmn r0, r0                      // carry = sign bit of the input
    bcc float2int64                 // sign bit clear: use float2int64 directly
    // sign bit set: convert the magnitude, then negate the 64-bit result
    push {lr}
    lsls r0, r0, #1
    lsrs r0, r0, #1                 // clear the sign bit
    movs r1, #0                     // presumably the fixed-point fraction-bit count (0) - confirm against float2ufix64
    bl float2ufix64
    cmp r1, #0
    bmi 1f                          // bit 63 of the magnitude set: too large to negate
    movs r2, #0
    negs r0, r0
    sbcs r2, r2, r1                 // r2:r0 = 0 - r1:r0
    mov r1, r2
    pop {pc}
1:
    movs r1, #128
    lsls r1, r1, #24                // r1:r0 = 0x8000000000000000
    movs r0, #0
    pop {pc}
|
||||
|
||||
float_section float2int64
|
||||
regular_func float2int64
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim
|
||||
|
||||
float_section float2fix64
|
||||
regular_func float2fix64
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim
|
||||
|
||||
// unsigned long long FUNC_NAME(__aeabi_f2ulz)(float) float to unsigned long long C-style conversion [3]
|
||||
float_wrapper_section __aeabi_f2ulz
|
||||
wrapper_func __aeabi_f2ulz
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim
|
||||
|
||||
float_section float2ufix64
|
||||
regular_func float2ufix64
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim
|
||||
|
||||
float_wrapper_section __aeabi_f2d
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // NaN input: build the double directly in r1:r0
    // copy sign bit and 25 NAN id bits into sign bit and significant ID bits, also setting the high id bit
    asrs r1, r0, #3
    movs r2, #0xf
    lsls r2, r2, #27
    orrs r1, r1, r2
    lsls r0, r0, #25
    bx lr
#endif
wrapper_func __aeabi_f2d
#if PICO_FLOAT_PROPAGATE_NANS
    // (x << 1) + 0x01000000 carries out with a non-zero sum only for a NaN bit pattern
    movs r3, #1
    lsls r3, r3, #24
    lsls r2, r0, #1
    adds r2, r2, r3
    bhi 1b
#endif
    shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE float2double_shim
|
||||
|
||||
float_wrapper_section sqrtf
wrapper_func_f1 sqrtf
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
    // check for negative
    asrs r1, r0, #23                // r1 = sign-extended sign and exponent
    bmi 1f
#endif
    table_tail_call SF_TABLE_FSQRT
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
1:
    mvns r0, r1                     // r0 = 255 - exponent for a negative input
    cmp r0, #255
    bne 2f
    // -0 or -Denormal return -0 (0x80000000)
    lsls r0, r0, #31
    bx lr
2:
    // return -Inf (0xff800000)
    asrs r0, r1, #31
    lsls r0, r0, #23
    bx lr
#endif
|
||||
|
||||
float_wrapper_section cosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func cosf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, r1, #24                // r1 = biased exponent
    cmp r1, #127 + 7
    bge 1f                          // |x| >= 128: needs reduction (or is infinite/NaN)
2:
    table_tail_call SF_TABLE_FCOS
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan (set mantissa bit 22)
    movs r1, #1
    lsls r1, r1, #22
    orrs r0, r0, r1
    bx lr
3:
#endif
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}
    bl remainderf                   // r0 = remainderf(x, 2*pi)
    pop {r1}
    mov lr, r1                      // put the original return address back in lr
    b 2b
|
||||
|
||||
float_wrapper_section sinf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func sinf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, r1, #24                // r1 = biased exponent
    cmp r1, #127 + 7
    bge 1f                          // |x| >= 128: needs reduction (or is infinite/NaN)
2:
    table_tail_call SF_TABLE_FSIN
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan (set mantissa bit 22)
    movs r1, #1
    lsls r1, r1, #22
    orrs r0, r0, r1
    bx lr
3:
#endif
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}
    bl remainderf                   // r0 = remainderf(x, 2*pi)
    pop {r1}
    mov lr, r1                      // put the original return address back in lr
    b 2b
|
||||
|
||||
float_wrapper_section sincosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func sincosf
    push {r1, r2, lr}               // keep the two output pointers and the return address
    // rom version only works for -128 < angle < 128
    lsls r3, r0, #1
    lsrs r3, r3, #24                // r3 = biased exponent
    cmp r3, #127 + 7
    bge 3f
2:
    ldr r3, =sf_table
    ldr r3, [r3, #SF_TABLE_FSIN]
    blx r3                          // results come back in r0 and r1
    pop {r2, r3}                    // r2 = first output pointer, r3 = second output pointer
    str r0, [r2]
    str r1, [r3]
    pop {pc}
#if PICO_FLOAT_PROPAGATE_NANS
.align 2
    pop {pc}
#endif
3:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r3, #255
    bne 4f
    // infinite to nan (set mantissa bit 22), stored through both output pointers
    movs r3, #1
    lsls r3, r3, #22
    orrs r0, r0, r3
    str r0, [r1]
    str r0, [r2]
    add sp, #12                     // drop the three words pushed on entry
    bx lr
4:
#endif
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}
    bl remainderf                   // r0 = remainderf(x, 2*pi)
    pop {r1}
    mov lr, r1
    b 2b
|
||||
|
||||
float_wrapper_section tanf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func tanf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, r1, #24                // r1 = biased exponent
    cmp r1, #127 + 7
    bge ftan_out_of_range
ftan_in_range:
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
    // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
    ldr r2, =(SIO_BASE)
    ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
    lsrs r3, r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
    bcs ftan_save_state
#else
    // to avoid worrying about IRQs (or context switches), simply disable interrupts around call
    push {r4, lr}
    mrs r4, PRIMASK                 // remember the current interrupt mask
    cpsid i
    bl ftan_shim_call
    msr PRIMASK, r4                 // restore the saved interrupt mask
    pop {r4, pc}
#endif
ftan_shim_call:
    table_tail_call SF_TABLE_FTAN
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
ftan_save_state:
    save_div_state_and_lr
    bl ftan_shim_call
    ldr r2, =(SIO_BASE)
    restore_div_state_and_return
#endif
ftan_out_of_range:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan (set mantissa bit 22)
    movs r1, #1
    lsls r1, r1, #22
    orrs r0, r0, r1
    bx lr
3:
#endif
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}
    bl remainderf                   // r0 = remainderf(x, 2*pi)
    pop {r1}
    mov lr, r1                      // put the original return address back in lr
    b ftan_in_range
|
||||
|
||||
float_wrapper_section atan2f
|
||||
wrapper_func_f2 atan2f
|
||||
shimmable_table_tail_call SF_TABLE_FATAN2 fatan2_shim
|
||||
|
||||
float_wrapper_section expf
|
||||
wrapper_func_f1 expf
|
||||
table_tail_call SF_TABLE_FEXP
|
||||
|
||||
float_wrapper_section logf
|
||||
wrapper_func_f1 logf
|
||||
table_tail_call SF_TABLE_FLN
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "pico/bootrom.h"
|
||||
#include "pico/bootrom/sf_table.h"
|
||||
|
||||
// NOTE THIS FUNCTION TABLE IS NOT PUBLIC OR NECESSARILY COMPLETE...
|
||||
// IT IS ***NOT*** SAFE TO CALL THESE FUNCTION POINTERS FROM ARBITRARY CODE
|
||||
uint32_t sf_table[SF_TABLE_V2_SIZE / 2];
|
||||
void __attribute__((weak)) *sf_clz_func;
|
||||
|
||||
#if !(PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED)
/* Stand-in for a float routine that is not available: calling it panics. */
static void __attribute__((noreturn)) missing_float_func_shim(void) {
    panic("");
}
#endif
|
||||
|
||||
void __aeabi_float_init(void) {
|
||||
int rom_version = rp2040_rom_version();
|
||||
void *rom_table = rom_data_lookup(rom_table_code('S', 'F'));
|
||||
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
|
||||
if (rom_version == 1) {
|
||||
memcpy(&sf_table, rom_table, SF_TABLE_V1_SIZE);
|
||||
extern void float_table_shim_on_use_helper(void);
|
||||
// todo replace NDEBUG with a more exclusive assertion guard
|
||||
#ifndef NDEBUG
|
||||
if (*(uint16_t *)0x29ee != 0x0fc4 || // this is packx
|
||||
*(uint16_t *)0x29c0 != 0x0dc2 || // this is upackx
|
||||
*(uint16_t *)0x2b96 != 0xb5c0 || // this is cordic_vec
|
||||
*(uint16_t *)0x2b18 != 0x2500 || // this is packretns
|
||||
*(uint16_t *)0x2acc != 0xb510 || // this is float2fix
|
||||
*(uint32_t *)0x2cfc != 0x6487ed51 // pi_q29
|
||||
) {
|
||||
panic("");
|
||||
}
|
||||
#endif
|
||||
|
||||
// this is a little tricky.. we only want to pull in a shim if the corresponding function
|
||||
// is called. to that end we include a SVC instruction with the table offset as the call number
|
||||
// followed by the shim function pointer inside the actual wrapper function. that way if the wrapper
|
||||
// function is garbage collected, so is the shim function.
|
||||
//
|
||||
// float_table_shim_on_use_helper expects this SVC instruction in the calling code soon after the address
|
||||
// pointed to by IP and patches the float_table entry with the real shim the first time the function is called.
|
||||
|
||||
for(uint i=SF_TABLE_V1_SIZE/4; i<SF_TABLE_V2_SIZE/4; i++) {
|
||||
sf_table[i] = (uintptr_t)float_table_shim_on_use_helper;
|
||||
}
|
||||
// we shim these for -0 and -denormal handling
|
||||
sf_table[SF_TABLE_FLOAT2INT/4] = sf_table[SF_TABLE_FLOAT2FIX/4] = (uintptr_t)float_table_shim_on_use_helper;
|
||||
}
|
||||
#else
|
||||
if (rom_version == 1) {
|
||||
memcpy(&sf_table, rom_table, SF_TABLE_V1_SIZE);
|
||||
// opting for soft failure for now - you'll get a panic at runtime if you call any of the missing methods
|
||||
for(uint i=0;i<SF_TABLE_V2_SIZE/4;i++) {
|
||||
if (!sf_table[i]) sf_table[i] = (uintptr_t)missing_float_func_shim;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (rom_version >= 2) {
|
||||
assert(*((uint8_t *)rom_table-2) * 4 >= SF_TABLE_V2_SIZE);
|
||||
memcpy(&sf_table, rom_table, SF_TABLE_V2_SIZE);
|
||||
}
|
||||
sf_clz_func = rom_func_lookup(ROM_FUNC_CLZ32);
|
||||
}
|
|
@ -0,0 +1,582 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#include "pico/float.h"
|
||||
|
||||
// opened a separate issue https://github.com/raspberrypi/pico-sdk/issues/166 to deal with these warnings if at all
|
||||
GCC_Pragma("GCC diagnostic push")
|
||||
GCC_Pragma("GCC diagnostic ignored \"-Wconversion\"")
|
||||
GCC_Pragma("GCC diagnostic ignored \"-Wsign-conversion\"")
|
||||
|
||||
typedef uint32_t ui32;
|
||||
typedef int32_t i32;
|
||||
|
||||
#define FPINF ( HUGE_VALF)
|
||||
#define FMINF (-HUGE_VALF)
|
||||
#define NANF ((float)NAN)
|
||||
#define PZERO (+0.0)
|
||||
#define MZERO (-0.0)
|
||||
|
||||
#define PI 3.14159265358979323846
|
||||
#define LOG2 0.69314718055994530941
|
||||
// Unfortunately in double precision ln(10) is very close to half-way between two representable numbers
|
||||
#define LOG10 2.30258509299404568401
|
||||
#define LOG2E 1.44269504088896340737
|
||||
#define LOG10E 0.43429448190325182765
|
||||
#define ONETHIRD 0.33333333333333333333
|
||||
|
||||
#define PIf 3.14159265358979323846f
|
||||
#define LOG2f 0.69314718055994530941f
|
||||
#define LOG2Ef 1.44269504088896340737f
|
||||
#define LOG10Ef 0.43429448190325182765f
|
||||
#define ONETHIRDf 0.33333333333333333333f
|
||||
|
||||
#define FUNPACK(x,e,m) e=((x)>>23)&0xff,m=((x)&0x007fffff)|0x00800000
|
||||
#define FUNPACKS(x,s,e,m) s=((x)>>31),FUNPACK((x),(e),(m))
|
||||
|
||||
typedef union {
|
||||
float f;
|
||||
ui32 ix;
|
||||
} float_ui32;
|
||||
|
||||
/* Reinterpret the 32-bit pattern ix as a float (bit copy through the union, no numeric conversion). */
static inline float ui322float(ui32 ix) {
    const float_ui32 bits = { .ix = ix };
    return bits.f;
}
|
||||
|
||||
static inline ui32 float2ui32(float f) {
|
||||
float_ui32 tmp;
|
||||
tmp.f = f;
|
||||
return tmp.ix;
|
||||
}
|
||||
|
||||
#if PICO_FLOAT_PROPAGATE_NANS
/* True when x is a NaN: with the sign shifted out, the pattern exceeds that of infinity. */
static inline bool fisnan(float x) {
    ui32 ix=float2ui32(x);
    return ix * 2 > 0xff000000u;
}

/*
 * Early-return helpers: leave the enclosing function with the NaN argument
 * itself. They are wrapped in do { } while (0) so each behaves as exactly one
 * statement (safe inside an unbraced if/else) and needs the caller's
 * terminating semicolon, just like the ((void)0) variants below.
 */
#define check_nan_f1(x) do { if (fisnan((x))) return (x); } while (0)
#define check_nan_f2(x,y) do { if (fisnan((x))) return (x); if (fisnan((y))) return (y); } while (0)
#else
/* NaN propagation disabled: the checks compile to nothing. */
#define check_nan_f1(x) ((void)0)
#define check_nan_f2(x,y) ((void)0)
#endif
|
||||
|
||||
/* Return the top nine bits of x: the sign in bit 8 and the biased exponent in bits 7..0. */
static inline int fgetsignexp(float x) {
    return (float2ui32(x)>>23)&0x1ff;
}
|
||||
|
||||
/* Return the 8-bit biased exponent field of x. */
static inline int fgetexp(float x) {
    return (float2ui32(x)>>23)&0xff;
}
|
||||
|
||||
static inline float fldexp(float x,int de) {
|
||||
ui32 ix=float2ui32(x),iy;
|
||||
int e;
|
||||
e=fgetexp(x);
|
||||
if(e==0||e==0xff) return x;
|
||||
e+=de;
|
||||
if(e<=0) iy=ix&0x80000000; // signed zero for underflow
|
||||
else if(e>=0xff) iy=(ix&0x80000000)|0x7f800000ULL; // signed infinity on overflow
|
||||
else iy=ix+((ui32)de<<23);
|
||||
return ui322float(iy);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(ldexpf)(float x, int de) {
|
||||
check_nan_f1(x);
|
||||
return fldexp(x, de);
|
||||
}
|
||||
|
||||
static inline float fcopysign(float x,float y) {
|
||||
ui32 ix=float2ui32(x),iy=float2ui32(y);
|
||||
ix=((ix&0x7fffffff)|(iy&0x80000000));
|
||||
return ui322float(ix);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(copysignf)(float x, float y) {
|
||||
check_nan_f2(x,y);
|
||||
return fcopysign(x, y);
|
||||
}
|
||||
|
||||
/* Exponent field is zero: true for signed zeros and for denormals. */
static inline int fiszero(float x) {
    return !fgetexp(x);
}
//static inline int fispzero(float x)  { return fgetsignexp(x)==0; }
//static inline int fismzero(float x)  { return fgetsignexp(x)==0x100; }

/* Exponent field is 0xff (the mantissa is not examined). */
static inline int fisinf(float x) {
    return fgetexp(x) == 0xff;
}

/* Sign clear and exponent field 0xff. */
static inline int fispinf(float x) {
    return fgetsignexp(x) == 0xff;
}

/* Sign set and exponent field 0xff. */
static inline int fisminf(float x) {
    return fgetsignexp(x) == 0x1ff;
}
|
||||
|
||||
static inline int fisint(float x) {
|
||||
ui32 ix=float2ui32(x),m;
|
||||
int e=fgetexp(x);
|
||||
if(e==0) return 1; // 0 is an integer
|
||||
e-=0x7f; // remove exponent bias
|
||||
if(e<0) return 0; // |x|<1
|
||||
e=23-e; // bit position in mantissa with significance 1
|
||||
if(e<=0) return 1; // |x| large, so must be an integer
|
||||
m=(1<<e)-1; // mask for bits of significance <1
|
||||
if(ix&m) return 0; // not an integer
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline int fisoddint(float x) {
|
||||
ui32 ix=float2ui32(x),m;
|
||||
int e=fgetexp(x);
|
||||
e-=0x7f; // remove exponent bias
|
||||
if(e<0) return 0; // |x|<1; 0 is not odd
|
||||
e=23-e; // bit position in mantissa with significance 1
|
||||
if(e<0) return 0; // |x| large, so must be even
|
||||
m=(1<<e)-1; // mask for bits of significance <1 (if any)
|
||||
if(ix&m) return 0; // not an integer
|
||||
if(e==23) return 1; // value is exactly 1
|
||||
return (ix>>e)&1;
|
||||
}
|
||||
|
||||
/* Sign bit of x, except that anything fiszero() accepts reports 0. */
static inline int fisstrictneg(float x) {
    return fiszero(x) ? 0 : (int)(float2ui32(x) >> 31);
}
|
||||
|
||||
/* Return the sign bit of x (1 for negative patterns, including -0). */
static inline int fisneg(float x) {
    return float2ui32(x) >> 31;
}
|
||||
|
||||
/* Negate x by flipping its sign bit. */
static inline float fneg(float x) {
    return ui322float(float2ui32(x) ^ 0x80000000);
}
|
||||
|
||||
/* Return 1 when x is a power of two: exponent field neither 0 nor 0xff and an all-zero mantissa. */
static inline int fispo2(float x) {
    if (fiszero(x) || fisinf(x)) return 0;
    return (float2ui32(x) & 0x007fffff) == 0;
}
|
||||
|
||||
/* Domain-error result: NaN when NaN propagation is enabled, otherwise the supplied fallback x. */
static inline float fnan_or(float x) {
#if PICO_FLOAT_PROPAGATE_NANS
    (void)x;
    return NANF;
#else
    return x;
#endif
}
|
||||
|
||||
float WRAPPER_FUNC(truncf)(float x) {
|
||||
check_nan_f1(x);
|
||||
ui32 ix=float2ui32(x),m;
|
||||
int e=fgetexp(x);
|
||||
e-=0x7f; // remove exponent bias
|
||||
if(e<0) { // |x|<1
|
||||
ix&=0x80000000;
|
||||
return ui322float(ix);
|
||||
}
|
||||
e=23-e; // bit position in mantissa with significance 1
|
||||
if(e<=0) return x; // |x| large, so must be an integer
|
||||
m=(1<<e)-1; // mask for bits of significance <1
|
||||
ix&=~m;
|
||||
return ui322float(ix);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(roundf)(float x) {
|
||||
check_nan_f1(x);
|
||||
ui32 ix=float2ui32(x),m;
|
||||
int e=fgetexp(x);
|
||||
e-=0x7f; // remove exponent bias
|
||||
if(e<-1) { // |x|<0.5
|
||||
ix&=0x80000000;
|
||||
return ui322float(ix);
|
||||
}
|
||||
if(e==-1) { // 0.5<=|x|<1
|
||||
ix&=0x80000000;
|
||||
ix|=0x3f800000; // ±1
|
||||
return ui322float(ix);
|
||||
}
|
||||
e=23-e; // bit position in mantissa with significance 1, <=23
|
||||
if(e<=0) return x; // |x| large, so must be an integer
|
||||
m=1<<(e-1); // mask for bit of significance 0.5
|
||||
ix+=m;
|
||||
m=m+m-1; // mask for bits of significance <1
|
||||
ix&=~m;
|
||||
return ui322float(ix);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(floorf)(float x) {
|
||||
check_nan_f1(x);
|
||||
ui32 ix=float2ui32(x),m;
|
||||
int e=fgetexp(x);
|
||||
if(e==0) { // x==0
|
||||
ix&=0x80000000;
|
||||
return ui322float(ix);
|
||||
}
|
||||
e-=0x7f; // remove exponent bias
|
||||
if(e<0) { // |x|<1, not zero
|
||||
if(fisneg(x)) return -1;
|
||||
return PZERO;
|
||||
}
|
||||
e=23-e; // bit position in mantissa with significance 1
|
||||
if(e<=0) return x; // |x| large, so must be an integer
|
||||
m=(1<<e)-1; // mask for bit of significance <1
|
||||
if(fisneg(x)) ix+=m; // add 1-ε to magnitude if negative
|
||||
ix&=~m; // truncate
|
||||
return ui322float(ix);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(ceilf)(float x) {
|
||||
check_nan_f1(x);
|
||||
ui32 ix=float2ui32(x),m;
|
||||
int e=fgetexp(x);
|
||||
if(e==0) { // x==0
|
||||
ix&=0x80000000;
|
||||
return ui322float(ix);
|
||||
}
|
||||
e-=0x7f; // remove exponent bias
|
||||
if(e<0) { // |x|<1, not zero
|
||||
if(fisneg(x)) return MZERO;
|
||||
return 1;
|
||||
}
|
||||
e=23-e; // bit position in mantissa with significance 1
|
||||
if(e<=0) return x; // |x| large, so must be an integer
|
||||
m=(1<<e)-1; // mask for bit of significance <1
|
||||
if(!fisneg(x)) ix+=m; // add 1-ε to magnitude if positive
|
||||
ix&=~m; // truncate
|
||||
return ui322float(ix);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(asinf)(float x) {
|
||||
check_nan_f1(x);
|
||||
float u;
|
||||
u=(1.0f-x)*(1.0f+x);
|
||||
if(fisstrictneg(u)) return fnan_or(FPINF);
|
||||
return atan2f(x,sqrtf(u));
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(acosf)(float x) {
|
||||
check_nan_f1(x);
|
||||
float u;
|
||||
u=(1.0f-x)*(1.0f+x);
|
||||
if(fisstrictneg(u)) return fnan_or(FPINF);
|
||||
return atan2f(sqrtf(u),x);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(atanf)(float x) {
|
||||
check_nan_f1(x);
|
||||
if(fispinf(x)) return (float)( PIf/2);
|
||||
if(fisminf(x)) return (float)(-PIf/2);
|
||||
return atan2f(x,1.0f);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(sinhf)(float x) {
|
||||
check_nan_f1(x);
|
||||
return fldexp((expf(x)-expf(fneg(x))),-1);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(coshf)(float x) {
|
||||
check_nan_f1(x);
|
||||
return fldexp((expf(x)+expf(fneg(x))),-1);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(tanhf)(float x) {
|
||||
check_nan_f1(x);
|
||||
float u;
|
||||
int e;
|
||||
e=fgetexp(x);
|
||||
if(e>=4+0x7f) { // |x|>=16?
|
||||
if(!fisneg(x)) return 1; // 1 << exp 2x; avoid generating infinities later
|
||||
else return -1; // 1 >> exp 2x
|
||||
}
|
||||
u=expf(fldexp(x,1));
|
||||
return (u-1.0f)/(u+1.0f);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(asinhf)(float x) {
|
||||
check_nan_f1(x);
|
||||
int e;
|
||||
e=fgetexp(x);
|
||||
if(e>=16+0x7f) { // |x|>=2^16?
|
||||
if(!fisneg(x)) return logf( x )+LOG2f; // 1/x^2 << 1
|
||||
else return fneg(logf(fneg(x))+LOG2f); // 1/x^2 << 1
|
||||
}
|
||||
if(x>0) return (float)log(sqrt((double)x*(double)x+1.0)+(double)x);
|
||||
else return fneg((float)log(sqrt((double)x*(double)x+1.0)-(double)x));
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(acoshf)(float x) {
|
||||
check_nan_f1(x);
|
||||
int e;
|
||||
if(fisneg(x)) x=fneg(x);
|
||||
e=fgetexp(x);
|
||||
if(e>=16+0x7f) return logf(x)+LOG2f; // |x|>=2^16?
|
||||
return (float)log(sqrt(((double)x+1.0)*((double)x-1.0))+(double)x);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(atanhf)(float x) {
|
||||
check_nan_f1(x);
|
||||
return fldexp(logf((1.0f+x)/(1.0f-x)),-1);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(exp2f)(float x) { check_nan_f1(x); return (float)exp((double)x*LOG2); }
|
||||
float WRAPPER_FUNC(log2f)(float x) { check_nan_f1(x); return logf(x)*LOG2Ef; }
|
||||
float WRAPPER_FUNC(exp10f)(float x) { check_nan_f1(x); return (float)exp((double)x*LOG10); }
|
||||
float WRAPPER_FUNC(log10f)(float x) { check_nan_f1(x); return logf(x)*LOG10Ef; }
|
||||
|
||||
float WRAPPER_FUNC(expm1f)(float x) { check_nan_f1(x); return (float)(exp((double)x)-1); }
|
||||
float WRAPPER_FUNC(log1pf)(float x) { check_nan_f1(x); return (float)(log(1+(double)x)); }
|
||||
float WRAPPER_FUNC(fmaf)(float x,float y,float z) {
|
||||
check_nan_f2(x,y);
|
||||
check_nan_f1(z);
|
||||
return (float)((double)x*(double)y+(double)z);
|
||||
} // has double rounding so not exact
|
||||
|
||||
// general power, x>0
|
||||
static inline float fpow_1(float x,float y) {
|
||||
return (float)exp(log((double)x)*(double)y); // using double-precision intermediates for better accuracy
|
||||
}
|
||||
|
||||
/*
 * x^y for an integer y >= 1 by recursive squaring: square the result for y/2
 * and multiply by x once more when y is odd. The recursion only terminates at
 * y == 1, so callers must not pass y < 1.
 */
static float fpow_int2(float x,int y) {
    if (y == 1) return x;
    const float half_power = fpow_int2(x, y/2);
    const float squared = half_power * half_power;
    return (y & 1) ? squared * x : squared;
}
|
||||
|
||||
// for the case where x not zero or infinity, y small and not zero
|
||||
/* x^y for the case where x is not zero or infinity and y is small and not zero; a negative y uses the reciprocal of x. */
static inline float fpowint_1(float x,int y) {
    if (y < 0) {
        x = 1.0f/x;
        y = -y;
    }
    return fpow_int2(x, y);
}
|
||||
|
||||
// for the case where x not zero or infinity
|
||||
/*
 * x^y for an integer exponent, for the case where x is not zero or infinity.
 *
 * - negative x: compute |x|^y and negate the result when y is odd. The parity
 *   is read straight from the integer (y & 1); converting y to float first
 *   would drop the low bit once |y| exceeds 2^24.
 * - x a power of two: the result is built exactly with fldexp, with y clamped
 *   so the exponent product cannot overflow.
 * - small |y| (<= 32): repeated multiplication; otherwise exp/log via fpow_1.
 */
static float fpowint_0(float x,int y) {
    if (fisneg(x)) {
        const float magnitude = fpowint_0(fneg(x), y);
        return (y & 1) ? fneg(magnitude) : magnitude;
    }
    if (fispo2(x)) {
        const int e = fgetexp(x) - 0x7f;
        if (y >= 256) y = 255;  // avoid overflow
        if (y < -256) y = -256;
        return fldexp(1, y * e);
    }
    if (y == 0) return 1;
    if (y >= -32 && y <= 32) return fpowint_1(x, y);
    return fpow_1(x, y);
}
|
||||
|
||||
float WRAPPER_FUNC(powintf)(float x,int y) {
|
||||
GCC_Pragma("GCC diagnostic push")
|
||||
GCC_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
|
||||
if(x==1.0f||y==0) return 1;
|
||||
if(x==0.0f) {
|
||||
if(y>0) {
|
||||
if(y&1) return x;
|
||||
else return 0;
|
||||
}
|
||||
if((y&1)) return fcopysign(FPINF,x);
|
||||
return FPINF;
|
||||
}
|
||||
GCC_Pragma("GCC diagnostic pop")
|
||||
check_nan_f1(x);
|
||||
if(fispinf(x)) {
|
||||
if(y<0) return 0;
|
||||
else return FPINF;
|
||||
}
|
||||
if(fisminf(x)) {
|
||||
if(y>0) {
|
||||
if((y&1)) return FMINF;
|
||||
else return FPINF;
|
||||
}
|
||||
if((y&1)) return MZERO;
|
||||
else return PZERO;
|
||||
}
|
||||
return fpowint_0(x,y);
|
||||
}
|
||||
|
||||
// for the case where y is guaranteed a finite integer, x not zero or infinity
|
||||
/*
 * x^y for the case where y is guaranteed a finite integer, x not zero or infinity.
 *
 * The integer form of y is clamped to [-256, 255] before the float-to-int
 * conversion: converting a float outside the range of int is undefined
 * behaviour in C, and every later use of p either applies the same clamp
 * (power-of-two path) or only asks whether p is 0 or within [-32, 32].
 */
static float fpow_0(float x,float y) {
    if (fisneg(x)) {
        const float magnitude = fpow_0(fneg(x), y);
        return fisoddint(y) ? fneg(magnitude) : magnitude;
    }
    int p;
    if (y >= 256.0f) p = 255;        // avoid overflow
    else if (y < -256.0f) p = -256;
    else p = (int)y;
    if (fispo2(x)) {
        const int e = fgetexp(x) - 0x7f;
        return fldexp(1, p * e);
    }
    if (p == 0) return 1;
    if (p >= -32 && p <= 32) return fpowint_1(x, p);
    return fpow_1(x, y);
}
|
||||
|
||||
float WRAPPER_FUNC(powf)(float x,float y) {
|
||||
GCC_Like_Pragma("GCC diagnostic push")
|
||||
GCC_Like_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
|
||||
if(x==1.0f||fiszero(y)) return 1;
|
||||
check_nan_f2(x,y);
|
||||
if(x==-1.0f&&fisinf(y)) return 1;
|
||||
GCC_Like_Pragma("GCC diagnostic pop")
|
||||
if(fiszero(x)) {
|
||||
if(!fisneg(y)) {
|
||||
if(fisoddint(y)) return x;
|
||||
else return 0;
|
||||
}
|
||||
if(fisoddint(y)) return fcopysign(FPINF,x);
|
||||
return FPINF;
|
||||
}
|
||||
if(fispinf(x)) {
|
||||
if(fisneg(y)) return 0;
|
||||
else return FPINF;
|
||||
}
|
||||
if(fisminf(x)) {
|
||||
if(!fisneg(y)) {
|
||||
if(fisoddint(y)) return FMINF;
|
||||
else return FPINF;
|
||||
}
|
||||
if(fisoddint(y)) return MZERO;
|
||||
else return PZERO;
|
||||
}
|
||||
if(fispinf(y)) {
|
||||
if(fgetexp(x)<0x7f) return PZERO;
|
||||
else return FPINF;
|
||||
}
|
||||
if(fisminf(y)) {
|
||||
if(fgetexp(x)<0x7f) return FPINF;
|
||||
else return PZERO;
|
||||
}
|
||||
if(fisint(y)) return fpow_0(x,y);
|
||||
if(fisneg(x)) return FPINF;
|
||||
return fpow_1(x,y);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(hypotf)(float x,float y) {
|
||||
check_nan_f2(x,y);
|
||||
int ex,ey;
|
||||
ex=fgetexp(x); ey=fgetexp(y);
|
||||
if(ex>=0x7f+50||ey>=0x7f+50) { // overflow, or nearly so
|
||||
x=fldexp(x,-70),y=fldexp(y,-70);
|
||||
return fldexp(sqrtf(x*x+y*y), 70);
|
||||
}
|
||||
else if(ex<=0x7f-50&&ey<=0x7f-50) { // underflow, or nearly so
|
||||
x=fldexp(x, 70),y=fldexp(y, 70);
|
||||
return fldexp(sqrtf(x*x+y*y),-70);
|
||||
}
|
||||
return sqrtf(x*x+y*y);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(cbrtf)(float x) {
|
||||
check_nan_f1(x);
|
||||
int e;
|
||||
if(fisneg(x)) return fneg(cbrtf(fneg(x)));
|
||||
if(fiszero(x)) return fcopysign(PZERO,x);
|
||||
e=fgetexp(x)-0x7f;
|
||||
e=(e*0x5555+0x8000)>>16; // ~e/3, rounded
|
||||
x=fldexp(x,-e*3);
|
||||
x=expf(logf(x)*ONETHIRDf);
|
||||
return fldexp(x,e);
|
||||
}
|
||||
|
||||
// reduces mx*2^e modulo my, returning bottom bits of quotient at *pquo
|
||||
// 2^23<=|mx|,my<2^24, e>=0; 0<=result<my
// pquo may be null, in which case the quotient is not stored
|
||||
static i32 frem_0(i32 mx,i32 my,int e,int*pquo) {
|
||||
int quo=0,q,r=0,s;
|
||||
// the reciprocal is only used inside the loop, so skip the division when e==0
if(e>0) {
|
||||
r=0xffffffffU/(ui32)(my>>7); // reciprocal estimate Q16
|
||||
}
|
||||
// long division: each pass consumes s bits of e and appends s quotient bits
while(e>0) {
|
||||
s=e; if(s>12) s=12; // gain up to 12 bits on each iteration
|
||||
q=(mx>>9)*r; // Q30
|
||||
q=((q>>(29-s))+1)>>1; // Q(s), rounded
|
||||
// shift the running remainder up by s bits and take off q multiples of my
mx=(mx<<s)-my*q;
|
||||
quo=(quo<<s)+q;
|
||||
e-=s;
|
||||
}
|
||||
// final correction: at most two steps either way to bring mx into 0<=mx<my,
// keeping quo in step with every adjustment
if(mx>=my) mx-=my,quo++; // when e==0 mx can be nearly as big as 2my
|
||||
if(mx>=my) mx-=my,quo++;
|
||||
if(mx<0) mx+=my,quo--;
|
||||
if(mx<0) mx+=my,quo--;
|
||||
if(pquo) *pquo=quo;
|
||||
return mx;
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(fmodf)(float x,float y) {
|
||||
check_nan_f2(x,y);
|
||||
ui32 ix=float2ui32(x),iy=float2ui32(y);
|
||||
int sx,ex,ey;
|
||||
i32 mx,my;
|
||||
FUNPACKS(ix,sx,ex,mx);
|
||||
FUNPACK(iy,ey,my);
|
||||
if(ex==0xff) {
|
||||
return fnan_or(FPINF);
|
||||
}
|
||||
if(ey==0) return FPINF;
|
||||
if(ex==0) {
|
||||
if(!fisneg(x)) return PZERO;
|
||||
return MZERO;
|
||||
}
|
||||
if(ex<ey) return x; // |x|<|y|, including case x=±0
|
||||
mx=frem_0(mx,my,ex-ey,0);
|
||||
if(sx) mx=-mx;
|
||||
return fix2float(mx,0x7f-ey+23);
|
||||
}
|
||||
|
||||
float WRAPPER_FUNC(remquof)(float x,float y,int*quo) {
|
||||
check_nan_f2(x,y);
|
||||
ui32 ix=float2ui32(x),iy=float2ui32(y);
|
||||
int sx,sy,ex,ey,q;
|
||||
i32 mx,my;
|
||||
FUNPACKS(ix,sx,ex,mx);
|
||||
FUNPACKS(iy,sy,ey,my);
|
||||
if(quo) *quo=0;
|
||||
if(ex==0xff) return FPINF;
|
||||
if(ey==0) return FPINF;
|
||||
if(ex==0) return PZERO;
|
||||
if(ey==0xff) return x;
|
||||
if(ex<ey-1) return x; // |x|<|y|/2
|
||||
if(ex==ey-1) {
|
||||
if(mx<=my) return x; // |x|<=|y|/2, even quotient
|
||||
// here |y|/2<|x|<|y|
|
||||
if(!sx) { // x>|y|/2
|
||||
mx-=my+my;
|
||||
ey--;
|
||||
q=1;
|
||||
} else { // x<-|y|/2
|
||||
mx=my+my-mx;
|
||||
ey--;
|
||||
q=-1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if(sx) mx=-mx;
|
||||
mx=frem_0(mx,my,ex-ey,&q);
|
||||
if(mx+mx>my || (mx+mx==my&&(q&1)) ) { // |x|>|y|/2, or equality and an odd quotient?
|
||||
mx-=my;
|
||||
q++;
|
||||
}
|
||||
}
|
||||
if(sy) q=-q;
|
||||
if(quo) *quo=q;
|
||||
return fix2float(mx,0x7f-ey+23);
|
||||
}
|
||||
|
||||
// dremf(x,y): remquof() with the quotient discarded
float WRAPPER_FUNC(dremf)(float x,float y) {
    check_nan_f2(x,y);
    return remquof(x,y,0);
}
|
||||
|
||||
// remainderf(x,y): remquof() with the quotient discarded
float WRAPPER_FUNC(remainderf)(float x,float y) {
    check_nan_f2(x,y);
    return remquof(x,y,0);
}
|
||||
|
||||
GCC_Pragma("GCC diagnostic pop") // conversion
|
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#include "pico/asm_helper.S"
|
||||
#include "pico/bootrom/sf_table.h"
|
||||
|
||||
pico_default_asm_setup
|
||||
|
||||
wrapper_func __aeabi_fadd
|
||||
wrapper_func __aeabi_fdiv
|
||||
wrapper_func __aeabi_fmul
|
||||
wrapper_func __aeabi_frsub
|
||||
wrapper_func __aeabi_fsub
|
||||
wrapper_func __aeabi_cfcmpeq
|
||||
wrapper_func __aeabi_cfrcmple
|
||||
wrapper_func __aeabi_cfcmple
|
||||
wrapper_func __aeabi_fcmpeq
|
||||
wrapper_func __aeabi_fcmplt
|
||||
wrapper_func __aeabi_fcmple
|
||||
wrapper_func __aeabi_fcmpge
|
||||
wrapper_func __aeabi_fcmpgt
|
||||
wrapper_func __aeabi_fcmpun
|
||||
wrapper_func __aeabi_i2f
|
||||
wrapper_func __aeabi_l2f
|
||||
wrapper_func __aeabi_ui2f
|
||||
wrapper_func __aeabi_ul2f
|
||||
wrapper_func __aeabi_f2iz
|
||||
wrapper_func __aeabi_f2lz
|
||||
wrapper_func __aeabi_f2uiz
|
||||
wrapper_func __aeabi_f2ulz
|
||||
wrapper_func __aeabi_f2d
|
||||
wrapper_func sqrtf
|
||||
wrapper_func cosf
|
||||
wrapper_func sinf
|
||||
wrapper_func tanf
|
||||
wrapper_func atan2f
|
||||
wrapper_func expf
|
||||
wrapper_func logf
|
||||
wrapper_func ldexpf
|
||||
wrapper_func copysignf
|
||||
wrapper_func truncf
|
||||
wrapper_func floorf
|
||||
wrapper_func ceilf
|
||||
wrapper_func roundf
|
||||
wrapper_func sincosf
|
||||
wrapper_func asinf
|
||||
wrapper_func acosf
|
||||
wrapper_func atanf
|
||||
wrapper_func sinhf
|
||||
wrapper_func coshf
|
||||
wrapper_func tanhf
|
||||
wrapper_func asinhf
|
||||
wrapper_func acoshf
|
||||
wrapper_func atanhf
|
||||
wrapper_func exp2f
|
||||
wrapper_func log2f
|
||||
wrapper_func exp10f
|
||||
wrapper_func log10f
|
||||
wrapper_func powf
|
||||
wrapper_func powintf
|
||||
wrapper_func hypotf
|
||||
wrapper_func cbrtf
|
||||
wrapper_func fmodf
|
||||
wrapper_func dremf
|
||||
wrapper_func remainderf
|
||||
wrapper_func remquof
|
||||
wrapper_func expm1f
|
||||
wrapper_func log1pf
|
||||
wrapper_func fmaf
|
||||
// Shared body for the wrapper_func entries listed above: there are no
// instructions between them, so (assuming wrapper_func only emits a label -
// confirm in pico/asm_helper.S) a call to any wrapped float function arrives
// here and calls panic with the message at str.
push {lr} // keep stack trace sane
|
||||
// r0 = address of the message string below
ldr r0, =str
|
||||
// NOTE(review): nothing follows this call before the string data, so panic is presumably expected not to return
bl panic
|
||||
|
||||
str:
|
||||
.asciz "float support is disabled"
|
|
@ -0,0 +1,346 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#include "pico/asm_helper.S"
|
||||
|
||||
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
|
||||
|
||||
#ifndef PICO_FLOAT_IN_RAM
|
||||
#define PICO_FLOAT_IN_RAM 0
|
||||
#endif
|
||||
|
||||
pico_default_asm_setup
|
||||
|
||||
.macro float_section name
|
||||
// todo separate flag for shims?
|
||||
#if PICO_FLOAT_IN_RAM
|
||||
.section RAM_SECTION_NAME(\name), "ax"
|
||||
#else
|
||||
.section SECTION_NAME(\name), "ax"
|
||||
#endif
|
||||
.endm
|
||||
|
||||
// Patches one sf_table entry the first time it is used, then jumps to it.
// On entry ip holds an address inside the calling code. The helper scans
// forward from there for a halfword whose top byte is 0xdf; the low byte of
// that halfword is the byte offset into sf_table, and the 32-bit word that
// follows it is the function address to install and to jump to.
// r0-r2 are preserved for the function that is finally entered.
float_section float_table_shim_on_use_helper
|
||||
regular_func float_table_shim_on_use_helper
|
||||
// stack layout after this push: [sp]=r0, [sp,#4]=r1, [sp,#8]=r2, [sp,#12]=lr
push {r0-r2, lr}
|
||||
mov r0, ip
|
||||
#ifndef NDEBUG
|
||||
// sanity check to make sure we weren't called by non (shimmable_) table_tail_call macro
|
||||
cmp r0, #0
|
||||
bne 1f
|
||||
bkpt #0
|
||||
#endif
|
||||
1:
|
||||
// scan halfword by halfword until the marker (top byte 0xdf) is found;
// r0 is left pointing just past the marker
ldrh r1, [r0]
|
||||
lsrs r2, r1, #8
|
||||
adds r0, #2
|
||||
cmp r2, #0xdf
|
||||
bne 1b
|
||||
uxtb r1, r1 // r1 holds table offset
|
||||
// shifting right by 2 moves bit 1 of the address into the carry flag:
// carry clear means the following word is 4-byte aligned
lsrs r2, r0, #2
|
||||
bcc 1f
|
||||
// unaligned
|
||||
// assemble the word from two halfword loads, low half first
ldrh r2, [r0, #0]
|
||||
ldrh r0, [r0, #2]
|
||||
lsls r0, #16
|
||||
orrs r0, r2
|
||||
b 2f
|
||||
1:
|
||||
ldr r0, [r0]
|
||||
2:
|
||||
// store the function address into the table entry at byte offset r1
ldr r2, =sf_table
|
||||
str r0, [r2, r1]
|
||||
// overwrite the saved lr slot so the pop below loads the function address into pc
str r0, [sp, #12]
|
||||
pop {r0-r2, pc}
|
||||
|
||||
float_section 642float_shims
|
||||
|
||||
@ convert uint64 to float, rounding
|
||||
regular_func uint642float_shim
|
||||
movs r2,#0 @ fall through
|
||||
|
||||
@ convert unsigned 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
|
||||
regular_func ufix642float_shim
|
||||
push {r4,r5,r14}
|
||||
cmp r1,#0
|
||||
bpl 3f @ positive? we can use signed code
|
||||
lsls r5,r1,#31 @ contribution to sticky bits
|
||||
orrs r5,r0
|
||||
lsrs r0,r1,#1
|
||||
subs r2,#1
|
||||
b 4f
|
||||
|
||||
@ convert int64 to float, rounding
|
||||
regular_func int642float_shim
|
||||
movs r2,#0 @ fall through
|
||||
|
||||
@ convert signed 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
|
||||
regular_func fix642float_shim
|
||||
push {r4,r5,r14}
|
||||
3:
|
||||
movs r5,r0
|
||||
orrs r5,r1
|
||||
beq ret_pop45 @ zero? return +0
|
||||
asrs r5,r1,#31 @ sign bits
|
||||
2:
|
||||
asrs r4,r1,#24 @ try shifting 7 bits at a time
|
||||
cmp r4,r5
|
||||
bne 1f @ next shift will overflow?
|
||||
lsls r1,#7
|
||||
lsrs r4,r0,#25
|
||||
orrs r1,r4
|
||||
lsls r0,#7
|
||||
adds r2,#7
|
||||
b 2b
|
||||
1:
|
||||
movs r5,r0
|
||||
movs r0,r1
|
||||
4:
|
||||
negs r2,r2
|
||||
adds r2,#32+29
|
||||
|
||||
// bl packx
|
||||
ldr r1, =0x29ef // packx
|
||||
blx r1
|
||||
ret_pop45:
|
||||
pop {r4,r5,r15}
|
||||
|
||||
float_section fatan2_shim
|
||||
regular_func fatan2_shim
|
||||
push {r4,r5,r14}
|
||||
|
||||
ldr r4, =0x29c1 // unpackx
|
||||
mov ip, r4
|
||||
@ unpack arguments and shift one down to have common exponent
|
||||
blx ip
|
||||
mov r4,r0
|
||||
mov r0,r1
|
||||
mov r1,r4
|
||||
mov r4,r2
|
||||
mov r2,r3
|
||||
mov r3,r4
|
||||
blx ip
|
||||
lsls r0,r0,#5 @ Q28
|
||||
lsls r1,r1,#5 @ Q28
|
||||
adds r4,r2,r3 @ this is -760 if both arguments are 0 and at least -380-126=-506 otherwise
|
||||
asrs r4,#9
|
||||
adds r4,#1
|
||||
bmi 2f @ force y to 0 proper, so result will be zero
|
||||
subs r4,r2,r3 @ calculate shift
|
||||
bge 1f @ ex>=ey?
|
||||
negs r4,r4 @ make shift positive
|
||||
asrs r0,r4
|
||||
cmp r4,#28
|
||||
blo 3f
|
||||
asrs r0,#31
|
||||
b 3f
|
||||
1:
|
||||
asrs r1,r4
|
||||
cmp r4,#28
|
||||
blo 3f
|
||||
2:
|
||||
@ here |x|>>|y| or both x and y are ±0
|
||||
cmp r0,#0
|
||||
bge 4f @ x positive, return signed 0
|
||||
ldr r3, =0x2cfc @ &pi_q29, circular coefficients
|
||||
ldr r0,[r3] @ x negative, return +/- pi
|
||||
asrs r1,#31
|
||||
eors r0,r1
|
||||
b 7f
|
||||
4:
|
||||
asrs r0,r1,#31
|
||||
b 7f
|
||||
3:
|
||||
movs r2,#0 @ initial angle
|
||||
ldr r3, =0x2cfc @ &pi_q29, circular coefficients
|
||||
cmp r0,#0 @ x negative
|
||||
bge 5f
|
||||
negs r0,r0 @ rotate to 1st/4th quadrants
|
||||
negs r1,r1
|
||||
ldr r2,[r3] @ pi Q29
|
||||
5:
|
||||
movs r4,#1 @ m=1
|
||||
ldr r5, =0x2b97 @ cordic_vec
|
||||
blx r5 @ also produces magnitude (with scaling factor 1.646760119), which is discarded
|
||||
mov r0,r2 @ result here is -pi/2..3pi/2 Q29
|
||||
@ asrs r2,#29
|
||||
@ subs r0,r2
|
||||
ldr r3, =0x2cfc @ &pi_q29, circular coefficients
|
||||
ldr r2,[r3] @ pi Q29
|
||||
adds r4,r0,r2 @ attempt to fix -3pi/2..-pi case
|
||||
bcs 6f @ -pi/2..0? leave result as is
|
||||
subs r4,r0,r2 @ <pi? leave as is
|
||||
bmi 6f
|
||||
subs r0,r4,r2 @ >pi: take off 2pi
|
||||
6:
|
||||
subs r0,#1 @ fiddle factor so atan2(0,1)==0
|
||||
7:
|
||||
movs r2,#0 @ exponent for pack
|
||||
ldr r3, =0x2b19
|
||||
bx r3
|
||||
|
||||
float_section float232_shims
|
||||
|
||||
// float2int_shim: float2fix_shim with r1 forced to 0.
// float2fix_shim: returns 0 straight away for a negative input whose exponent
// field is zero (-0 or a negative denormal); every other input is passed on
// unchanged to the routine at 0x2acd by a tail call.
regular_func float2int_shim
|
||||
movs r1,#0 @ fall through
|
||||
regular_func float2fix_shim
|
||||
// check for -0 or -denormal upfront
|
||||
// the arithmetic shift leaves sign and exponent sign-extended in r2, which is
// -256 exactly when the sign bit is set and the exponent field is zero
asrs r2, r0, #23
|
||||
// 256 is added in two steps of 128 (a single Thumb adds immediate is limited to 8 bits)
adds r2, #128
|
||||
adds r2, #128
|
||||
beq 1f
|
||||
// call original
|
||||
ldr r2, =0x2acd
|
||||
// tail call: lr is untouched, so the original returns directly to our caller
bx r2
|
||||
1:
|
||||
movs r0, #0
|
||||
bx lr
|
||||
|
||||
float_section float264_shims
|
||||
|
||||
regular_func float2int64_shim
|
||||
movs r1,#0 @ and fall through
|
||||
regular_func float2fix64_shim
|
||||
push {r14}
|
||||
bl f2fix
|
||||
b d2f64_a
|
||||
|
||||
regular_func float2uint64_shim
|
||||
movs r1,#0 @ and fall through
|
||||
regular_func float2ufix64_shim
|
||||
asrs r3,r0,#23 @ negative? return 0
|
||||
bmi ret_dzero
|
||||
@ and fall through
|
||||
|
||||
@ convert float in r0 to signed fixed point in r0:r1:r3, r1 places after point, rounding towards -Inf
|
||||
@ result clamped so that r3 can only be 0 or -1
|
||||
@ trashes r12
|
||||
.thumb_func
|
||||
f2fix:
|
||||
push {r4,r14}
|
||||
mov r12,r1
|
||||
asrs r3,r0,#31
|
||||
lsls r0,#1
|
||||
lsrs r2,r0,#24
|
||||
beq 1f @ zero?
|
||||
cmp r2,#0xff @ Inf?
|
||||
beq 2f
|
||||
subs r1,r2,#1
|
||||
subs r2,#0x7f @ remove exponent bias
|
||||
lsls r1,#24
|
||||
subs r0,r1 @ insert implied 1
|
||||
eors r0,r3
|
||||
subs r0,r3 @ top two's complement
|
||||
asrs r1,r0,#4 @ convert to double format
|
||||
lsls r0,#28
|
||||
ldr r4, =d2fix_a
|
||||
bx r4
|
||||
1:
|
||||
movs r0,#0
|
||||
movs r1,r0
|
||||
movs r3,r0
|
||||
pop {r4,r15}
|
||||
2:
|
||||
mvns r0,r3 @ return max/min value
|
||||
mvns r1,r3
|
||||
pop {r4,r15}
|
||||
|
||||
ret_dzero:
|
||||
movs r0,#0
|
||||
movs r1,#0
|
||||
bx r14
|
||||
|
||||
float_section d2fix_a_float
|
||||
|
||||
.weak d2fix_a // weak because it exists in float shims too
|
||||
.thumb_func
|
||||
d2fix_a:
|
||||
@ here
|
||||
@ r0:r1 two's complement mantissa
|
||||
@ r2 unbiased exponent
|
||||
@ r3 mantissa sign extension bits
|
||||
add r2,r12 @ exponent plus offset for required binary point position
|
||||
subs r2,#52 @ required shift
|
||||
bmi 1f @ shift down?
|
||||
@ here a shift up by r2 places
|
||||
cmp r2,#12 @ will clamp?
|
||||
bge 2f
|
||||
movs r4,r0
|
||||
lsls r1,r2
|
||||
lsls r0,r2
|
||||
negs r2,r2
|
||||
adds r2,#32 @ complementary shift
|
||||
lsrs r4,r2
|
||||
orrs r1,r4
|
||||
pop {r4,r15}
|
||||
2:
|
||||
mvns r0,r3
|
||||
mvns r1,r3 @ overflow: clamp to extreme fixed-point values
|
||||
pop {r4,r15}
|
||||
1:
|
||||
@ here a shift down by -r2 places
|
||||
adds r2,#32
|
||||
bmi 1f @ long shift?
|
||||
mov r4,r1
|
||||
lsls r4,r2
|
||||
negs r2,r2
|
||||
adds r2,#32 @ complementary shift
|
||||
asrs r1,r2
|
||||
lsrs r0,r2
|
||||
orrs r0,r4
|
||||
pop {r4,r15}
|
||||
1:
|
||||
@ here a long shift down
|
||||
movs r0,r1
|
||||
asrs r1,#31 @ shift down 32 places
|
||||
adds r2,#32
|
||||
bmi 1f @ very long shift?
|
||||
negs r2,r2
|
||||
adds r2,#32
|
||||
asrs r0,r2
|
||||
pop {r4,r15}
|
||||
1:
|
||||
movs r0,r3 @ result very near zero: use sign extension bits
|
||||
movs r1,r3
|
||||
pop {r4,r15}
|
||||
d2f64_a:
|
||||
asrs r2,r1,#31
|
||||
cmp r2,r3
|
||||
bne 1f @ sign extension bits fail to match sign of result?
|
||||
pop {r15}
|
||||
1:
|
||||
mvns r0,r3
|
||||
movs r1,#1
|
||||
lsls r1,#31
|
||||
eors r1,r1,r0 @ generate extreme fixed-point values
|
||||
pop {r15}
|
||||
|
||||
float_section float2double_shim
|
||||
// Converts the single-precision value in r0 to a double returned in r0
// (low word) and r1 (high word).
// Any input with a zero exponent field, denormals included, returns a signed
// zero; any input with an all-ones exponent field returns a signed infinity,
// because the mantissa is not examined on that path.
regular_func float2double_shim
|
||||
lsrs r3,r0,#31 @ sign bit
|
||||
// move the sign back to bit 31, ready to be merged into the high word
lsls r3,#31
|
||||
// drop the sign so that the exponent occupies the top 8 bits of r1
lsls r1,r0,#1
|
||||
lsrs r2,r1,#24 @ exponent
|
||||
beq 1f @ zero?
|
||||
cmp r2,#0xff @ Inf?
|
||||
beq 2f
|
||||
lsrs r1,#4 @ exponent and top 20 bits of mantissa
|
||||
ldr r2,=(0x3ff-0x7f)<<20 @ difference in exponent offsets
|
||||
adds r1,r2
|
||||
orrs r1,r3
|
||||
lsls r0,#29 @ bottom 3 bits of mantissa
|
||||
bx r14
|
||||
1:
|
||||
movs r1,r3 @ return signed zero
|
||||
3:
|
||||
// shared exit for the zero and infinity paths: the low word is zero
movs r0,#0
|
||||
bx r14
|
||||
2:
|
||||
ldr r1,=0x7ff00000 @ return signed infinity
|
||||
adds r1,r3
|
||||
b 3b
|
||||
|
||||
#endif
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#ifndef _PICO_FLOAT_H
|
||||
#define _PICO_FLOAT_H
|
||||
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#include "pico.h"
|
||||
#include "pico/bootrom/sf_table.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** \file float.h
|
||||
* \defgroup pico_float pico_float
|
||||
*
|
||||
* Optimized single-precision floating point functions
|
||||
*
|
||||
* (Replacement) optimized implementations are provided of the following compiler built-ins
|
||||
* and math library functions:
|
||||
*
|
||||
* - __aeabi_fadd, __aeabi_fdiv, __aeabi_fmul, __aeabi_frsub, __aeabi_fsub, __aeabi_cfcmpeq, __aeabi_cfrcmple, __aeabi_cfcmple, __aeabi_fcmpeq, __aeabi_fcmplt, __aeabi_fcmple, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmpun, __aeabi_i2f, __aeabi_l2f, __aeabi_ui2f, __aeabi_ul2f, __aeabi_f2iz, __aeabi_f2lz, __aeabi_f2uiz, __aeabi_f2ulz, __aeabi_f2d, sqrtf, cosf, sinf, tanf, atan2f, expf, logf
|
||||
* - ldexpf, copysignf, truncf, floorf, ceilf, roundf, asinf, acosf, atanf, sinhf, coshf, tanhf, asinhf, acoshf, atanhf, exp2f, log2f, exp10f, log10f, powf, hypotf, cbrtf, fmodf, dremf, remainderf, remquof, expm1f, log1pf, fmaf
|
||||
* - powintf, sincosf (GNU extensions)
|
||||
*
|
||||
* The following additional optimized functions are also provided:
|
||||
*
|
||||
* - fix2float, ufix2float, fix642float, ufix642float, float2fix, float2ufix, float2fix64, float2ufix64, float2int, float2int64, float2int_z, float2int64_z
|
||||
*/
|
||||
|
||||
float fix2float(int32_t m, int e);
|
||||
float ufix2float(uint32_t m, int e);
|
||||
float fix642float(int64_t m, int e);
|
||||
float ufix642float(uint64_t m, int e);
|
||||
|
||||
// These methods round towards -Infinity.
|
||||
int32_t float2fix(float f, int e);
|
||||
uint32_t float2ufix(float f, int e);
|
||||
int64_t float2fix64(float f, int e);
|
||||
uint64_t float2ufix64(float f, int e);
|
||||
int32_t float2int(float f);
|
||||
int64_t float2int64(float f);
|
||||
|
||||
// These methods round towards 0.
|
||||
int32_t float2int_z(float f);
|
||||
int64_t float2int64_z(float f);
|
||||
|
||||
float exp10f(float x);
|
||||
void sincosf(float x, float *sinx, float *cosx);
|
||||
float powintf(float x, int y);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,95 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#if !PICO_NO_BINARY_INFO && !PICO_NO_PROGRAM_INFO
|
||||
#include "pico/binary_info.h"
|
||||
|
||||
#if !PICO_NO_FLASH
|
||||
#include "boot_stage2/config.h"
|
||||
#endif
|
||||
|
||||
// Note we put at most 4 pieces of binary info in the reset section because that's how much spare space we had
|
||||
// (picked the most common ones)... if there is a link failure because of .reset section overflow then move
|
||||
// more out.
|
||||
#define reset_section_attr __attribute__((section(".reset")))
|
||||
|
||||
#if !PICO_NO_FLASH
|
||||
#ifndef PICO_NO_BI_BINARY_SIZE
|
||||
extern char __flash_binary_end;
|
||||
bi_decl_with_attr(bi_binary_end((intptr_t)&__flash_binary_end), reset_section_attr)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !PICO_NO_BI_PROGRAM_BUILD_DATE
|
||||
#ifndef PICO_PROGRAM_BUILD_DATE
|
||||
#define PICO_PROGRAM_BUILD_DATE __DATE__
|
||||
#endif
|
||||
bi_decl_with_attr(bi_program_build_date_string(PICO_PROGRAM_BUILD_DATE), reset_section_attr);
|
||||
#endif
|
||||
|
||||
#if !PICO_NO_BI_PROGRAM_NAME
|
||||
#if !defined(PICO_PROGRAM_NAME) && defined(PICO_TARGET_NAME)
|
||||
#define PICO_PROGRAM_NAME PICO_TARGET_NAME
|
||||
#endif
|
||||
#ifdef PICO_PROGRAM_NAME
|
||||
bi_decl_with_attr(bi_program_name(PICO_PROGRAM_NAME), reset_section_attr)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !PICO_NO_BI_PICO_BOARD
|
||||
#ifdef PICO_BOARD
|
||||
bi_decl(bi_string(BINARY_INFO_TAG_RASPBERRY_PI, BINARY_INFO_ID_RP_PICO_BOARD, PICO_BOARD))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !PICO_NO_BI_SDK_VERSION
|
||||
#ifdef PICO_SDK_VERSION_STRING
|
||||
bi_decl_with_attr(bi_string(BINARY_INFO_TAG_RASPBERRY_PI, BINARY_INFO_ID_RP_SDK_VERSION, PICO_SDK_VERSION_STRING),reset_section_attr)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !PICO_NO_BI_PROGRAM_VERSION_STRING
|
||||
#ifdef PICO_PROGRAM_VERSION_STRING
|
||||
bi_decl(bi_program_version_string(PICO_PROGRAM_VERSION_STRING))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if !PICO_NO_BI_PROGRAM_DESCRIPTION
|
||||
#ifdef PICO_PROGRAM_DESCRIPTION
|
||||
bi_decl(bi_program_description(PICO_PROGRAM_DESCRIPTION))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !PICO_NO_BI_PROGRAM_URL
|
||||
#ifdef PICO_PROGRAM_URL
|
||||
bi_decl(bi_program_url(PICO_PROGRAM_URL))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !PICO_NO_BI_BOOT_STAGE2_NAME
|
||||
#ifdef PICO_BOOT_STAGE2_NAME
|
||||
bi_decl(bi_string(BINARY_INFO_TAG_RASPBERRY_PI, BINARY_INFO_ID_RP_BOOT2_NAME, PICO_BOOT_STAGE2_NAME))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !PICO_NO_BI_BUILD_TYPE
|
||||
#ifdef PICO_CMAKE_BUILD_TYPE
|
||||
bi_decl(bi_program_build_attribute(PICO_CMAKE_BUILD_TYPE))
|
||||
#else
|
||||
#ifndef NDEBUG
|
||||
bi_decl(bi_program_build_attribute("Debug"))
|
||||
#else
|
||||
bi_decl(bi_program_build_attribute("Release"))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if PICO_DEOPTIMIZED_DEBUG
|
||||
bi_decl(bi_program_build_attribute("All optimization disabled"))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,352 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#include "pico.h"
|
||||
#include "pico/asm_helper.S"
|
||||
|
||||
#include "hardware/regs/m0plus.h"
|
||||
#include "hardware/regs/addressmap.h"
|
||||
#include "hardware/regs/sio.h"
|
||||
#include "pico/binary_info/defs.h"
|
||||
|
||||
#ifdef NDEBUG
|
||||
#ifndef COLLAPSE_IRQS
|
||||
#define COLLAPSE_IRQS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
pico_default_asm_setup
|
||||
|
||||
.section .vectors, "ax"
|
||||
.align 2
|
||||
|
||||
.global __vectors, __VECTOR_TABLE
|
||||
__VECTOR_TABLE:
|
||||
__vectors:
|
||||
.word __StackTop
|
||||
.word _reset_handler
|
||||
.word NMI_Handler
|
||||
.word HardFault_Handler
|
||||
.word isr_invalid // Reserved, should never fire
|
||||
.word isr_invalid // Reserved, should never fire
|
||||
.word isr_invalid // Reserved, should never fire
|
||||
.word isr_invalid // Reserved, should never fire
|
||||
.word isr_invalid // Reserved, should never fire
|
||||
.word isr_invalid // Reserved, should never fire
|
||||
.word isr_invalid // Reserved, should never fire
|
||||
.word SVC_Handler
|
||||
.word isr_invalid // Reserved, should never fire
|
||||
.word isr_invalid // Reserved, should never fire
|
||||
.word PendSV_Handler
|
||||
.word SysTick_Handler
|
||||
.word TIMER_IRQ_0_Handler
|
||||
.word TIMER_IRQ_1_Handler
|
||||
.word TIMER_IRQ_2_Handler
|
||||
.word TIMER_IRQ_3_Handler
|
||||
.word PWM_IRQ_WRAP_Handler
|
||||
.word USBCTRL_IRQ_Handler
|
||||
.word XIP_IRQ_Handler
|
||||
.word PIO0_IRQ_0_Handler
|
||||
.word PIO0_IRQ_1_Handler
|
||||
.word PIO1_IRQ_0_Handler
|
||||
.word TIMER_IRQ_1_Handler0
|
||||
.word TIMER_IRQ_1_Handler1
|
||||
.word TIMER_IRQ_1_Handler2
|
||||
.word TIMER_IRQ_1_Handler3
|
||||
.word TIMER_IRQ_1_Handler4
|
||||
.word TIMER_IRQ_1_Handler5
|
||||
.word TIMER_IRQ_1_Handler6
|
||||
.word TIMER_IRQ_1_Handler7
|
||||
.word TIMER_IRQ_1_Handler8
|
||||
.word TIMER_IRQ_1_Handler9
|
||||
.word TIMER_IRQ_2_Handler0
|
||||
.word TIMER_IRQ_2_Handler1
|
||||
.word TIMER_IRQ_2_Handler2
|
||||
.word TIMER_IRQ_2_Handler3
|
||||
.word TIMER_IRQ_2_Handler4
|
||||
.word TIMER_IRQ_2_Handler5
|
||||
.word TIMER_IRQ_2_Handler6
|
||||
.word TIMER_IRQ_2_Handler7
|
||||
.word TIMER_IRQ_2_Handler8
|
||||
.word TIMER_IRQ_2_Handler9
|
||||
.word TIMER_IRQ_3_Handler0
|
||||
.word TIMER_IRQ_3_Handler1
|
||||
|
||||
// all default exception handlers do nothing, and we can check for them being set to our
|
||||
// default values by seeing if they point to somewhere between __default_isrs_start and __default_isrs_end
|
||||
.global __default_isrs_start
|
||||
__default_isrs_start:
|
||||
|
||||
// Declare a weak symbol for each ISR.
|
||||
// By default, they will fall through to the undefined IRQ handler below (breakpoint),
|
||||
// but can be overridden by C functions with correct name.
|
||||
|
||||
.macro decl_isr_bkpt name
|
||||
.weak \name
|
||||
.type \name,%function
|
||||
.thumb_func
|
||||
\name:
|
||||
bkpt #0
|
||||
.endm
|
||||
|
||||
// these are separated out for clarity
|
||||
decl_isr_bkpt isr_invalid
|
||||
decl_isr_bkpt NMI_Handler
|
||||
decl_isr_bkpt HardFault_Handler
|
||||
decl_isr_bkpt SVC_Handler
|
||||
decl_isr_bkpt PendSV_Handler
|
||||
decl_isr_bkpt SysTick_Handler
|
||||
|
||||
.global __default_isrs_end
|
||||
__default_isrs_end:
|
||||
|
||||
.macro decl_isr name
|
||||
.weak \name
|
||||
.type \name,%function
|
||||
.thumb_func
|
||||
\name:
|
||||
.endm
|
||||
|
||||
decl_isr TIMER_IRQ_0_Handler
|
||||
decl_isr TIMER_IRQ_1_Handler
|
||||
decl_isr TIMER_IRQ_2_Handler
|
||||
decl_isr TIMER_IRQ_3_Handler
|
||||
decl_isr PWM_IRQ_WRAP_Handler
|
||||
decl_isr USBCTRL_IRQ_Handler
|
||||
decl_isr XIP_IRQ_Handler
|
||||
decl_isr PIO0_IRQ_0_Handler
|
||||
decl_isr PIO0_IRQ_1_Handler
|
||||
decl_isr PIO1_IRQ_0_Handler
|
||||
decl_isr TIMER_IRQ_1_Handler0
|
||||
decl_isr TIMER_IRQ_1_Handler1
|
||||
decl_isr TIMER_IRQ_1_Handler2
|
||||
decl_isr TIMER_IRQ_1_Handler3
|
||||
decl_isr TIMER_IRQ_1_Handler4
|
||||
decl_isr TIMER_IRQ_1_Handler5
|
||||
decl_isr TIMER_IRQ_1_Handler6
|
||||
decl_isr TIMER_IRQ_1_Handler7
|
||||
decl_isr TIMER_IRQ_1_Handler8
|
||||
decl_isr TIMER_IRQ_1_Handler9
|
||||
decl_isr TIMER_IRQ_2_Handler0
|
||||
decl_isr TIMER_IRQ_2_Handler1
|
||||
decl_isr TIMER_IRQ_2_Handler2
|
||||
decl_isr TIMER_IRQ_2_Handler3
|
||||
decl_isr TIMER_IRQ_2_Handler4
|
||||
decl_isr TIMER_IRQ_2_Handler5
|
||||
decl_isr TIMER_IRQ_2_Handler6
|
||||
decl_isr TIMER_IRQ_2_Handler7
|
||||
decl_isr TIMER_IRQ_2_Handler8
|
||||
decl_isr TIMER_IRQ_2_Handler9
|
||||
decl_isr TIMER_IRQ_3_Handler0
|
||||
decl_isr TIMER_IRQ_3_Handler1
|
||||
|
||||
// All unhandled USER IRQs fall through to here
|
||||
.global __unhandled_user_irq
|
||||
.thumb_func
|
||||
__unhandled_user_irq:
|
||||
mrs r0, ipsr
|
||||
subs r0, #16
|
||||
.global unhandled_user_irq_num_in_r0
|
||||
unhandled_user_irq_num_in_r0:
|
||||
bkpt #0
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
.section .binary_info_header, "a"
|
||||
|
||||
// Header must be in first 256 bytes of main image (i.e. excluding flash boot2).
|
||||
// For flash builds we put it immediately after vector table; for NO_FLASH the
|
||||
// vectors are at a +0x100 offset because the bootrom enters RAM images directly
|
||||
// at their lowest address, so we put the header in the VTOR alignment hole.
|
||||
|
||||
#if !PICO_NO_BINARY_INFO
|
||||
binary_info_header:
|
||||
.word BINARY_INFO_MARKER_START
|
||||
.word __binary_info_start
|
||||
.word __binary_info_end
|
||||
.word data_cpy_table // we may need to decode pointers that are in RAM at runtime.
|
||||
.word BINARY_INFO_MARKER_END
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
.section .reset, "ax"
|
||||
|
||||
// On flash builds, the vector table comes first in the image (conventional).
|
||||
// On NO_FLASH builds, the reset handler section comes first, as the entry
|
||||
// point is at offset 0 (fixed due to bootrom), and VTOR is highly-aligned.
|
||||
// Image is entered in various ways:
|
||||
//
|
||||
// - NO_FLASH builds are entered from beginning by UF2 bootloader
|
||||
//
|
||||
// - Flash builds vector through the table into _reset_handler from boot2
|
||||
//
|
||||
// - Either type can be entered via _entry_point by the debugger, and flash builds
|
||||
// must then be sent back round the boot sequence to properly initialise flash
|
||||
|
||||
// ELF entry point:
|
||||
.type _entry_point,%function
|
||||
.thumb_func
|
||||
.global _entry_point
|
||||
_entry_point:
|
||||
|
||||
#if PICO_NO_FLASH
|
||||
// Vector through our own table (SP, VTOR will not have been set up at
|
||||
// this point). Same path for debugger entry and bootloader entry.
|
||||
ldr r0, =__vectors
|
||||
#else
|
||||
// Debugger tried to run code after loading, so SSI is in 03h-only mode.
|
||||
// Go back through bootrom + boot2 to properly initialise flash.
|
||||
movs r0, #0
|
||||
#endif
|
||||
ldr r1, =(PPB_BASE + M0PLUS_VTOR_OFFSET)
|
||||
str r0, [r1]
|
||||
ldmia r0!, {r1, r2}
|
||||
msr msp, r1
|
||||
bx r2
|
||||
|
||||
// Reset handler:
|
||||
// - initialises .data
|
||||
// - clears .bss
|
||||
// - calls runtime_init
|
||||
// - calls main
|
||||
// - calls exit (which should eventually hang the processor via _exit)
|
||||
|
||||
.type _reset_handler,%function
|
||||
.thumb_func
|
||||
_reset_handler:
|
||||
// Only core 0 should run the C runtime startup code; core 1 is normally
|
||||
// sleeping in the bootrom at this point but check to be sure
|
||||
ldr r0, =(SIO_BASE + SIO_CPUID_OFFSET)
|
||||
ldr r0, [r0]
|
||||
cmp r0, #0
|
||||
bne hold_non_core0_in_bootrom
|
||||
|
||||
// In a NO_FLASH binary, don't perform .data copy, since it's loaded
|
||||
// in-place by the SRAM load. Still need to clear .bss
|
||||
#if !PICO_NO_FLASH
|
||||
adr r4, data_cpy_table
|
||||
|
||||
// assume there is at least one entry
|
||||
1:
|
||||
ldmia r4!, {r1-r3}
|
||||
cmp r1, #0
|
||||
beq 2f
|
||||
bl data_cpy
|
||||
b 1b
|
||||
2:
|
||||
#endif
|
||||
|
||||
// Zero out the BSS
|
||||
ldr r1, =__bss_start__
|
||||
ldr r2, =__bss_end__
|
||||
movs r0, #0
|
||||
b bss_fill_test
|
||||
bss_fill_loop:
|
||||
stm r1!, {r0}
|
||||
bss_fill_test:
|
||||
cmp r1, r2
|
||||
bne bss_fill_loop
|
||||
|
||||
platform_entry: // symbol for stack traces
|
||||
// Use 32-bit jumps, in case these symbols are moved out of branch range
|
||||
// (e.g. if main is in SRAM and crt0 in flash)
|
||||
ldr r1, =runtime_init
|
||||
blx r1
|
||||
ldr r1, =main
|
||||
blx r1
|
||||
ldr r1, =exit
|
||||
blx r1
|
||||
// exit should not return. If it does, hang the core.
|
||||
// (fall through into the hang loop below)
|
||||
1: // separate label because _exit can be moved out of branch range
|
||||
bkpt #0
|
||||
b 1b
|
||||
|
||||
#if !PICO_NO_FLASH
|
||||
// Word copy used by _reset_handler for each data_cpy_table entry.
// In: r1 = source address, r2 = destination start, r3 = destination end.
// The entry point is data_cpy, so the bounds test runs before the first copy
// and an empty range copies nothing. r0 is used as scratch.
data_cpy_loop:
|
||||
// copy one word and post-increment both pointers
ldm r1!, {r0}
|
||||
stm r2!, {r0}
|
||||
data_cpy:
|
||||
// keep copying while the destination pointer is below the end (unsigned compare)
cmp r2, r3
|
||||
blo data_cpy_loop
|
||||
bx lr
|
||||
#endif
|
||||
|
||||
// Note the data copy table is still included for NO_FLASH builds, even though
|
||||
// we skip the copy, because it is listed in binary info
|
||||
|
||||
.align 2
|
||||
data_cpy_table:
|
||||
#if PICO_COPY_TO_RAM
|
||||
.word __ram_text_source__
|
||||
.word __ram_text_start__
|
||||
.word __ram_text_end__
|
||||
#endif
|
||||
.word __etext
|
||||
.word __data_start__
|
||||
.word __data_end__
|
||||
|
||||
.word __scratch_x_source__
|
||||
.word __scratch_x_start__
|
||||
.word __scratch_x_end__
|
||||
|
||||
.word __scratch_y_source__
|
||||
.word __scratch_y_start__
|
||||
.word __scratch_y_end__
|
||||
|
||||
.word 0 // null terminator
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Provide safe defaults for _exit and runtime_init
|
||||
// Full implementations usually provided by platform.c
|
||||
|
||||
.weak runtime_init
|
||||
.type runtime_init,%function
|
||||
.thumb_func
|
||||
runtime_init:
|
||||
bx lr
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// If core 1 somehow gets into crt0 due to a spectacular VTOR mishap, we need to
|
||||
// catch it and send back to the sleep-and-launch code in the bootrom. Shouldn't
|
||||
// happen (it should sleep in the ROM until given an entry point via the
|
||||
// cross-core FIFOs) but it's good to be defensive.
|
||||
|
||||
// Reached from _reset_handler when the CPUID read is non-zero.
hold_non_core0_in_bootrom:
|
||||
// two-character lookup code: 'W' in the low byte, 'V' in the next byte
ldr r0, = 'W' | ('V' << 8)
|
||||
// presumably returns the address of the matching bootrom routine in r0 - confirm against rom_func_lookup
bl rom_func_lookup
|
||||
// jump (not call) to the returned address
bx r0
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Stack/heap dummies to set size
|
||||
|
||||
// Prior to SDK 1.5.1 these were `.section .stack` without the `, "a"`... Clang linker gives a warning about this,
|
||||
// however setting it explicitly to `, "a"` makes GCC *now* discard the section unless it is also KEEP. This
|
||||
// seems like very surprising behavior!
|
||||
//
|
||||
// Strictly the most correct thing to do (as .stack and .heap are unreferenced) is to mark them as "a", and also KEEP, which
|
||||
// works correctly for both GCC and Clang, however doing so may break anyone who already has custom linker scripts without
|
||||
// the KEEP. Therefore we will only add the "a" on Clang, but will also add KEEP to our own linker scripts.
|
||||
|
||||
.macro spacer_section name
|
||||
#if PICO_ASSEMBLER_IS_CLANG
|
||||
.section \name, "a"
|
||||
#else
|
||||
.section \name
|
||||
#endif
|
||||
.endm
|
||||
|
||||
spacer_section .stack
|
||||
// align to allow for memory protection (although this alignment is pretty much ignored by linker script)
|
||||
.p2align 5
|
||||
.equ StackSize, PICO_STACK_SIZE
|
||||
.space StackSize
|
||||
|
||||
spacer_section .heap
|
||||
.p2align 2
|
||||
.equ HeapSize, PICO_HEAP_SIZE
|
||||
.space HeapSize
|
|
@ -1,10 +0,0 @@
|
|||
/**
|
||||
* \defgroup pico_standard_link pico_standard_link
|
||||
* \brief Standard link step providing the basics for creating a runnable binary
|
||||
*
|
||||
* This includes
|
||||
* - C runtime initialization
|
||||
* - Linker scripts for 'default', 'no_flash', 'blocked_ram' and 'copy_to_ram' binaries
|
||||
* - 'Binary Information' support
|
||||
* - Linker option control
|
||||
*/
|
|
@ -1,3 +1,6 @@
|
|||
# Copyright (c) 2024 ARM Limited. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
"""
|
||||
This script can be used to reimport a newer version of the RPi Pico SDK.
|
||||
|
||||
|
@ -65,6 +68,7 @@ FILES_DIRS_TO_COPY: List[pathlib.Path] = [
|
|||
pathlib.Path("pico_sdk_version.cmake"),
|
||||
pathlib.Path("src") / "rp2_common" / "hardware_base",
|
||||
pathlib.Path("src") / "rp2_common" / "hardware_adc",
|
||||
pathlib.Path("src") / "rp2_common" / "hardware_divider",
|
||||
pathlib.Path("src") / "rp2_common" / "hardware_resets",
|
||||
pathlib.Path("src") / "rp2_common" / "hardware_pwm",
|
||||
pathlib.Path("src") / "rp2_common" / "hardware_flash",
|
||||
|
@ -83,6 +87,7 @@ FILES_DIRS_TO_COPY: List[pathlib.Path] = [
|
|||
pathlib.Path("src") / "rp2_common" / "hardware_rtc",
|
||||
pathlib.Path("src") / "rp2_common" / "pico_bootrom",
|
||||
pathlib.Path("src") / "rp2_common" / "pico_platform",
|
||||
pathlib.Path("src") / "rp2_common" / "pico_float",
|
||||
pathlib.Path("src") / "rp2_common" / "cmsis" / "stub",
|
||||
pathlib.Path("src") / "common" / "pico_time",
|
||||
pathlib.Path("src") / "common" / "pico_sync",
|
||||
|
|
Loading…
Reference in New Issue