Enable RPi Pico's optimized ROM floating point routines (#202)

* Enable RPi Pico's optimized ROM floating point routines
* Add license header
2024-01-06 03:16:01 -05:00 · 2024-01-06 03:16:01 -05:00 · c1effb15ec
parent 79c56f3155
commit c1effb15ec
14 changed files with 2519 additions and 457 deletions
--- a/targets/TARGET_RASPBERRYPI/CMakeLists.txt
+++ b/targets/TARGET_RASPBERRYPI/CMakeLists.txt
@ -30,11 +30,83 @@ file(GENERATE
 	CONTENT "${header_content}"
 )

+# Add a link option that wraps the given function name, i.e. -Wl,--wrap=FUNCNAME for gcc.
+#
+# TARGET   - target that receives the link option (added with INTERFACE scope)
+# FUNCNAME - name of the symbol handed to the linker's --wrap option
+#
+# The "LINKER:" prefix lets CMake spell the option the way the active compiler driver expects.
+function(pico_wrap_function TARGET FUNCNAME)
+    target_link_options(${TARGET} INTERFACE "LINKER:--wrap=${FUNCNAME}")
+endfunction()
+
+# Route every single-precision float routine listed below through the linker's --wrap
+# option for TARGET, by calling pico_wrap_function() once per symbol.
+# The symbol list is copied from src/rp2_common/pico_float/CMakeLists.txt
+function(wrap_float_functions TARGET)
+	set(wrapped_float_symbols
+		__aeabi_fadd
+		__aeabi_fdiv
+		__aeabi_fmul
+		__aeabi_frsub
+		__aeabi_fsub
+		__aeabi_cfcmpeq
+		__aeabi_cfrcmple
+		__aeabi_cfcmple
+		__aeabi_fcmpeq
+		__aeabi_fcmplt
+		__aeabi_fcmple
+		__aeabi_fcmpge
+		__aeabi_fcmpgt
+		__aeabi_fcmpun
+		__aeabi_i2f
+		__aeabi_l2f
+		__aeabi_ui2f
+		__aeabi_ul2f
+		__aeabi_f2iz
+		__aeabi_f2lz
+		__aeabi_f2uiz
+		__aeabi_f2ulz
+		__aeabi_f2d
+		sqrtf
+		cosf
+		sinf
+		tanf
+		atan2f
+		expf
+		logf
+
+		ldexpf
+		copysignf
+		truncf
+		floorf
+		ceilf
+		roundf
+		sincosf # gnu
+		asinf
+		acosf
+		atanf
+		sinhf
+		coshf
+		tanhf
+		asinhf
+		acoshf
+		atanhf
+		exp2f
+		log2f
+		exp10f
+		log10f
+		powf
+		powintf # gnu
+		hypotf
+		cbrtf
+		fmodf
+		dremf
+		remainderf
+		remquof
+		expm1f
+		log1pf
+		fmaf
+	)
+
+	# Same symbols, same order as the original one-call-per-line listing.
+	foreach(float_symbol IN LISTS wrapped_float_symbols)
+		pico_wrap_function(${TARGET} ${float_symbol})
+	endforeach()
+endfunction()
+
 # Now, add includes and headers from the Pico SDK
 target_include_directories(mbed-raspberrypi
    INTERFACE
        .
 		pico-sdk/src/rp2_common/hardware_adc/include
+		pico-sdk/src/rp2_common/hardware_divider/include
 		pico-sdk/src/rp2_common/hardware_gpio/include
 		pico-sdk/src/rp2_common/hardware_resets/include
 		pico-sdk/src/rp2_common/hardware_pwm/include
@ -54,6 +126,7 @@ target_include_directories(mbed-raspberrypi
 		pico-sdk/src/rp2_common/pico_platform/include
 		pico-sdk/src/rp2_common/pico_fix/rp2040_usb_device_enumeration/include/
 		pico-sdk/src/rp2_common/pico_bootrom/include
+		pico-sdk/src/rp2_common/pico_float/include
 		pico-sdk/src/rp2_common/hardware_claim/include
 		pico-sdk/src/common/pico_sync/include
 		pico-sdk/src/common/pico_time/include
@ -89,6 +162,11 @@ target_sources(mbed-raspberrypi
 		pico-sdk/src/common/pico_time/time.c
 		pico-sdk/src/common/pico_sync/lock_core.c
 		pico-sdk/src/rp2_common/cmsis/stub/CMSIS/Device/RaspberryPi/RP2040/Source/system_RP2040.c
+		pico-sdk/src/rp2_common/pico_float/float_aeabi.S
+		pico-sdk/src/rp2_common/pico_float/float_init_rom.c
+		pico-sdk/src/rp2_common/pico_float/float_math.c
+		pico-sdk/src/rp2_common/pico_float/float_v1_rom_shim.S
+		pico-sdk/src/rp2_common/hardware_divider/divider.S
 )

 target_compile_definitions(mbed-raspberrypi
@ -110,4 +188,7 @@ target_sources(mbed-rp2040
 		pico-sdk/src/rp2_common/pico_fix/rp2040_usb_device_enumeration/rp2040_usb_device_enumeration.c
 )

+# Enable usage of the RPi Pico optimized floating point routines
+wrap_float_functions(mbed-rp2040)
+
 add_subdirectory(TARGET_RP2040 EXCLUDE_FROM_ALL)
--- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/divider.S
+++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/divider.S
@ -0,0 +1,55 @@
+#include "pico/asm_helper.S"
+#include "hardware/regs/addressmap.h"
+#include "hardware/regs/sio.h"
+
+pico_default_asm_setup
+
+// Signed divide/modulo on the SIO hardware divider.
+// r0 is written to the signed dividend register and r1 to the signed divisor register; control then
+// passes to hw_divider_divmod_return, which leaves the quotient in r0 and the remainder in r1.
+// r3 is overwritten with SIO_BASE.
+// tag::hw_div_s32[]
+regular_func_with_section hw_divider_divmod_s32
+    ldr r3, =(SIO_BASE)
+    str r0, [r3, #SIO_DIV_SDIVIDEND_OFFSET]
+    str r1, [r3, #SIO_DIV_SDIVISOR_OFFSET]
+    b hw_divider_divmod_return
+// end::hw_div_s32[]
+
+// Unsigned divide/modulo on the SIO hardware divider.
+// Same shape as the signed variant, but the operands go to the unsigned dividend/divisor registers.
+// tag::hw_div_u32[]
+regular_func_with_section hw_divider_divmod_u32
+    ldr r3, =(SIO_BASE)
+    str r0, [r3, #SIO_DIV_UDIVIDEND_OFFSET]
+    str r1, [r3, #SIO_DIV_UDIVISOR_OFFSET]
+    b hw_divider_divmod_return
+// end::hw_div_u32[]
+
+// Common delay and return section for s32 and u32
+// Expects r3 to still hold SIO_BASE from the caller above.
+.section .text.hw_divider_divmod_return
+hw_divider_divmod_return:
+    // Branching here is 2 cycles, delay another 6
+    // (three branches to the immediately following instruction, used purely as delay)
+    b 1f
+1:  b 1f
+1:  b 1f
+1:  // return 64 bit value so we can efficiently return both (note quotient must be read last)
+    ldr r1, [r3, #SIO_DIV_REMAINDER_OFFSET]
+    ldr r0, [r3, #SIO_DIV_QUOTIENT_OFFSET]
+    bx lr
+
+// Copy the divider registers to the memory pointed to by r0.
+// Four words are stored in this order: dividend, divisor, remainder, quotient.
+// r0 is advanced past the stored words (stmia with writeback); r1-r3 are overwritten.
+regular_func_with_section hw_divider_save_state
+    ldr r3, =SIO_BASE
+    ldr r1, [r3, #SIO_DIV_UDIVIDEND_OFFSET]
+    ldr r2, [r3, #SIO_DIV_UDIVISOR_OFFSET]
+    stmia r0!, {r1-r2}
+    // The 8 cycles needed to guarantee that the result is ready is ensured by the preceding
+    // code of 7 cycles together with any branch to it taking at least 2 cycles.
+    // The quotient is read after the remainder.
+    ldr r1, [r3, #SIO_DIV_REMAINDER_OFFSET]
+    ldr r2, [r3, #SIO_DIV_QUOTIENT_OFFSET]
+    stmia r0!, {r1-r2}
+    bx lr
+
+// Write four words from the memory pointed to by r0 back into the divider registers.
+// The words are consumed in this order: dividend, divisor, remainder, quotient; the operands are
+// written through the unsigned dividend/divisor registers and the quotient is written last.
+// r0 is advanced past the loaded words (ldmia with writeback); r1-r3 are overwritten.
+regular_func_with_section hw_divider_restore_state
+    ldr r3, =SIO_BASE
+    ldmia r0!, {r1-r2}
+    str r1, [r3, #SIO_DIV_UDIVIDEND_OFFSET]
+    str r2, [r3, #SIO_DIV_UDIVISOR_OFFSET]
+    ldmia r0!, {r1-r2}
+    str r1, [r3, #SIO_DIV_REMAINDER_OFFSET]
+    str r2, [r3, #SIO_DIV_QUOTIENT_OFFSET]
+    bx lr
--- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/include/hardware/divider.h
+++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/include/hardware/divider.h
@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef _HARDWARE_DIVIDER_H
+#define _HARDWARE_DIVIDER_H
+
+#include "pico.h"
+#include "hardware/structs/sio.h"
+
+/** \file hardware/divider.h
+ *  \defgroup hardware_divider hardware_divider
+ *
+ * Low-level hardware-divider access
+ *
+ * The SIO contains an 8-cycle signed/unsigned divide/modulo circuit, per core. Calculation is started by writing a dividend
+ * and divisor to the two argument registers, DIVIDEND and DIVISOR. The divider calculates the quotient / and remainder % of
+ * this division over the next 8 cycles, and on the 9th cycle the results can be read from the two result registers
+ * DIV_QUOTIENT and DIV_REMAINDER. A 'ready' bit in register DIV_CSR can be polled to wait for the calculation to
+ * complete, or software can insert a fixed 8-cycle delay
+ *
+ * This header provides low level macros and inline functions for accessing the hardware dividers directly,
+ * and perhaps most usefully performing asynchronous divides. These functions however do not follow the regular
+ * SDK conventions for saving/restoring the divider state, so are not generally safe to call from interrupt handlers
+ *
+ * The pico_divider library provides a more user friendly set of APIs over the divider (and support for
+ * 64 bit divides), and of course by default regular C language integer divisions are redirected through that library, meaning
+ * you can just use C level `/` and `%` operators and gain the benefits of the fast hardware divider.
+ *
+ * @see pico_divider
+ *
+ * \subsection divider_example Example
+ * \addtogroup hardware_divider
+ * \include hello_divider.c
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef uint64_t divmod_result_t;
+
+/*! \brief Start a signed asynchronous divide
+ *  \ingroup hardware_divider
+ *
+ * Start a divide of the specified signed parameters. You should wait for 8 cycles (hw_divider_pause()) or wait for the ready bit to be set
+ * (hw_divider_wait_ready()) prior to reading the results.
+ *
+ * \param a The dividend
+ * \param b The divisor
+ */
+static inline void hw_divider_divmod_s32_start(int32_t a, int32_t b) {
+    // NOTE(review): check_hw_layout is defined elsewhere; presumably a compile-time check that
+    // div_sdividend sits at SIO_DIV_SDIVIDEND_OFFSET inside sio_hw_t - confirm
+    check_hw_layout( sio_hw_t, div_sdividend, SIO_DIV_SDIVIDEND_OFFSET);
+    // operands are handed to the signed operand registers as raw 32-bit patterns
+    sio_hw->div_sdividend = (uint32_t)a;
+    sio_hw->div_sdivisor = (uint32_t)b;
+}
+
+/*! \brief Start an unsigned asynchronous divide
+ *  \ingroup hardware_divider
+ *
+ * Start a divide of the specified unsigned parameters. You should wait for 8 cycles (hw_divider_pause()) or wait for the ready bit to be set
+ * (hw_divider_wait_ready()) prior to reading the results.
+ *
+ * \param a The dividend
+ * \param b The divisor
+ */
+static inline void hw_divider_divmod_u32_start(uint32_t a, uint32_t b) {
+    // NOTE(review): check_hw_layout is defined elsewhere; presumably a compile-time check that
+    // div_udividend sits at SIO_DIV_UDIVIDEND_OFFSET inside sio_hw_t - confirm
+    check_hw_layout(
+            sio_hw_t, div_udividend, SIO_DIV_UDIVIDEND_OFFSET);
+    // operands go to the unsigned operand registers
+    sio_hw->div_udividend = a;
+    sio_hw->div_udivisor = b;
+}
+
+/*! \brief Wait for a divide to complete
+ *  \ingroup hardware_divider
+ *
+ * Spin on the divider CSR register until its ready bit reads as set.
+ */
+static inline void hw_divider_wait_ready(void) {
+    // this is #1 in lsr below
+    // (the ready flag is bit 0, so a right shift by one moves it into the carry flag)
+    static_assert(SIO_DIV_CSR_READY_BITS == 1, "");
+
+    // we use one less register and instruction than gcc which uses a TST instruction
+
+    uint32_t tmp; // allow compiler to pick scratch register
+    pico_default_asm_volatile (
+    "hw_divider_result_loop_%=:"
+    "ldr %0, [%1, %2]\n\t"
+    "lsrs %0, %0, #1\n\t"
+    "bcc hw_divider_result_loop_%=\n\t"
+    : "=&l" (tmp)
+    : "l" (sio_hw), "I" (SIO_DIV_CSR_OFFSET)
+    : "cc" // lsrs rewrites the condition flags, so the compiler must be told they are clobbered
+    );
+}
+
+/*! \brief Return result of HW divide, nowait
+ *  \ingroup hardware_divider
+ *
+ * \note This is UNSAFE in that the calculation may not have been completed.
+ *
+ * \return Current result. Most significant 32 bits are the remainder, lower 32 bits are the quotient.
+ */
+static inline divmod_result_t hw_divider_result_nowait(void) {
+    // as ugly as this looks it is actually quite efficient
+    // the remainder register is read first and placed in the upper word ...
+    divmod_result_t rc = ((divmod_result_t) sio_hw->div_remainder) << 32u;
+    // ... and the quotient register is read afterwards and merged into the lower word
+    rc |= sio_hw->div_quotient;
+    return rc;
+}
+
+/*! \brief Wait for the current divide to finish and return its packed result
+ *  \ingroup hardware_divider
+ *
+ * Calls hw_divider_wait_ready() and then hw_divider_result_nowait().
+ *
+ * \return Current result. Most significant 32 bits are the remainder, lower 32 bits are the quotient.
+ */
+static inline divmod_result_t hw_divider_result_wait(void) {
+    hw_divider_wait_ready();
+    const divmod_result_t packed = hw_divider_result_nowait();
+    return packed;
+}
+
+/*! \brief Wait for the current divide to finish and return the unsigned quotient
+ *  \ingroup hardware_divider
+ *
+ * Calls hw_divider_wait_ready() before reading the quotient register.
+ *
+ * \return Current unsigned quotient result.
+ */
+static inline uint32_t hw_divider_u32_quotient_wait(void) {
+    hw_divider_wait_ready();
+    const uint32_t quotient = sio_hw->div_quotient;
+    return quotient;
+}
+
+/*! \brief Wait for the current divide to finish and return the signed quotient
+ *  \ingroup hardware_divider
+ *
+ * Calls hw_divider_wait_ready() before reading the quotient register.
+ *
+ * \return Current signed quotient result.
+ */
+static inline int32_t hw_divider_s32_quotient_wait(void) {
+    hw_divider_wait_ready();
+    const int32_t quotient = (int32_t)sio_hw->div_quotient;
+    return quotient;
+}
+
+/*! \brief Wait for the current divide to finish and return the unsigned remainder
+ *  \ingroup hardware_divider
+ *
+ * Calls hw_divider_wait_ready(), reads the remainder register, then also reads the quotient register.
+ *
+ * \return Current unsigned remainder result.
+ */
+static inline uint32_t hw_divider_u32_remainder_wait(void) {
+    hw_divider_wait_ready();
+    const uint32_t remainder = sio_hw->div_remainder;
+    // the value is discarded: the quotient must still be read to cooperate with other SDK code
+    sio_hw->div_quotient;
+    return remainder;
+}
+
+/*! \brief Wait for the current divide to finish and return the signed remainder
+ *  \ingroup hardware_divider
+ *
+ * Calls hw_divider_wait_ready(), reads the remainder register, then also reads the quotient register.
+ *
+ * \return Current signed remainder result.
+ */
+static inline int32_t hw_divider_s32_remainder_wait(void) {
+    hw_divider_wait_ready();
+    const int32_t remainder = (int32_t)sio_hw->div_remainder;
+    // the value is discarded: the quotient must still be read to cooperate with other SDK code
+    sio_hw->div_quotient;
+    return remainder;
+}
+
+/*! \brief Do a signed HW divide and wait for result
+ *  \ingroup hardware_divider
+ *
+ * Divide \p a by \p b, wait for calculation to complete, return result as a pair of 32-bit quotient/remainder values.
+ *
+ * \param a The dividend
+ * \param b The divisor
+ * \return Results of divide as a pair of 32-bit quotient/remainder values.
+ */
+divmod_result_t hw_divider_divmod_s32(int32_t a, int32_t b);
+
+/*! \brief Do an unsigned HW divide and wait for result
+ *  \ingroup hardware_divider
+ *
+ * Divide \p a by \p b, wait for calculation to complete, return result as a pair of 32-bit quotient/remainder values.
+ *
+ * \param a The dividend
+ * \param b The divisor
+ * \return Results of divide as a pair of 32-bit quotient/remainder values.
+ */
+divmod_result_t hw_divider_divmod_u32(uint32_t a, uint32_t b);
+
+/*! \brief Extract the unsigned quotient from a packed divide result
+ *  \ingroup hardware_divider
+ *
+ * \param r A pair of 32-bit quotient/remainder values.
+ * \return Unsigned quotient (the low 32 bits of \p r)
+ */
+inline static uint32_t to_quotient_u32(divmod_result_t r) {
+    const uint32_t low_word = (uint32_t) r;
+    return low_word;
+}
+
+/*! \brief Extract the signed quotient from a packed divide result
+ *  \ingroup hardware_divider
+ *
+ * \param r A pair of 32-bit quotient/remainder values.
+ * \return Signed quotient (the low 32 bits of \p r, reinterpreted as signed)
+ */
+inline static int32_t to_quotient_s32(divmod_result_t r) {
+    const uint32_t low_word = (uint32_t)r;
+    return (int32_t)low_word;
+}
+
+/*! \brief Extract the unsigned remainder from a packed divide result
+ *  \ingroup hardware_divider
+ *
+ * \param r A pair of 32-bit quotient/remainder values.
+ * \return Unsigned remainder (the high 32 bits of \p r)
+ *
+ * \note On Arm this is just a 32 bit register move or a nop
+ */
+inline static uint32_t to_remainder_u32(divmod_result_t r) {
+    const divmod_result_t high_word = r >> 32u;
+    return (uint32_t)high_word;
+}
+
+/*! \brief Extract the signed remainder from a packed divide result
+ *  \ingroup hardware_divider
+ *
+ * \param r A pair of 32-bit quotient/remainder values.
+ * \return Signed remainder (the high 32 bits of \p r, converted to signed)
+ *
+ * \note On Arm this is just a 32 bit register move or a nop
+ */
+inline static int32_t to_remainder_s32(divmod_result_t r) {
+    const divmod_result_t high_word = r >> 32u;
+    return (int32_t)high_word;
+}
+
+/*! \brief Blocking unsigned hardware divide returning only the quotient
+ *  \ingroup hardware_divider
+ *
+ * Runs hw_divider_divmod_u32() on \p a and \p b and extracts the quotient from its result.
+ *
+ * \param a The dividend
+ * \param b The divisor
+ * \return Quotient results of the divide
+ */
+static inline uint32_t hw_divider_u32_quotient(uint32_t a, uint32_t b) {
+    const divmod_result_t packed = hw_divider_divmod_u32(a, b);
+    return to_quotient_u32(packed);
+}
+
+/*! \brief Blocking unsigned hardware divide returning only the remainder
+ *  \ingroup hardware_divider
+ *
+ * Runs hw_divider_divmod_u32() on \p a and \p b and extracts the remainder from its result.
+ *
+ * \param a The dividend
+ * \param b The divisor
+ * \return Remainder results of the divide
+ */
+static inline uint32_t hw_divider_u32_remainder(uint32_t a, uint32_t b) {
+    const divmod_result_t packed = hw_divider_divmod_u32(a, b);
+    return to_remainder_u32(packed);
+}
+
+/*! \brief Blocking signed hardware divide returning only the quotient
+ *  \ingroup hardware_divider
+ *
+ * Runs hw_divider_divmod_s32() on \p a and \p b and extracts the quotient from its result.
+ *
+ * \param a The dividend
+ * \param b The divisor
+ * \return Quotient results of the divide
+ */
+static inline int32_t hw_divider_quotient_s32(int32_t a, int32_t b) {
+    const divmod_result_t packed = hw_divider_divmod_s32(a, b);
+    return to_quotient_s32(packed);
+}
+
+/*! \brief Blocking signed hardware divide returning only the remainder
+ *  \ingroup hardware_divider
+ *
+ * Runs hw_divider_divmod_s32() on \p a and \p b and extracts the remainder from its result.
+ *
+ * \param a The dividend
+ * \param b The divisor
+ * \return Remainder results of the divide
+ */
+static inline int32_t hw_divider_remainder_s32(int32_t a, int32_t b) {
+    const divmod_result_t packed = hw_divider_divmod_s32(a, b);
+    return to_remainder_s32(packed);
+}
+
+/*! \brief Pause for exact amount of time needed for an asynchronous divide to complete
+ *  \ingroup hardware_divider
+ *
+ * Implemented as four consecutive branches, each to the instruction directly after it.
+ */
+static inline void hw_divider_pause(void) {
+    // the branches do no work and exist only to burn cycles; %= makes the labels unique per expansion
+    pico_default_asm_volatile(
+    "b _1_%=\n"
+    "_1_%=:\n"
+    "b _2_%=\n"
+    "_2_%=:\n"
+    "b _3_%=\n"
+    "_3_%=:\n"
+    "b _4_%=\n"
+    "_4_%=:\n"
+    :::);
+}
+
+/*! \brief Inlined unsigned hardware divide returning the quotient
+ *  \ingroup hardware_divider
+ *
+ * Starts the divide of \p a by \p b, pauses with hw_divider_pause(), then reads the quotient register.
+ *
+ * \param a The dividend
+ * \param b The divisor
+ * \return Quotient result of the divide
+ */
+static inline uint32_t hw_divider_u32_quotient_inlined(uint32_t a, uint32_t b) {
+    hw_divider_divmod_u32_start(a, b);
+    hw_divider_pause();
+    const uint32_t quotient = sio_hw->div_quotient;
+    return quotient;
+}
+
+/*! \brief Inlined unsigned hardware divide returning the remainder
+ *  \ingroup hardware_divider
+ *
+ * Starts the divide of \p a by \p b, pauses with hw_divider_pause(), reads the remainder register
+ * and then also reads the quotient register.
+ *
+ * \param a The dividend
+ * \param b The divisor
+ * \return Remainder result of the divide
+ */
+static inline uint32_t hw_divider_u32_remainder_inlined(uint32_t a, uint32_t b) {
+    hw_divider_divmod_u32_start(a, b);
+    hw_divider_pause();
+    const uint32_t remainder = sio_hw->div_remainder;
+    // the value is discarded: the quotient must still be read to cooperate with other SDK code
+    sio_hw->div_quotient;
+    return remainder;
+}
+
+/*! \brief Inlined signed hardware divide returning the quotient
+ *  \ingroup hardware_divider
+ *
+ * Starts the divide of \p a by \p b, pauses with hw_divider_pause(), then reads the quotient register.
+ *
+ * \param a The dividend
+ * \param b The divisor
+ * \return Quotient result of the divide
+ */
+static inline int32_t hw_divider_s32_quotient_inlined(int32_t a, int32_t b) {
+    hw_divider_divmod_s32_start(a, b);
+    hw_divider_pause();
+    const int32_t quotient = (int32_t)sio_hw->div_quotient;
+    return quotient;
+}
+
+/*! \brief Inlined signed hardware divide returning the remainder
+ *  \ingroup hardware_divider
+ *
+ * Starts the divide of \p a by \p b, pauses with hw_divider_pause(), reads the remainder register
+ * and then also reads the quotient register.
+ *
+ * \param a The dividend
+ * \param b The divisor
+ * \return Remainder result of the divide
+ */
+static inline int32_t hw_divider_s32_remainder_inlined(int32_t a, int32_t b) {
+    hw_divider_divmod_s32_start(a, b);
+    hw_divider_pause();
+    const int32_t remainder = (int32_t)sio_hw->div_remainder;
+    // the value is discarded: the quotient must still be read to cooperate with other SDK code
+    sio_hw->div_quotient;
+    return remainder;
+}
+
+typedef struct {
+    uint32_t values[4];
+} hw_divider_state_t;
+
+/*! \brief Save the calling cores hardware divider state
+ *  \ingroup hardware_divider
+ *
+ * Copy the current core's hardware divider state into the provided structure. This method
+ * waits for the divider results to be stable, then copies them to memory.
+ * They can be restored via hw_divider_restore_state()
+ *
+ * \param dest the location to store the divider state
+ */
+void hw_divider_save_state(hw_divider_state_t *dest);
+
+/*! \brief Load a saved hardware divider state into the current core's hardware divider
+ *  \ingroup hardware_divider
+ *
+ * Copy the passed hardware divider state into the hardware divider.
+ *
+ * \param src the location to load the divider state from
+ */
+
+void hw_divider_restore_state(hw_divider_state_t *src);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _HARDWARE_DIVIDER_H
--- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/include/hardware/divider_helper.S
+++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/hardware_divider/include/hardware/divider_helper.S
@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+// Note this file is always included by another, so does not do pico_default_asm_setup
+#include "hardware/regs/addressmap.h"
+#include "hardware/regs/sio.h"
+
+// Shift counts that move a DIV_CSR flag into the carry flag with a single lsrs:
+// a flag at bit position N needs a right shift by N + 1.
+// The build is stopped with an explicit message if the register layout ever differs.
+#if SIO_DIV_CSR_READY_LSB == 0
+.equ SIO_DIV_CSR_READY_SHIFT_FOR_CARRY, 1
+#else
+#error "SIO_DIV_CSR_READY_LSB is not 0: SIO_DIV_CSR_READY_SHIFT_FOR_CARRY needs to change"
+#endif
+#if SIO_DIV_CSR_DIRTY_LSB == 1
+.equ SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY, 2
+#else
+#error "SIO_DIV_CSR_DIRTY_LSB is not 1: SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY needs to change"
+#endif
+
+// SIO_BASE ptr in r2; pushes r4-r7, lr to stack
+// On exit: r4 = dividend, r5 = divisor, r6 = quotient, r7 = remainder
+.macro save_div_state_and_lr
+// originally we did this, however a) it uses r3, and b) the push and dividend/divisor
+// readout takes 8 cycles, c) any IRQ which uses the divider will necessarily put the
+// data back, which will immediately make it ready
+//
+//    // ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
+//    // // wait for results as we can't save signed-ness of operation
+//    // 1:
+//    //     lsrs r3, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY
+//    //     bcc 1b
+
+// 6 cycle push + 2 ldr ensures the 8 cycle delay before remainder and quotient are ready
+push {r4, r5, r6, r7, lr}
+// note we must read quotient last, and since it isn't the last reg, we'll not use ldmia!
+ldr r4, [r2, #SIO_DIV_UDIVIDEND_OFFSET]
+ldr r5, [r2, #SIO_DIV_UDIVISOR_OFFSET]
+ldr r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
+ldr r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
+.endm
+
+// restores divider state from r4-r7, then pops them and pc
+// expects the SIO_BASE ptr in r2
+.macro restore_div_state_and_return
+// writing dividend (r4), divisor (r5), remainder (r7), quotient (r6) in that order
+// NOTE(review): the walkthrough below lists r6 before r7, whereas the stores at the end of this
+//      macro write r7 (remainder) before r6 (quotient) - confirm the analysis still holds
+//
+// it is worth considering what happens if we are interrupted
+//
+// after writing r4: we are DIRTY and !READY
+//    ... interruptor using div will complete based on incorrect inputs, but dividend at least will be
+//        saved/restored correctly and we'll restore the rest ourselves
+// after writing r4, r5: we are DIRTY and !READY
+//    ... interruptor using div will complete based on possibly wrongly signed inputs, but dividend, divisor
+//        at least will be saved/restored correctly and we'll restore the rest ourselves
+// after writing r4, r5, r6: we are DIRTY and READY
+//    ... interruptor using div will use dividend, divisor, quotient registers as is (what we just restored ourselves),
+//        and we'll restore the remainder after the fact
+
+// note we do not use STM - not because it can be restarted due to an interrupt (which is harmless), but because this
+//      is 1 cycle IO space, so 4 individual stores are cheaper (and we don't have to adjust r2)
+// note also, that we must restore via UDIVI* rather than SDIVI* to prevent the quotient/remainder being negated on read based
+//      on the signs of the inputs
+str r4, [r2, #SIO_DIV_UDIVIDEND_OFFSET]
+str r5, [r2, #SIO_DIV_UDIVISOR_OFFSET]
+str r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
+str r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
+pop {r4, r5, r6, r7, pc}
+.endm
--- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_aeabi.S
+++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_aeabi.S
@ -0,0 +1,769 @@
+/*
+ * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "pico/asm_helper.S"
+#include "pico/bootrom/sf_table.h"
+#include "hardware/divider_helper.S"
+
+__pre_init __aeabi_float_init, 00020
+
+pico_default_asm_setup
+
+// Open an executable ("ax") section for \name, using the RAM section naming when
+// PICO_FLOAT_IN_RAM is set and the regular section naming otherwise.
+.macro float_section name
+#if PICO_FLOAT_IN_RAM
+.section RAM_SECTION_NAME(\name), "ax"
+#else
+.section SECTION_NAME(\name), "ax"
+#endif
+.endm
+
+// Open the section that holds the wrapper for \func.
+.macro float_wrapper_section func
+float_section WRAPPER_FUNC_NAME(\func)
+.endm
+
+// Thin alias for wrapper_func (defined outside this file).
+.macro _float_wrapper_func x
+    wrapper_func \x
+.endm
+
+// Declare a wrapper taking one float argument. When PICO_FLOAT_PROPAGATE_NANS is set the
+// prologue parks lr in ip, calls __check_nan_f1 and then puts lr back.
+.macro wrapper_func_f1 x
+   _float_wrapper_func \x
+#if PICO_FLOAT_PROPAGATE_NANS
+    mov ip, lr
+    bl __check_nan_f1
+    mov lr, ip
+#endif
+.endm
+
+// Declare a wrapper taking two float arguments. Same as wrapper_func_f1, but the NaN check
+// is done by __check_nan_f2.
+.macro wrapper_func_f2 x
+   _float_wrapper_func \x
+#if PICO_FLOAT_PROPAGATE_NANS
+    mov ip, lr
+    bl __check_nan_f2
+    mov lr, ip
+#endif
+.endm
+
+.section .text
+
+// NaN pre-checks called from the wrapper_func_f1 / wrapper_func_f2 prologues, which place the
+// wrapper's own return address in ip before the call.
+//
+// The test used for each operand x: (x << 1) + 0x01000000.
+//   - the addition carries out only when the exponent field is all ones
+//   - the sum is zero only when, additionally, the mantissa is zero (an infinity)
+// so "bhi" (carry set and result non-zero) is taken exactly for a NaN.
+//
+// If a NaN is found, control returns through ip with the NaN in r0, skipping the rest of the wrapper;
+// otherwise control returns through lr to the wrapper. r2 and r3 are overwritten.
+#if PICO_FLOAT_PROPAGATE_NANS
+.thumb_func
+__check_nan_f1:
+   movs r3, #1
+   lsls r3, #24
+   lsls r2, r0, #1
+   adds r2, r3
+   bhi 1f
+   bx lr
+1:
+   bx ip
+
+.thumb_func
+__check_nan_f2:
+   movs r3, #1
+   lsls r3, #24
+   lsls r2, r0, #1
+   adds r2, r3
+   bhi 1f
+   lsls r2, r1, #1
+   adds r2, r3
+   bhi 2f
+   bx lr
+2:
+   // second operand is the NaN: move it to r0 so that it becomes the result
+   mov r0, r1
+1:
+   bx ip
+#endif
+
+// Tail-call the routine whose address is stored at byte offset SF_TABLE_OFFSET in sf_table.
+// r3 is overwritten; the callee returns straight to whatever lr holds at this point.
+.macro table_tail_call SF_TABLE_OFFSET
+#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
+#ifndef NDEBUG
+    // debug builds clear ip first
+    // NOTE(review): presumably so that a V1 ROM shim can tell this call is not shimmable - confirm
+    movs r3, #0
+    mov ip, r3
+#endif
+#endif
+    ldr r3, =sf_table
+    ldr r3, [r3, #\SF_TABLE_OFFSET]
+    bx r3
+.endm
+
+// Like table_tail_call, but when V1 ROM support is enabled it also copies pc into ip before the
+// jump and emits the table offset, the byte 0xdf and the address of \shim right after the bx.
+// NOTE(review): presumably the V1 ROM shim locates that data via ip - confirm against float_v1_rom_shim.S
+.macro shimmable_table_tail_call SF_TABLE_OFFSET shim
+    ldr r3, =sf_table
+    ldr r3, [r3, #\SF_TABLE_OFFSET]
+#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
+    mov ip, pc
+#endif
+    bx r3
+#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
+.byte \SF_TABLE_OFFSET, 0xdf
+.word \shim
+#endif
+.endm
+
+
+// note generally each function is in a separate section unless there is fall thru or branching between them
+// note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool
+
+// note functions are word aligned except where they are an odd number of linear instructions
+
+// float FUNC_NAME(__aeabi_fadd)(float, float)         single-precision addition
+float_wrapper_section __aeabi_farithmetic
+// float FUNC_NAME(__aeabi_frsub)(float x, float y)    single-precision reverse subtraction, y - x
+
+// frsub first because it is the only one that needs alignment
+.align 2
+wrapper_func __aeabi_frsub
+    // swap r0 and r1 with three XORs (no scratch register needed)
+    eors r0, r1
+    eors r1, r0
+    eors r0, r1
+    // fall thru
+
+// float FUNC_NAME(__aeabi_fsub)(float x, float y)     single-precision subtraction, x - y
+wrapper_func_f2 __aeabi_fsub
+#if PICO_FLOAT_PROPAGATE_NANS
+    // we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
+    mov r2, r0
+    eors r2, r1
+    bmi 1f // different signs
+    // same signs: keep the operands and return address on the stack, run the subtraction below as a
+    // subroutine (bl 1f), then let fdiv_fsub_nan_helper inspect the result
+    push {r0, r1, lr}
+    bl 1f
+    b fdiv_fsub_nan_helper
+1:
+#endif
+    table_tail_call SF_TABLE_FSUB
+
+wrapper_func_f2 __aeabi_fadd
+    table_tail_call SF_TABLE_FADD
+
+// float FUNC_NAME(__aeabi_fdiv)(float n, float d)     single-precision division, n / d
+wrapper_func_f2 __aeabi_fdiv
+#if PICO_FLOAT_PROPAGATE_NANS
+    // keep the operands and return address on the stack, run the division below as a subroutine
+    // (bl 1f), then let fdiv_fsub_nan_helper inspect the result
+    push {r0, r1, lr}
+    bl 1f
+    b fdiv_fsub_nan_helper
+1:
+#endif
+#if !PICO_DIVIDER_DISABLE_INTERRUPTS
+    // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
+    ldr r2, =(SIO_BASE)
+    ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
+    // move the DIRTY flag into carry
+    lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
+    bcs fdiv_save_state
+#else
+    // to avoid worrying about IRQs (or context switches), simply disable interrupts around call
+    push {r4, lr}
+    mrs r4, PRIMASK
+    cpsid i
+    bl fdiv_shim_call
+    // put back the PRIMASK value that was in effect on entry
+    msr PRIMASK, r4
+    pop {r4, pc}
+#endif
+fdiv_shim_call:
+    table_tail_call SF_TABLE_FDIV
+#if !PICO_DIVIDER_DISABLE_INTERRUPTS
+fdiv_save_state:
+    // divider was dirty: save its registers (r2 still holds SIO_BASE), do the divide, then restore
+    save_div_state_and_lr
+    bl fdiv_shim_call
+    // reload SIO_BASE into r2 for the restore macro
+    ldr r2, =(SIO_BASE)
+    restore_div_state_and_return
+#endif
+
+// Shared tail for fsub/fdiv when NaN propagation is enabled.
+// On entry r0 = result and the stack holds {first operand, second operand, return address}.
+fdiv_fsub_nan_helper:
+#if PICO_FLOAT_PROPAGATE_NANS
+    // original operands -> r1, r2
+    pop {r1, r2}
+
+    // check for infinite op infinite (or rather check for infinite result with both
+    // operands being infinite)
+    // (exponent sign-extended then incremented is zero only when the exponent field is all ones)
+    lsls r3, r0, #1
+    asrs r3, r3, #24
+    adds r3, #1
+    beq 2f
+    pop {pc}
+2:
+    // same exponent test on both operands at once: the AND is all ones only if both exponents are
+    lsls r1, #1
+    asrs r1, r1, #24
+    lsls r2, #1
+    asrs r2, r2, #24
+    ands r1, r2
+    adds r1, #1
+    bne 3f
+    // infinite to nan
+    // (sets bit 22, i.e. makes the mantissa non-zero)
+    movs r1, #1
+    lsls r1, #22
+    orrs r0, r1
+3:
+    pop {pc}
+#endif
+
+// float FUNC_NAME(__aeabi_fmul)(float, float)         single-precision multiplication
+wrapper_func_f2 __aeabi_fmul
+#if PICO_FLOAT_PROPAGATE_NANS
+    // keep the operands and return address on the stack and run the multiply (label 1 below) as a subroutine
+    push {r0, r1, lr}
+    bl 1f
+    // original operands -> r1, r2
+    pop {r1, r2}
+
+    // check for multiplication of infinite by zero (or rather check for infinite result with either
+    // operand 0)
+    lsls r3, r0, #1
+    asrs r3, r3, #24
+    adds r3, #1
+    beq 2f
+    pop {pc}
+2:
+    // the test is a bitwise AND of the raw operands: zero only when they share no set bit
+    // NOTE(review): -0.0 and -inf share the sign bit, so that pair would not be turned into a NaN here - confirm intended
+    ands r1, r2
+    bne 3f
+    // infinite to nan
+    movs r1, #1
+    lsls r1, #22
+    orrs r0, r1
+3:
+    pop {pc}
+1:
+#endif
+    table_tail_call SF_TABLE_FMUL
+
+// void FUNC_NAME(__aeabi_cfrcmple)(float, float)         reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
+float_wrapper_section __aeabi_cfcmple
+.align 2
+wrapper_func __aeabi_cfrcmple
+    push {r0-r2, lr}
+    // swap r0 and r1 with three XORs, then share the body below
+    eors r0, r1
+    eors r1, r0
+    eors r0, r1
+    b __aeabi_cfcmple_guts
+
+// NOTE these share an implementation as we have no excepting NaNs.
+// void FUNC_NAME(__aeabi_cfcmple)(float, float)         3-way (<, =, ?>) compare [1], result in PSR ZC flags
+// void FUNC_NAME(__aeabi_cfcmpeq)(float, float)         non-excepting equality comparison [1], result in PSR ZC flags
+.align 2
+wrapper_func __aeabi_cfcmple
+wrapper_func __aeabi_cfcmpeq
+    push {r0-r2, lr}
+
+// Every exit below is "pop {r0-r2, pc}", so r0-r2 come back as they were on entry and only the
+// flags carry the result.
+__aeabi_cfcmple_guts:
+    // r2 = exponent field of the first operand
+    lsls r2,r0,#1
+    lsrs r2,#24
+    beq 1f
+    cmp r2,#0xff
+    bne 2f
+    // exponent is all ones: a non-zero mantissa (NaN) leaves via 3: with the flags from this shift
+    lsls r2, r0, #9
+    bhi 3f
+1:
+    lsrs r0,#23     @ clear mantissa if denormal or infinite
+    lsls r0,#23
+2:
+    // same treatment for the second operand
+    lsls r2,r1,#1
+    lsrs r2,#24
+    beq 1f
+    cmp r2,#0xff
+    bne 2f
+    lsls r2, r1, #9
+    bhi 3f
+1:
+    lsrs r1,#23     @ clear mantissa if denormal or infinite
+    lsls r1,#23
+2:
+    movs r2,#1      @ initialise result
+    eors r1,r0
+    bmi 2f          @ opposite signs? then can proceed on basis of sign of x
+    eors r1,r0      @ restore y
+    bpl 1f
+    // both negative: compare with the operands exchanged
+    cmp r1,r0
+    pop {r0-r2, pc}
+1:
+    // both non-negative: plain compare of the bit patterns
+    cmp r0,r1
+    pop {r0-r2, pc}
+2:
+    orrs r1, r0     @ handle 0/-0
+    adds r1, r1     @ note this always sets C
+    beq 3f
+    mvns r0, r0     @ carry inverse of r0 sign
+    adds r0, r0
+3:
+    pop {r0-r2, pc}
+
+
+// int FUNC_NAME(__aeabi_fcmpeq)(float, float)         result (1, 0) denotes (=, ?<>) [2], use for C == and !=
+float_wrapper_section __aeabi_fcmpeq
+.align 2
+wrapper_func __aeabi_fcmpeq
+    push {lr}
+    bl __aeabi_cfcmpeq
+    // Z set by the flag-returning compare means "equal"
+    beq 1f
+    movs r0, #0
+    pop {pc}
+1:
+    movs r0, #1
+    pop {pc}
+
+// int FUNC_NAME(__aeabi_fcmplt)(float, float)         result (1, 0) denotes (<, ?>=) [2], use for C <
+// Note: the "true" value produced here is 0xFFFFFFFF (all ones) rather than literally 1.
+float_wrapper_section __aeabi_fcmplt
+.align 2
+wrapper_func __aeabi_fcmplt
+    push {lr}
+    bl __aeabi_cfcmple
+    // r0 - r0 - (1 - C): 0 when carry is set, 0xFFFFFFFF when carry is clear
+    sbcs r0, r0
+    pop {pc}
+
+// int FUNC_NAME(__aeabi_fcmple)(float, float)         result (1, 0) denotes (<=, ?>) [2], use for C <=
+float_wrapper_section __aeabi_fcmple
+.align 2
+wrapper_func __aeabi_fcmple
+    push {lr}
+    bl __aeabi_cfcmple
+    // "ls" = carry clear or Z set, i.e. the compare reported less-than or equal
+    bls 1f
+    movs r0, #0
+    pop {pc}
+1:
+    movs r0, #1
+    pop {pc}
+
+// int FUNC_NAME(__aeabi_fcmpge)(float, float)         result (1, 0) denotes (>=, ?<) [2], use for C >=
+float_wrapper_section __aeabi_fcmpge
+.align 2
+wrapper_func __aeabi_fcmpge
+    push {lr}
+    // because of NaNs it is better to reverse the args than the result
+    bl __aeabi_cfrcmple
+    // "ls" on the reversed compare = second operand <= first operand
+    bls 1f
+    movs r0, #0
+    pop {pc}
+1:
+    movs r0, #1
+    pop {pc}
+
+// int FUNC_NAME(__aeabi_fcmpgt)(float, float)         result (1, 0) denotes (>, ?<=) [2], use for C >
+// Implemented as the reversed-operand "<" test: calls __aeabi_cfrcmple, then "sbcs r0, r0"
+// yields 0 when C is set and 0xffffffff when C is clear (a non-zero "true", not literally 1).
+float_wrapper_section __aeabi_fcmpgt
+wrapper_func __aeabi_fcmpgt
+    push {lr}
+    // because of NaNs it is better to reverse the args than the result
+    bl __aeabi_cfrcmple
+    sbcs r0, r0         // C clear -> -1, C set -> 0
+    pop {pc}
+
+// int FUNC_NAME(__aeabi_fcmpun)(float, float)         result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
+// Returns 1 when either operand is a NaN, else 0. Each operand is tested by shifting out the
+// sign bit and adding 1 << 24: the add carries out with a non-zero result (the "hi" condition)
+// exactly when the shifted value exceeds 0xff000000, i.e. exponent all ones and mantissa non-zero.
+float_wrapper_section __aeabi_fcmpun
+wrapper_func __aeabi_fcmpun
+   movs r3, #1
+   lsls r3, #24        // r3 = 0x01000000
+   lsls r2, r0, #1     // drop the sign of the first operand
+   adds r2, r3
+   bhi 1f              // first operand is a NaN
+   lsls r2, r1, #1     // same test on the second operand
+   adds r2, r3
+   bhi 1f
+   movs r0, #0
+   bx lr
+1:
+   movs r0, #1
+   bx lr
+
+
+// float FUNC_NAME(__aeabi_ui2f)(unsigned)             unsigned to float (single precision) conversion
+// Shares the conversion body of __aeabi_i2f: r1 (the sign word used by __aeabi_i2f_main) is
+// forced to zero, and a zero input is returned directly as 0.
+float_wrapper_section __aeabi_ui2f
+wrapper_func __aeabi_ui2f
+        subs r1, r1             // r1 = 0: no sign bit to apply
+        cmp r0, #0
+        bne __aeabi_i2f_main    // non-zero: continue in the shared conversion code
+        mov r0, r1              // zero input -> result bits are 0
+        bx lr
+
+float_wrapper_section __aeabi_i2f
+// float FUNC_NAME(__aeabi_i2f)(int)                     integer to float (single precision) conversion
+// The sign bit is isolated in r1 and the magnitude kept in r0; a zero input is returned
+// unchanged (label 7). __aeabi_i2f_main is also branched to from __aeabi_ui2f with r1 = 0.
+// The magnitude is normalised using the routine pointed to by sf_clz_func, rounded to nearest
+// with ties to even, and packed together with exponent (158 - shift) and sign.
+wrapper_func __aeabi_i2f
+        lsrs r1, r0, #31
+        lsls r1, #31   @ r1 = sign bit only; N flag follows the sign
+        bpl 1f
+        negs r0, r0    @ negative input: continue with the magnitude
+1:
+        cmp r0, #0
+        beq 7f
+__aeabi_i2f_main:
+
+        mov ip, lr     @ blx below overwrites lr, so the return address lives in ip
+        push {r0, r1}
+        ldr r3, =sf_clz_func
+        ldr r3, [r3]
+        blx r3         @ call through sf_clz_func; presumably returns the leading-zero count of r0 - confirm
+        pop {r1, r2}   @ r1 = magnitude, r2 = sign bit
+        lsls r1, r0    @ shift the magnitude left by the returned count
+        subs r0, #158
+        negs r0, r0    @ r0 = 158 - shift count (biased exponent)
+
+        adds r1,#0x80  @ rounding
+        bcs 5f         @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits)
+
+        lsls r3,r1,#24 @ check bottom 8 bits of r1
+        beq 6f         @ in rounding-tie case?
+        lsls r1,#1     @ remove leading 1
+3:
+        lsrs r1,#9     @ align mantissa
+        lsls r0,#23    @ align exponent
+        orrs r0,r2     @ apply sign (r2 holds the sign bit popped above)
+4:
+        orrs r0,r1     @ merge mantissa with sign and exponent
+1:
+        bx ip
+5:
+        adds r0,#1     @ correct exponent offset
+        b 3b
+6:
+        lsrs r1,#9     @ ensure even result
+        lsls r1,#10
+        b 3b
+7:
+        bx lr
+
+
+// int FUNC_NAME(__aeabi_f2iz)(float)                     float (single precision) to integer C-style conversion [3]
+// Truncating conversion performed inline (also exported as float2int_z):
+//   biased exponent <= 126 (|x| < 1, zero, denormal)  -> 0
+//   biased exponent >= 158 (|x| >= 2^31, Inf, NaN)     -> 0x7fffffff if the sign bit is clear,
+//                                                         0x80000000 if it is set
+//   otherwise                                          -> mantissa with the implicit 1 restored,
+//                                                         shifted right, multiplied by +1 / -1
+// The instructions that used to follow the final "bx lr" were unreachable (no label, no
+// branch targeted them) and have been dropped.
+float_wrapper_section __aeabi_f2iz
+wrapper_func __aeabi_f2iz
+regular_func float2int_z
+    lsls r1, r0, #1
+    lsrs r2, r1, #24    // r2 = biased exponent
+    movs r3, #0x80
+    lsls r3, #24        // r3 = 0x80000000
+    cmp r2, #126
+    ble 1f              // |x| < 1 -> 0
+    subs r2, #158
+    bge 2f              // |x| >= 2^31 (or Inf/NaN) -> saturate
+    asrs r1, r0, #31    // r1 = 0 for positive, -1 for negative
+    lsls r0, #9
+    lsrs r0, #1
+    orrs r0, r3         // r0 = mantissa with implicit 1 at bit 31
+    negs r2, r2         // r2 = 158 - exponent, in the range 1..31
+    lsrs r0, r2         // truncate towards zero
+    lsls r1, #1
+    adds r1, #1         // r1 = +1 or -1
+    muls r0, r1         // apply the sign
+    bx lr
+1:
+    movs r0, #0
+    bx lr
+2:
+    lsrs r0, #31        // r0 = sign bit as 0/1
+    adds r0, r3
+    subs r0, #1         // 0x7fffffff for positive, 0x80000000 for negative
+    bx lr
+
+// float2int, float2fix and float2ufix consist solely of a tail call into the sf_table slot
+// named by the SF_TABLE_* offset; the table_tail_call / shimmable_table_tail_call macros are
+// defined outside this chunk (presumably the shimmable form also names a software shim - confirm).
+float_section float2int
+regular_func float2int
+    shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim
+
+float_section float2fix
+regular_func float2fix
+    shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim
+
+float_section float2ufix
+regular_func float2ufix
+    table_tail_call SF_TABLE_FLOAT2UFIX
+
+// unsigned FUNC_NAME(__aeabi_f2uiz)(float)             float (single precision) to unsigned C-style conversion [3]
+// No wrapper-side logic: the call is forwarded to the SF_TABLE_FLOAT2UINT table entry.
+float_wrapper_section __aeabi_f2uiz
+wrapper_func __aeabi_f2uiz
+    table_tail_call SF_TABLE_FLOAT2UINT
+
+// fix2float, ufix2float, fix642float and ufix642float each forward straight to the sf_table
+// slot named by their SF_TABLE_* offset; the two 64-bit variants use the shimmable form and
+// name a *_shim symbol (macros are defined outside this chunk).
+float_section fix2float
+regular_func fix2float
+    table_tail_call SF_TABLE_FIX2FLOAT
+
+float_section ufix2float
+regular_func ufix2float
+    table_tail_call SF_TABLE_UFIX2FLOAT
+
+float_section fix642float
+regular_func fix642float
+    shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642float_shim
+
+float_section ufix642float
+regular_func ufix642float
+    shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642float_shim
+
+// float FUNC_NAME(__aeabi_l2f)(long long)             long long to float (single precision) conversion
+// Fast path: when the high word (r1) equals the sign extension of the low word (r0) the value
+// fits in 32 bits and the conversion is handed to __aeabi_i2f (label 1, placed before the entry
+// point). Otherwise the call is forwarded to the SF_TABLE_INT642FLOAT table entry.
+float_wrapper_section __aeabi_l2f
+1:
+    ldr r2, =__aeabi_i2f
+    bx r2
+wrapper_func __aeabi_l2f
+    asrs r2, r0, #31    // r2 = 0 or -1: the high word a 32-bit value would have
+    cmp r1, r2
+    beq 1b
+    shimmable_table_tail_call SF_TABLE_INT642FLOAT int642float_shim
+
+// float FUNC_NAME(__aeabi_ul2f)(unsigned long long)   unsigned long long to float (single precision) conversion
+// Fast path: a zero high word (r1) means the value fits in 32 bits, so the conversion is
+// handed to __aeabi_ui2f (label 1, placed before the entry point). Otherwise the call is
+// forwarded to the SF_TABLE_UINT642FLOAT table entry.
+float_wrapper_section __aeabi_ul2f
+1:
+    ldr r2, =__aeabi_ui2f
+    bx r2
+wrapper_func __aeabi_ul2f
+    cmp r1, #0
+    beq 1b
+    shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642float_shim
+
+// long long FUNC_NAME(__aeabi_f2lz)(float)             float (single precision) to long long C-style conversion [3]
+// Inputs with the sign bit clear are forwarded to float2int64. Inputs with the sign bit set
+// are converted as a magnitude via float2ufix64 (called with r1 = 0) and then negated; if the
+// magnitude result already has bit 63 set, 0x8000000000000000 is returned instead.
+float_wrapper_section __aeabi_f2lz
+wrapper_func __aeabi_f2lz
+regular_func float2int64_z
+    cmn r0, r0          // r0 + r0 carries out exactly when the sign bit is set
+    bcc float2int64
+    push {lr}
+    lsls r0, #1
+    lsrs r0, #1         // clear the sign bit
+    movs r1, #0
+    bl float2ufix64
+    cmp r1, #0
+    bmi 1f              // magnitude does not fit in 63 bits -> saturate
+    movs r2, #0
+    negs r0, r0         // 64-bit negate: low word first ...
+    sbcs r2, r1         // ... then 0 - high word - borrow
+    mov r1, r2
+    pop {pc}
+1:
+    movs r1, #128
+    lsls r1, #24        // r1:r0 = 0x80000000:00000000
+    movs r0, #0
+    pop {pc}
+
+// float2int64 and float2fix64 forward straight to their sf_table slots, each naming a *_shim
+// symbol through shimmable_table_tail_call (macro defined outside this chunk).
+float_section float2int64
+regular_func float2int64
+    shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim
+
+float_section float2fix64
+regular_func float2fix64
+    shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim
+
+// unsigned long long FUNC_NAME(__aeabi_f2ulz)(float)     float to unsigned long long C-style conversion [3]
+// No wrapper-side logic: the call is forwarded to the SF_TABLE_FLOAT2UINT64 table entry.
+float_wrapper_section __aeabi_f2ulz
+wrapper_func __aeabi_f2ulz
+    shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim
+
+// float2ufix64 (called by __aeabi_f2lz for negative inputs) forwards to the
+// SF_TABLE_FLOAT2UFIX64 table entry.
+float_section float2ufix64
+regular_func float2ufix64
+    shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim
+
+// __aeabi_f2d: float to double. Without PICO_FLOAT_PROPAGATE_NANS the call is forwarded
+// unconditionally to the SF_TABLE_FLOAT2DOUBLE table entry. With it, NaN inputs are detected
+// first (same shift-and-add test as __aeabi_fcmpun) and converted by the code at label 1,
+// which sits before the entry point.
+float_wrapper_section __aeabi_f2d
+1:
+#if PICO_FLOAT_PROPAGATE_NANS
+    // copy sign bit and 25 NAN id bits into sign bit and significant ID bits, also setting the high id bit
+    asrs r1, r0, #3     // high word: sign-extending shift keeps the sign in bit 31
+    movs r2, #0xf
+    lsls r2, #27        // r2 = 0x78000000
+    orrs r1, r2         // force bits 30..27 on
+    lsls r0, #25        // low word: the bottom input bits moved to the top
+    bx lr
+#endif
+wrapper_func __aeabi_f2d
+#if PICO_FLOAT_PROPAGATE_NANS
+    movs r3, #1
+    lsls r3, #24
+    lsls r2, r0, #1
+    adds r2, r3
+    bhi 1b              // (x << 1) > 0xff000000 -> input is a NaN
+#endif
+    shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE float2double_shim
+
+// sqrtf: forwards to the SF_TABLE_FSQRT table entry. When PICO_FLOAT_SUPPORT_ROM_V1 and
+// PICO_RP2040_B0_SUPPORTED are both set, inputs with the sign bit set are handled here
+// instead: a zero exponent field yields 0x80000000, anything else yields 0xff800000.
+float_wrapper_section sqrtf
+wrapper_func_f1 sqrtf
+#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
+    // check for negative
+    asrs r1, r0, #23    // r1 = sign-extended (sign, exponent); N set when the sign bit is set
+    bmi 1f
+#endif
+    table_tail_call SF_TABLE_FSQRT
+#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
+1:
+    mvns r0, r1         // r0 = 255 - exponent (upper bits of r1 are all ones here)
+    cmp r0, #255
+    bne 2f
+    // -0 or -Denormal return -0 (0x80000000)
+    lsls r0, #31
+    bx lr
+2:
+    // return -Inf (0xff800000)
+    asrs r0, r1, #31
+    lsls r0, #23
+    bx lr
+#endif
+
+float_wrapper_section cosf
+// note we don't use _f1 since we do an infinity/nan check for outside of range
+// Arguments whose biased exponent is below 127 + 7 tail-call the SF_TABLE_FCOS entry directly.
+// Larger arguments are first reduced with remainderf(x, 2*pi) and then take the same path;
+// with PICO_FLOAT_PROPAGATE_NANS an all-ones exponent (Inf/NaN) returns a NaN instead.
+wrapper_func cosf
+    // rom version only works for -128 < angle < 128
+    lsls r1, r0, #1
+    lsrs r1, #24        // r1 = biased exponent (sign shifted out)
+    cmp r1, #127 + 7
+    bge 1f
+2:
+    table_tail_call SF_TABLE_FCOS
+1:
+#if PICO_FLOAT_PROPAGATE_NANS
+    // also check for infinities
+    cmp r1, #255
+    bne 3f
+    // infinity to NaN: set mantissa bit 22 so the returned value has a non-zero mantissa
+    movs r1, #1
+    lsls r1, #22
+    orrs r0, r1
+    bx lr
+3:
+#endif
+    ldr r1, =0x40c90fdb // 2 * M_PI
+    push {lr}
+    bl remainderf
+    pop {r1}
+    mov lr, r1          // restore the caller's return address before the tail call at 2
+    b 2b
+
+float_wrapper_section sinf
+// note we don't use _f1 since we do an infinity/nan check for outside of range
+// Arguments whose biased exponent is below 127 + 7 tail-call the SF_TABLE_FSIN entry directly.
+// Larger arguments are first reduced with remainderf(x, 2*pi) and then take the same path;
+// with PICO_FLOAT_PROPAGATE_NANS an all-ones exponent (Inf/NaN) returns a NaN instead.
+wrapper_func sinf
+    // rom version only works for -128 < angle < 128
+    lsls r1, r0, #1
+    lsrs r1, #24        // r1 = biased exponent (sign shifted out)
+    cmp r1, #127 + 7
+    bge 1f
+2:
+    table_tail_call SF_TABLE_FSIN
+1:
+#if PICO_FLOAT_PROPAGATE_NANS
+    // also check for infinities
+    cmp r1, #255
+    bne 3f
+    // infinity to NaN: set mantissa bit 22 so the returned value has a non-zero mantissa
+    movs r1, #1
+    lsls r1, #22
+    orrs r0, r1
+    bx lr
+3:
+#endif
+    ldr r1, =0x40c90fdb // 2 * M_PI
+    push {lr}
+    bl remainderf
+    pop {r1}
+    mov lr, r1          // restore the caller's return address before the tail call at 2
+    b 2b
+
+float_wrapper_section sincosf
+// note we don't use _f1 since we do an infinity/nan check for outside of range
+// The two output pointers (r1, r2) and lr are pushed on entry. After calling the
+// SF_TABLE_FSIN entry, r0 is stored through the first pointer and r1 through the second
+// (presumably the ROM routine returns sine in r0 and cosine in r1 - confirm).
+// Out-of-range arguments are reduced with remainderf(x, 2*pi) first.
+wrapper_func sincosf
+    push {r1, r2, lr}
+    // rom version only works for -128 < angle < 128
+    lsls r3, r0, #1
+    lsrs r3, #24        // r3 = biased exponent (sign shifted out)
+    cmp r3, #127 + 7
+    bge 3f
+2:
+    ldr r3, =sf_table
+    ldr r3, [r3, #SF_TABLE_FSIN]
+    blx r3
+    pop {r2, r3}        // r2 = first output pointer, r3 = second output pointer
+    str r0, [r2]
+    str r1, [r3]
+    pop {pc}
+#if PICO_FLOAT_PROPAGATE_NANS
+.align 2
+    // NOTE(review): nothing branches here and the preceding pop {pc} is unconditional,
+    // so this instruction looks unreachable - confirm whether it is only padding
+    pop {pc}
+#endif
+3:
+#if PICO_FLOAT_PROPAGATE_NANS
+    // also check for infinities
+    cmp r3, #255
+    bne 4f
+    // infinity to NaN: set mantissa bit 22 and store the value through both output pointers
+    movs r3, #1
+    lsls r3, #22
+    orrs r0, r3
+    str r0, [r1]
+    str r0, [r2]
+    add sp, #12         // discard the three words pushed on entry
+    bx lr
+4:
+#endif
+    ldr r1, =0x40c90fdb // 2 * M_PI
+    push {lr}
+    bl remainderf
+    pop {r1}
+    mov lr, r1
+    b 2b
+
+float_wrapper_section tanf
+// note we don't use _f1 since we do an infinity/nan check for outside of range
+// Arguments whose biased exponent is below 127 + 7 go to the SF_TABLE_FTAN entry; larger ones
+// are reduced with remainderf(x, 2*pi) first. Around the ROM call the hardware divider is
+// protected in one of two ways, selected by PICO_DIVIDER_DISABLE_INTERRUPTS:
+//   - unset/0: the SIO divider CSR is read and, if its dirty bit is set, the divider state is
+//     saved and restored around the call (save/restore macros are defined outside this chunk)
+//   - set:     interrupts are masked via PRIMASK for the duration of the call
+wrapper_func tanf
+    // rom version only works for -128 < angle < 128
+    lsls r1, r0, #1
+    lsrs r1, #24        // r1 = biased exponent (sign shifted out)
+    cmp r1, #127 + 7
+    bge ftan_out_of_range
+ftan_in_range:
+#if !PICO_DIVIDER_DISABLE_INTERRUPTS
+    // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
+    ldr r2, =(SIO_BASE)
+    ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
+    lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
+    bcs ftan_save_state     // dirty bit shifted into carry
+#else
+    // to avoid worrying about IRQs (or context switches), simply disable interrupts around call
+    push {r4, lr}
+    mrs r4, PRIMASK         // remember the current interrupt mask
+    cpsid i
+    bl ftan_shim_call
+    msr PRIMASK, r4         // put the previous mask back
+    pop {r4, pc}
+#endif
+ftan_shim_call:
+    table_tail_call SF_TABLE_FTAN
+#if !PICO_DIVIDER_DISABLE_INTERRUPTS
+ftan_save_state:
+    save_div_state_and_lr
+    bl ftan_shim_call
+    ldr r2, =(SIO_BASE)
+    restore_div_state_and_return
+#endif
+ftan_out_of_range:
+#if PICO_FLOAT_PROPAGATE_NANS
+    // also check for infinities
+    cmp r1, #255
+    bne 3f
+    // infinity to NaN: set mantissa bit 22 so the returned value has a non-zero mantissa
+    movs r1, #1
+    lsls r1, #22
+    orrs r0, r1
+    bx lr
+3:
+#endif
+    ldr r1, =0x40c90fdb // 2 * M_PI
+    push {lr}
+    bl remainderf
+    pop {r1}
+    mov lr, r1              // restore the caller's return address
+    b ftan_in_range
+
+// atan2f, expf and logf have no wrapper-side logic: each forwards to the sf_table slot named
+// by its SF_TABLE_* offset (atan2f via the shimmable form, naming fatan2_shim).
+float_wrapper_section atan2f
+wrapper_func_f2 atan2f
+    shimmable_table_tail_call SF_TABLE_FATAN2 fatan2_shim
+
+float_wrapper_section expf
+wrapper_func_f1 expf
+    table_tail_call SF_TABLE_FEXP
+
+float_wrapper_section logf
+wrapper_func_f1 logf
+    table_tail_call SF_TABLE_FLN
--- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_init_rom.c
+++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_init_rom.c
@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <string.h>
+#include "pico/bootrom.h"
+#include "pico/bootrom/sf_table.h"
+
+// NOTE THIS FUNCTION TABLE IS NOT PUBLIC OR NECESSARILY COMPLETE...
+// IT IS ***NOT*** SAFE TO CALL THESE FUNCTION POINTERS FROM ARBITRARY CODE
+uint32_t sf_table[SF_TABLE_V2_SIZE / 2];
+void __attribute__((weak)) *sf_clz_func;
+
+#if !(PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED)
+// Placeholder installed into empty sf_table slots by __aeabi_float_init when V1 ROM shims are
+// not compiled in: calling a float routine that the ROM lacks ends in panic().
+static __attribute__((noreturn)) void missing_float_func_shim(void) {
+    panic("");
+}
+#endif
+
+// Populates sf_table and sf_clz_func from the boot ROM.
+//  - ROM version 1 with V1 shim support compiled in: copy the V1-sized table, then point the
+//    remaining slots (and FLOAT2INT / FLOAT2FIX) at float_table_shim_on_use_helper.
+//  - ROM version 1 without shim support: copy the V1-sized table and point every slot that
+//    is still zero at missing_float_func_shim.
+//  - ROM version >= 2: copy the full V2-sized table.
+// sf_clz_func is looked up in all cases.
+void __aeabi_float_init(void) {
+    int rom_version = rp2040_rom_version();
+    void *rom_table = rom_data_lookup(rom_table_code('S', 'F'));
+#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
+    if (rom_version == 1) {
+        memcpy(&sf_table, rom_table, SF_TABLE_V1_SIZE);
+        extern void float_table_shim_on_use_helper(void);
+        // todo replace NDEBUG with a more exclusive assertion guard
+#ifndef NDEBUG
+        // sanity check: compare values at fixed ROM addresses against what the shims expect
+        if (*(uint16_t *)0x29ee != 0x0fc4 || // this is packx
+            *(uint16_t *)0x29c0 != 0x0dc2 || // this is upackx
+            *(uint16_t *)0x2b96 != 0xb5c0 || // this is cordic_vec
+            *(uint16_t *)0x2b18 != 0x2500 || // this is packretns
+            *(uint16_t *)0x2acc != 0xb510 || // this is float2fix
+            *(uint32_t *)0x2cfc != 0x6487ed51 // pi_q29
+        ) {
+            panic("");
+        }
+#endif
+
+        // this is a little tricky.. we only want to pull in a shim if the corresponding function
+        // is called. to that end we include a SVC instruction with the table offset as the call number
+        // followed by the shim function pointer inside the actual wrapper function. that way if the wrapper
+        // function is garbage collected, so is the shim function.
+        //
+        // float_table_shim_on_use_helper expects this SVC instruction in the calling code soon after the address
+        // pointed to by IP and patches the float_table entry with the real shim the first time the function is called.
+
+        // slots beyond the V1 table have no ROM implementation: route them to the helper
+        for(uint i=SF_TABLE_V1_SIZE/4; i<SF_TABLE_V2_SIZE/4; i++) {
+            sf_table[i] = (uintptr_t)float_table_shim_on_use_helper;
+        }
+        // we shim these for -0 and -denormal handling
+        sf_table[SF_TABLE_FLOAT2INT/4] = sf_table[SF_TABLE_FLOAT2FIX/4] = (uintptr_t)float_table_shim_on_use_helper;
+    }
+#else
+    if (rom_version == 1) {
+        memcpy(&sf_table, rom_table, SF_TABLE_V1_SIZE);
+        // opting for soft failure for now - you'll get a panic at runtime if you call any of the missing methods
+        for(uint i=0;i<SF_TABLE_V2_SIZE/4;i++) {
+            if (!sf_table[i]) sf_table[i] = (uintptr_t)missing_float_func_shim;
+        }
+    }
+#endif
+    if (rom_version >= 2) {
+        // the byte two before the table, times 4, must cover the V2 table size
+        assert(*((uint8_t *)rom_table-2) * 4 >= SF_TABLE_V2_SIZE);
+        memcpy(&sf_table, rom_table, SF_TABLE_V2_SIZE);
+    }
+    sf_clz_func = rom_func_lookup(ROM_FUNC_CLZ32);
+}
--- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_math.c
+++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_math.c
@ -0,0 +1,582 @@
+/*
+ * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "pico/float.h"
+
+// opened a separate issue https://github.com/raspberrypi/pico-sdk/issues/166 to deal with these warnings if at all
+GCC_Pragma("GCC diagnostic push")
+GCC_Pragma("GCC diagnostic ignored \"-Wconversion\"")
+GCC_Pragma("GCC diagnostic ignored \"-Wsign-conversion\"")
+
+typedef uint32_t ui32;
+typedef int32_t i32;
+
+#define FPINF ( HUGE_VALF)
+#define FMINF (-HUGE_VALF)
+#define NANF ((float)NAN)
+#define PZERO (+0.0)
+#define MZERO (-0.0)
+
+#define PI       3.14159265358979323846
+#define LOG2     0.69314718055994530941
+// Unfortunately in double precision ln(10) is very close to half-way between two representable numbers
+#define LOG10    2.30258509299404568401
+#define LOG2E    1.44269504088896340737
+#define LOG10E   0.43429448190325182765
+#define ONETHIRD 0.33333333333333333333
+
+#define PIf       3.14159265358979323846f
+#define LOG2f     0.69314718055994530941f
+#define LOG2Ef    1.44269504088896340737f
+#define LOG10Ef   0.43429448190325182765f
+#define ONETHIRDf 0.33333333333333333333f
+
+#define FUNPACK(x,e,m) e=((x)>>23)&0xff,m=((x)&0x007fffff)|0x00800000
+#define FUNPACKS(x,s,e,m) s=((x)>>31),FUNPACK((x),(e),(m))
+
+typedef union {
+    float f;
+    ui32 ix;
+} float_ui32;
+
+// Reinterpret a 32-bit pattern as a float by punning through the float_ui32 union.
+static inline float ui322float(ui32 ix) {
+    float_ui32 pun = { .ix = ix };
+    return pun.f;
+}
+
+// Reinterpret a float as its 32-bit pattern by punning through the float_ui32 union.
+static inline ui32 float2ui32(float f) {
+    float_ui32 pun = { .f = f };
+    return pun.ix;
+}
+
+#if PICO_FLOAT_PROPAGATE_NANS
+// True when x is a NaN: with the sign bit shifted out, anything above 0xff000000 has an
+// all-ones exponent and a non-zero mantissa.
+static inline bool fisnan(float x) {
+    const ui32 no_sign=float2ui32(x)<<1;
+    return no_sign > 0xff000000u;
+}
+
+#define check_nan_f1(x) if (fisnan((x))) return (x)
+#define check_nan_f2(x,y) if (fisnan((x))) return (x); else if (fisnan((y))) return (y);
+#else
+#define check_nan_f1(x) ((void)0)
+#define check_nan_f2(x,y) ((void)0)
+#endif
+
+// Sign bit and biased exponent of x as one 9-bit value (sign in bit 8).
+static inline int fgetsignexp(float x) {
+    const ui32 bits=float2ui32(x);
+    const ui32 top=bits>>23;
+    return top&0x1ff;
+}
+
+// Biased exponent field of x (0..255).
+static inline int fgetexp(float x) {
+    const ui32 bits=float2ui32(x);
+    const ui32 top=bits>>23;
+    return top&0xff;
+}
+
+// Scale x by 2^de by adjusting the exponent field directly.
+// Inputs with exponent field 0 or 0xff are returned unchanged; underflow gives a signed zero
+// and overflow a signed infinity.
+static inline float fldexp(float x,int de) {
+    const ui32 bits=float2ui32(x);
+    const ui32 sign=bits&0x80000000;
+    int e=fgetexp(x);
+    if(e==0||e==0xff) return x;
+    e+=de;
+    if(e<=0) return ui322float(sign);                 // signed zero for underflow
+    if(e>=0xff) return ui322float(sign|0x7f800000);   // signed infinity on overflow
+    return ui322float(bits+((ui32)de<<23));
+}
+
+// ldexpf: x * 2^de via fldexp (NaN inputs are returned as-is when NaN propagation is enabled).
+float WRAPPER_FUNC(ldexpf)(float x, int de) {
+    check_nan_f1(x);
+    const float scaled = fldexp(x, de);
+    return scaled;
+}
+
+// Magnitude of x combined with the sign bit of y.
+static inline float fcopysign(float x,float y) {
+    const ui32 magnitude=float2ui32(x)&0x7fffffff;
+    const ui32 sign=float2ui32(y)&0x80000000;
+    return ui322float(magnitude|sign);
+}
+
+// copysignf: fcopysign with the optional NaN check on both arguments.
+float WRAPPER_FUNC(copysignf)(float x, float y) {
+    check_nan_f2(x,y);
+    const float result = fcopysign(x, y);
+    return result;
+}
+
+// Classification helpers working purely on the exponent (and sign) field.
+// fiszero treats every value with a zero exponent field (zeros and denormals) as zero;
+// fisinf is true for every value with an all-ones exponent field.
+static inline int fiszero(float x) {
+    return fgetexp(x)==0;
+}
+//static inline int fispzero(float x) { return fgetsignexp(x)==0; }
+//static inline int fismzero(float x) { return fgetsignexp(x)==0x100; }
+static inline int fisinf(float x) {
+    return fgetexp(x)==0xff;
+}
+static inline int fispinf(float x) {
+    return fgetsignexp(x)==0xff;
+}
+static inline int fisminf(float x) {
+    return fgetsignexp(x)==0x1ff;
+}
+
+// 1 when x has no fractional part (values with a zero exponent field count as the integer 0).
+static inline int fisint(float x) {
+    const int biased=fgetexp(x);
+    if(biased==0) return 1;                 // 0 is an integer
+    const int unbiased=biased-0x7f;
+    if(unbiased<0) return 0;                // |x|<1
+    const int frac_bits=23-unbiased;        // number of mantissa bits below significance 1
+    if(frac_bits<=0) return 1;              // |x| large, so must be an integer
+    const ui32 frac_mask=(1<<frac_bits)-1;
+    return (float2ui32(x)&frac_mask) ? 0 : 1;
+}
+
+// 1 when x is an odd integer, 0 otherwise (including |x|<1 and values too large to be odd).
+static inline int fisoddint(float x) {
+    const ui32 bits=float2ui32(x);
+    const int unbiased=fgetexp(x)-0x7f;
+    if(unbiased<0) return 0;                // |x|<1; 0 is not odd
+    const int one_pos=23-unbiased;          // bit position in mantissa with significance 1
+    if(one_pos<0) return 0;                 // |x| large, so must be even
+    const ui32 frac_mask=(1<<one_pos)-1;    // bits of significance <1 (if any)
+    if(bits&frac_mask) return 0;            // not an integer
+    if(one_pos==23) return 1;               // value is exactly 1
+    return (bits>>one_pos)&1;
+}
+
+// 1 when the sign bit is set and x is not zero (zero exponent field counts as zero).
+static inline int fisstrictneg(float x) {
+    return fiszero(x) ? 0 : (int)(float2ui32(x)>>31);
+}
+
+// The raw sign bit of x.
+static inline int fisneg(float x) {
+    return float2ui32(x)>>31;
+}
+
+// x with its sign bit flipped.
+static inline float fneg(float x) {
+    return ui322float(float2ui32(x)^0x80000000);
+}
+
+// 1 when x is a (signed) power of two: non-zero, finite, and with an all-zero mantissa field.
+static inline int fispo2(float x) {
+    if(fiszero(x)||fisinf(x)) return 0;
+    const ui32 mantissa=float2ui32(x)&0x007fffff;
+    return mantissa==0;
+}
+
+// Result for domain errors: NaN when NaN propagation is compiled in, otherwise the fallback x.
+static inline float fnan_or(float x) {
+#if PICO_FLOAT_PROPAGATE_NANS
+    (void)x;
+    const float result=NANF;
+#else
+    const float result=x;
+#endif
+    return result;
+}
+
+// truncf: round towards zero by clearing the mantissa bits of significance below 1.
+float WRAPPER_FUNC(truncf)(float x) {
+    check_nan_f1(x);
+    const ui32 bits=float2ui32(x);
+    const int unbiased=fgetexp(x)-0x7f;
+    if(unbiased<0) return ui322float(bits&0x80000000);   // |x|<1 -> signed zero
+    const int frac_bits=23-unbiased;
+    if(frac_bits<=0) return x;                           // |x| large, so must be an integer
+    const ui32 frac_mask=(1<<frac_bits)-1;
+    return ui322float(bits&~frac_mask);
+}
+
+// roundf: round to nearest integer, halfway cases away from zero, done on the raw bits:
+// add half a unit to the magnitude, then clear the fractional mantissa bits.
+float WRAPPER_FUNC(roundf)(float x) {
+    check_nan_f1(x);
+    ui32 bits=float2ui32(x);
+    const ui32 sign=bits&0x80000000;
+    const int unbiased=fgetexp(x)-0x7f;
+    if(unbiased<-1) return ui322float(sign);              // |x|<0.5 -> signed zero
+    if(unbiased==-1) return ui322float(sign|0x3f800000);  // 0.5<=|x|<1 -> ±1
+    const int one_pos=23-unbiased;                        // bit with significance 1, <=23
+    if(one_pos<=0) return x;                              // |x| large, so must be an integer
+    const ui32 half=1<<(one_pos-1);                       // bit of significance 0.5
+    bits+=half;
+    const ui32 frac_mask=half+half-1;                     // bits of significance <1
+    return ui322float(bits&~frac_mask);
+}
+
+// floorf: round towards minus infinity. Negative values get (1 - epsilon) added to their
+// magnitude before the fractional mantissa bits are cleared.
+float WRAPPER_FUNC(floorf)(float x) {
+    check_nan_f1(x);
+    ui32 bits=float2ui32(x);
+    const int biased=fgetexp(x);
+    if(biased==0) return ui322float(bits&0x80000000);   // x==0 -> keep the sign only
+    const int unbiased=biased-0x7f;
+    if(unbiased<0) {                                    // |x|<1, not zero
+        return fisneg(x) ? -1 : PZERO;
+    }
+    const int frac_bits=23-unbiased;
+    if(frac_bits<=0) return x;                          // |x| large, so must be an integer
+    const ui32 frac_mask=(1<<frac_bits)-1;
+    if(fisneg(x)) bits+=frac_mask;
+    return ui322float(bits&~frac_mask);
+}
+
+// ceilf: round towards plus infinity. Positive values get (1 - epsilon) added to their
+// magnitude before the fractional mantissa bits are cleared.
+float WRAPPER_FUNC(ceilf)(float x) {
+    check_nan_f1(x);
+    ui32 bits=float2ui32(x);
+    const int biased=fgetexp(x);
+    if(biased==0) return ui322float(bits&0x80000000);   // x==0 -> keep the sign only
+    const int unbiased=biased-0x7f;
+    if(unbiased<0) {                                    // |x|<1, not zero
+        return fisneg(x) ? MZERO : 1;
+    }
+    const int frac_bits=23-unbiased;
+    if(frac_bits<=0) return x;                          // |x| large, so must be an integer
+    const ui32 frac_mask=(1<<frac_bits)-1;
+    if(!fisneg(x)) bits+=frac_mask;
+    return ui322float(bits&~frac_mask);
+}
+
+// asinf(x) = atan2(x, sqrt((1-x)(1+x))); a strictly negative product means |x|>1.
+float WRAPPER_FUNC(asinf)(float x) {
+    check_nan_f1(x);
+    const float one_minus_x2=(1.0f-x)*(1.0f+x);
+    if(fisstrictneg(one_minus_x2)) return fnan_or(FPINF);
+    const float root=sqrtf(one_minus_x2);
+    return atan2f(x,root);
+}
+
+// acosf(x) = atan2(sqrt((1-x)(1+x)), x); a strictly negative product means |x|>1.
+float WRAPPER_FUNC(acosf)(float x) {
+    check_nan_f1(x);
+    const float one_minus_x2=(1.0f-x)*(1.0f+x);
+    if(fisstrictneg(one_minus_x2)) return fnan_or(FPINF);
+    const float root=sqrtf(one_minus_x2);
+    return atan2f(root,x);
+}
+
+// atanf: ±infinity map to ±pi/2 explicitly, everything else goes through atan2f(x, 1).
+float WRAPPER_FUNC(atanf)(float x) {
+    check_nan_f1(x);
+    if(fispinf(x)) {
+        return (float)( PIf/2);
+    }
+    if(fisminf(x)) {
+        return (float)(-PIf/2);
+    }
+    return atan2f(x,1.0f);
+}
+
+// sinhf(x) = (e^x - e^-x) / 2, with the halving done by fldexp.
+float WRAPPER_FUNC(sinhf)(float x) {
+    check_nan_f1(x);
+    const float grow=expf(x);
+    const float decay=expf(fneg(x));
+    return fldexp(grow-decay,-1);
+}
+
+// coshf(x) = (e^x + e^-x) / 2, with the halving done by fldexp.
+float WRAPPER_FUNC(coshf)(float x) {
+    check_nan_f1(x);
+    const float grow=expf(x);
+    const float decay=expf(fneg(x));
+    return fldexp(grow+decay,-1);
+}
+
+// tanhf(x) = (e^2x - 1) / (e^2x + 1); for |x|>=16 the result is returned as ±1 directly,
+// which avoids generating infinities in the quotient.
+float WRAPPER_FUNC(tanhf)(float x) {
+    check_nan_f1(x);
+    if(fgetexp(x)>=4+0x7f) {            // |x|>=16?
+        return fisneg(x) ? -1 : 1;
+    }
+    const float e2x=expf(fldexp(x,1));
+    return (e2x-1.0f)/(e2x+1.0f);
+}
+
+// asinhf(x) = sign(x) * ln(|x| + sqrt(x^2 + 1)).
+// For |x| >= 2^16 the sqrt term is indistinguishable from |x|, so ln(2|x|) = ln|x| + ln 2 is used.
+// Smaller magnitudes are evaluated in double precision.
+float WRAPPER_FUNC(asinhf)(float x) {
+    check_nan_f1(x);
+    int e;
+    e=fgetexp(x);
+    if(e>=16+0x7f) {                                   // |x|>=2^16?
+        if(!fisneg(x)) return      logf(     x )+LOG2f;  // 1/x^2 << 1
+        else           return fneg(logf(fneg(x))+LOG2f); // 1/x^2 << 1
+    }
+    // Select the branch from the sign bit (as the large-magnitude case above does) rather than
+    // from x>0: with x>0, +0 fell into the negated branch and came back as -0.
+    if(!fisneg(x)) return      (float)log(sqrt((double)x*(double)x+1.0)+(double)x);
+    else           return fneg((float)log(sqrt((double)x*(double)x+1.0)-(double)x));
+}
+
+// acoshf: works on |x|. For |x| >= 2^16 uses ln|x| + ln 2, otherwise evaluates
+// ln(sqrt((x+1)(x-1)) + x) in double precision.
+float WRAPPER_FUNC(acoshf)(float x) {
+    check_nan_f1(x);
+    if(fisneg(x)) x=fneg(x);
+    if(fgetexp(x)>=16+0x7f) {                       // |x|>=2^16?
+        return logf(x)+LOG2f;
+    }
+    const double xd=(double)x;
+    return (float)log(sqrt((xd+1.0)*(xd-1.0))+xd);
+}
+
+// atanhf(x) = ln((1+x)/(1-x)) / 2, with the halving done by fldexp.
+float WRAPPER_FUNC(atanhf)(float x) {
+    check_nan_f1(x);
+    const float ratio=(1.0f+x)/(1.0f-x);
+    return fldexp(logf(ratio),-1);
+}
+
+// Base-2 / base-10 exponentials and logarithms expressed through the natural ones.
+// The exponentials are evaluated in double precision, the logarithms in single precision.
+float WRAPPER_FUNC(exp2f)(float x) {
+    check_nan_f1(x);
+    return (float)exp((double)x*LOG2);
+}
+float WRAPPER_FUNC(log2f)(float x) {
+    check_nan_f1(x);
+    return logf(x)*LOG2Ef;
+}
+float WRAPPER_FUNC(exp10f)(float x) {
+    check_nan_f1(x);
+    return (float)exp((double)x*LOG10);
+}
+float WRAPPER_FUNC(log10f)(float x) {
+    check_nan_f1(x);
+    return logf(x)*LOG10Ef;
+}
+
+// e^x - 1 and ln(1 + x), both evaluated in double precision.
+float WRAPPER_FUNC(expm1f)(float x) {
+    check_nan_f1(x);
+    return (float)(exp((double)x)-1);
+}
+float WRAPPER_FUNC(log1pf)(float x) {
+    check_nan_f1(x);
+    return (float)(log(1+(double)x));
+}
+// fmaf: x*y+z computed in double and rounded back to float.
+float WRAPPER_FUNC(fmaf)(float x,float y,float z) {
+    check_nan_f2(x,y);
+    check_nan_f1(z);
+    const double product=(double)x*(double)y;
+    return (float)(product+(double)z);
+} // has double rounding so not exact
+
+// general power, x>0
+// x^y as exp(y * ln x), using double-precision intermediates for better accuracy.
+static inline float fpow_1(float x,float y) {
+    const double ln_x=log((double)x);
+    return (float)exp(ln_x*(double)y);
+}
+
+// x^y by recursive square-and-multiply; the recursion bottoms out at y==1, so callers must
+// pass y>=1.
+static float fpow_int2(float x,int y) {
+    if(y==1) return x;
+    const float half=fpow_int2(x,y/2);
+    const float squared=half*half;
+    return (y&1) ? squared*x : squared;
+}
+
+// for the case where x not zero or infinity, y small and not zero
+// Integer power for small non-zero y: a negative exponent is handled by inverting x first.
+static inline float fpowint_1(float x,int y) {
+    if(y<0) {
+        x=1.0f/x;
+        y=-y;
+    }
+    return fpow_int2(x,y);
+}
+
+// for the case where x not zero or infinity
+// x^y for integer y, for the case where x is not zero or infinity.
+//  - negative x: computed on |x|, negated when y is odd
+//  - x an exact power of two: result built directly with fldexp
+//  - |y| <= 32: repeated multiplication; otherwise exp/log via fpow_1
+static float fpowint_0(float x,int y) {
+    int e;
+    if(fisneg(x)) {
+        // test the parity on the integer itself: fisoddint() takes a float, and converting a
+        // large odd int to float rounds it to an even value, which lost the sign of the result
+        if(y&1) return fneg(fpowint_0(fneg(x),y));
+        else    return      fpowint_0(fneg(x),y);
+    }
+    if(fispo2(x)) {
+        e=fgetexp(x)-0x7f;
+        if(y>=256) y= 255;  // avoid overflow
+        if(y<-256) y=-256;
+        y*=e;
+        return fldexp(1,y);
+    }
+    if(y==0) return 1;
+    if(y>=-32&&y<=32) return fpowint_1(x,y);
+    return fpow_1(x,y);
+}
+
+// powintf: x raised to an integer power, with the special cases handled up front:
+//   x==1 or y==0             -> 1
+//   x==0: y>0 -> x for odd y (keeps the sign of zero), 0 for even y;
+//         y<0 -> signed infinity for odd y, +infinity for even y
+//   x==+inf                  -> 0 for y<0, +infinity otherwise
+//   x==-inf                  -> ±infinity (y>0) or ±0 (y<0), negative for odd y
+// Everything else is delegated to fpowint_0.
+float WRAPPER_FUNC(powintf)(float x,int y) {
+    GCC_Pragma("GCC diagnostic push")
+    GCC_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
+    if(x==1.0f||y==0) return 1;
+    if(x==0.0f) {
+        if(y>0) {
+            if(y&1) return x;
+            else    return 0;
+        }
+        if((y&1)) return fcopysign(FPINF,x);
+        return FPINF;
+    }
+    GCC_Pragma("GCC diagnostic pop")
+    // the NaN check comes after the comparisons above, so a NaN x with y==0 still returns 1
+    check_nan_f1(x);
+    if(fispinf(x)) {
+        if(y<0) return 0;
+        else    return FPINF;
+    }
+    if(fisminf(x)) {
+        if(y>0) {
+            if((y&1)) return FMINF;
+            else      return FPINF;
+        }
+        if((y&1)) return MZERO;
+        else      return PZERO;
+    }
+    return fpowint_0(x,y);
+}
+
+// for the case where y is guaranteed a finite integer, x not zero or infinity
+// x^y for the case where y is guaranteed a finite integer, x not zero or infinity.
+//  - negative x: computed on |x|, negated when y is odd
+//  - x an exact power of two: result built directly with fldexp
+//  - |y| <= 32: repeated multiplication; otherwise exp/log via fpow_1
+static float fpow_0(float x,float y) {
+    int e,p;
+    if(fisneg(x)) {
+        if(fisoddint(y)) return fneg(fpow_0(fneg(x),y));
+        else             return      fpow_0(fneg(x),y);
+    }
+    // Clamp in the float domain before converting: y may be an integer far outside the range
+    // of int, and an out-of-range float-to-int conversion is undefined in C. The clamp limits
+    // are the ones the power-of-two path applied after the conversion, and they lie outside
+    // the -32..32 window tested below, so in-range behaviour is unchanged.
+    if(y>=256.0f)      p= 255;  // avoid overflow
+    else if(y<-256.0f) p=-256;
+    else               p=(int)y;
+    if(fispo2(x)) {
+        e=fgetexp(x)-0x7f;
+        p*=e;
+        return fldexp(1,p);
+    }
+    if(p==0) return 1;
+    if(p>=-32&&p<=32) return fpowint_1(x,p);
+    return fpow_1(x,y);
+}
+
+// powf: x^y with the special cases resolved in this order:
+//   x==1 or y zero                  -> 1 (checked before the NaN test)
+//   x==-1 and y infinite            -> 1
+//   x zero                          -> 0 / signed zero / infinity depending on sign and parity of y
+//   x infinite                      -> 0 or infinity depending on sign and parity of y
+//   y infinite                      -> 0 or infinity depending on whether the exponent field of x is below 0x7f
+//   y an integer                    -> fpow_0
+//   x negative with non-integer y   -> FPINF
+//   otherwise                       -> exp/log via fpow_1
+float WRAPPER_FUNC(powf)(float x,float y) {
+    GCC_Like_Pragma("GCC diagnostic push")
+    GCC_Like_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
+    if(x==1.0f||fiszero(y)) return 1;
+    check_nan_f2(x,y);
+    if(x==-1.0f&&fisinf(y)) return 1;
+    GCC_Like_Pragma("GCC diagnostic pop")
+    if(fiszero(x)) {
+        if(!fisneg(y)) {
+            if(fisoddint(y)) return x;      // keeps the sign of zero
+            else             return 0;
+        }
+        if(fisoddint(y)) return fcopysign(FPINF,x);
+        return FPINF;
+    }
+    if(fispinf(x)) {
+        if(fisneg(y)) return 0;
+        else          return FPINF;
+    }
+    if(fisminf(x)) {
+        if(!fisneg(y)) {
+            if(fisoddint(y)) return FMINF;
+            else             return FPINF;
+        }
+        if(fisoddint(y)) return MZERO;
+        else             return PZERO;
+    }
+    if(fispinf(y)) {
+        if(fgetexp(x)<0x7f) return PZERO;   // |x|<1
+        else                return FPINF;
+    }
+    if(fisminf(y)) {
+        if(fgetexp(x)<0x7f) return FPINF;   // |x|<1
+        else                return PZERO;
+    }
+    if(fisint(y)) return fpow_0(x,y);
+    if(fisneg(x)) return FPINF;
+    return fpow_1(x,y);
+}
+
+// hypotf: sqrt(x^2 + y^2). When either exponent is very large, or both are very small, the
+// operands are pre-scaled by 2^∓70 and the result scaled back, so the squares stay in range.
+float WRAPPER_FUNC(hypotf)(float x,float y) {
+    check_nan_f2(x,y);
+    const int ex=fgetexp(x);
+    const int ey=fgetexp(y);
+    int prescale=0;
+    if(ex>=0x7f+50||ey>=0x7f+50) prescale=-70;         // overflow, or nearly so
+    else if(ex<=0x7f-50&&ey<=0x7f-50) prescale=70;     // underflow, or nearly so
+    if(prescale==0) return sqrtf(x*x+y*y);
+    x=fldexp(x,prescale);
+    y=fldexp(y,prescale);
+    return fldexp(sqrtf(x*x+y*y),-prescale);
+}
+
+// cbrtf: negative inputs are mirrored, zeros returned with their sign. Otherwise about a third
+// of the exponent is split off, the remainder is cube-rooted as exp(ln(x)/3), and the
+// split-off exponent is re-applied.
+float WRAPPER_FUNC(cbrtf)(float x) {
+    check_nan_f1(x);
+    if(fisneg(x)) return fneg(cbrtf(fneg(x)));
+    if(fiszero(x)) return fcopysign(PZERO,x);
+    const int unbiased=fgetexp(x)-0x7f;
+    const int third=(unbiased*0x5555+0x8000)>>16;  // ~e/3, rounded
+    const float reduced=fldexp(x,-third*3);
+    const float root=expf(logf(reduced)*ONETHIRDf);
+    return fldexp(root,third);
+}
+
+// reduces mx*2^e modulo my, returning bottom bits of quotient at *pquo
+// 2^23<=|mx|,my<2^24, e>=0; 0<=result<my
+// Long-division style reduction: up to 12 quotient bits are estimated per iteration from a
+// reciprocal of my, then the remainder is corrected into [0, my) by at most two steps in
+// either direction. The low bits of the accumulated quotient are written to *pquo when it is
+// non-null.
+static i32 frem_0(i32 mx,i32 my,int e,int*pquo) {
+    int quo=0,q,r=0,s;
+    if(e>0) {
+        r=0xffffffffU/(ui32)(my>>7);  // reciprocal estimate Q16
+    }
+    while(e>0) {
+        s=e; if(s>12) s=12;    // gain up to 12 bits on each iteration
+        q=(mx>>9)*r;           // Q30
+        q=((q>>(29-s))+1)>>1;  // Q(s), rounded
+        mx=(mx<<s)-my*q;       // shift the remainder up and subtract the estimated multiple of my
+        quo=(quo<<s)+q;
+        e-=s;
+    }
+    if(mx>=my) mx-=my,quo++; // when e==0 mx can be nearly as big as 2my
+    if(mx>=my) mx-=my,quo++;
+    if(mx<0) mx+=my,quo--;   // the estimate may also have overshot: step back
+    if(mx<0) mx+=my,quo--;
+    if(pquo) *pquo=quo;
+    return mx;
+}
+
+// fmodf: remainder of x/y with the sign of x, computed on the unpacked mantissas by frem_0.
+// Special cases: x with an all-ones exponent -> fnan_or(FPINF); y with a zero exponent field
+// -> FPINF; x with a zero exponent field -> signed zero; exponent of x below that of y -> x.
+float WRAPPER_FUNC(fmodf)(float x,float y) {
+    check_nan_f2(x,y);
+    ui32 ix=float2ui32(x),iy=float2ui32(y);
+    int sx,ex,ey;
+    i32 mx,my;
+    FUNPACKS(ix,sx,ex,mx);
+    FUNPACK(iy,ey,my);
+    if(ex==0xff) return fnan_or(FPINF);
+    if(ey==0) return FPINF;
+    if(ex==0) return fisneg(x) ? MZERO : PZERO;
+    if(ex<ey) return x;  // |x|<|y|, including case x=±0
+    i32 rem=frem_0(mx,my,ex-ey,0);
+    if(sx) rem=-rem;
+    return fix2float(rem,0x7f-ey+23);
+}
+
+// remquof: remainder of x/y rounded to the nearest multiple of y (ties to an even quotient),
+// with the low bits of the quotient stored through quo when it is non-null.
+// *quo is set to 0 first, so every early return below leaves it at 0.
+float WRAPPER_FUNC(remquof)(float x,float y,int*quo) {
+    check_nan_f2(x,y);
+    ui32 ix=float2ui32(x),iy=float2ui32(y);
+    int sx,sy,ex,ey,q;
+    i32 mx,my;
+    FUNPACKS(ix,sx,ex,mx);
+    FUNPACKS(iy,sy,ey,my);
+    if(quo) *quo=0;
+    if(ex==0xff) return FPINF;   // x has an all-ones exponent field
+    if(ey==0)    return FPINF;   // y has a zero exponent field
+    if(ex==0)    return PZERO;   // x has a zero exponent field
+    if(ey==0xff) return x;       // y has an all-ones exponent field
+    if(ex<ey-1)  return x;  // |x|<|y|/2
+    if(ex==ey-1) {
+        if(mx<=my) return x;  // |x|<=|y|/2, even quotient
+        // here |y|/2<|x|<|y|
+        // mx is scaled one binade below my here, so |y| corresponds to my+my; ey is lowered
+        // to match that scale for the final fix2float
+        if(!sx) { // x>|y|/2
+            mx-=my+my;
+            ey--;
+            q=1;
+        } else { // x<-|y|/2
+            mx=my+my-mx;
+            ey--;
+            q=-1;
+        }
+    }
+    else {
+        if(sx) mx=-mx;
+        mx=frem_0(mx,my,ex-ey,&q);
+        if(mx+mx>my || (mx+mx==my&&(q&1)) ) { // |x|>|y|/2, or equality and an odd quotient?
+            mx-=my;
+            q++;
+        }
+    }
+    if(sy) q=-q;                 // quotient sign follows the sign of y
+    if(quo) *quo=q;
+    return fix2float(mx,0x7f-ey+23);
+}
+
+// dremf and remainderf are both remquof with the quotient discarded.
+float WRAPPER_FUNC(dremf)(float x,float y) {
+    check_nan_f2(x,y);
+    return remquof(x,y,0);
+}
+
+float WRAPPER_FUNC(remainderf)(float x,float y) {
+    check_nan_f2(x,y);
+    return remquof(x,y,0);
+}
+
+GCC_Pragma("GCC diagnostic pop") // conversion
--- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_none.S
+++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_none.S
@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "pico/asm_helper.S"
+#include "pico/bootrom/sf_table.h"
+
+pico_default_asm_setup
+
+wrapper_func __aeabi_fadd // one wrapper_func entry per wrapped float routine; no instructions follow any entry until the stub after fmaf
+wrapper_func __aeabi_fdiv // so, assuming wrapper_func only defines a symbol (macro from pico/asm_helper.S, not shown), every entry resolves to that stub
+wrapper_func __aeabi_fmul
+wrapper_func __aeabi_frsub
+wrapper_func __aeabi_fsub
+wrapper_func __aeabi_cfcmpeq
+wrapper_func __aeabi_cfrcmple
+wrapper_func __aeabi_cfcmple
+wrapper_func __aeabi_fcmpeq
+wrapper_func __aeabi_fcmplt
+wrapper_func __aeabi_fcmple
+wrapper_func __aeabi_fcmpge
+wrapper_func __aeabi_fcmpgt
+wrapper_func __aeabi_fcmpun
+wrapper_func __aeabi_i2f
+wrapper_func __aeabi_l2f
+wrapper_func __aeabi_ui2f
+wrapper_func __aeabi_ul2f
+wrapper_func __aeabi_f2iz
+wrapper_func __aeabi_f2lz
+wrapper_func __aeabi_f2uiz
+wrapper_func __aeabi_f2ulz
+wrapper_func __aeabi_f2d
+wrapper_func sqrtf
+wrapper_func cosf
+wrapper_func sinf
+wrapper_func tanf
+wrapper_func atan2f
+wrapper_func expf
+wrapper_func logf
+wrapper_func ldexpf
+wrapper_func copysignf
+wrapper_func truncf
+wrapper_func floorf
+wrapper_func ceilf
+wrapper_func roundf
+wrapper_func sincosf
+wrapper_func asinf
+wrapper_func acosf
+wrapper_func atanf
+wrapper_func sinhf
+wrapper_func coshf
+wrapper_func tanhf
+wrapper_func asinhf
+wrapper_func acoshf
+wrapper_func atanhf
+wrapper_func exp2f
+wrapper_func log2f
+wrapper_func exp10f
+wrapper_func log10f
+wrapper_func powf
+wrapper_func powintf
+wrapper_func hypotf
+wrapper_func cbrtf
+wrapper_func fmodf
+wrapper_func dremf
+wrapper_func remainderf
+wrapper_func remquof
+wrapper_func expm1f
+wrapper_func log1pf
+wrapper_func fmaf
+    push {lr}       // keep stack trace sane
+    ldr r0, =str // r0 = address of the message defined below
+    bl panic // NOTE(review): presumably never returns, since only string data follows - confirm
+
+str:
+    .asciz "float support is disabled" // message passed to panic in r0
--- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_v1_rom_shim.S
+++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/float_v1_rom_shim.S
@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "pico/asm_helper.S"
+
+#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
+
+#ifndef PICO_FLOAT_IN_RAM
+#define PICO_FLOAT_IN_RAM 0
+#endif
+
+pico_default_asm_setup
+
+.macro float_section name // switch to an "ax" section whose name is built from the macro argument
+// todo separate flag for shims?
+#if PICO_FLOAT_IN_RAM
+.section RAM_SECTION_NAME(\name), "ax" // PICO_FLOAT_IN_RAM non-zero: name built with RAM_SECTION_NAME
+#else
+.section SECTION_NAME(\name), "ax" // default (PICO_FLOAT_IN_RAM is 0): name built with SECTION_NAME
+#endif
+.endm
+
+float_section float_table_shim_on_use_helper
+regular_func float_table_shim_on_use_helper // scans forward from the address in ip for a halfword with high byte 0xdf, stores the word that follows it into sf_table and jumps to that word
+    push {r0-r2, lr} // the saved lr slot at [sp, #12] is overwritten before the pop below
+    mov r0, ip // r0 = scan pointer, taken from ip
+#ifndef NDEBUG
+    // sanity check to make sure we weren't called by non (shimmable_) table_tail_call macro
+    cmp r0, #0
+    bne 1f
+    bkpt #0 // ip was zero: stop in the debugger
+#endif
+1:
+    ldrh r1, [r0] // fetch the next halfword
+    lsrs r2, r1, #8 // r2 = its high byte
+    adds r0, #2 // advance past it
+    cmp r2, #0xdf // keep scanning until the high byte is 0xdf
+    bne 1b
+    uxtb r1, r1 // r1 holds table offset
+    lsrs r2, r0, #2 // carry = bit 1 of r0, i.e. set when r0 is not word aligned
+    bcc 1f // word aligned: a single load will do
+    // unaligned
+    ldrh r2, [r0, #0] // low halfword
+    ldrh r0, [r0, #2] // high halfword
+    lsls r0, #16
+    orrs r0, r2 // r0 = the 32-bit word assembled from the two halfwords
+    b 2f
+1:
+    ldr r0, [r0] // aligned case: load the word directly
+2:
+    ldr r2, =sf_table
+    str r0, [r2, r1] // write the word into sf_table at byte offset r1
+    str r0, [sp, #12] // replace the saved lr so the pop below branches to the loaded address
+    pop {r0-r2, pc} // restore r0-r2 and jump
+
+float_section 642float_shims
+
+@ convert uint64 to float, rounding
+regular_func uint642float_shim
+ movs r2,#0       @ no bits after the point, then fall through
+
+@ convert unsigned 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
+regular_func ufix642float_shim
+ push {r4,r5,r14} @ r4 and r5 are scratch below; popped at ret_pop45
+ cmp r1,#0
+ bpl 3f          @ positive? we can use signed code
+ lsls r5,r1,#31  @ contribution to sticky bits
+ orrs r5,r0      @ r5 = low word combined with the bit about to be shifted out of r1
+ lsrs r0,r1,#1   @ r0 = high word shifted down one place
+ subs r2,#1      @ one fewer bit after the point to match that shift
+ b 4f            @ join the common exit with r0 = top word and r5 = sticky bits
+
+@ convert int64 to float, rounding
+regular_func int642float_shim
+ movs r2,#0       @ no bits after the point, then fall through
+
+@ convert signed 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
+regular_func fix642float_shim
+ push {r4,r5,r14} @ r4 and r5 are scratch below; popped at ret_pop45
+3:
+ movs r5,r0
+ orrs r5,r1
+ beq ret_pop45   @ zero? return +0
+ asrs r5,r1,#31  @ sign bits
+2:
+ asrs r4,r1,#24  @ try shifting 7 bits at a time
+ cmp r4,r5
+ bne 1f          @ next shift will overflow?
+ lsls r1,#7      @ shift r0:r1 up by 7 places as a 64-bit value
+ lsrs r4,r0,#25
+ orrs r1,r4
+ lsls r0,#7
+ adds r2,#7      @ and record 7 more bits after the point
+ b 2b
+1:
+ movs r5,r0      @ r5 = low word
+ movs r0,r1      @ r0 = high word
+4:
+ negs r2,r2      @ r2 = 32+29 minus the count of bits after the point
+ adds r2,#32+29
+
+ // bl packx
+ ldr r1, =0x29ef // packx (hard-coded address; presumably its location in the V1 bootrom - TODO confirm)
+ blx r1
+ret_pop45:
+ pop {r4,r5,r15} @ restore scratch registers and return
+
+float_section fatan2_shim
+regular_func fatan2_shim
+ push {r4,r5,r14} @ NOTE(review): not popped in this file; presumably the routine reached by the final bx does it - confirm
+
+ ldr r4, =0x29c1 // unpackx
+ mov ip, r4 // keep the unpackx address in ip for both calls below
+@ unpack arguments and shift one down to have common exponent
+ blx ip
+ mov r4,r0      @ swap r0 with r1 and r2 with r3 through r4 ...
+ mov r0,r1
+ mov r1,r4
+ mov r4,r2
+ mov r2,r3
+ mov r3,r4
+ blx ip         @ ... then make the second unpackx call
+ lsls r0,r0,#5  @ Q28
+ lsls r1,r1,#5  @ Q28
+ adds r4,r2,r3  @ this is -760 if both arguments are 0 and at least -380-126=-506 otherwise
+ asrs r4,#9     @ with the add below: negative only when the sum above is under -512
+ adds r4,#1
+ bmi 2f         @ force y to 0 proper, so result will be zero
+ subs r4,r2,r3  @ calculate shift
+ bge 1f         @ ex>=ey?
+ negs r4,r4     @ make shift positive
+ asrs r0,r4     @ shift r0 down to the common exponent
+ cmp r4,#28
+ blo 3f
+ asrs r0,#31    @ shift of 28 or more: reduce r0 to its sign bits
+ b 3f
+1:
+ asrs r1,r4     @ shift r1 down to the common exponent
+ cmp r4,#28
+ blo 3f         @ shift of 28 or more falls through to the special case below
+2:
+@ here |x|>>|y| or both x and y are ±0
+ cmp r0,#0
+ bge 4f         @ x positive, return signed 0
+ ldr r3, =0x2cfc         @ &pi_q29, circular coefficients
+ ldr r0,[r3]    @ x negative, return +/- pi
+ asrs r1,#31    @ r1 = sign bits of y
+ eors r0,r1     @ flip the bits of pi when y is negative
+ b 7f
+4:
+ asrs r0,r1,#31 @ r0 = 0 or -1 according to the sign of y
+ b 7f
+3:
+ movs r2,#0              @ initial angle
+ ldr r3, =0x2cfc         @ &pi_q29, circular coefficients
+ cmp r0,#0               @ x negative
+ bge 5f
+ negs r0,r0              @ rotate to 1st/4th quadrants
+ negs r1,r1
+ ldr r2,[r3]             @ pi Q29
+5:
+ movs r4,#1              @ m=1
+ ldr r5, =0x2b97         @ cordic_vec
+ blx r5                  @ also produces magnitude (with scaling factor 1.646760119), which is discarded
+ mov r0,r2               @ result here is -pi/2..3pi/2 Q29
+@ asrs r2,#29
+@ subs r0,r2
+ ldr r3, =0x2cfc         @ &pi_q29, circular coefficients
+ ldr r2,[r3]             @ pi Q29
+ adds r4,r0,r2           @ attempt to fix -3pi/2..-pi case
+ bcs 6f                  @ -pi/2..0? leave result as is
+ subs r4,r0,r2           @ <pi? leave as is
+ bmi 6f
+ subs r0,r4,r2           @ >pi: take off 2pi
+6:
+ subs r0,#1              @ fiddle factor so atan2(0,1)==0
+7:
+ movs r2,#0              @ exponent for pack
+ ldr r3, =0x2b19         @ hard-coded address; presumably the packing tail in the V1 bootrom - TODO confirm
+ bx r3                   @ tail jump: this shim does not return through its own pop
+
+float_section float232_shims
+
+regular_func float2int_shim
+     movs r1,#0                    @ second argument of float2fix_shim set to 0, then fall through
+regular_func float2fix_shim
+     // check for -0 or -denormal upfront
+     asrs r2, r0, #23 // r2 = sign bit and exponent field, sign extended
+     adds r2, #128 // add 256 in two steps of 128
+     adds r2, #128
+     beq 1f // zero only when the sign bit is set and the exponent field is 0
+     // call original
+     ldr r2, =0x2acd // hard-coded address of the original routine
+     bx r2 // tail jump: the original returns straight to our caller
+     1:
+     movs r0, #0 // -0 or negative denormal: return 0
+     bx lr
+
+float_section float264_shims
+
+regular_func float2int64_shim
+ movs r1,#0                    @ and fall through
+regular_func float2fix64_shim
+ push {r14}                    @ popped by d2f64_a
+ bl f2fix                      @ leaves the result in r0:r1 with sign extension bits in r3
+ b d2f64_a                     @ clamp the result and return
+
+regular_func float2uint64_shim
+ movs r1,#0                    @ and fall through
+regular_func float2ufix64_shim
+ asrs r3,r0,#23                @ negative? return 0
+ bmi ret_dzero
+@ and fall through
+
+@ convert float in r0 to signed fixed point in r0:r1:r3, r1 places after point, rounding towards -Inf
+@ result clamped so that r3 can only be 0 or -1
+@ trashes r12
+.thumb_func
+f2fix:
+ push {r4,r14}                 @ popped by the exits below or by d2fix_a
+ mov r12,r1                    @ keep the requested point position for d2fix_a
+ asrs r3,r0,#31                @ r3 = 0 or -1 according to the sign bit
+ lsls r0,#1                    @ drop the sign bit
+ lsrs r2,r0,#24                @ r2 = exponent field
+ beq 1f                        @ zero?
+ cmp r2,#0xff                  @ Inf?
+ beq 2f
+ subs r1,r2,#1
+ subs r2,#0x7f                 @ remove exponent bias
+ lsls r1,#24
+ subs r0,r1                    @ insert implied 1
+ eors r0,r3
+ subs r0,r3                    @ top two's complement
+ asrs r1,r0,#4                 @ convert to double format
+ lsls r0,#28
+ ldr r4, =d2fix_a
+ bx r4                         @ continue in d2fix_a, which pops r4 and returns for us
+1:
+ movs r0,#0                    @ exponent field 0: return zero in r0, r1 and r3
+ movs r1,r0
+ movs r3,r0
+ pop {r4,r15}
+2:
+ mvns r0,r3                    @ return max/min value
+ mvns r1,r3
+ pop {r4,r15}
+
+ret_dzero:
+ movs r0,#0                    @ return a 64-bit zero in r0:r1
+ movs r1,#0
+ bx r14
+
+float_section d2fix_a_float
+
+.weak d2fix_a // weak because it exists in float shims too
+.thumb_func
+d2fix_a:
+@ here
+@ r0:r1 two's complement mantissa
+@ r2    unbiased exponent
+@ r3    mantissa sign extension bits
+@ every exit pops r4 and the return address, matching the push {r4,r14} made in f2fix before it jumps here
+ add r2,r12                    @ exponent plus offset for required binary point position
+ subs r2,#52                   @ required shift
+ bmi 1f                        @ shift down?
+@ here a shift up by r2 places
+ cmp r2,#12                    @ will clamp?
+ bge 2f
+ movs r4,r0                    @ r4 = copy of the low word for the bits that cross into r1
+ lsls r1,r2
+ lsls r0,r2
+ negs r2,r2
+ adds r2,#32                   @ complementary shift
+ lsrs r4,r2
+ orrs r1,r4                    @ merge the crossing bits into the high word
+ pop {r4,r15}
+2:
+ mvns r0,r3
+ mvns r1,r3                    @ overflow: clamp to extreme fixed-point values
+ pop {r4,r15}
+1:
+@ here a shift down by -r2 places
+ adds r2,#32
+ bmi 1f                        @ long shift?
+ mov r4,r1                     @ r4 = copy of the high word for the bits that cross into r0
+ lsls r4,r2
+ negs r2,r2
+ adds r2,#32                   @ complementary shift
+ asrs r1,r2
+ lsrs r0,r2
+ orrs r0,r4                    @ merge the crossing bits into the low word
+ pop {r4,r15}
+1:
+@ here a long shift down
+ movs r0,r1
+ asrs r1,#31                   @ shift down 32 places
+ adds r2,#32
+ bmi 1f                        @ very long shift?
+ negs r2,r2
+ adds r2,#32
+ asrs r0,r2
+ pop {r4,r15}
+1:
+ movs r0,r3                    @ result very near zero: use sign extension bits
+ movs r1,r3
+ pop {r4,r15}
+d2f64_a:
+ asrs r2,r1,#31                @ r2 = sign bits of the 64-bit result
+ cmp r2,r3
+ bne 1f                        @ sign extension bits fail to match sign of result?
+ pop {r15}                     @ in range: return (matches the push {r14} in float2fix64_shim)
+1:
+ mvns r0,r3
+ movs r1,#1
+ lsls r1,#31
+ eors r1,r1,r0                 @ generate extreme fixed-point values
+ pop {r15}
+
+float_section float2double_shim
+regular_func float2double_shim
+ lsrs r3,r0,#31                @ sign bit
+ lsls r3,#31                   @ r3 = sign bit back in bit 31, all other bits clear
+ lsls r1,r0,#1                 @ r1 = input with the sign bit shifted out
+ lsrs r2,r1,#24                @ exponent
+ beq 1f                        @ zero? (any input with exponent field 0 takes this path)
+ cmp r2,#0xff                  @ Inf? (any input with exponent field 0xff takes this path)
+ beq 2f
+ lsrs r1,#4                    @ exponent and top 20 bits of mantissa
+ ldr r2,=(0x3ff-0x7f)<<20     @ difference in exponent offsets
+ adds r1,r2
+ orrs r1,r3                    @ put the sign back into the high word
+ lsls r0,#29                   @ bottom 3 bits of mantissa
+ bx r14
+1:
+ movs r1,r3                    @ return signed zero
+3:
+ movs r0,#0                    @ low word of the result is zero on both special paths
+ bx r14
+2:
+ ldr r1,=0x7ff00000           @ return signed infinity
+ adds r1,r3                    @ add in the sign bit; the input mantissa is not carried over on this path
+ b 3b
+
+#endif
--- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/include/pico/float.h
+++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_float/include/pico/float.h
@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef _PICO_FLOAT_H
+#define _PICO_FLOAT_H
+
+#include <math.h>
+#include <float.h>
+#include "pico.h"
+#include "pico/bootrom/sf_table.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file float.h
+* \defgroup pico_float pico_float
+*
+* Optimized single-precision floating point functions
+*
+* (Replacement) optimized implementations are provided of the following compiler built-ins
+* and math library functions:
+*
+* - __aeabi_fadd, __aeabi_fdiv, __aeabi_fmul, __aeabi_frsub, __aeabi_fsub, __aeabi_cfcmpeq, __aeabi_cfrcmple, __aeabi_cfcmple, __aeabi_fcmpeq, __aeabi_fcmplt, __aeabi_fcmple, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmpun, __aeabi_i2f, __aeabi_l2f, __aeabi_ui2f, __aeabi_ul2f, __aeabi_f2iz, __aeabi_f2lz, __aeabi_f2uiz, __aeabi_f2ulz, __aeabi_f2d, sqrtf, cosf, sinf, tanf, atan2f, expf, logf
+* - ldexpf, copysignf, truncf, floorf, ceilf, roundf, asinf, acosf, atanf, sinhf, coshf, tanhf, asinhf, acoshf, atanhf, exp2f, log2f, exp10f, log10f, powf, hypotf, cbrtf, fmodf, dremf, remainderf, remquof, expm1f, log1pf, fmaf
+* - powintf, sincosf (GNU extensions)
+*
+* The following additional optimized functions are also provided:
+*
+* - fix2float, ufix2float, fix642float, ufix642float, float2fix, float2ufix, float2fix64, float2ufix64, float2int, float2int64, float2int_z, float2int64_z
+*/
+
+float fix2float(int32_t m, int e); // signed 32-bit fixed point to float; e is presumably the number of bits after the binary point - TODO confirm
+float ufix2float(uint32_t m, int e); // unsigned 32-bit variant of fix2float
+float fix642float(int64_t m, int e); // signed 64-bit variant of fix2float
+float ufix642float(uint64_t m, int e); // unsigned 64-bit variant of fix2float
+
+// These methods round towards -Infinity. For the fix variants, e is presumably the number of result bits after the binary point - TODO confirm.
+int32_t float2fix(float f, int e); // float to signed 32-bit fixed point
+uint32_t float2ufix(float f, int e); // float to unsigned 32-bit fixed point
+int64_t float2fix64(float f, int e); // float to signed 64-bit fixed point
+uint64_t float2ufix64(float f, int e); // float to unsigned 64-bit fixed point
+int32_t float2int(float f); // float to 32-bit integer
+int64_t float2int64(float f); // float to 64-bit integer
+
+// These methods round towards 0.
+int32_t float2int_z(float f); // float to 32-bit integer
+int64_t float2int64_z(float f); // float to 64-bit integer
+
+float exp10f(float x); // prototype for one of the replaced math functions listed in the file comment
+void sincosf(float x, float *sinx, float *cosx); // results are written through sinx and cosx
+float powintf(float x, int y); // presumably x raised to the integer power y - TODO confirm
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/binary_info.c
+++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/binary_info.c
@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#if !PICO_NO_BINARY_INFO && !PICO_NO_PROGRAM_INFO
-#include "pico/binary_info.h"
-
-#if !PICO_NO_FLASH
-#include "boot_stage2/config.h"
-#endif
-
-// Note we put at most 4 pieces of binary info in the reset section because that's how much spare space we had
-// (picked the most common ones)... if there is a link failure because of .reset section overflow then move
-// more out.
-#define reset_section_attr __attribute__((section(".reset")))
-
-#if !PICO_NO_FLASH
-#ifndef PICO_NO_BI_BINARY_SIZE
-extern char __flash_binary_end;
-bi_decl_with_attr(bi_binary_end((intptr_t)&__flash_binary_end), reset_section_attr)
-#endif
-#endif
-
-#if !PICO_NO_BI_PROGRAM_BUILD_DATE
-#ifndef PICO_PROGRAM_BUILD_DATE
-#define PICO_PROGRAM_BUILD_DATE __DATE__
-#endif
-bi_decl_with_attr(bi_program_build_date_string(PICO_PROGRAM_BUILD_DATE), reset_section_attr);
-#endif
-
-#if !PICO_NO_BI_PROGRAM_NAME
-#if !defined(PICO_PROGRAM_NAME) && defined(PICO_TARGET_NAME)
-#define PICO_PROGRAM_NAME PICO_TARGET_NAME
-#endif
-#ifdef PICO_PROGRAM_NAME
-bi_decl_with_attr(bi_program_name(PICO_PROGRAM_NAME), reset_section_attr)
-#endif
-#endif
-
-#if !PICO_NO_BI_PICO_BOARD
-#ifdef PICO_BOARD
-bi_decl(bi_string(BINARY_INFO_TAG_RASPBERRY_PI, BINARY_INFO_ID_RP_PICO_BOARD, PICO_BOARD))
-#endif
-#endif
-
-#if !PICO_NO_BI_SDK_VERSION
-#ifdef PICO_SDK_VERSION_STRING
-bi_decl_with_attr(bi_string(BINARY_INFO_TAG_RASPBERRY_PI, BINARY_INFO_ID_RP_SDK_VERSION, PICO_SDK_VERSION_STRING),reset_section_attr)
-#endif
-#endif
-
-#if !PICO_NO_BI_PROGRAM_VERSION_STRING
-#ifdef PICO_PROGRAM_VERSION_STRING
-bi_decl(bi_program_version_string(PICO_PROGRAM_VERSION_STRING))
-#endif
-#endif
-
-
-#if !PICO_NO_BI_PROGRAM_DESCRIPTION
-#ifdef PICO_PROGRAM_DESCRIPTION
-bi_decl(bi_program_description(PICO_PROGRAM_DESCRIPTION))
-#endif
-#endif
-
-#if !PICO_NO_BI_PROGRAM_URL
-#ifdef PICO_PROGRAM_URL
-bi_decl(bi_program_url(PICO_PROGRAM_URL))
-#endif
-#endif
-
-#if !PICO_NO_BI_BOOT_STAGE2_NAME
-#ifdef PICO_BOOT_STAGE2_NAME
-bi_decl(bi_string(BINARY_INFO_TAG_RASPBERRY_PI, BINARY_INFO_ID_RP_BOOT2_NAME, PICO_BOOT_STAGE2_NAME))
-#endif
-#endif
-
-#if !PICO_NO_BI_BUILD_TYPE
-#ifdef PICO_CMAKE_BUILD_TYPE
-bi_decl(bi_program_build_attribute(PICO_CMAKE_BUILD_TYPE))
-#else
-#ifndef NDEBUG
-bi_decl(bi_program_build_attribute("Debug"))
-#else
-bi_decl(bi_program_build_attribute("Release"))
-#endif
-#endif
-
-#if PICO_DEOPTIMIZED_DEBUG
-bi_decl(bi_program_build_attribute("All optimization disabled"))
-#endif
-#endif
-
-#endif
--- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/crt0.S
+++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/crt0.S
@ -1,352 +0,0 @@
-/*
- * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#include "pico.h"
-#include "pico/asm_helper.S"
-
-#include "hardware/regs/m0plus.h"
-#include "hardware/regs/addressmap.h"
-#include "hardware/regs/sio.h"
-#include "pico/binary_info/defs.h"
-
-#ifdef NDEBUG
-#ifndef COLLAPSE_IRQS
-#define COLLAPSE_IRQS
-#endif
-#endif
-
-pico_default_asm_setup
-
-.section .vectors, "ax"
-.align 2
-
-.global __vectors, __VECTOR_TABLE
-__VECTOR_TABLE:
-__vectors:
-.word __StackTop
-.word _reset_handler
-.word NMI_Handler
-.word HardFault_Handler
-.word isr_invalid // Reserved, should never fire
-.word isr_invalid // Reserved, should never fire
-.word isr_invalid // Reserved, should never fire
-.word isr_invalid // Reserved, should never fire
-.word isr_invalid // Reserved, should never fire
-.word isr_invalid // Reserved, should never fire
-.word isr_invalid // Reserved, should never fire
-.word SVC_Handler
-.word isr_invalid // Reserved, should never fire
-.word isr_invalid // Reserved, should never fire
-.word PendSV_Handler
-.word SysTick_Handler
-.word TIMER_IRQ_0_Handler
-.word TIMER_IRQ_1_Handler
-.word TIMER_IRQ_2_Handler
-.word TIMER_IRQ_3_Handler
-.word PWM_IRQ_WRAP_Handler
-.word USBCTRL_IRQ_Handler
-.word XIP_IRQ_Handler
-.word PIO0_IRQ_0_Handler
-.word PIO0_IRQ_1_Handler
-.word PIO1_IRQ_0_Handler
-.word TIMER_IRQ_1_Handler0
-.word TIMER_IRQ_1_Handler1
-.word TIMER_IRQ_1_Handler2
-.word TIMER_IRQ_1_Handler3
-.word TIMER_IRQ_1_Handler4
-.word TIMER_IRQ_1_Handler5
-.word TIMER_IRQ_1_Handler6
-.word TIMER_IRQ_1_Handler7
-.word TIMER_IRQ_1_Handler8
-.word TIMER_IRQ_1_Handler9
-.word TIMER_IRQ_2_Handler0
-.word TIMER_IRQ_2_Handler1
-.word TIMER_IRQ_2_Handler2
-.word TIMER_IRQ_2_Handler3
-.word TIMER_IRQ_2_Handler4
-.word TIMER_IRQ_2_Handler5
-.word TIMER_IRQ_2_Handler6
-.word TIMER_IRQ_2_Handler7
-.word TIMER_IRQ_2_Handler8
-.word TIMER_IRQ_2_Handler9
-.word TIMER_IRQ_3_Handler0
-.word TIMER_IRQ_3_Handler1
-
-// all default exception handlers do nothing, and we can check for them being set to our
-// default values by seeing if they point to somewhere between __defaults_isrs_start and __default_isrs_end
-.global __default_isrs_start
-__default_isrs_start:
-
-// Declare a weak symbol for each ISR.
-// By default, they will fall through to the undefined IRQ handler below (breakpoint),
-// but can be overridden by C functions with correct name.
-
-.macro decl_isr_bkpt name
-.weak \name
-.type \name,%function
-.thumb_func
-\name:
-    bkpt #0
-.endm
-
-// these are separated out for clarity
-decl_isr_bkpt isr_invalid
-decl_isr_bkpt NMI_Handler
-decl_isr_bkpt HardFault_Handler
-decl_isr_bkpt SVC_Handler
-decl_isr_bkpt PendSV_Handler
-decl_isr_bkpt SysTick_Handler
-
-.global __default_isrs_end
-__default_isrs_end:
-
-.macro decl_isr name
-.weak \name
-.type \name,%function
-.thumb_func
-\name:
-.endm
-
-decl_isr TIMER_IRQ_0_Handler
-decl_isr TIMER_IRQ_1_Handler
-decl_isr TIMER_IRQ_2_Handler
-decl_isr TIMER_IRQ_3_Handler
-decl_isr PWM_IRQ_WRAP_Handler
-decl_isr USBCTRL_IRQ_Handler
-decl_isr XIP_IRQ_Handler
-decl_isr PIO0_IRQ_0_Handler
-decl_isr PIO0_IRQ_1_Handler
-decl_isr PIO1_IRQ_0_Handler
-decl_isr TIMER_IRQ_1_Handler0
-decl_isr TIMER_IRQ_1_Handler1
-decl_isr TIMER_IRQ_1_Handler2
-decl_isr TIMER_IRQ_1_Handler3
-decl_isr TIMER_IRQ_1_Handler4
-decl_isr TIMER_IRQ_1_Handler5
-decl_isr TIMER_IRQ_1_Handler6
-decl_isr TIMER_IRQ_1_Handler7
-decl_isr TIMER_IRQ_1_Handler8
-decl_isr TIMER_IRQ_1_Handler9
-decl_isr TIMER_IRQ_2_Handler0
-decl_isr TIMER_IRQ_2_Handler1
-decl_isr TIMER_IRQ_2_Handler2
-decl_isr TIMER_IRQ_2_Handler3
-decl_isr TIMER_IRQ_2_Handler4
-decl_isr TIMER_IRQ_2_Handler5
-decl_isr TIMER_IRQ_2_Handler6
-decl_isr TIMER_IRQ_2_Handler7
-decl_isr TIMER_IRQ_2_Handler8
-decl_isr TIMER_IRQ_2_Handler9
-decl_isr TIMER_IRQ_3_Handler0
-decl_isr TIMER_IRQ_3_Handler1
-
-// All unhandled USER IRQs fall through to here
-.global __unhandled_user_irq
-.thumb_func
-__unhandled_user_irq:
-    mrs  r0, ipsr
-    subs r0, #16
-.global unhandled_user_irq_num_in_r0
-unhandled_user_irq_num_in_r0:
-    bkpt #0
-
-// ----------------------------------------------------------------------------
-
-.section .binary_info_header, "a"
-
-// Header must be in first 256 bytes of main image (i.e. excluding flash boot2).
-// For flash builds we put it immediately after vector table; for NO_FLASH the
-// vectors are at a +0x100 offset because the bootrom enters RAM images directly
-// at their lowest address, so we put the header in the VTOR alignment hole.
-
-#if !PICO_NO_BINARY_INFO
-binary_info_header:
-.word BINARY_INFO_MARKER_START
-.word __binary_info_start
-.word __binary_info_end
-.word data_cpy_table // we may need to decode pointers that are in RAM at runtime.
-.word BINARY_INFO_MARKER_END
-#endif
-
-// ----------------------------------------------------------------------------
-
-.section .reset, "ax"
-
-// On flash builds, the vector table comes first in the image (conventional).
-// On NO_FLASH builds, the reset handler section comes first, as the entry
-// point is at offset 0 (fixed due to bootrom), and VTOR is highly-aligned.
-// Image is entered in various ways:
-//
-// - NO_FLASH builds are entered from beginning by UF2 bootloader
-//
-// - Flash builds vector through the table into _reset_handler from boot2
-//
-// - Either type can be entered via _entry_point by the debugger, and flash builds
-//   must then be sent back round the boot sequence to properly initialise flash
-
-// ELF entry point:
-.type _entry_point,%function
-.thumb_func
-.global _entry_point
-_entry_point:
-
-#if PICO_NO_FLASH
-    // Vector through our own table (SP, VTOR will not have been set up at
-    // this point). Same path for debugger entry and bootloader entry.
-    ldr r0, =__vectors
-#else
-    // Debugger tried to run code after loading, so SSI is in 03h-only mode.
-    // Go back through bootrom + boot2 to properly initialise flash.
-    movs r0, #0
-#endif
-    ldr r1, =(PPB_BASE + M0PLUS_VTOR_OFFSET)
-    str r0, [r1]
-    ldmia r0!, {r1, r2}
-    msr msp, r1
-    bx r2
-
-// Reset handler:
-// - initialises .data
-// - clears .bss
-// - calls runtime_init
-// - calls main
-// - calls exit (which should eventually hang the processor via _exit)
-
-.type _reset_handler,%function
-.thumb_func
-_reset_handler:
-    // Only core 0 should run the C runtime startup code; core 1 is normally
-    // sleeping in the bootrom at this point but check to be sure
-    ldr r0, =(SIO_BASE + SIO_CPUID_OFFSET)
-    ldr r0, [r0]
-    cmp r0, #0
-    bne hold_non_core0_in_bootrom
-
-    // In a NO_FLASH binary, don't perform .data copy, since it's loaded
-    // in-place by the SRAM load. Still need to clear .bss
-#if !PICO_NO_FLASH
-    adr r4, data_cpy_table
-
-    // assume there is at least one entry
-1:
-    ldmia r4!, {r1-r3}
-    cmp r1, #0
-    beq 2f
-    bl data_cpy
-    b 1b
-2:
-#endif
-
-    // Zero out the BSS
-    ldr r1, =__bss_start__
-    ldr r2, =__bss_end__
-    movs r0, #0
-    b bss_fill_test
-bss_fill_loop:
-    stm r1!, {r0}
-bss_fill_test:
-    cmp r1, r2
-    bne bss_fill_loop
-
-platform_entry: // symbol for stack traces
-    // Use 32-bit jumps, in case these symbols are moved out of branch range
-    // (e.g. if main is in SRAM and crt0 in flash)
-    ldr r1, =runtime_init
-    blx r1
-    ldr r1, =main
-    blx r1
-    ldr r1, =exit
-    blx r1
-    // exit should not return.  If it does, hang the core.
-    // (fall thru into our hang _exit impl
-1: // separate label because _exit can be moved out of branch range
-    bkpt #0
-    b 1b
-
-#if !PICO_NO_FLASH
-data_cpy_loop:
-    ldm r1!, {r0}
-    stm r2!, {r0}
-data_cpy:
-    cmp r2, r3
-    blo data_cpy_loop
-    bx lr
-#endif
-
-// Note the data copy table is still included for NO_FLASH builds, even though
-// we skip the copy, because it is listed in binary info
-
-.align 2
-data_cpy_table:
-#if PICO_COPY_TO_RAM
-.word __ram_text_source__
-.word __ram_text_start__
-.word __ram_text_end__
-#endif
-.word __etext
-.word __data_start__
-.word __data_end__
-
-.word __scratch_x_source__
-.word __scratch_x_start__
-.word __scratch_x_end__
-
-.word __scratch_y_source__
-.word __scratch_y_start__
-.word __scratch_y_end__
-
-.word 0 // null terminator
-
-// ----------------------------------------------------------------------------
-// Provide safe defaults for _exit and runtime_init
-// Full implementations usually provided by platform.c
-
-.weak runtime_init
-.type runtime_init,%function
-.thumb_func
-runtime_init:
-    bx lr
-
-// ----------------------------------------------------------------------------
-// If core 1 somehow gets into crt0 due to a spectacular VTOR mishap, we need to
-// catch it and send back to the sleep-and-launch code in the bootrom. Shouldn't
-// happen (it should sleep in the ROM until given an entry point via the
-// cross-core FIFOs) but it's good to be defensive.
-
-hold_non_core0_in_bootrom:
-    ldr r0, = 'W' | ('V' << 8)
-    bl rom_func_lookup
-    bx r0
-
-// ----------------------------------------------------------------------------
-// Stack/heap dummies to set size
-
-// Prior to SDK 1.5.1 these were `.section .stack` without the `, "a"`... Clang linker gives a warning about this,
-// however setting it explicitly to `, "a"` makes GCC *now* discard the section unless it is also KEEP. This
-// seems like very surprising behavior!
-//
-// Strictly the most correct thing to do (as .stack and .heap are unreferenced) is to mark them as "a", and also KEEP, which
-// works correctly for both GCC and Clang, however doing so may break anyone who already has custom linker scripts without
-// the KEEP. Therefore we will only add the "a" on Clang, but will also use KEEP to our own linker scripts.
-
-.macro spacer_section name
-#if PICO_ASSEMBLER_IS_CLANG
-.section \name, "a"
-#else
-.section \name
-#endif
-.endm
-
-spacer_section .stack
-// align to allow for memory protection (although this alignment is pretty much ignored by linker script)
-.p2align 5
-    .equ StackSize, PICO_STACK_SIZE
-.space StackSize
-
-spacer_section .heap
-.p2align 2
-    .equ HeapSize, PICO_HEAP_SIZE
-.space HeapSize
--- a/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/doc.h
+++ b/targets/TARGET_RASPBERRYPI/pico-sdk/src/rp2_common/pico_standard_link/doc.h
@ -1,10 +0,0 @@
-/**
- * \defgroup pico_standard_link pico_standard_link
- * \brief Standard link step providing the basics for creating a runnable binary
- * 
- * This includes
- *   - C runtime initialization
- *   - Linker scripts for 'default', 'no_flash', 'blocked_ram' and 'copy_to_ram' binaries
- *   - 'Binary Information' support
- *   - Linker option control
- */
--- a/targets/TARGET_RASPBERRYPI/reimport_pico_sdk.py
+++ b/targets/TARGET_RASPBERRYPI/reimport_pico_sdk.py
@ -1,3 +1,6 @@
+# Copyright (c) 2024 ARM Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
 """
 This script can be used to reimport a newer version of the RPi Pico SDK.

@ -65,6 +68,7 @@ FILES_DIRS_TO_COPY: List[pathlib.Path] = [
    pathlib.Path("pico_sdk_version.cmake"),
    pathlib.Path("src") / "rp2_common" / "hardware_base",
    pathlib.Path("src") / "rp2_common" / "hardware_adc",
+    pathlib.Path("src") / "rp2_common" / "hardware_divider",
    pathlib.Path("src") / "rp2_common" / "hardware_resets",
    pathlib.Path("src") / "rp2_common" / "hardware_pwm",
    pathlib.Path("src") / "rp2_common" / "hardware_flash",
@ -83,6 +87,7 @@ FILES_DIRS_TO_COPY: List[pathlib.Path] = [
    pathlib.Path("src") / "rp2_common" / "hardware_rtc",
    pathlib.Path("src") / "rp2_common" / "pico_bootrom",
    pathlib.Path("src") / "rp2_common" / "pico_platform",
+    pathlib.Path("src") / "rp2_common" / "pico_float",
    pathlib.Path("src") / "rp2_common" / "cmsis" / "stub",
    pathlib.Path("src") / "common" / "pico_time",
    pathlib.Path("src") / "common" / "pico_sync",