From f9f887d88ebe79b48fc4f497f19044ea18f55acf Mon Sep 17 00:00:00 2001
From: Kevin Bracey
Date: Mon, 1 Apr 2019 15:33:45 +0300
Subject: [PATCH] mbed_atomic_impl.h: Add pre-op (xxx_fetch) forms

Even though C/C++11 don't offer pre-op forms (that do the operation then
return the new value) of their freestanding functions, we will be making
them visible via pre-op operators on `Atomic` (++, --, +=, -=, &=, |=, ^=).

It is easier to do a pre-op than a post-op, as we can use one fewer
register in the assembler, so it's worth optimising for what will be
quite common cases.

Make these forms accessible for `Atomic`, but don't document them for
standalone use at this stage.
---
 platform/internal/mbed_atomic_impl.h | 218 +++++++++++++++++++++------
 1 file changed, 173 insertions(+), 45 deletions(-)
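Note on the register saving described above: the post-op loop must keep the
old value live across the operation, while the pre-op loop can operate in
place. A rough sketch of the two exclusive-access loops in the patch's own
GCC/Clang assembler syntax (register names illustrative only; the real code
is in the DO_MBED_LOCKFREE_OLDVAL_3OP_ASM and DO_MBED_LOCKFREE_NEWVAL_2OP_ASM
macros below):

    @ post-op (fetch_add): oldValue, newValue and fail - three registers
    LDREX   oldValue, [valuePtr]
    ADDS    newValue, oldValue, arg
    STREX   fail, newValue, [valuePtr]

    @ pre-op (incr): result built in place - just newValue and fail
    LDREX   newValue, [valuePtr]
    ADDS    newValue, arg
    STREX   fail, newValue, [valuePtr]

Both loops retry while fail is non-zero; the pre-op form finishes with the
value it must return already in newValue, with no separate old-value register.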
diff --git a/platform/internal/mbed_atomic_impl.h b/platform/internal/mbed_atomic_impl.h
index f169663650..6a130318ad 100644
--- a/platform/internal/mbed_atomic_impl.h
+++ b/platform/internal/mbed_atomic_impl.h
@@ -132,14 +132,49 @@ extern "C" {
 #endif
 
 #ifdef __CC_ARM
-#define DO_MBED_LOCKFREE_3OP_ASM(OP, Constants, M) \
+#define DO_MBED_LOCKFREE_NEWVAL_2OP_ASM(OP, Constants, M) \
+    __asm { \
+        LDREX##M newValue, [valuePtr] ; \
+        OP newValue, arg ; \
+        STREX##M fail, newValue, [valuePtr] \
+    }
+#elif defined __clang__ || defined __GNUC__
+#define DO_MBED_LOCKFREE_NEWVAL_2OP_ASM(OP, Constants, M) \
+    __asm volatile ( \
+        "LDREX"#M "\t%[newValue], %[value]\n\t" \
+        #OP "\t%[newValue], %[arg]\n\t" \
+        "STREX"#M "\t%[fail], %[newValue], %[value]\n\t" \
+        : [newValue] "=&" MBED_DOP_REG (newValue), \
+          [fail] "=&r" (fail), \
+          [value] "+Q" (*valuePtr) \
+        : [arg] Constants MBED_DOP_REG (arg) \
+        : "cc" \
+    )
+#elif defined __ICCARM__
+/* In IAR "r" means low register if Thumbv1 (there's no way to specify any register...) */
+/* IAR does not support "ADDS reg, reg", so write as 3-operand */
+#define DO_MBED_LOCKFREE_NEWVAL_2OP_ASM(OP, Constants, M) \
+    asm volatile ( \
+        "LDREX"#M "\t%[newValue], [%[valuePtr]]\n" \
+        #OP "\t%[newValue], %[newValue], %[arg]\n" \
+        "STREX"#M "\t%[fail], %[newValue], [%[valuePtr]]\n" \
+        : [newValue] "=&r" (newValue), \
+          [fail] "=&r" (fail) \
+        : [valuePtr] "r" (valuePtr), \
+          [arg] "r" (arg) \
+        : "memory", "cc" \
+    )
+#endif
+
+#ifdef __CC_ARM
+#define DO_MBED_LOCKFREE_OLDVAL_3OP_ASM(OP, Constants, M) \
     __asm { \
         LDREX##M oldValue, [valuePtr] ; \
         OP newValue, oldValue, arg ; \
         STREX##M fail, newValue, [valuePtr] \
     }
 #elif defined __clang__ || defined __GNUC__
-#define DO_MBED_LOCKFREE_3OP_ASM(OP, Constants, M) \
+#define DO_MBED_LOCKFREE_OLDVAL_3OP_ASM(OP, Constants, M) \
     __asm volatile ( \
         ".syntax unified\n\t" \
         "LDREX"#M "\t%[oldValue], %[value]\n\t" \
@@ -154,7 +189,7 @@ extern "C" {
     )
 #elif defined __ICCARM__
 /* In IAR "r" means low register if Thumbv1 (there's no way to specify any register...) */
-#define DO_MBED_LOCKFREE_3OP_ASM(OP, Constants, M) \
+#define DO_MBED_LOCKFREE_OLDVAL_3OP_ASM(OP, Constants, M) \
     asm volatile ( \
         "LDREX"#M "\t%[oldValue], [%[valuePtr]]\n" \
         #OP "\t%[newValue], %[oldValue], %[arg]\n" \
@@ -172,7 +207,7 @@ extern "C" {
  * are only 2-operand versions of the instructions.
  */
 #ifdef __CC_ARM
-#define DO_MBED_LOCKFREE_2OP_ASM(OP, Constants, M) \
+#define DO_MBED_LOCKFREE_OLDVAL_2OP_ASM(OP, Constants, M) \
     __asm { \
         LDREX##M oldValue, [valuePtr] ; \
         MOV newValue, oldValue ; \
@@ -180,7 +215,7 @@ extern "C" {
         STREX##M fail, newValue, [valuePtr] \
     }
 #elif defined __clang__ || defined __GNUC__
-#define DO_MBED_LOCKFREE_2OP_ASM(OP, Constants, M) \
+#define DO_MBED_LOCKFREE_OLDVAL_2OP_ASM(OP, Constants, M) \
     __asm volatile ( \
         ".syntax unified\n\t" \
         "LDREX"#M "\t%[oldValue], %[value]\n\t" \
@@ -195,7 +230,7 @@ extern "C" {
         : "cc" \
     )
 #elif defined __ICCARM__
-#define DO_MBED_LOCKFREE_2OP_ASM(OP, Constants, M) \
+#define DO_MBED_LOCKFREE_OLDVAL_2OP_ASM(OP, Constants, M) \
     asm volatile ( \
         "LDREX"#M "\t%[oldValue], [%[valuePtr]]\n" \
         "MOV" "\t%[newValue], %[oldValue]\n" \
@@ -444,17 +479,41 @@ MBED_FORCEINLINE bool core_util_atomic_cas_explicit_##fn_suffix(volatile T *ptr,
 }
 
-#define DO_MBED_LOCKFREE_2OP(name, OP, Constants, retValue, T, fn_suffix, M) \
+#define DO_MBED_LOCKFREE_NEWVAL_2OP(name, OP, Constants, T, fn_suffix, M) \
+inline T core_util_atomic_##name##_##fn_suffix(volatile T *valuePtr, T arg) \
+{ \
+    uint32_t fail, newValue; \
+    MBED_BARRIER(); \
+    do { \
+        DO_MBED_LOCKFREE_NEWVAL_2OP_ASM(OP, Constants, M); \
+    } while (fail); \
+    MBED_BARRIER(); \
+    return (T) newValue; \
+} \
+ \
+MBED_FORCEINLINE T core_util_atomic_##name##_explicit_##fn_suffix( \
+    volatile T *valuePtr, T arg, mbed_memory_order order) \
+{ \
+    uint32_t fail, newValue; \
+    MBED_RELEASE_BARRIER(order); \
+    do { \
+        DO_MBED_LOCKFREE_NEWVAL_2OP_ASM(OP, Constants, M); \
+    } while (fail); \
+    MBED_ACQUIRE_BARRIER(order); \
+    return (T) newValue; \
+} \
+
+#define DO_MBED_LOCKFREE_OLDVAL_2OP(name, OP, Constants, T, fn_suffix, M) \
 inline T core_util_atomic_##name##_##fn_suffix(volatile T *valuePtr, T arg) \
 { \
     T oldValue; \
     uint32_t fail, newValue; \
     MBED_BARRIER(); \
     do { \
-        DO_MBED_LOCKFREE_2OP_ASM(OP, Constants, M); \
+        DO_MBED_LOCKFREE_OLDVAL_2OP_ASM(OP, Constants, M); \
     } while (fail); \
     MBED_BARRIER(); \
-    return (T) retValue; \
+    return oldValue; \
 } \
 \
 MBED_FORCEINLINE T core_util_atomic_##name##_explicit_##fn_suffix( \
@@ -464,22 +523,22 @@ MBED_FORCEINLINE T core_util_atomic_##name##_explicit_##fn_suffix(
     uint32_t fail, newValue; \
     MBED_RELEASE_BARRIER(order); \
     do { \
-        DO_MBED_LOCKFREE_2OP_ASM(OP, Constants, M); \
+        DO_MBED_LOCKFREE_OLDVAL_2OP_ASM(OP, Constants, M); \
     } while (fail); \
     MBED_ACQUIRE_BARRIER(order); \
-    return (T) retValue; \
+    return oldValue; \
 } \
 
-#define DO_MBED_LOCKFREE_3OP(name, OP, Constants, retValue, T, fn_suffix, M) \
+#define DO_MBED_LOCKFREE_OLDVAL_3OP(name, OP, Constants, T, fn_suffix, M) \
 inline T core_util_atomic_##name##_##fn_suffix(volatile T *valuePtr, T arg) { \
     T oldValue; \
     uint32_t fail, newValue; \
     MBED_BARRIER(); \
     do { \
-        DO_MBED_LOCKFREE_3OP_ASM(OP, Constants, M); \
+        DO_MBED_LOCKFREE_OLDVAL_3OP_ASM(OP, Constants, M); \
     } while (fail); \
     MBED_BARRIER(); \
-    return (T) retValue; \
+    return oldValue; \
 } \
 \
 MBED_FORCEINLINE T core_util_atomic_##name##_explicit_##fn_suffix( \
@@ -489,10 +548,10 @@ MBED_FORCEINLINE T core_util_atomic_##name##_explicit_##fn_suffix(
     uint32_t fail, newValue; \
     MBED_RELEASE_BARRIER(order); \
     do { \
-        DO_MBED_LOCKFREE_3OP_ASM(OP, Constants, M); \
+        DO_MBED_LOCKFREE_OLDVAL_3OP_ASM(OP, Constants, M); \
     } while (fail); \
     MBED_ACQUIRE_BARRIER(order); \
-    return (T) retValue; \
+    return oldValue; \
 } \
 
 inline bool core_util_atomic_flag_test_and_set(volatile core_util_atomic_flag *valuePtr)
@@ -526,15 +585,20 @@ MBED_FORCEINLINE bool core_util_atomic_flag_test_and_set_explicit(volatile core_
     DO_MBED_LOCKFREE_EXCHG_OP(uint16_t, u16, H) \
     DO_MBED_LOCKFREE_EXCHG_OP(uint32_t, u32, )
 
-#define DO_MBED_LOCKFREE_3OPS(name, OP, Constants, retValue) \
-    DO_MBED_LOCKFREE_3OP(name, OP, Constants, retValue, uint8_t, u8, B) \
-    DO_MBED_LOCKFREE_3OP(name, OP, Constants, retValue, uint16_t, u16, H) \
-    DO_MBED_LOCKFREE_3OP(name, OP, Constants, retValue, uint32_t, u32, )
+#define DO_MBED_LOCKFREE_NEWVAL_2OPS(name, OP, Constants) \
+    DO_MBED_LOCKFREE_NEWVAL_2OP(name, OP, Constants, uint8_t, u8, B) \
+    DO_MBED_LOCKFREE_NEWVAL_2OP(name, OP, Constants, uint16_t, u16, H) \
+    DO_MBED_LOCKFREE_NEWVAL_2OP(name, OP, Constants, uint32_t, u32, )
 
-#define DO_MBED_LOCKFREE_2OPS(name, OP, Constants, retValue) \
-    DO_MBED_LOCKFREE_2OP(name, OP, Constants, retValue, uint8_t, u8, B) \
-    DO_MBED_LOCKFREE_2OP(name, OP, Constants, retValue, uint16_t, u16, H) \
-    DO_MBED_LOCKFREE_2OP(name, OP, Constants, retValue, uint32_t, u32, )
+#define DO_MBED_LOCKFREE_OLDVAL_3OPS(name, OP, Constants) \
+    DO_MBED_LOCKFREE_OLDVAL_3OP(name, OP, Constants, uint8_t, u8, B) \
+    DO_MBED_LOCKFREE_OLDVAL_3OP(name, OP, Constants, uint16_t, u16, H) \
+    DO_MBED_LOCKFREE_OLDVAL_3OP(name, OP, Constants, uint32_t, u32, )
+
+#define DO_MBED_LOCKFREE_OLDVAL_2OPS(name, OP, Constants) \
+    DO_MBED_LOCKFREE_OLDVAL_2OP(name, OP, Constants, uint8_t, u8, B) \
+    DO_MBED_LOCKFREE_OLDVAL_2OP(name, OP, Constants, uint16_t, u16, H) \
+    DO_MBED_LOCKFREE_OLDVAL_2OP(name, OP, Constants, uint32_t, u32, )
 
 #define DO_MBED_LOCKFREE_CAS_STRONG_OPS() \
     DO_MBED_LOCKFREE_CAS_STRONG_OP(uint8_t, u8, B) \
@@ -546,6 +610,11 @@ MBED_FORCEINLINE bool core_util_atomic_flag_test_and_set_explicit(volatile core_
     DO_MBED_LOCKFREE_CAS_WEAK_OP(uint16_t, u16, H) \
     DO_MBED_LOCKFREE_CAS_WEAK_OP(uint32_t, u32, )
 
+// Note that these macros define a number of functions that are
+// not in mbed_atomic.h, like core_util_atomic_and_fetch_u16.
+// These are not documented via the doxygen in mbed_atomic.h, so
+// for now should be regarded as internal only. They are used by the
+// Atomic template as an optimisation though.
 
 // We always use the "S" form of operations - avoids yet another
 // possible unneeded distinction between Thumbv1 and Thumbv2, and
@@ -559,33 +628,42 @@ MBED_FORCEINLINE bool core_util_atomic_flag_test_and_set_explicit(volatile core_
 // of the 16-bit forms. Shame we can't specify "don't care"
 // for the "S", or get the GNU multi-alternative to
 // choose ADDS/ADD appropriately.
-DO_MBED_LOCKFREE_3OPS(incr, ADDS, "IL", newValue)
-DO_MBED_LOCKFREE_3OPS(decr, SUBS, "IL", newValue)
-DO_MBED_LOCKFREE_3OPS(fetch_add, ADDS, "IL", oldValue)
-DO_MBED_LOCKFREE_3OPS(fetch_sub, SUBS, "IL", oldValue)
+DO_MBED_LOCKFREE_OLDVAL_3OPS(fetch_add, ADDS, "IL")
+DO_MBED_LOCKFREE_NEWVAL_2OPS(incr, ADDS, "IL")
+DO_MBED_LOCKFREE_OLDVAL_3OPS(fetch_sub, SUBS, "IL")
+DO_MBED_LOCKFREE_NEWVAL_2OPS(decr, SUBS, "IL")
 // K constraint is inverted 12-bit modified immediate constant
 // (relying on assembler substituting BIC for AND)
-DO_MBED_LOCKFREE_3OPS(fetch_and, ANDS, "IK", oldValue)
+DO_MBED_LOCKFREE_OLDVAL_3OPS(fetch_and, ANDS, "IK")
+DO_MBED_LOCKFREE_NEWVAL_2OPS(and_fetch, ANDS, "IK")
 #if MBED_EXCLUSIVE_ACCESS_ARM
 // ARM does not have ORN instruction, so take plain immediates.
-DO_MBED_LOCKFREE_3OPS(fetch_or, ORRS, "I", oldValue)
+DO_MBED_LOCKFREE_OLDVAL_3OPS(fetch_or, ORRS, "I")
+DO_MBED_LOCKFREE_NEWVAL_2OPS(or_fetch, ORRS, "I")
 #else
 // Thumb-2 has ORN instruction, and assembler substitutes ORN for ORR.
-DO_MBED_LOCKFREE_3OPS(fetch_or, ORRS, "IK", oldValue)
+DO_MBED_LOCKFREE_OLDVAL_3OPS(fetch_or, ORRS, "IK")
+DO_MBED_LOCKFREE_NEWVAL_2OPS(or_fetch, ORRS, "IK")
 #endif
 // I constraint is 12-bit modified immediate operand
-DO_MBED_LOCKFREE_3OPS(fetch_xor, EORS, "I", oldValue)
+DO_MBED_LOCKFREE_OLDVAL_3OPS(fetch_xor, EORS, "I")
+DO_MBED_LOCKFREE_NEWVAL_2OPS(xor_fetch, EORS, "I")
 #else // MBED_EXCLUSIVE_ACCESS_THUMB1
+// I constraint is 0-255; J is -255 to -1, suitable for
+// 2-op ADD/SUB (relying on assembler to swap ADD/SUB)
 // L constraint is -7 to +7, suitable for 3-op ADD/SUB
 // (relying on assembler to swap ADD/SUB)
-DO_MBED_LOCKFREE_3OPS(incr, ADDS, "L", newValue)
-DO_MBED_LOCKFREE_3OPS(decr, SUBS, "L", newValue)
-DO_MBED_LOCKFREE_3OPS(fetch_add, ADDS, "L", oldValue)
-DO_MBED_LOCKFREE_3OPS(fetch_sub, SUBS, "L", oldValue)
-DO_MBED_LOCKFREE_2OPS(fetch_and, ANDS, "", oldValue)
-DO_MBED_LOCKFREE_2OPS(fetch_or, ORRS, "", oldValue)
-DO_MBED_LOCKFREE_2OPS(fetch_xor, EORS, "", oldValue)
+DO_MBED_LOCKFREE_OLDVAL_3OPS(fetch_add, ADDS, "L")
+DO_MBED_LOCKFREE_NEWVAL_2OPS(incr, ADDS, "IJ")
+DO_MBED_LOCKFREE_OLDVAL_3OPS(fetch_sub, SUBS, "L")
+DO_MBED_LOCKFREE_NEWVAL_2OPS(decr, SUBS, "IJ")
+DO_MBED_LOCKFREE_OLDVAL_2OPS(fetch_and, ANDS, "")
+DO_MBED_LOCKFREE_NEWVAL_2OPS(and_fetch, ANDS, "")
+DO_MBED_LOCKFREE_OLDVAL_2OPS(fetch_or, ORRS, "")
+DO_MBED_LOCKFREE_NEWVAL_2OPS(or_fetch, ORRS, "")
+DO_MBED_LOCKFREE_OLDVAL_2OPS(fetch_xor, EORS, "")
+DO_MBED_LOCKFREE_NEWVAL_2OPS(xor_fetch, EORS, "")
 #endif
 
 DO_MBED_LOCKFREE_EXCHG_OPS()
@@ -1236,6 +1314,20 @@ inline T *core_util_atomic_fetch_sub_explicit(T *volatile *valuePtr, ptrdiff_t a
     DO_MBED_ATOMIC_OP_TEMPLATE(name, int32_t, s32) \
     DO_MBED_ATOMIC_OP_TEMPLATE(name, int64_t, s64)
 
+#define DO_MBED_ATOMIC_MANUAL_PRE_OP_TEMPLATE(name, T, fn_suffix, postname, OP) \
+template<> \
+inline T core_util_atomic_##name(volatile T *valuePtr, T arg) \
+{ \
+    return core_util_atomic_##postname##_##fn_suffix(valuePtr, arg) OP; \
+} \
+ \
+template<> \
+inline T core_util_atomic_##name##_explicit(volatile T *valuePtr, T arg, \
+                                            mbed_memory_order order) \
+{ \
+    return core_util_atomic_##postname##_explicit_##fn_suffix(valuePtr, arg, order) OP; \
+}
+
 DO_MBED_ATOMIC_OP_U_TEMPLATES(exchange)
 DO_MBED_ATOMIC_OP_S_TEMPLATES(exchange)
 DO_MBED_ATOMIC_OP_U_TEMPLATES(fetch_add)
@@ -1246,25 +1338,61 @@ DO_MBED_ATOMIC_OP_U_TEMPLATES(fetch_and)
 DO_MBED_ATOMIC_OP_U_TEMPLATES(fetch_or)
 DO_MBED_ATOMIC_OP_U_TEMPLATES(fetch_xor)
 
+namespace mbed {
+namespace impl {
+
+// Use custom assembler forms for pre-ops where available, else construct from post-ops
+#if MBED_EXCLUSIVE_ACCESS
+#define DO_MBED_ATOMIC_PRE_OP_TEMPLATES(name, postname, OP) \
+    template<typename T> T core_util_atomic_##name(volatile T *valuePtr, T arg); \
+    template<typename T> T core_util_atomic_##name##_explicit(volatile T *valuePtr, T arg, mbed_memory_order order); \
+    DO_MBED_ATOMIC_OP_TEMPLATE(name, uint8_t, u8) \
+    DO_MBED_ATOMIC_OP_TEMPLATE(name, uint16_t, u16) \
+    DO_MBED_ATOMIC_OP_TEMPLATE(name, uint32_t, u32) \
+    DO_MBED_ATOMIC_MANUAL_PRE_OP_TEMPLATE(name, uint64_t, u64, postname, OP)
+#else
+#define DO_MBED_ATOMIC_PRE_OP_TEMPLATES(name, postname, OP) \
+    template<typename T> T core_util_atomic_##name(volatile T *valuePtr, T arg); \
+    template<typename T> T core_util_atomic_##name##_explicit(volatile T *valuePtr, T arg, mbed_memory_order order); \
+    DO_MBED_ATOMIC_MANUAL_PRE_OP_TEMPLATE(name, uint8_t, u8, postname, OP) \
+    DO_MBED_ATOMIC_MANUAL_PRE_OP_TEMPLATE(name, uint16_t, u16, postname, OP) \
+    DO_MBED_ATOMIC_MANUAL_PRE_OP_TEMPLATE(name, uint32_t, u32, postname, OP) \
+    DO_MBED_ATOMIC_MANUAL_PRE_OP_TEMPLATE(name, uint64_t, u64, postname, OP)
+#endif
+
+// *INDENT-OFF*
+DO_MBED_ATOMIC_PRE_OP_TEMPLATES(incr, fetch_add, + arg)
+DO_MBED_ATOMIC_PRE_OP_TEMPLATES(decr, fetch_sub, - arg)
+DO_MBED_ATOMIC_PRE_OP_TEMPLATES(and_fetch, fetch_and, & arg)
+DO_MBED_ATOMIC_PRE_OP_TEMPLATES(or_fetch, fetch_or, | arg)
+DO_MBED_ATOMIC_PRE_OP_TEMPLATES(xor_fetch, fetch_xor, ^ arg)
+// *INDENT-ON*
+
+}
+}
+
 #endif // __cplusplus
 
 #undef MBED_DOP_REG
 #undef MBED_CMP_IMM
 #undef MBED_SUB3_IMM
 #undef DO_MBED_LOCKFREE_EXCHG_ASM
-#undef DO_MBED_LOCKFREE_3OP_ASM
-#undef DO_MBED_LOCKFREE_2OP_ASM
+#undef DO_MBED_LOCKFREE_NEWVAL_2OP_ASM
+#undef DO_MBED_LOCKFREE_OLDVAL_3OP_ASM
+#undef DO_MBED_LOCKFREE_OLDVAL_2OP_ASM
 #undef DO_MBED_LOCKFREE_CAS_WEAK_ASM
 #undef DO_MBED_LOCKFREE_CAS_STRONG_ASM
 #undef DO_MBED_LOCKFREE_LOADSTORE
 #undef DO_MBED_LOCKFREE_EXCHG_OP
 #undef DO_MBED_LOCKFREE_CAS_WEAK_OP
 #undef DO_MBED_LOCKFREE_CAS_STRONG_OP
-#undef DO_MBED_LOCKFREE_2OP
-#undef DO_MBED_LOCKFREE_3OP
+#undef DO_MBED_LOCKFREE_NEWVAL_2OP
+#undef DO_MBED_LOCKFREE_OLDVAL_2OP
+#undef DO_MBED_LOCKFREE_OLDVAL_3OP
 #undef DO_MBED_LOCKFREE_EXCHG_OPS
-#undef DO_MBED_LOCKFREE_2OPS
-#undef DO_MBED_LOCKFREE_3OPS
+#undef DO_MBED_LOCKFREE_NEWVAL_2OPS
+#undef DO_MBED_LOCKFREE_OLDVAL_2OPS
+#undef DO_MBED_LOCKFREE_OLDVAL_3OPS
 #undef DO_MBED_LOCKFREE_CAS_WEAK_OPS
 #undef DO_MBED_LOCKFREE_CAS_STRONG_OPS
 #undef DO_MBED_SIGNED_CAS_OP
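
For context, a sketch of the kind of caller the commit message anticipates: a
pre-increment operator forwarding to the new pre-op templates so it gets the
updated value back directly. This is illustrative only - `SimpleAtomic` is a
hypothetical stand-in, not mbed's actual `Atomic` implementation, and the
pre-op specialisations above only exist for the unsigned types the patch
covers:

    // Hypothetical wrapper, for illustration - not mbed's actual Atomic.
    template<typename T>
    class SimpleAtomic {
        volatile T value;
    public:
        T operator++()      // pre-op: returns the new value
        {
            return mbed::impl::core_util_atomic_incr(&value, T(1));
        }
        T operator++(int)   // post-op: returns the old value
        {
            return core_util_atomic_fetch_add(&value, T(1));
        }
        T operator&=(T arg) // pre-op form of fetch_and
        {
            return mbed::impl::core_util_atomic_and_fetch(&value, arg);
        }
    };

On MBED_EXCLUSIVE_ACCESS targets the pre-op calls reach the new assembler
forms that need one register fewer; elsewhere DO_MBED_ATOMIC_MANUAL_PRE_OP_TEMPLATE
synthesises the same result from the post-op, e.g. incr(ptr, arg) becomes
fetch_add(ptr, arg) + arg.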