diff --git a/internal/core/src/simd/avx512.cpp b/internal/core/src/simd/avx512.cpp index e1bc4da3ff..2fb8f7f539 100644 --- a/internal/core/src/simd/avx512.cpp +++ b/internal/core/src/simd/avx512.cpp @@ -381,7 +381,7 @@ struct CompareValAVX512Impl { target, (CompareOperator::ComparePredicate)); __m128i cmp_res = _mm_maskz_set1_epi8(mask, 0x01); - _mm_storeu_si64((__m128i*)(res + i), cmp_res); + _mm_storel_epi64((__m128i_u*)(res + i), cmp_res); } for (size_t i = middle; i < size; ++i) { @@ -429,7 +429,7 @@ struct CompareValAVX512Impl { target, (CompareOperator::ComparePredicate)); __m128i cmp_res = _mm_maskz_set1_epi8(cmp_res_mask, 0x01); - _mm_storeu_si64((res + i), cmp_res); + _mm_storel_epi64((__m128i_u*)(res + i), cmp_res); } for (size_t i = middle; i < size; ++i) { @@ -611,7 +611,7 @@ struct CompareColumnAVX512Impl { (CompareOperator::ComparePredicate)); __m128i cmp_res = _mm_maskz_set1_epi8(mask, 0x01); - _mm_storeu_si64((__m128i*)(res + i), cmp_res); + _mm_storel_epi64((__m128i_u*)(res + i), cmp_res); } } @@ -668,7 +668,7 @@ struct CompareColumnAVX512Impl { (CompareOperator::ComparePredicate)); __m128i cmp_res = _mm_maskz_set1_epi8(cmp_res_mask, 0x01); - _mm_storeu_si64((res + i), cmp_res); + _mm_storel_epi64((__m128i_u*)(res + i), cmp_res); } for (size_t i = middle; i < size; ++i) { diff --git a/internal/core/src/simd/sse2.cpp b/internal/core/src/simd/sse2.cpp index 9726aec946..c0060ef856 100644 --- a/internal/core/src/simd/sse2.cpp +++ b/internal/core/src/simd/sse2.cpp @@ -34,9 +34,9 @@ GetBitsetBlockSSE2(const bool* src) { tmp[i] = _mm_movemask_epi8(highbits); } - __m128i tmpvec = _mm_loadu_si64(tmp); + __m128i tmpvec = _mm_loadl_epi64((__m128i_u*)tmp); BitsetBlockType res; - _mm_storeu_si64(&res, tmpvec); + _mm_storel_epi64((__m128i_u*)&res, tmpvec); return res; } else { // Others has 32 bits