mirror of https://github.com/milvus-io/milvus.git
1537 lines
46 KiB
C++
1537 lines
46 KiB
C++
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
|
// or implied. See the License for the specific language governing permissions and limitations under the License
|
|
|
|
#include <gtest/gtest.h>
|
|
|
|
#include <boost/format.hpp>
|
|
#include <chrono>
|
|
#include <iostream>
|
|
#include <random>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <unordered_set>
|
|
#include <boost/container/vector.hpp>
|
|
|
|
using namespace std;
|
|
|
|
template <typename Type>
|
|
using FixedVector = boost::container::vector<Type>;
|
|
|
|
// Writes "skip <name of the running test>" to stdout. The expansion already
// ends with ';', so call sites use PRINT_SKPI_TEST without a trailing one.
#define PRINT_SKPI_TEST                             \
    std::cout << "skip "                            \
              << ::testing::UnitTest::GetInstance() \
                     ->current_test_info()          \
                     ->name()                       \
              << std::endl;
|
|
|
|
#if defined(__x86_64__)
|
|
#include "simd/hook.h"
|
|
#include "simd/ref.h"
|
|
#include "simd/sse2.h"
|
|
#include "simd/sse4.h"
|
|
#include "simd/avx2.h"
|
|
#include "simd/avx512.h"
|
|
#include "simd/ref.h"
|
|
|
|
using namespace milvus::simd;
|
|
// Checks GetBitsetBlockSSE2 on 64-element bool buffers filled with regular
// patterns. The expected masks have bit i set exactly when src[i] is true.
TEST(GetBitSetBlock, base_test_sse) {
    FixedVector<bool> src;

    // All-false input must produce an empty mask.
    for (int pos = 0; pos < 64; ++pos) {
        src.push_back(false);
    }
    auto res = GetBitsetBlockSSE2(src.data());
    std::cout << res << std::endl;
    ASSERT_EQ(res, 0);

    // Each case sets src[i] to true iff i % period == 0.
    struct Pattern {
        int period;
        unsigned long long expected;
    };
    const Pattern patterns[] = {
        {1, 0xffffffffffffffff},
        {2, 0x5555555555555555},
        {4, 0x1111111111111111},
        {8, 0x0101010101010101},
        {16, 0x0001000100010001},
        {32, 0x0000000100000001},
        {5, 0x1084210842108421},
    };
    for (const Pattern& pattern : patterns) {
        src.clear();
        for (int pos = 0; pos < 64; ++pos) {
            src.push_back(pos % pattern.period == 0);
        }
        res = GetBitsetBlockSSE2(src.data());
        // std::hex is sticky on std::cout; switch back to decimal right away
        // so later tests' numeric output is not printed in hexadecimal.
        std::cout << std::hex << res << std::dec << std::endl;
        ASSERT_EQ(res, pattern.expected);
    }
}
|
|
|
|
// Rough timing comparison of GetBitsetBlockSSE2 and GetBitsetBlockAVX2 over a
// sliding window on a large alternating true/false buffer. Prints the elapsed
// microseconds of each run; there are no correctness assertions.
TEST(GetBitsetBlockPerf, bitset) {
    constexpr size_t kElements = 100000000;
    constexpr int kIterations = 10000000;

    FixedVector<bool> srcs;
    // Reserve once instead of reallocating repeatedly while filling.
    srcs.reserve(kElements);
    for (size_t i = 0; i < kElements; ++i) {
        srcs.push_back(i % 2 == 0);
    }

    using Clock = std::chrono::steady_clock;
    const auto elapsed_us = [](Clock::time_point since) {
        return std::chrono::duration_cast<std::chrono::microseconds>(
                   Clock::now() - since)
            .count();
    };

    std::cout << "start test" << std::endl;
    auto start = Clock::now();
    for (int i = 0; i < kIterations; ++i) {
        // Result is intentionally discarded; only the call cost is measured.
        static_cast<void>(GetBitsetBlockSSE2(srcs.data() + i));
    }
    std::cout << "cost: " << elapsed_us(start) << "us" << std::endl;

    // The AVX2 kernel must not run on CPUs without AVX2, matching the guard
    // used by the other AVX2 tests in this file.
    if (!cpu_support_avx2()) {
        PRINT_SKPI_TEST
        return;
    }

    start = Clock::now();
    for (int i = 0; i < kIterations; ++i) {
        static_cast<void>(GetBitsetBlockAVX2(srcs.data() + i));
    }
    std::cout << "cost: " << elapsed_us(start) << "us" << std::endl;
}
|
|
|
|
// Checks GetBitsetBlockAVX2 on 64-element bool buffers filled with regular
// patterns. The expected masks have bit i set exactly when src[i] is true.
TEST(GetBitSetBlock, base_test_avx2) {
    // Skip on CPUs without AVX2, like the other AVX2 tests in this file.
    if (!cpu_support_avx2()) {
        PRINT_SKPI_TEST
        return;
    }

    FixedVector<bool> src;

    // All-false input must produce an empty mask.
    for (int pos = 0; pos < 64; ++pos) {
        src.push_back(false);
    }
    auto res = GetBitsetBlockAVX2(src.data());
    std::cout << res << std::endl;
    ASSERT_EQ(res, 0);

    // Each case sets src[i] to true iff i % period == 0.
    struct Pattern {
        int period;
        unsigned long long expected;
    };
    const Pattern patterns[] = {
        {1, 0xffffffffffffffff},
        {2, 0x5555555555555555},
        {4, 0x1111111111111111},
        {8, 0x0101010101010101},
        {16, 0x0001000100010001},
        {32, 0x0000000100000001},
        {5, 0x1084210842108421},
    };
    for (const Pattern& pattern : patterns) {
        src.clear();
        for (int pos = 0; pos < 64; ++pos) {
            src.push_back(pos % pattern.period == 0);
        }
        res = GetBitsetBlockAVX2(src.data());
        // std::hex is sticky on std::cout; switch back to decimal right away
        // so later tests' numeric output is not printed in hexadecimal.
        std::cout << std::hex << res << std::dec << std::endl;
        ASSERT_EQ(res, pattern.expected);
    }
}
|
|
|
|
// FindTermSSE2 on bool data: a value is reported only once it is stored,
// checked at buffer sizes 1, 17 and 34.
TEST(FindTermSSE2, bool_type) {
    FixedVector<bool> vecs;
    vecs.push_back(false);

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](bool needle) {
        return FindTermSSE2(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(true), false);
    ASSERT_EQ(contains(false), true);

    for (int n = 0; n < 16; ++n) {
        vecs.push_back(false);
    }
    ASSERT_EQ(contains(true), false);
    ASSERT_EQ(contains(false), true);

    // A single true followed by more false entries must be found.
    vecs.push_back(true);
    for (int n = 0; n < 16; ++n) {
        vecs.push_back(false);
    }
    ASSERT_EQ(contains(true), true);
}
|
|
|
|
// FindTermSSE2 on int8_t data holding 0..99: stored values are found,
// others are not until they are appended.
TEST(FindTermSSE2, int8_type) {
    std::vector<int8_t> vecs;
    for (int value = 0; value < 100; ++value) {
        vecs.push_back(value);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](int8_t needle) {
        return FindTermSSE2(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0), true);
    ASSERT_EQ(contains(10), true);
    ASSERT_EQ(contains(99), true);
    ASSERT_EQ(contains(100), false);
    ASSERT_EQ(contains(127), false);

    vecs.push_back(127);
    ASSERT_EQ(contains(127), true);
}
|
|
|
|
// FindTermSSE2 on int16_t data holding 0..999: stored values are found,
// others are not until they are appended.
TEST(FindTermSSE2, int16_type) {
    std::vector<int16_t> vecs;
    for (int value = 0; value < 1000; ++value) {
        vecs.push_back(value);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](int16_t needle) {
        return FindTermSSE2(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0), true);
    ASSERT_EQ(contains(10), true);
    ASSERT_EQ(contains(999), true);
    ASSERT_EQ(contains(1000), false);
    ASSERT_EQ(contains(1270), false);

    vecs.push_back(1000);
    ASSERT_EQ(contains(1000), true);
}
|
|
|
|
// FindTermSSE2 on int32_t data holding 0..999. Values are then appended one
// at a time (sizes 1001, 1002, 1003) and the search is re-checked each time.
TEST(FindTermSSE2, int32_type) {
    std::vector<int32_t> vecs;
    for (int value = 0; value < 1000; ++value) {
        vecs.push_back(value);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](int32_t needle) {
        return FindTermSSE2(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0), true);
    ASSERT_EQ(contains(10), true);
    ASSERT_EQ(contains(999), true);
    ASSERT_EQ(contains(1000), false);

    vecs.push_back(1000);
    ASSERT_EQ(contains(1000), true);
    ASSERT_EQ(contains(1001), false);

    vecs.push_back(1001);
    ASSERT_EQ(contains(1001), true);
    ASSERT_EQ(contains(1002), false);

    vecs.push_back(1002);
    ASSERT_EQ(contains(1002), true);
    ASSERT_EQ(contains(1003), false);

    ASSERT_EQ(contains(1270), false);
}
|
|
|
|
// FindTermSSE2 on int64_t data holding 0..999: stored values are found,
// others are not until they are appended.
TEST(FindTermSSE2, int64_type) {
    std::vector<int64_t> vecs;
    for (int value = 0; value < 1000; ++value) {
        vecs.push_back(value);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](int64_t needle) {
        return FindTermSSE2(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0), true);
    ASSERT_EQ(contains(10), true);
    ASSERT_EQ(contains(999), true);
    ASSERT_EQ(contains(1000), false);
    ASSERT_EQ(contains(1270), false);

    vecs.push_back(1005);
    ASSERT_EQ(contains(1005), true);
}
|
|
|
|
// FindTermSSE2 on float data holding i + 0.01 for i in 0..9999.
// Needles are written as double literals and converted to float at the call,
// the same conversion the stored values go through.
TEST(FindTermSSE2, float_type) {
    std::vector<float> vecs;
    for (int step = 0; step < 10000; ++step) {
        vecs.push_back(step + 0.01);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](float needle) {
        return FindTermSSE2(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0.01), true);
    ASSERT_EQ(contains(10.01), true);
    ASSERT_EQ(contains(10000.01), false);
    ASSERT_EQ(contains(12700.02), false);

    vecs.push_back(1.001);
    ASSERT_EQ(contains(1.001), true);
}
|
|
|
|
// FindTermSSE2 on double data holding i + 0.01 for i in 0..9999: stored
// values are found, others are not until they are appended.
TEST(FindTermSSE2, double_type) {
    std::vector<double> vecs;
    for (int step = 0; step < 10000; ++step) {
        vecs.push_back(step + 0.01);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](double needle) {
        return FindTermSSE2(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0.01), true);
    ASSERT_EQ(contains(10.01), true);
    ASSERT_EQ(contains(10000.01), false);
    ASSERT_EQ(contains(12700.01), false);

    vecs.push_back(1.001);
    ASSERT_EQ(contains(1.001), true);
}
|
|
|
|
// FindTermSSE4 on int64_t data holding 0..999; skipped when the CPU lacks
// SSE4.2 support.
TEST(FindTermSSE4, int64_type) {
    if (!cpu_support_sse4_2()) {
        PRINT_SKPI_TEST
        return;
    }

    std::vector<int64_t> srcs;
    for (size_t value = 0; value < 1000; ++value) {
        srcs.push_back(value);
    }

    // Searches the current contents of srcs for needle.
    const auto contains = [&srcs](int64_t needle) {
        return FindTermSSE4(srcs.data(), srcs.size(), needle);
    };

    ASSERT_EQ(contains(0), true);
    ASSERT_EQ(contains(1), true);
    ASSERT_EQ(contains(999), true);
    ASSERT_EQ(contains(1000), false);
    ASSERT_EQ(contains(2000), false);

    srcs.push_back(1000);
    ASSERT_EQ(contains(1000), true);
}
|
|
|
|
// FindTermAVX2 on bool data: a value is reported only once it is stored,
// checked at buffer sizes 1, 17 and 34. Skipped when the CPU lacks AVX2.
// (The unused int64_t scratch vector of the earlier version was removed.)
TEST(FindTermAVX2, bool_type) {
    if (!cpu_support_avx2()) {
        PRINT_SKPI_TEST
        return;
    }

    FixedVector<bool> vecs;
    vecs.push_back(false);

    auto res = FindTermAVX2(vecs.data(), vecs.size(), true);
    ASSERT_EQ(res, false);
    res = FindTermAVX2(vecs.data(), vecs.size(), false);
    ASSERT_EQ(res, true);

    for (int i = 0; i < 16; i++) {
        vecs.push_back(false);
    }

    res = FindTermAVX2(vecs.data(), vecs.size(), true);
    ASSERT_EQ(res, false);
    res = FindTermAVX2(vecs.data(), vecs.size(), false);
    ASSERT_EQ(res, true);

    // A single true followed by more false entries must be found.
    vecs.push_back(true);
    for (int i = 0; i < 16; i++) {
        vecs.push_back(false);
    }
    res = FindTermAVX2(vecs.data(), vecs.size(), true);
    ASSERT_EQ(res, true);
}
|
|
|
|
// FindTermAVX2 on int8_t data holding 0..99; skipped when the CPU lacks AVX2.
TEST(FindTermAVX2, int8_type) {
    if (!cpu_support_avx2()) {
        PRINT_SKPI_TEST
        return;
    }

    std::vector<int8_t> vecs;
    for (int value = 0; value < 100; ++value) {
        vecs.push_back(value);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](int8_t needle) {
        return FindTermAVX2(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0), true);
    ASSERT_EQ(contains(10), true);
    ASSERT_EQ(contains(99), true);
    ASSERT_EQ(contains(100), false);
    ASSERT_EQ(contains(127), false);

    vecs.push_back(127);
    ASSERT_EQ(contains(127), true);
}
|
|
|
|
// FindTermAVX2 on int16_t data holding 0..999; skipped when the CPU lacks
// AVX2.
TEST(FindTermAVX2, int16_type) {
    if (!cpu_support_avx2()) {
        PRINT_SKPI_TEST
        return;
    }

    std::vector<int16_t> vecs;
    for (int value = 0; value < 1000; ++value) {
        vecs.push_back(value);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](int16_t needle) {
        return FindTermAVX2(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0), true);
    ASSERT_EQ(contains(10), true);
    ASSERT_EQ(contains(999), true);
    ASSERT_EQ(contains(1000), false);
    ASSERT_EQ(contains(1270), false);

    vecs.push_back(1270);
    ASSERT_EQ(contains(1270), true);
}
|
|
|
|
// FindTermAVX2 on int32_t data holding 0..999; skipped when the CPU lacks
// AVX2.
TEST(FindTermAVX2, int32_type) {
    if (!cpu_support_avx2()) {
        PRINT_SKPI_TEST
        return;
    }

    std::vector<int32_t> vecs;
    for (int value = 0; value < 1000; ++value) {
        vecs.push_back(value);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](int32_t needle) {
        return FindTermAVX2(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0), true);
    ASSERT_EQ(contains(10), true);
    ASSERT_EQ(contains(999), true);
    ASSERT_EQ(contains(1000), false);
    ASSERT_EQ(contains(1270), false);

    vecs.push_back(1270);
    ASSERT_EQ(contains(1270), true);
}
|
|
|
|
// FindTermAVX2 on int64_t data holding 0..999; skipped when the CPU lacks
// AVX2.
TEST(FindTermAVX2, int64_type) {
    if (!cpu_support_avx2()) {
        PRINT_SKPI_TEST
        return;
    }

    std::vector<int64_t> vecs;
    for (int value = 0; value < 1000; ++value) {
        vecs.push_back(value);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](int64_t needle) {
        return FindTermAVX2(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0), true);
    ASSERT_EQ(contains(10), true);
    ASSERT_EQ(contains(999), true);
    ASSERT_EQ(contains(1000), false);
    ASSERT_EQ(contains(1270), false);

    vecs.push_back(1270);
    ASSERT_EQ(contains(1270), true);
}
|
|
|
|
// FindTermAVX2 on float data holding i + 0.01 for i in 0..9999; skipped when
// the CPU lacks AVX2. Needles are double literals converted to float at the
// call, the same conversion the stored values go through.
TEST(FindTermAVX2, float_type) {
    if (!cpu_support_avx2()) {
        PRINT_SKPI_TEST
        return;
    }

    std::vector<float> vecs;
    for (int step = 0; step < 10000; ++step) {
        vecs.push_back(step + 0.01);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](float needle) {
        return FindTermAVX2(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0.01), true);
    ASSERT_EQ(contains(10.01), true);
    ASSERT_EQ(contains(10000.01), false);
    ASSERT_EQ(contains(12700.02), false);

    vecs.push_back(12700.02);
    ASSERT_EQ(contains(12700.02), true);
}
|
|
|
|
// FindTermAVX2 on double data holding i + 0.01 for i in 0..9999; skipped when
// the CPU lacks AVX2.
TEST(FindTermAVX2, double_type) {
    if (!cpu_support_avx2()) {
        PRINT_SKPI_TEST
        return;
    }

    std::vector<double> vecs;
    for (int step = 0; step < 10000; ++step) {
        vecs.push_back(step + 0.01);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](double needle) {
        return FindTermAVX2(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0.01), true);
    ASSERT_EQ(contains(10.01), true);
    ASSERT_EQ(contains(10000.01), false);
    ASSERT_EQ(contains(12700.01), false);

    vecs.push_back(12700.01);
    ASSERT_EQ(contains(12700.01), true);
}
|
|
|
|
// FindTermAVX512 on bool data: a value is reported only once it is stored,
// checked at buffer sizes 1, 17 and 34. Skipped when the CPU lacks AVX-512.
// (The unused int64_t scratch vector of the earlier version was removed.)
TEST(FindTermAVX512, bool_type) {
    if (!cpu_support_avx512()) {
        PRINT_SKPI_TEST
        return;
    }

    FixedVector<bool> vecs;
    vecs.push_back(false);

    auto res = FindTermAVX512(vecs.data(), vecs.size(), true);
    ASSERT_EQ(res, false);
    res = FindTermAVX512(vecs.data(), vecs.size(), false);
    ASSERT_EQ(res, true);

    for (int i = 0; i < 16; i++) {
        vecs.push_back(false);
    }

    res = FindTermAVX512(vecs.data(), vecs.size(), true);
    ASSERT_EQ(res, false);
    res = FindTermAVX512(vecs.data(), vecs.size(), false);
    ASSERT_EQ(res, true);

    // A single true followed by more false entries must be found.
    vecs.push_back(true);
    for (int i = 0; i < 16; i++) {
        vecs.push_back(false);
    }
    res = FindTermAVX512(vecs.data(), vecs.size(), true);
    ASSERT_EQ(res, true);
}
|
|
|
|
// FindTermAVX512 on int8_t data holding 0..99; skipped when the CPU lacks
// AVX-512.
TEST(FindTermAVX512, int8_type) {
    if (!cpu_support_avx512()) {
        PRINT_SKPI_TEST
        return;
    }

    std::vector<int8_t> vecs;
    for (int value = 0; value < 100; ++value) {
        vecs.push_back(value);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](int8_t needle) {
        return FindTermAVX512(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0), true);
    ASSERT_EQ(contains(10), true);
    ASSERT_EQ(contains(99), true);
    ASSERT_EQ(contains(100), false);
    ASSERT_EQ(contains(127), false);

    vecs.push_back(127);
    ASSERT_EQ(contains(127), true);
}
|
|
|
|
// FindTermAVX512 on int16_t data holding 0..999; skipped when the CPU lacks
// AVX-512.
TEST(FindTermAVX512, int16_type) {
    if (!cpu_support_avx512()) {
        PRINT_SKPI_TEST
        return;
    }

    std::vector<int16_t> vecs;
    for (int value = 0; value < 1000; ++value) {
        vecs.push_back(value);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](int16_t needle) {
        return FindTermAVX512(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0), true);
    ASSERT_EQ(contains(10), true);
    ASSERT_EQ(contains(999), true);
    ASSERT_EQ(contains(1000), false);
    ASSERT_EQ(contains(1270), false);

    vecs.push_back(1270);
    ASSERT_EQ(contains(1270), true);
}
|
|
|
|
// FindTermAVX512 on int32_t data holding 0..999; skipped when the CPU lacks
// AVX-512.
TEST(FindTermAVX512, int32_type) {
    if (!cpu_support_avx512()) {
        PRINT_SKPI_TEST
        return;
    }

    std::vector<int32_t> vecs;
    for (int value = 0; value < 1000; ++value) {
        vecs.push_back(value);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](int32_t needle) {
        return FindTermAVX512(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0), true);
    ASSERT_EQ(contains(10), true);
    ASSERT_EQ(contains(999), true);
    ASSERT_EQ(contains(1000), false);
    ASSERT_EQ(contains(1270), false);

    vecs.push_back(1270);
    ASSERT_EQ(contains(1270), true);
}
|
|
|
|
// FindTermAVX512 on int64_t data holding 0..999; skipped when the CPU lacks
// AVX-512.
TEST(FindTermAVX512, int64_type) {
    if (!cpu_support_avx512()) {
        PRINT_SKPI_TEST
        return;
    }

    std::vector<int64_t> vecs;
    for (int value = 0; value < 1000; ++value) {
        vecs.push_back(value);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](int64_t needle) {
        return FindTermAVX512(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0), true);
    ASSERT_EQ(contains(10), true);
    ASSERT_EQ(contains(999), true);
    ASSERT_EQ(contains(1000), false);
    ASSERT_EQ(contains(1270), false);

    vecs.push_back(1270);
    ASSERT_EQ(contains(1270), true);
}
|
|
|
|
// FindTermAVX512 on float data holding i + 0.01 for i in 0..9999; skipped
// when the CPU lacks AVX-512. Needles are double literals converted to float
// at the call, the same conversion the stored values go through.
TEST(FindTermAVX512, float_type) {
    if (!cpu_support_avx512()) {
        PRINT_SKPI_TEST
        return;
    }

    std::vector<float> vecs;
    for (int step = 0; step < 10000; ++step) {
        vecs.push_back(step + 0.01);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](float needle) {
        return FindTermAVX512(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0.01), true);
    ASSERT_EQ(contains(10.01), true);
    ASSERT_EQ(contains(10000.01), false);
    ASSERT_EQ(contains(12700.02), false);

    vecs.push_back(12700.02);
    ASSERT_EQ(contains(12700.02), true);
}
|
|
|
|
// Smoke test for StrCmpSSE4: exercises it on short strings and on two equal
// 1000-character strings, printing the latter result. Nothing is asserted.
// Skipped when the CPU lacks SSE4.2 support.
TEST(StrCmpSS4, string_type) {
    if (!cpu_support_sse4_2()) {
        PRINT_SKPI_TEST
        return;
    }

    // "test0" .. "test999"
    std::vector<string> s1;
    for (int idx = 0; idx < 1000; ++idx) {
        s1.push_back("test" + std::to_string(idx));
    }

    for (int idx = 0; idx < 1000; ++idx) {
        // Only checks that the call completes; the value is not inspected.
        [[maybe_unused]] auto res = StrCmpSSE4(s1[idx].c_str(), "test0");
    }

    // Two identical strings made of 1000 'x' characters.
    string s2;
    string s3;
    for (int idx = 0; idx < 1000; ++idx) {
        s2.push_back('x');
    }
    for (int idx = 0; idx < 1000; ++idx) {
        s3.push_back('x');
    }

    auto res = StrCmpSSE4(s2.c_str(), s3.c_str());
    std::cout << res << std::endl;
}
|
|
|
|
// FindTermAVX512 on double data holding i + 0.01 for i in 0..9999; skipped
// when the CPU lacks AVX-512.
TEST(FindTermAVX512, double_type) {
    if (!cpu_support_avx512()) {
        PRINT_SKPI_TEST
        return;
    }

    std::vector<double> vecs;
    for (int step = 0; step < 10000; ++step) {
        vecs.push_back(step + 0.01);
    }

    // Searches the current contents of vecs for needle.
    const auto contains = [&vecs](double needle) {
        return FindTermAVX512(vecs.data(), vecs.size(), needle);
    };

    ASSERT_EQ(contains(0.01), true);
    ASSERT_EQ(contains(10.01), true);
    ASSERT_EQ(contains(10000.01), false);
    ASSERT_EQ(contains(12700.01), false);

    vecs.push_back(12700.01);
    ASSERT_EQ(contains(12700.01), true);
}
|
|
|
|
// Times EqualValRef against EqualValAVX512 on one million int8_t values and
// prints each duration in microseconds. Skipped when the CPU lacks AVX-512.
TEST(EqualVal, perf_int8) {
    if (!cpu_support_avx512()) {
        PRINT_SKPI_TEST
        return;
    }

    constexpr int kCount = 1000000;
    std::vector<int8_t> srcs(kCount);
    for (int idx = 0; idx < kCount; ++idx) {
        srcs[idx] = idx % 128;
    }
    FixedVector<bool> res(kCount);
    const int8_t target = 10;

    using Clock = std::chrono::steady_clock;
    const auto elapsed_us = [](Clock::time_point since) {
        return std::chrono::duration_cast<std::chrono::microseconds>(
                   Clock::now() - since)
            .count();
    };

    auto start = Clock::now();
    EqualValRef(srcs.data(), kCount, target, res.data());
    std::cout << elapsed_us(start) << std::endl;

    start = Clock::now();
    EqualValAVX512(srcs.data(), kCount, target, res.data());
    std::cout << elapsed_us(start) << std::endl;
}
|
|
|
|
template <typename T>
|
|
void
|
|
TestCompareValAVX512Perf() {
|
|
if (!cpu_support_avx512()) {
|
|
PRINT_SKPI_TEST
|
|
return;
|
|
}
|
|
std::vector<T> srcs(1000000);
|
|
for (int i = 0; i < 1000000; ++i) {
|
|
srcs[i] = i;
|
|
}
|
|
FixedVector<bool> res(1000000);
|
|
T target = 10;
|
|
auto start = std::chrono::steady_clock::now();
|
|
EqualValRef(srcs.data(), 1000000, target, res.data());
|
|
std::cout << std::chrono::duration_cast<std::chrono::microseconds>(
|
|
std::chrono::steady_clock::now() - start)
|
|
.count()
|
|
<< std::endl;
|
|
start = std::chrono::steady_clock::now();
|
|
EqualValAVX512(srcs.data(), 1000000, target, res.data());
|
|
std::cout << std::chrono::duration_cast<std::chrono::microseconds>(
|
|
std::chrono::steady_clock::now() - start)
|
|
.count()
|
|
<< std::endl;
|
|
}
|
|
|
|
// EqualVal timing comparison (reference vs. AVX-512) for int16_t elements.
TEST(EqualVal, perf_int16) {
    using Element = int16_t;
    TestCompareValAVX512Perf<Element>();
}
|
|
|
|
// EqualVal timing comparison (reference vs. AVX-512) for int32_t elements.
TEST(EqualVal, pref_int32) {
    using Element = int32_t;
    TestCompareValAVX512Perf<Element>();
}
|
|
|
|
// EqualVal timing comparison (reference vs. AVX-512) for int64_t elements.
TEST(EqualVal, perf_int64) {
    using Element = int64_t;
    TestCompareValAVX512Perf<Element>();
}
|
|
|
|
// EqualVal timing comparison (reference vs. AVX-512) for float elements.
TEST(EqualVal, perf_float) {
    using Element = float;
    TestCompareValAVX512Perf<Element>();
}
|
|
|
|
// EqualVal timing comparison (reference vs. AVX-512) for double elements.
TEST(EqualVal, perf_double) {
    using Element = double;
    TestCompareValAVX512Perf<Element>();
}
|
|
|
|
template <typename T>
|
|
void
|
|
TestCompareValAVX512(int size, T target) {
|
|
if (!cpu_support_avx512()) {
|
|
PRINT_SKPI_TEST
|
|
return;
|
|
}
|
|
std::vector<T> vecs;
|
|
for (int i = 0; i < size; ++i) {
|
|
if constexpr (std::is_same_v<T, int8_t>) {
|
|
vecs.push_back(i % 127);
|
|
} else if constexpr (std::is_floating_point_v<T>) {
|
|
vecs.push_back(i + 0.01);
|
|
} else {
|
|
vecs.push_back(i);
|
|
}
|
|
}
|
|
FixedVector<bool> res(size);
|
|
|
|
EqualValAVX512(vecs.data(), size, target, res.data());
|
|
for (int i = 0; i < size; i++) {
|
|
ASSERT_EQ(res[i], vecs[i] == target) << i;
|
|
}
|
|
LessValAVX512(vecs.data(), size, target, res.data());
|
|
for (int i = 0; i < size; i++) {
|
|
ASSERT_EQ(res[i], vecs[i] < target) << i;
|
|
}
|
|
LessEqualValAVX512(vecs.data(), size, target, res.data());
|
|
for (int i = 0; i < size; i++) {
|
|
ASSERT_EQ(res[i], vecs[i] <= target) << i;
|
|
}
|
|
GreaterEqualValAVX512(vecs.data(), size, target, res.data());
|
|
for (int i = 0; i < size; i++) {
|
|
ASSERT_EQ(res[i], vecs[i] >= target) << i;
|
|
}
|
|
GreaterValAVX512(vecs.data(), size, target, res.data());
|
|
for (int i = 0; i < size; i++) {
|
|
ASSERT_EQ(res[i], vecs[i] > target) << i;
|
|
}
|
|
NotEqualValAVX512(vecs.data(), size, target, res.data());
|
|
for (int i = 0; i < size; i++) {
|
|
ASSERT_EQ(res[i], vecs[i] != target) << i;
|
|
}
|
|
}
|
|
|
|
// AVX-512 compare-with-constant kernels on int8_t, for sizes 1000 and 1001.
TEST(CompareVal, avx512_int8) {
    struct Case {
        int size;
        int8_t target;
    };
    for (const Case& c : {Case{1000, 9}, Case{1000, 99}, Case{1001, 127}}) {
        TestCompareValAVX512<int8_t>(c.size, c.target);
    }
}
|
|
|
|
// AVX-512 compare-with-constant kernels on int16_t, for sizes 1000 and 1001.
TEST(CompareVal, avx512_int16) {
    struct Case {
        int size;
        int16_t target;
    };
    for (const Case& c : {Case{1000, 99}, Case{1000, 999}, Case{1001, 1000}}) {
        TestCompareValAVX512<int16_t>(c.size, c.target);
    }
}
|
|
|
|
// AVX-512 compare-with-constant kernels on int32_t, for sizes 1000 and 1001.
TEST(CompareVal, avx512_int32) {
    struct Case {
        int size;
        int32_t target;
    };
    for (const Case& c : {Case{1000, 99}, Case{1000, 999}, Case{1001, 1000}}) {
        TestCompareValAVX512<int32_t>(c.size, c.target);
    }
}
|
|
|
|
// AVX-512 compare-with-constant kernels on int64_t, for sizes 1000 and 1001.
TEST(CompareVal, avx512_int64) {
    struct Case {
        int size;
        int64_t target;
    };
    for (const Case& c : {Case{1000, 99}, Case{1000, 999}, Case{1001, 1000}}) {
        TestCompareValAVX512<int64_t>(c.size, c.target);
    }
}
|
|
|
|
// AVX-512 compare-with-constant kernels on float, for sizes 1000 and 1001.
// Targets are double literals converted to float.
TEST(CompareVal, avx512_float) {
    struct Case {
        int size;
        float target;
    };
    for (const Case& c :
         {Case{1000, 99.01}, Case{1000, 999.01}, Case{1001, 1000.01}}) {
        TestCompareValAVX512<float>(c.size, c.target);
    }
}
|
|
|
|
// AVX-512 compare-with-constant kernels on double, for sizes 1000 and 1001.
TEST(CompareVal, avx512_double) {
    struct Case {
        int size;
        double target;
    };
    for (const Case& c :
         {Case{1000, 99.01}, Case{1000, 999.01}, Case{1001, 1000.01}}) {
        TestCompareValAVX512<double>(c.size, c.target);
    }
}
|
|
|
|
template <typename T>
|
|
void
|
|
TestCompareColumnAVX512Perf() {
|
|
if (!cpu_support_avx512()) {
|
|
PRINT_SKPI_TEST
|
|
return;
|
|
}
|
|
std::vector<T> lefts(1000000);
|
|
for (int i = 0; i < 1000000; ++i) {
|
|
lefts[i] = i;
|
|
}
|
|
std::vector<T> rights(1000000);
|
|
for (int i = 0; i < 1000000; ++i) {
|
|
rights[i] = i;
|
|
}
|
|
FixedVector<bool> res(1000000);
|
|
auto start = std::chrono::steady_clock::now();
|
|
LessColumnRef(lefts.data(), rights.data(), 1000000, res.data());
|
|
std::cout << std::chrono::duration_cast<std::chrono::microseconds>(
|
|
std::chrono::steady_clock::now() - start)
|
|
.count()
|
|
<< std::endl;
|
|
start = std::chrono::steady_clock::now();
|
|
LessColumnAVX512(lefts.data(), rights.data(), 1000000, res.data());
|
|
std::cout << std::chrono::duration_cast<std::chrono::microseconds>(
|
|
std::chrono::steady_clock::now() - start)
|
|
.count()
|
|
<< std::endl;
|
|
}
|
|
|
|
// LessColumn timing comparison (reference vs. AVX-512) for int32_t columns.
TEST(LessColumn, pref_int32) {
    using Element = int32_t;
    TestCompareColumnAVX512Perf<Element>();
}
|
|
|
|
// LessColumn timing comparison (reference vs. AVX-512) for int64_t columns.
TEST(LessColumn, perf_int64) {
    using Element = int64_t;
    TestCompareColumnAVX512Perf<Element>();
}
|
|
|
|
// LessColumn timing comparison (reference vs. AVX-512) for float columns.
TEST(LessColumn, perf_float) {
    using Element = float;
    TestCompareColumnAVX512Perf<Element>();
}
|
|
|
|
// LessColumn timing comparison (reference vs. AVX-512) for double columns.
TEST(LessColumn, perf_double) {
    using Element = double;
    TestCompareColumnAVX512Perf<Element>();
}
|
|
|
|
template <typename T>
|
|
void
|
|
TestCompareColumnAVX512(int size, T min_val, T max_val) {
|
|
if (!cpu_support_avx512()) {
|
|
PRINT_SKPI_TEST
|
|
return;
|
|
}
|
|
std::random_device rd;
|
|
std::mt19937 gen(rd());
|
|
|
|
std::vector<T> left;
|
|
std::vector<T> right;
|
|
if constexpr (std::is_same_v<T, float>) {
|
|
std::uniform_real_distribution<float> dis(min_val, max_val);
|
|
for (int i = 0; i < size; ++i) {
|
|
left.push_back(dis(gen));
|
|
right.push_back(dis(gen));
|
|
}
|
|
} else if constexpr (std::is_same_v<T, double>) {
|
|
std::uniform_real_distribution<double> dis(min_val, max_val);
|
|
for (int i = 0; i < size; ++i) {
|
|
left.push_back(dis(gen));
|
|
right.push_back(dis(gen));
|
|
}
|
|
} else {
|
|
std::uniform_int_distribution<> dis(min_val, max_val);
|
|
for (int i = 0; i < size; ++i) {
|
|
left.push_back(dis(gen));
|
|
right.push_back(dis(gen));
|
|
}
|
|
}
|
|
|
|
FixedVector<bool> res(size);
|
|
|
|
EqualColumnAVX512(left.data(), right.data(), size, res.data());
|
|
for (int i = 0; i < size; i++) {
|
|
ASSERT_EQ(res[i], left[i] == right[i]) << i;
|
|
}
|
|
LessColumnAVX512(left.data(), right.data(), size, res.data());
|
|
for (int i = 0; i < size; i++) {
|
|
ASSERT_EQ(res[i], left[i] < right[i]) << i;
|
|
}
|
|
GreaterColumnAVX512(left.data(), right.data(), size, res.data());
|
|
for (int i = 0; i < size; i++) {
|
|
ASSERT_EQ(res[i], left[i] > right[i]) << i;
|
|
}
|
|
LessEqualColumnAVX512(left.data(), right.data(), size, res.data());
|
|
for (int i = 0; i < size; i++) {
|
|
ASSERT_EQ(res[i], left[i] <= right[i]) << i;
|
|
}
|
|
GreaterEqualColumnAVX512(left.data(), right.data(), size, res.data());
|
|
for (int i = 0; i < size; i++) {
|
|
ASSERT_EQ(res[i], left[i] >= right[i]) << i;
|
|
}
|
|
NotEqualColumnAVX512(left.data(), right.data(), size, res.data());
|
|
for (int i = 0; i < size; i++) {
|
|
ASSERT_EQ(res[i], left[i] != right[i]) << i;
|
|
}
|
|
}
|
|
|
|
// AVX-512 column comparison kernels on random int8_t data in [-128, 127],
// for sizes 1000 and 1001.
TEST(CompareColumn, avx512_int8) {
    for (int size : {1000, 1001}) {
        TestCompareColumnAVX512<int8_t>(size, -128, 127);
    }
}
|
|
|
|
// AVX-512 column comparison kernels on random int16_t data in [-1000, 1000],
// for sizes 1000 and 1001.
TEST(CompareColumn, avx512_int16) {
    for (int size : {1000, 1001}) {
        TestCompareColumnAVX512<int16_t>(size, -1000, 1000);
    }
}
|
|
|
|
// AVX-512 column comparison kernels on random int32_t data in [-1000, 1000],
// for sizes 1000 and 1001.
TEST(CompareColumn, avx512_int32) {
    for (int size : {1000, 1001}) {
        TestCompareColumnAVX512<int32_t>(size, -1000, 1000);
    }
}
|
|
|
|
// AVX-512 column comparison kernels on random int64_t data in [-1000, 1000],
// for sizes 1000 and 1001.
TEST(CompareColumn, avx512_int64) {
    for (int size : {1000, 1001}) {
        TestCompareColumnAVX512<int64_t>(size, -1000, 1000);
    }
}
|
|
|
|
// AVX-512 column comparison kernels on random float data in [-1.0, 1.0],
// for sizes 1000 and 1001.
TEST(CompareColumn, avx512_float) {
    for (int size : {1000, 1001}) {
        TestCompareColumnAVX512<float>(size, -1.0, 1.0);
    }
}
|
|
|
|
// AVX-512 column comparison kernels on random double data in [-1.0, 1.0],
// for sizes 1000 and 1001.
TEST(CompareColumn, avx512_double) {
    for (int size : {1000, 1001}) {
        TestCompareColumnAVX512<double>(size, -1.0, 1.0);
    }
}
|
|
|
|
// Checks AllFalseSSE2 / AllTrueSSE2 on all-false, mixed and all-true buffers.
TEST(AllBooleanSSE2, function) {
    constexpr int kCount = 8192;
    FixedVector<bool> src;

    // All false.
    for (int pos = 0; pos < kCount; ++pos) {
        src.push_back(false);
    }
    EXPECT_EQ(AllFalseSSE2(src.data(), src.size()), true);
    EXPECT_EQ(AllTrueSSE2(src.data(), src.size()), false);

    // Mixed: the buffer is not cleared here, so it now holds the 8192 false
    // entries followed by 8192 alternating true/false entries.
    for (int pos = 0; pos < kCount; ++pos) {
        src.push_back(pos % 2 == 0);
    }
    EXPECT_EQ(AllFalseSSE2(src.data(), src.size()), false);
    EXPECT_EQ(AllTrueSSE2(src.data(), src.size()), false);

    // All true.
    src.clear();
    for (int pos = 0; pos < kCount; ++pos) {
        src.push_back(true);
    }
    EXPECT_EQ(AllTrueSSE2(src.data(), src.size()), true);
}
|
|
|
|
// Prints per-call timings (nanoseconds, one value per line) for the SSE2,
// AVX2 and reference implementations of AllFalse / AllTrue on an
// 8192-element alternating true/false buffer. Nothing is asserted.
TEST(AllBooleanSSE2, performance) {
    // steady_clock is monotonic, so an interval cannot be distorted by
    // wall-clock adjustments the way a system_clock interval can.
    using Clock = std::chrono::steady_clock;

    // Prints the nanoseconds elapsed since `begin`. std::dec is forced
    // because another test in this file streams std::hex into std::cout,
    // and that base flag stays set on the stream afterwards.
    const auto print_elapsed_ns = [](Clock::time_point begin) {
        std::cout << std::dec
                  << std::chrono::duration_cast<std::chrono::nanoseconds>(
                         Clock::now() - begin)
                         .count()
                  << std::endl;
    };

    FixedVector<bool> src;
    for (int i = 0; i < 8192; ++i) {
        src.push_back(i % 2 == 0);
    }

    std::cout << "sse2" << std::endl;
    for (int j = 0; j < 10; j++) {
        auto start = Clock::now();
        auto res = AllFalseSSE2(src.data(), src.size());
        print_elapsed_ns(start);

        start = Clock::now();
        res = AllTrueSSE2(src.data(), src.size());
        print_elapsed_ns(start);
        static_cast<void>(res);  // result is irrelevant here; only timing matters
    }

    std::cout << "avx2" << std::endl;
    for (int j = 0; j < 10; j++) {
        auto start = Clock::now();
        auto res = AllFalseAVX2(src.data(), src.size());
        print_elapsed_ns(start);

        start = Clock::now();
        res = AllTrueAVX2(src.data(), src.size());
        print_elapsed_ns(start);
        static_cast<void>(res);
    }

    // Labelled like the two sections above so the three series can be told
    // apart in the output.
    std::cout << "ref" << std::endl;
    for (int j = 0; j < 10; j++) {
        auto start = Clock::now();
        auto res = AllFalseRef(src.data(), src.size());
        print_elapsed_ns(start);

        start = Clock::now();
        res = AllTrueRef(src.data(), src.size());
        print_elapsed_ns(start);
        static_cast<void>(res);
    }
}
|
|
|
|
// Verifies that InvertBoolSSE2 flips every element in place, using two
// 8192-element patterns: true at multiples of 2, then true at multiples of 3.
TEST(InvertBool, function) {
    constexpr int kCount = 8192;
    FixedVector<bool> flags;

    for (const int period : {2, 3}) {
        // Element idx starts out true exactly when idx is a multiple of period.
        flags.clear();
        for (int idx = 0; idx < kCount; ++idx) {
            flags.push_back(idx % period == 0);
        }

        InvertBoolSSE2(flags.data(), flags.size());

        // After inversion the truth value must be the opposite.
        for (int idx = 0; idx < kCount; ++idx) {
            EXPECT_EQ(flags[idx], (idx % period) != 0);
        }
    }
}
|
|
|
|
// Prints per-call timings (nanoseconds, one value per line) for
// InvertBoolSSE2 and InvertBoolRef on an 8192-element alternating buffer.
// The buffer is inverted in place on every call; nothing is asserted.
TEST(InvertBool, performance) {
    // steady_clock is monotonic, so an interval cannot be distorted by
    // wall-clock adjustments the way a system_clock interval can.
    using Clock = std::chrono::steady_clock;

    // Prints the nanoseconds elapsed since `begin`. std::dec is forced
    // because another test in this file streams std::hex into std::cout,
    // and that base flag stays set on the stream afterwards.
    const auto print_elapsed_ns = [](Clock::time_point begin) {
        std::cout << std::dec
                  << std::chrono::duration_cast<std::chrono::nanoseconds>(
                         Clock::now() - begin)
                         .count()
                  << std::endl;
    };

    FixedVector<bool> src;
    for (int i = 0; i < 8192; ++i) {
        src.push_back(i % 2 == 0);
    }

    // Section labels let the two series be told apart in the output.
    std::cout << "sse2" << std::endl;
    for (int i = 0; i < 10; ++i) {
        const auto start = Clock::now();
        InvertBoolSSE2(src.data(), src.size());
        print_elapsed_ns(start);
    }

    std::cout << "ref" << std::endl;
    for (int i = 0; i < 10; ++i) {
        const auto start = Clock::now();
        InvertBoolRef(src.data(), src.size());
        print_elapsed_ns(start);
    }
}
|
|
|
|
// Verifies the in-place AND / OR helpers (SSE2 and AVX2 variants): the result
// is read back from the first operand and compared with the scalar
// expectation for each index.
TEST(LogicalBool, function) {
    static constexpr int kLen = 8192;

    // Rebuilds `vec` so that element k is true exactly when k is a multiple
    // of `period`.
    const auto refill = [](FixedVector<bool>& vec, int period) {
        vec.clear();
        for (int k = 0; k < kLen; ++k) {
            vec.push_back(k % period == 0);
        }
    };

    FixedVector<bool> left;
    FixedVector<bool> right;

    // Identical operands: AND followed by OR must both leave `left` unchanged.
    refill(left, 2);
    refill(right, 2);
    AndBoolSSE2(left.data(), right.data(), right.size());
    for (int k = 0; k < kLen; ++k) {
        EXPECT_EQ(left[k], k % 2 == 0);
    }
    OrBoolSSE2(left.data(), right.data(), right.size());
    for (int k = 0; k < kLen; ++k) {
        EXPECT_EQ(left[k], k % 2 == 0);
    }

    // Different operands (multiples of 2 vs. multiples of 5), SSE2 AND.
    refill(left, 2);
    refill(right, 5);
    AndBoolSSE2(left.data(), right.data(), right.size());
    for (int k = 0; k < kLen; ++k) {
        EXPECT_EQ(left[k], (k % 2 == 0) && (k % 5 == 0));
    }

    // Same operands, SSE2 OR.
    refill(left, 2);
    refill(right, 5);
    OrBoolSSE2(left.data(), right.data(), right.size());
    for (int k = 0; k < kLen; ++k) {
        EXPECT_EQ(left[k], (k % 2 == 0) || (k % 5 == 0));
    }

    // Same operands, AVX2 AND.
    refill(left, 2);
    refill(right, 5);
    AndBoolAVX2(left.data(), right.data(), right.size());
    for (int k = 0; k < kLen; ++k) {
        EXPECT_EQ(left[k], (k % 2 == 0) && (k % 5 == 0));
    }

    // Same operands, AVX2 OR.
    refill(left, 2);
    refill(right, 5);
    OrBoolAVX2(left.data(), right.data(), right.size());
    for (int k = 0; k < kLen; ++k) {
        EXPECT_EQ(left[k], (k % 2 == 0) || (k % 5 == 0));
    }
}
|
|
|
|
// Prints per-call timings (nanoseconds, one value per line) for AndBoolSSE2,
// AndBoolAVX2 and AndBoolRef on two identical 8192-element alternating
// buffers. Nothing is asserted.
TEST(LogicalBool, performance) {
    // steady_clock is monotonic, so an interval cannot be distorted by
    // wall-clock adjustments the way a system_clock interval can.
    using Clock = std::chrono::steady_clock;

    // Prints the nanoseconds elapsed since `begin`. std::dec is forced
    // because another test in this file streams std::hex into std::cout,
    // and that base flag stays set on the stream afterwards.
    const auto print_elapsed_ns = [](Clock::time_point begin) {
        std::cout << std::dec
                  << std::chrono::duration_cast<std::chrono::nanoseconds>(
                         Clock::now() - begin)
                         .count()
                  << std::endl;
    };

    FixedVector<bool> left;
    for (int i = 0; i < 8192; ++i) {
        left.push_back(i % 2 == 0);
    }
    FixedVector<bool> right;
    for (int i = 0; i < 8192; ++i) {
        right.push_back(i % 2 == 0);
    }

    std::cout << "sse2" << std::endl;
    for (int i = 0; i < 10; ++i) {
        const auto start = Clock::now();
        AndBoolSSE2(left.data(), right.data(), left.size());
        print_elapsed_ns(start);
    }

    std::cout << "avx2" << std::endl;
    for (int i = 0; i < 10; ++i) {
        const auto start = Clock::now();
        AndBoolAVX2(left.data(), right.data(), left.size());
        print_elapsed_ns(start);
    }

    // Labelled like the two sections above so the three series can be told
    // apart in the output.
    std::cout << "ref" << std::endl;
    for (int i = 0; i < 10; ++i) {
        const auto start = Clock::now();
        AndBoolRef(left.data(), right.data(), left.size());
        print_elapsed_ns(start);
    }
}
|
|
|
|
#endif
|
|
|
|
#if defined(__ARM_NEON)
|
|
#include "simd/ref.h"
|
|
#include "simd/neon.h"
|
|
using namespace milvus::simd;
|
|
|
|
#include <arm_neon.h>
|
|
#include <iostream>
|
|
|
|
void
|
|
print_uint8x16(uint8x16_t vec) {
|
|
uint8_t tmp[16];
|
|
vst1q_u8(tmp, vec);
|
|
|
|
std::cout << "Vector contents: ";
|
|
for (int i = 0; i < 16; ++i) {
|
|
std::cout << static_cast<unsigned>(tmp[i]) << " ";
|
|
}
|
|
std::cout << std::endl;
|
|
}
|
|
|
|
void
|
|
print_uint8x8(uint8x8_t vec) {
|
|
uint8_t tmp[8];
|
|
vst1_u8(tmp, vec);
|
|
|
|
std::cout << "Vector contents: ";
|
|
for (int i = 0; i < 8; ++i) {
|
|
std::cout << static_cast<unsigned>(tmp[i]) << " ";
|
|
}
|
|
std::cout << std::endl;
|
|
}
|
|
|
|
void
|
|
print_uint16x8(uint16x8_t vec) {
|
|
uint16_t tmp[8];
|
|
vst1q_u16(tmp, vec);
|
|
|
|
std::cout << "Vector contents: ";
|
|
for (int i = 0; i < 8; ++i) {
|
|
std::cout << static_cast<unsigned>(tmp[i]) << " ";
|
|
}
|
|
std::cout << std::endl;
|
|
}
|
|
|
|
// Verifies that InvertBoolNEON flips every element in place, using two
// 8192-element patterns: true at multiples of 2, then true at multiples of 3.
TEST(InvertBool, function) {
    constexpr int kCount = 8192;
    FixedVector<bool> flags;

    for (const int period : {2, 3}) {
        // Element idx starts out true exactly when idx is a multiple of period.
        flags.clear();
        for (int idx = 0; idx < kCount; ++idx) {
            flags.push_back(idx % period == 0);
        }

        InvertBoolNEON(flags.data(), flags.size());

        // After inversion the truth value must be the opposite.
        for (int idx = 0; idx < kCount; ++idx) {
            EXPECT_EQ(flags[idx], (idx % period) != 0);
        }
    }
}
|
|
|
|
// Prints per-call timings (nanoseconds, one value per line) for
// InvertBoolNEON and InvertBoolRef on an 8192-element alternating buffer.
// The buffer is inverted in place on every call; nothing is asserted.
TEST(InvertBool, performance) {
    // steady_clock is monotonic, so an interval cannot be distorted by
    // wall-clock adjustments the way a system_clock interval can.
    using Clock = std::chrono::steady_clock;

    // Prints the nanoseconds elapsed since `begin`. std::dec is forced
    // defensively so the count is decimal even if other code left a
    // different base flag set on std::cout.
    const auto print_elapsed_ns = [](Clock::time_point begin) {
        std::cout << std::dec
                  << std::chrono::duration_cast<std::chrono::nanoseconds>(
                         Clock::now() - begin)
                         .count()
                  << std::endl;
    };

    FixedVector<bool> src;
    for (int i = 0; i < 8192; ++i) {
        src.push_back(i % 2 == 0);
    }

    // Section labels let the two series be told apart in the output.
    std::cout << "NEON" << std::endl;
    for (int i = 0; i < 10; ++i) {
        const auto start = Clock::now();
        InvertBoolNEON(src.data(), src.size());
        print_elapsed_ns(start);
    }

    std::cout << "ref" << std::endl;
    for (int i = 0; i < 10; ++i) {
        const auto start = Clock::now();
        InvertBoolRef(src.data(), src.size());
        print_elapsed_ns(start);
    }
}
|
|
|
|
// Verifies the in-place NEON AND / OR helpers: the result is read back from
// the first operand and compared with the scalar expectation for each index.
TEST(LogicalBool, function) {
    static constexpr int kLen = 8192;

    // Rebuilds `vec` so that element k is true exactly when k is a multiple
    // of `period`.
    const auto refill = [](FixedVector<bool>& vec, int period) {
        vec.clear();
        for (int k = 0; k < kLen; ++k) {
            vec.push_back(k % period == 0);
        }
    };

    FixedVector<bool> left;
    FixedVector<bool> right;

    // Identical operands: AND followed by OR must both leave `left` unchanged.
    refill(left, 2);
    refill(right, 2);
    AndBoolNEON(left.data(), right.data(), right.size());
    for (int k = 0; k < kLen; ++k) {
        EXPECT_EQ(left[k], k % 2 == 0);
    }
    OrBoolNEON(left.data(), right.data(), right.size());
    for (int k = 0; k < kLen; ++k) {
        EXPECT_EQ(left[k], k % 2 == 0);
    }

    // Different operands (multiples of 2 vs. multiples of 5), AND.
    refill(left, 2);
    refill(right, 5);
    AndBoolNEON(left.data(), right.data(), right.size());
    for (int k = 0; k < kLen; ++k) {
        EXPECT_EQ(left[k], (k % 2 == 0) && (k % 5 == 0));
    }

    // Same operands, OR.
    refill(left, 2);
    refill(right, 5);
    OrBoolNEON(left.data(), right.data(), right.size());
    for (int k = 0; k < kLen; ++k) {
        EXPECT_EQ(left[k], (k % 2 == 0) || (k % 5 == 0));
    }
}
|
|
|
|
// Prints per-call timings (nanoseconds, one value per line) for AndBoolNEON
// and AndBoolRef on two identical 8192-element alternating buffers.
// Nothing is asserted.
TEST(LogicalBool, performance) {
    // steady_clock is monotonic, so an interval cannot be distorted by
    // wall-clock adjustments the way a system_clock interval can.
    using Clock = std::chrono::steady_clock;

    // Prints the nanoseconds elapsed since `begin`. std::dec is forced
    // defensively so the count is decimal even if other code left a
    // different base flag set on std::cout.
    const auto print_elapsed_ns = [](Clock::time_point begin) {
        std::cout << std::dec
                  << std::chrono::duration_cast<std::chrono::nanoseconds>(
                         Clock::now() - begin)
                         .count()
                  << std::endl;
    };

    FixedVector<bool> left;
    for (int i = 0; i < 8192; ++i) {
        left.push_back(i % 2 == 0);
    }
    FixedVector<bool> right;
    for (int i = 0; i < 8192; ++i) {
        right.push_back(i % 2 == 0);
    }

    std::cout << "NEON" << std::endl;
    for (int i = 0; i < 10; ++i) {
        const auto start = Clock::now();
        AndBoolNEON(left.data(), right.data(), left.size());
        print_elapsed_ns(start);
    }

    std::cout << "ref" << std::endl;
    for (int i = 0; i < 10; ++i) {
        const auto start = Clock::now();
        AndBoolRef(left.data(), right.data(), left.size());
        print_elapsed_ns(start);
    }
}
|
|
|
|
// Checks AllFalseNEON / AllTrueNEON on three 8192-element inputs:
// uniformly false, alternating true/false, and uniformly true.
TEST(AllBooleanNeon, function) {
    constexpr int kCount = 8192;
    FixedVector<bool> src;

    // Phase 1: every element is false.
    for (int i = 0; i < kCount; ++i) {
        src.push_back(false);
    }
    EXPECT_TRUE(AllFalseNEON(src.data(), src.size()));
    EXPECT_FALSE(AllTrueNEON(src.data(), src.size()));

    // Phase 2: alternating pattern. The buffer is reset first; previously the
    // pattern was appended behind the 8192 false values of phase 1, so the
    // input was not the alternating sequence this phase is meant to cover.
    src.clear();
    for (int i = 0; i < kCount; ++i) {
        src.push_back(i % 2 == 0);
    }
    EXPECT_FALSE(AllFalseNEON(src.data(), src.size()));
    EXPECT_FALSE(AllTrueNEON(src.data(), src.size()));

    // Phase 3: every element is true. Both predicates are checked so the
    // all-true input is covered symmetrically with the all-false one.
    src.clear();
    for (int i = 0; i < kCount; ++i) {
        src.push_back(true);
    }
    EXPECT_TRUE(AllTrueNEON(src.data(), src.size()));
    EXPECT_FALSE(AllFalseNEON(src.data(), src.size()));
}
|
|
|
|
// Prints per-call timings (nanoseconds, one value per line) for the NEON and
// reference implementations of AllFalse / AllTrue on an 8192-element
// alternating true/false buffer. Nothing is asserted.
TEST(AllBooleanNeon, performance) {
    // steady_clock is monotonic, so an interval cannot be distorted by
    // wall-clock adjustments the way a system_clock interval can.
    using Clock = std::chrono::steady_clock;

    // Prints the nanoseconds elapsed since `begin`. std::dec is forced
    // defensively so the count is decimal even if other code left a
    // different base flag set on std::cout.
    const auto print_elapsed_ns = [](Clock::time_point begin) {
        std::cout << std::dec
                  << std::chrono::duration_cast<std::chrono::nanoseconds>(
                         Clock::now() - begin)
                         .count()
                  << std::endl;
    };

    FixedVector<bool> src;
    for (int i = 0; i < 8192; ++i) {
        src.push_back(i % 2 == 0);
    }

    std::cout << "NEON" << std::endl;
    for (int j = 0; j < 10; j++) {
        auto start = Clock::now();
        auto res = AllFalseNEON(src.data(), src.size());
        print_elapsed_ns(start);

        start = Clock::now();
        res = AllTrueNEON(src.data(), src.size());
        print_elapsed_ns(start);
        static_cast<void>(res);  // result is irrelevant here; only timing matters
    }

    std::cout << "ref" << std::endl;
    for (int j = 0; j < 10; j++) {
        auto start = Clock::now();
        auto res = AllFalseRef(src.data(), src.size());
        print_elapsed_ns(start);

        start = Clock::now();
        res = AllTrueRef(src.data(), src.size());
        print_elapsed_ns(start);
        static_cast<void>(res);
    }
}
|
|
|
|
#endif
|
|
|
|
int
|
|
main(int argc, char* argv[]) {
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
return RUN_ALL_TESTS();
|
|
}
|