Support IndexMeta, update Json file

Signed-off-by: FluorineDog <guilin.gou@zilliz.com>
pull/4973/head^2
FluorineDog 2020-09-12 19:41:58 +08:00 committed by yefu.chen
parent be58ccb65e
commit d1287cf715
22 changed files with 11445 additions and 7515 deletions

View File

@ -60,6 +60,7 @@ include( DefineOptions )
include( ExternalProject )
include( FetchContent )
include_directories(thirdparty)
set( FETCHCONTENT_BASE_DIR ${MILVUS_BINARY_DIR}/3rdparty_download )
set(FETCHCONTENT_QUIET OFF)
include( ThirdPartyPackages )

View File

@ -6,15 +6,25 @@ if [[ ! ${jobs+1} ]]; then
fi
BUILD_OUTPUT_DIR="cmake_build"
BUILD_TYPE="Release"
BUILD_TYPE="Debug"
BUILD_UNITTEST="OFF"
INSTALL_PREFIX=$(pwd)/milvus
MAKE_CLEAN="OFF"
BUILD_COVERAGE="OFF"
DB_PATH="/tmp/milvus"
PROFILING="OFF"
RUN_CPPLINT="OFF"
CUDA_COMPILER=/usr/local/cuda/bin/nvcc
GPU_VERSION="OFF" #defaults to CPU version
WITH_PROMETHEUS="ON"
CUDA_ARCH="DEFAULT"
CUSTOM_THIRDPARTY_PATH=""
while getopts "p:d:t:s:ulrcghzme" arg; do
while getopts "p:d:t:s:f:ulrcghzme" arg; do
case $arg in
f)
CUSTOM_THIRDPARTY_PATH=$OPTARG
;;
p)
INSTALL_PREFIX=$OPTARG
;;
@ -36,20 +46,41 @@ while getopts "p:d:t:s:ulrcghzme" arg; do
MAKE_CLEAN="ON"
fi
;;
c)
BUILD_COVERAGE="ON"
;;
z)
PROFILING="ON"
;;
g)
GPU_VERSION="ON"
;;
e)
WITH_PROMETHEUS="OFF"
;;
s)
CUDA_ARCH=$OPTARG
;;
h) # help
echo "
parameter:
-f: custom paths of thirdparty downloaded files(default: NULL)
-p: install prefix(default: $(pwd)/milvus)
-d: db data path(default: /tmp/milvus)
-t: build type(default: Debug)
-u: building unit test options(default: OFF)
-l: run cpplint, clang-format and clang-tidy(default: OFF)
-r: remove previous build directory(default: OFF)
-c: code coverage(default: OFF)
-z: profiling(default: OFF)
-g: build GPU version(default: OFF)
-e: build without prometheus(default: OFF)
-s: build with CUDA arch(default:DEFAULT), for example '-gencode=compute_61,code=sm_61;-gencode=compute_75,code=sm_75'
-h: help
usage:
./build.sh -p \${INSTALL_PREFIX} -t \${BUILD_TYPE} [-u] [-l] [-r] [-h]
./build.sh -p \${INSTALL_PREFIX} -t \${BUILD_TYPE} -s \${CUDA_ARCH} -f\${CUSTOM_THIRDPARTY_PATH} [-u] [-l] [-r] [-c] [-z] [-g] [-m] [-e] [-h]
"
exit 0
;;
@ -82,7 +113,14 @@ CMAKE_CMD="cmake \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DOpenBLAS_SOURCE=AUTO \
-DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} \
-DBUILD_COVERAGE=${BUILD_COVERAGE} \
-DMILVUS_DB_PATH=${DB_PATH} \
-DENABLE_CPU_PROFILING=${PROFILING} \
-DMILVUS_GPU_VERSION=${GPU_VERSION} \
-DMILVUS_WITH_PROMETHEUS=${WITH_PROMETHEUS} \
-DMILVUS_CUDA_ARCH=${CUDA_ARCH} \
-DCUSTOM_THIRDPARTY_DOWNLOAD_PATH=${CUSTOM_THIRDPARTY_PATH} \
../"
echo ${CMAKE_CMD}
${CMAKE_CMD}
@ -106,12 +144,12 @@ if [[ ${RUN_CPPLINT} == "ON" ]]; then
echo "clang-format check passed!"
# clang-tidy check
# make check-clang-tidy
# if [ $? -ne 0 ]; then
# echo "ERROR! clang-tidy check failed"
# exit 1
# fi
# echo "clang-tidy check passed!"
make check-clang-tidy
if [ $? -ne 0 ]; then
echo "ERROR! clang-tidy check failed"
exit 1
fi
echo "clang-tidy check passed!"
else
# compile and build
make -j ${jobs} install || exit 1

View File

@ -25,50 +25,51 @@ endforeach ()
add_subdirectory( utils )
add_subdirectory(dog_segment)
add_subdirectory( log)
add_subdirectory( dog_segment)
add_subdirectory( cache )
#add_subdirectory( query )
add_subdirectory( db ) # target milvus_engine
add_subdirectory( server )
add_subdirectory( query )
# add_subdirectory( db ) # target milvus_engine
# add_subdirectory( server )
set(link_lib
milvus_engine
dog_segment
#query
utils
curl
)
# set(link_lib
# milvus_engine
# # dog_segment
# #query
# utils
# curl
# )
set( BOOST_LIB libboost_system.a
libboost_filesystem.a
libboost_serialization.a
)
# set( BOOST_LIB libboost_system.a
# libboost_filesystem.a
# libboost_serialization.a
# )
set( THIRD_PARTY_LIBS yaml-cpp
)
# set( THIRD_PARTY_LIBS yaml-cpp
# )
target_link_libraries( server
PUBLIC ${link_lib}
${THIRD_PARTY_LIBS}
${BOOST_LIB}
)
# target_link_libraries( server
# PUBLIC ${link_lib}
# ${THIRD_PARTY_LIBS}
# ${BOOST_LIB}
# )
# **************************** Get&Print Include Directories ****************************
get_property( dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES )
# # **************************** Get&Print Include Directories ****************************
# get_property( dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES )
foreach ( dir ${dirs} )
message( STATUS "Current Include DIRS: ")
endforeach ()
# foreach ( dir ${dirs} )
# message( STATUS "Current Include DIRS: ")
# endforeach ()
set( SERVER_LIBS server )
# set( SERVER_LIBS server )
add_executable( milvus_server ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
)
#target_include_directories(db PUBLIC ${PROJECT_BINARY_DIR}/thirdparty/pulsar-client-cpp/pulsar-client-cpp-src/pulsar-client-cpp/include)
# add_executable( milvus_server ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
# )
# #target_include_directories(db PUBLIC ${PROJECT_BINARY_DIR}/thirdparty/pulsar-client-cpp/pulsar-client-cpp-src/pulsar-client-cpp/include)
target_link_libraries( milvus_server PRIVATE ${SERVER_LIBS} )
install( TARGETS milvus_server DESTINATION bin )
# target_link_libraries( milvus_server PRIVATE ${SERVER_LIBS} )
# install( TARGETS milvus_server DESTINATION bin )

View File

@ -1,47 +0,0 @@
#-------------------------------------------------------------------------------
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License.
#-------------------------------------------------------------------------------
# **************************** Engine Source Files ****************************
# **************************** Engine Source Files ****************************
# Collect the engine's own sources plus the bundled third-party sources that
# are compiled directly into the engine library.
aux_source_directory( ${MILVUS_ENGINE_SRC}/db                   DB_MAIN_FILES )
aux_source_directory( ${MILVUS_ENGINE_SRC}/index/archive        WRAPPER_FILES )
aux_source_directory( ${MILVUS_THIRDPARTY_SRC}/easyloggingpp    THIRDPARTY_EASYLOGGINGPP_FILES )
aux_source_directory( ${MILVUS_THIRDPARTY_SRC}/nlohmann         THIRDPARTY_NLOHMANN_FILES )

set( THIRDPARTY_FILES
        ${THIRDPARTY_EASYLOGGINGPP_FILES}
        ${THIRDPARTY_NLOHMANN_FILES}
        )

set( ENGINE_FILES
        ${DB_MAIN_FILES}
        ${THIRDPARTY_FILES}
        ${WRAPPER_FILES}
        )

# System/runtime libraries the engine is linked against.
set( ENGINE_LIBS
        pthread
        libstdc++fs.a
        libgomp.a
        libgfortran.a
        )

# **************************** Add Target milvus engine ****************************
# The sources are handed to add_library directly instead of through a separate
# target_sources call; the resulting target is the same.
add_library( milvus_engine STATIC ${ENGINE_FILES} )

target_link_libraries( milvus_engine
        PUBLIC
            knowhere
            ${ENGINE_LIBS}
        )

View File

@ -1,33 +0,0 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#pragma once
#include <stdint.h>
namespace milvus {
namespace engine {
constexpr int64_t KB = 1LL << 10;
constexpr int64_t MB = 1LL << 20;
constexpr int64_t GB = 1LL << 30;
constexpr int64_t TB = 1LL << 40;
constexpr int64_t MAX_TABLE_FILE_MEM = 128 * MB;
constexpr int64_t MAX_NAME_LENGTH = 255;
constexpr int64_t MAX_DIMENSION = 32768;
constexpr int32_t MAX_SEGMENT_ROW_COUNT = 4 * 1024 * 1024;
constexpr int64_t DEFAULT_SEGMENT_ROW_COUNT = 100000; // default row count per segment when creating collection
constexpr int64_t MAX_INSERT_DATA_SIZE = 256 * MB;
} // namespace engine
} // namespace milvus

View File

@ -1,36 +0,0 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#include "db/Types.h"
// #include "knowhere/index/vector_index/helpers/IndexParameter.h"
namespace milvus {
namespace engine {
const char* FIELD_UID = "_id";
const char* ELEMENT_RAW_DATA = "_raw";
const char* ELEMENT_BLOOM_FILTER = "_blf";
const char* ELEMENT_DELETED_DOCS = "_del";
const char* ELEMENT_INDEX_COMPRESS = "_compress";
const char* PARAM_UID_AUTOGEN = "auto_id";
// const char* PARAM_DIMENSION = knowhere::meta::DIM;
const char* PARAM_INDEX_TYPE = "index_type";
// const char* PARAM_INDEX_METRIC_TYPE = knowhere::Metric::TYPE;
const char* PARAM_INDEX_EXTRA_PARAMS = "params";
const char* PARAM_SEGMENT_ROW_COUNT = "segment_row_count";
const char* DEFAULT_STRUCTURED_INDEX = "SORTED"; // this string should be defined in knowhere::IndexEnum
const char* DEFAULT_PARTITON_TAG = "_default";
} // namespace engine
} // namespace milvus

View File

@ -1,200 +0,0 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#pragma once
// #include <faiss/Index.h>
#include <cstdint>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
// #include "cache/DataObj.h"
#include "db/Constants.h"
// #include "knowhere/index/vector_index/VecIndex.h"
#include "utils/Json.h"
namespace milvus {
namespace engine {
extern const char* FIELD_UID;
extern const char* ELEMENT_RAW_DATA;
extern const char* ELEMENT_BLOOM_FILTER;
extern const char* ELEMENT_DELETED_DOCS;
extern const char* ELEMENT_INDEX_COMPRESS;
extern const char* PARAM_UID_AUTOGEN;
extern const char* PARAM_DIMENSION;
extern const char* PARAM_INDEX_TYPE;
extern const char* PARAM_INDEX_METRIC_TYPE;
extern const char* PARAM_INDEX_EXTRA_PARAMS;
extern const char* PARAM_SEGMENT_ROW_COUNT;
extern const char* DEFAULT_STRUCTURED_INDEX;
extern const char* DEFAULT_PARTITON_TAG;
///////////////////////////////////////////////////////////////////////////////////////////////////
using id_t = int64_t;
using offset_t = int32_t;
using date_t = int32_t;
using IDNumbers = std::vector<id_t>;
// using VectorDistance = faiss::Index::distance_t;
using VectorDistance = float;
using VectorDistances = std::vector<VectorDistance>;
// using ResultIds = std::vector<faiss::Index::idx_t>;
using ResultIds = std::vector<int64_t>;
using ResultDistances = std::vector<VectorDistance>;
///////////////////////////////////////////////////////////////////////////////////////////////////
enum class DataType {
NONE = 0,
BOOL = 1,
INT8 = 2,
INT16 = 3,
INT32 = 4,
INT64 = 5,
FLOAT = 10,
DOUBLE = 11,
STRING = 20,
VECTOR_BINARY = 100,
VECTOR_FLOAT = 101,
};
///////////////////////////////////////////////////////////////////////////////////////////////////
enum class FieldElementType {
FET_NONE = 0,
FET_RAW = 1,
FET_BLOOM_FILTER = 2,
FET_DELETED_DOCS = 3,
FET_INDEX = 4,
FET_COMPRESS_SQ8 = 5,
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// class BinaryData : public cache::DataObj {
// public:
// int64_t
// Size() {
// return data_.size();
// }
// public:
// std::vector<uint8_t> data_;
// };
// using BinaryDataPtr = std::shared_ptr<BinaryData>;
///////////////////////////////////////////////////////////////////////////////////////////////////
// class VaribleData : public cache::DataObj {
// public:
// int64_t
// Size() {
// return data_.size() + offset_.size() * sizeof(int64_t);
// }
// public:
// std::vector<uint8_t> data_;
// std::vector<int64_t> offset_;
// };
// using VaribleDataPtr = std::shared_ptr<VaribleData>;
///////////////////////////////////////////////////////////////////////////////////////////////////
// using FIELD_TYPE_MAP = std::unordered_map<std::string, DataType>;
// using FIELD_WIDTH_MAP = std::unordered_map<std::string, int64_t>;
// using FIXEDX_FIELD_MAP = std::unordered_map<std::string, BinaryDataPtr>;
// using VARIABLE_FIELD_MAP = std::unordered_map<std::string, VaribleDataPtr>;
// using VECTOR_INDEX_MAP = std::unordered_map<std::string, knowhere::VecIndexPtr>;
// using STRUCTURED_INDEX_MAP = std::unordered_map<std::string, knowhere::IndexPtr>;
///////////////////////////////////////////////////////////////////////////////////////////////////
struct DataChunk {
int64_t count_ = 0;
// FIXEDX_FIELD_MAP fixed_fields_;
// VARIABLE_FIELD_MAP variable_fields_;
};
using DataChunkPtr = std::shared_ptr<DataChunk>;
///////////////////////////////////////////////////////////////////////////////////////////////////
struct CollectionIndex {
std::string index_name_;
std::string index_type_;
std::string metric_name_;
milvus::json extra_params_ = {{"nlist", 2048}};
};
///////////////////////////////////////////////////////////////////////////////////////////////////
struct VectorsData {
uint64_t vector_count_ = 0;
std::vector<float> float_data_;
std::vector<uint8_t> binary_data_;
IDNumbers id_array_;
};
///////////////////////////////////////////////////////////////////////////////////////////////////
struct AttrsData {
uint64_t attr_count_ = 0;
std::unordered_map<std::string, engine::DataType> attr_type_;
std::unordered_map<std::string, std::vector<uint8_t>> attr_data_;
IDNumbers id_array_;
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// Result of one query: the matched ids, their distances and the optional
// data chunk that accompanies them.
struct QueryResult {
    // Zero-initialised like the sibling counters (VectorsData::vector_count_,
    // AttrsData::attr_count_) so a default-constructed result never exposes an
    // indeterminate row count.
    uint64_t row_num_ = 0;
    engine::ResultIds result_ids_;
    engine::ResultDistances result_distances_;
    engine::DataChunkPtr data_chunk_;
};
using QueryResultPtr = std::shared_ptr<QueryResult>;
///////////////////////////////////////////////////////////////////////////////////////////////////
struct DBMetaOptions {
std::string path_;
std::string backend_uri_;
}; // DBMetaOptions
///////////////////////////////////////////////////////////////////////////////////////////////////
struct DBOptions {
typedef enum { SINGLE = 0, CLUSTER_READONLY, CLUSTER_WRITABLE } MODE;
DBMetaOptions meta_;
int mode_ = MODE::SINGLE;
size_t insert_buffer_size_ = 4 * GB;
bool insert_cache_immediately_ = false;
int64_t auto_flush_interval_ = 1;
bool metric_enable_ = false;
// wal relative configurations
bool wal_enable_ = false;
int64_t buffer_size_ = 256;
std::string mxlog_path_ = "/tmp/milvus/wal/";
// transcript configurations
bool transcript_enable_ = false;
std::string replay_script_path_; // for replay
}; // Options
} // namespace engine
} // namespace milvus

View File

@ -8,9 +8,9 @@ set(DOG_SEGMENT_FILES
partition_c.cpp
segment_c.cpp
)
add_library(dog_segment SHARED
add_library(milvus_dog_segment SHARED
${DOG_SEGMENT_FILES}
)
#add_dependencies( segment sqlite mysqlpp )
target_link_libraries(dog_segment tbb utils pthread)
target_link_libraries(milvus_dog_segment tbb utils pthread knowhere log)

View File

@ -1,56 +1,55 @@
#include "IndexMeta.h"
// #include <mutex>
// #include <cassert>
// namespace milvus::dog_segment {
//
// Status
// IndexMeta::AddEntry(const std::string& index_name, const std::string& field_name, IndexType type, IndexMode mode,
// IndexConfig config) {
// Entry entry{
// index_name,
// field_name,
// type,
// mode,
// std::move(config)
// };
// VerifyEntry(entry);
//
// if (entries_.count(index_name)) {
// throw std::invalid_argument("duplicate index_name");
// }
// // TODO: support multiple indexes for single field
// assert(!lookups_.count(field_name));
// lookups_[field_name] = index_name;
// entries_[index_name] = std::move(entry);
//
// return Status::OK();
// }
//
// Status
// IndexMeta::DropEntry(const std::string& index_name) {
// assert(entries_.count(index_name));
// auto entry = std::move(entries_[index_name]);
// if(lookups_[entry.field_name] == index_name) {
// lookups_.erase(entry.field_name);
// }
// return Status::OK();
// }
//
// void IndexMeta::VerifyEntry(const Entry &entry) {
// auto is_mode_valid = std::set{IndexMode::MODE_CPU, IndexMode::MODE_GPU}.count(entry.mode);
// if(!is_mode_valid) {
// throw std::invalid_argument("invalid mode");
// }
//
// auto& schema = *schema_;
// auto& field_meta = schema[entry.index_name];
// // TODO checking
// if(field_meta.is_vector()) {
// assert(entry.type == knowhere::IndexEnum::INDEX_FAISS_IVFFLAT);
// } else {
// assert(false);
// }
// }
//
// } // namespace milvus::dog_segment
//
#include <mutex>
#include <cassert>
namespace milvus::dog_segment {
// Registers a new index description named `index_name` for field `field_name`.
// The candidate entry is validated first (VerifyEntry may throw); an already
// used index name is rejected with std::invalid_argument. A field that already
// has an index is only guarded by an assert.
Status
IndexMeta::AddEntry(const std::string& index_name, const std::string& field_name, IndexType type, IndexMode mode,
                    IndexConfig config) {
    Entry candidate{index_name, field_name, type, mode, std::move(config)};
    VerifyEntry(candidate);

    const bool name_taken = entries_.count(index_name) != 0;
    if (name_taken) {
        throw std::invalid_argument("duplicate index_name");
    }

    // TODO: support multiple indexes for single field
    assert(!lookups_.count(field_name));

    lookups_[field_name] = index_name;
    entries_[index_name] = std::move(candidate);
    return Status::OK();
}
// Removes the index description named `index_name`, together with the
// field -> index lookup that points at it.
//
// The previous implementation moved the entry out of `entries_` but never
// erased it, so a moved-from entry stayed registered. It also used operator[]
// on both maps, which inserts empty keys as a side effect of a mere lookup.
//
// Throws std::invalid_argument when no such index exists (the assert still
// fires first in debug builds, as before).
Status
IndexMeta::DropEntry(const std::string& index_name) {
    auto entry_iter = entries_.find(index_name);
    assert(entry_iter != entries_.end());
    if (entry_iter == entries_.end()) {
        throw std::invalid_argument("index_name not found");
    }

    // Only drop the lookup when it really refers to this index.
    auto lookup_iter = lookups_.find(entry_iter->second.field_name);
    if (lookup_iter != lookups_.end() && lookup_iter->second == index_name) {
        lookups_.erase(lookup_iter);
    }

    entries_.erase(entry_iter);
    return Status::OK();
}
void IndexMeta::VerifyEntry(const Entry &entry) {
auto is_mode_valid = std::set{IndexMode::MODE_CPU, IndexMode::MODE_GPU}.count(entry.mode);
if(!is_mode_valid) {
throw std::invalid_argument("invalid mode");
}
auto& schema = *schema_;
auto& field_meta = schema[entry.index_name];
// TODO checking
if(field_meta.is_vector()) {
assert(entry.type == knowhere::IndexEnum::INDEX_FAISS_IVFFLAT);
} else {
assert(false);
}
}
} // namespace milvus::dog_segment

View File

@ -3,55 +3,56 @@
//#include <shared_mutex>
//
//#include "SegmentDefs.h"
// #include "dog_segment/SegmentBase.h"
#include "knowhere/index/IndexType.h"
//
#include "knowhere/common/Config.h"
#include "dog_segment/SegmentDefs.h"
#include <map>
#include <memory>
class IndexMeta;
namespace milvus::dog_segment {
//// TODO: this is
//class IndexMeta {
// public:
// IndexMeta(SchemaPtr schema) : schema_(schema) {
// }
// using IndexType = knowhere::IndexType;
// using IndexMode = knowhere::IndexMode;
// using IndexConfig = knowhere::Config;
//
// struct Entry {
// std::string index_name;
// std::string field_name;
// IndexType type;
// IndexMode mode;
// IndexConfig config;
// };
//
// Status
// AddEntry(const std::string& index_name, const std::string& field_name, IndexType type, IndexMode mode,
// IndexConfig config);
//
// Status
// DropEntry(const std::string& index_name);
//
// const std::map<std::string, Entry>&
// get_entries() {
// return entries_;
// }
//
// const Entry& lookup_by_field(const std::string& field_name) {
// auto index_name = lookups_.at(field_name);
// return entries_.at(index_name);
// }
// private:
// void
// VerifyEntry(const Entry& entry);
//
// private:
// SchemaPtr schema_;
// std::map<std::string, Entry> entries_; // index_name => Entry
// std::map<std::string, std::string> lookups_; // field_name => index_name
//};
//
// TODO: this is
// Registry of index descriptions for one schema.
// Entries are stored by index name; a second map resolves a field name to the
// name of the index built on that field.
class IndexMeta {
 public:
    IndexMeta(SchemaPtr schema) : schema_(schema) {
    }

    using IndexType = knowhere::IndexType;
    using IndexMode = knowhere::IndexMode;
    using IndexConfig = knowhere::Config;

    // Description of a single index.
    struct Entry {
        std::string index_name;
        std::string field_name;
        IndexType type;
        IndexMode mode;
        IndexConfig config;
    };

    // Registers a new index; see the definition for validation details.
    Status
    AddEntry(const std::string& index_name, const std::string& field_name, IndexType type, IndexMode mode,
             IndexConfig config);

    // Removes the index registered under `index_name`.
    Status
    DropEntry(const std::string& index_name);

    // Read-only view of all entries, keyed by index name.
    // Declared const so it can be called through a const IndexMeta.
    const std::map<std::string, Entry>&
    get_entries() const {
        return entries_;
    }

    // Returns the entry of the index built on `field_name`.
    // std::map::at throws std::out_of_range when the field has no index.
    const Entry&
    lookup_by_field(const std::string& field_name) const {
        const auto& index_name = lookups_.at(field_name);
        return entries_.at(index_name);
    }

 private:
    void
    VerifyEntry(const Entry& entry);

 private:
    SchemaPtr schema_;
    std::map<std::string, Entry> entries_;        // index_name => Entry
    std::map<std::string, std::string> lookups_;  // field_name => index_name
};
using IndexMetaPtr = std::shared_ptr<IndexMeta>;
//
} // namespace milvus::dog_segment
//

View File

@ -385,31 +385,14 @@ SegmentNaive::Query(query::QueryPtr query_info, Timestamp timestamp, QueryResult
// Closes the segment once every reserved insert and delete slot has been
// acknowledged.
// Throws std::runtime_error when acknowledgements lag behind the reservations;
// the segment state is left untouched in that case.
Status
SegmentNaive::Close() {
    // The original code constructed the std::runtime_error objects and
    // discarded them, so these checks never had any effect. They must be thrown.
    if (this->record_.reserved != this->record_.ack_responder_.GetAck()) {
        throw std::runtime_error("insert not ready");
    }
    // The delete reservation was compared against the *insert* responder, which
    // would fail as soon as the insert and delete counts differ.
    // NOTE(review): assumes deleted_record_ owns an ack_responder_ mirroring record_ - confirm.
    if (this->deleted_record_.reserved != this->deleted_record_.ack_responder_.GetAck()) {
        throw std::runtime_error("delete not ready");
    }
    // Flip the state only after validation so a failed Close does not leave the
    // segment marked as closed.
    state_ = SegmentState::Closed;
    return Status::OK();
    // auto src_record = GetMutableRecord();
    // assert(src_record);
    //
    // auto dst_record = std::make_shared<ImmutableRecord>(schema_->size());
    //
    // auto data_move = [](auto& dst_vec, const auto& src_vec) {
    // assert(dst_vec.size() == 0);
    // dst_vec.insert(dst_vec.begin(), src_vec.begin(), src_vec.end());
    // };
    // data_move(dst_record->uids_, src_record->uids_);
    // data_move(dst_record->timestamps_, src_record->uids_);
    //
    // assert(src_record->entity_vecs_.size() == schema_->size());
    // assert(dst_record->entity_vecs_.size() == schema_->size());
    // for (int i = 0; i < schema_->size(); ++i) {
    // data_move(dst_record->entity_vecs_[i], src_record->entity_vecs_[i]);
    // }
    // bool ready_old = false;
    // record_immutable_ = dst_record;
    // ready_immutable_.compare_exchange_strong(ready_old, true);
    // if (ready_old) {
    // throw std::logic_error("Close may be called twice, with potential race condition");
    // }
    // return Status::OK();
}
Status

View File

@ -11,7 +11,7 @@
#pragma once
#include "src/utils/Json.h"
#include "utils/Json.h"
namespace milvus {
namespace knowhere {

View File

@ -1,23 +0,0 @@
#-------------------------------------------------------------------------------
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License.
#-------------------------------------------------------------------------------
aux_source_directory( ${MILVUS_ENGINE_SRC}/server SERVER_SERVICE_FILES )
set( SERVER_FILES ${SERVER_SERVICE_FILES}
)
#add_library( server STATIC MessageWrapper.cpp MessageWrapper.h)
add_library( server STATIC ${SERVER_FILES})
#target_sources( server
# PRIVATE ${SERVER_FILES}
# )

View File

@ -1,75 +0,0 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#include "server/Server.h"
#include <fcntl.h>
#include <unistd.h>
#include <boost/filesystem.hpp>
#include <cstring>
#include <unordered_map>
#include "config/ServerConfig.h"
#include "tracing/TracerUtil.h"
#include "log/LogMgr.h"
#include <yaml-cpp/yaml.h>
#include "utils/Log.h"
#include "utils/SignalHandler.h"
#include "utils/TimeRecorder.h"
namespace milvus {
namespace server {
Server&
Server::GetInstance() {
static Server server;
return server;
}
void
Server::Init(int64_t daemonized, const std::string& pid_filename, const std::string& config_filename) {
daemonized_ = daemonized;
pid_filename_ = pid_filename;
config_filename_ = config_filename;
}
void
Server::Daemonize() {
}
// Starts the server. The body is still a stub, but a non-void function must
// return a value: flowing off the end is undefined behaviour.
Status
Server::Start() {
    return Status::OK();
}
void
Server::Stop() {
}
Status
Server::StartService() {
Status stat;
return stat;
}
void
Server::StopService() {
}
void
Server::LogConfigInFile(const std::string& path) {
}
void
Server::LogCpuInfo() {
}
} // namespace server
} // namespace milvus

View File

@ -1,62 +0,0 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#pragma once
#include <string>
#include "config/ConfigMgr.h"
#include "utils/Status.h"
namespace milvus {
namespace server {
class Server {
public:
static Server&
GetInstance();
void
Init(int64_t daemonized, const std::string& pid_filename, const std::string& config_filename);
Status
Start();
void
Stop();
private:
Server() = default;
~Server() = default;
void
Daemonize();
Status
StartService();
void
StopService();
private:
static void
LogConfigInFile(const std::string& path);
static void
LogCpuInfo();
private:
int64_t daemonized_ = 0;
int pid_fd_ = -1;
std::string pid_filename_;
std::string config_filename_;
// ConfigMgrPtr config_mgr_;
}; // Server
} // namespace server
} // namespace milvus

View File

@ -11,10 +11,13 @@
# or implied. See the License for the specific language governing permissions and limitations under the License.
#-------------------------------------------------------------------------------
aux_source_directory( ${MILVUS_ENGINE_SRC}/utils UTILS_FILES )
# aux_source_directory( ${MILVUS_ENGINE_SRC}/utils UTILS_FILES )
set(UTILS_FILES
Status.cpp
)
add_library( utils STATIC ${UTILS_FILES} )
target_link_libraries(utils
libboost_filesystem.a
libboost_system.a)
libboost_system.a)

View File

@ -45,4 +45,4 @@ endif()
# ****************************** Thirdparty opentracing ***************************************
if ( MILVUS_WITH_OPENTRACING )
add_subdirectory( opentracing )
endif()
endif()

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,12 @@
enable_testing()
find_package(GTest REQUIRED)
include_directories(${CMAKE_HOME_DIRECTORY}/src)
# Make the knowhere headers visible to the unit tests. The stray `>>>>` token
# that preceded the path was being passed to CMake as an extra (bogus) include
# directory and has been removed.
include_directories(${CMAKE_HOME_DIRECTORY}/src/index/knowhere)
set(MILVUS_TEST_FILES
test_naive.cpp
# test_dog_segment.cpp
test_dog_segment.cpp
test_concurrent_vector.cpp
test_c_api.cpp
)
add_executable(all_tests

View File

@ -137,7 +137,7 @@ TEST(CApiTest, SearchTest) {
long result_ids[10];
float result_distances[10];
auto sea_res = Search(segment, nullptr, 0, result_ids, result_distances);
auto sea_res = Search(segment, nullptr, 1, result_ids, result_distances);
assert(sea_res == 0);
assert(result_ids[0] == 100911);

View File

@ -0,0 +1,129 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#include <gtest/gtest.h>
#include <iostream>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include "dog_segment/ConcurrentVector.h"
#include "dog_segment/SegmentBase.h"
// #include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "dog_segment/SegmentBase.h"
#include "dog_segment/AckResponder.h"
using std::cin;
using std::cout;
using std::endl;
using namespace milvus::engine;
using namespace milvus::dog_segment;
using std::vector;
// Smoke test: the library's TestABI() entry point must be linkable and
// return the agreed magic value.
TEST(ConcurrentVector, TestABI) {
    const auto abi_value = TestABI();
    ASSERT_EQ(abi_value, 42);
}
// Single-threaded fill: appends 10000 randomly sized batches of consecutive
// integers, then checks the chunk count and reads every element back.
TEST(ConcurrentVector, TestSingle) {
    auto dim = 8;
    ConcurrentVector<int, false, 32> c_vec(dim);
    std::default_random_engine rng(42);

    int next_value = 0;
    auto row_count = 0;
    for (int round = 0; round < 10000; ++round) {
        int batch_rows = rng() % 150;

        // Each row holds `dim` values; values simply keep counting upwards.
        vector<int> batch(batch_rows * dim);
        for (auto& slot : batch) {
            slot = next_value++;
        }

        c_vec.grow_to_at_least(row_count + batch_rows);
        c_vec.set_data(row_count, batch.data(), batch_rows);
        row_count += batch_rows;
    }

    // The chunk size is 32 rows, so the chunk count is the rounded-up quotient.
    ASSERT_EQ(c_vec.chunk_size(), (row_count + 31) / 32);

    for (int row = 0; row < row_count; ++row) {
        for (int col = 0; col < dim; ++col) {
            auto expected = col + row * dim;
            ASSERT_EQ(c_vec.get_element(row)[col], expected);
        }
    }
}
// Concurrent fill: 16 threads reserve row ranges through a shared atomic
// counter and write their own value sequence into the reserved range.
// Every value encodes its writer (value % threads) and its position in that
// writer's sequence (value / threads), so the read-back pass can verify that
// each thread's data arrived complete and in order.
TEST(ConcurrentVector, TestMultithreads) {
    auto dim = 8;
    constexpr int threads = 16;
    std::vector<int64_t> total_counts(threads);

    ConcurrentVector<int64_t, false, 32> c_vec(dim);
    std::atomic<int64_t> ack_counter = 0;
    // std::mutex mutex;

    auto executor = [&](int thread_id) {
        std::default_random_engine e(42 + thread_id);
        int64_t data = 0;
        int64_t total_count = 0;
        for (int i = 0; i < 10000; ++i) {
            // std::lock_guard lck(mutex);
            int insert_size = e() % 150;
            vector<int64_t> vec(insert_size * dim);
            for (auto& x : vec) {
                x = data++ * threads + thread_id;
            }
            // Reserve a private row range, then fill it.
            auto offset = ack_counter.fetch_add(insert_size);
            c_vec.grow_to_at_least(offset + insert_size);
            c_vec.set_data(offset, vec.data(), insert_size);
            total_count += insert_size;
        }
        assert(data == total_count * dim);
        total_counts[thread_id] = total_count;
    };

    std::vector<std::thread> pool;
    for (int i = 0; i < threads; ++i) {
        pool.emplace_back(executor, i);
    }
    for (auto& thread : pool) {
        thread.join();
    }

    std::vector<int64_t> counts(threads);
    auto N = ack_counter.load();
    for (int64_t i = 0; i < N; ++i) {
        for (int d = 0; d < dim; ++d) {
            auto data = c_vec.get_element(i)[d];
            auto thread_id = data % threads;
            auto raw_data = data / threads;
            auto std_data = counts[thread_id]++;
            ASSERT_EQ(raw_data, std_data) << data;
        }
    }

    // total_counts used to be recorded but never checked: every thread must
    // have contributed exactly the number of values it reported writing.
    for (int t = 0; t < threads; ++t) {
        ASSERT_EQ(counts[t], total_counts[t] * dim) << "thread " << t;
    }
}
// Feeds N unit-length segments [i, i + 1) to an AckResponder in a mildly
// shuffled order (sorted by a jittered weight) and checks that the
// acknowledged prefix never overtakes a segment that has not been added yet,
// never falls far behind, and ends at N.
TEST(ConcurrentVector, TestAckSingle) {
    std::vector<std::tuple<int64_t, int64_t, int64_t>> raw_data;
    std::default_random_engine e(42);
    AckResponder ack;
    int N = 10000;
    // Use N for the loop bound so the segment count and the final expectation
    // cannot drift apart (the bound used to be a separate literal 10000).
    for (int i = 0; i < N; ++i) {
        auto weight = i + e() % 100;
        raw_data.emplace_back(weight, i, (i + 1));
    }
    std::sort(raw_data.begin(), raw_data.end());
    // Bindings are named seg_begin/seg_end so they no longer shadow the
    // random engine `e` declared above.
    for (auto [_, seg_begin, seg_end] : raw_data) {
        EXPECT_LE(ack.GetAck(), seg_begin);
        ack.AddSegment(seg_begin, seg_end);
        auto seg = ack.GetAck();
        EXPECT_GE(seg + 100, seg_begin);
    }
    EXPECT_EQ(ack.GetAck(), N);
}

View File

@ -9,71 +9,21 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
// #include <fiu-control.h>
// #include <fiu-local.h>
// #include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <iostream>
#include <string>
// #include "db/SnapshotVisitor.h"
// #include "db/Types.h"
// #include "db/snapshot/IterateHandler.h"
// #include "db/snapshot/Resources.h"
// #include "db/utils.h"
// #include "knowhere/index/vector_index/helpers/IndexParameter.h"
// #include "segment/SegmentReader.h"
// #include "segment/SegmentWriter.h"
// #include "src/dog_segment/SegmentBase.h"
#include "dog_segment/SegmentBase.h"
// #include "utils/Json.h"
#include <random>
#include <gtest/gtest.h>
#include "dog_segment/SegmentBase.h"
using std::cin;
using std::cout;
using std::endl;
// using SegmentVisitor = milvus::engine::SegmentVisitor;
// namespace {
// milvus::Status
// CreateCollection(std::shared_ptr<DB> db, const std::string& collection_name, const LSN_TYPE& lsn) {
// CreateCollectionContext context;
// context.lsn = lsn;
// auto collection_schema = std::make_shared<Collection>(collection_name);
// context.collection = collection_schema;
// int64_t collection_id = 0;
// int64_t field_id = 0;
// /* field uid */
// auto uid_field = std::make_shared<Field>(milvus::engine::FIELD_UID, 0, milvus::engine::DataType::INT64,
// milvus::engine::snapshot::JEmpty, field_id);
// auto uid_field_element_blt =
// std::make_shared<FieldElement>(collection_id, field_id, milvus::engine::ELEMENT_BLOOM_FILTER,
// milvus::engine::FieldElementType::FET_BLOOM_FILTER);
// auto uid_field_element_del =
// std::make_shared<FieldElement>(collection_id, field_id, milvus::engine::ELEMENT_DELETED_DOCS,
// milvus::engine::FieldElementType::FET_DELETED_DOCS);
// field_id++;
// /* field vector */
// milvus::json vector_param = {{milvus::knowhere::meta::DIM, 4}};
// auto vector_field =
// std::make_shared<Field>("vector", 0, milvus::engine::DataType::VECTOR_FLOAT, vector_param, field_id);
// auto vector_field_element_index =
// std::make_shared<FieldElement>(collection_id, field_id, milvus::knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
// milvus::engine::FieldElementType::FET_INDEX);
// /* another field*/
// auto int_field = std::make_shared<Field>("int", 0, milvus::engine::DataType::INT32,
// milvus::engine::snapshot::JEmpty, field_id++);
// context.fields_schema[uid_field] = {uid_field_element_blt, uid_field_element_del};
// context.fields_schema[vector_field] = {vector_field_element_index};
// context.fields_schema[int_field] = {};
// return db->CreateCollection(context);
// }
// } // namespace
// NOTE(review): judging by the name this is presumably an ABI/link smoke test
// for the dog_segment headers -- confirm. Only the first and last statements of
// the body are visible in this view.
TEST(DogSegmentTest, TestABI) {
using namespace milvus::engine;
// NOTE(review): the next line is a diff hunk header left over from the commit
// view, not C++; the statements between the hunks are not shown here.
@ -82,60 +32,6 @@ TEST(DogSegmentTest, TestABI) {
// Always-true assertion: it checks nothing at runtime.
assert(true);
}
// TEST_F(DogSegmentTest, TestCreateAndSchema) {
// using namespace milvus::engine;
// using namespace milvus::dog_segment;
// // step1: create segment from current snapshot.
// LSN_TYPE lsn = 0;
// auto next_lsn = [&]() -> decltype(lsn) { return ++lsn; };
// // step 1.1: create collection
// std::string db_root = "/tmp/milvus_test/db/table";
// std::string collection_name = "c1";
// auto status = CreateCollection(db_, collection_name, next_lsn());
// ASSERT_TRUE(status.ok());
// // step 1.2: get snapshot
// ScopedSnapshotT snapshot;
// status = Snapshots::GetInstance().GetSnapshot(snapshot, collection_name);
// ASSERT_TRUE(status.ok());
// ASSERT_TRUE(snapshot);
// ASSERT_EQ(snapshot->GetName(), collection_name);
// // step 1.3: get partition_id
// cout << endl;
// cout << endl;
// ID_TYPE partition_id = snapshot->GetResources<Partition>().begin()->first;
// cout << partition_id;
// // step 1.5 create schema from ids
// auto collection = snapshot->GetCollection();
// auto field_names = snapshot->GetFieldNames();
// auto schema = std::make_shared<Schema>();
// for (const auto& field_name : field_names) {
// auto the_field = snapshot->GetField(field_name);
// auto param = the_field->GetParams();
// auto type = the_field->GetFtype();
// cout << field_name //
// << " " << (int)type //
// << " " << param //
// << endl;
// FieldMeta field(field_name, type);
// int dim = 1;
// if(field.is_vector()) {
// field.set_dim(dim);
// }
// schema->AddField(field);
// }
// // step 1.6 create a segment from ids
// auto segment = CreateSegment(schema);
// std::vector<id_t> primary_ids;
// }
// Builds N rows of raw data, creates a segment from `schema`, inserts the rows
// and closes the segment.
// NOTE(review): this body comes from a rendered diff. The `@ ...` lines are hunk
// headers, and several statements appear twice (what looks like the removed and
// the added side of the change). Resolve against the real file before editing.
TEST(DogSegmentTest, MockTest) {
using namespace milvus::dog_segment;
@ -145,7 +41,7 @@ TEST(DogSegmentTest, MockTest) {
schema->AddField("age", DataType::INT32);
// Flat byte buffer holding the rows, plus per-row timestamps and ids.
std::vector<char> raw_data;
std::vector<Timestamp> timestamps;
// NOTE(review): `uids` is declared twice with different element types --
// presumably the old (uint64_t) and new (int64_t) side of the diff.
std::vector<uint64_t> uids;
std::vector<int64_t> uids;
int N = 10000;
// Fixed seed for the random engine.
std::default_random_engine e(67);
// NOTE(review): the loop body that fills raw_data / timestamps / uids is elided
// by the hunk header below.
for(int i = 0; i < N; ++i) {
@ -163,108 +59,18 @@ TEST(DogSegmentTest, MockTest) {
// Bytes per row: one int followed by 16 floats.
// NOTE(review): presumably the "age" INT32 field plus a 16-dim float vector;
// the vector field definition is outside this view -- confirm.
auto line_sizeof = (sizeof(int) + sizeof(float) * 16);
// The buffer must hold exactly N rows of that size.
assert(raw_data.size() == line_sizeof * N);
// NOTE(review): `segment` is created twice -- presumably old vs. new diff side.
auto segment = CreateSegment(schema).release();
// auto index_meta = std::make_shared<IndexMeta>(schema);
auto segment = CreateSegment(schema, nullptr);
DogDataChunk data_chunk{raw_data.data(), (int)line_sizeof, N};
segment->Insert(N, uids.data(), timestamps.data(), data_chunk);
// PreInsert(N) yields the offset that is then passed to Insert along with the rows.
auto offset = segment->PreInsert(N);
segment->Insert(offset, N, uids.data(), timestamps.data(), data_chunk);
QueryResult query_result;
segment->Query(nullptr, 0, query_result);
delete segment;
// segment->Query(nullptr, 0, query_result);
segment->Close();
// segment->BuildIndex();
// The counter below is written but never read.
int i = 0;
i++;
}
//TEST_F(DogSegmentTest, DogSegmentTest) {
// LSN_TYPE lsn = 0;
// auto next_lsn = [&]() -> decltype(lsn) { return ++lsn; };
//
// std::string db_root = "/tmp/milvus_test/db/table";
// std::string c1 = "c1";
// auto status = CreateCollection(db_, c1, next_lsn());
// ASSERT_TRUE(status.ok());
//
// ScopedSnapshotT snapshot;
// status = Snapshots::GetInstance().GetSnapshot(snapshot, c1);
// ASSERT_TRUE(status.ok());
// ASSERT_TRUE(snapshot);
// ASSERT_EQ(snapshot->GetName(), c1);
// {
// SegmentFileContext sf_context;
// SFContextBuilder(sf_context, snapshot);
// }
// std::vector<SegmentFileContext> segfile_ctxs;
// SFContextsBuilder(segfile_ctxs, snapshot);
//
// std::cout << snapshot->ToString() << std::endl;
//
// ID_TYPE partition_id;
// {
// auto& partitions = snapshot->GetResources<Partition>();
// partition_id = partitions.begin()->first;
// }
//
// [&next_lsn, //
// &segfile_ctxs, //
// &partition_id, //
// &snapshot, //
// &db_root] {
// /* commit new segment */
// OperationContext op_ctx;
// op_ctx.lsn = next_lsn();
// op_ctx.prev_partition = snapshot->GetResource<Partition>(partition_id);
//
// auto new_seg_op = std::make_shared<NewSegmentOperation>(op_ctx, snapshot);
// SegmentPtr new_seg;
// auto status = new_seg_op->CommitNewSegment(new_seg);
// ASSERT_TRUE(status.ok());
//
// /* commit new segment file */
// for (auto& cctx : segfile_ctxs) {
// SegmentFilePtr seg_file;
// auto nsf_context = cctx;
// nsf_context.segment_id = new_seg->GetID();
// nsf_context.partition_id = new_seg->GetPartitionId();
// status = new_seg_op->CommitNewSegmentFile(nsf_context, seg_file);
// }
//
// /* build segment visitor */
// auto ctx = new_seg_op->GetContext();
// ASSERT_TRUE(ctx.new_segment);
// auto visitor = SegmentVisitor::Build(snapshot, ctx.new_segment, ctx.new_segment_files);
// ASSERT_TRUE(visitor);
// ASSERT_EQ(visitor->GetSegment(), new_seg);
// ASSERT_FALSE(visitor->GetSegment()->IsActive());
// // std::cout << visitor->ToString() << std::endl;
// // std::cout << snapshot->ToString() << std::endl;
//
// /* write data */
// milvus::segment::SegmentWriter segment_writer(db_root, visitor);
//
// // std::vector<milvus::segment::doc_id_t> raw_uids = {123};
// // std::vector<uint8_t> raw_vectors = {1, 2, 3, 4};
// // status = segment_writer.AddChunk("test", raw_vectors, raw_uids);
// // ASSERT_TRUE(status.ok())
// //
// // status = segment_writer.Serialize();
// // ASSERT_TRUE(status.ok());
//
// /* read data */
// // milvus::segment::SSSegmentReader segment_reader(db_root, visitor);
// //
// // status = segment_reader.Load();
// // ASSERT_TRUE(status.ok());
// //
// // milvus::segment::SegmentPtr segment_ptr;
// // status = segment_reader.GetSegment(segment_ptr);
// // ASSERT_TRUE(status.ok());
// //
// // auto& out_uids = segment_ptr->vectors_ptr_->GetUids();
// // ASSERT_EQ(raw_uids.size(), out_uids.size());
// // ASSERT_EQ(raw_uids[0], out_uids[0]);
// // auto& out_vectors = segment_ptr->vectors_ptr_->GetData();
// // ASSERT_EQ(raw_vectors.size(), out_vectors.size());
// // ASSERT_EQ(raw_vectors[0], out_vectors[0]);
// }();
//
// status = db_->DropCollection(c1);
// ASSERT_TRUE(status.ok());
//}