Add message sending and key2segment, use hard-coded schema

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
pull/4973/head^2
bigsheeper 2020-09-09 15:24:07 +08:00 committed by yefu.chen
parent 8f9a8d83e4
commit 1370da9498
47 changed files with 2631 additions and 763 deletions

View File

@ -1,8 +1,6 @@
package main
import (
"github.com/czs007/suvlim/pkg/master"
)
import "github.com/czs007/suvlim/pkg/master"
// func main() {
// ctx, cancel := context.WithCancel(context.Background())
@ -22,6 +20,7 @@ func init() {
// go mock.FakePulsarProducer()
}
func main() {
master.Run()
//master.SegmentStatsController()
master.CollectionController()
//master.CollectionController()
}

View File

@ -1,45 +1,58 @@
########################### GTEST
# Enable ExternalProject CMake module
INCLUDE(ExternalProject)
find_package(Threads REQUIRED)
# Set default ExternalProject root directory
SET_DIRECTORY_PROPERTIES(PROPERTIES EP_PREFIX ${CMAKE_BINARY_DIR}/third_party)
# Add gtest
# http://stackoverflow.com/questions/9689183/cmake-googletest
include(ExternalProject)
ExternalProject_Add(
googletest
URL http://ss2.fluorinedog.com/data/gtest_v1.10.x.zip
# TIMEOUT 10
# # Force separate output paths for debug and release builds to allow easy
# # identification of correct lib in subsequent TARGET_LINK_LIBRARIES commands
# CMAKE_ARGS -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG:PATH=DebugLibs
# -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE:PATH=ReleaseLibs
# -Dgtest_force_shared_crt=ON
# Disable install step
INSTALL_COMMAND ""
# Wrap download, configure and build steps in a script to log output
LOG_DOWNLOAD ON
LOG_CONFIGURE ON
LOG_BUILD ON)
googletest
URL http://ss2.fluorinedog.com/data/gtest_v1.10.x.zip
UPDATE_COMMAND ""
INSTALL_COMMAND ""
LOG_DOWNLOAD ON
LOG_CONFIGURE ON
LOG_BUILD ON)
# Specify include dir
ExternalProject_Get_Property(googletest source_dir)
set(GTEST_INCLUDE_DIR ${source_dir}/include)
set(GTEST_INCLUDE_DIRS ${source_dir}/googletest/include)
set(GMOCK_INCLUDE_DIRS ${source_dir}/googlemock/include)
# The cloning of the above repo doesn't happen until make, however if the dir doesn't
# exist, INTERFACE_INCLUDE_DIRECTORIES will throw an error.
# To make it work, we just create the directory now during config.
file(MAKE_DIRECTORY ${GTEST_INCLUDE_DIRS})
file(MAKE_DIRECTORY ${GMOCK_INCLUDE_DIRS})
# Library
ExternalProject_Get_Property(googletest binary_dir)
set(GTEST_LIBRARY_PATH ${binary_dir}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gtest.a)
set(GTEST_LIBRARY gtest)
add_library(${GTEST_LIBRARY} UNKNOWN IMPORTED)
set_target_properties(${GTEST_LIBRARY} PROPERTIES
"IMPORTED_LOCATION" "${GTEST_LIBRARY_PATH}"
"IMPORTED_LINK_INTERFACE_LIBRARIES" "${CMAKE_THREAD_LIBS_INIT}"
"INTERFACE_INCLUDE_DIRECTORIES" "${GTEST_INCLUDE_DIRS}")
add_dependencies(${GTEST_LIBRARY} googletest)
# set(GTEST_LIBRARY_PATH ${binary_dir}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gtest.a)
# set(GTEST_LIBRARY gtest)
# add_library(${GTEST_LIBRARY} UNKNOWN IMPORTED)
# set_property(TARGET ${GTEST_LIBRARY} PROPERTY IMPORTED_LOCATION
# ${GTEST_LIBRARY_PATH} )
# add_dependencies(${GTEST_LIBRARY} googletest)
set(GTEST_LIBRARY_PATH ${binary_dir}/lib)
add_library(gtest UNKNOWN IMPORTED)
add_library(gtest_main UNKNOWN IMPORTED)
set_property(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARY_PATH}/libgtest.a)
set_property(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARY_PATH}/libgtest_main.a)
add_dependencies(gtest googletest)
add_dependencies(gtest_main googletest)
set(GTEST_MAIN_LIBRARY_PATH ${binary_dir}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}gtest_main.a)
set(GTEST_MAIN_LIBRARY gtest_main)
add_library(${GTEST_MAIN_LIBRARY} UNKNOWN IMPORTED)
set_target_properties(${GTEST_MAIN_LIBRARY} PROPERTIES
"IMPORTED_LOCATION" "${GTEST_MAIN_LIBRARY_PATH}"
"IMPORTED_LINK_INTERFACE_LIBRARImS" "${CMAKE_THREAD_LIBS_INIT}"
"INTERFACE_INCLUDE_DIRECTORIES" "${GTEST_INCLUDE_DIRS}")
add_dependencies(${GTEST_MAIN_LIBRARY} googletest)
# set(GMOCK_LIBRARY_PATH ${binary_dir}/googlemock/${CMAKE_FIND_LIBRARY_PREFIXES}gmock.a)
# set(GMOCK_LIBRARY gmock)
# add_library(${GMOCK_LIBRARY} UNKNOWN IMPORTED)
# set_target_properties(${GMOCK_LIBRARY} PROPERTIES
# "IMPORTED_LOCATION" "${GMOCK_LIBRARY_PATH}"
# "IMPORTED_LINK_INTERFACE_LIBRARIES" "${CMAKE_THREAD_LIBS_INIT}"
# "INTERFACE_INCLUDE_DIRECTORIES" "${GMOCK_INCLUDE_DIRS}")
# add_dependencies(${GMOCK_LIBRARY} googletest)
# set(GMOCK_MAIN_LIBRARY_PATH ${binary_dir}/googlemock/${CMAKE_FIND_LIBRARY_PREFIXES}gmock_main.a)
# set(GMOCK_MAIN_LIBRARY gmock_main)
# add_library(${GMOCK_MAIN_LIBRARY} UNKNOWN IMPORTED)
# set_target_properties(${GMOCK_MAIN_LIBRARY} PROPERTIES
# "IMPORTED_LOCATION" "${GMOCK_MAIN_LIBRARY_PATH}"
# "IMPORTED_LINK_INTERFACE_LIBRARIES" "${CMAKE_THREAD_LIBS_INIT}"
# "INTERFACE_INCLUDE_DIRECTORIES" "${GMOCK_INCLUDE_DIRS}")
# add_dependencies(${GMOCK_MAIN_LIBRARY} ${GTEST_LIBRARY})

View File

@ -17,19 +17,27 @@ DeleteSegment(CSegmentBase segment);
int
Insert(CSegmentBase c_segment,
long int reserved_offset,
signed long int size,
const unsigned long* primary_keys,
const long* primary_keys,
const unsigned long* timestamps,
void* raw_data,
int sizeof_per_row,
signed long int count);
long int
PreInsert(CSegmentBase c_segment, long int size);
int
Delete(CSegmentBase c_segment,
long int reserved_offset,
long size,
const unsigned long* primary_keys,
const long* primary_keys,
const unsigned long* timestamps);
long int
PreDelete(CSegmentBase c_segment, long int size);
int
Search(CSegmentBase c_segment,
void* fake_query,
@ -53,24 +61,6 @@ GetRowCount(CSegmentBase c_segment);
long int
GetDeletedCount(CSegmentBase c_segment);
unsigned long
GetTimeBegin(CSegmentBase c_segment);
void
SetTimeBegin(CSegmentBase c_segment, unsigned long time_begin);
unsigned long
GetTimeEnd(CSegmentBase c_segment);
void
SetTimeEnd(CSegmentBase c_segment, unsigned long time_end);
unsigned long
GetSegmentId(CSegmentBase c_segment);
void
SetSegmentId(CSegmentBase c_segment, unsigned long segment_id);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,41 @@
#pragma once
#include <shared_mutex>
#include <mutex>
#include <set>
#include <atomic>
namespace milvus::dog_segment {
class AckResponder {
public:
void
AddSegment(int64_t seg_begin, int64_t seg_end) {
std::lock_guard lck(mutex_);
fetch_and_flip(seg_end);
auto old_begin = fetch_and_flip(seg_begin);
if(old_begin) {
minimal = *acks_.begin();
}
}
int64_t
GetAck() const{
return minimal;
}
private:
bool
fetch_and_flip(int64_t endpoint) {
if (acks_.count(endpoint)) {
acks_.erase(endpoint);
return true;
} else {
acks_.insert(endpoint);
return false;
}
}
private:
std::shared_mutex mutex_;
std::set<int64_t> acks_ = {0};
std::atomic<int64_t> minimal = 0;
};
}
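
AckResponder tracks which contiguous prefix of reserved offsets has been acknowledged: AddSegment(seg_begin, seg_end) flips both endpoints in the ordered set, and only when the new range starts at the current frontier does the minimal watermark advance to the smallest remaining endpoint. A hedged usage sketch (the offsets and include path below are illustrative, not part of this commit):

// Sketch only: ranges acknowledged out of order; GetAck() advances
// only once the prefix starting at 0 is contiguous.
#include <cassert>
#include "dog_segment/AckResponder.h"  // assumed include path

void ack_responder_example() {
    milvus::dog_segment::AckResponder ack;
    ack.AddSegment(10, 20);        // gap at [0, 10): frontier stays at 0
    assert(ack.GetAck() == 0);
    ack.AddSegment(0, 10);         // gap filled: frontier jumps to 20
    assert(ack.GetAck() == 20);
}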

View File

@ -1,16 +1,17 @@
set(DOG_SEGMENT_FILES
SegmentNaive.cpp
IndexMeta.cpp
ConcurrentVector.cpp
Collection.cpp
Partition.cpp
collection_c.cpp
partition_c.cpp
segment_c.cpp
)
# Third Party dablooms file
#aux_source_directory( ${MILVUS_THIRDPARTY_SRC}/dablooms THIRDPARTY_DABLOOMS_FILES )
add_library(milvus_dog_segment SHARED
${DOG_SEGMENT_FILES}
)
${DOG_SEGMENT_FILES}
)
#add_dependencies( segment sqlite mysqlpp )
target_link_libraries(milvus_dog_segment tbb milvus_utils pthread)

View File

@ -3,12 +3,20 @@
namespace milvus::dog_segment {
Collection::Collection(std::string &collection_name, std::string &schema):
collection_name_(collection_name), schema_json_(schema){}
collection_name_(collection_name), schema_json_(schema) {
parse();
}
void
Collection::set_index() {}
void
Collection::parse() {}
Collection::parse() {
auto schema = std::make_shared<Schema>();
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
schema->AddField("age", DataType::INT32);
schema_ = schema;
}
}

View File

@ -34,4 +34,4 @@ private:
using CollectionPtr = std::unique_ptr<Collection>;
}
}

View File

@ -0,0 +1,8 @@
#include <iostream>
#include "dog_segment/ConcurrentVector.h"
namespace milvus::dog_segment {
}

View File

@ -0,0 +1,204 @@
#pragma once
#include <tbb/concurrent_vector.h>
#include <atomic>
#include <cassert>
#include <deque>
#include <mutex>
#include <shared_mutex>
#include <vector>
namespace milvus::dog_segment {
// we don't use std::array because the capacity of concurrent_vector wastes too much memory
// template <typename Type>
// class FixedVector : public std::vector<Type> {
// public:
// // This is a stupid workaround for tbb API to avoid memory copy
// explicit FixedVector(int64_t size) : placeholder_size_(size) {
// }
// FixedVector(const FixedVector<Type>& placeholder_vec)
// : std::vector<Type>(placeholder_vec.placeholder_size_), is_placeholder_(false) {
// // assert(placeholder_vec.is_placeholder_);
// }
// FixedVector(FixedVector<Type>&&) = delete;
//
// FixedVector&
// operator=(FixedVector<Type>&&) = delete;
//
// FixedVector&
// operator=(const FixedVector<Type>&) = delete;
//
// bool is_placeholder() {
// return is_placeholder_;
// }
// private:
// bool is_placeholder_ = true;
// int placeholder_size_ = 0;
//};
template <typename Type>
using FixedVector = std::vector<Type>;
template <typename Type>
class ThreadSafeVector {
public:
template <typename... Args>
void
emplace_to_at_least(int64_t size, Args... args) {
if (size <= size_) {
return;
}
// TODO: use multithread to speedup
std::lock_guard lck(mutex_);
while (vec_.size() < size) {
vec_.emplace_back(std::forward<Args>(args)...);
++size_;
}
}
const Type&
operator[](int64_t index) const {
assert(index < size_);
std::shared_lock lck(mutex_);
return vec_[index];
}
Type&
operator[](int64_t index) {
assert(index < size_);
std::shared_lock lck(mutex_);
return vec_[index];
}
int64_t
size() const {
return size_;
}
private:
std::atomic<int64_t> size_ = 0;
std::deque<Type> vec_;
mutable std::shared_mutex mutex_;
};
class VectorBase {
public:
VectorBase() = default;
virtual ~VectorBase() = default;
virtual void
grow_to_at_least(int64_t element_count) = 0;
virtual void set_data_raw(ssize_t element_offset, void* source, ssize_t element_count) = 0;
};
template <typename Type, bool is_scalar = false, ssize_t ElementsPerChunk = 32 * 1024>
class ConcurrentVector : public VectorBase {
public:
// constants
using Chunk = FixedVector<Type>;
ConcurrentVector(ConcurrentVector&&) = delete;
ConcurrentVector(const ConcurrentVector&) = delete;
ConcurrentVector& operator=(ConcurrentVector&&) = delete;
ConcurrentVector& operator=(const ConcurrentVector&) = delete;
public:
explicit ConcurrentVector(ssize_t dim = 1) : Dim(is_scalar ? 1 : dim), SizePerChunk(Dim * ElementsPerChunk) {
assert(is_scalar ? dim == 1 : dim != 1);
}
void
grow_to_at_least(int64_t element_count) override {
auto chunk_count = (element_count + ElementsPerChunk - 1) / ElementsPerChunk;
chunks_.emplace_to_at_least(chunk_count, SizePerChunk);
}
void
set_data_raw(ssize_t element_offset, void* source, ssize_t element_count) override {
set_data(element_offset, static_cast<const Type*>(source), element_count);
}
void
set_data(ssize_t element_offset, const Type* source, ssize_t element_count) {
if (element_count == 0) {
return;
}
this->grow_to_at_least(element_offset + element_count);
auto chunk_id = element_offset / ElementsPerChunk;
auto chunk_offset = element_offset % ElementsPerChunk;
ssize_t source_offset = 0;
// first partition:
if (chunk_offset + element_count <= ElementsPerChunk) {
// only first
fill_chunk(chunk_id, chunk_offset, element_count, source, source_offset);
return;
}
auto first_size = ElementsPerChunk - chunk_offset;
fill_chunk(chunk_id, chunk_offset, first_size, source, source_offset);
source_offset += ElementsPerChunk - chunk_offset;
element_count -= first_size;
++chunk_id;
// the middle
while (element_count >= ElementsPerChunk) {
fill_chunk(chunk_id, 0, ElementsPerChunk, source, source_offset);
source_offset += ElementsPerChunk;
element_count -= ElementsPerChunk;
++chunk_id;
}
// the final
if (element_count > 0) {
fill_chunk(chunk_id, 0, element_count, source, source_offset);
}
}
const Chunk&
get_chunk(ssize_t chunk_index) const {
return chunks_[chunk_index];
}
// just for fun, don't use it directly
const Type*
get_element(ssize_t element_index) const {
auto chunk_id = element_index / ElementsPerChunk;
auto chunk_offset = element_index % ElementsPerChunk;
return get_chunk(chunk_id).data() + chunk_offset * Dim;
}
const Type&
operator[](ssize_t element_index) const {
assert(Dim == 1);
auto chunk_id = element_index / ElementsPerChunk;
auto chunk_offset = element_index % ElementsPerChunk;
return get_chunk(chunk_id)[chunk_offset];
}
ssize_t
chunk_size() const {
return chunks_.size();
}
private:
void
fill_chunk(ssize_t chunk_id, ssize_t chunk_offset, ssize_t element_count, const Type* source,
ssize_t source_offset) {
if (element_count <= 0) {
return;
}
auto chunk_max_size = chunks_.size();
assert(chunk_id < chunk_max_size);
Chunk& chunk = chunks_[chunk_id];
auto ptr = chunk.data();
std::copy_n(source + source_offset * Dim, element_count * Dim, ptr + chunk_offset * Dim);
}
const ssize_t Dim;
const ssize_t SizePerChunk;
private:
ThreadSafeVector<Chunk> chunks_;
};
} // namespace milvus::dog_segment
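
ConcurrentVector splits data into fixed-size chunks (ElementsPerChunk rows, each row Dim scalars), so a single set_data call may touch the tail of one chunk, several full chunks, and the head of a final one; grow_to_at_least only appends chunks, which keeps previously obtained chunk pointers stable for concurrent readers. A hedged usage sketch (sizes and include path are illustrative):

// Sketch only, assuming the ConcurrentVector defined above.
#include <vector>
#include "dog_segment/ConcurrentVector.h"  // assumed include path

void concurrent_vector_example() {
    using milvus::dog_segment::ConcurrentVector;
    ConcurrentVector<float> vecs(16);         // 16-dim float vectors, 32K rows per chunk
    std::vector<float> rows(100 * 16, 1.0f);  // 100 rows of raw data
    vecs.set_data(/*element_offset=*/0, rows.data(), /*element_count=*/100);
    const float* row0 = vecs.get_element(0);  // points into the first chunk
    (void)row0;
}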

View File

@ -0,0 +1,56 @@
// #include "IndexMeta.h"
// #include <mutex>
// #include <cassert>
// namespace milvus::dog_segment {
//
// Status
// IndexMeta::AddEntry(const std::string& index_name, const std::string& field_name, IndexType type, IndexMode mode,
// IndexConfig config) {
// Entry entry{
// index_name,
// field_name,
// type,
// mode,
// std::move(config)
// };
// VerifyEntry(entry);
//
// if (entries_.count(index_name)) {
// throw std::invalid_argument("duplicate index_name");
// }
// // TODO: support multiple indexes for single field
// assert(!lookups_.count(field_name));
// lookups_[field_name] = index_name;
// entries_[index_name] = std::move(entry);
//
// return Status::OK();
// }
//
// Status
// IndexMeta::DropEntry(const std::string& index_name) {
// assert(entries_.count(index_name));
// auto entry = std::move(entries_[index_name]);
// if(lookups_[entry.field_name] == index_name) {
// lookups_.erase(entry.field_name);
// }
// return Status::OK();
// }
//
// void IndexMeta::VerifyEntry(const Entry &entry) {
// auto is_mode_valid = std::set{IndexMode::MODE_CPU, IndexMode::MODE_GPU}.count(entry.mode);
// if(!is_mode_valid) {
// throw std::invalid_argument("invalid mode");
// }
//
// auto& schema = *schema_;
// auto& field_meta = schema[entry.index_name];
// // TODO checking
// if(field_meta.is_vector()) {
// assert(entry.type == knowhere::IndexEnum::INDEX_FAISS_IVFFLAT);
// } else {
// assert(false);
// }
// }
//
// } // namespace milvus::dog_segment
//

View File

@ -0,0 +1,57 @@
#pragma once
//
//#include <shared_mutex>
//
//#include "SegmentDefs.h"
//#include "knowhere/index/IndexType.h"
//
#include <memory>
class IndexMeta;
namespace milvus::dog_segment {
//// TODO: this is
//class IndexMeta {
// public:
// IndexMeta(SchemaPtr schema) : schema_(schema) {
// }
// using IndexType = knowhere::IndexType;
// using IndexMode = knowhere::IndexMode;
// using IndexConfig = knowhere::Config;
//
// struct Entry {
// std::string index_name;
// std::string field_name;
// IndexType type;
// IndexMode mode;
// IndexConfig config;
// };
//
// Status
// AddEntry(const std::string& index_name, const std::string& field_name, IndexType type, IndexMode mode,
// IndexConfig config);
//
// Status
// DropEntry(const std::string& index_name);
//
// const std::map<std::string, Entry>&
// get_entries() {
// return entries_;
// }
//
// const Entry& lookup_by_field(const std::string& field_name) {
// auto index_name = lookups_.at(field_name);
// return entries_.at(index_name);
// }
// private:
// void
// VerifyEntry(const Entry& entry);
//
// private:
// SchemaPtr schema_;
// std::map<std::string, Entry> entries_; // index_name => Entry
// std::map<std::string, std::string> lookups_; // field_name => index_name
//};
//
using IndexMetaPtr = std::shared_ptr<IndexMeta>;
//
} // namespace milvus::dog_segment
//

View File

@ -1,18 +1,24 @@
#pragma once
#include <vector>
// #include "db/Types.h"
#include "IndexMeta.h"
#include "utils/Types.h"
#include "dog_segment/SegmentDefs.h"
// #include "knowhere/index/Index.h"
// #include "knowhere/index/IndexType.h"
#include "query/GeneralQuery.h"
using idx_t = int64_t;
namespace milvus {
namespace dog_segment {
// using engine::DataChunk;
// using engine::DataChunkPtr;
using engine::QueryResult;
// using DogDataChunkPtr = std::shared_ptr<DataChunk>;
int
TestABI();
class SegmentBase {
public:
// definitions
@ -25,17 +31,19 @@ class SegmentBase {
public:
virtual ~SegmentBase() = default;
// SegmentBase(std::shared_ptr<FieldsInfo> collection);
// single threaded
virtual Status
Insert(int64_t size, const idx_t* primary_keys, const Timestamp* timestamps, const DogDataChunk& values, std::pair<Timestamp, Timestamp> timestamp_range) = 0;
// TODO: add id into delete log, possibly bitmap
// single threaded
virtual int64_t PreInsert(int64_t size) = 0;
virtual Status
Delete(int64_t size, const idx_t* primary_keys, const Timestamp* timestamps, std::pair<Timestamp, Timestamp> timestamp_range) = 0;
Insert(int64_t reserved_offset, int64_t size, const int64_t* primary_keys, const Timestamp* timestamps, const DogDataChunk& values) = 0;
virtual int64_t PreDelete(int64_t size) = 0;
// TODO: add id into delete log, possibly bitmap
virtual Status
Delete(int64_t reserved_offset, int64_t size, const int64_t* primary_keys, const Timestamp* timestamps) = 0;
// query contains metadata of
// multi-threaded
virtual Status
Query(const query::QueryPtr& query, Timestamp timestamp, QueryResult& results) = 0;
@ -44,7 +52,6 @@ class SegmentBase {
// GetEntityByIds(Timestamp timestamp, const std::vector<Id>& ids, DataChunkPtr& results) = 0;
// stop receive insert requests
// single threaded
virtual Status
Close() = 0;
@ -53,15 +60,8 @@ class SegmentBase {
// virtual Status
// Flush(Timestamp timestamp) = 0;
// BuildIndex With Parameters, must be in Frozen state
// This function is atomic
// NOTE: index_params contains several policies for several indexes
virtual Status
BuildIndex(std::shared_ptr<IndexConfig> index_params) = 0;
// Remove Index
virtual Status
DropIndex(std::string_view field_name) = 0;
// watch changes
// NOTE: Segment will use this ptr as correct
virtual Status
DropRawData(std::string_view field_name) = 0;
@ -69,6 +69,9 @@ class SegmentBase {
virtual Status
LoadRawData(std::string_view field_name, const char* blob, int64_t blob_size) = 0;
virtual Status
BuildIndex() = 0;
public:
virtual ssize_t
get_row_count() const = 0;
@ -78,12 +81,12 @@ class SegmentBase {
virtual ssize_t
get_deleted_count() const = 0;
};
using SegmentBasePtr = std::unique_ptr<SegmentBase>;
SegmentBasePtr CreateSegment(SchemaPtr& ptr);
SegmentBasePtr
CreateSegment(SchemaPtr schema, IndexMetaPtr index_meta);
} // namespace engine
} // namespace dog_segment
} // namespace milvus

View File

@ -1,16 +1,14 @@
#pragma once
#include <vector>
#include <unordered_map>
#include <assert.h>
// #include "db/Types.h"
#include "utils/Types.h"
// #include "knowhere/index/Index.h"
#include "utils/Status.h"
#include "utils/Types.h"
#include <cassert>
using Timestamp = uint64_t; // TODO: use TiKV-like timestamp
namespace milvus::dog_segment {
using Timestamp = uint64_t; // TODO: use TiKV-like timestamp
using engine::DataType;
using engine::FieldElementType;
@ -20,11 +18,6 @@ struct DogDataChunk {
int64_t count;
};
struct IndexConfig {
// TODO
// std::unordered_map<std::string, knowhere::Config> configs;
};
inline int
field_sizeof(DataType data_type, int dim = 1) {
switch (data_type) {
@ -49,12 +42,17 @@ field_sizeof(DataType data_type, int dim = 1) {
return dim / 8;
}
default: {
assert(false);
throw std::invalid_argument("unsupported data type");
return 0;
}
}
}
inline bool
field_is_vector(DataType datatype) {
return datatype == DataType::VECTOR_BINARY || datatype == DataType::VECTOR_FLOAT;
}
struct FieldMeta {
public:
FieldMeta(std::string_view name, DataType type, int dim = 1) : name_(name), type_(type), dim_(dim) {
@ -107,10 +105,12 @@ class Schema {
void
AddField(FieldMeta field_meta) {
auto index = fields_.size();
auto offset = fields_.size();
fields_.emplace_back(field_meta);
indexes_.emplace(field_meta.get_name(), index);
total_sizeof_ = field_meta.get_sizeof();
offsets_.emplace(field_meta.get_name(), offset);
auto field_sizeof = field_meta.get_sizeof();
sizeof_infos_.push_back(field_sizeof);
total_sizeof_ += field_sizeof;
}
auto
@ -132,7 +132,8 @@ class Schema {
return fields_.end();
}
int size() const {
int
size() const {
return fields_.size();
}
@ -141,12 +142,22 @@ class Schema {
return fields_[field_index];
}
auto
get_total_sizeof() const {
return total_sizeof_;
}
const std::vector<int>& get_sizeof_infos() {
return sizeof_infos_;
}
const FieldMeta&
operator[](const std::string& field_name) const {
auto index_iter = indexes_.find(field_name);
assert(index_iter != indexes_.end());
auto index = index_iter->second;
return (*this)[index];
auto offset_iter = offsets_.find(field_name);
assert(offset_iter != offsets_.end());
auto offset = offset_iter->second;
return (*this)[offset];
}
private:
@ -155,19 +166,11 @@ class Schema {
private:
// a mapping for random access
std::unordered_map<std::string, int> indexes_;
int total_sizeof_;
std::unordered_map<std::string, int> offsets_;
std::vector<int> sizeof_infos_;
int total_sizeof_ = 0;
};
using SchemaPtr = std::shared_ptr<Schema>;
class IndexData {
public:
virtual std::vector<char>
serilize() = 0;
static std::shared_ptr<IndexData>
deserialize(int64_t size, const char* blob);
};
} // namespace milvus::dog_segment
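
With the new offsets_ and sizeof_infos_ bookkeeping, Schema answers both where each field starts inside a row and how many bytes it occupies, which is what the row-to-column shuffle in SegmentNaive::Insert relies on. A small sketch reproducing the hard-coded schema from Collection::parse and the offset computation from SegmentNaive::Insert (include path assumed):

// Sketch only; mirrors the hard-coded "fakevec"/"age" schema used in this commit.
#include <memory>
#include <numeric>
#include <vector>
#include "dog_segment/SegmentDefs.h"  // assumed include path

void schema_example() {
    using namespace milvus::dog_segment;
    auto schema = std::make_shared<Schema>();
    schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);  // 16 floats = 64 bytes
    schema->AddField("age", DataType::INT32);                 // 4 bytes

    // Per-field byte offsets within a row, as done in SegmentNaive::Insert.
    auto sizeof_infos = schema->get_sizeof_infos();            // {64, 4}
    std::vector<int> offset_infos(schema->size() + 1, 0);
    std::partial_sum(sizeof_infos.begin(), sizeof_infos.end(), offset_infos.begin() + 1);
    // offset_infos == {0, 64, 68}; schema->get_total_sizeof() == 68
}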

View File

@ -1,246 +1,255 @@
#include <shared_mutex>
#include <dog_segment/SegmentNaive.h>
#include "dog_segment/SegmentBase.h"
#include "utils/Status.h"
#include <tbb/concurrent_vector.h>
#include <tbb/concurrent_unordered_map.h>
#include <atomic>
#include <algorithm>
#include <numeric>
#include <thread>
namespace milvus::dog_segment {
int
TestABI() {
return 42;
}
struct ColumnBasedDataChunk {
std::vector<std::vector<float>> entity_vecs;
static ColumnBasedDataChunk from(const DogDataChunk& source, const Schema& schema){
ColumnBasedDataChunk dest;
auto count = source.count;
auto raw_data = reinterpret_cast<const char*>(source.raw_data);
auto align = source.sizeof_per_row;
for(auto& field: schema) {
auto len = field.get_sizeof();
assert(len % sizeof(float) == 0);
std::vector<float> new_col(len * count / sizeof(float));
for(int64_t i = 0; i < count; ++i) {
memcpy(new_col.data() + i * len / sizeof(float), raw_data + i * align , len);
}
dest.entity_vecs.push_back(std::move(new_col));
// offset the raw_data
raw_data += len / sizeof(float);
}
return dest;
}
};
class SegmentNaive : public SegmentBase {
public:
virtual ~SegmentNaive() = default;
// SegmentBase(std::shared_ptr<FieldsInfo> collection);
// TODO: originally, id should be put into data_chunk
// TODO: Is it ok to put them the other side?
Status
Insert(int64_t size, const uint64_t* primary_keys, const Timestamp* timestamps,
const DogDataChunk& values) override;
// TODO: add id into delete log, possibly bitmap
Status
Delete(int64_t size, const uint64_t* primary_keys, const Timestamp* timestamps) override;
// query contains metadata of
Status
Query(const query::QueryPtr& query, Timestamp timestamp, QueryResult& results) override;
// // THIS FUNCTION IS REMOVED
// virtual Status
// GetEntityByIds(Timestamp timestamp, const std::vector<Id>& ids, DataChunkPtr& results) = 0;
// stop receive insert requests
Status
Close() override {
std::lock_guard<std::shared_mutex> lck(mutex_);
assert(state_ == SegmentState::Open);
state_ = SegmentState::Closed;
return Status::OK();
}
// // to make all data inserted visible
// // maybe a no-op?
// virtual Status
// Flush(Timestamp timestamp) = 0;
// BuildIndex With Parameters, must be in Frozen state
// This function is atomic
// NOTE: index_params contains several policies for several indexes
Status
BuildIndex(std::shared_ptr<IndexConfig> index_params) override {
throw std::runtime_error("not implemented");
}
// Remove Index
Status
DropIndex(std::string_view field_name) override {
throw std::runtime_error("not implemented");
}
Status
DropRawData(std::string_view field_name) override {
// TODO: NO-OP
return Status::OK();
}
Status
LoadRawData(std::string_view field_name, const char* blob, int64_t blob_size) override {
// TODO: NO-OP
return Status::OK();
}
public:
ssize_t
get_row_count() const override {
return ack_count_.load(std::memory_order_relaxed);
}
// const FieldsInfo&
// get_fields_info() const override {
//
// }
//
// // check is_indexed here
// virtual const IndexConfig&
// get_index_param() const = 0;
//
SegmentState
get_state() const override {
return state_.load(std::memory_order_relaxed);
}
//
// std::shared_ptr<IndexData>
// get_index_data();
ssize_t
get_deleted_count() const override {
return 0;
}
public:
friend SegmentBasePtr
CreateSegment(SchemaPtr& schema);
private:
SchemaPtr schema_;
std::shared_mutex mutex_;
std::atomic<SegmentState> state_ = SegmentState::Open;
std::atomic<int64_t> ack_count_ = 0;
tbb::concurrent_vector<uint64_t> uids_;
tbb::concurrent_vector<Timestamp> timestamps_;
std::vector<tbb::concurrent_vector<float>> entity_vecs_;
tbb::concurrent_unordered_map<uint64_t, int> internal_indexes_;
tbb::concurrent_unordered_multimap<int, Timestamp> delete_logs_;
};
SegmentBasePtr
CreateSegment(SchemaPtr& schema) {
// TODO: remove hard code
auto schema_tmp = std::make_shared<Schema>();
schema_tmp->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
schema_tmp->AddField("age", DataType::INT32);
auto segment = std::make_unique<SegmentNaive>();
segment->schema_ = schema_tmp;
segment->entity_vecs_.resize(schema_tmp->size());
std::unique_ptr<SegmentBase>
CreateSegment(SchemaPtr schema, IndexMetaPtr remote_index_meta) {
auto segment = std::make_unique<SegmentNaive>(schema, remote_index_meta);
return segment;
}
Status
SegmentNaive::Insert(int64_t size, const uint64_t* primary_keys, const Timestamp* timestamps,
const DogDataChunk& row_values) {
const auto& schema = *schema_;
auto data_chunk = ColumnBasedDataChunk::from(row_values, schema);
// insert datas
// TODO: use shared_lock
std::lock_guard lck(mutex_);
assert(state_ == SegmentState::Open);
auto ack_id = ack_count_.load();
uids_.grow_by(primary_keys, primary_keys + size);
for(int64_t i = 0; i < size; ++i) {
auto key = primary_keys[i];
auto internal_index = i + ack_id;
internal_indexes_[key] = internal_index;
SegmentNaive::Record::Record(const Schema& schema) : uids_(1), timestamps_(1) {
for (auto& field : schema) {
if (field.is_vector()) {
assert(field.get_data_type() == DataType::VECTOR_FLOAT);
entity_vec_.emplace_back(std::make_shared<ConcurrentVector<float>>(field.get_dim()));
} else {
assert(field.get_data_type() == DataType::INT32);
entity_vec_.emplace_back(std::make_shared<ConcurrentVector<int32_t, false>>());
}
}
timestamps_.grow_by(timestamps, timestamps + size);
for(int fid = 0; fid < schema.size(); ++fid) {
auto field = schema[fid];
auto total_len = field.get_sizeof() * size / sizeof(float);
auto source_vec = data_chunk.entity_vecs[fid];
entity_vecs_[fid].grow_by(source_vec.data(), source_vec.data() + total_len);
}
// finish insert
ack_count_ += size;
return Status::OK();
}
Status SegmentNaive::Delete(int64_t size, const uint64_t *primary_keys, const Timestamp *timestamps) {
for(int i = 0; i < size; ++i) {
auto key = primary_keys[i];
auto time = timestamps[i];
delete_logs_.insert(std::make_pair(key, time));
int64_t
SegmentNaive::PreInsert(int64_t size) {
auto reserved_begin = record_.reserved.fetch_add(size);
return reserved_begin;
}
int64_t
SegmentNaive::PreDelete(int64_t size) {
throw std::runtime_error("unimplemented");
}
Status
SegmentNaive::Insert(int64_t reserved_begin, int64_t size, const int64_t* uids_raw, const Timestamp* timestamps_raw,
const DogDataChunk& entities_raw) {
assert(entities_raw.count == size);
assert(entities_raw.sizeof_per_row == schema_->get_total_sizeof());
auto raw_data = reinterpret_cast<const char*>(entities_raw.raw_data);
// std::vector<char> entities(raw_data, raw_data + size * len_per_row);
auto len_per_row = entities_raw.sizeof_per_row;
std::vector<std::tuple<Timestamp, idx_t, int64_t>> ordering;
ordering.resize(size);
// #pragma omp parallel for
for (int i = 0; i < size; ++i) {
ordering[i] = std::make_tuple(timestamps_raw[i], uids_raw[i], i);
}
std::sort(ordering.begin(), ordering.end());
auto sizeof_infos = schema_->get_sizeof_infos();
std::vector<int> offset_infos(schema_->size() + 1, 0);
std::partial_sum(sizeof_infos.begin(), sizeof_infos.end(), offset_infos.begin() + 1);
std::vector<std::vector<char>> entities(schema_->size());
for (int fid = 0; fid < schema_->size(); ++fid) {
auto len = sizeof_infos[fid];
entities[fid].resize(len * size);
}
std::vector<idx_t> uids(size);
std::vector<Timestamp> timestamps(size);
// #pragma omp parallel for
for (int index = 0; index < size; ++index) {
auto [t, uid, order_index] = ordering[index];
timestamps[index] = t;
uids[index] = uid;
for (int fid = 0; fid < schema_->size(); ++fid) {
auto len = sizeof_infos[fid];
auto offset = offset_infos[fid];
auto src = raw_data + offset + order_index * len_per_row;
auto dst = entities[fid].data() + index * len;
memcpy(dst, src, len);
}
}
record_.timestamps_.set_data(reserved_begin, timestamps.data(), size);
record_.uids_.set_data(reserved_begin, uids.data(), size);
for (int fid = 0; fid < schema_->size(); ++fid) {
record_.entity_vec_[fid]->set_data_raw(reserved_begin, entities[fid].data(), size);
}
record_.ack_responder_.AddSegment(reserved_begin, size);
return Status::OK();
// std::thread go(executor, std::move(uids), std::move(timestamps), std::move(entities));
// go.detach();
// const auto& schema = *schema_;
// auto record_ptr = GetMutableRecord();
// assert(record_ptr);
// auto& record = *record_ptr;
// auto data_chunk = ColumnBasedDataChunk::from(row_values, schema);
//
// // TODO: use shared_lock for better concurrency
// std::lock_guard lck(mutex_);
// assert(state_ == SegmentState::Open);
// auto ack_id = ack_count_.load();
// record.uids_.grow_by(primary_keys, primary_keys + size);
// for (int64_t i = 0; i < size; ++i) {
// auto key = primary_keys[i];
// auto internal_index = i + ack_id;
// internal_indexes_[key] = internal_index;
// }
// record.timestamps_.grow_by(timestamps, timestamps + size);
// for (int fid = 0; fid < schema.size(); ++fid) {
// auto field = schema[fid];
// auto total_len = field.get_sizeof() * size / sizeof(float);
// auto source_vec = data_chunk.entity_vecs[fid];
// record.entity_vecs_[fid].grow_by(source_vec.data(), source_vec.data() + total_len);
// }
//
// // finish insert
// ack_count_ += size;
// return Status::OK();
}
Status
SegmentNaive::Delete(int64_t reserved_offset, int64_t size, const int64_t* primary_keys, const Timestamp* timestamps) {
throw std::runtime_error("unimplemented");
// for (int i = 0; i < size; ++i) {
// auto key = primary_keys[i];
// auto time = timestamps[i];
// delete_logs_.insert(std::make_pair(key, time));
// }
// return Status::OK();
}
// TODO: remove mock
Status
SegmentNaive::Query(const query::QueryPtr &query, Timestamp timestamp, QueryResult &result) {
std::shared_lock lck(mutex_);
auto ack_count = ack_count_.load();
assert(query == nullptr);
assert(schema_->size() >= 1);
const auto& field = schema_->operator[](0);
assert(field.get_data_type() == DataType::VECTOR_FLOAT);
assert(field.get_name() == "fakevec");
auto dim = field.get_dim();
// assume query vector is [0, 0, ..., 0]
std::vector<float> query_vector(dim, 0);
auto& target_vec = entity_vecs_[0];
int current_index = -1;
float min_diff = std::numeric_limits<float>::max();
for(int index = 0; index < ack_count; ++index) {
float diff = 0;
int offset = index * dim;
for(auto d = 0; d < dim; ++d) {
auto v = target_vec[offset + d] - query_vector[d];
diff += v * v;
}
if(diff < min_diff) {
min_diff = diff;
current_index = index;
}
}
QueryResult query_result;
query_result.row_num_ = 1;
query_result.result_distances_.push_back(min_diff);
query_result.result_ids_.push_back(uids_[current_index]);
// query_result.data_chunk_ = nullptr;
result = std::move(query_result);
return Status::OK();
SegmentNaive::QueryImpl(const query::QueryPtr& query, Timestamp timestamp, QueryResult& result) {
throw std::runtime_error("unimplemented");
// auto ack_count = ack_count_.load();
// assert(query == nullptr);
// assert(schema_->size() >= 1);
// const auto& field = schema_->operator[](0);
// assert(field.get_data_type() == DataType::VECTOR_FLOAT);
// assert(field.get_name() == "fakevec");
// auto dim = field.get_dim();
// // assume query vector is [0, 0, ..., 0]
// std::vector<float> query_vector(dim, 0);
// auto& target_vec = record.entity_vecs_[0];
// int current_index = -1;
// float min_diff = std::numeric_limits<float>::max();
// for (int index = 0; index < ack_count; ++index) {
// float diff = 0;
// int offset = index * dim;
// for (auto d = 0; d < dim; ++d) {
// auto v = target_vec[offset + d] - query_vector[d];
// diff += v * v;
// }
// if (diff < min_diff) {
// min_diff = diff;
// current_index = index;
// }
// }
// QueryResult query_result;
// query_result.row_num_ = 1;
// query_result.result_distances_.push_back(min_diff);
// query_result.result_ids_.push_back(record.uids_[current_index]);
// query_result.data_chunk_ = nullptr;
// result = std::move(query_result);
// return Status::OK();
}
} // namespace milvus::engine
Status
SegmentNaive::Query(const query::QueryPtr& query, Timestamp timestamp, QueryResult& result) {
// TODO: enable delete
// TODO: enable index
auto& field = schema_->operator[](0);
assert(field.get_name() == "fakevec");
assert(field.get_data_type() == DataType::VECTOR_FLOAT);
auto dim = field.get_dim();
assert(query == nullptr);
int64_t barrier = [&]
{
auto& vec = record_.timestamps_;
int64_t beg = 0;
int64_t end = record_.ack_responder_.GetAck();
while (beg < end) {
auto mid = (beg + end) / 2;
if (vec[mid] < timestamp) {
beg = mid + 1;
} else {
end = mid;
}
}
return beg;
}();
// search until barriers
// TODO: optimize
auto vec_ptr = std::static_pointer_cast<ConcurrentVector<float>>(record_.entity_vec_[0]);
for(int64_t i = 0; i < barrier; ++i) {
// auto element =
throw std::runtime_error("unimplemented");
}
return Status::OK();
// find end of binary
// throw std::runtime_error("unimplemented");
// auto record_ptr = GetMutableRecord();
// if (record_ptr) {
// return QueryImpl(*record_ptr, query, timestamp, result);
// } else {
// assert(ready_immutable_);
// return QueryImpl(*record_immutable_, query, timestamp, result);
// }
}
Status
SegmentNaive::Close() {
state_ = SegmentState::Closed;
return Status::OK();
// auto src_record = GetMutableRecord();
// assert(src_record);
//
// auto dst_record = std::make_shared<ImmutableRecord>(schema_->size());
//
// auto data_move = [](auto& dst_vec, const auto& src_vec) {
// assert(dst_vec.size() == 0);
// dst_vec.insert(dst_vec.begin(), src_vec.begin(), src_vec.end());
// };
// data_move(dst_record->uids_, src_record->uids_);
// data_move(dst_record->timestamps_, src_record->timestamps_);
//
// assert(src_record->entity_vecs_.size() == schema_->size());
// assert(dst_record->entity_vecs_.size() == schema_->size());
// for (int i = 0; i < schema_->size(); ++i) {
// data_move(dst_record->entity_vecs_[i], src_record->entity_vecs_[i]);
// }
// bool ready_old = false;
// record_immutable_ = dst_record;
// ready_immutable_.compare_exchange_strong(ready_old, true);
// if (ready_old) {
// throw std::logic_error("Close may be called twice, with potential race condition");
// }
// return Status::OK();
}
Status
SegmentNaive::BuildIndex() {
throw std::runtime_error("unimplemented");
// assert(ready_immutable_);
// throw std::runtime_error("unimplemented");
}
} // namespace milvus::dog_segment
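
Taken together, the write path is now a two-phase reservation protocol: PreInsert atomically reserves a contiguous range of row offsets via record_.reserved.fetch_add, Insert sorts the batch by (timestamp, uid) and scatters it into the per-field ConcurrentVectors at the reserved offsets, and AckResponder then publishes the range so readers only scan up to the contiguous ack frontier. A hedged sketch of a caller driving this protocol (segment construction and data preparation are assumed):

// Sketch only; assumes a segment created via CreateSegment(schema, nullptr)
// and caller-prepared rows packed as a DogDataChunk.
#include <cstdint>
#include <vector>

void insert_example(milvus::dog_segment::SegmentBase& segment,
                    const std::vector<int64_t>& uids,
                    const std::vector<uint64_t>& timestamps,
                    const milvus::dog_segment::DogDataChunk& rows) {
    auto size = static_cast<int64_t>(uids.size());
    // Phase 1: reserve a contiguous range of offsets (thread-safe).
    auto reserved_offset = segment.PreInsert(size);
    // Phase 2: write into the reserved range; visibility is published via AckResponder.
    segment.Insert(reserved_offset, size, uids.data(), timestamps.data(), rows);
}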

View File

@ -0,0 +1,147 @@
#pragma once
#include <tbb/concurrent_priority_queue.h>
#include <tbb/concurrent_unordered_map.h>
#include <tbb/concurrent_vector.h>
#include <shared_mutex>
#include "AckResponder.h"
#include "ConcurrentVector.h"
#include "dog_segment/SegmentBase.h"
// #include "knowhere/index/structured_index/StructuredIndex.h"
#include "query/GeneralQuery.h"
#include "utils/Status.h"
using idx_t = int64_t;
namespace milvus::dog_segment {
struct ColumnBasedDataChunk {
std::vector<std::vector<float>> entity_vecs;
static ColumnBasedDataChunk
from(const DogDataChunk& source, const Schema& schema) {
ColumnBasedDataChunk dest;
auto count = source.count;
auto raw_data = reinterpret_cast<const char*>(source.raw_data);
auto align = source.sizeof_per_row;
for (auto& field : schema) {
auto len = field.get_sizeof();
assert(len % sizeof(float) == 0);
std::vector<float> new_col(len * count / sizeof(float));
for (int64_t i = 0; i < count; ++i) {
memcpy(new_col.data() + i * len / sizeof(float), raw_data + i * align, len);
}
dest.entity_vecs.push_back(std::move(new_col));
// offset the raw_data
raw_data += len / sizeof(float);
}
return dest;
}
};
class SegmentNaive : public SegmentBase {
public:
virtual ~SegmentNaive() = default;
// SegmentBase(std::shared_ptr<FieldsInfo> collection);
int64_t PreInsert(int64_t size) override;
// TODO: originally, id should be put into data_chunk
// TODO: Is it ok to put them the other side?
Status
Insert(int64_t reserved_offset, int64_t size, const int64_t* primary_keys, const Timestamp* timestamps, const DogDataChunk& values) override;
int64_t PreDelete(int64_t size) override;
// TODO: add id into delete log, possibly bitmap
Status
Delete(int64_t reserved_offset, int64_t size, const int64_t* primary_keys, const Timestamp* timestamps) override;
// query contains metadata of
Status
Query(const query::QueryPtr& query, Timestamp timestamp, QueryResult& results) override;
// stop receive insert requests
// will move data to immutable vector or something
Status
Close() override;
// using IndexType = knowhere::IndexType;
// using IndexMode = knowhere::IndexMode;
// using IndexConfig = knowhere::Config;
// BuildIndex With Parameters, must be in Frozen state
// NOTE: index_params contains several policies for several indexes
// TODO: currently, index has to be set at startup, and can't be modified
// AddIndex and DropIndex will be added later
Status
BuildIndex() override;
Status
DropRawData(std::string_view field_name) override {
// TODO: NO-OP
return Status::OK();
}
Status
LoadRawData(std::string_view field_name, const char* blob, int64_t blob_size) override {
// TODO: NO-OP
return Status::OK();
}
private:
struct MutableRecord {
ConcurrentVector<uint64_t> uids_;
tbb::concurrent_vector<Timestamp> timestamps_;
std::vector<tbb::concurrent_vector<float>> entity_vecs_;
MutableRecord(int entity_size) : entity_vecs_(entity_size) {
}
};
struct Record {
std::atomic<int64_t> reserved = 0;
AckResponder ack_responder_;
ConcurrentVector<Timestamp, true> timestamps_;
ConcurrentVector<idx_t, true> uids_;
std::vector<std::shared_ptr<VectorBase>> entity_vec_;
Record(const Schema& schema);
};
Status
QueryImpl(const query::QueryPtr& query, Timestamp timestamp, QueryResult& results);
public:
ssize_t
get_row_count() const override {
return record_.ack_responder_.GetAck();
}
SegmentState
get_state() const override {
return state_.load(std::memory_order_relaxed);
}
ssize_t
get_deleted_count() const override {
return 0;
}
public:
friend std::unique_ptr<SegmentBase>
CreateSegment(SchemaPtr schema, IndexMetaPtr index_meta);
explicit SegmentNaive(SchemaPtr schema, IndexMetaPtr index_meta)
: schema_(schema), index_meta_(index_meta), record_(*schema) {
}
private:
SchemaPtr schema_;
IndexMetaPtr index_meta_;
std::atomic<SegmentState> state_ = SegmentState::Open;
Record record_;
// tbb::concurrent_unordered_map<uint64_t, int> internal_indexes_;
// std::shared_ptr<MutableRecord> record_mutable_;
// // to determine whether immutable data is available
// std::shared_ptr<ImmutableRecord> record_immutable_ = nullptr;
// std::unordered_map<int, knowhere::VecIndexPtr> vec_indexings_;
// // TODO: scalar indexing
// // std::unordered_map<int, knowhere::IndexPtr> scalar_indexings_;
// tbb::concurrent_unordered_multimap<int, Timestamp> delete_logs_;
};
} // namespace milvus::dog_segment

View File

@ -1,46 +0,0 @@
package main
/*
#cgo CFLAGS: -I./
#cgo LDFLAGS: -L/home/sheep/workspace/milvus/sheep/suvlim/core/cmake-build-debug/src/dog_segment -lmilvus_dog_segment -Wl,-rpath=/home/sheep/workspace/milvus/sheep/suvlim/core/cmake-build-debug/src/dog_segment
#include "cwrap.h"
*/
import "C"
import (
"fmt"
"unsafe"
)
func testInsert() {
const DIM = 4
const N = 3
var ids = [N]uint64{1, 2, 3}
var timestamps = [N]uint64{0, 0, 0}
var vec = [DIM]float32{1.1, 2.2, 3.3, 4.4}
var rawData []int8
for i := 0; i <= N; i++ {
for _, ele := range vec {
rawData=append(rawData, int8(ele))
}
rawData=append(rawData, int8(i))
}
var segment = C.SegmentBaseInit()
fmt.Println(segment)
const sizeofPerRow = 4 + DIM * 4
var res = C.Insert(segment, N, (*C.ulong)(&ids[0]), (*C.ulong)(&timestamps[0]), unsafe.Pointer(&rawData[0]), C.int(sizeofPerRow), C.long(N))
fmt.Println(res)
}
func main() {
fmt.Println("Test milvus segment base:")
testInsert()
}

View File

@ -9,7 +9,8 @@ CSegmentBase
NewSegment(CPartition partition, unsigned long segment_id) {
auto p = (milvus::dog_segment::Partition*)partition;
auto segment = milvus::dog_segment::CreateSegment(p->get_schema());
// TODO: remove hard code null index ptr
auto segment = milvus::dog_segment::CreateSegment(p->get_schema(), nullptr);
// TODO: delete print
std::cout << "create segment " << segment_id << std::endl;
@ -30,14 +31,13 @@ DeleteSegment(CSegmentBase segment) {
int
Insert(CSegmentBase c_segment,
signed long int size,
const long* primary_keys,
const unsigned long* timestamps,
void* raw_data,
int sizeof_per_row,
signed long int count,
unsigned long timestamp_min,
unsigned long timestamp_max) {
long int reserved_offset,
signed long int size,
const long* primary_keys,
const unsigned long* timestamps,
void* raw_data,
int sizeof_per_row,
signed long int count) {
auto segment = (milvus::dog_segment::SegmentBase*)c_segment;
milvus::dog_segment::DogDataChunk dataChunk{};
@ -45,25 +45,44 @@ Insert(CSegmentBase c_segment,
dataChunk.sizeof_per_row = sizeof_per_row;
dataChunk.count = count;
auto res = segment->Insert(size, primary_keys, timestamps, dataChunk, std::make_pair(timestamp_min, timestamp_max));
auto res = segment->Insert(reserved_offset, size, primary_keys, timestamps, dataChunk);
return res.code();
}
long int
PreInsert(CSegmentBase c_segment, long int size) {
auto segment = (milvus::dog_segment::SegmentBase*)c_segment;
// TODO: delete print
std::cout << "PreInsert segment " << std::endl;
return segment->PreInsert(size);
}
int
Delete(CSegmentBase c_segment,
long size,
const long* primary_keys,
const unsigned long* timestamps,
unsigned long timestamp_min,
unsigned long timestamp_max) {
long int reserved_offset,
long size,
const long* primary_keys,
const unsigned long* timestamps) {
auto segment = (milvus::dog_segment::SegmentBase*)c_segment;
auto res = segment->Delete(size, primary_keys, timestamps, std::make_pair(timestamp_min, timestamp_max));
auto res = segment->Delete(reserved_offset, size, primary_keys, timestamps);
return res.code();
}
long int
PreDelete(CSegmentBase c_segment, long int size) {
auto segment = (milvus::dog_segment::SegmentBase*)c_segment;
// TODO: delete print
std::cout << "PreDelete segment " << std::endl;
return segment->PreDelete(size);
}
int
Search(CSegmentBase c_segment,
void* fake_query,

View File

@ -17,22 +17,26 @@ DeleteSegment(CSegmentBase segment);
int
Insert(CSegmentBase c_segment,
signed long int size,
const long* primary_keys,
const unsigned long* timestamps,
void* raw_data,
int sizeof_per_row,
signed long int count,
unsigned long timestamp_min,
unsigned long timestamp_max);
long int reserved_offset,
signed long int size,
const long* primary_keys,
const unsigned long* timestamps,
void* raw_data,
int sizeof_per_row,
signed long int count);
long int
PreInsert(CSegmentBase c_segment, long int size);
int
Delete(CSegmentBase c_segment,
long size,
const long* primary_keys,
const unsigned long* timestamps,
unsigned long timestamp_min,
unsigned long timestamp_max);
long int reserved_offset,
long size,
const long* primary_keys,
const unsigned long* timestamps);
long int
PreDelete(CSegmentBase c_segment, long int size);
int
Search(CSegmentBase c_segment,

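On the C side the same two-phase flow applies: PreInsert/PreDelete return the reserved offset, which is passed as the new reserved_offset argument to Insert/Delete, replacing the old timestamp_min/timestamp_max pair. A minimal sketch, assuming a segment handle from NewSegment and caller-prepared buffers (N, uids, timestamps, raw_data, sizeof_per_row, delete_primary_keys, delete_timestamps are placeholders):

// Sketch only; calls the C API declared above.
long reserved = PreInsert(segment, N);
int rc = Insert(segment, reserved, N, uids, timestamps,
                raw_data, sizeof_per_row, N);

long del_reserved = PreDelete(segment, 3);
int del_rc = Delete(segment, del_reserved, 3, delete_primary_keys, delete_timestamps);
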
View File

@ -1,8 +1,9 @@
enable_testing()
find_package(GTest REQUIRED)
set(MILVUS_TEST_FILES
test_dog_segment.cpp
test_c_api.cpp
test_naive.cpp
# test_dog_segment.cpp
test_c_api.cpp
)
add_executable(all_tests
${MILVUS_TEST_FILES}

View File

@ -49,7 +49,7 @@ TEST(CApiTest, InsertTest) {
std::vector<char> raw_data;
std::vector<uint64_t> timestamps;
std::vector<uint64_t> uids;
std::vector<int64_t> uids;
int N = 10000;
std::default_random_engine e(67);
for(int i = 0; i < N; ++i) {
@ -67,7 +67,9 @@ TEST(CApiTest, InsertTest) {
auto line_sizeof = (sizeof(int) + sizeof(float) * 16);
auto res = Insert(segment, N, uids.data(), timestamps.data(), raw_data.data(), (int)line_sizeof, N);
auto offset = PreInsert(segment, N);
auto res = Insert(segment, offset, N, uids.data(), timestamps.data(), raw_data.data(), (int)line_sizeof, N);
assert(res == 0);
@ -85,10 +87,12 @@ TEST(CApiTest, DeleteTest) {
auto partition = NewPartition(collection, partition_name);
auto segment = NewSegment(partition, 0);
unsigned long delete_primary_keys[] = {100000, 100001, 100002};
long delete_primary_keys[] = {100000, 100001, 100002};
unsigned long delete_timestamps[] = {0, 0, 0};
auto del_res = Delete(segment, 1, delete_primary_keys, delete_timestamps);
auto offset = PreDelete(segment, 3);
auto del_res = Delete(segment, offset, 3, delete_primary_keys, delete_timestamps);
assert(del_res == 0);
DeleteCollection(collection);
@ -107,7 +111,7 @@ TEST(CApiTest, SearchTest) {
std::vector<char> raw_data;
std::vector<uint64_t> timestamps;
std::vector<uint64_t> uids;
std::vector<int64_t> uids;
int N = 10000;
std::default_random_engine e(67);
for(int i = 0; i < N; ++i) {
@ -125,7 +129,9 @@ TEST(CApiTest, SearchTest) {
auto line_sizeof = (sizeof(int) + sizeof(float) * 16);
auto ins_res = Insert(segment, N, uids.data(), timestamps.data(), raw_data.data(), (int)line_sizeof, N);
auto offset = PreInsert(segment, N);
auto ins_res = Insert(segment, offset, N, uids.data(), timestamps.data(), raw_data.data(), (int)line_sizeof, N);
assert(ins_res == 0);
long result_ids;
@ -184,7 +190,7 @@ TEST(CApiTest, GetRowCountTest) {
std::vector<char> raw_data;
std::vector<uint64_t> timestamps;
std::vector<uint64_t> uids;
std::vector<int64_t> uids;
int N = 10000;
std::default_random_engine e(67);
for(int i = 0; i < N; ++i) {
@ -202,7 +208,9 @@ TEST(CApiTest, GetRowCountTest) {
auto line_sizeof = (sizeof(int) + sizeof(float) * 16);
auto res = Insert(segment, N, uids.data(), timestamps.data(), raw_data.data(), (int)line_sizeof, N);
auto offset = PreInsert(segment, N);
auto res = Insert(segment, offset, N, uids.data(), timestamps.data(), raw_data.data(), (int)line_sizeof, N);
assert(res == 0);
auto row_count = GetRowCount(segment);
@ -221,10 +229,12 @@ TEST(CApiTest, GetDeletedCountTest) {
auto partition = NewPartition(collection, partition_name);
auto segment = NewSegment(partition, 0);
unsigned long delete_primary_keys[] = {100000, 100001, 100002};
long delete_primary_keys[] = {100000, 100001, 100002};
unsigned long delete_timestamps[] = {0, 0, 0};
auto del_res = Delete(segment, 1, delete_primary_keys, delete_timestamps);
auto offset = PreDelete(segment, 3);
auto del_res = Delete(segment, offset, 3, delete_primary_keys, delete_timestamps);
assert(del_res == 0);
// TODO: assert(deleted_count == len(delete_primary_keys))
@ -235,46 +245,3 @@ TEST(CApiTest, GetDeletedCountTest) {
DeletePartition(partition);
DeleteSegment(segment);
}
TEST(CApiTest, TimeGetterAndSetterTest) {
auto collection_name = "collection0";
auto schema_tmp_conf = "null_schema";
auto collection = NewCollection(collection_name, schema_tmp_conf);
auto partition_name = "partition0";
auto partition = NewPartition(collection, partition_name);
auto segment = NewSegment(partition, 0);
uint64_t TIME_BEGIN = 100;
uint64_t TIME_END = 200;
SetTimeBegin(segment, TIME_BEGIN);
auto time_begin = GetTimeBegin(segment);
assert(time_begin == TIME_BEGIN);
SetTimeEnd(segment, TIME_END);
auto time_end = GetTimeEnd(segment);
assert(time_end == TIME_END);
DeleteCollection(collection);
DeletePartition(partition);
DeleteSegment(segment);
}
TEST(CApiTest, SegmentIDTest) {
auto collection_name = "collection0";
auto schema_tmp_conf = "null_schema";
auto collection = NewCollection(collection_name, schema_tmp_conf);
auto partition_name = "partition0";
auto partition = NewPartition(collection, partition_name);
auto segment = NewSegment(partition, 0);
uint64_t SEGMENT_ID = 1;
SetSegmentId(segment, SEGMENT_ID);
auto segment_id = GetSegmentId(segment);
assert(segment_id == SEGMENT_ID);
DeleteCollection(collection);
DeletePartition(partition);
DeleteSegment(segment);
}

View File

@ -0,0 +1,7 @@
#include <gtest/gtest.h>
TEST(TestNaive, Naive) {
EXPECT_TRUE(true);
}

3
go.mod
View File

@ -13,6 +13,7 @@ require (
github.com/danieljoos/wincred v1.1.0 // indirect
github.com/docker/go-units v0.4.0
github.com/dvsekhvalnov/jose2go v0.0.0-20200901110807-248326c1351b // indirect
github.com/frankban/quicktest v1.10.2 // indirect
github.com/gogo/protobuf v1.3.1
github.com/golang/protobuf v1.4.2
github.com/google/btree v1.0.0
@ -35,7 +36,7 @@ require (
go.etcd.io/etcd v0.5.0-alpha.5.0.20191023171146-3cf2f69b5738
go.uber.org/zap v1.15.0
golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a // indirect
golang.org/x/net v0.0.0-20200904194848-62affa334b73
golang.org/x/net v0.0.0-20200904194848-62affa334b73 // indirect
golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43 // indirect
golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f // indirect
google.golang.org/grpc v1.31.0

22
go.sum
View File

@ -127,6 +127,7 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dimfeld/httptreemux v5.0.1+incompatible h1:Qj3gVcDNoOthBAqftuD596rm4wg/adLLz5xh5CmpiCA=
github.com/dimfeld/httptreemux v5.0.1+incompatible/go.mod h1:rbUlSV+CCpv/SuqUTP/8Bk2O3LyUV436/yaRGkhP6Z0=
github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
@ -144,6 +145,7 @@ github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaB
github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385 h1:clC1lXBpe2kTj2VHdaIu9ajZQe4kcEY9j0NsnDDBZ3o=
github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM=
github.com/envoyproxy/data-plane-api v0.0.0-20200904023242-f4d8a28107ca h1:EvL1gA7uyPU2JVN93HbQwYOXyUjUJKYGStDN8eKD/Ss=
github.com/envoyproxy/data-plane-api v0.0.0-20200909004014-2bb47b2b6fb0 h1:0edaQ8F7kgXmqz/tFjjl5rW/nAKUZ5Zg0Rv5vKiE6+U=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
@ -151,8 +153,11 @@ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4=
github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20=
github.com/frankban/quicktest v1.10.2 h1:19ARM85nVi4xH7xPXuc5eM/udya5ieh7b/Sv+d844Tk=
github.com/frankban/quicktest v1.10.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
@ -220,6 +225,8 @@ github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.1 h1:JFrFEBb2xKufg6XkJsJr+WbKb4FQlURi5RUcBveYu9k=
github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.2 h1:X2ev0eStA3AbceY54o37/0PQ/UWqKEiiO2dKL5OPaFM=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
@ -242,6 +249,7 @@ github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51
github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
github.com/gorilla/mux v1.7.0 h1:tOSd0UKHQd6urX6ApfOn4XdBMY6Sh1MfxV3kmaazO+U=
github.com/gorilla/mux v1.7.0/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
github.com/gorilla/mux v1.7.3 h1:gnP5JzjVOuiZD07fKKToCAOjS0yOpj/qPETTXCCS6hw=
github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
github.com/gorilla/websocket v1.4.0 h1:WDFjx/TMzVgy9VdMMQi2K2Emtwi2QcUQsztZ/zLaH/Q=
@ -323,9 +331,12 @@ github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFB
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM=
github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4=
@ -372,7 +383,9 @@ github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzE
github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w=
github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78=
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs=
github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA=
@ -381,10 +394,12 @@ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W
github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs=
github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
github.com/onsi/ginkgo v1.14.0 h1:2mOpI4JVVPBN+WQRa0WKH2eXR+Ey+uK4n7Zj0aYpIQA=
github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY=
github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU=
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
github.com/onsi/gomega v1.10.1 h1:o0+MgICZLuZ7xjH7Vx6zS/zcu93/BEp1VwkIW1mEXCE=
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk=
github.com/opentracing-contrib/go-observer v0.0.0-20170622124052-a52f23424492/go.mod h1:Ngi6UdF0k5OKD5t5wlmGhe/EDKPoUM3BXZSSfIuJbis=
@ -397,6 +412,7 @@ github.com/openzipkin-contrib/zipkin-go-opentracing v0.4.5/go.mod h1:/wsWhb9smxS
github.com/openzipkin/zipkin-go v0.1.6/go.mod h1:QgAqvLzwWbR/WpD4A3cGpPtJrZXNIiJc5AZX7/PBEpw=
github.com/openzipkin/zipkin-go v0.2.1/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4=
github.com/openzipkin/zipkin-go v0.2.2/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4=
github.com/ozonru/etcd v3.3.20-grpc1.27-origmodule+incompatible h1:CAG0PUvo1fen+ZEfxKJjFIc8GuuN5RuaBuCAuaP2Hno=
github.com/ozonru/etcd v3.3.20-grpc1.27-origmodule+incompatible/go.mod h1:iIubILNIN6Jq9h8uiSLrN9L1tuj3iSSFwz3R61skm/A=
github.com/pact-foundation/pact-go v1.0.4/go.mod h1:uExwJY4kCzNPcHRj+hCR/HBbOOIwwtUjcrb0b5/5kLM=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
@ -490,6 +506,7 @@ github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykE
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a h1:pa8hGb/2YqsZKovtsgrwcDH1RZhVbTKCjLp47XpqCDs=
github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
@ -500,12 +517,14 @@ github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2
github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ=
github.com/spf13/pflag v1.0.1 h1:aCvUg6QPl3ibpQUxyLkrEkCHtPqYJL4x9AuhqVqFis4=
github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw=
github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw=
github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
@ -775,6 +794,7 @@ golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc
golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200806022845-90696ccdc692 h1:fsn47thVa7Ar/TMyXYlZgOoT7M4+kRpb+KpSAqRQx1w=
golang.org/x/tools v0.0.0-20200806022845-90696ccdc692/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200825202427-b303f430e36d h1:W07d4xkoAUSNOkOzdzXCdFGxT7o2rW4q8M34tB2i//k=
golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@ -877,6 +897,7 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
@ -902,6 +923,7 @@ gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -1,9 +1,13 @@
# How to start a master
## Requirements
### Start an etcdv3 instance
```
./etcd -listen-peer-urls=http://192.168.1.10:12380 -advertise-client-urls=http://192.168.1.10:12379 -listen-client-urls http://0.0.0.0:12379,http://0.0.0.0:14001 -initial-advertise-peer-urls=http://192.168.1.10:12380
```
## Start from code
```
go run cmd/master.go
```
## Start with Docker

View File

@ -7,4 +7,6 @@ const (
PULSAR_MONITER_INTERVAL = 1 * time.Second
PULSAR_TOPIC = "monitor-topic"
ETCD_ROOT_PATH = "by-dev"
SEGMENT_THRESHOLE = 10000
DEFAULT_GRPC_PORT = ":53100"
)

View File

@ -0,0 +1,235 @@
// Code generated by protoc-gen-go. DO NOT EDIT.
// source: master.proto
//option go_package = "github.com/czs007/suvilm/pkg/master/grpc";
package masterpb
import (
context "context"
fmt "fmt"
proto "github.com/golang/protobuf/proto"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
math "math"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf
// This is a compile-time assertion to ensure that this generated file
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
type CreateCollectionRequest struct {
CollectionName string `protobuf:"bytes,1,opt,name=collection_name,json=collectionName,proto3" json:"collection_name,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *CreateCollectionRequest) Reset() { *m = CreateCollectionRequest{} }
func (m *CreateCollectionRequest) String() string { return proto.CompactTextString(m) }
func (*CreateCollectionRequest) ProtoMessage() {}
func (*CreateCollectionRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_f9c348dec43a6705, []int{0}
}
func (m *CreateCollectionRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_CreateCollectionRequest.Unmarshal(m, b)
}
func (m *CreateCollectionRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_CreateCollectionRequest.Marshal(b, m, deterministic)
}
func (m *CreateCollectionRequest) XXX_Merge(src proto.Message) {
xxx_messageInfo_CreateCollectionRequest.Merge(m, src)
}
func (m *CreateCollectionRequest) XXX_Size() int {
return xxx_messageInfo_CreateCollectionRequest.Size(m)
}
func (m *CreateCollectionRequest) XXX_DiscardUnknown() {
xxx_messageInfo_CreateCollectionRequest.DiscardUnknown(m)
}
var xxx_messageInfo_CreateCollectionRequest proto.InternalMessageInfo
func (m *CreateCollectionRequest) GetCollectionName() string {
if m != nil {
return m.CollectionName
}
return ""
}
type CreateCollectionResponse struct {
CollectionName string `protobuf:"bytes,1,opt,name=collection_name,json=collectionName,proto3" json:"collection_name,omitempty"`
CollectionId uint64 `protobuf:"varint,2,opt,name=collection_id,json=collectionId,proto3" json:"collection_id,omitempty"`
SegmentIds []uint64 `protobuf:"varint,3,rep,packed,name=segment_ids,json=segmentIds,proto3" json:"segment_ids,omitempty"`
PartitionTags []string `protobuf:"bytes,4,rep,name=partition_tags,json=partitionTags,proto3" json:"partition_tags,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *CreateCollectionResponse) Reset() { *m = CreateCollectionResponse{} }
func (m *CreateCollectionResponse) String() string { return proto.CompactTextString(m) }
func (*CreateCollectionResponse) ProtoMessage() {}
func (*CreateCollectionResponse) Descriptor() ([]byte, []int) {
return fileDescriptor_f9c348dec43a6705, []int{1}
}
func (m *CreateCollectionResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_CreateCollectionResponse.Unmarshal(m, b)
}
func (m *CreateCollectionResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_CreateCollectionResponse.Marshal(b, m, deterministic)
}
func (m *CreateCollectionResponse) XXX_Merge(src proto.Message) {
xxx_messageInfo_CreateCollectionResponse.Merge(m, src)
}
func (m *CreateCollectionResponse) XXX_Size() int {
return xxx_messageInfo_CreateCollectionResponse.Size(m)
}
func (m *CreateCollectionResponse) XXX_DiscardUnknown() {
xxx_messageInfo_CreateCollectionResponse.DiscardUnknown(m)
}
var xxx_messageInfo_CreateCollectionResponse proto.InternalMessageInfo
func (m *CreateCollectionResponse) GetCollectionName() string {
if m != nil {
return m.CollectionName
}
return ""
}
func (m *CreateCollectionResponse) GetCollectionId() uint64 {
if m != nil {
return m.CollectionId
}
return 0
}
func (m *CreateCollectionResponse) GetSegmentIds() []uint64 {
if m != nil {
return m.SegmentIds
}
return nil
}
func (m *CreateCollectionResponse) GetPartitionTags() []string {
if m != nil {
return m.PartitionTags
}
return nil
}
func init() {
proto.RegisterType((*CreateCollectionRequest)(nil), "masterpb.CreateCollectionRequest")
proto.RegisterType((*CreateCollectionResponse)(nil), "masterpb.CreateCollectionResponse")
}
func init() { proto.RegisterFile("master.proto", fileDescriptor_f9c348dec43a6705) }
var fileDescriptor_f9c348dec43a6705 = []byte{
// 225 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x90, 0xcf, 0x4a, 0x03, 0x31,
0x10, 0x87, 0x5d, 0x77, 0x29, 0x76, 0x6c, 0xab, 0xe4, 0x62, 0xf0, 0xe2, 0x1a, 0x11, 0x73, 0xda,
0x83, 0xbe, 0x81, 0x3d, 0xf5, 0xa0, 0x87, 0xe0, 0xcd, 0x43, 0x49, 0x9b, 0x61, 0x09, 0x34, 0x7f,
0xcc, 0x8c, 0xef, 0xe5, 0x23, 0x0a, 0x5b, 0x6d, 0x45, 0x51, 0x3c, 0xe6, 0xe3, 0xc7, 0x47, 0xe6,
0x83, 0x49, 0xb0, 0xc4, 0x58, 0xba, 0x5c, 0x12, 0x27, 0x71, 0xb4, 0x7d, 0xe5, 0x95, 0xba, 0x87,
0xb3, 0x79, 0x41, 0xcb, 0x38, 0x4f, 0x9b, 0x0d, 0xae, 0xd9, 0xa7, 0x68, 0xf0, 0xe5, 0x15, 0x89,
0xc5, 0x0d, 0x9c, 0xac, 0x77, 0x70, 0x19, 0x6d, 0x40, 0x59, 0xb5, 0x95, 0x1e, 0x9b, 0xd9, 0x1e,
0x3f, 0xda, 0x80, 0xea, 0xad, 0x02, 0xf9, 0x53, 0x42, 0x39, 0x45, 0xc2, 0x7f, 0x5b, 0xc4, 0x15,
0x4c, 0xbf, 0x0c, 0xbd, 0x93, 0x87, 0x6d, 0xa5, 0x1b, 0x33, 0xd9, 0xc3, 0x85, 0x13, 0x17, 0x70,
0x4c, 0xd8, 0x07, 0x8c, 0xbc, 0xf4, 0x8e, 0x64, 0xdd, 0xd6, 0xba, 0x31, 0xf0, 0x81, 0x16, 0x8e,
0xc4, 0x35, 0xcc, 0xb2, 0x2d, 0xec, 0x07, 0x09, 0xdb, 0x9e, 0x64, 0xd3, 0xd6, 0x7a, 0x6c, 0xa6,
0x3b, 0xfa, 0x64, 0x7b, 0xba, 0x45, 0x18, 0x3d, 0x0c, 0x09, 0xc4, 0x33, 0x9c, 0x7e, 0xff, 0xbb,
0xb8, 0xec, 0x3e, 0xfb, 0x74, 0xbf, 0xc4, 0x39, 0x57, 0x7f, 0x4d, 0xb6, 0xa7, 0xab, 0x83, 0xd5,
0x68, 0xc8, 0x7d, 0xf7, 0x1e, 0x00, 0x00, 0xff, 0xff, 0x0d, 0xb6, 0xf8, 0x4e, 0x7e, 0x01, 0x00,
0x00,
}
// Reference imports to suppress errors if they are not otherwise used.
var _ context.Context
var _ grpc.ClientConn
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
const _ = grpc.SupportPackageIsVersion4
// MasterClient is the client API for Master service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream.
type MasterClient interface {
CreateCollection(ctx context.Context, in *CreateCollectionRequest, opts ...grpc.CallOption) (*CreateCollectionResponse, error)
}
type masterClient struct {
cc *grpc.ClientConn
}
func NewMasterClient(cc *grpc.ClientConn) MasterClient {
return &masterClient{cc}
}
func (c *masterClient) CreateCollection(ctx context.Context, in *CreateCollectionRequest, opts ...grpc.CallOption) (*CreateCollectionResponse, error) {
out := new(CreateCollectionResponse)
err := c.cc.Invoke(ctx, "/masterpb.Master/CreateCollection", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
// MasterServer is the server API for Master service.
type MasterServer interface {
CreateCollection(context.Context, *CreateCollectionRequest) (*CreateCollectionResponse, error)
}
// UnimplementedMasterServer can be embedded to have forward compatible implementations.
type UnimplementedMasterServer struct {
}
func (*UnimplementedMasterServer) CreateCollection(ctx context.Context, req *CreateCollectionRequest) (*CreateCollectionResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method CreateCollection not implemented")
}
func RegisterMasterServer(s *grpc.Server, srv MasterServer) {
s.RegisterService(&_Master_serviceDesc, srv)
}
func _Master_CreateCollection_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(CreateCollectionRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(MasterServer).CreateCollection(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/masterpb.Master/CreateCollection",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(MasterServer).CreateCollection(ctx, req.(*CreateCollectionRequest))
}
return interceptor(ctx, in, info, handler)
}
var _Master_serviceDesc = grpc.ServiceDesc{
ServiceName: "masterpb.Master",
HandlerType: (*MasterServer)(nil),
Methods: []grpc.MethodDesc{
{
MethodName: "CreateCollection",
Handler: _Master_CreateCollection_Handler,
},
},
Streams: []grpc.StreamDesc{},
Metadata: "master.proto",
}

View File

@ -0,0 +1,20 @@
syntax = "proto3";
//option go_package = "github.com/czs007/suvilm/pkg/master/grpc";
package masterpb;
service Master {
rpc CreateCollection (CreateCollectionRequest) returns (CreateCollectionResponse) {}
}
message CreateCollectionRequest {
string collection_name = 1;
}
message CreateCollectionResponse {
string collection_name = 1;
uint64 collection_id = 2;
repeated uint64 segment_ids = 3;
repeated string partition_tags = 4;
}

View File

@ -79,6 +79,12 @@ func (kv *etcdKVBase) Remove(key string) error {
return nil
}
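// Watch watches the given key (joined with the KV root path) and returns the etcd watch channel.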
func (kv *etcdKVBase) Watch(key string) clientv3.WatchChan {
key = path.Join(kv.rootPath, key)
rch := kv.client.Watch(context.Background(), key)
return rch
}
// SlowLogTxn wraps etcd transaction and log slow one.
type SlowLogTxn struct {
clientv3.Txn

View File

@ -1,7 +1,10 @@
package kv
import "go.etcd.io/etcd/clientv3"
type Base interface {
Load(key string) (string, error)
Save(key, value string) error
Remove(key string) error
Watch(key string) clientv3.WatchChan
}

View File

@ -36,11 +36,11 @@ func Collection2JSON(c Collection) (string, error) {
return string(b), nil
}
func JSON2Collection(s string) (Collection, error) {
func JSON2Collection(s string) (*Collection, error) {
var c Collection
err := json.Unmarshal([]byte(s), &c)
if err != nil {
return Collection{}, err
return &Collection{}, err
}
return c, nil
return &c, nil
}

View File

@ -0,0 +1,59 @@
package mock
import (
"context"
"log"
"time"
pb "github.com/czs007/suvlim/pkg/master/grpc"
"google.golang.org/grpc"
)
// func main() {
// // Set up a connection to the server.
// conn, err := grpc.Dial(address, grpc.WithInsecure(), grpc.WithBlock())
// if err != nil {
// log.Fatalf("did not connect: %v", err)
// }
// defer conn.Close()
// c := pb.NewGreeterClient(conn)
// // Contact the server and print out its response.
// name := defaultName
// if len(os.Args) > 1 {
// name = os.Args[1]
// }
// ctx, cancel := context.WithTimeout(context.Background(), time.Second)
// defer cancel()
// r, err := c.SayHello(ctx, &pb.HelloRequest{Name: name})
// if err != nil {
// log.Fatalf("could not greet: %v", err)
// }
// log.Printf("Greeting: %s", r.GetMessage())
// }
const (
addr = "192.168.1.10:53100"
)
func FakeCreateCollectionByGRPC() (string, uint64) {
conn, err := grpc.Dial(addr, grpc.WithInsecure(), grpc.WithBlock())
if err != nil {
log.Fatalf("did not connect: %v", err)
}
defer conn.Close()
c := pb.NewMasterClient(conn)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
r, err := c.CreateCollection(ctx, &pb.CreateCollectionRequest{CollectionName: "grpc-client-test"})
if err != nil {
log.Fatalf("could not greet: %v", err)
}
log.Printf("CreateCollection: %s, id: %d", r.GetCollectionName(), r.GetCollectionId())
return r.GetCollectionName(), r.GetCollectionId()
}

View File

@ -0,0 +1,15 @@
package mock
import (
"fmt"
"testing"
)
func TestFakeCreateCollectionByGRPC(t *testing.T) {
collectionName, collectionID := FakeCreateCollectionByGRPC()
if collectionName != "grpc-client-test" {
t.Error("Collection name wrong")
}
fmt.Println(collectionName)
fmt.Println(collectionID)
}

View File

@ -51,13 +51,13 @@ func Segment2JSON(s Segment) (string, error) {
return string(b), nil
}
func JSON2Segment(s string) (Segment, error) {
func JSON2Segment(s string) (*Segment, error) {
var c Segment
err := json.Unmarshal([]byte(s), &c)
if err != nil {
return Segment{}, err
return &Segment{}, err
}
return c, nil
return &c, nil
}
func FakeCreateSegment(id uint64, cl Collection, opentime time.Time, closetime time.Time) Segment {

View File

@ -1,19 +1,39 @@
package master
import (
"context"
"fmt"
"log"
"net"
"strconv"
"time"
"github.com/czs007/suvlim/pkg/master/common"
pb "github.com/czs007/suvlim/pkg/master/grpc"
"github.com/czs007/suvlim/pkg/master/informer"
"github.com/czs007/suvlim/pkg/master/kv"
"github.com/czs007/suvlim/pkg/master/mock"
"go.etcd.io/etcd/clientv3"
"google.golang.org/grpc"
)
func Run() {
go mock.FakePulsarProducer()
go GRPCServer()
go SegmentStatsController()
go CollectionController()
// block forever; the controllers and the gRPC server run in their own goroutines
select {}
}
func SegmentStatsController() {
cli, _ := clientv3.New(clientv3.Config{
Endpoints: []string{"127.0.0.1:12379"},
DialTimeout: 5 * time.Second,
})
defer cli.Close()
kvbase := kv.NewEtcdKVBase(cli, common.ETCD_ROOT_PATH)
ssChan := make(chan mock.SegmentStats, 10)
defer close(ssChan)
ssClient := informer.NewPulsarClient()
@ -21,7 +41,7 @@ func SegmentStatsController() {
for {
select {
case ss := <-ssChan:
fmt.Println(ss)
ComputeCloseTime(ss, kvbase)
case <-time.After(5 * time.Second):
fmt.Println("timeout")
return
@ -29,8 +49,51 @@ func SegmentStatsController() {
}
}
func GRPCServer() {
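// ComputeCloseTime estimates when a segment will fill up: once its memory usage
// exceeds 80% of SEGMENT_THRESHOLE, the remaining 20% divided by the current
// memory growth rate gives the seconds left, and the segment's CloseTimeStamp
// stored in etcd is pushed that far into the future.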
func ComputeCloseTime(ss mock.SegmentStats, kvbase kv.Base) error {
if int(ss.MemorySize) > common.SEGMENT_THRESHOLE*0.8 {
memRate := int(ss.MemoryRate)
if memRate == 0 {
memRate = 1
}
sec := common.SEGMENT_THRESHOLE * 0.2 / memRate
data, err := kvbase.Load(strconv.Itoa(int(ss.SegementID)))
if err != nil {
return err
}
seg, err := mock.JSON2Segment(data)
if err != nil {
return err
}
seg.CloseTimeStamp = time.Now().Add(time.Duration(sec) * time.Second)
updateData, err := mock.Segment2JSON(*seg)
if err != nil {
return err
}
kvbase.Save(strconv.Itoa(int(ss.SegementID)), updateData)
}
return nil
}
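// GRPCServer listens on DEFAULT_GRPC_PORT, registers the master service and
// blocks serving CreateCollection requests.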
func GRPCServer() error {
lis, err := net.Listen("tcp", common.DEFAULT_GRPC_PORT)
if err != nil {
return err
}
s := grpc.NewServer()
pb.RegisterMasterServer(s, GRPCMasterServer{})
if err := s.Serve(lis); err != nil {
log.Fatalf("failed to serve: %v", err)
return err
}
return nil
}
type GRPCMasterServer struct{}
func (ms GRPCMasterServer) CreateCollection(ctx context.Context, in *pb.CreateCollectionRequest) (*pb.CreateCollectionResponse, error) {
return &pb.CreateCollectionResponse{
CollectionName: in.CollectionName,
}, nil
}
func CollectionController() {
@ -56,7 +119,3 @@ func CollectionController() {
log.Fatal(err)
}
}
func Sync() {
}

View File

@ -1,15 +1,15 @@
package reader
import (
schema2 "github.com/czs007/suvlim/pulsar/client-go/pb"
msgPb "github.com/czs007/suvlim/pkg/message"
)
type IndexConfig struct {}
func buildIndex(config IndexConfig) schema2.Status {
return schema2.Status{ErrorCode: schema2.ErrorCode_SUCCESS}
func buildIndex(config IndexConfig) msgPb.Status {
return msgPb.Status{ErrorCode: msgPb.ErrorCode_SUCCESS}
}
func dropIndex(fieldName string) schema2.Status {
return schema2.Status{ErrorCode: schema2.ErrorCode_SUCCESS}
func dropIndex(fieldName string) msgPb.Status {
return msgPb.Status{ErrorCode: msgPb.ErrorCode_SUCCESS}
}

View File

@ -2,9 +2,9 @@ package message_client
import (
"context"
"github.com/apache/pulsar/pulsar-client-go/pulsar"
msgpb "github.com/czs007/suvlim/pkg/message"
"github.com/golang/protobuf/proto"
"github.com/pulsar-client-go/pulsar"
"log"
)

View File

@ -14,25 +14,35 @@ package reader
import "C"
import (
"errors"
"fmt"
msgPb "github.com/czs007/suvlim/pkg/message"
"github.com/czs007/suvlim/reader/message_client"
"sort"
"strconv"
"sync"
"time"
)
type InsertData struct {
insertIDs map[int64][]int64
insertTimestamps map[int64][]uint64
insertRecords map[int64][][]byte
insertOffset map[int64]int64
}
type DeleteData struct {
deleteIDs map[int64][]int64
deleteTimestamps map[int64][]uint64
deleteOffset map[int64]int64
}
type DeleteRecord struct {
entityID int64
timestamp uint64
segmentID int64
}
type DeleteRecords struct {
deleteRecords *[]DeleteRecord
count chan int
type DeletePreprocessData struct {
deleteRecords []*DeleteRecord
count chan int
}
type QueryNodeDataBuffer struct {
@ -43,13 +53,15 @@ type QueryNodeDataBuffer struct {
}
type QueryNode struct {
QueryNodeId uint64
Collections []*Collection
SegmentsMap map[int64]*Segment
messageClient message_client.MessageClient
queryNodeTimeSync *QueryNodeTime
deleteRecordsMap map[TimeRange]DeleteRecords
buffer QueryNodeDataBuffer
QueryNodeId uint64
Collections []*Collection
SegmentsMap map[int64]*Segment
messageClient message_client.MessageClient
queryNodeTimeSync *QueryNodeTime
buffer QueryNodeDataBuffer
deletePreprocessData DeletePreprocessData
deleteData DeleteData
insertData InsertData
}
func NewQueryNode(queryNodeId uint64, timeSync uint64) *QueryNode {
@ -71,7 +83,6 @@ func NewQueryNode(queryNodeId uint64, timeSync uint64) *QueryNode {
SegmentsMap: segmentsMap,
messageClient: mc,
queryNodeTimeSync: queryNodeTimeSync,
deleteRecordsMap: make(map[TimeRange]DeleteRecords),
}
}
@ -95,19 +106,6 @@ func (node *QueryNode) DeleteCollection(collection *Collection) {
////////////////////////////////////////////////////////////////////////////////////////////////////
func (node *QueryNode) doQueryNode(wg *sync.WaitGroup) {
wg.Add(3)
// Do insert and delete messages sort, do insert
go node.InsertAndDelete(node.messageClient.InsertOrDeleteMsg, wg)
// Do delete messages sort
go node.searchDeleteInMap()
// Do delete
go node.Delete()
// Do search
go node.Search(node.messageClient.SearchMsg, wg)
wg.Wait()
}
func (node *QueryNode) PrepareBatchMsg() {
node.messageClient.PrepareBatchMsg()
}
@ -119,60 +117,6 @@ func (node *QueryNode) StartMessageClient() {
go node.messageClient.ReceiveMessage()
}
// Function `GetSegmentByEntityId` should return entityIDs, timestamps and segmentIDs
func (node *QueryNode) GetKey2Segments() ([]int64, []uint64, []int64) {
// TODO: get id2segment info from pulsar
return nil, nil, nil
}
func (node *QueryNode) GetTargetSegment(collectionName *string, partitionTag *string) (*Segment, error) {
var targetPartition *Partition
for _, collection := range node.Collections {
if *collectionName == collection.CollectionName {
for _, partition := range collection.Partitions {
if *partitionTag == partition.PartitionName {
targetPartition = partition
break
}
}
}
}
if targetPartition == nil {
return nil, errors.New("cannot found target partition")
}
for _, segment := range targetPartition.OpenedSegments {
// TODO: add other conditions
return segment, nil
}
return nil, errors.New("cannot found target segment")
}
func (node *QueryNode) GetCollectionByCollectionName(collectionName string) (*Collection, error) {
for _, collection := range node.Collections {
if collection.CollectionName == collectionName {
return collection, nil
}
}
return nil, errors.New("Cannot found collection: " + collectionName)
}
func (node *QueryNode) GetSegmentBySegmentID(segmentID int64) (*Segment, error) {
targetSegment := node.SegmentsMap[segmentID]
if targetSegment == nil {
return nil, errors.New("cannot found segment with id = " + strconv.FormatInt(segmentID, 10))
}
return targetSegment, nil
}
////////////////////////////////////////////////////////////////////////////////////////////////////
func (node *QueryNode) InitQueryNodeCollection() {
// TODO: remove hard code, add collection creation request
// TODO: error handle
@ -182,70 +126,47 @@ func (node *QueryNode) InitQueryNodeCollection() {
var _ = newPartition.NewSegment(0)
}
func (node *QueryNode) SegmentsManagement() {
node.queryNodeTimeSync.UpdateTSOTimeSync()
var timeNow = node.queryNodeTimeSync.TSOTimeSync
for _, collection := range node.Collections {
for _, partition := range collection.Partitions {
for _, oldSegment := range partition.OpenedSegments {
// TODO: check segment status
if timeNow >= oldSegment.SegmentCloseTime {
// start new segment and add it into partition.OpenedSegments
// TODO: get segmentID from master
var segmentID int64 = 0
var newSegment = partition.NewSegment(segmentID)
newSegment.SegmentCloseTime = timeNow + SegmentLifetime
partition.OpenedSegments = append(partition.OpenedSegments, newSegment)
node.SegmentsMap[segmentID] = newSegment
////////////////////////////////////////////////////////////////////////////////////////////////////
// close old segment and move it into partition.ClosedSegments
// TODO: check status
var _ = oldSegment.Close()
partition.ClosedSegments = append(partition.ClosedSegments, oldSegment)
}
}
}
}
}
func (node *QueryNode) SegmentService() {
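// RunInsertDelete drives one batch per iteration: fetch a batch of messages,
// split them by the batch time range, resolve delete targets via the writer's
// key-to-segment info, reserve offsets, apply the inserts and deletes, and
// finally advance the search time sync.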
func (node *QueryNode) RunInsertDelete() {
for {
time.Sleep(200 * time.Millisecond)
node.SegmentsManagement()
fmt.Println("do segments management in 200ms")
// TODO: get timeRange from message client
var timeRange = TimeRange{0, 0}
node.PrepareBatchMsg()
node.MessagesPreprocess(node.messageClient.InsertOrDeleteMsg, timeRange)
node.WriterDelete()
node.PreInsertAndDelete()
node.DoInsertAndDelete()
node.queryNodeTimeSync.UpdateSearchTimeSync(timeRange)
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// TODO: receive delete messages individually
func (node *QueryNode) InsertAndDelete(insertDeleteMessages []*msgPb.InsertOrDeleteMsg, wg *sync.WaitGroup) msgPb.Status {
node.queryNodeTimeSync.UpdateReadTimeSync()
func (node *QueryNode) RunSearch() {
for {
node.Search(node.messageClient.SearchMsg)
}
}
var tMin = node.queryNodeTimeSync.ReadTimeSyncMin
var tMax = node.queryNodeTimeSync.ReadTimeSyncMax
var readTimeSyncRange = TimeRange{timestampMin: tMin, timestampMax: tMax}
////////////////////////////////////////////////////////////////////////////////////////////////////
var clientId = insertDeleteMessages[0].ClientId
var insertIDs = make(map[int64][]int64)
var insertTimestamps = make(map[int64][]uint64)
var insertRecords = make(map[int64][][]byte)
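// MessagesPreprocess routes buffered and newly received insert/delete messages:
// everything timestamped before the batch's timestampMax goes into insertData or
// deletePreprocessData for this batch, later messages stay in QueryNodeDataBuffer
// for a following batch.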
func (node *QueryNode) MessagesPreprocess(insertDeleteMessages []*msgPb.InsertOrDeleteMsg, timeRange TimeRange) msgPb.Status {
var tMax = timeRange.timestampMax
// 1. Extract messages before readTimeSync from QueryNodeDataBuffer.
// Set valid bitmap to false.
for i, msg := range node.buffer.InsertDeleteBuffer {
if msg.Timestamp <= tMax {
if msg.Timestamp < tMax {
if msg.Op == msgPb.OpType_INSERT {
insertIDs[msg.SegmentId] = append(insertIDs[msg.SegmentId], msg.Uid)
insertTimestamps[msg.SegmentId] = append(insertTimestamps[msg.SegmentId], msg.Timestamp)
insertRecords[msg.SegmentId] = append(insertRecords[msg.SegmentId], msg.RowsData.Blob)
node.insertData.insertIDs[msg.SegmentId] = append(node.insertData.insertIDs[msg.SegmentId], msg.Uid)
node.insertData.insertTimestamps[msg.SegmentId] = append(node.insertData.insertTimestamps[msg.SegmentId], msg.Timestamp)
node.insertData.insertRecords[msg.SegmentId] = append(node.insertData.insertRecords[msg.SegmentId], msg.RowsData.Blob)
} else if msg.Op == msgPb.OpType_DELETE {
var r = DeleteRecord {
entityID: msg.Uid,
timestamp: msg.Timestamp,
}
*node.deleteRecordsMap[readTimeSyncRange].deleteRecords = append(*node.deleteRecordsMap[readTimeSyncRange].deleteRecords, r)
node.deleteRecordsMap[readTimeSyncRange].count <- <- node.deleteRecordsMap[readTimeSyncRange].count + 1
node.deletePreprocessData.deleteRecords = append(node.deletePreprocessData.deleteRecords, &r)
node.deletePreprocessData.count <- <- node.deletePreprocessData.count + 1
}
node.buffer.validInsertDeleteBuffer[i] = false
}
@ -264,18 +185,18 @@ func (node *QueryNode) InsertAndDelete(insertDeleteMessages []*msgPb.InsertOrDel
// Move messages after readTimeSync to QueryNodeDataBuffer.
// Set valid bitmap to true.
for _, msg := range insertDeleteMessages {
if msg.Timestamp <= tMax {
if msg.Timestamp < tMax {
if msg.Op == msgPb.OpType_INSERT {
insertIDs[msg.SegmentId] = append(insertIDs[msg.SegmentId], msg.Uid)
insertTimestamps[msg.SegmentId] = append(insertTimestamps[msg.SegmentId], msg.Timestamp)
insertRecords[msg.SegmentId] = append(insertRecords[msg.SegmentId], msg.RowsData.Blob)
node.insertData.insertIDs[msg.SegmentId] = append(node.insertData.insertIDs[msg.SegmentId], msg.Uid)
node.insertData.insertTimestamps[msg.SegmentId] = append(node.insertData.insertTimestamps[msg.SegmentId], msg.Timestamp)
node.insertData.insertRecords[msg.SegmentId] = append(node.insertData.insertRecords[msg.SegmentId], msg.RowsData.Blob)
} else if msg.Op == msgPb.OpType_DELETE {
var r = DeleteRecord {
entityID: msg.Uid,
timestamp: msg.Timestamp,
}
*node.deleteRecordsMap[readTimeSyncRange].deleteRecords = append(*node.deleteRecordsMap[readTimeSyncRange].deleteRecords, r)
node.deleteRecordsMap[readTimeSyncRange].count <- <- node.deleteRecordsMap[readTimeSyncRange].count + 1
node.deletePreprocessData.deleteRecords = append(node.deletePreprocessData.deleteRecords, &r)
node.deletePreprocessData.count <- <- node.deletePreprocessData.count + 1
}
} else {
node.buffer.InsertDeleteBuffer = append(node.buffer.InsertDeleteBuffer, msg)
@ -283,81 +204,126 @@ func (node *QueryNode) InsertAndDelete(insertDeleteMessages []*msgPb.InsertOrDel
}
}
// 4. Do insert
// TODO: multi-thread insert
for segmentID, records := range insertRecords {
return msgPb.Status{ErrorCode: msgPb.ErrorCode_SUCCESS}
}
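// WriterDelete blocks until every pending delete record has been matched to a
// segment: it repeatedly reads the writer's key-to-segment mapping, fills in
// DeleteRecord.segmentID on a match, and returns once the outstanding count
// drops to zero.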
func (node *QueryNode) WriterDelete() msgPb.Status {
// TODO: set timeout
for {
var ids, timestamps, segmentIDs = node.GetKey2Segments()
for i := 0; i <= len(*ids); i++ {
id := (*ids)[i]
timestamp := (*timestamps)[i]
segmentID := (*segmentIDs)[i]
for _, r := range node.deletePreprocessData.deleteRecords {
if r.timestamp == timestamp && r.entityID == id {
r.segmentID = segmentID
node.deletePreprocessData.count <- <- node.deletePreprocessData.count - 1
}
}
}
if <- node.deletePreprocessData.count == 0 {
return msgPb.Status{ErrorCode: msgPb.ErrorCode_SUCCESS}
}
}
}
func (node *QueryNode) PreInsertAndDelete() msgPb.Status {
// 1. Do PreInsert
for segmentID := range node.insertData.insertRecords {
var targetSegment, err = node.GetSegmentBySegmentID(segmentID)
if err != nil {
fmt.Println(err.Error())
return msgPb.Status{ErrorCode: 1}
}
ids := insertIDs[segmentID]
timestamps := insertTimestamps[segmentID]
err = targetSegment.SegmentInsert(&ids, &timestamps, &records, tMin, tMax)
var numOfRecords = len(node.insertData.insertRecords[segmentID])
var offset = targetSegment.SegmentPreInsert(numOfRecords)
node.insertData.insertOffset[segmentID] = offset
}
// 2. Sort delete preprocess data by segment id
for _, r := range node.deletePreprocessData.deleteRecords {
node.deleteData.deleteIDs[r.segmentID] = append(node.deleteData.deleteIDs[r.segmentID], r.entityID)
node.deleteData.deleteTimestamps[r.segmentID] = append(node.deleteData.deleteTimestamps[r.segmentID], r.timestamp)
}
// 3. Do PreDelete
for segmentID := range node.deleteData.deleteIDs {
var targetSegment, err = node.GetSegmentBySegmentID(segmentID)
if err != nil {
fmt.Println(err.Error())
return msgPb.Status{ErrorCode: 1}
}
var numOfRecords = len(node.deleteData.deleteIDs[segmentID])
var offset = targetSegment.SegmentPreDelete(numOfRecords)
node.deleteData.deleteOffset[segmentID] = offset
}
return msgPb.Status{ErrorCode: msgPb.ErrorCode_SUCCESS}
}
func (node *QueryNode) DoInsertAndDelete() msgPb.Status {
var wg sync.WaitGroup
// Do insert
for segmentID, records := range node.insertData.insertRecords {
wg.Add(1)
go node.DoInsert(segmentID, &records, &wg)
}
// Do delete
for segmentID, deleteIDs := range node.deleteData.deleteIDs {
wg.Add(1)
var deleteTimestamps = node.deleteData.deleteTimestamps[segmentID]
go node.DoDelete(segmentID, &deleteIDs, &deleteTimestamps, &wg)
}
wg.Wait()
return msgPb.Status{ErrorCode: msgPb.ErrorCode_SUCCESS}
}
func (node *QueryNode) DoInsert(segmentID int64, records *[][]byte, wg *sync.WaitGroup) msgPb.Status {
var targetSegment, err = node.GetSegmentBySegmentID(segmentID)
if err != nil {
fmt.Println(err.Error())
return msgPb.Status{ErrorCode: 1}
}
ids := node.insertData.insertIDs[segmentID]
timestamps := node.insertData.insertTimestamps[segmentID]
offsets := node.insertData.insertOffset[segmentID]
err = targetSegment.SegmentInsert(offsets, &ids, &timestamps, records)
if err != nil {
fmt.Println(err.Error())
return msgPb.Status{ErrorCode: 1}
}
wg.Done()
return publishResult(nil, clientId)
return msgPb.Status{ErrorCode: msgPb.ErrorCode_SUCCESS}
}
func (node *QueryNode) searchDeleteInMap() {
var ids, timestamps, segmentIDs = node.GetKey2Segments()
for i := 0; i <= len(ids); i++ {
id := ids[i]
timestamp := timestamps[i]
segmentID := segmentIDs[i]
for timeRange, records := range node.deleteRecordsMap {
if timestamp < timeRange.timestampMax && timestamp > timeRange.timestampMin {
for _, r := range *records.deleteRecords {
if r.timestamp == timestamp && r.entityID == id {
r.segmentID = segmentID
records.count <- <- records.count - 1
}
}
}
}
func (node *QueryNode) DoDelete(segmentID int64, deleteIDs *[]int64, deleteTimestamps *[]uint64, wg *sync.WaitGroup) msgPb.Status {
var segment, err = node.GetSegmentBySegmentID(segmentID)
if err != nil {
fmt.Println(err.Error())
return msgPb.Status{ErrorCode: 1}
}
offset := node.deleteData.deleteOffset[segmentID]
err = segment.SegmentDelete(offset, deleteIDs, deleteTimestamps)
if err != nil {
fmt.Println(err.Error())
return msgPb.Status{ErrorCode: 1}
}
wg.Done()
return msgPb.Status{ErrorCode: msgPb.ErrorCode_SUCCESS}
}
func (node *QueryNode) Delete() msgPb.Status {
type DeleteData struct {
ids *[]int64
timestamp *[]uint64
}
for timeRange, records := range node.deleteRecordsMap {
// TODO: multi-thread delete
if <- records.count == 0 {
// 1. Sort delete records by segment id
segment2records := make(map[int64]DeleteData)
for _, r := range *records.deleteRecords {
*segment2records[r.segmentID].ids = append(*segment2records[r.segmentID].ids, r.entityID)
*segment2records[r.segmentID].timestamp = append(*segment2records[r.segmentID].timestamp, r.timestamp)
}
// 2. Do batched delete
for segmentID, deleteData := range segment2records {
var segment, err = node.GetSegmentBySegmentID(segmentID)
if err != nil {
fmt.Println(err.Error())
return msgPb.Status{ErrorCode: 1}
}
err = segment.SegmentDelete(deleteData.ids, deleteData.timestamp, timeRange.timestampMin, timeRange.timestampMax)
if err != nil {
fmt.Println(err.Error())
return msgPb.Status{ErrorCode: 1}
}
}
}
}
return msgPb.Status{ErrorCode: 0}
}
func (node *QueryNode) Search(searchMessages []*msgPb.SearchMsg, wg *sync.WaitGroup) msgPb.Status {
func (node *QueryNode) Search(searchMessages []*msgPb.SearchMsg) msgPb.Status {
var clientId = searchMessages[0].ClientId
type SearchResultTmp struct {
@ -379,9 +345,16 @@ func (node *QueryNode) Search(searchMessages []*msgPb.SearchMsg, wg *sync.WaitGr
// TODO: get top-k's k from queryString
const TopK = 1
// 1. Do search in all segments
var timestamp = msg.Timestamp
var vector = msg.Records
// 1. Timestamp check
// TODO: return or wait? Or adding graceful time
if timestamp > node.queryNodeTimeSync.SearchTimeSync {
return msgPb.Status{ErrorCode: 1}
}
// 2. Do search in all segments
for _, partition := range targetCollection.Partitions {
for _, openSegment := range partition.OpenedSegments {
var res, err = openSegment.SegmentSearch("", timestamp, vector)
@ -410,16 +383,15 @@ func (node *QueryNode) Search(searchMessages []*msgPb.SearchMsg, wg *sync.WaitGr
return resultsTmp[i].ResultDistance < resultsTmp[j].ResultDistance
})
resultsTmp = resultsTmp[:TopK]
var results SearchResult
var results msgPb.QueryResult
for _, res := range resultsTmp {
results.ResultIds = append(results.ResultIds, res.ResultId)
results.ResultDistances = append(results.ResultDistances, res.ResultDistance)
results.Entities.Ids = append(results.Entities.Ids, res.ResultId)
results.Distances = append(results.Distances, res.ResultDistance)
}
// 3. publish result to pulsar
publishSearchResult(&results, clientId)
node.PublishSearchResult(&results, clientId)
}
wg.Done()
return msgPb.Status{ErrorCode: 0}
return msgPb.Status{ErrorCode: msgPb.ErrorCode_SUCCESS}
}

View File

@ -24,9 +24,8 @@ func (t *QueryNodeTime) UpdateWriteTimeSync() {
t.WriteTimeSync = 0
}
func (t *QueryNodeTime) UpdateSearchTimeSync() {
// TODO: Add time sync
t.SearchTimeSync = 0
func (t *QueryNodeTime) UpdateSearchTimeSync(timeRange TimeRange) {
t.SearchTimeSync = timeRange.timestampMax
}
func (t *QueryNodeTime) UpdateTSOTimeSync() {

View File

@ -1,22 +1,11 @@
package reader
import (
"fmt"
"sync"
"time"
)
func startQueryNode() {
qn := NewQueryNode(0, 0)
qn.InitQueryNodeCollection()
go qn.SegmentService()
qn.StartMessageClient()
var wg sync.WaitGroup
for {
time.Sleep(200 * time.Millisecond)
qn.PrepareBatchMsg()
qn.doQueryNode(&wg)
fmt.Println("do a batch in 200ms")
}
go qn.RunInsertDelete()
go qn.RunSearch()
}

View File

@ -1,6 +1,7 @@
package reader
import (
"context"
"fmt"
msgPb "github.com/czs007/suvlim/pkg/message"
"strconv"
@ -18,21 +19,16 @@ func getResultTopicByClientId(clientId int64) string {
return "result-topic/partition-" + strconv.FormatInt(clientId, 10)
}
func publishResult(ids *ResultEntityIds, clientId int64) msgPb.Status {
// TODO: Pulsar publish
func (node *QueryNode) PublishSearchResult(results *msgPb.QueryResult, clientId int64) msgPb.Status {
var ctx = context.Background()
var resultTopic = getResultTopicByClientId(clientId)
node.messageClient.Send(ctx, *results)
fmt.Println(resultTopic)
return msgPb.Status{ErrorCode: msgPb.ErrorCode_SUCCESS}
}
func publishSearchResult(searchResults *SearchResult, clientId int64) msgPb.Status {
// TODO: Pulsar publish
var resultTopic = getResultTopicByClientId(clientId)
fmt.Println(resultTopic)
return msgPb.Status{ErrorCode: msgPb.ErrorCode_SUCCESS}
}
func publicStatistic(statisticTopic string) msgPb.Status {
func (node *QueryNode) PublicStatistic(statisticTopic string) msgPb.Status {
// TODO: get statistic info
// getStatisticInfo()
// var info = getStatisticInfo()

View File

@ -27,8 +27,8 @@ const (
)
type Segment struct {
SegmentPtr C.CSegmentBase
SegmentId int64
SegmentPtr C.CSegmentBase
SegmentId int64
SegmentCloseTime uint64
}
@ -76,43 +76,59 @@ func (s *Segment) Close() error {
}
////////////////////////////////////////////////////////////////////////////
func (s *Segment) SegmentInsert(entityIds *[]int64, timestamps *[]uint64, records *[][]byte, timestampMin uint64, timestampMax uint64) error {
func (s *Segment) SegmentPreInsert(numOfRecords int) int64 {
/*C.PreInsert
long int
PreInsert(CSegmentBase c_segment, long int size);
*/
var offset = C.PreInsert(s.SegmentPtr, C.long(int64(numOfRecords)))
return int64(offset)
}
func (s *Segment) SegmentPreDelete(numOfRecords int) int64 {
/*C.PreDelete
long int
PreDelete(CSegmentBase c_segment, long int size);
*/
var offset = C.PreDelete(s.SegmentPtr, C.long(int64(numOfRecords)))
return int64(offset)
}
func (s *Segment) SegmentInsert(offset int64, entityIDs *[]int64, timestamps *[]uint64, records *[][]byte) error {
/*C.Insert
int
Insert(CSegmentBase c_segment,
long int reserved_offset,
signed long int size,
const unsigned long* primary_keys,
const long* primary_keys,
const unsigned long* timestamps,
void* raw_data,
int sizeof_per_row,
signed long int count,
unsigned long timestamp_min,
unsigned long timestamp_max);
signed long int count);
*/
// Blobs to one big blob
var rowData []byte
var rawData []byte
for i := 0; i < len(*records); i++ {
copy(rowData, (*records)[i])
rawData = append(rawData, (*records)[i]...)
}
// TODO: remove hard code schema
// auto schema_tmp = std::make_shared<Schema>();
// schema_tmp->AddField("fakeVec", DataType::VECTOR_FLOAT, 16);
// schema_tmp->AddField("age", DataType::INT32);
// TODO: remove hard code & fake dataChunk
const DIM = 4
const N = 3
var vec = [DIM]float32{1.1, 2.2, 3.3, 4.4}
var rawData []int8
for i := 0; i <= N; i++ {
for _, ele := range vec {
rawData=append(rawData, int8(ele))
}
rawData=append(rawData, int8(i))
}
const sizeofPerRow = 4 + DIM * 4
var cOffset = C.long(offset)
var cNumOfRows = C.long(len(*entityIDs))
var cEntityIdsPtr = (*C.long)(&(*entityIDs)[0])
var cTimestampsPtr = (*C.ulong)(&(*timestamps)[0])
var cSizeofPerRow = C.int(len((*records)[0]))
var cRawDataVoidPtr = unsafe.Pointer(&rawData[0])
var status = C.Insert(s.SegmentPtr, C.long(N), (*C.ulong)(&(*entityIds)[0]), (*C.ulong)(&(*timestamps)[0]), unsafe.Pointer(&rawData[0]), C.int(sizeofPerRow), C.long(N), C.ulong(timestampMin), C.ulong(timestampMax))
var status = C.Insert(s.SegmentPtr,
cOffset,
cNumOfRows,
cEntityIdsPtr,
cTimestampsPtr,
cRawDataVoidPtr,
cSizeofPerRow,
cNumOfRows)
if status != 0 {
return errors.New("Insert failed, error code = " + strconv.Itoa(int(status)))
@ -121,19 +137,21 @@ func (s *Segment) SegmentInsert(entityIds *[]int64, timestamps *[]uint64, record
return nil
}
func (s *Segment) SegmentDelete(entityIds *[]int64, timestamps *[]uint64, timestampMin uint64, timestampMax uint64) error {
func (s *Segment) SegmentDelete(offset int64, entityIDs *[]int64, timestamps *[]uint64) error {
/*C.Delete
int
Delete(CSegmentBase c_segment,
long int reserved_offset,
long size,
const unsigned long* primary_keys,
const unsigned long* timestamps,
unsigned long timestamp_min,
unsigned long timestamp_max);
const long* primary_keys,
const unsigned long* timestamps);
*/
size := len(*entityIds)
var cOffset = C.long(offset)
var cSize = C.long(len(*entityIDs))
var cEntityIdsPtr = (*C.long)(&(*entityIDs)[0])
var cTimestampsPtr = (*C.ulong)(&(*timestamps)[0])
var status = C.Delete(s.SegmentPtr, C.long(size), (*C.ulong)(&(*entityIds)[0]), (*C.ulong)(&(*timestamps)[0]), C.ulong(timestampMin), C.ulong(timestampMax))
var status = C.Delete(s.SegmentPtr, cOffset, cSize, cEntityIdsPtr, cTimestampsPtr)
if status != 0 {
return errors.New("Delete failed, error code = " + strconv.Itoa(int(status)))
@ -157,7 +175,13 @@ func (s *Segment) SegmentSearch(queryString string, timestamp uint64, vectorReco
resultIds := make([]int64, TopK)
resultDistances := make([]float32, TopK)
var status = C.Search(s.SegmentPtr, unsafe.Pointer(nil), C.ulong(timestamp), (*C.long)(&resultIds[0]), (*C.float)(&resultDistances[0]))
var cQueryPtr = unsafe.Pointer(nil)
var cTimestamp = C.ulong(timestamp)
var cResultIds = (*C.long)(&resultIds[0])
var cResultDistances = (*C.float)(&resultDistances[0])
var status = C.Search(s.SegmentPtr, cQueryPtr, cTimestamp, cResultIds, cResultDistances)
if status != 0 {
return nil, errors.New("Search failed, error code = " + strconv.Itoa(int(status)))
}

View File

@ -0,0 +1,40 @@
package reader
import (
"fmt"
"time"
)
func (node *QueryNode) SegmentsManagement() {
node.queryNodeTimeSync.UpdateTSOTimeSync()
var timeNow = node.queryNodeTimeSync.TSOTimeSync
for _, collection := range node.Collections {
for _, partition := range collection.Partitions {
for _, oldSegment := range partition.OpenedSegments {
// TODO: check segment status
if timeNow >= oldSegment.SegmentCloseTime {
// start new segment and add it into partition.OpenedSegments
// TODO: get segmentID from master
var segmentID int64 = 0
var newSegment = partition.NewSegment(segmentID)
newSegment.SegmentCloseTime = timeNow + SegmentLifetime
partition.OpenedSegments = append(partition.OpenedSegments, newSegment)
node.SegmentsMap[segmentID] = newSegment
// close old segment and move it into partition.ClosedSegments
// TODO: check status
var _ = oldSegment.Close()
partition.ClosedSegments = append(partition.ClosedSegments, oldSegment)
}
}
}
}
}
func (node *QueryNode) SegmentService() {
for {
time.Sleep(200 * time.Millisecond)
node.SegmentsManagement()
fmt.Println("do segments management in 200ms")
}
}

View File

@ -6,136 +6,271 @@ import (
"testing"
)
func TestConstructorAndDestructor(t *testing.T) {
func TestSegment_ConstructorAndDestructor(t *testing.T) {
// 1. Construct node, collection, partition and segment
node := NewQueryNode(0, 0)
var collection = node.NewCollection("collection0", "fake schema")
var partition = collection.NewPartition("partition0")
var segment = partition.NewSegment(0)
// 2. Destruct node, collection, and segment
partition.DeleteSegment(segment)
collection.DeletePartition(partition)
node.DeleteCollection(collection)
}
func TestSegmentInsert(t *testing.T) {
func TestSegment_SegmentInsert(t *testing.T) {
// 1. Construct node, collection, partition and segment
node := NewQueryNode(0, 0)
var collection = node.NewCollection("collection0", "fake schema")
var partition = collection.NewPartition("partition0")
var segment = partition.NewSegment(0)
// 2. Create ids and timestamps
ids :=[] int64{1, 2, 3}
timestamps :=[] uint64 {0, 0, 0}
var err = segment.SegmentInsert(&ids, &timestamps, nil, 0, 0)
// 3. Create records, use schema below:
// schema_tmp->AddField("fakeVec", DataType::VECTOR_FLOAT, 16);
// schema_tmp->AddField("age", DataType::INT32);
const DIM = 4
const N = 3
var vec = [DIM]float32{1.1, 2.2, 3.3, 4.4}
var rawData []byte
for _, ele := range vec {
rawData=append(rawData, byte(ele))
}
rawData=append(rawData, byte(1))
var records [][]byte
for i:= 0; i < N; i++ {
records = append(records, rawData)
}
// 4. Do PreInsert
var offset = segment.SegmentPreInsert(N)
assert.Greater(t, offset, int64(0))
// 5. Do Insert
var err = segment.SegmentInsert(offset, &ids, &timestamps, &records)
assert.NoError(t, err)
// 6. Destruct node, collection, and segment
partition.DeleteSegment(segment)
collection.DeletePartition(partition)
node.DeleteCollection(collection)
}
func TestSegmentDelete(t *testing.T) {
func TestSegment_SegmentDelete(t *testing.T) {
// 1. Construct node, collection, partition and segment
node := NewQueryNode(0, 0)
var collection = node.NewCollection("collection0", "fake schema")
var partition = collection.NewPartition("partition0")
var segment = partition.NewSegment(0)
// 2. Create ids and timestamps
ids :=[] int64{1, 2, 3}
timestamps :=[] uint64 {0, 0, 0}
var err = segment.SegmentDelete(&ids, &timestamps, 0, 0)
// 3. Do PreDelete
var offset = segment.SegmentPreDelete(10)
assert.Greater(t, offset, int64(0))
// 4. Do Delete
var err = segment.SegmentDelete(offset, &ids, &timestamps)
assert.NoError(t, err)
// 5. Destruct node, collection, and segment
partition.DeleteSegment(segment)
collection.DeletePartition(partition)
node.DeleteCollection(collection)
}
func TestSegmentSearch(t *testing.T) {
func TestSegment_SegmentSearch(t *testing.T) {
// 1. Construct node, collection, partition and segment
node := NewQueryNode(0, 0)
var collection = node.NewCollection("collection0", "fake schema")
var partition = collection.NewPartition("partition0")
var segment = partition.NewSegment(0)
// 2. Create ids and timestamps
ids :=[] int64{1, 2, 3}
timestamps :=[] uint64 {0, 0, 0}
var insertErr = segment.SegmentInsert(&ids, &timestamps, nil, 0, 0)
assert.NoError(t, insertErr)
// 3. Create records, use schema below:
// schema_tmp->AddField("fakeVec", DataType::VECTOR_FLOAT, 16);
// schema_tmp->AddField("age", DataType::INT32);
const DIM = 4
const N = 3
var vec = [DIM]float32{1.1, 2.2, 3.3, 4.4}
var rawData []byte
for _, ele := range vec {
rawData=append(rawData, byte(ele))
}
rawData=append(rawData, byte(1))
var records [][]byte
for i:= 0; i < N; i++ {
records = append(records, rawData)
}
// 4. Do PreInsert
var offset = segment.SegmentPreInsert(N)
assert.Greater(t, offset, int64(0))
// 5. Do Insert
var err = segment.SegmentInsert(offset, &ids, &timestamps, &records)
assert.NoError(t, err)
// 6. Do search
var searchRes, searchErr = segment.SegmentSearch("fake query string", timestamps[0], nil)
assert.NoError(t, searchErr)
fmt.Println(searchRes)
// 7. Destruct node, collection, and segment
partition.DeleteSegment(segment)
collection.DeletePartition(partition)
node.DeleteCollection(collection)
}
func TestSegment_GetStatus(t *testing.T) {
func TestSegment_SegmentPreInsert(t *testing.T) {
// 1. Construct node, collection, partition and segment
node := NewQueryNode(0, 0)
var collection = node.NewCollection("collection0", "fake schema")
var partition = collection.NewPartition("partition0")
var segment = partition.NewSegment(0)
// 2. Do PreInsert
var offset = segment.SegmentPreInsert(10)
assert.Greater(t, offset, int64(0))
// 3. Destruct node, collection, and segment
partition.DeleteSegment(segment)
collection.DeletePartition(partition)
node.DeleteCollection(collection)
}
func TestSegment_SegmentPreDelete(t *testing.T) {
// 1. Construct node, collection, partition and segment
node := NewQueryNode(0, 0)
var collection = node.NewCollection("collection0", "fake schema")
var partition = collection.NewPartition("partition0")
var segment = partition.NewSegment(0)
// 2. Do PreDelete
var offset = segment.SegmentPreDelete(10)
assert.Greater(t, offset, int64(0))
// 3. Destruct node, collection, and segment
partition.DeleteSegment(segment)
collection.DeletePartition(partition)
node.DeleteCollection(collection)
}
// Segment util functions test
////////////////////////////////////////////////////////////////////////////
func TestSegment_GetStatus(t *testing.T) {
// 1. Construct node, collection, partition and segment
node := NewQueryNode(0, 0)
var collection = node.NewCollection("collection0", "fake schema")
var partition = collection.NewPartition("partition0")
var segment = partition.NewSegment(0)
// 2. Get segment status
var status = segment.GetStatus()
assert.Equal(t, status, SegmentOpened)
// 3. Destruct node, collection, and segment
partition.DeleteSegment(segment)
collection.DeletePartition(partition)
node.DeleteCollection(collection)
}
func TestSegment_Close(t *testing.T) {
// 1. Construct node, collection, partition and segment
node := NewQueryNode(0, 0)
var collection = node.NewCollection("collection0", "fake schema")
var partition = collection.NewPartition("partition0")
var segment = partition.NewSegment(0)
// 2. Close segment
var err = segment.Close()
assert.NoError(t, err)
// 3. Destruct node, collection, and segment
partition.DeleteSegment(segment)
collection.DeletePartition(partition)
node.DeleteCollection(collection)
}
func TestSegment_GetRowCount(t *testing.T) {
// 1. Construct node, collection, partition and segment
node := NewQueryNode(0, 0)
var collection = node.NewCollection("collection0", "fake schema")
var partition = collection.NewPartition("partition0")
var segment = partition.NewSegment(0)
// 2. Create ids and timestamps
ids :=[] int64{1, 2, 3}
timestamps :=[] uint64 {0, 0, 0}
var err = segment.SegmentInsert(&ids, &timestamps, nil, 0, 0)
// 3. Create records, use schema below:
// schema_tmp->AddField("fakeVec", DataType::VECTOR_FLOAT, 16);
// schema_tmp->AddField("age", DataType::INT32);
const DIM = 4
const N = 3
var vec = [DIM]float32{1.1, 2.2, 3.3, 4.4}
var rawData []byte
for _, ele := range vec {
rawData=append(rawData, byte(ele))
}
rawData=append(rawData, byte(1))
var records [][]byte
for i:= 0; i < N; i++ {
records = append(records, rawData)
}
// 4. Do PreInsert
var offset = segment.SegmentPreInsert(N)
assert.Greater(t, offset, int64(0))
// 5. Do Insert
var err = segment.SegmentInsert(offset, &ids, &timestamps, &records)
assert.NoError(t, err)
// 6. Get segment row count
var rowCount = segment.GetRowCount()
assert.Equal(t, rowCount, int64(len(ids)))
// 7. Destruct node, collection, and segment
partition.DeleteSegment(segment)
collection.DeletePartition(partition)
node.DeleteCollection(collection)
}
func TestSegment_GetDeletedCount(t *testing.T) {
// 1. Construct node, collection, partition and segment
node := NewQueryNode(0, 0)
var collection = node.NewCollection("collection0", "fake schema")
var partition = collection.NewPartition("partition0")
var segment = partition.NewSegment(0)
// 2. Create ids and timestamps
ids :=[] int64{1, 2, 3}
timestamps :=[] uint64 {0, 0, 0}
var err = segment.SegmentDelete(&ids, &timestamps, 0, 0)
// 3. Do PreDelete
var offset = segment.SegmentPreDelete(10)
assert.Greater(t, offset, int64(0))
// 4. Do Delete
var err = segment.SegmentDelete(offset, &ids, &timestamps)
assert.NoError(t, err)
// 5. Get segment deleted count
var deletedCount = segment.GetDeletedCount()
// TODO: assert.Equal(t, deletedCount, len(ids))
assert.Equal(t, deletedCount, int64(0))
// 6. Destruct node, collection, and segment
partition.DeleteSegment(segment)
collection.DeletePartition(partition)
node.DeleteCollection(collection)

reader/util_functions.go
View File

@ -0,0 +1,44 @@
package reader
import (
"errors"
"strconv"
)
// GetKey2Segments returns the entityIDs, timestamps and segmentIDs carried by the writer's key-to-segment messages.
func (node *QueryNode) GetKey2Segments() (*[]int64, *[]uint64, *[]int64) {
var entityIDs []int64
var timestamps []uint64
var segmentIDs []int64
var key2SegMsg = &node.messageClient.Key2SegMsg
for _, msg := range *key2SegMsg {
for _, segmentID := range (*msg).SegmentId {
entityIDs = append(entityIDs, msg.Uid)
timestamps = append(timestamps, msg.Timestamp)
segmentIDs = append(segmentIDs, segmentID)
}
}
return &entityIDs, &timestamps, &segmentIDs
}
func (node *QueryNode) GetCollectionByCollectionName(collectionName string) (*Collection, error) {
for _, collection := range node.Collections {
if collection.CollectionName == collectionName {
return collection, nil
}
}
return nil, errors.New("Cannot found collection: " + collectionName)
}
func (node *QueryNode) GetSegmentBySegmentID(segmentID int64) (*Segment, error) {
targetSegment := node.SegmentsMap[segmentID]
if targetSegment == nil {
return nil, errors.New("cannot found segment with id = " + strconv.FormatInt(segmentID, 10))
}
return targetSegment, nil
}

View File

@ -0,0 +1,34 @@
package reader
import (
"github.com/stretchr/testify/assert"
"testing"
)
func TestUtilFunctions_GetKey2Segments(t *testing.T) {
// TODO: Add GetKey2Segments test
}
func TestUtilFunctions_GetCollectionByCollectionName(t *testing.T) {
// 1. Construct node, and collections
node := NewQueryNode(0, 0)
var _ = node.NewCollection("collection0", "fake schema")
// 2. Get collection by collectionName
var c0, err = node.GetCollectionByCollectionName("collection0")
assert.NoError(t, err)
assert.Equal(t, c0.CollectionName, "collection0")
}
func TestUtilFunctions_GetSegmentBySegmentID(t *testing.T) {
// 1. Construct node, collection, partition and segment
node := NewQueryNode(0, 0)
var collection = node.NewCollection("collection0", "fake schema")
var partition = collection.NewPartition("partition0")
var _ = partition.NewSegment(0)
// 2. Get segment by segment id
var s0, err = node.GetSegmentBySegmentID(0)
assert.NoError(t, err)
assert.Equal(t, s0.SegmentId, int64(0))
}

timesync/readertimesync.go
View File

@ -0,0 +1,302 @@
package readertimesync
import (
"context"
"fmt"
"github.com/apache/pulsar-client-go/pulsar"
pb "github.com/czs007/suvlim/pkg/message"
"github.com/golang/protobuf/proto"
"log"
"sort"
)
const TimeSyncClientId int64 = -1
type ReaderTimeSync interface {
Start() error
Close()
TimeSync() <-chan TimeSyncMsg
InsertOrDelete() <-chan *pb.InsertOrDeleteMsg
IsInsertDeleteChanFull() bool
}
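// A minimal usage sketch of this interface (illustrative only; the Pulsar address,
// topic names, subscription names and the single proxy id below are assumptions):
//
//    r, err := NewReaderTimeSync("pulsar://localhost:6650", "timesync", "timesync-g",
//        []string{"reader1"}, "reader-g", []int64{1}, 200, WithReaderQueueSize(1024))
//    if err != nil {
//        log.Fatal(err)
//    }
//    _ = r.Start()
//    tsm := <-r.TimeSync()
//    for i := int64(0); i < tsm.NumRecorders; i++ {
//        msg := <-r.InsertOrDelete()
//        _ = msg // handle one insert/delete message belonging to this sync point
//    }
//    r.Close()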
type TimeSyncMsg struct {
Timestamp uint64
NumRecorders int64
}
type ReaderTimeSyncOption func(*readerTimeSyncCfg)
type readerTimeSyncCfg struct {
pulsarClient pulsar.Client
timeSyncConsumer pulsar.Consumer
readerConsumer pulsar.Consumer
readerProducer []pulsar.Producer
timesyncMsgChan chan TimeSyncMsg
insertOrDeleteChan chan *pb.InsertOrDeleteMsg //output insert or delete msg
interval int
proxyIdList []int64
readerQueueSize int
revTimesyncFromReader map[uint64]int
ctx context.Context
cancel context.CancelFunc
}
/*
layout of timestamp
time ms logic number
/-------46 bit-----------\/------18bit-----\
+-------------------------+================+
*/
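// For example, a physical time of 35 ms is encoded as 35 << 18 = 9175040,
// and toMillisecond recovers 9175040 >> 18 = 35.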
func toMillisecond(ts *pb.TimeSyncMsg) int {
// extract the physical time (the upper 46 bits) of the timestamp, in milliseconds
return int(ts.GetTimestamp() >> 18)
}
func NewReaderTimeSync(
pulsarAddr string,
timeSyncTopic string,
timeSyncSubName string,
readTopics []string,
readSubName string,
proxyIdList []int64,
interval int,
opts ...ReaderTimeSyncOption,
) (ReaderTimeSync, error) {
//check if proxyId has duplication
if len(proxyIdList) == 0 {
return nil, fmt.Errorf("proxy id list is empty")
}
if len(proxyIdList) > 1 {
sort.Slice(proxyIdList, func(i int, j int) bool { return proxyIdList[i] < proxyIdList[j] })
}
for i := 1; i < len(proxyIdList); i++ {
if proxyIdList[i] == proxyIdList[i-1] {
return nil, fmt.Errorf("there are two proxies have the same id = %d", proxyIdList[i])
}
}
r := &readerTimeSyncCfg{
interval: interval,
proxyIdList: proxyIdList,
}
for _, opt := range opts {
opt(r)
}
//check if read topic is empty
if len(readTopics) == 0 {
return nil, fmt.Errorf("read topic is empyt")
}
//set default value
if r.readerQueueSize == 0 {
r.readerQueueSize = 1024
}
r.timesyncMsgChan = make(chan TimeSyncMsg, len(readTopics)*r.readerQueueSize)
r.insertOrDeleteChan = make(chan *pb.InsertOrDeleteMsg, len(readTopics)*r.readerQueueSize)
r.revTimesyncFromReader = make(map[uint64]int)
r.ctx, r.cancel = context.WithCancel(context.Background())
client, err := pulsar.NewClient(pulsar.ClientOptions{URL: pulsarAddr})
if err != nil {
return nil, fmt.Errorf("connect pulsar failed, %v", err)
}
r.pulsarClient = client
timeSyncChan := make(chan pulsar.ConsumerMessage, len(r.proxyIdList))
if r.timeSyncConsumer, err = r.pulsarClient.Subscribe(pulsar.ConsumerOptions{
Topic: timeSyncTopic,
SubscriptionName: timeSyncSubName,
Type: pulsar.KeyShared,
SubscriptionInitialPosition: pulsar.SubscriptionPositionEarliest,
MessageChannel: timeSyncChan,
}); err != nil {
return nil, fmt.Errorf("failed to subscribe topic %s, error = %v", timeSyncTopic, err)
}
readerChan := make(chan pulsar.ConsumerMessage, len(readTopics)*r.readerQueueSize)
if r.readerConsumer, err = r.pulsarClient.Subscribe(pulsar.ConsumerOptions{
Topics: readTopics,
SubscriptionName: readSubName,
Type: pulsar.KeyShared,
SubscriptionInitialPosition: pulsar.SubscriptionPositionEarliest,
MessageChannel: readerChan,
}); err != nil {
return nil, fmt.Errorf("failed to subscrive reader topics : %v, error = %v", readTopics, err)
}
r.readerProducer = make([]pulsar.Producer, 0, len(readTopics))
for i := 0; i < len(readTopics); i++ {
rp, err := r.pulsarClient.CreateProducer(pulsar.ProducerOptions{Topic: readTopics[i]})
if err != nil {
return nil, fmt.Errorf("failed to create reader producer %s, error = %v", readTopics[i], err)
}
r.readerProducer = append(r.readerProducer, rp)
}
return r, nil
}
func (r *readerTimeSyncCfg) Close() {
r.cancel()
r.timeSyncConsumer.Close()
r.readerConsumer.Close()
for i := 0; i < len(r.readerProducer); i++ {
r.readerProducer[i].Close()
}
r.pulsarClient.Close()
}
func (r *readerTimeSyncCfg) Start() error {
go r.startReadTopics()
go r.startTimeSync()
return r.ctx.Err()
}
func (r *readerTimeSyncCfg) InsertOrDelete() <-chan *pb.InsertOrDeleteMsg {
return r.insertOrDeleteChan
}
func (r *readerTimeSyncCfg) TimeSync() <-chan TimeSyncMsg {
return r.timesyncMsgChan
}
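// insertOrDeleteChan is created with capacity len(readTopics)*readerQueueSize and
// readerProducer holds one producer per read topic, so the channel is considered
// full once its length reaches len(readerProducer)*readerQueueSize.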
func (r *readerTimeSyncCfg) IsInsertDeleteChanFull() bool {
return len(r.insertOrDeleteChan) == len(r.readerProducer)*r.readerQueueSize
}
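// alignTimeSync keeps at most one recent TimeSyncMsg per proxy: scanning backwards,
// if two adjacent messages are at least half the sync interval apart, everything
// before the newer one is dropped; otherwise the last len(proxyIdList) messages are
// kept, sorted by peer id, and cleared entirely if they do not match proxyIdList.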
func (r *readerTimeSyncCfg) alignTimeSync(ts []*pb.TimeSyncMsg) []*pb.TimeSyncMsg {
if len(r.proxyIdList) > 1 {
if len(ts) > 1 {
for i := 1; i < len(r.proxyIdList); i++ {
curIdx := len(ts) - 1 - i
preIdx := len(ts) - i
timeGap := toMillisecond(ts[curIdx]) - toMillisecond(ts[preIdx])
if timeGap >= (r.interval/2) || timeGap <= (-r.interval/2) {
ts = ts[preIdx:]
return ts
}
}
ts = ts[len(ts)-len(r.proxyIdList):]
sort.Slice(ts, func(i int, j int) bool { return ts[i].Peer_Id < ts[j].Peer_Id })
for i := 0; i < len(r.proxyIdList); i++ {
if ts[i].Peer_Id != r.proxyIdList[i] {
ts = ts[:0]
return ts
}
}
}
} else {
ts = ts[len(ts)-1:]
return ts
}
return ts
}
func (r *readerTimeSyncCfg) readTimeSync(ctx context.Context, ts []*pb.TimeSyncMsg, n int) ([]*pb.TimeSyncMsg, error) {
for i := 0; i < n; i++ {
select {
case <-ctx.Done():
return nil, ctx.Err()
case cm := <-r.timeSyncConsumer.Chan():
msg := cm.Message
var tsm pb.TimeSyncMsg
if err := proto.Unmarshal(msg.Payload(), &tsm); err != nil {
return nil, err
}
ts = append(ts, &tsm)
r.timeSyncConsumer.AckID(msg.ID())
}
}
return ts, nil
}
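// startTimeSync repeatedly gathers one aligned TimeSyncMsg per proxy, takes the
// smallest timestamp among them, and broadcasts that timestamp as a flag message
// (ClientId == TimeSyncClientId) to every reader topic.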
func (r *readerTimeSyncCfg) startTimeSync() {
tsm := make([]*pb.TimeSyncMsg, 0, len(r.proxyIdList)*2)
ctx := r.ctx
var err error
for {
for len(tsm) != len(r.proxyIdList) {
tsm = r.alignTimeSync(tsm)
tsm, err = r.readTimeSync(ctx, tsm, len(r.proxyIdList)-len(tsm))
if err != nil {
if ctx.Err() != nil {
return
} else {
//TODO, log error msg
log.Printf("read time sync error %v", err)
}
}
}
ts := tsm[0].Timestamp
for i := 1; i < len(tsm); i++ {
if tsm[i].Timestamp < ts {
ts = tsm[i].Timestamp
}
}
tsm = tsm[:0]
//send timestamp flag to reader channel
msg := pb.InsertOrDeleteMsg{Timestamp: ts, ClientId: TimeSyncClientId}
payload, err := proto.Marshal(&msg)
if err != nil {
//TODO log error
log.Printf("Marshal timesync flag error %v", err)
} else {
for _, p := range r.readerProducer {
if _, err := p.Send(ctx, &pulsar.ProducerMessage{Payload: payload}); err != nil {
//TODO, log error
log.Printf("Send timesync flag error %v", err)
}
}
}
}
}
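// startReadTopics consumes the reader topics: timestamp-flag messages are counted per
// timestamp, and once a flag has been seen from every reader producer a TimeSyncMsg is
// emitted on timesyncMsgChan; all other messages are forwarded to insertOrDeleteChan
// and counted in NumRecorders.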
func (r *readerTimeSyncCfg) startReadTopics() {
ctx := r.ctx
tsm := TimeSyncMsg{Timestamp: 0, NumRecorders: 0}
for {
select {
case <-ctx.Done():
return
case cm := <-r.readerConsumer.Chan():
msg := cm.Message
var imsg pb.InsertOrDeleteMsg
if err := proto.Unmarshal(msg.Payload(), &imsg); err != nil {
//TODO, log error
log.Printf("unmarshal InsertOrDeleteMsg error %v", err)
break
}
if imsg.ClientId == TimeSyncClientId { //timestamp flag
gval := r.revTimesyncFromReader[imsg.Timestamp]
gval++
if gval >= len(r.readerProducer) {
if imsg.Timestamp >= tsm.Timestamp {
tsm.Timestamp = imsg.Timestamp
r.timesyncMsgChan <- tsm
tsm.NumRecorders = 0
}
delete(r.revTimesyncFromReader, imsg.Timestamp)
} else {
r.revTimesyncFromReader[imsg.Timestamp] = gval
}
} else {
if r.IsInsertDeleteChanFull() {
log.Printf("WARN : Insert or delete chan is full ...")
}
tsm.NumRecorders++
r.insertOrDeleteChan <- &imsg
}
r.readerConsumer.AckID(msg.ID())
}
}
}
func WithReaderQueueSize(size int) ReaderTimeSyncOption {
return func(r *readerTimeSyncCfg) {
r.readerQueueSize = size
}
}

View File

@ -0,0 +1,416 @@
package readertimesync
import (
"context"
"github.com/apache/pulsar-client-go/pulsar"
pb "github.com/czs007/suvlim/pkg/message"
"github.com/golang/protobuf/proto"
"log"
"testing"
"time"
)
const (
pulsarAddr = "pulsar://localhost:6650"
timeSyncTopic = "timesync"
timeSyncSubName = "timesync-g"
readerTopic1 = "reader1"
readerTopic2 = "reader2"
readerTopic3 = "reader3"
readerTopic4 = "reader4"
readerSubName = "reader-g"
interval = 200
)
func TestAlignTimeSync(t *testing.T) {
r := &readerTimeSyncCfg{
proxyIdList: []int64{1, 2, 3},
interval: 200,
}
ts := []*pb.TimeSyncMsg{
{
Peer_Id: 1,
Timestamp: toTimestamp(5),
},
{
Peer_Id: 3,
Timestamp: toTimestamp(15),
},
{
Peer_Id: 2,
Timestamp: toTimestamp(20),
},
}
r.alignTimeSync(ts)
if len(r.proxyIdList) != 3 {
t.Fatalf("proxyIdList should be : 1 2 3")
}
for i := 0; i < len(r.proxyIdList); i++ {
if r.proxyIdList[i] != ts[i].Peer_Id {
t.Fatalf("Align falied")
}
}
}
func TestAlignTimeSync2(t *testing.T) {
r := &readerTimeSyncCfg{
proxyIdList: []int64{1, 2, 3},
interval: 200,
}
ts := []*pb.TimeSyncMsg{
{
Peer_Id: 1,
Timestamp: toTimestamp(5),
},
{
Peer_Id: 3,
Timestamp: toTimestamp(150),
},
{
Peer_Id: 2,
Timestamp: toTimestamp(20),
},
}
ts = r.alignTimeSync(ts)
if len(r.proxyIdList) != 3 {
t.Fatalf("proxyIdList should be : 1 2 3")
}
if len(ts) != 1 || ts[0].Peer_Id != 2 {
t.Fatalf("align failed")
}
}
func TestAlignTimeSync3(t *testing.T) {
r := &readerTimeSyncCfg{
proxyIdList: []int64{1, 2, 3},
interval: 200,
}
ts := []*pb.TimeSyncMsg{
{
Peer_Id: 1,
Timestamp: toTimestamp(5),
},
{
Peer_Id: 1,
Timestamp: toTimestamp(5),
},
{
Peer_Id: 1,
Timestamp: toTimestamp(5),
},
{
Peer_Id: 3,
Timestamp: toTimestamp(15),
},
{
Peer_Id: 2,
Timestamp: toTimestamp(20),
},
}
ts = r.alignTimeSync(ts)
if len(r.proxyIdList) != 3 {
t.Fatalf("proxyIdList should be : 1 2 3")
}
for i := 0; i < len(r.proxyIdList); i++ {
if r.proxyIdList[i] != ts[i].Peer_Id {
t.Fatalf("Align falied")
}
}
}
func TestAlignTimeSync4(t *testing.T) {
r := &readerTimeSyncCfg{
proxyIdList: []int64{1},
interval: 200,
}
ts := []*pb.TimeSyncMsg{
{
Peer_Id: 1,
Timestamp: toTimestamp(15),
},
{
Peer_Id: 1,
Timestamp: toTimestamp(25),
},
{
Peer_Id: 1,
Timestamp: toTimestamp(35),
},
}
ts = r.alignTimeSync(ts)
if len(r.proxyIdList) != 1 {
t.Fatalf("proxyIdList should be : 1")
}
if len(ts) != 1 {
t.Fatalf("aligned failed")
}
if getMillisecond(ts[0].Timestamp) != 35 {
t.Fatalf("aligned failed")
}
}
func TestAlignTimeSync5(t *testing.T) {
r := &readerTimeSyncCfg{
proxyIdList: []int64{1, 2, 3},
interval: 200,
}
ts := []*pb.TimeSyncMsg{
{
Peer_Id: 1,
Timestamp: toTimestamp(5),
},
{
Peer_Id: 1,
Timestamp: toTimestamp(5),
},
{
Peer_Id: 1,
Timestamp: toTimestamp(5),
},
{
Peer_Id: 3,
Timestamp: toTimestamp(15),
},
{
Peer_Id: 3,
Timestamp: toTimestamp(20),
},
}
ts = r.alignTimeSync(ts)
if len(ts) != 0 {
t.Fatalf("aligned failed")
}
}
func TestNewReaderTimeSync(t *testing.T) {
r, err := NewReaderTimeSync(pulsarAddr,
timeSyncTopic,
timeSyncSubName,
[]string{readerTopic1, readerTopic2, readerTopic3, readerTopic4},
readerSubName,
[]int64{2, 1},
interval,
WithReaderQueueSize(8),
)
if err != nil {
t.Fatal(err)
}
rr := r.(*readerTimeSyncCfg)
if rr.pulsarClient == nil {
t.Fatalf("create pulsar client failed")
}
if rr.timeSyncConsumer == nil {
t.Fatalf("create time sync consumer failed")
}
if rr.readerConsumer == nil {
t.Fatalf("create reader consumer failed")
}
if len(rr.readerProducer) != 4 {
t.Fatalf("create reader producer failed")
}
if rr.interval != interval {
t.Fatalf("interval shoudl be %d", interval)
}
if rr.readerQueueSize != 8 {
t.Fatalf("set read queue size failed")
}
if len(rr.proxyIdList) != 2 {
t.Fatalf("set proxy id failed")
}
if rr.proxyIdList[0] != 1 || rr.proxyIdList[1] != 2 {
t.Fatalf("set proxy id failed")
}
r.Close()
}
func TestPulsarClient(t *testing.T) {
t.Skip("skip pulsar client")
client, err := pulsar.NewClient(pulsar.ClientOptions{URL: pulsarAddr})
if err != nil {
t.Fatal(err)
}
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()
go startWriteTimeSync(1, timeSyncTopic, client, 2*time.Second, t)
go startWriteTimeSync(2, timeSyncTopic, client, 2*time.Second, t)
timeSyncChan := make(chan pulsar.ConsumerMessage)
consumer, err := client.Subscribe(pulsar.ConsumerOptions{
Topic: timeSyncTopic,
SubscriptionName: timeSyncSubName,
Type: pulsar.KeyShared,
SubscriptionInitialPosition: pulsar.SubscriptionPositionEarliest,
MessageChannel: timeSyncChan,
})
if err != nil {
log.Fatal(err)
}
for {
select {
case cm := <-timeSyncChan:
msg := cm.Message
var tsm pb.TimeSyncMsg
if err := proto.Unmarshal(msg.Payload(), &tsm); err != nil {
log.Fatal(err)
}
consumer.AckID(msg.ID())
log.Printf("read time stamp, id = %d, time stamp = %d\n", tsm.Peer_Id, tsm.Timestamp)
case <-ctx.Done():
break
}
if ctx.Err() != nil {
break
}
}
}
func TestReaderTimesync(t *testing.T) {
r, err := NewReaderTimeSync(pulsarAddr,
timeSyncTopic,
timeSyncSubName,
[]string{readerTopic1, readerTopic2, readerTopic3, readerTopic4},
readerSubName,
[]int64{2, 1},
interval,
WithReaderQueueSize(1024),
)
if err != nil {
t.Fatal(err)
}
rr := r.(*readerTimeSyncCfg)
pt1, err := rr.pulsarClient.CreateProducer(pulsar.ProducerOptions{Topic: timeSyncTopic})
if err != nil {
t.Fatalf("create time sync producer 1 error %v", err)
}
pt2, err := rr.pulsarClient.CreateProducer(pulsar.ProducerOptions{Topic: timeSyncTopic})
if err != nil {
t.Fatalf("create time sync producer 2 error %v", err)
}
pr1, err := rr.pulsarClient.CreateProducer(pulsar.ProducerOptions{Topic: readerTopic1})
if err != nil {
t.Fatalf("create reader 1 error %v", err)
}
pr2, err := rr.pulsarClient.CreateProducer(pulsar.ProducerOptions{Topic: readerTopic2})
if err != nil {
t.Fatalf("create reader 2 error %v", err)
}
pr3, err := rr.pulsarClient.CreateProducer(pulsar.ProducerOptions{Topic: readerTopic3})
if err != nil {
t.Fatalf("create reader 3 error %v", err)
}
pr4, err := rr.pulsarClient.CreateProducer(pulsar.ProducerOptions{Topic: readerTopic4})
if err != nil {
t.Fatalf("create reader 4 error %v", err)
}
go startProxy(pt1, 1, pr1, 1, pr2, 2, 2*time.Second, t)
go startProxy(pt2, 2, pr3, 3, pr4, 4, 2*time.Second, t)
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()
r.Start()
var tsm1, tsm2 TimeSyncMsg
var totalRecords int64 = 0
for {
if ctx.Err() != nil {
break
}
select {
case <-ctx.Done():
tsm1.NumRecorders = 0
break
case tsm1 = <-r.TimeSync():
}
if tsm1.NumRecorders > 0 {
log.Printf("timestamp %d, num records = %d", getMillisecond(tsm1.Timestamp), tsm1.NumRecorders)
totalRecords += tsm1.NumRecorders
for i := int64(0); i < tsm1.NumRecorders; i++ {
im := <-r.InsertOrDelete()
//log.Printf("%d - %d", getMillisecond(im.Timestamp), getMillisecond(tsm2.Timestamp))
if im.Timestamp < tsm2.Timestamp {
t.Fatalf("time sync error , im.Timestamp = %d, tsm2.Timestamp = %d", im.Timestamp, tsm2.Timestamp)
}
}
tsm2 = tsm1
}
}
log.Printf("total recordes = %d", totalRecordes)
if totalRecordes != 800 {
t.Fatalf("total records should be 800")
}
r.Close()
}
func getMillisecond(ts uint64) uint64 {
return ts >> 18
}
func toTimestamp(ts uint64) uint64 {
return ts << 18
}
func startWriteTimeSync(id int64, topic string, client pulsar.Client, duration time.Duration, t *testing.T) {
p, _ := client.CreateProducer(pulsar.ProducerOptions{Topic: topic})
ticker := time.Tick(interval * time.Millisecond)
numSteps := int(duration / (interval * time.Millisecond))
var tm uint64 = 0
for i := 0; i < numSteps; i++ {
<-ticker
tm += interval
tsm := pb.TimeSyncMsg{Timestamp: toTimestamp(tm), Peer_Id: id}
tb, _ := proto.Marshal(&tsm)
if _, err := p.Send(context.Background(), &pulsar.ProducerMessage{Payload: tb}); err != nil {
t.Fatalf("send failed tsm id=%d, timestamp=%d, err=%v", tsm.Peer_Id, tsm.Timestamp, err)
} else {
//log.Printf("send tsm id=%d, timestamp=%d", tsm.Peer_Id, tsm.Timestamp)
}
}
}
func startProxy(pt pulsar.Producer, ptid int64, pr1 pulsar.Producer, prid1 int64, pr2 pulsar.Producer, prid2 int64, duration time.Duration, t *testing.T) {
total := int(duration / (10 * time.Millisecond))
ticker := time.Tick(10 * time.Millisecond)
var timestamp uint64 = 0
for i := 1; i <= total; i++ {
<-ticker
timestamp += 10
msg := pb.InsertOrDeleteMsg{ClientId: prid1, Timestamp: toTimestamp(timestamp)}
mb, err := proto.Marshal(&msg)
if err != nil {
t.Fatalf("marshal error %v", err)
}
if _, err := pr1.Send(context.Background(), &pulsar.ProducerMessage{Payload: mb}); err != nil {
t.Fatalf("send msg error %v", err)
}
msg.ClientId = prid2
mb, err = proto.Marshal(&msg)
if err != nil {
t.Fatalf("marshal error %v", err)
}
if _, err := pr2.Send(context.Background(), &pulsar.ProducerMessage{Payload: mb}); err != nil {
t.Fatalf("send msg error %v", err)
}
//log.Printf("send msg id = [ %d %d ], timestamp = %d", prid1, prid2, timestamp)
if i%20 == 0 {
tm := pb.TimeSyncMsg{Peer_Id: ptid, Timestamp: toTimestamp(timestamp)}
tb, err := proto.Marshal(&tm)
if err != nil {
t.Fatalf("marshal error %v", err)
}
if _, err := pt.Send(context.Background(), &pulsar.ProducerMessage{Payload: tb}); err != nil {
t.Fatalf("send msg error %v", err)
}
//log.Printf("send timestamp id = %d, timestamp = %d", ptid, timestamp)
}
}
}