feat: Support stats task to sort segment by PK (#35054)

issue: #33744 

This PR includes the following changes:
1. Added a new task type, the stats task, to the datacoord task scheduler; it sorts segments by primary key (see the sketch after this list).
2. Implemented segment sorting in indexnode.
3. Added a new field `FieldStatsLog` to SegmentInfo to store token index
information.
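
As a rough illustration of why PK-sorted segments help (a minimal sketch with hypothetical names such as `findPKOffsets`, not this PR's API): once a segment is sorted by primary key, a point lookup can binary-search the PK column and then scan forward over duplicates, which is what the new `search_pk` path in segcore does with `std::lower_bound`.

```go
package main

import (
	"fmt"
	"sort"
)

// findPKOffsets returns the row offsets whose primary key equals target.
// It assumes pks is sorted ascending, which is what the stats task guarantees
// for a sealed segment after sorting it by primary key.
func findPKOffsets(pks []int64, target int64) []int64 {
	var offsets []int64
	// Binary search for the first element >= target.
	i := sort.Search(len(pks), func(j int) bool { return pks[j] >= target })
	// Collect every row whose pk equals target (duplicates are adjacent).
	for ; i < len(pks) && pks[i] == target; i++ {
		offsets = append(offsets, int64(i))
	}
	return offsets
}

func main() {
	sortedPKs := []int64{1, 3, 3, 7, 9, 12}
	fmt.Println(findPKOffsets(sortedPKs, 3)) // prints [1 2]
}
```

This is also why the segcore changes below skip building the pk-to-offset map when `is_sorted_by_pk_` is set.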

---------

Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
cai.zhang 2024-09-02 14:19:03 +08:00 committed by GitHub
parent 9d80137698
commit 2c9bb4dfa3
110 changed files with 5617 additions and 1438 deletions


@ -189,7 +189,8 @@ SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
case DataType::INT64: {
auto int64_index = dynamic_cast<index::ScalarIndex<int64_t>*>(
scalar_indexings_[field_id].get());
if (insert_record_.empty_pks() && int64_index->HasRawData()) {
if (!is_sorted_by_pk_ && insert_record_.empty_pks() &&
int64_index->HasRawData()) {
for (int i = 0; i < row_count; ++i) {
insert_record_.insert_pk(int64_index->Reverse_Lookup(i),
i);
@ -202,7 +203,8 @@ SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
auto string_index =
dynamic_cast<index::ScalarIndex<std::string>*>(
scalar_indexings_[field_id].get());
if (insert_record_.empty_pks() && string_index->HasRawData()) {
if (!is_sorted_by_pk_ && insert_record_.empty_pks() &&
string_index->HasRawData()) {
for (int i = 0; i < row_count; ++i) {
insert_record_.insert_pk(
string_index->Reverse_Lookup(i), i);
@ -445,7 +447,9 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) {
}
// set pks to offset
if (schema_->get_primary_field_id() == field_id) {
// If the segment is already sorted by pk, there is no need to build a pk offset index;
// lookups can binary search the pk column directly.
if (schema_->get_primary_field_id() == field_id && !is_sorted_by_pk_) {
AssertInfo(field_id.get() != -1, "Primary key is -1");
AssertInfo(insert_record_.empty_pks(), "already exists");
insert_record_.insert_pks(data_type, column);
@ -571,7 +575,8 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) {
strerror(errno)));
// set pks to offset
if (schema_->get_primary_field_id() == field_id) {
// no need to build the pk offset index when the segment is sorted by pk
if (schema_->get_primary_field_id() == field_id && !is_sorted_by_pk_) {
AssertInfo(field_id.get() != -1, "Primary key is -1");
AssertInfo(insert_record_.empty_pks(), "already exists");
insert_record_.insert_pks(data_type, column);
@ -721,6 +726,182 @@ SegmentSealedImpl::get_schema() const {
return *schema_;
}
std::vector<SegOffset>
SegmentSealedImpl::search_pk(const PkType& pk, Timestamp timestamp) const {
auto pk_field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
AssertInfo(pk_field_id.get() != -1, "Primary key is -1");
auto pk_column = fields_.at(pk_field_id);
std::vector<SegOffset> pk_offsets;
switch (schema_->get_fields().at(pk_field_id).get_data_type()) {
case DataType::INT64: {
auto target = std::get<int64_t>(pk);
// get int64 pks
auto src = reinterpret_cast<const int64_t*>(pk_column->Data());
auto it =
std::lower_bound(src,
src + pk_column->NumRows(),
target,
[](const int64_t& elem, const int64_t& value) {
return elem < value;
});
for (; it != src + pk_column->NumRows() && *it == target; it++) {
auto offset = it - src;
if (insert_record_.timestamps_[offset] <= timestamp) {
pk_offsets.emplace_back(it - src);
}
}
break;
}
case DataType::VARCHAR: {
auto target = std::get<std::string>(pk);
// get varchar pks
auto var_column =
std::dynamic_pointer_cast<VariableColumn<std::string>>(
pk_column);
auto views = var_column->Views();
auto it = std::lower_bound(views.begin(), views.end(), target);
for (; it != views.end() && *it == target; it++) {
auto offset = std::distance(views.begin(), it);
if (insert_record_.timestamps_[offset] <= timestamp) {
pk_offsets.emplace_back(offset);
}
}
break;
}
default: {
PanicInfo(
DataTypeInvalid,
fmt::format(
"unsupported type {}",
schema_->get_fields().at(pk_field_id).get_data_type()));
}
}
return pk_offsets;
}
std::vector<SegOffset>
SegmentSealedImpl::search_pk(const PkType& pk, int64_t insert_barrier) const {
auto pk_field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
AssertInfo(pk_field_id.get() != -1, "Primary key is -1");
auto pk_column = fields_.at(pk_field_id);
std::vector<SegOffset> pk_offsets;
switch (schema_->get_fields().at(pk_field_id).get_data_type()) {
case DataType::INT64: {
auto target = std::get<int64_t>(pk);
// get int64 pks
auto src = reinterpret_cast<const int64_t*>(pk_column->Data());
auto it =
std::lower_bound(src,
src + pk_column->NumRows(),
target,
[](const int64_t& elem, const int64_t& value) {
return elem < value;
});
for (; it != src + pk_column->NumRows() && *it == target; it++) {
if (it - src < insert_barrier) {
pk_offsets.emplace_back(it - src);
}
}
break;
}
case DataType::VARCHAR: {
auto target = std::get<std::string>(pk);
// get varchar pks
auto var_column =
std::dynamic_pointer_cast<VariableColumn<std::string>>(
pk_column);
auto views = var_column->Views();
auto it = std::lower_bound(views.begin(), views.end(), target);
while (it != views.end() && *it == target) {
auto offset = std::distance(views.begin(), it);
if (offset < insert_barrier) {
pk_offsets.emplace_back(offset);
}
++it;
}
break;
}
default: {
PanicInfo(
DataTypeInvalid,
fmt::format(
"unsupported type {}",
schema_->get_fields().at(pk_field_id).get_data_type()));
}
}
return pk_offsets;
}
std::shared_ptr<DeletedRecord::TmpBitmap>
SegmentSealedImpl::get_deleted_bitmap_s(int64_t del_barrier,
int64_t insert_barrier,
DeletedRecord& delete_record,
Timestamp query_timestamp) const {
// if insert_barrier and del_barrier have not changed, use cache data directly
bool hit_cache = false;
int64_t old_del_barrier = 0;
auto current = delete_record.clone_lru_entry(
insert_barrier, del_barrier, old_del_barrier, hit_cache);
if (hit_cache) {
return current;
}
auto bitmap = current->bitmap_ptr;
int64_t start, end;
if (del_barrier < old_del_barrier) {
// in this case, ts of delete record[current_del_barrier : old_del_barrier] > query_timestamp
// so these deletion records do not take effect in query/search
// so bitmap corresponding to those pks in delete record[current_del_barrier:old_del_barrier] will be reset to 0
// for example, current_del_barrier = 2, query_time = 120, the bitmap will be reset to [0, 1, 1, 0, 0, 0, 0, 0]
start = del_barrier;
end = old_del_barrier;
} else {
// the cache is not enough, so update bitmap using new pks in delete record[old_del_barrier:current_del_barrier]
// for example, current_del_barrier = 4, query_time = 300, bitmap will be updated to [0, 1, 1, 0, 1, 1, 0, 0]
start = old_del_barrier;
end = del_barrier;
}
// Avoid redundant lookups when there are many repeated delete pks
std::unordered_map<PkType, Timestamp> delete_timestamps;
for (auto del_index = start; del_index < end; ++del_index) {
auto pk = delete_record.pks()[del_index];
auto timestamp = delete_record.timestamps()[del_index];
delete_timestamps[pk] = timestamp > delete_timestamps[pk]
? timestamp
: delete_timestamps[pk];
}
for (auto& [pk, timestamp] : delete_timestamps) {
auto segOffsets = search_pk(pk, insert_barrier);
for (auto offset : segOffsets) {
int64_t insert_row_offset = offset.get();
// The deletion record does not take effect in search/query,
// so reset the bitmap to 0
if (timestamp > query_timestamp) {
bitmap->reset(insert_row_offset);
continue;
}
// If the insert with the same pk happened after the delete, the delete does not take effect
// on this insert record, so reset the bitmap to 0
if (insert_record_.timestamps_[offset.get()] >= timestamp) {
bitmap->reset(insert_row_offset);
continue;
}
// insert data corresponding to the insert_row_offset will be ignored in search/query
bitmap->set(insert_row_offset);
}
}
delete_record.insert_lru_entry(current);
return current;
}
void
SegmentSealedImpl::mask_with_delete(BitsetType& bitset,
int64_t ins_barrier,
@ -730,8 +911,19 @@ SegmentSealedImpl::mask_with_delete(BitsetType& bitset,
return;
}
auto bitmap_holder = get_deleted_bitmap(
del_barrier, ins_barrier, deleted_record_, insert_record_, timestamp);
auto bitmap_holder = std::shared_ptr<DeletedRecord::TmpBitmap>();
if (!is_sorted_by_pk_) {
bitmap_holder = get_deleted_bitmap(del_barrier,
ins_barrier,
deleted_record_,
insert_record_,
timestamp);
} else {
bitmap_holder = get_deleted_bitmap_s(
del_barrier, ins_barrier, deleted_record_, timestamp);
}
if (!bitmap_holder || !bitmap_holder->bitmap_ptr) {
return;
}
@ -1037,7 +1229,8 @@ SegmentSealedImpl::SegmentSealedImpl(SchemaPtr schema,
IndexMetaPtr index_meta,
const SegcoreConfig& segcore_config,
int64_t segment_id,
bool TEST_skip_index_for_retrieve)
bool TEST_skip_index_for_retrieve,
bool is_sorted_by_pk)
: segcore_config_(segcore_config),
field_data_ready_bitset_(schema->size()),
index_ready_bitset_(schema->size()),
@ -1047,7 +1240,8 @@ SegmentSealedImpl::SegmentSealedImpl(SchemaPtr schema,
schema_(schema),
id_(segment_id),
col_index_meta_(index_meta),
TEST_skip_index_for_retrieve_(TEST_skip_index_for_retrieve) {
TEST_skip_index_for_retrieve_(TEST_skip_index_for_retrieve),
is_sorted_by_pk_(is_sorted_by_pk) {
mmap_descriptor_ = std::shared_ptr<storage::MmapChunkDescriptor>(
new storage::MmapChunkDescriptor({segment_id, SegmentType::Sealed}));
auto mcm = storage::MmapManager::GetInstance().GetMmapChunkManager();
@ -1506,13 +1700,18 @@ SegmentSealedImpl::search_ids(const IdArray& id_array,
auto ids_size = GetSizeOfIdArray(id_array);
std::vector<PkType> pks(ids_size);
ParsePksFromIDs(pks, data_type, id_array);
auto res_id_arr = std::make_unique<IdArray>();
std::vector<SegOffset> res_offsets;
res_offsets.reserve(pks.size());
for (auto& pk : pks) {
auto segOffsets = insert_record_.search_pk(pk, timestamp);
for (auto offset : segOffsets) {
std::vector<SegOffset> pk_offsets;
if (!is_sorted_by_pk_) {
pk_offsets = insert_record_.search_pk(pk, timestamp);
} else {
pk_offsets = search_pk(pk, timestamp);
}
for (auto offset : pk_offsets) {
switch (data_type) {
case DataType::INT64: {
res_id_arr->mutable_int_id()->add_data(
@ -1535,6 +1734,39 @@ SegmentSealedImpl::search_ids(const IdArray& id_array,
return {std::move(res_id_arr), std::move(res_offsets)};
}
std::pair<std::vector<OffsetMap::OffsetType>, bool>
SegmentSealedImpl::find_first(int64_t limit, const BitsetType& bitset) const {
if (!is_sorted_by_pk_) {
return insert_record_.pk2offset_->find_first(limit, bitset);
}
if (limit == Unlimited || limit == NoLimit) {
limit = num_rows_.value();
}
int64_t hit_num = 0;  // avoid recounting every time.
auto size = bitset.size();
int64_t cnt = size - bitset.count();
auto more_hit_than_limit = cnt > limit;
limit = std::min(limit, cnt);
std::vector<int64_t> seg_offsets;
seg_offsets.reserve(limit);
int64_t offset = 0;
for (; hit_num < limit && offset < num_rows_.value(); offset++) {
if (offset >= size) {
// In fact, this case won't happen on sealed segments.
continue;
}
if (!bitset[offset]) {
seg_offsets.push_back(offset);
hit_num++;
}
}
return {seg_offsets, more_hit_than_limit && offset != num_rows_.value()};
}
SegcoreError
SegmentSealedImpl::Delete(int64_t reserved_offset, // deprecated
int64_t size,


@ -43,7 +43,8 @@ class SegmentSealedImpl : public SegmentSealed {
IndexMetaPtr index_meta,
const SegcoreConfig& segcore_config,
int64_t segment_id,
bool TEST_skip_index_for_retrieve = false);
bool TEST_skip_index_for_retrieve = false,
bool is_sorted_by_pk = false);
~SegmentSealedImpl() override;
void
LoadIndex(const LoadIndexInfo& info) override;
@ -105,6 +106,18 @@ class SegmentSealedImpl : public SegmentSealed {
const Schema&
get_schema() const override;
std::vector<SegOffset>
search_pk(const PkType& pk, Timestamp timestamp) const;
std::vector<SegOffset>
search_pk(const PkType& pk, int64_t insert_barrier) const;
std::shared_ptr<DeletedRecord::TmpBitmap>
get_deleted_bitmap_s(int64_t del_barrier,
int64_t insert_barrier,
DeletedRecord& delete_record,
Timestamp query_timestamp) const;
std::unique_ptr<DataArray>
get_vector(FieldId field_id, const int64_t* ids, int64_t count) const;
@ -142,9 +155,7 @@ class SegmentSealedImpl : public SegmentSealed {
const Timestamp* timestamps) override;
std::pair<std::vector<OffsetMap::OffsetType>, bool>
find_first(int64_t limit, const BitsetType& bitset) const override {
return insert_record_.pk2offset_->find_first(limit, bitset);
}
find_first(int64_t limit, const BitsetType& bitset) const override;
// Calculate: output[i] = Vec[seg_offset[i]]
// where Vec is determined from field_offset
@ -343,6 +354,9 @@ class SegmentSealedImpl : public SegmentSealed {
// for sparse vector unit test only! Once a type of sparse index that
// doesn't has raw data is added, this should be removed.
bool TEST_skip_index_for_retrieve_ = false;
// whether the segment is sorted by the pk
bool is_sorted_by_pk_ = false;
};
inline SegmentSealedUPtr
@ -351,12 +365,14 @@ CreateSealedSegment(
IndexMetaPtr index_meta = nullptr,
int64_t segment_id = -1,
const SegcoreConfig& segcore_config = SegcoreConfig::default_config(),
bool TEST_skip_index_for_retrieve = false) {
bool TEST_skip_index_for_retrieve = false,
bool is_sorted_by_pk = false) {
return std::make_unique<SegmentSealedImpl>(schema,
index_meta,
segcore_config,
segment_id,
TEST_skip_index_for_retrieve);
TEST_skip_index_for_retrieve,
is_sorted_by_pk);
}
} // namespace milvus::segcore


@ -35,7 +35,8 @@ CStatus
NewSegment(CCollection collection,
SegmentType seg_type,
int64_t segment_id,
CSegmentInterface* newSegment) {
CSegmentInterface* newSegment,
bool is_sorted_by_pk) {
try {
auto col = static_cast<milvus::segcore::Collection*>(collection);
@ -50,7 +51,12 @@ NewSegment(CCollection collection,
case Sealed:
case Indexing:
segment = milvus::segcore::CreateSealedSegment(
col->get_schema(), col->get_index_meta(), segment_id);
col->get_schema(),
col->get_index_meta(),
segment_id,
milvus::segcore::SegcoreConfig::default_config(),
false,
is_sorted_by_pk);
break;
default:
PanicInfo(milvus::UnexpectedError,


@ -33,7 +33,8 @@ CStatus
NewSegment(CCollection collection,
SegmentType seg_type,
int64_t segment_id,
CSegmentInterface* newSegment);
CSegmentInterface* newSegment,
bool is_sorted_by_pk);
void
DeleteSegment(CSegmentInterface c_segment);


@ -388,10 +388,10 @@ TEST(CApiTest, GetCollectionNameTest) {
TEST(CApiTest, SegmentTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
CSegmentInterface a_segment;
status = NewSegment(collection, Invalid, -1, &a_segment);
status = NewSegment(collection, Invalid, -1, &a_segment, false);
ASSERT_NE(status.error_code, Success);
DeleteCollection(collection);
DeleteSegment(segment);
@ -537,7 +537,7 @@ TEST(CApiTest, CApiCPlan_bfloat16) {
TEST(CApiTest, InsertTest) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
auto status = NewSegment(c_collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
@ -564,7 +564,7 @@ TEST(CApiTest, InsertTest) {
TEST(CApiTest, DeleteTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
std::vector<int64_t> delete_row_ids = {100000, 100001, 100002};
@ -590,7 +590,7 @@ TEST(CApiTest, DeleteTest) {
TEST(CApiTest, MultiDeleteGrowingSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
@ -712,7 +712,7 @@ TEST(CApiTest, MultiDeleteGrowingSegment) {
TEST(CApiTest, MultiDeleteSealedSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
auto status = NewSegment(collection, Sealed, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
@ -826,7 +826,7 @@ TEST(CApiTest, MultiDeleteSealedSegment) {
TEST(CApiTest, DeleteRepeatedPksFromGrowingSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
@ -931,7 +931,7 @@ TEST(CApiTest, DeleteRepeatedPksFromGrowingSegment) {
TEST(CApiTest, DeleteRepeatedPksFromSealedSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
auto status = NewSegment(collection, Sealed, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
@ -1014,7 +1014,7 @@ TEST(CApiTest, DeleteRepeatedPksFromSealedSegment) {
TEST(CApiTest, SearcTestWhenNullable) {
auto c_collection = NewCollection(get_default_schema_config_nullable());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
auto status = NewSegment(c_collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
@ -1082,7 +1082,7 @@ TEST(CApiTest, SearcTestWhenNullable) {
TEST(CApiTest, InsertSamePkAfterDeleteOnGrowingSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, 111, &segment);
auto status = NewSegment(collection, Growing, 111, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
@ -1189,7 +1189,7 @@ TEST(CApiTest, InsertSamePkAfterDeleteOnGrowingSegment) {
TEST(CApiTest, InsertSamePkAfterDeleteOnSealedSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
auto status = NewSegment(collection, Sealed, -1, &segment, true);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
@ -1261,7 +1261,7 @@ TEST(CApiTest, InsertSamePkAfterDeleteOnSealedSegment) {
TEST(CApiTest, SearchTest) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
auto status = NewSegment(c_collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
@ -1331,7 +1331,7 @@ TEST(CApiTest, SearchTest) {
TEST(CApiTest, SearchTestWithExpr) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
auto status = NewSegment(c_collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
@ -1398,7 +1398,7 @@ TEST(CApiTest, SearchTestWithExpr) {
TEST(CApiTest, RetrieveTestWithExpr) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
@ -1460,7 +1460,7 @@ TEST(CApiTest, RetrieveTestWithExpr) {
TEST(CApiTest, GetMemoryUsageInBytesTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto old_memory_usage_size = GetMemoryUsageInBytes(segment);
@ -1491,7 +1491,7 @@ TEST(CApiTest, GetMemoryUsageInBytesTest) {
TEST(CApiTest, GetDeletedCountTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
std::vector<int64_t> delete_row_ids = {100000, 100001, 100002};
@ -1522,7 +1522,7 @@ TEST(CApiTest, GetDeletedCountTest) {
TEST(CApiTest, GetRowCountTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
@ -1552,7 +1552,7 @@ TEST(CApiTest, GetRowCountTest) {
TEST(CApiTest, GetRealCount) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
@ -1602,7 +1602,7 @@ TEST(CApiTest, GetRealCount) {
TEST(CApiTest, ReduceNullResult) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
int N = 10000;
@ -1687,7 +1687,7 @@ TEST(CApiTest, ReduceNullResult) {
TEST(CApiTest, ReduceRemoveDuplicates) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
@ -1837,7 +1837,7 @@ testReduceSearchWithExpr(int N,
}
auto collection = NewCollection(schema_fun());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
@ -2113,7 +2113,7 @@ TEST(CApiTest, Indexing_Without_Predicate) {
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
@ -2262,7 +2262,7 @@ TEST(CApiTest, Indexing_Expr_Without_Predicate) {
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
@ -2412,7 +2412,7 @@ TEST(CApiTest, Indexing_With_float_Predicate_Range) {
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
@ -2590,7 +2590,7 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Range) {
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto N = 1000 * 10;
@ -2770,7 +2770,7 @@ TEST(CApiTest, Indexing_With_float_Predicate_Term) {
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
@ -2942,7 +2942,7 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Term) {
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto N = 1000 * 10;
@ -3116,7 +3116,7 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) {
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
@ -3296,7 +3296,7 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) {
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
@ -3476,7 +3476,7 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) {
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
@ -3673,7 +3673,7 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) {
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
@ -3860,7 +3860,7 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) {
TEST(CApiTest, SealedSegmentTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
auto status = NewSegment(collection, Sealed, -1, &segment, true);
ASSERT_EQ(status.error_code, Success);
int N = 1000;
@ -3886,7 +3886,7 @@ TEST(CApiTest, SealedSegment_search_float_Predicate_Range) {
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
auto status = NewSegment(collection, Sealed, -1, &segment, true);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
@ -4039,7 +4039,7 @@ TEST(CApiTest, SealedSegment_search_without_predicates) {
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
auto status = NewSegment(collection, Sealed, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
@ -4119,7 +4119,7 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
auto status = NewSegment(collection, Sealed, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
@ -4500,7 +4500,7 @@ TEST(CApiTest, RetriveScalarFieldFromSealedSegmentWithIndex) {
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_WHEN_IP) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
auto status = NewSegment(c_collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
@ -4565,7 +4565,7 @@ TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP) {
auto c_collection =
NewCollection(get_default_schema_config(), knowhere::metric::IP);
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
auto status = NewSegment(c_collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
@ -4629,7 +4629,7 @@ TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP) {
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_WHEN_L2) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
auto status = NewSegment(c_collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
@ -4693,7 +4693,7 @@ TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_WHEN_L2) {
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_L2) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
auto status = NewSegment(c_collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
@ -4912,7 +4912,7 @@ TEST(CApiTest, Indexing_Without_Predicate_float16) {
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
@ -5062,7 +5062,7 @@ TEST(CApiTest, Indexing_Without_Predicate_bfloat16) {
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
auto status = NewSegment(collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
@ -5207,7 +5207,7 @@ TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP_FLOAT16) {
auto c_collection =
NewCollection(get_float16_schema_config(), knowhere::metric::IP);
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
auto status = NewSegment(c_collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
@ -5272,7 +5272,7 @@ TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP_BFLOAT16) {
auto c_collection =
NewCollection(get_bfloat16_schema_config(), knowhere::metric::IP);
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
auto status = NewSegment(c_collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;


@ -21,10 +21,10 @@ TEST(CApiTest, StreamReduce) {
//1. set up segments
CSegmentInterface segment_1;
auto status = NewSegment(collection, Growing, -1, &segment_1);
auto status = NewSegment(collection, Growing, -1, &segment_1, false);
ASSERT_EQ(status.error_code, Success);
CSegmentInterface segment_2;
status = NewSegment(collection, Growing, -1, &segment_2);
status = NewSegment(collection, Growing, -1, &segment_2, false);
ASSERT_EQ(status.error_code, Success);
//2. insert data into segments
@ -208,7 +208,7 @@ TEST(CApiTest, StreamReduceGroupBY) {
}
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
auto status = NewSegment(c_collection, Growing, -1, &segment, false);
ASSERT_EQ(status.error_code, Success);
//2. generate data and insert


@ -1675,6 +1675,71 @@ TEST_P(ExprTest, test_term_pk) {
}
}
TEST_P(ExprTest, test_term_pk_with_sorted) {
auto schema = std::make_shared<Schema>();
schema->AddField(
FieldName("Timestamp"), FieldId(1), DataType::INT64, false);
auto vec_fid = schema->AddDebugField("fakevec", data_type, 16, metric_type);
auto str1_fid = schema->AddDebugField("string1", DataType::VARCHAR);
auto int64_fid = schema->AddDebugField("int64", DataType::INT64);
schema->set_primary_field_id(int64_fid);
auto seg = CreateSealedSegment(
schema, nullptr, 1, SegcoreConfig::default_config(), false, true);
int N = 100000;
auto raw_data = DataGen(schema, N);
// load field data
auto fields = schema->get_fields();
for (auto field_data : raw_data.raw_->fields_data()) {
int64_t field_id = field_data.field_id();
auto info = FieldDataInfo(field_data.field_id(), N, "/tmp/a");
auto field_meta = fields.at(FieldId(field_id));
info.channel->push(
CreateFieldDataFromDataArray(N, &field_data, field_meta));
info.channel->close();
seg->LoadFieldData(FieldId(field_id), info);
}
std::vector<proto::plan::GenericValue> retrieve_ints;
for (int i = 0; i < 10; ++i) {
proto::plan::GenericValue val;
val.set_int64_val(i);
retrieve_ints.push_back(val);
}
auto expr = std::make_shared<expr::TermFilterExpr>(
expr::ColumnInfo(int64_fid, DataType::INT64), retrieve_ints);
query::ExecPlanNodeVisitor visitor(*seg, MAX_TIMESTAMP);
BitsetType final;
auto plan =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
visitor.ExecuteExprNode(plan, seg.get(), N, final);
EXPECT_EQ(final.size(), N);
for (int i = 0; i < 10; ++i) {
EXPECT_EQ(final[i], true);
}
for (int i = 10; i < N; ++i) {
EXPECT_EQ(final[i], false);
}
retrieve_ints.clear();
for (int i = 0; i < 10; ++i) {
proto::plan::GenericValue val;
val.set_int64_val(i + N);
retrieve_ints.push_back(val);
}
expr = std::make_shared<expr::TermFilterExpr>(
expr::ColumnInfo(int64_fid, DataType::INT64), retrieve_ints);
plan = std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
visitor.ExecuteExprNode(plan, seg.get(), N, final);
EXPECT_EQ(final.size(), N);
for (int i = 0; i < N; ++i) {
EXPECT_EQ(final[i], false);
}
}
TEST_P(ExprTest, TestSealedSegmentGetBatchSize) {
auto schema = std::make_shared<Schema>();
auto vec_fid = schema->AddDebugField("fakevec", data_type, 16, metric_type);


@ -26,6 +26,7 @@ import (
"github.com/milvus-io/milvus/internal/metastore"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/timerecord"
)
@ -142,7 +143,7 @@ func (m *analyzeMeta) BuildingTask(taskID, nodeID int64) error {
return m.saveTask(cloneT)
}
func (m *analyzeMeta) FinishTask(taskID int64, result *indexpb.AnalyzeResult) error {
func (m *analyzeMeta) FinishTask(taskID int64, result *workerpb.AnalyzeResult) error {
m.Lock()
defer m.Unlock()


@ -26,6 +26,7 @@ import (
"github.com/milvus-io/milvus/internal/metastore/mocks"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
)
type AnalyzeMetaSuite struct {
@ -153,7 +154,7 @@ func (s *AnalyzeMetaSuite) Test_AnalyzeMeta() {
})
s.Run("FinishTask", func() {
err := am.FinishTask(1, &indexpb.AnalyzeResult{
err := am.FinishTask(1, &workerpb.AnalyzeResult{
TaskID: 1,
State: indexpb.JobState_JobStateFinished,
})
@ -239,7 +240,7 @@ func (s *AnalyzeMetaSuite) Test_failCase() {
err := am.FinishTask(777, nil)
s.Error(err)
err = am.FinishTask(1, &indexpb.AnalyzeResult{
err = am.FinishTask(1, &workerpb.AnalyzeResult{
TaskID: 1,
State: indexpb.JobState_JobStateFinished,
})


@ -102,7 +102,7 @@ func (c *compactionPlanHandler) getCompactionInfo(triggerID int64) *compactionIn
func summaryCompactionState(tasks []*datapb.CompactionTask) *compactionInfo {
ret := &compactionInfo{}
var executingCnt, pipeliningCnt, completedCnt, failedCnt, timeoutCnt, analyzingCnt, indexingCnt, cleanedCnt, metaSavedCnt int
var executingCnt, pipeliningCnt, completedCnt, failedCnt, timeoutCnt, analyzingCnt, indexingCnt, cleanedCnt, metaSavedCnt, stats int
mergeInfos := make(map[int64]*milvuspb.CompactionMergeInfo)
for _, task := range tasks {
@ -128,12 +128,14 @@ func summaryCompactionState(tasks []*datapb.CompactionTask) *compactionInfo {
cleanedCnt++
case datapb.CompactionTaskState_meta_saved:
metaSavedCnt++
case datapb.CompactionTaskState_statistic:
stats++
default:
}
mergeInfos[task.GetPlanID()] = getCompactionMergeInfo(task)
}
ret.executingCnt = executingCnt + pipeliningCnt + analyzingCnt + indexingCnt + metaSavedCnt
ret.executingCnt = executingCnt + pipeliningCnt + analyzingCnt + indexingCnt + metaSavedCnt + stats
ret.completedCnt = completedCnt
ret.timeoutCnt = timeoutCnt
ret.failedCnt = failedCnt


@ -89,6 +89,12 @@ func setResultSegments(segments []int64) compactionTaskOpt {
}
}
func setTmpSegments(segments []int64) compactionTaskOpt {
return func(task *datapb.CompactionTask) {
task.TmpSegments = segments
}
}
func setState(state datapb.CompactionTaskState) compactionTaskOpt {
return func(task *datapb.CompactionTask) {
task.State = state


@ -144,6 +144,9 @@ func (t *clusteringCompactionTask) retryableProcess() error {
return t.processMetaSaved()
case datapb.CompactionTaskState_indexing:
return t.processIndexing()
case datapb.CompactionTaskState_statistic:
return t.processStats()
case datapb.CompactionTaskState_timeout:
return t.processFailedOrTimeout()
case datapb.CompactionTaskState_failed:
@ -194,6 +197,7 @@ func (t *clusteringCompactionTask) BuildCompactionRequest() (*datapb.CompactionP
FieldBinlogs: segInfo.GetBinlogs(),
Field2StatslogPaths: segInfo.GetStatslogs(),
Deltalogs: segInfo.GetDeltalogs(),
IsSorted: segInfo.GetIsSorted(),
})
}
log.Info("Compaction handler build clustering compaction plan")
@ -262,7 +266,7 @@ func (t *clusteringCompactionTask) processExecuting() error {
return err
}
metricMutation.commit()
err = t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_meta_saved), setResultSegments(resultSegmentIDs))
err = t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_meta_saved), setTmpSegments(resultSegmentIDs))
if err != nil {
return err
}
@ -286,7 +290,25 @@ func (t *clusteringCompactionTask) processExecuting() error {
}
func (t *clusteringCompactionTask) processMetaSaved() error {
return t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_indexing))
return t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_statistic))
}
func (t *clusteringCompactionTask) processStats() error {
// This step only updates in-memory state; if the process crashes here, the state after recovery is still CompactionTaskState_statistic.
resultSegments := make([]int64, 0, len(t.GetTmpSegments()))
for _, segmentID := range t.GetTmpSegments() {
to, ok := t.meta.(*meta).GetCompactionTo(segmentID)
if !ok {
return nil
}
resultSegments = append(resultSegments, to.GetID())
}
log.Info("clustering compaction stats task finished",
zap.Int64s("tmp segments", t.GetTmpSegments()),
zap.Int64s("result segments", resultSegments))
return t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_indexing), setResultSegments(resultSegments))
}
func (t *clusteringCompactionTask) processIndexing() error {
@ -298,7 +320,7 @@ func (t *clusteringCompactionTask) processIndexing() error {
}
indexed := func() bool {
for _, collectionIndex := range collectionIndexes {
for _, segmentID := range t.ResultSegments {
for _, segmentID := range t.GetResultSegments() {
segmentIndexState := t.meta.GetIndexMeta().GetSegmentIndexState(t.GetCollectionID(), segmentID, collectionIndex.IndexID)
log.Debug("segment index state", zap.String("segment", segmentIndexState.String()))
if segmentIndexState.GetState() != commonpb.IndexState_Finished {
@ -413,7 +435,7 @@ func (t *clusteringCompactionTask) processFailedOrTimeout() error {
}
func (t *clusteringCompactionTask) doAnalyze() error {
newAnalyzeTask := &indexpb.AnalyzeTask{
analyzeTask := &indexpb.AnalyzeTask{
CollectionID: t.GetCollectionID(),
PartitionID: t.GetPartitionID(),
FieldID: t.GetClusteringKeyField().FieldID,
@ -423,18 +445,14 @@ func (t *clusteringCompactionTask) doAnalyze() error {
TaskID: t.GetAnalyzeTaskID(),
State: indexpb.JobState_JobStateInit,
}
err := t.meta.GetAnalyzeMeta().AddAnalyzeTask(newAnalyzeTask)
err := t.meta.GetAnalyzeMeta().AddAnalyzeTask(analyzeTask)
if err != nil {
log.Warn("failed to create analyze task", zap.Int64("planID", t.GetPlanID()), zap.Error(err))
return err
}
t.analyzeScheduler.enqueue(&analyzeTask{
taskID: t.GetAnalyzeTaskID(),
taskInfo: &indexpb.AnalyzeResult{
TaskID: t.GetAnalyzeTaskID(),
State: indexpb.JobState_JobStateInit,
},
})
t.analyzeScheduler.enqueue(newAnalyzeTask(t.GetAnalyzeTaskID()))
log.Info("submit analyze task", zap.Int64("planID", t.GetPlanID()), zap.Int64("triggerID", t.GetTriggerID()), zap.Int64("collectionID", t.GetCollectionID()), zap.Int64("id", t.GetAnalyzeTaskID()))
return t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_analyzing))
}


@ -79,7 +79,7 @@ func (s *ClusteringCompactionTaskSuite) SetupTest() {
s.mockSessionMgr = session.NewMockDataNodeManager(s.T())
scheduler := newTaskScheduler(ctx, s.meta, nil, cm, newIndexEngineVersionManager(), nil)
scheduler := newTaskScheduler(ctx, s.meta, nil, cm, newIndexEngineVersionManager(), nil, nil)
s.analyzeScheduler = scheduler
}
@ -370,7 +370,7 @@ func (s *ClusteringCompactionTaskSuite) TestProcessExecuting() {
},
}, nil).Once()
s.Equal(false, task.Process())
s.Equal(datapb.CompactionTaskState_indexing, task.GetState())
s.Equal(datapb.CompactionTaskState_statistic, task.GetState())
})
s.Run("process executing, compaction result ready", func() {
@ -403,7 +403,7 @@ func (s *ClusteringCompactionTaskSuite) TestProcessExecuting() {
},
}, nil).Once()
s.Equal(false, task.Process())
s.Equal(datapb.CompactionTaskState_indexing, task.GetState())
s.Equal(datapb.CompactionTaskState_statistic, task.GetState())
})
s.Run("process executing, compaction result timeout", func() {
@ -499,6 +499,8 @@ func (s *ClusteringCompactionTaskSuite) TestProcessIndexingState() {
CollectionID: 1,
IndexID: 3,
}
task.ResultSegments = []int64{10, 11}
err := s.meta.indexMeta.CreateIndex(index)
s.NoError(err)


@ -311,6 +311,7 @@ func (t *l0CompactionTask) BuildCompactionRequest() (*datapb.CompactionPlan, err
Level: segInfo.GetLevel(),
InsertChannel: segInfo.GetInsertChannel(),
Deltalogs: segInfo.GetDeltalogs(),
IsSorted: segInfo.GetIsSorted(),
})
}
@ -347,6 +348,7 @@ func (t *l0CompactionTask) BuildCompactionRequest() (*datapb.CompactionPlan, err
Level: info.GetLevel(),
CollectionID: info.GetCollectionID(),
PartitionID: info.GetPartitionID(),
IsSorted: info.GetIsSorted(),
}
})


@ -371,6 +371,7 @@ func (t *mixCompactionTask) BuildCompactionRequest() (*datapb.CompactionPlan, er
FieldBinlogs: segInfo.GetBinlogs(),
Field2StatslogPaths: segInfo.GetStatslogs(),
Deltalogs: segInfo.GetDeltalogs(),
IsSorted: segInfo.GetIsSorted(),
})
segIDMap[segID] = segInfo.GetDeltalogs()
}


@ -317,7 +317,8 @@ func (t *compactionTrigger) handleGlobalSignal(signal *compactionSignal) error {
!segment.isCompacting && // not compacting now
!segment.GetIsImporting() && // not importing now
segment.GetLevel() != datapb.SegmentLevel_L0 && // ignore level zero segments
segment.GetLevel() != datapb.SegmentLevel_L2 // ignore l2 segment
segment.GetLevel() != datapb.SegmentLevel_L2 && // ignore l2 segment
segment.GetIsSorted() // segment is sorted
}) // partSegments is list of chanPartSegments, which is channel-partition organized segments
if len(partSegments) == 0 {


@ -169,6 +169,7 @@ func Test_compactionTrigger_force(t *testing.T) {
},
},
},
IsSorted: true,
},
},
2: {
@ -195,6 +196,7 @@ func Test_compactionTrigger_force(t *testing.T) {
},
},
},
IsSorted: true,
},
},
3: {
@ -207,6 +209,7 @@ func Test_compactionTrigger_force(t *testing.T) {
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
IsSorted: true,
},
},
},
@ -457,6 +460,7 @@ func Test_compactionTrigger_force(t *testing.T) {
InsertChannel: "ch1",
CollectionID: 2,
PartitionID: 1,
IsSorted: true,
},
{
SegmentID: 2,
@ -478,6 +482,7 @@ func Test_compactionTrigger_force(t *testing.T) {
InsertChannel: "ch1",
CollectionID: 2,
PartitionID: 1,
IsSorted: true,
},
},
// StartTime: 0,
@ -672,6 +677,7 @@ func Test_compactionTrigger_force_maxSegmentLimit(t *testing.T) {
},
},
},
IsSorted: true,
},
}
@ -757,6 +763,7 @@ func Test_compactionTrigger_force_maxSegmentLimit(t *testing.T) {
},
},
},
IsSorted: true,
},
{
SegmentID: 2,
@ -775,6 +782,7 @@ func Test_compactionTrigger_force_maxSegmentLimit(t *testing.T) {
},
},
},
IsSorted: true,
},
},
StartTime: 3,
@ -1005,6 +1013,7 @@ func Test_compactionTrigger_PrioritizedCandi(t *testing.T) {
},
},
},
IsSorted: true,
}
}
mock0Allocator := newMockAllocator(t)
@ -1194,6 +1203,7 @@ func Test_compactionTrigger_SmallCandi(t *testing.T) {
},
},
},
IsSorted: true,
}
}
@ -1389,6 +1399,7 @@ func Test_compactionTrigger_SqueezeNonPlannedSegs(t *testing.T) {
},
},
},
IsSorted: true,
}
}
@ -1619,6 +1630,7 @@ func Test_compactionTrigger_noplan_random_size(t *testing.T) {
},
},
},
IsSorted: true,
},
lastFlushTime: time.Now(),
}
@ -2115,6 +2127,7 @@ func (s *CompactionTriggerSuite) genSeg(segID, numRows int64) *datapb.SegmentInf
},
},
},
IsSorted: true,
}
}


@ -163,6 +163,7 @@ func (gc *garbageCollector) work(ctx context.Context) {
gc.recycleUnusedIndexes(ctx)
gc.recycleUnusedSegIndexes(ctx)
gc.recycleUnusedAnalyzeFiles(ctx)
gc.recycleUnusedTextIndexFiles(ctx)
})
}()
go func() {
@ -465,9 +466,14 @@ func (gc *garbageCollector) recycleDroppedSegments(ctx context.Context) {
}
logs := getLogs(segment)
for key := range getTextLogs(segment) {
logs[key] = struct{}{}
}
log.Info("GC segment start...", zap.Int("insert_logs", len(segment.GetBinlogs())),
zap.Int("delta_logs", len(segment.GetDeltalogs())),
zap.Int("stats_logs", len(segment.GetStatslogs())))
zap.Int("stats_logs", len(segment.GetStatslogs())),
zap.Int("text_logs", len(segment.GetTextStatsLogs())))
if err := gc.removeObjectFiles(ctx, logs); err != nil {
log.Warn("GC segment remove logs failed", zap.Error(err))
continue
@ -561,6 +567,17 @@ func getLogs(sinfo *SegmentInfo) map[string]struct{} {
return logs
}
func getTextLogs(sinfo *SegmentInfo) map[string]struct{} {
textLogs := make(map[string]struct{})
for _, flog := range sinfo.GetTextStatsLogs() {
for _, file := range flog.GetFiles() {
textLogs[file] = struct{}{}
}
}
return textLogs
}
// removeObjectFiles remove file from oss storage, return error if any log failed to remove.
func (gc *garbageCollector) removeObjectFiles(ctx context.Context, filePaths map[string]struct{}) error {
futures := make([]*conc.Future[struct{}], 0)
@ -818,3 +835,64 @@ func (gc *garbageCollector) recycleUnusedAnalyzeFiles(ctx context.Context) {
log.Info("analyze stats files recycle success", zap.Int64("taskID", taskID))
}
}
// recycleUnusedTextIndexFiles loads text index meta info and compares it against the keys in object storage;
// files left over from lower (stale) versions are garbage collected.
func (gc *garbageCollector) recycleUnusedTextIndexFiles(ctx context.Context) {
start := time.Now()
log := log.With(zap.String("gcName", "recycleUnusedTextIndexFiles"), zap.Time("startAt", start))
log.Info("start recycleUnusedTextIndexFiles...")
defer func() { log.Info("recycleUnusedTextIndexFiles done", zap.Duration("timeCost", time.Since(start))) }()
hasTextIndexSegments := gc.meta.SelectSegments(SegmentFilterFunc(func(info *SegmentInfo) bool {
return len(info.GetTextStatsLogs()) != 0
}))
fileNum := 0
deletedFilesNum := atomic.NewInt32(0)
for _, seg := range hasTextIndexSegments {
for _, fieldStats := range seg.GetTextStatsLogs() {
log := log.With(zap.Int64("segmentID", seg.GetID()), zap.Int64("fieldID", fieldStats.GetFieldID()))
// clear low version task
for i := int64(1); i < fieldStats.GetVersion(); i++ {
prefix := fmt.Sprintf("%s/%s/%d/%d/%d/%d/%d", gc.option.cli.RootPath(), common.TextIndexPath,
seg.GetCollectionID(), seg.GetPartitionID(), seg.GetID(), fieldStats.GetFieldID(), i)
futures := make([]*conc.Future[struct{}], 0)
err := gc.option.cli.WalkWithPrefix(ctx, prefix, true, func(files *storage.ChunkObjectInfo) bool {
file := files.FilePath
future := gc.option.removeObjectPool.Submit(func() (struct{}, error) {
log := log.With(zap.String("file", file))
log.Info("garbageCollector recycleUnusedTextIndexFiles remove file...")
if err := gc.option.cli.Remove(ctx, file); err != nil {
log.Warn("garbageCollector recycleUnusedTextIndexFiles remove file failed", zap.Error(err))
return struct{}{}, err
}
deletedFilesNum.Inc()
log.Info("garbageCollector recycleUnusedTextIndexFiles remove file success")
return struct{}{}, nil
})
futures = append(futures, future)
return true
})
// Wait for all remove tasks done.
if err := conc.BlockOnAll(futures...); err != nil {
// error is logged, and can be ignored here.
log.Warn("some task failure in remove object pool", zap.Error(err))
}
log = log.With(zap.Int("deleteIndexFilesNum", int(deletedFilesNum.Load())), zap.Int("walkFileNum", fileNum))
if err != nil {
log.Warn("text index files recycle failed when walk with prefix", zap.Error(err))
return
}
}
}
}
log.Info("text index files recycle done")
metrics.GarbageCollectorRunCount.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Add(1)
}


@ -47,7 +47,7 @@ import (
"github.com/milvus-io/milvus/internal/metastore/model"
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/funcutil"
@ -1441,7 +1441,7 @@ func TestGarbageCollector_clearETCD(t *testing.T) {
})
assert.NoError(t, err)
err = gc.meta.indexMeta.FinishTask(&indexpb.IndexTaskInfo{
err = gc.meta.indexMeta.FinishTask(&workerpb.IndexTaskInfo{
BuildID: buildID + 4,
State: commonpb.IndexState_Finished,
IndexFileKeys: []string{"file1", "file2", "file3", "file4"},


@ -57,6 +57,7 @@ func (s *ImportCheckerSuite) SetupTest() {
catalog.EXPECT().ListAnalyzeTasks(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListCompactionTask(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListPartitionStatsInfos(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListStatsTasks(mock.Anything).Return(nil, nil)
cluster := NewMockCluster(s.T())
s.alloc = allocator.NewMockAllocator(s.T())


@ -62,6 +62,7 @@ func (s *ImportSchedulerSuite) SetupTest() {
s.catalog.EXPECT().ListAnalyzeTasks(mock.Anything).Return(nil, nil)
s.catalog.EXPECT().ListCompactionTask(mock.Anything).Return(nil, nil)
s.catalog.EXPECT().ListPartitionStatsInfos(mock.Anything).Return(nil, nil)
s.catalog.EXPECT().ListStatsTasks(mock.Anything).Return(nil, nil)
s.cluster = NewMockCluster(s.T())
s.alloc = allocator.NewMockAllocator(s.T())


@ -158,6 +158,7 @@ func TestImportUtil_AssembleRequest(t *testing.T) {
catalog.EXPECT().ListAnalyzeTasks(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListCompactionTask(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListPartitionStatsInfos(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListStatsTasks(mock.Anything).Return(nil, nil)
alloc := allocator.NewMockAllocator(t)
alloc.EXPECT().AllocN(mock.Anything).RunAndReturn(func(n int64) (int64, int64, error) {
@ -241,6 +242,7 @@ func TestImportUtil_CheckDiskQuota(t *testing.T) {
catalog.EXPECT().ListAnalyzeTasks(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListCompactionTask(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListPartitionStatsInfos(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListStatsTasks(mock.Anything).Return(nil, nil)
imeta, err := NewImportMeta(catalog)
assert.NoError(t, err)
@ -426,6 +428,7 @@ func TestImportUtil_GetImportProgress(t *testing.T) {
catalog.EXPECT().ListAnalyzeTasks(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListCompactionTask(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListPartitionStatsInfos(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListStatsTasks(mock.Anything).Return(nil, nil)
imeta, err := NewImportMeta(catalog)
assert.NoError(t, err)


@ -23,16 +23,17 @@ import (
"strconv"
"sync"
"github.com/golang/protobuf/proto"
"github.com/prometheus/client_golang/prometheus"
"github.com/samber/lo"
"go.uber.org/zap"
"google.golang.org/protobuf/proto"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/metastore"
"github.com/milvus-io/milvus/internal/metastore/model"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
@ -692,7 +693,7 @@ func (m *indexMeta) UpdateVersion(buildID UniqueID) error {
return m.updateSegIndexMeta(segIdx, updateFunc)
}
func (m *indexMeta) FinishTask(taskInfo *indexpb.IndexTaskInfo) error {
func (m *indexMeta) FinishTask(taskInfo *workerpb.IndexTaskInfo) error {
m.Lock()
defer m.Unlock()


@ -33,6 +33,7 @@ import (
catalogmocks "github.com/milvus-io/milvus/internal/metastore/mocks"
"github.com/milvus-io/milvus/internal/metastore/model"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/pkg/common"
)
@ -734,7 +735,7 @@ func TestMeta_MarkIndexAsDeleted(t *testing.T) {
func TestMeta_GetSegmentIndexes(t *testing.T) {
catalog := &datacoord.Catalog{MetaKv: mockkv.NewMetaKv(t)}
m := createMeta(catalog, nil, createIndexMeta(catalog))
m := createMeta(catalog, withIndexMeta(createIndexMeta(catalog)))
t.Run("success", func(t *testing.T) {
segIndexes := m.indexMeta.getSegmentIndexes(segID)
@ -1136,7 +1137,7 @@ func TestMeta_FinishTask(t *testing.T) {
m := updateSegmentIndexMeta(t)
t.Run("success", func(t *testing.T) {
err := m.FinishTask(&indexpb.IndexTaskInfo{
err := m.FinishTask(&workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Finished,
IndexFileKeys: []string{"file1", "file2"},
@ -1153,7 +1154,7 @@ func TestMeta_FinishTask(t *testing.T) {
m.catalog = &datacoord.Catalog{
MetaKv: metakv,
}
err := m.FinishTask(&indexpb.IndexTaskInfo{
err := m.FinishTask(&workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Finished,
IndexFileKeys: []string{"file1", "file2"},
@ -1164,7 +1165,7 @@ func TestMeta_FinishTask(t *testing.T) {
})
t.Run("not exist", func(t *testing.T) {
err := m.FinishTask(&indexpb.IndexTaskInfo{
err := m.FinishTask(&workerpb.IndexTaskInfo{
BuildID: buildID + 1,
State: commonpb.IndexState_Finished,
IndexFileKeys: []string{"file1", "file2"},
@ -1372,7 +1373,7 @@ func TestRemoveSegmentIndex(t *testing.T) {
func TestIndexMeta_GetUnindexedSegments(t *testing.T) {
catalog := &datacoord.Catalog{MetaKv: mockkv.NewMetaKv(t)}
m := createMeta(catalog, nil, createIndexMeta(catalog))
m := createMeta(catalog, withIndexMeta(createIndexMeta(catalog)))
// normal case
segmentIDs := make([]int64, 0, 11)


@ -26,6 +26,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/internal/metastore/model"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
@ -52,6 +53,10 @@ func (s *Server) startIndexService(ctx context.Context) {
}
func (s *Server) createIndexForSegment(segment *SegmentInfo, indexID UniqueID) error {
if !segment.GetIsSorted() && !segment.GetIsImporting() && segment.Level != datapb.SegmentLevel_L0 {
log.Info("segment not sorted, skip create index", zap.Int64("segmentID", segment.GetID()))
return nil
}
log.Info("create index for segment", zap.Int64("segmentID", segment.ID), zap.Int64("indexID", indexID))
buildID, err := s.allocator.AllocID(context.Background())
if err != nil {
@ -70,17 +75,15 @@ func (s *Server) createIndexForSegment(segment *SegmentInfo, indexID UniqueID) e
if err = s.meta.indexMeta.AddSegmentIndex(segIndex); err != nil {
return err
}
s.taskScheduler.enqueue(&indexBuildTask{
taskID: buildID,
taskInfo: &indexpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Unissued,
},
})
s.taskScheduler.enqueue(newIndexBuildTask(buildID))
return nil
}
func (s *Server) createIndexesForSegment(segment *SegmentInfo) error {
if !segment.GetIsSorted() && !segment.GetIsImporting() && segment.GetLevel() != datapb.SegmentLevel_L0 {
log.Debug("segment is not sorted by pk, skip create index", zap.Int64("segmentID", segment.ID))
return nil
}
indexes := s.meta.indexMeta.GetIndexesForCollection(segment.CollectionID, "")
indexIDToSegIndexes := s.meta.indexMeta.GetSegmentIndexes(segment.CollectionID, segment.ID)
for _, index := range indexes {
@ -113,7 +116,7 @@ func (s *Server) createIndexForSegmentLoop(ctx context.Context) {
log.Info("start create index for segment loop...")
defer s.serverLoopWg.Done()
ticker := time.NewTicker(time.Minute)
ticker := time.NewTicker(Params.DataCoordCfg.TaskCheckInterval.GetAsDuration(time.Second))
defer ticker.Stop()
for {
select {
@ -131,7 +134,7 @@ func (s *Server) createIndexForSegmentLoop(ctx context.Context) {
case collectionID := <-s.notifyIndexChan:
log.Info("receive create index notify", zap.Int64("collectionID", collectionID))
segments := s.meta.SelectSegments(WithCollection(collectionID), SegmentFilterFunc(func(info *SegmentInfo) bool {
return isFlush(info)
return isFlush(info) && info.GetIsSorted()
}))
for _, segment := range segments {
if err := s.createIndexesForSegment(segment); err != nil {
@ -399,7 +402,7 @@ func (s *Server) GetIndexState(ctx context.Context, req *indexpb.GetIndexStateRe
indexInfo := &indexpb.IndexInfo{}
// The total rows of all indexes should be based on the current perspective
segments := s.selectSegmentIndexesStats(WithCollection(req.GetCollectionID()), SegmentFilterFunc(func(info *SegmentInfo) bool {
return (isFlush(info) || info.GetState() == commonpb.SegmentState_Dropped)
return info.GetLevel() != datapb.SegmentLevel_L0 && (isFlush(info) || info.GetState() == commonpb.SegmentState_Dropped)
}))
s.completeIndexInfo(indexInfo, indexes[0], segments, false, indexes[0].CreateTime)
@ -650,7 +653,7 @@ func (s *Server) GetIndexBuildProgress(ctx context.Context, req *indexpb.GetInde
// The total rows of all indexes should be based on the current perspective
segments := s.selectSegmentIndexesStats(WithCollection(req.GetCollectionID()), SegmentFilterFunc(func(info *SegmentInfo) bool {
return (isFlush(info) || info.GetState() == commonpb.SegmentState_Dropped)
return info.GetLevel() != datapb.SegmentLevel_L0 && (isFlush(info) || info.GetState() == commonpb.SegmentState_Dropped)
}))
s.completeIndexInfo(indexInfo, indexes[0], segments, false, indexes[0].CreateTime)
@ -700,7 +703,7 @@ func (s *Server) DescribeIndex(ctx context.Context, req *indexpb.DescribeIndexRe
// The total rows of all indexes should be based on the current perspective
segments := s.selectSegmentIndexesStats(WithCollection(req.GetCollectionID()), SegmentFilterFunc(func(info *SegmentInfo) bool {
return isFlush(info) || info.GetState() == commonpb.SegmentState_Dropped
return info.GetLevel() != datapb.SegmentLevel_L0 && (isFlush(info) || info.GetState() == commonpb.SegmentState_Dropped)
}))
indexInfos := make([]*indexpb.IndexInfo, 0)
@ -758,7 +761,7 @@ func (s *Server) GetIndexStatistics(ctx context.Context, req *indexpb.GetIndexSt
// The total rows of all indexes should be based on the current perspective
segments := s.selectSegmentIndexesStats(WithCollection(req.GetCollectionID()), SegmentFilterFunc(func(info *SegmentInfo) bool {
return (isFlush(info) || info.GetState() == commonpb.SegmentState_Dropped)
return info.GetLevel() != datapb.SegmentLevel_L0 && (isFlush(info) || info.GetState() == commonpb.SegmentState_Dropped)
}))
indexInfos := make([]*indexpb.IndexInfo, 0)

View File

@ -40,6 +40,7 @@ import (
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/sessionutil"
"github.com/milvus-io/milvus/pkg/common"
@ -237,7 +238,7 @@ func TestServer_CreateIndex(t *testing.T) {
s.indexNodeManager = nodeManager
mockNode := mocks.NewMockIndexNodeClient(t)
nodeManager.SetClient(1001, mockNode)
mockNode.EXPECT().GetJobStats(mock.Anything, mock.Anything).Return(&indexpb.GetJobStatsResponse{
mockNode.EXPECT().GetJobStats(mock.Anything, mock.Anything).Return(&workerpb.GetJobStatsResponse{
Status: merr.Success(),
EnableDisk: true,
}, nil)

View File

@ -37,6 +37,7 @@ import (
"github.com/milvus-io/milvus/internal/datacoord/broker"
"github.com/milvus-io/milvus/internal/metastore"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/segmentutil"
"github.com/milvus-io/milvus/pkg/common"
@ -71,6 +72,7 @@ type CompactionMeta interface {
GetAnalyzeMeta() *analyzeMeta
GetPartitionStatsMeta() *partitionStatsMeta
GetCompactionTaskMeta() *compactionTaskMeta
GetStatsTaskMeta() *statsTaskMeta
}
var _ CompactionMeta = (*meta)(nil)
@ -88,6 +90,7 @@ type meta struct {
analyzeMeta *analyzeMeta
partitionStatsMeta *partitionStatsMeta
compactionTaskMeta *compactionTaskMeta
statsTaskMeta *statsTaskMeta
}
func (m *meta) GetIndexMeta() *indexMeta {
@ -106,6 +109,10 @@ func (m *meta) GetCompactionTaskMeta() *compactionTaskMeta {
return m.compactionTaskMeta
}
func (m *meta) GetStatsTaskMeta() *statsTaskMeta {
return m.statsTaskMeta
}
type channelCPs struct {
lock.RWMutex
checkpoints map[string]*msgpb.MsgPosition
@ -157,6 +164,11 @@ func newMeta(ctx context.Context, catalog metastore.DataCoordCatalog, chunkManag
if err != nil {
return nil, err
}
stm, err := newStatsTaskMeta(ctx, catalog)
if err != nil {
return nil, err
}
mt := &meta{
ctx: ctx,
catalog: catalog,
@ -168,6 +180,7 @@ func newMeta(ctx context.Context, catalog metastore.DataCoordCatalog, chunkManag
chunkManager: chunkManager,
partitionStatsMeta: psm,
compactionTaskMeta: ctm,
statsTaskMeta: stm,
}
err = mt.reloadFromKV()
if err != nil {
@ -1533,6 +1546,7 @@ func (m *meta) completeMixCompactionMutation(t *datapb.CompactionTask, result *d
DmlPosition: getMinPosition(lo.Map(compactFromSegInfos, func(info *SegmentInfo, _ int) *msgpb.MsgPosition {
return info.GetDmlPosition()
})),
IsSorted: compactToSegment.GetIsSorted(),
})
// L1 segment with NumRows=0 will be discarded, so no need to change the metric
@ -1927,3 +1941,67 @@ func (m *meta) CleanPartitionStatsInfo(info *datapb.PartitionStatsInfo) error {
}
return nil
}
func (m *meta) SaveStatsResultSegment(oldSegmentID int64, result *workerpb.StatsResult) (*segMetricMutation, error) {
m.Lock()
defer m.Unlock()
log := log.With(zap.Int64("collectionID", result.GetCollectionID()),
zap.Int64("partitionID", result.GetPartitionID()),
zap.Int64("old segmentID", oldSegmentID),
zap.Int64("target segmentID", result.GetSegmentID()))
metricMutation := &segMetricMutation{stateChange: make(map[string]map[string]int)}
oldSegment := m.segments.GetSegment(oldSegmentID)
if oldSegment == nil {
log.Warn("old segment is not found with stats task")
return nil, merr.WrapErrSegmentNotFound(oldSegmentID)
}
cloned := oldSegment.Clone()
cloned.DroppedAt = uint64(time.Now().UnixNano())
cloned.Compacted = true
// metrics mutation for compaction from segments
updateSegStateAndPrepareMetrics(cloned, commonpb.SegmentState_Dropped, metricMutation)
segmentInfo := &datapb.SegmentInfo{
ID: result.GetSegmentID(),
CollectionID: result.GetCollectionID(),
PartitionID: result.GetPartitionID(),
InsertChannel: result.GetChannel(),
NumOfRows: result.GetNumRows(),
State: commonpb.SegmentState_Flushed,
MaxRowNum: cloned.GetMaxRowNum(),
Binlogs: result.GetInsertLogs(),
Statslogs: result.GetStatsLogs(),
TextStatsLogs: result.GetTextStatsLogs(),
CreatedByCompaction: true,
CompactionFrom: []int64{oldSegmentID},
LastExpireTime: cloned.GetLastExpireTime(),
Level: datapb.SegmentLevel_L1,
StartPosition: cloned.GetStartPosition(),
DmlPosition: cloned.GetDmlPosition(),
IsSorted: true,
IsImporting: cloned.GetIsImporting(),
}
segment := NewSegmentInfo(segmentInfo)
if segment.GetNumOfRows() > 0 {
metricMutation.addNewSeg(segment.GetState(), segment.GetLevel(), segment.GetNumOfRows())
} else {
segment.State = commonpb.SegmentState_Dropped
}
log.Info("meta update: prepare for complete stats mutation - complete", zap.Int64("num rows", result.GetNumRows()))
if err := m.catalog.AlterSegments(m.ctx, []*datapb.SegmentInfo{cloned.SegmentInfo, segment.SegmentInfo}, metastore.BinlogsIncrement{Segment: segment.SegmentInfo}); err != nil {
log.Warn("fail to alter segments and new segment", zap.Error(err))
return nil, err
}
m.segments.SetSegment(oldSegmentID, cloned)
m.segments.SetSegment(result.GetSegmentID(), segment)
return metricMutation, nil
}
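For orientation only, a minimal sketch of how a stats-task completion path might consume this method. `applyStatsResult` is a hypothetical helper (not part of this change), and the `commit()` call assumes the same `segMetricMutation` helper already used by the compaction mutations in this file.

// hypothetical helper, assumed to live in package datacoord; not part of this diff
func applyStatsResult(m *meta, oldSegmentID int64, result *workerpb.StatsResult) error {
	metricMutation, err := m.SaveStatsResultSegment(oldSegmentID, result)
	if err != nil {
		return err
	}
	// publish the prepared metric changes only after the catalog update succeeded
	metricMutation.commit()
	return nil
}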

View File

@ -75,6 +75,7 @@ func (suite *MetaReloadSuite) TestReloadFromKV() {
suite.catalog.EXPECT().ListAnalyzeTasks(mock.Anything).Return(nil, nil)
suite.catalog.EXPECT().ListCompactionTask(mock.Anything).Return(nil, nil)
suite.catalog.EXPECT().ListPartitionStatsInfos(mock.Anything).Return(nil, nil)
suite.catalog.EXPECT().ListStatsTasks(mock.Anything).Return(nil, nil)
_, err := newMeta(ctx, suite.catalog, nil)
suite.Error(err)
@ -90,6 +91,7 @@ func (suite *MetaReloadSuite) TestReloadFromKV() {
suite.catalog.EXPECT().ListAnalyzeTasks(mock.Anything).Return(nil, nil)
suite.catalog.EXPECT().ListCompactionTask(mock.Anything).Return(nil, nil)
suite.catalog.EXPECT().ListPartitionStatsInfos(mock.Anything).Return(nil, nil)
suite.catalog.EXPECT().ListStatsTasks(mock.Anything).Return(nil, nil)
_, err := newMeta(ctx, suite.catalog, nil)
suite.Error(err)
@ -102,6 +104,7 @@ func (suite *MetaReloadSuite) TestReloadFromKV() {
suite.catalog.EXPECT().ListAnalyzeTasks(mock.Anything).Return(nil, nil)
suite.catalog.EXPECT().ListCompactionTask(mock.Anything).Return(nil, nil)
suite.catalog.EXPECT().ListPartitionStatsInfos(mock.Anything).Return(nil, nil)
suite.catalog.EXPECT().ListStatsTasks(mock.Anything).Return(nil, nil)
suite.catalog.EXPECT().ListSegments(mock.Anything).Return([]*datapb.SegmentInfo{
{
ID: 1,
@ -600,7 +603,7 @@ func TestMeta_Basic(t *testing.T) {
})
t.Run("Test GetCollectionBinlogSize", func(t *testing.T) {
meta := createMeta(&datacoord.Catalog{}, nil, createIndexMeta(&datacoord.Catalog{}))
meta := createMeta(&datacoord.Catalog{}, withIndexMeta(createIndexMeta(&datacoord.Catalog{})))
ret := meta.GetCollectionIndexFilesSize()
assert.Equal(t, uint64(0), ret)

View File

@ -567,6 +567,49 @@ func (_c *MockCompactionMeta_GetSegment_Call) RunAndReturn(run func(int64) *Segm
return _c
}
// GetStatsTaskMeta provides a mock function with given fields:
func (_m *MockCompactionMeta) GetStatsTaskMeta() *statsTaskMeta {
ret := _m.Called()
var r0 *statsTaskMeta
if rf, ok := ret.Get(0).(func() *statsTaskMeta); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*statsTaskMeta)
}
}
return r0
}
// MockCompactionMeta_GetStatsTaskMeta_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetStatsTaskMeta'
type MockCompactionMeta_GetStatsTaskMeta_Call struct {
*mock.Call
}
// GetStatsTaskMeta is a helper method to define mock.On call
func (_e *MockCompactionMeta_Expecter) GetStatsTaskMeta() *MockCompactionMeta_GetStatsTaskMeta_Call {
return &MockCompactionMeta_GetStatsTaskMeta_Call{Call: _e.mock.On("GetStatsTaskMeta")}
}
func (_c *MockCompactionMeta_GetStatsTaskMeta_Call) Run(run func()) *MockCompactionMeta_GetStatsTaskMeta_Call {
_c.Call.Run(func(args mock.Arguments) {
run()
})
return _c
}
func (_c *MockCompactionMeta_GetStatsTaskMeta_Call) Return(_a0 *statsTaskMeta) *MockCompactionMeta_GetStatsTaskMeta_Call {
_c.Call.Return(_a0)
return _c
}
func (_c *MockCompactionMeta_GetStatsTaskMeta_Call) RunAndReturn(run func() *statsTaskMeta) *MockCompactionMeta_GetStatsTaskMeta_Call {
_c.Call.Return(run)
return _c
}
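A brief usage sketch, not part of the generated mock: wiring the new expectation in a test, assuming the usual mockery constructor for this package and a pre-built `statsMeta`/`taskID` in the test.

// hypothetical test snippet inside package datacoord
mockMeta := NewMockCompactionMeta(t)
mockMeta.EXPECT().GetStatsTaskMeta().Return(statsMeta).Maybe()
// code under test can now read stats task state through the CompactionMeta interface
state := mockMeta.GetStatsTaskMeta().GetStatsTaskState(taskID)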
// SaveCompactionTask provides a mock function with given fields: task
func (_m *MockCompactionMeta) SaveCompactionTask(task *datapb.CompactionTask) error {
ret := _m.Called(task)

View File

@ -129,6 +129,7 @@ type Server struct {
metricsCacheManager *metricsinfo.MetricsCacheManager
flushCh chan UniqueID
statsCh chan UniqueID
buildIndexCh chan UniqueID
notifyIndexChan chan UniqueID
factory dependency.Factory
@ -205,6 +206,7 @@ func CreateServer(ctx context.Context, factory dependency.Factory, opts ...Optio
quitCh: make(chan struct{}),
factory: factory,
flushCh: make(chan UniqueID, 1024),
statsCh: make(chan UniqueID, 1024),
buildIndexCh: make(chan UniqueID, 1024),
notifyIndexChan: make(chan UniqueID),
dataNodeCreator: defaultDataNodeCreatorFunc,
@ -393,7 +395,7 @@ func (s *Server) initDataCoord() error {
if err != nil {
return err
}
s.importScheduler = NewImportScheduler(s.meta, s.cluster, s.allocator, s.importMeta, s.buildIndexCh)
s.importScheduler = NewImportScheduler(s.meta, s.cluster, s.allocator, s.importMeta, s.statsCh)
s.importChecker = NewImportChecker(s.meta, s.broker, s.cluster, s.allocator, s.segmentManager, s.importMeta)
s.syncSegmentsScheduler = newSyncSegmentsScheduler(s.meta, s.channelManager, s.sessionManager)
@ -425,7 +427,7 @@ func (s *Server) Start() error {
}
func (s *Server) startDataCoord() {
s.taskScheduler.Start()
s.startTaskScheduler()
s.startServerLoop()
// http.Register(&http.Handler{
@ -669,7 +671,7 @@ func (s *Server) initMeta(chunkManager storage.ChunkManager) error {
func (s *Server) initTaskScheduler(manager storage.ChunkManager) {
if s.taskScheduler == nil {
s.taskScheduler = newTaskScheduler(s.ctx, s.meta, s.indexNodeManager, manager, s.indexEngineVersionManager, s.handler)
s.taskScheduler = newTaskScheduler(s.ctx, s.meta, s.indexNodeManager, manager, s.indexEngineVersionManager, s.handler, s.allocator)
}
}
@ -720,7 +722,6 @@ func (s *Server) startServerLoop() {
s.serverLoopWg.Add(2)
s.startWatchService(s.serverLoopCtx)
s.startFlushLoop(s.serverLoopCtx)
s.startIndexService(s.serverLoopCtx)
go s.importScheduler.Start()
go s.importChecker.Start()
s.garbageCollector.start()
@ -730,6 +731,13 @@ func (s *Server) startServerLoop() {
}
}
func (s *Server) startTaskScheduler() {
s.taskScheduler.Start()
s.startIndexService(s.serverLoopCtx)
s.startStatsTasksCheckLoop(s.serverLoopCtx)
}
func (s *Server) updateSegmentStatistics(stats []*commonpb.SegmentStats) {
for _, stat := range stats {
segment := s.meta.GetSegment(stat.GetSegmentID())
@ -981,7 +989,7 @@ func (s *Server) postFlush(ctx context.Context, segmentID UniqueID) error {
return err
}
select {
case s.buildIndexCh <- segmentID:
case s.statsCh <- segmentID:
default:
}

View File

@ -48,9 +48,9 @@ import (
"github.com/milvus-io/milvus/internal/metastore/model"
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/proto/rootcoordpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/internal/util/dependency"
"github.com/milvus-io/milvus/internal/util/sessionutil"
@ -1315,7 +1315,7 @@ func TestGetQueryVChanPositions(t *testing.T) {
IndexID: 1,
})
assert.NoError(t, err)
err = svr.meta.indexMeta.FinishTask(&indexpb.IndexTaskInfo{
err = svr.meta.indexMeta.FinishTask(&workerpb.IndexTaskInfo{
BuildID: 1,
State: commonpb.IndexState_Finished,
})
@ -1682,7 +1682,7 @@ func TestGetQueryVChanPositions_Retrieve_unIndexed(t *testing.T) {
IndexID: 1,
})
assert.NoError(t, err)
err = svr.meta.indexMeta.FinishTask(&indexpb.IndexTaskInfo{
err = svr.meta.indexMeta.FinishTask(&workerpb.IndexTaskInfo{
BuildID: 1,
State: commonpb.IndexState_Finished,
})
@ -1710,7 +1710,7 @@ func TestGetQueryVChanPositions_Retrieve_unIndexed(t *testing.T) {
IndexID: 1,
})
assert.NoError(t, err)
err = svr.meta.indexMeta.FinishTask(&indexpb.IndexTaskInfo{
err = svr.meta.indexMeta.FinishTask(&workerpb.IndexTaskInfo{
BuildID: 2,
State: commonpb.IndexState_Finished,
})
@ -1897,7 +1897,7 @@ func TestGetRecoveryInfo(t *testing.T) {
BuildID: seg1.ID,
})
assert.NoError(t, err)
err = svr.meta.indexMeta.FinishTask(&indexpb.IndexTaskInfo{
err = svr.meta.indexMeta.FinishTask(&workerpb.IndexTaskInfo{
BuildID: seg1.ID,
State: commonpb.IndexState_Finished,
})
@ -1907,7 +1907,7 @@ func TestGetRecoveryInfo(t *testing.T) {
BuildID: seg2.ID,
})
assert.NoError(t, err)
err = svr.meta.indexMeta.FinishTask(&indexpb.IndexTaskInfo{
err = svr.meta.indexMeta.FinishTask(&workerpb.IndexTaskInfo{
BuildID: seg2.ID,
State: commonpb.IndexState_Finished,
})
@ -2079,7 +2079,7 @@ func TestGetRecoveryInfo(t *testing.T) {
BuildID: segment.ID,
})
assert.NoError(t, err)
err = svr.meta.indexMeta.FinishTask(&indexpb.IndexTaskInfo{
err = svr.meta.indexMeta.FinishTask(&workerpb.IndexTaskInfo{
BuildID: segment.ID,
State: commonpb.IndexState_Finished,
})
@ -2608,6 +2608,7 @@ func TestPostFlush(t *testing.T) {
CollectionID: 1,
PartitionID: 1,
State: commonpb.SegmentState_Flushing,
IsSorted: true,
}))
assert.NoError(t, err)

View File

@ -751,6 +751,7 @@ func (s *Server) GetRecoveryInfo(ctx context.Context, req *datapb.GetRecoveryInf
segment2DeltaBinlogs := make(map[UniqueID][]*datapb.FieldBinlog)
segment2InsertChannel := make(map[UniqueID]string)
segmentsNumOfRows := make(map[UniqueID]int64)
segment2TextStatsLogs := make(map[UniqueID]map[UniqueID]*datapb.TextIndexStats)
for id := range flushedIDs {
segment := s.meta.GetSegment(id)
if segment == nil {
@ -812,6 +813,8 @@ func (s *Server) GetRecoveryInfo(ctx context.Context, req *datapb.GetRecoveryInf
segment2StatsBinlogs[id] = append(segment2StatsBinlogs[id], fieldBinlogs)
}
segment2TextStatsLogs[id] = segment.GetTextStatsLogs()
if len(segment.GetDeltalogs()) > 0 {
segment2DeltaBinlogs[id] = append(segment2DeltaBinlogs[id], segment.GetDeltalogs()...)
}
@ -826,6 +829,7 @@ func (s *Server) GetRecoveryInfo(ctx context.Context, req *datapb.GetRecoveryInf
Statslogs: segment2StatsBinlogs[segmentID],
Deltalogs: segment2DeltaBinlogs[segmentID],
InsertChannel: segment2InsertChannel[segmentID],
TextStatsLogs: segment2TextStatsLogs[segmentID],
}
binlogs = append(binlogs, sbl)
}

View File

@ -25,8 +25,8 @@ import (
"github.com/milvus-io/milvus/internal/metastore/model"
mocks2 "github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/mq/msgstream"
@ -877,7 +877,7 @@ func TestGetRecoveryInfoV2(t *testing.T) {
BuildID: seg1.ID,
})
assert.NoError(t, err)
err = svr.meta.indexMeta.FinishTask(&indexpb.IndexTaskInfo{
err = svr.meta.indexMeta.FinishTask(&workerpb.IndexTaskInfo{
BuildID: seg1.ID,
State: commonpb.IndexState_Finished,
})
@ -887,7 +887,7 @@ func TestGetRecoveryInfoV2(t *testing.T) {
BuildID: seg2.ID,
})
assert.NoError(t, err)
err = svr.meta.indexMeta.FinishTask(&indexpb.IndexTaskInfo{
err = svr.meta.indexMeta.FinishTask(&workerpb.IndexTaskInfo{
BuildID: seg2.ID,
State: commonpb.IndexState_Finished,
})
@ -1061,7 +1061,7 @@ func TestGetRecoveryInfoV2(t *testing.T) {
BuildID: segment.ID,
})
assert.NoError(t, err)
err = svr.meta.indexMeta.FinishTask(&indexpb.IndexTaskInfo{
err = svr.meta.indexMeta.FinishTask(&workerpb.IndexTaskInfo{
BuildID: segment.ID,
State: commonpb.IndexState_Finished,
})

View File

@ -25,7 +25,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
indexnodeclient "github.com/milvus-io/milvus/internal/distributed/indexnode/client"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
@ -133,7 +133,7 @@ func (nm *IndexNodeManager) PickClient() (typeutil.UniqueID, types.IndexNodeClie
wg.Add(1)
go func() {
defer wg.Done()
resp, err := client.GetJobStats(ctx, &indexpb.GetJobStatsRequest{})
resp, err := client.GetJobStats(ctx, &workerpb.GetJobStatsRequest{})
if err != nil {
log.Warn("get IndexNode slots failed", zap.Int64("nodeID", nodeID), zap.Error(err))
return
@ -188,7 +188,7 @@ func (nm *IndexNodeManager) ClientSupportDisk() bool {
wg.Add(1)
go func() {
defer wg.Done()
resp, err := client.GetJobStats(ctx, &indexpb.GetJobStatsRequest{})
resp, err := client.GetJobStats(ctx, &workerpb.GetJobStatsRequest{})
if err := merr.CheckRPCCall(resp, err); err != nil {
log.Warn("get IndexNode slots failed", zap.Int64("nodeID", nodeID), zap.Error(err))
return

View File

@ -25,7 +25,7 @@ import (
"github.com/stretchr/testify/mock"
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/pkg/util/lock"
"github.com/milvus-io/milvus/pkg/util/merr"
@ -50,7 +50,7 @@ func TestIndexNodeManager_AddNode(t *testing.T) {
func TestIndexNodeManager_PickClient(t *testing.T) {
paramtable.Init()
getMockedGetJobStatsClient := func(resp *indexpb.GetJobStatsResponse, err error) types.IndexNodeClient {
getMockedGetJobStatsClient := func(resp *workerpb.GetJobStatsResponse, err error) types.IndexNodeClient {
ic := mocks.NewMockIndexNodeClient(t)
ic.EXPECT().GetJobStats(mock.Anything, mock.Anything, mock.Anything).Return(resp, err)
return ic
@ -62,32 +62,32 @@ func TestIndexNodeManager_PickClient(t *testing.T) {
nm := &IndexNodeManager{
ctx: context.TODO(),
nodeClients: map[typeutil.UniqueID]types.IndexNodeClient{
1: getMockedGetJobStatsClient(&indexpb.GetJobStatsResponse{
1: getMockedGetJobStatsClient(&workerpb.GetJobStatsResponse{
Status: merr.Status(err),
}, err),
2: getMockedGetJobStatsClient(&indexpb.GetJobStatsResponse{
2: getMockedGetJobStatsClient(&workerpb.GetJobStatsResponse{
Status: merr.Status(err),
}, err),
3: getMockedGetJobStatsClient(&indexpb.GetJobStatsResponse{
3: getMockedGetJobStatsClient(&workerpb.GetJobStatsResponse{
Status: merr.Status(err),
}, err),
4: getMockedGetJobStatsClient(&indexpb.GetJobStatsResponse{
4: getMockedGetJobStatsClient(&workerpb.GetJobStatsResponse{
Status: merr.Status(err),
}, err),
5: getMockedGetJobStatsClient(&indexpb.GetJobStatsResponse{
5: getMockedGetJobStatsClient(&workerpb.GetJobStatsResponse{
Status: merr.Status(err),
}, nil),
6: getMockedGetJobStatsClient(&indexpb.GetJobStatsResponse{
6: getMockedGetJobStatsClient(&workerpb.GetJobStatsResponse{
Status: merr.Status(err),
}, nil),
7: getMockedGetJobStatsClient(&indexpb.GetJobStatsResponse{
7: getMockedGetJobStatsClient(&workerpb.GetJobStatsResponse{
Status: merr.Status(err),
}, nil),
8: getMockedGetJobStatsClient(&indexpb.GetJobStatsResponse{
8: getMockedGetJobStatsClient(&workerpb.GetJobStatsResponse{
TaskSlots: 1,
Status: merr.Success(),
}, nil),
9: getMockedGetJobStatsClient(&indexpb.GetJobStatsResponse{
9: getMockedGetJobStatsClient(&workerpb.GetJobStatsResponse{
TaskSlots: 10,
Status: merr.Success(),
}, nil),
@ -102,7 +102,7 @@ func TestIndexNodeManager_PickClient(t *testing.T) {
func TestIndexNodeManager_ClientSupportDisk(t *testing.T) {
paramtable.Init()
getMockedGetJobStatsClient := func(resp *indexpb.GetJobStatsResponse, err error) types.IndexNodeClient {
getMockedGetJobStatsClient := func(resp *workerpb.GetJobStatsResponse, err error) types.IndexNodeClient {
ic := mocks.NewMockIndexNodeClient(t)
ic.EXPECT().GetJobStats(mock.Anything, mock.Anything, mock.Anything).Return(resp, err)
return ic
@ -115,7 +115,7 @@ func TestIndexNodeManager_ClientSupportDisk(t *testing.T) {
ctx: context.Background(),
lock: lock.RWMutex{},
nodeClients: map[typeutil.UniqueID]types.IndexNodeClient{
1: getMockedGetJobStatsClient(&indexpb.GetJobStatsResponse{
1: getMockedGetJobStatsClient(&workerpb.GetJobStatsResponse{
Status: merr.Success(),
TaskSlots: 1,
JobInfos: nil,
@ -133,7 +133,7 @@ func TestIndexNodeManager_ClientSupportDisk(t *testing.T) {
ctx: context.Background(),
lock: lock.RWMutex{},
nodeClients: map[typeutil.UniqueID]types.IndexNodeClient{
1: getMockedGetJobStatsClient(&indexpb.GetJobStatsResponse{
1: getMockedGetJobStatsClient(&workerpb.GetJobStatsResponse{
Status: merr.Success(),
TaskSlots: 1,
JobInfos: nil,
@ -175,7 +175,7 @@ func TestIndexNodeManager_ClientSupportDisk(t *testing.T) {
ctx: context.Background(),
lock: lock.RWMutex{},
nodeClients: map[typeutil.UniqueID]types.IndexNodeClient{
1: getMockedGetJobStatsClient(&indexpb.GetJobStatsResponse{
1: getMockedGetJobStatsClient(&workerpb.GetJobStatsResponse{
Status: merr.Status(err),
TaskSlots: 0,
JobInfos: nil,

View File

@ -0,0 +1,304 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"context"
"fmt"
"strconv"
"sync"
"github.com/golang/protobuf/proto"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/metastore"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/timerecord"
)
type statsTaskMeta struct {
sync.RWMutex
ctx context.Context
catalog metastore.DataCoordCatalog
// taskID -> statsTask
// TODO: when to mark as dropped?
tasks map[int64]*indexpb.StatsTask
segmentStatsTaskIndex map[int64]*indexpb.StatsTask
}
func newStatsTaskMeta(ctx context.Context, catalog metastore.DataCoordCatalog) (*statsTaskMeta, error) {
stm := &statsTaskMeta{
ctx: ctx,
catalog: catalog,
tasks: make(map[int64]*indexpb.StatsTask),
segmentStatsTaskIndex: make(map[int64]*indexpb.StatsTask),
}
if err := stm.reloadFromKV(); err != nil {
return nil, err
}
return stm, nil
}
func (stm *statsTaskMeta) reloadFromKV() error {
record := timerecord.NewTimeRecorder("statsTaskMeta-reloadFromKV")
// load stats task
statsTasks, err := stm.catalog.ListStatsTasks(stm.ctx)
if err != nil {
log.Error("statsTaskMeta reloadFromKV load stats tasks failed", zap.Error(err))
return err
}
for _, t := range statsTasks {
stm.tasks[t.GetTaskID()] = t
stm.segmentStatsTaskIndex[t.GetSegmentID()] = t
}
log.Info("statsTaskMeta reloadFromKV done", zap.Duration("duration", record.ElapseSpan()))
return nil
}
func (stm *statsTaskMeta) updateMetrics() {
taskMetrics := make(map[UniqueID]map[indexpb.JobState]int)
for _, t := range stm.tasks {
if _, ok := taskMetrics[t.GetCollectionID()]; !ok {
taskMetrics[t.GetCollectionID()] = make(map[indexpb.JobState]int)
taskMetrics[t.GetCollectionID()][indexpb.JobState_JobStateNone] = 0
taskMetrics[t.GetCollectionID()][indexpb.JobState_JobStateInit] = 0
taskMetrics[t.GetCollectionID()][indexpb.JobState_JobStateInProgress] = 0
taskMetrics[t.GetCollectionID()][indexpb.JobState_JobStateFinished] = 0
taskMetrics[t.GetCollectionID()][indexpb.JobState_JobStateFailed] = 0
taskMetrics[t.GetCollectionID()][indexpb.JobState_JobStateRetry] = 0
}
taskMetrics[t.GetCollectionID()][t.GetState()]++
}
jobType := indexpb.JobType_JobTypeStatsJob.String()
for collID, m := range taskMetrics {
for k, v := range m {
metrics.TaskNum.WithLabelValues(strconv.FormatInt(collID, 10), jobType, k.String()).Set(float64(v))
}
}
}
func (stm *statsTaskMeta) AddStatsTask(t *indexpb.StatsTask) error {
stm.Lock()
defer stm.Unlock()
if _, ok := stm.segmentStatsTaskIndex[t.GetSegmentID()]; ok {
msg := fmt.Sprintf("stats task already exist in meta of segment %d", t.GetSegmentID())
log.Warn(msg)
return merr.WrapErrTaskDuplicate(indexpb.JobType_JobTypeStatsJob.String(), msg)
}
log.Info("add stats task", zap.Int64("taskID", t.GetTaskID()), zap.Int64("segmentID", t.GetSegmentID()))
t.State = indexpb.JobState_JobStateInit
if err := stm.catalog.SaveStatsTask(stm.ctx, t); err != nil {
log.Warn("adding stats task failed",
zap.Int64("taskID", t.GetTaskID()),
zap.Int64("segmentID", t.GetSegmentID()),
zap.Error(err))
return err
}
stm.tasks[t.GetTaskID()] = t
stm.segmentStatsTaskIndex[t.GetSegmentID()] = t
stm.updateMetrics()
log.Info("add stats task success", zap.Int64("taskID", t.GetTaskID()), zap.Int64("segmentID", t.GetSegmentID()))
return nil
}
func (stm *statsTaskMeta) RemoveStatsTaskByTaskID(taskID int64) error {
stm.Lock()
defer stm.Unlock()
log.Info("remove stats task by taskID", zap.Int64("taskID", taskID))
t, ok := stm.tasks[taskID]
if !ok {
log.Info("remove stats task success, task already not exist", zap.Int64("taskID", taskID))
return nil
}
if err := stm.catalog.DropStatsTask(stm.ctx, taskID); err != nil {
log.Warn("meta update: removing stats task failed",
zap.Int64("taskID", taskID),
zap.Int64("segmentID", taskID),
zap.Error(err))
return err
}
delete(stm.tasks, taskID)
delete(stm.segmentStatsTaskIndex, t.SegmentID)
stm.updateMetrics()
log.Info("remove stats task success", zap.Int64("taskID", taskID), zap.Int64("segmentID", t.SegmentID))
return nil
}
func (stm *statsTaskMeta) RemoveStatsTaskBySegmentID(segmentID int64) error {
stm.Lock()
defer stm.Unlock()
log.Info("remove stats task by segmentID", zap.Int64("segmentID", segmentID))
t, ok := stm.segmentStatsTaskIndex[segmentID]
if !ok {
log.Info("remove stats task success, task already not exist", zap.Int64("segmentID", segmentID))
return nil
}
if err := stm.catalog.DropStatsTask(stm.ctx, t.TaskID); err != nil {
log.Warn("meta update: removing stats task failed",
zap.Int64("taskID", t.TaskID),
zap.Int64("segmentID", segmentID),
zap.Error(err))
return err
}
delete(stm.tasks, t.TaskID)
delete(stm.segmentStatsTaskIndex, segmentID)
stm.updateMetrics()
log.Info("remove stats task success", zap.Int64("taskID", t.TaskID), zap.Int64("segmentID", segmentID))
return nil
}
func (stm *statsTaskMeta) UpdateVersion(taskID int64) error {
stm.Lock()
defer stm.Unlock()
t, ok := stm.tasks[taskID]
if !ok {
return fmt.Errorf("task %d not found", taskID)
}
cloneT := proto.Clone(t).(*indexpb.StatsTask)
cloneT.Version++
if err := stm.catalog.SaveStatsTask(stm.ctx, cloneT); err != nil {
log.Warn("update stats task version failed",
zap.Int64("taskID", t.GetTaskID()),
zap.Int64("segmentID", t.GetSegmentID()),
zap.Error(err))
return err
}
stm.tasks[t.TaskID] = cloneT
stm.segmentStatsTaskIndex[t.SegmentID] = cloneT
stm.updateMetrics()
log.Info("update stats task version success", zap.Int64("taskID", taskID), zap.Int64("newVersion", cloneT.GetVersion()))
return nil
}
func (stm *statsTaskMeta) UpdateBuildingTask(taskID, nodeID int64) error {
stm.Lock()
defer stm.Unlock()
t, ok := stm.tasks[taskID]
if !ok {
return fmt.Errorf("task %d not found", taskID)
}
cloneT := proto.Clone(t).(*indexpb.StatsTask)
cloneT.NodeID = nodeID
cloneT.State = indexpb.JobState_JobStateInProgress
if err := stm.catalog.SaveStatsTask(stm.ctx, cloneT); err != nil {
log.Warn("update stats task state building failed",
zap.Int64("taskID", t.GetTaskID()),
zap.Int64("segmentID", t.GetSegmentID()),
zap.Error(err))
return err
}
stm.tasks[t.TaskID] = cloneT
stm.segmentStatsTaskIndex[t.SegmentID] = cloneT
stm.updateMetrics()
log.Info("update building stats task success", zap.Int64("taskID", taskID), zap.Int64("nodeID", nodeID))
return nil
}
func (stm *statsTaskMeta) FinishTask(taskID int64, result *workerpb.StatsResult) error {
stm.Lock()
defer stm.Unlock()
t, ok := stm.tasks[taskID]
if !ok {
return fmt.Errorf("task %d not found", taskID)
}
cloneT := proto.Clone(t).(*indexpb.StatsTask)
cloneT.State = result.GetState()
cloneT.FailReason = result.GetFailReason()
if err := stm.catalog.SaveStatsTask(stm.ctx, cloneT); err != nil {
log.Warn("finish stats task state failed",
zap.Int64("taskID", t.GetTaskID()),
zap.Int64("segmentID", t.GetSegmentID()),
zap.Error(err))
return err
}
stm.tasks[t.TaskID] = cloneT
stm.segmentStatsTaskIndex[t.SegmentID] = cloneT
stm.updateMetrics()
log.Info("finish stats task meta success", zap.Int64("taskID", taskID), zap.Int64("segmentID", t.SegmentID),
zap.String("state", result.GetState().String()), zap.String("failReason", t.GetFailReason()))
return nil
}
func (stm *statsTaskMeta) GetStatsTaskState(taskID int64) indexpb.JobState {
stm.RLock()
defer stm.RUnlock()
t, ok := stm.tasks[taskID]
if !ok {
return indexpb.JobState_JobStateNone
}
return t.GetState()
}
func (stm *statsTaskMeta) GetStatsTaskStateBySegmentID(segmentID int64) indexpb.JobState {
stm.RLock()
defer stm.RUnlock()
t, ok := stm.segmentStatsTaskIndex[segmentID]
if !ok {
return indexpb.JobState_JobStateNone
}
return t.GetState()
}
func (stm *statsTaskMeta) CanCleanedTasks() []int64 {
stm.RLock()
defer stm.RUnlock()
needCleanedTaskIDs := make([]int64, 0)
for taskID, t := range stm.tasks {
if t.GetState() == indexpb.JobState_JobStateFinished ||
t.GetState() == indexpb.JobState_JobStateFailed {
needCleanedTaskIDs = append(needCleanedTaskIDs, taskID)
}
}
return needCleanedTaskIDs
}
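A hedged end-to-end sketch of the intended task lifecycle against this meta; apart from the statsTaskMeta methods shown above, the wrapper function and its arguments are hypothetical.

// hypothetical lifecycle walkthrough, assumed to live in package datacoord
func runStatsTaskLifecycle(stm *statsTaskMeta, task *indexpb.StatsTask, nodeID int64, result *workerpb.StatsResult) error {
	// register the task; duplicate tasks for the same segment are rejected
	if err := stm.AddStatsTask(task); err != nil {
		return err
	}
	// bump the version before dispatching to a worker
	if err := stm.UpdateVersion(task.GetTaskID()); err != nil {
		return err
	}
	// mark the task as building on the picked node
	if err := stm.UpdateBuildingTask(task.GetTaskID(), nodeID); err != nil {
		return err
	}
	// record the worker result (finished or failed)
	if err := stm.FinishTask(task.GetTaskID(), result); err != nil {
		return err
	}
	// finished/failed tasks become eligible for cleanup
	for _, taskID := range stm.CanCleanedTasks() {
		if err := stm.RemoveStatsTaskByTaskID(taskID); err != nil {
			return err
		}
	}
	return nil
}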

View File

@ -0,0 +1,309 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"context"
"fmt"
"testing"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"github.com/milvus-io/milvus/internal/metastore/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
)
type statsTaskMetaSuite struct {
suite.Suite
collectionID int64
partitionID int64
segmentID int64
}
func (s *statsTaskMetaSuite) SetupSuite() {}
func (s *statsTaskMetaSuite) TearDownSuite() {}
func (s *statsTaskMetaSuite) SetupTest() {
s.collectionID = 100
s.partitionID = 101
s.segmentID = 102
}
func (s *statsTaskMetaSuite) Test_Method() {
s.Run("newStatsTaskMeta", func() {
s.Run("normal case", func() {
catalog := mocks.NewDataCoordCatalog(s.T())
catalog.EXPECT().ListStatsTasks(mock.Anything).Return([]*indexpb.StatsTask{
{
CollectionID: s.collectionID,
PartitionID: s.partitionID,
SegmentID: 10000,
InsertChannel: "ch1",
TaskID: 10001,
Version: 1,
NodeID: 0,
State: indexpb.JobState_JobStateFinished,
FailReason: "",
},
}, nil)
m, err := newStatsTaskMeta(context.Background(), catalog)
s.NoError(err)
s.NotNil(m)
})
s.Run("failed case", func() {
catalog := mocks.NewDataCoordCatalog(s.T())
catalog.EXPECT().ListStatsTasks(mock.Anything).Return(nil, fmt.Errorf("mock error"))
m, err := newStatsTaskMeta(context.Background(), catalog)
s.Error(err)
s.Nil(m)
})
})
catalog := mocks.NewDataCoordCatalog(s.T())
catalog.EXPECT().ListStatsTasks(mock.Anything).Return(nil, nil)
m, err := newStatsTaskMeta(context.Background(), catalog)
s.NoError(err)
t := &indexpb.StatsTask{
CollectionID: s.collectionID,
PartitionID: s.partitionID,
SegmentID: s.segmentID,
InsertChannel: "ch1",
TaskID: 1,
Version: 0,
NodeID: 0,
State: indexpb.JobState_JobStateInit,
FailReason: "",
}
s.Run("AddStatsTask", func() {
s.Run("failed case", func() {
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(fmt.Errorf("mock error")).Once()
s.Error(m.AddStatsTask(t))
_, ok := m.tasks[1]
s.False(ok)
_, ok = m.segmentStatsTaskIndex[s.segmentID]
s.False(ok)
})
s.Run("normal case", func() {
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(nil).Once()
s.NoError(m.AddStatsTask(t))
_, ok := m.tasks[1]
s.True(ok)
_, ok = m.segmentStatsTaskIndex[s.segmentID]
s.True(ok)
})
s.Run("already exist", func() {
s.Error(m.AddStatsTask(t))
_, ok := m.tasks[1]
s.True(ok)
_, ok = m.segmentStatsTaskIndex[s.segmentID]
s.True(ok)
})
})
s.Run("UpdateVersion", func() {
s.Run("normal case", func() {
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(nil).Once()
s.NoError(m.UpdateVersion(1))
task, ok := m.tasks[1]
s.True(ok)
s.Equal(int64(1), task.GetVersion())
sTask, ok := m.segmentStatsTaskIndex[s.segmentID]
s.True(ok)
s.Equal(int64(1), sTask.GetVersion())
})
s.Run("task not exist", func() {
_, ok := m.tasks[100]
s.False(ok)
s.Error(m.UpdateVersion(100))
})
s.Run("failed case", func() {
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(fmt.Errorf("mock error")).Once()
s.Error(m.UpdateVersion(1))
task, ok := m.tasks[1]
s.True(ok)
// still 1
s.Equal(int64(1), task.GetVersion())
sTask, ok := m.segmentStatsTaskIndex[s.segmentID]
s.True(ok)
s.Equal(int64(1), sTask.GetVersion())
})
})
s.Run("UpdateBuildingTask", func() {
s.Run("failed case", func() {
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(fmt.Errorf("mock error")).Once()
s.Error(m.UpdateBuildingTask(1, 1180))
task, ok := m.tasks[1]
s.True(ok)
s.Equal(indexpb.JobState_JobStateInit, task.GetState())
s.Equal(int64(0), task.GetNodeID())
})
s.Run("normal case", func() {
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(nil).Once()
s.NoError(m.UpdateBuildingTask(1, 1180))
task, ok := m.tasks[1]
s.True(ok)
s.Equal(indexpb.JobState_JobStateInProgress, task.GetState())
s.Equal(int64(1180), task.GetNodeID())
})
s.Run("task not exist", func() {
_, ok := m.tasks[100]
s.False(ok)
s.Error(m.UpdateBuildingTask(100, 1180))
})
})
s.Run("FinishTask", func() {
result := &workerpb.StatsResult{
TaskID: 1,
State: indexpb.JobState_JobStateFinished,
FailReason: "",
CollectionID: s.collectionID,
PartitionID: s.partitionID,
SegmentID: s.segmentID,
Channel: "ch1",
InsertLogs: []*datapb.FieldBinlog{
{FieldID: 0, Binlogs: []*datapb.Binlog{{LogID: 1}, {LogID: 5}}},
{FieldID: 1, Binlogs: []*datapb.Binlog{{LogID: 2}, {LogID: 6}}},
{FieldID: 100, Binlogs: []*datapb.Binlog{{LogID: 3}, {LogID: 7}}},
{FieldID: 101, Binlogs: []*datapb.Binlog{{LogID: 4}, {LogID: 8}}},
},
StatsLogs: []*datapb.FieldBinlog{
{FieldID: 100, Binlogs: []*datapb.Binlog{{LogID: 9}}},
},
DeltaLogs: nil,
TextStatsLogs: map[int64]*datapb.TextIndexStats{
100: {
FieldID: 100,
Version: 1,
Files: []string{"file1", "file2", "file3"},
LogSize: 100,
MemorySize: 100,
},
},
NumRows: 2048,
}
s.Run("failed case", func() {
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(fmt.Errorf("mock error")).Once()
s.Error(m.FinishTask(1, result))
task, ok := m.tasks[1]
s.True(ok)
s.Equal(indexpb.JobState_JobStateInProgress, task.GetState())
})
s.Run("normal case", func() {
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(nil).Once()
s.NoError(m.FinishTask(1, result))
task, ok := m.tasks[1]
s.True(ok)
s.Equal(indexpb.JobState_JobStateFinished, task.GetState())
})
s.Run("task not exist", func() {
s.Error(m.FinishTask(100, result))
})
})
s.Run("GetStatsTaskState", func() {
s.Run("task not exist", func() {
state := m.GetStatsTaskState(100)
s.Equal(indexpb.JobState_JobStateNone, state)
})
s.Run("normal case", func() {
state := m.GetStatsTaskState(1)
s.Equal(indexpb.JobState_JobStateFinished, state)
})
})
s.Run("GetStatsTaskStateBySegmentID", func() {
s.Run("task not exist", func() {
state := m.GetStatsTaskStateBySegmentID(100)
s.Equal(indexpb.JobState_JobStateNone, state)
})
s.Run("normal case", func() {
state := m.GetStatsTaskStateBySegmentID(s.segmentID)
s.Equal(indexpb.JobState_JobStateFinished, state)
})
})
s.Run("RemoveStatsTask", func() {
s.Run("failed case", func() {
catalog.EXPECT().DropStatsTask(mock.Anything, mock.Anything).Return(fmt.Errorf("mock error")).Twice()
s.Error(m.RemoveStatsTaskByTaskID(1))
_, ok := m.tasks[1]
s.True(ok)
s.Error(m.RemoveStatsTaskBySegmentID(s.segmentID))
_, ok = m.segmentStatsTaskIndex[s.segmentID]
s.True(ok)
})
s.Run("normal case", func() {
catalog.EXPECT().DropStatsTask(mock.Anything, mock.Anything).Return(nil).Twice()
s.NoError(m.RemoveStatsTaskByTaskID(1))
_, ok := m.tasks[1]
s.False(ok)
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(nil).Once()
s.NoError(m.AddStatsTask(t))
s.NoError(m.RemoveStatsTaskBySegmentID(s.segmentID))
_, ok = m.segmentStatsTaskIndex[s.segmentID]
s.False(ok)
})
})
}
func Test_statsTaskMeta(t *testing.T) {
suite.Run(t, new(statsTaskMetaSuite))
}

View File

@ -28,6 +28,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/pkg/log"
@ -40,13 +41,23 @@ var _ Task = (*analyzeTask)(nil)
type analyzeTask struct {
taskID int64
nodeID int64
taskInfo *indexpb.AnalyzeResult
taskInfo *workerpb.AnalyzeResult
queueTime time.Time
startTime time.Time
endTime time.Time
req *indexpb.AnalyzeRequest
req *workerpb.AnalyzeRequest
}
func newAnalyzeTask(taskID int64) *analyzeTask {
return &analyzeTask{
taskID: taskID,
taskInfo: &workerpb.AnalyzeResult{
TaskID: taskID,
State: indexpb.JobState_JobStateInit,
},
}
}
func (at *analyzeTask) GetTaskID() int64 {
@ -57,7 +68,7 @@ func (at *analyzeTask) GetNodeID() int64 {
return at.nodeID
}
func (at *analyzeTask) ResetNodeID() {
func (at *analyzeTask) ResetTask(mt *meta) {
at.nodeID = 0
}
@ -124,33 +135,10 @@ func (at *analyzeTask) PreCheck(ctx context.Context, dependency *taskScheduler)
if t == nil {
log.Ctx(ctx).Info("task is nil, delete it", zap.Int64("taskID", at.GetTaskID()))
at.SetState(indexpb.JobState_JobStateNone, "analyze task is nil")
return true
return false
}
var storageConfig *indexpb.StorageConfig
if Params.CommonCfg.StorageType.GetValue() == "local" {
storageConfig = &indexpb.StorageConfig{
RootPath: Params.LocalStorageCfg.Path.GetValue(),
StorageType: Params.CommonCfg.StorageType.GetValue(),
}
} else {
storageConfig = &indexpb.StorageConfig{
Address: Params.MinioCfg.Address.GetValue(),
AccessKeyID: Params.MinioCfg.AccessKeyID.GetValue(),
SecretAccessKey: Params.MinioCfg.SecretAccessKey.GetValue(),
UseSSL: Params.MinioCfg.UseSSL.GetAsBool(),
BucketName: Params.MinioCfg.BucketName.GetValue(),
RootPath: Params.MinioCfg.RootPath.GetValue(),
UseIAM: Params.MinioCfg.UseIAM.GetAsBool(),
IAMEndpoint: Params.MinioCfg.IAMEndpoint.GetValue(),
StorageType: Params.CommonCfg.StorageType.GetValue(),
Region: Params.MinioCfg.Region.GetValue(),
UseVirtualHost: Params.MinioCfg.UseVirtualHost.GetAsBool(),
CloudProvider: Params.MinioCfg.CloudProvider.GetValue(),
RequestTimeoutMs: Params.MinioCfg.RequestTimeoutMs.GetAsInt64(),
}
}
at.req = &indexpb.AnalyzeRequest{
at.req = &workerpb.AnalyzeRequest{
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
TaskID: at.GetTaskID(),
CollectionID: t.CollectionID,
@ -161,7 +149,7 @@ func (at *analyzeTask) PreCheck(ctx context.Context, dependency *taskScheduler)
Dim: t.Dim,
SegmentStats: make(map[int64]*indexpb.SegmentStats),
Version: t.Version + 1,
StorageConfig: storageConfig,
StorageConfig: createStorageConfig(),
}
// While data analysis is in progress, segments must not be discarded (e.g. by compaction or GC).
@ -179,7 +167,7 @@ func (at *analyzeTask) PreCheck(ctx context.Context, dependency *taskScheduler)
log.Ctx(ctx).Warn("analyze stats task is processing, but segment is nil, delete the task",
zap.Int64("taskID", at.GetTaskID()), zap.Int64("segmentID", segID))
at.SetState(indexpb.JobState_JobStateFailed, fmt.Sprintf("segmentInfo with ID: %d is nil", segID))
return true
return false
}
totalSegmentsRows += info.GetNumOfRows()
@ -194,10 +182,10 @@ func (at *analyzeTask) PreCheck(ctx context.Context, dependency *taskScheduler)
collInfo, err := dependency.handler.GetCollection(ctx, segments[0].GetCollectionID())
if err != nil {
log.Ctx(ctx).Info("analyze task get collection info failed", zap.Int64("collectionID",
log.Ctx(ctx).Warn("analyze task get collection info failed", zap.Int64("collectionID",
segments[0].GetCollectionID()), zap.Error(err))
at.SetState(indexpb.JobState_JobStateInit, err.Error())
return true
return false
}
schema := collInfo.Schema
@ -212,7 +200,7 @@ func (at *analyzeTask) PreCheck(ctx context.Context, dependency *taskScheduler)
dim, err := storage.GetDimFromParams(field.TypeParams)
if err != nil {
at.SetState(indexpb.JobState_JobStateInit, err.Error())
return true
return false
}
at.req.Dim = int64(dim)
@ -221,7 +209,7 @@ func (at *analyzeTask) PreCheck(ctx context.Context, dependency *taskScheduler)
if numClusters < Params.DataCoordCfg.ClusteringCompactionMinCentroidsNum.GetAsInt64() {
log.Ctx(ctx).Info("data size is too small, skip analyze task", zap.Float64("raw data size", totalSegmentsRawDataSize), zap.Int64("num clusters", numClusters), zap.Int64("minimum num clusters required", Params.DataCoordCfg.ClusteringCompactionMinCentroidsNum.GetAsInt64()))
at.SetState(indexpb.JobState_JobStateFinished, "")
return true
return false
}
if numClusters > Params.DataCoordCfg.ClusteringCompactionMaxCentroidsNum.GetAsInt64() {
numClusters = Params.DataCoordCfg.ClusteringCompactionMaxCentroidsNum.GetAsInt64()
@ -233,17 +221,17 @@ func (at *analyzeTask) PreCheck(ctx context.Context, dependency *taskScheduler)
at.req.MaxClusterSizeRatio = Params.DataCoordCfg.ClusteringCompactionMaxClusterSizeRatio.GetAsFloat()
at.req.MaxClusterSize = Params.DataCoordCfg.ClusteringCompactionMaxClusterSize.GetAsSize()
return false
return true
}
func (at *analyzeTask) AssignTask(ctx context.Context, client types.IndexNodeClient) bool {
ctx, cancel := context.WithTimeout(context.Background(), reqTimeoutInterval)
defer cancel()
resp, err := client.CreateJobV2(ctx, &indexpb.CreateJobV2Request{
resp, err := client.CreateJobV2(ctx, &workerpb.CreateJobV2Request{
ClusterID: at.req.GetClusterID(),
TaskID: at.req.GetTaskID(),
JobType: indexpb.JobType_JobTypeAnalyzeJob,
Request: &indexpb.CreateJobV2Request_AnalyzeRequest{
Request: &workerpb.CreateJobV2Request_AnalyzeRequest{
AnalyzeRequest: at.req,
},
})
@ -261,12 +249,12 @@ func (at *analyzeTask) AssignTask(ctx context.Context, client types.IndexNodeCli
return true
}
func (at *analyzeTask) setResult(result *indexpb.AnalyzeResult) {
func (at *analyzeTask) setResult(result *workerpb.AnalyzeResult) {
at.taskInfo = result
}
func (at *analyzeTask) QueryResult(ctx context.Context, client types.IndexNodeClient) {
resp, err := client.QueryJobsV2(ctx, &indexpb.QueryJobsV2Request{
resp, err := client.QueryJobsV2(ctx, &workerpb.QueryJobsV2Request{
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
TaskIDs: []int64{at.GetTaskID()},
JobType: indexpb.JobType_JobTypeAnalyzeJob,
@ -304,7 +292,7 @@ func (at *analyzeTask) QueryResult(ctx context.Context, client types.IndexNodeCl
}
func (at *analyzeTask) DropTaskOnWorker(ctx context.Context, client types.IndexNodeClient) bool {
resp, err := client.DropJobsV2(ctx, &indexpb.DropJobsV2Request{
resp, err := client.DropJobsV2(ctx, &workerpb.DropJobsV2Request{
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
TaskIDs: []UniqueID{at.GetTaskID()},
JobType: indexpb.JobType_JobTypeAnalyzeJob,

View File

@ -26,6 +26,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/pkg/common"
@ -38,17 +39,27 @@ import (
type indexBuildTask struct {
taskID int64
nodeID int64
taskInfo *indexpb.IndexTaskInfo
taskInfo *workerpb.IndexTaskInfo
queueTime time.Time
startTime time.Time
endTime time.Time
req *indexpb.CreateJobRequest
req *workerpb.CreateJobRequest
}
var _ Task = (*indexBuildTask)(nil)
func newIndexBuildTask(taskID int64) *indexBuildTask {
return &indexBuildTask{
taskID: taskID,
taskInfo: &workerpb.IndexTaskInfo{
BuildID: taskID,
State: commonpb.IndexState_Unissued,
},
}
}
func (it *indexBuildTask) GetTaskID() int64 {
return it.taskID
}
@ -57,7 +68,7 @@ func (it *indexBuildTask) GetNodeID() int64 {
return it.nodeID
}
func (it *indexBuildTask) ResetNodeID() {
func (it *indexBuildTask) ResetTask(mt *meta) {
it.nodeID = 0
}
@ -121,51 +132,28 @@ func (it *indexBuildTask) PreCheck(ctx context.Context, dependency *taskSchedule
if !exist || segIndex == nil {
log.Ctx(ctx).Info("index task has not exist in meta table, remove task", zap.Int64("taskID", it.taskID))
it.SetState(indexpb.JobState_JobStateNone, "index task has not exist in meta table")
return true
return false
}
segment := dependency.meta.GetSegment(segIndex.SegmentID)
if !isSegmentHealthy(segment) || !dependency.meta.indexMeta.IsIndexExist(segIndex.CollectionID, segIndex.IndexID) {
log.Ctx(ctx).Info("task is no need to build index, remove it", zap.Int64("taskID", it.taskID))
it.SetState(indexpb.JobState_JobStateNone, "task is no need to build index")
return true
return false
}
indexParams := dependency.meta.indexMeta.GetIndexParams(segIndex.CollectionID, segIndex.IndexID)
indexType := GetIndexType(indexParams)
if isFlatIndex(indexType) || segIndex.NumRows < Params.DataCoordCfg.MinSegmentNumRowsToEnableIndex.GetAsInt64() {
log.Ctx(ctx).Info("segment does not need index really", zap.Int64("taskID", it.taskID),
zap.Int64("segmentID", segIndex.SegmentID), zap.Int64("num rows", segIndex.NumRows))
it.SetStartTime(time.Now())
it.SetEndTime(time.Now())
it.SetState(indexpb.JobState_JobStateFinished, "fake finished index success")
return true
return false
}
typeParams := dependency.meta.indexMeta.GetTypeParams(segIndex.CollectionID, segIndex.IndexID)
var storageConfig *indexpb.StorageConfig
if Params.CommonCfg.StorageType.GetValue() == "local" {
storageConfig = &indexpb.StorageConfig{
RootPath: Params.LocalStorageCfg.Path.GetValue(),
StorageType: Params.CommonCfg.StorageType.GetValue(),
}
} else {
storageConfig = &indexpb.StorageConfig{
Address: Params.MinioCfg.Address.GetValue(),
AccessKeyID: Params.MinioCfg.AccessKeyID.GetValue(),
SecretAccessKey: Params.MinioCfg.SecretAccessKey.GetValue(),
UseSSL: Params.MinioCfg.UseSSL.GetAsBool(),
SslCACert: Params.MinioCfg.SslCACert.GetValue(),
BucketName: Params.MinioCfg.BucketName.GetValue(),
RootPath: Params.MinioCfg.RootPath.GetValue(),
UseIAM: Params.MinioCfg.UseIAM.GetAsBool(),
IAMEndpoint: Params.MinioCfg.IAMEndpoint.GetValue(),
StorageType: Params.CommonCfg.StorageType.GetValue(),
Region: Params.MinioCfg.Region.GetValue(),
UseVirtualHost: Params.MinioCfg.UseVirtualHost.GetAsBool(),
CloudProvider: Params.MinioCfg.CloudProvider.GetValue(),
RequestTimeoutMs: Params.MinioCfg.RequestTimeoutMs.GetAsInt64(),
}
}
fieldID := dependency.meta.indexMeta.GetFieldIDByIndexID(segIndex.CollectionID, segIndex.IndexID)
binlogIDs := getBinLogIDs(segment, fieldID)
if isDiskANNIndex(GetIndexType(indexParams)) {
@ -174,14 +162,14 @@ func (it *indexBuildTask) PreCheck(ctx context.Context, dependency *taskSchedule
if err != nil {
log.Ctx(ctx).Warn("failed to append index build params", zap.Int64("taskID", it.taskID), zap.Error(err))
it.SetState(indexpb.JobState_JobStateInit, err.Error())
return true
return false
}
}
collectionInfo, err := dependency.handler.GetCollection(ctx, segment.GetCollectionID())
if err != nil {
log.Ctx(ctx).Info("index builder get collection info failed", zap.Int64("collectionID", segment.GetCollectionID()), zap.Error(err))
return true
return false
}
schema := collectionInfo.Schema
@ -233,12 +221,12 @@ func (it *indexBuildTask) PreCheck(ctx context.Context, dependency *taskSchedule
}
}
it.req = &indexpb.CreateJobRequest{
it.req = &workerpb.CreateJobRequest{
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath),
BuildID: it.taskID,
IndexVersion: segIndex.IndexVersion + 1,
StorageConfig: storageConfig,
StorageConfig: createStorageConfig(),
IndexParams: indexParams,
TypeParams: typeParams,
NumRows: segIndex.NumRows,
@ -257,17 +245,17 @@ func (it *indexBuildTask) PreCheck(ctx context.Context, dependency *taskSchedule
}
log.Ctx(ctx).Info("index task pre check successfully", zap.Int64("taskID", it.GetTaskID()))
return false
return true
}
func (it *indexBuildTask) AssignTask(ctx context.Context, client types.IndexNodeClient) bool {
ctx, cancel := context.WithTimeout(context.Background(), reqTimeoutInterval)
defer cancel()
resp, err := client.CreateJobV2(ctx, &indexpb.CreateJobV2Request{
resp, err := client.CreateJobV2(ctx, &workerpb.CreateJobV2Request{
ClusterID: it.req.GetClusterID(),
TaskID: it.req.GetBuildID(),
JobType: indexpb.JobType_JobTypeIndexJob,
Request: &indexpb.CreateJobV2Request_IndexRequest{
Request: &workerpb.CreateJobV2Request_IndexRequest{
IndexRequest: it.req,
},
})
@ -285,12 +273,12 @@ func (it *indexBuildTask) AssignTask(ctx context.Context, client types.IndexNode
return true
}
func (it *indexBuildTask) setResult(info *indexpb.IndexTaskInfo) {
func (it *indexBuildTask) setResult(info *workerpb.IndexTaskInfo) {
it.taskInfo = info
}
func (it *indexBuildTask) QueryResult(ctx context.Context, node types.IndexNodeClient) {
resp, err := node.QueryJobsV2(ctx, &indexpb.QueryJobsV2Request{
resp, err := node.QueryJobsV2(ctx, &workerpb.QueryJobsV2Request{
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
TaskIDs: []UniqueID{it.GetTaskID()},
JobType: indexpb.JobType_JobTypeIndexJob,
@ -326,7 +314,7 @@ func (it *indexBuildTask) QueryResult(ctx context.Context, node types.IndexNodeC
}
func (it *indexBuildTask) DropTaskOnWorker(ctx context.Context, client types.IndexNodeClient) bool {
resp, err := client.DropJobsV2(ctx, &indexpb.DropJobsV2Request{
resp, err := client.DropJobsV2(ctx, &workerpb.DropJobsV2Request{
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
TaskIDs: []UniqueID{it.GetTaskID()},
JobType: indexpb.JobType_JobTypeIndexJob,

View File

@ -24,8 +24,10 @@ import (
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/internal/datacoord/allocator"
"github.com/milvus-io/milvus/internal/datacoord/session"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
@ -58,6 +60,7 @@ type taskScheduler struct {
chunkManager storage.ChunkManager
indexEngineVersionManager IndexEngineVersionManager
handler Handler
allocator allocator.Allocator
}
func newTaskScheduler(
@ -66,6 +69,7 @@ func newTaskScheduler(
chunkManager storage.ChunkManager,
indexEngineVersionManager IndexEngineVersionManager,
handler Handler,
allocator allocator.Allocator,
) *taskScheduler {
ctx, cancel := context.WithCancel(ctx)
@ -83,6 +87,7 @@ func newTaskScheduler(
chunkManager: chunkManager,
handler: handler,
indexEngineVersionManager: indexEngineVersionManager,
allocator: allocator,
}
ts.reloadFromKV()
return ts
@ -110,7 +115,7 @@ func (s *taskScheduler) reloadFromKV() {
s.tasks[segIndex.BuildID] = &indexBuildTask{
taskID: segIndex.BuildID,
nodeID: segIndex.NodeID,
taskInfo: &indexpb.IndexTaskInfo{
taskInfo: &workerpb.IndexTaskInfo{
BuildID: segIndex.BuildID,
State: segIndex.IndexState,
FailReason: segIndex.FailReason,
@ -129,7 +134,7 @@ func (s *taskScheduler) reloadFromKV() {
s.tasks[taskID] = &analyzeTask{
taskID: taskID,
nodeID: t.NodeID,
taskInfo: &indexpb.AnalyzeResult{
taskInfo: &workerpb.AnalyzeResult{
TaskID: taskID,
State: t.State,
FailReason: t.FailReason,
@ -158,9 +163,20 @@ func (s *taskScheduler) enqueue(task Task) {
taskID := task.GetTaskID()
if _, ok := s.tasks[taskID]; !ok {
s.tasks[taskID] = task
task.SetQueueTime(time.Now())
log.Info("taskScheduler enqueue task", zap.Int64("taskID", taskID))
}
}
func (s *taskScheduler) AbortTask(taskID int64) {
s.RLock()
task, ok := s.tasks[taskID]
s.RUnlock()
if ok {
s.taskLock.Lock(taskID)
task.SetState(indexpb.JobState_JobStateFailed, "canceled")
s.taskLock.Unlock(taskID)
}
task.SetQueueTime(time.Now())
log.Info("taskScheduler enqueue task", zap.Int64("taskID", taskID))
}
func (s *taskScheduler) schedule() {
@ -234,99 +250,21 @@ func (s *taskScheduler) process(taskID UniqueID) bool {
}
state := task.GetState()
log.Ctx(s.ctx).Info("task is processing", zap.Int64("taskID", taskID),
zap.String("state", state.String()))
zap.String("task type", task.GetTaskType()), zap.String("state", state.String()))
switch state {
case indexpb.JobState_JobStateNone:
s.removeTask(taskID)
case indexpb.JobState_JobStateInit:
// 0. pre check task
skip := task.PreCheck(s.ctx, s)
if skip {
return true
}
// 1. pick an indexNode client
nodeID, client := s.nodeManager.PickClient()
if client == nil {
log.Ctx(s.ctx).Debug("pick client failed")
return false
}
log.Ctx(s.ctx).Info("pick client success", zap.Int64("taskID", taskID), zap.Int64("nodeID", nodeID))
// 2. update version
if err := task.UpdateVersion(s.ctx, s.meta); err != nil {
log.Ctx(s.ctx).Warn("update task version failed", zap.Int64("taskID", taskID), zap.Error(err))
return false
}
log.Ctx(s.ctx).Info("update task version success", zap.Int64("taskID", taskID))
// 3. assign task to indexNode
success := task.AssignTask(s.ctx, client)
if !success {
log.Ctx(s.ctx).Warn("assign task to client failed", zap.Int64("taskID", taskID),
zap.String("new state", task.GetState().String()), zap.String("fail reason", task.GetFailReason()))
// If the problem is caused by the task itself, subsequent tasks will not be skipped.
// If etcd fails or fails to send tasks to the node, the subsequent tasks will be skipped.
return false
}
log.Ctx(s.ctx).Info("assign task to client success", zap.Int64("taskID", taskID), zap.Int64("nodeID", nodeID))
// 4. update meta state
if err := task.UpdateMetaBuildingState(nodeID, s.meta); err != nil {
log.Ctx(s.ctx).Warn("update meta building state failed", zap.Int64("taskID", taskID), zap.Error(err))
task.SetState(indexpb.JobState_JobStateRetry, "update meta building state failed")
return false
}
task.SetStartTime(time.Now())
queueingTime := task.GetStartTime().Sub(task.GetQueueTime())
if queueingTime > Params.DataCoordCfg.TaskSlowThreshold.GetAsDuration(time.Second) {
log.Warn("task queueing time is too long", zap.Int64("taskID", taskID),
zap.Int64("queueing time(ms)", queueingTime.Milliseconds()))
}
metrics.DataCoordTaskExecuteLatency.
WithLabelValues(task.GetTaskType(), metrics.Pending).Observe(float64(queueingTime.Milliseconds()))
log.Ctx(s.ctx).Info("update task meta state to InProgress success", zap.Int64("taskID", taskID),
zap.Int64("nodeID", nodeID))
return s.processInit(task)
case indexpb.JobState_JobStateFinished, indexpb.JobState_JobStateFailed:
if err := task.SetJobInfo(s.meta); err != nil {
log.Ctx(s.ctx).Warn("update task info failed", zap.Error(err))
return true
}
task.SetEndTime(time.Now())
runningTime := task.GetEndTime().Sub(task.GetStartTime())
if runningTime > Params.DataCoordCfg.TaskSlowThreshold.GetAsDuration(time.Second) {
log.Warn("task running time is too long", zap.Int64("taskID", taskID),
zap.Int64("running time(ms)", runningTime.Milliseconds()))
}
metrics.DataCoordTaskExecuteLatency.
WithLabelValues(task.GetTaskType(), metrics.Executing).Observe(float64(runningTime.Milliseconds()))
client, exist := s.nodeManager.GetClientByID(task.GetNodeID())
if exist {
if !task.DropTaskOnWorker(s.ctx, client) {
return true
}
}
s.removeTask(taskID)
return s.processFinished(task)
case indexpb.JobState_JobStateRetry:
client, exist := s.nodeManager.GetClientByID(task.GetNodeID())
if exist {
if !task.DropTaskOnWorker(s.ctx, client) {
return true
}
}
task.SetState(indexpb.JobState_JobStateInit, "")
task.ResetNodeID()
return s.processRetry(task)
default:
// state: in_progress
client, exist := s.nodeManager.GetClientByID(task.GetNodeID())
if exist {
task.QueryResult(s.ctx, client)
return true
}
task.SetState(indexpb.JobState_JobStateRetry, "")
return s.processInProgress(task)
}
return true
}
@ -406,3 +344,105 @@ func (s *taskScheduler) collectTaskMetrics() {
}
}
}
func (s *taskScheduler) processInit(task Task) bool {
// 0. pre check task
// Determine whether the task can be performed or whether it is truly necessary.
// For example, a FLAT index does not need an actual build, so checkPass is false.
checkPass := task.PreCheck(s.ctx, s)
if !checkPass {
return true
}
// 1. pick an indexNode client
nodeID, client := s.nodeManager.PickClient()
if client == nil {
log.Ctx(s.ctx).Debug("pick client failed")
return false
}
log.Ctx(s.ctx).Info("pick client success", zap.Int64("taskID", task.GetTaskID()), zap.Int64("nodeID", nodeID))
// 2. update version
if err := task.UpdateVersion(s.ctx, s.meta); err != nil {
log.Ctx(s.ctx).Warn("update task version failed", zap.Int64("taskID", task.GetTaskID()), zap.Error(err))
return false
}
log.Ctx(s.ctx).Info("update task version success", zap.Int64("taskID", task.GetTaskID()))
// 3. assign task to indexNode
success := task.AssignTask(s.ctx, client)
if !success {
log.Ctx(s.ctx).Warn("assign task to client failed", zap.Int64("taskID", task.GetTaskID()),
zap.String("new state", task.GetState().String()), zap.String("fail reason", task.GetFailReason()))
// If the problem is caused by the task itself, subsequent tasks will not be skipped.
// If etcd fails or fails to send tasks to the node, the subsequent tasks will be skipped.
return false
}
log.Ctx(s.ctx).Info("assign task to client success", zap.Int64("taskID", task.GetTaskID()), zap.Int64("nodeID", nodeID))
// 4. update meta state
if err := task.UpdateMetaBuildingState(nodeID, s.meta); err != nil {
log.Ctx(s.ctx).Warn("update meta building state failed", zap.Int64("taskID", task.GetTaskID()), zap.Error(err))
task.SetState(indexpb.JobState_JobStateRetry, "update meta building state failed")
return false
}
task.SetStartTime(time.Now())
queueingTime := task.GetStartTime().Sub(task.GetQueueTime())
if queueingTime > Params.DataCoordCfg.TaskSlowThreshold.GetAsDuration(time.Second) {
log.Warn("task queueing time is too long", zap.Int64("taskID", task.GetTaskID()),
zap.Int64("queueing time(ms)", queueingTime.Milliseconds()))
}
metrics.DataCoordTaskExecuteLatency.
WithLabelValues(task.GetTaskType(), metrics.Pending).Observe(float64(queueingTime.Milliseconds()))
log.Ctx(s.ctx).Info("update task meta state to InProgress success", zap.Int64("taskID", task.GetTaskID()),
zap.Int64("nodeID", nodeID))
return s.processInProgress(task)
}
func (s *taskScheduler) processFinished(task Task) bool {
if err := task.SetJobInfo(s.meta); err != nil {
log.Ctx(s.ctx).Warn("update task info failed", zap.Error(err))
return true
}
task.SetEndTime(time.Now())
runningTime := task.GetEndTime().Sub(task.GetStartTime())
if runningTime > Params.DataCoordCfg.TaskSlowThreshold.GetAsDuration(time.Second) {
log.Warn("task running time is too long", zap.Int64("taskID", task.GetTaskID()),
zap.Int64("running time(ms)", runningTime.Milliseconds()))
}
metrics.DataCoordTaskExecuteLatency.
WithLabelValues(task.GetTaskType(), metrics.Executing).Observe(float64(runningTime.Milliseconds()))
client, exist := s.nodeManager.GetClientByID(task.GetNodeID())
if exist {
if !task.DropTaskOnWorker(s.ctx, client) {
return true
}
}
s.removeTask(task.GetTaskID())
return true
}
func (s *taskScheduler) processRetry(task Task) bool {
client, exist := s.nodeManager.GetClientByID(task.GetNodeID())
if exist {
if !task.DropTaskOnWorker(s.ctx, client) {
return true
}
}
task.SetState(indexpb.JobState_JobStateInit, "")
task.ResetTask(s.meta)
return true
}
func (s *taskScheduler) processInProgress(task Task) bool {
client, exist := s.nodeManager.GetClientByID(task.GetNodeID())
if exist {
task.QueryResult(s.ctx, client)
if task.GetState() == indexpb.JobState_JobStateFinished || task.GetState() == indexpb.JobState_JobStateFailed {
return s.processFinished(task)
}
return true
}
task.SetState(indexpb.JobState_JobStateRetry, "node does not exist")
return true
}
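Taken together, these helpers replace the old inline switch in process(). Reconstructed from the hunks above, the dispatch now reads roughly as follows; the wrapper name exists only for this sketch, and the surrounding locking and logging are elided:

func (s *taskScheduler) processByState(task Task) bool {
	switch task.GetState() {
	case indexpb.JobState_JobStateNone:
		// nothing left to do, drop the task from the scheduler
		s.removeTask(task.GetTaskID())
		return true
	case indexpb.JobState_JobStateInit:
		// PreCheck, pick a worker, bump the version, assign, update meta to building
		return s.processInit(task)
	case indexpb.JobState_JobStateFinished, indexpb.JobState_JobStateFailed:
		// persist the result, record metrics, drop the job on the worker
		return s.processFinished(task)
	case indexpb.JobState_JobStateRetry:
		// drop the job on the old worker, reset the task, go back to Init
		return s.processRetry(task)
	default:
		// in progress: poll the worker; QueryResult may advance to Finished/Failed
		return s.processInProgress(task)
	}
}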

View File

@ -37,6 +37,7 @@ import (
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/indexparamcheck"
"github.com/milvus-io/milvus/pkg/util/merr"
@ -468,8 +469,28 @@ func createIndexMeta(catalog metastore.DataCoordCatalog) *indexMeta {
}
}
func createMeta(catalog metastore.DataCoordCatalog, am *analyzeMeta, im *indexMeta) *meta {
return &meta{
type testMetaOption func(*meta)
func withAnalyzeMeta(am *analyzeMeta) testMetaOption {
return func(mt *meta) {
mt.analyzeMeta = am
}
}
func withIndexMeta(im *indexMeta) testMetaOption {
return func(mt *meta) {
mt.indexMeta = im
}
}
func withStatsTaskMeta(stm *statsTaskMeta) testMetaOption {
return func(mt *meta) {
mt.statsTaskMeta = stm
}
}
func createMeta(catalog metastore.DataCoordCatalog, opts ...testMetaOption) *meta {
mt := &meta{
catalog: catalog,
segments: &SegmentsInfo{
segments: map[UniqueID]*SegmentInfo{
@ -637,9 +658,12 @@ func createMeta(catalog metastore.DataCoordCatalog, am *analyzeMeta, im *indexMe
},
},
},
analyzeMeta: am,
indexMeta: im,
}
for _, opt := range opts {
opt(mt)
}
return mt
}
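With createMeta turned into a functional-options constructor, each test composes only the meta it needs; the existing suites pass analyze and index meta, and withStatsTaskMeta is available for stats-task tests. A typical call, as used in the scheduler suite below:

mt := createMeta(catalog,
	withAnalyzeMeta(s.createAnalyzeMeta(catalog)),
	withIndexMeta(createIndexMeta(catalog)),
)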
type taskSchedulerSuite struct {
@ -720,7 +744,7 @@ func (s *taskSchedulerSuite) createAnalyzeMeta(catalog metastore.DataCoordCatalo
}
}
func (s *taskSchedulerSuite) SetupTest() {
func (s *taskSchedulerSuite) SetupSuite() {
paramtable.Init()
s.initParams()
Params.DataCoordCfg.ClusteringCompactionMinCentroidsNum.SwapTempValue("0")
@ -745,19 +769,20 @@ func (s *taskSchedulerSuite) scheduler(handler Handler) {
return nil
})
catalog.EXPECT().AlterSegmentIndexes(mock.Anything, mock.Anything).Return(nil)
//catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(nil)
in := mocks.NewMockIndexNodeClient(s.T())
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).Return(merr.Success(), nil)
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, request *indexpb.QueryJobsV2Request, option ...grpc.CallOption) (*indexpb.QueryJobsV2Response, error) {
func(ctx context.Context, request *workerpb.QueryJobsV2Request, option ...grpc.CallOption) (*workerpb.QueryJobsV2Response, error) {
once.Do(func() {
time.Sleep(time.Second * 3)
})
switch request.GetJobType() {
case indexpb.JobType_JobTypeIndexJob:
results := make([]*indexpb.IndexTaskInfo, 0)
results := make([]*workerpb.IndexTaskInfo, 0)
for _, buildID := range request.GetTaskIDs() {
results = append(results, &indexpb.IndexTaskInfo{
results = append(results, &workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Finished,
IndexFileKeys: []string{"file1", "file2", "file3"},
@ -767,36 +792,36 @@ func (s *taskSchedulerSuite) scheduler(handler Handler) {
IndexStoreVersion: 1,
})
}
return &indexpb.QueryJobsV2Response{
return &workerpb.QueryJobsV2Response{
Status: merr.Success(),
ClusterID: request.GetClusterID(),
Result: &indexpb.QueryJobsV2Response_IndexJobResults{
IndexJobResults: &indexpb.IndexJobResults{
Result: &workerpb.QueryJobsV2Response_IndexJobResults{
IndexJobResults: &workerpb.IndexJobResults{
Results: results,
},
},
}, nil
case indexpb.JobType_JobTypeAnalyzeJob:
results := make([]*indexpb.AnalyzeResult, 0)
results := make([]*workerpb.AnalyzeResult, 0)
for _, taskID := range request.GetTaskIDs() {
results = append(results, &indexpb.AnalyzeResult{
results = append(results, &workerpb.AnalyzeResult{
TaskID: taskID,
State: indexpb.JobState_JobStateFinished,
CentroidsFile: fmt.Sprintf("%d/stats_file", taskID),
FailReason: "",
})
}
return &indexpb.QueryJobsV2Response{
return &workerpb.QueryJobsV2Response{
Status: merr.Success(),
ClusterID: request.GetClusterID(),
Result: &indexpb.QueryJobsV2Response_AnalyzeJobResults{
AnalyzeJobResults: &indexpb.AnalyzeResults{
Result: &workerpb.QueryJobsV2Response_AnalyzeJobResults{
AnalyzeJobResults: &workerpb.AnalyzeResults{
Results: results,
},
},
}, nil
default:
return &indexpb.QueryJobsV2Response{
return &workerpb.QueryJobsV2Response{
Status: merr.Status(errors.New("unknown job type")),
ClusterID: request.GetClusterID(),
}, nil
@ -808,12 +833,12 @@ func (s *taskSchedulerSuite) scheduler(handler Handler) {
workerManager.EXPECT().PickClient().Return(s.nodeID, in)
workerManager.EXPECT().GetClientByID(mock.Anything).Return(in, true)
mt := createMeta(catalog, s.createAnalyzeMeta(catalog), createIndexMeta(catalog))
mt := createMeta(catalog, withAnalyzeMeta(s.createAnalyzeMeta(catalog)), withIndexMeta(createIndexMeta(catalog)))
cm := mocks.NewChunkManager(s.T())
cm.EXPECT().RootPath().Return("root")
scheduler := newTaskScheduler(ctx, mt, workerManager, cm, newIndexEngineVersionManager(), handler)
scheduler := newTaskScheduler(ctx, mt, workerManager, cm, newIndexEngineVersionManager(), handler, nil)
s.Equal(9, len(scheduler.tasks))
s.Equal(indexpb.JobState_JobStateInit, scheduler.tasks[1].GetState())
s.Equal(indexpb.JobState_JobStateInProgress, scheduler.tasks[2].GetState())
@ -844,7 +869,7 @@ func (s *taskSchedulerSuite) scheduler(handler Handler) {
s.NoError(err)
t := &analyzeTask{
taskID: taskID,
taskInfo: &indexpb.AnalyzeResult{
taskInfo: &workerpb.AnalyzeResult{
TaskID: taskID,
State: indexpb.JobState_JobStateInit,
FailReason: "",
@ -935,7 +960,7 @@ func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
workerManager := session.NewMockWorkerManager(s.T())
mt := createMeta(catalog,
&analyzeMeta{
withAnalyzeMeta(&analyzeMeta{
ctx: context.Background(),
catalog: catalog,
tasks: map[int64]*indexpb.AnalyzeTask{
@ -948,15 +973,15 @@ func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
State: indexpb.JobState_JobStateInit,
},
},
},
&indexMeta{
}),
withIndexMeta(&indexMeta{
RWMutex: sync.RWMutex{},
ctx: ctx,
catalog: catalog,
})
}))
handler := NewNMockHandler(s.T())
scheduler := newTaskScheduler(ctx, mt, workerManager, nil, nil, handler)
scheduler := newTaskScheduler(ctx, mt, workerManager, nil, nil, handler, nil)
mt.segments.DropSegment(1000)
scheduler.scheduleDuration = s.duration
@ -991,11 +1016,11 @@ func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
workerManager := session.NewMockWorkerManager(s.T())
mt := createMeta(catalog, s.createAnalyzeMeta(catalog), &indexMeta{
mt := createMeta(catalog, withAnalyzeMeta(s.createAnalyzeMeta(catalog)), withIndexMeta(&indexMeta{
RWMutex: sync.RWMutex{},
ctx: ctx,
catalog: catalog,
})
}))
handler := NewNMockHandler(s.T())
handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{
@ -1013,7 +1038,7 @@ func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
},
}, nil)
scheduler := newTaskScheduler(ctx, mt, workerManager, nil, nil, handler)
scheduler := newTaskScheduler(ctx, mt, workerManager, nil, nil, handler, nil)
// remove task in meta
err := scheduler.meta.analyzeMeta.DropAnalyzeTask(1)
@ -1070,19 +1095,19 @@ func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
// query result InProgress --> state: InProgress
workerManager.EXPECT().GetClientByID(mock.Anything).Return(in, true).Once()
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, request *indexpb.QueryJobsV2Request, option ...grpc.CallOption) (*indexpb.QueryJobsV2Response, error) {
results := make([]*indexpb.AnalyzeResult, 0)
func(ctx context.Context, request *workerpb.QueryJobsV2Request, option ...grpc.CallOption) (*workerpb.QueryJobsV2Response, error) {
results := make([]*workerpb.AnalyzeResult, 0)
for _, taskID := range request.GetTaskIDs() {
results = append(results, &indexpb.AnalyzeResult{
results = append(results, &workerpb.AnalyzeResult{
TaskID: taskID,
State: indexpb.JobState_JobStateInProgress,
})
}
return &indexpb.QueryJobsV2Response{
return &workerpb.QueryJobsV2Response{
Status: merr.Success(),
ClusterID: request.GetClusterID(),
Result: &indexpb.QueryJobsV2Response_AnalyzeJobResults{
AnalyzeJobResults: &indexpb.AnalyzeResults{
Result: &workerpb.QueryJobsV2Response_AnalyzeJobResults{
AnalyzeJobResults: &workerpb.AnalyzeResults{
Results: results,
},
},
@ -1092,20 +1117,20 @@ func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
// query result Retry --> state: retry
workerManager.EXPECT().GetClientByID(mock.Anything).Return(in, true).Once()
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, request *indexpb.QueryJobsV2Request, option ...grpc.CallOption) (*indexpb.QueryJobsV2Response, error) {
results := make([]*indexpb.AnalyzeResult, 0)
func(ctx context.Context, request *workerpb.QueryJobsV2Request, option ...grpc.CallOption) (*workerpb.QueryJobsV2Response, error) {
results := make([]*workerpb.AnalyzeResult, 0)
for _, taskID := range request.GetTaskIDs() {
results = append(results, &indexpb.AnalyzeResult{
results = append(results, &workerpb.AnalyzeResult{
TaskID: taskID,
State: indexpb.JobState_JobStateRetry,
FailReason: "node analyze data failed",
})
}
return &indexpb.QueryJobsV2Response{
return &workerpb.QueryJobsV2Response{
Status: merr.Success(),
ClusterID: request.GetClusterID(),
Result: &indexpb.QueryJobsV2Response_AnalyzeJobResults{
AnalyzeJobResults: &indexpb.AnalyzeResults{
Result: &workerpb.QueryJobsV2Response_AnalyzeJobResults{
AnalyzeJobResults: &workerpb.AnalyzeResults{
Results: results,
},
},
@ -1122,7 +1147,7 @@ func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
// query result failed --> state: retry
workerManager.EXPECT().GetClientByID(mock.Anything).Return(in, true).Once()
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).Return(&indexpb.QueryJobsV2Response{
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).Return(&workerpb.QueryJobsV2Response{
Status: merr.Status(errors.New("query job failed")),
}, nil).Once()
@ -1136,10 +1161,10 @@ func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
// query result not exists --> state: retry
workerManager.EXPECT().GetClientByID(mock.Anything).Return(in, true).Once()
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).Return(&indexpb.QueryJobsV2Response{
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).Return(&workerpb.QueryJobsV2Response{
Status: merr.Success(),
ClusterID: "",
Result: &indexpb.QueryJobsV2Response_AnalyzeJobResults{},
Result: &workerpb.QueryJobsV2Response_AnalyzeJobResults{},
}, nil).Once()
// retry --> state: init
@ -1164,10 +1189,10 @@ func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
// query result success --> state: finished
workerManager.EXPECT().GetClientByID(mock.Anything).Return(in, true).Once()
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, request *indexpb.QueryJobsV2Request, option ...grpc.CallOption) (*indexpb.QueryJobsV2Response, error) {
results := make([]*indexpb.AnalyzeResult, 0)
func(ctx context.Context, request *workerpb.QueryJobsV2Request, option ...grpc.CallOption) (*workerpb.QueryJobsV2Response, error) {
results := make([]*workerpb.AnalyzeResult, 0)
for _, taskID := range request.GetTaskIDs() {
results = append(results, &indexpb.AnalyzeResult{
results = append(results, &workerpb.AnalyzeResult{
TaskID: taskID,
State: indexpb.JobState_JobStateFinished,
//CentroidsFile: fmt.Sprintf("%d/stats_file", taskID),
@ -1179,11 +1204,11 @@ func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
FailReason: "",
})
}
return &indexpb.QueryJobsV2Response{
return &workerpb.QueryJobsV2Response{
Status: merr.Success(),
ClusterID: request.GetClusterID(),
Result: &indexpb.QueryJobsV2Response_AnalyzeJobResults{
AnalyzeJobResults: &indexpb.AnalyzeResults{
Result: &workerpb.QueryJobsV2Response_AnalyzeJobResults{
AnalyzeJobResults: &workerpb.AnalyzeResults{
Results: results,
},
},
@ -1226,11 +1251,11 @@ func (s *taskSchedulerSuite) Test_indexTaskFailCase() {
workerManager := session.NewMockWorkerManager(s.T())
mt := createMeta(catalog,
&analyzeMeta{
withAnalyzeMeta(&analyzeMeta{
ctx: context.Background(),
catalog: catalog,
},
&indexMeta{
}),
withIndexMeta(&indexMeta{
RWMutex: sync.RWMutex{},
ctx: ctx,
catalog: catalog,
@ -1284,13 +1309,13 @@ func (s *taskSchedulerSuite) Test_indexTaskFailCase() {
},
},
},
})
}))
cm := mocks.NewChunkManager(s.T())
cm.EXPECT().RootPath().Return("ut-index")
handler := NewNMockHandler(s.T())
scheduler := newTaskScheduler(ctx, mt, workerManager, cm, newIndexEngineVersionManager(), handler)
scheduler := newTaskScheduler(ctx, mt, workerManager, cm, newIndexEngineVersionManager(), handler, nil)
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("True")
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("False")
@ -1334,12 +1359,12 @@ func (s *taskSchedulerSuite) Test_indexTaskFailCase() {
// inProgress --> Finished
workerManager.EXPECT().GetClientByID(mock.Anything).Return(in, true).Once()
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).Return(&indexpb.QueryJobsV2Response{
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).Return(&workerpb.QueryJobsV2Response{
Status: &commonpb.Status{ErrorCode: commonpb.ErrorCode_Success},
ClusterID: "",
Result: &indexpb.QueryJobsV2Response_IndexJobResults{
IndexJobResults: &indexpb.IndexJobResults{
Results: []*indexpb.IndexTaskInfo{
Result: &workerpb.QueryJobsV2Response_IndexJobResults{
IndexJobResults: &workerpb.IndexJobResults{
Results: []*workerpb.IndexTaskInfo{
{
BuildID: buildID,
State: commonpb.IndexState_Finished,
@ -1539,7 +1564,7 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true")
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false")
scheduler := newTaskScheduler(ctx, &mt, workerManager, cm, newIndexEngineVersionManager(), handler)
scheduler := newTaskScheduler(ctx, &mt, workerManager, cm, newIndexEngineVersionManager(), handler, nil)
waitTaskDoneFunc := func(sche *taskScheduler) {
for {
@ -1564,12 +1589,12 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
}
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, request *indexpb.QueryJobsV2Request, option ...grpc.CallOption) (*indexpb.QueryJobsV2Response, error) {
func(ctx context.Context, request *workerpb.QueryJobsV2Request, option ...grpc.CallOption) (*workerpb.QueryJobsV2Response, error) {
switch request.GetJobType() {
case indexpb.JobType_JobTypeIndexJob:
results := make([]*indexpb.IndexTaskInfo, 0)
results := make([]*workerpb.IndexTaskInfo, 0)
for _, buildID := range request.GetTaskIDs() {
results = append(results, &indexpb.IndexTaskInfo{
results = append(results, &workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Finished,
IndexFileKeys: []string{"file1", "file2"},
@ -1579,17 +1604,17 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
IndexStoreVersion: 0,
})
}
return &indexpb.QueryJobsV2Response{
return &workerpb.QueryJobsV2Response{
Status: merr.Success(),
ClusterID: request.GetClusterID(),
Result: &indexpb.QueryJobsV2Response_IndexJobResults{
IndexJobResults: &indexpb.IndexJobResults{
Result: &workerpb.QueryJobsV2Response_IndexJobResults{
IndexJobResults: &workerpb.IndexJobResults{
Results: results,
},
},
}, nil
default:
return &indexpb.QueryJobsV2Response{
return &workerpb.QueryJobsV2Response{
Status: merr.Status(errors.New("unknown job type")),
}, nil
}
@ -1598,7 +1623,7 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
s.Run("success to get opt field on startup", func() {
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, in *indexpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
func(ctx context.Context, in *workerpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
s.NotZero(len(in.GetIndexRequest().OptionalScalarFields), "optional scalar field should be set")
return merr.Success(), nil
}).Once()
@ -1621,14 +1646,14 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
} {
mt.collections[collID].Schema.Fields[1].DataType = dataType
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, in *indexpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
func(ctx context.Context, in *workerpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
s.NotZero(len(in.GetIndexRequest().OptionalScalarFields), "optional scalar field should be set")
return merr.Success(), nil
}).Once()
t := &indexBuildTask{
taskID: buildID,
nodeID: nodeID,
taskInfo: &indexpb.IndexTaskInfo{
taskInfo: &workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Unissued,
FailReason: "",
@ -1644,14 +1669,14 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
s.Run("enqueue returns empty optional field when cfg disable", func() {
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false")
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, in *indexpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
s.Zero(len(in.GetIndexRequest().OptionalScalarFields), "optional scalar field should not be set")
func(ctx context.Context, in *workerpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
s.Zero(len(in.GetIndexRequest().OptionalScalarFields), "optional scalar field should not be set")
return merr.Success(), nil
}).Once()
t := &indexBuildTask{
taskID: buildID,
nodeID: nodeID,
taskInfo: &indexpb.IndexTaskInfo{
taskInfo: &workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Unissued,
FailReason: "",
@ -1669,14 +1694,14 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
} {
mt.collections[collID].Schema.Fields[0].DataType = dataType
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, in *indexpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
func(ctx context.Context, in *workerpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
s.Zero(len(in.GetIndexRequest().OptionalScalarFields), "optional scalar field should not be set")
return merr.Success(), nil
}).Once()
t := &indexBuildTask{
taskID: buildID,
nodeID: nodeID,
taskInfo: &indexpb.IndexTaskInfo{
taskInfo: &workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Unissued,
FailReason: "",
@ -1699,14 +1724,14 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
} {
mt.collections[collID].Schema.Fields[1].DataType = dataType
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, in *indexpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
s.Zero(len(in.GetIndexRequest().OptionalScalarFields), "optional scalar field should not be set")
func(ctx context.Context, in *workerpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
s.Zero(len(in.GetIndexRequest().OptionalScalarFields), "optional scalar field should not be set")
return merr.Success(), nil
}).Once()
t := &indexBuildTask{
taskID: buildID,
nodeID: nodeID,
taskInfo: &indexpb.IndexTaskInfo{
taskInfo: &workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Unissued,
FailReason: "",
@ -1722,14 +1747,14 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true")
mt.collections[collID].Schema.Fields[1].IsPartitionKey = false
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, in *indexpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
s.Zero(len(in.GetIndexRequest().OptionalScalarFields), "optional scalar field should not be set")
func(ctx context.Context, in *workerpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
s.Zero(len(in.GetIndexRequest().OptionalScalarFields), "optional scalar field should not be set")
return merr.Success(), nil
}).Once()
t := &indexBuildTask{
taskID: buildID,
nodeID: nodeID,
taskInfo: &indexpb.IndexTaskInfo{
taskInfo: &workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Unissued,
FailReason: "",
@ -1743,14 +1768,14 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
s.Run("enqueue partitionKeyIsolation is false when schema is not set", func() {
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true")
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, in *indexpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
func(ctx context.Context, in *workerpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
s.Equal(in.GetIndexRequest().PartitionKeyIsolation, false)
return merr.Success(), nil
}).Once()
t := &indexBuildTask{
taskID: buildID,
nodeID: nodeID,
taskInfo: &indexpb.IndexTaskInfo{
taskInfo: &workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Unissued,
FailReason: "",
@ -1776,20 +1801,20 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
handler_isolation := NewNMockHandler(s.T())
handler_isolation.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(isoCollInfo, nil)
scheduler_isolation := newTaskScheduler(ctx, &mt, workerManager, cm, newIndexEngineVersionManager(), handler_isolation)
scheduler_isolation := newTaskScheduler(ctx, &mt, workerManager, cm, newIndexEngineVersionManager(), handler_isolation, nil)
scheduler_isolation.Start()
s.Run("enqueue partitionKeyIsolation is false when MV not enabled", func() {
paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false")
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, in *indexpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
func(ctx context.Context, in *workerpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
s.Equal(in.GetIndexRequest().PartitionKeyIsolation, false)
return merr.Success(), nil
}).Once()
t := &indexBuildTask{
taskID: buildID,
nodeID: nodeID,
taskInfo: &indexpb.IndexTaskInfo{
taskInfo: &workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Unissued,
FailReason: "",
@ -1805,14 +1830,14 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false")
isoCollInfo.Properties[common.PartitionKeyIsolationKey] = "true"
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, in *indexpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
func(ctx context.Context, in *workerpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
s.Equal(in.GetIndexRequest().PartitionKeyIsolation, true)
return merr.Success(), nil
}).Once()
t := &indexBuildTask{
taskID: buildID,
nodeID: nodeID,
taskInfo: &indexpb.IndexTaskInfo{
taskInfo: &workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Unissued,
FailReason: "",
@ -1828,14 +1853,14 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false")
isoCollInfo.Properties[common.PartitionKeyIsolationKey] = "invalid"
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).RunAndReturn(
func(ctx context.Context, in *indexpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
func(ctx context.Context, in *workerpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
s.Equal(in.GetIndexRequest().PartitionKeyIsolation, false)
return merr.Success(), nil
}).Once()
t := &indexBuildTask{
taskID: buildID,
nodeID: nodeID,
taskInfo: &indexpb.IndexTaskInfo{
taskInfo: &workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Unissued,
FailReason: "",

View File

@ -0,0 +1,426 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"context"
"fmt"
"time"
"github.com/cockroachdb/errors"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/tsoutil"
)
func (s *Server) startStatsTasksCheckLoop(ctx context.Context) {
s.serverLoopWg.Add(2)
go s.checkStatsTaskLoop(ctx)
go s.cleanupStatsTasksLoop(ctx)
}
func (s *Server) checkStatsTaskLoop(ctx context.Context) {
log.Info("start checkStatsTaskLoop...")
defer s.serverLoopWg.Done()
ticker := time.NewTicker(Params.DataCoordCfg.TaskCheckInterval.GetAsDuration(time.Second))
defer ticker.Stop()
for {
select {
case <-ctx.Done():
log.Warn("DataCoord context done, exit checkStatsTaskLoop...")
return
case <-ticker.C:
if Params.DataCoordCfg.EnableStatsTask.GetAsBool() {
segments := s.meta.SelectSegments(SegmentFilterFunc(func(seg *SegmentInfo) bool {
return isFlush(seg) && seg.GetLevel() != datapb.SegmentLevel_L0 && !seg.GetIsSorted() && !seg.isCompacting
}))
for _, segment := range segments {
if err := s.createStatsSegmentTask(segment); err != nil {
log.Warn("create stats task for segment failed, wait for retry",
zap.Int64("segmentID", segment.GetID()), zap.Error(err))
continue
}
}
}
case segID := <-s.statsCh:
log.Info("receive new flushed segment", zap.Int64("segmentID", segID))
segment := s.meta.GetSegment(segID)
if segment == nil {
log.Warn("segment is not exist, no need to do stats task", zap.Int64("segmentID", segID))
continue
}
// TODO @xiaocai2333: remove this code once stats tasks can be created for importing segments
if segment.GetIsImporting() {
log.Info("segment is importing, skip stats task", zap.Int64("segmentID", segID))
select {
case s.buildIndexCh <- segID:
default:
}
continue
}
if err := s.createStatsSegmentTask(segment); err != nil {
log.Warn("create stats task for segment failed, wait for retry",
zap.Int64("segmentID", segment.ID), zap.Error(err))
continue
}
}
}
}
// cleanupStatsTasksLoop cleans up the finished/failed stats tasks
func (s *Server) cleanupStatsTasksLoop(ctx context.Context) {
log.Info("start cleanupStatsTasksLoop...")
defer s.serverLoopWg.Done()
ticker := time.NewTicker(Params.DataCoordCfg.GCInterval.GetAsDuration(time.Second))
defer ticker.Stop()
for {
select {
case <-ctx.Done():
log.Warn("DataCoord context done, exit cleanupStatsTasksLoop...")
return
case <-ticker.C:
start := time.Now()
log.Info("start cleanupUnusedStatsTasks...", zap.Time("startAt", start))
taskIDs := s.meta.statsTaskMeta.CanCleanedTasks()
for _, taskID := range taskIDs {
if err := s.meta.statsTaskMeta.RemoveStatsTaskByTaskID(taskID); err != nil {
// ignore the error; if removal fails, it will be retried on the next GC round
log.Warn("clean up stats task failed", zap.Int64("taskID", taskID), zap.Error(err))
}
}
log.Info("recycleUnusedStatsTasks done", zap.Duration("timeCost", time.Since(start)))
}
}
}
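Both loops are plain tickers: checkStatsTaskLoop scans for flushed, unsorted, non-L0, non-compacting segments every TaskCheckInterval (and also reacts to segment IDs arriving on statsCh), while cleanupStatsTasksLoop drops finished or failed stats tasks every GCInterval. The whole feature is gated by DataCoordCfg.EnableStatsTask. As a sketch only (this call does not appear in the diff, it just mirrors the SwapTempValue pattern the tests use on other params), a test could toggle the gate like this:

// Temporarily enable stats tasks for a test, assuming EnableStatsTask
// supports SwapTempValue like the other ParamItems used in this suite.
Params.DataCoordCfg.EnableStatsTask.SwapTempValue("true")
defer Params.DataCoordCfg.EnableStatsTask.SwapTempValue("false")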
func (s *Server) createStatsSegmentTask(segment *SegmentInfo) error {
if segment.GetIsSorted() || segment.GetIsImporting() {
// TODO @xiaocai2333: allow stats tasks for importing segments
log.Info("segment is already sorted or importing, skip stats task", zap.Int64("segmentID", segment.GetID()))
return nil
}
start, _, err := s.allocator.AllocN(2)
if err != nil {
return err
}
t := &indexpb.StatsTask{
CollectionID: segment.GetCollectionID(),
PartitionID: segment.GetPartitionID(),
SegmentID: segment.GetID(),
InsertChannel: segment.GetInsertChannel(),
TaskID: start,
Version: 0,
NodeID: 0,
State: indexpb.JobState_JobStateInit,
FailReason: "",
TargetSegmentID: start + 1,
}
if err = s.meta.statsTaskMeta.AddStatsTask(t); err != nil {
if errors.Is(err, merr.ErrTaskDuplicate) {
return nil
}
return err
}
s.taskScheduler.enqueue(newStatsTask(t.GetTaskID(), t.GetSegmentID(), t.GetTargetSegmentID(), s.buildIndexCh))
return nil
}
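Note the ID budget here: a single AllocN(2) returns two consecutive IDs, where the first becomes the stats task ID and the second the target (sorted) segment ID, so the replacement segment is always taskID + 1. A minimal illustration of that pairing (the helper name is hypothetical):

// allocTaskAndTargetIDs mirrors the AllocN(2) call above: one allocation
// yields both the stats task ID and the ID of the sorted segment that
// the task will produce.
func allocTaskAndTargetIDs(alloc allocator.Allocator) (taskID, targetSegmentID int64, err error) {
	start, _, err := alloc.AllocN(2)
	if err != nil {
		return 0, 0, err
	}
	return start, start + 1, nil
}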
type statsTask struct {
taskID int64
segmentID int64
targetSegmentID int64
nodeID int64
taskInfo *workerpb.StatsResult
queueTime time.Time
startTime time.Time
endTime time.Time
req *workerpb.CreateStatsRequest
buildIndexCh chan UniqueID
}
var _ Task = (*statsTask)(nil)
func newStatsTask(taskID int64, segmentID, targetSegmentID int64, buildIndexCh chan UniqueID) *statsTask {
return &statsTask{
taskID: taskID,
segmentID: segmentID,
targetSegmentID: targetSegmentID,
taskInfo: &workerpb.StatsResult{
TaskID: taskID,
State: indexpb.JobState_JobStateInit,
},
buildIndexCh: buildIndexCh,
}
}
func (st *statsTask) setResult(result *workerpb.StatsResult) {
st.taskInfo = result
}
func (st *statsTask) GetTaskID() int64 {
return st.taskID
}
func (st *statsTask) GetNodeID() int64 {
return st.nodeID
}
func (st *statsTask) ResetTask(mt *meta) {
st.nodeID = 0
// reset isCompacting
mt.SetSegmentsCompacting([]UniqueID{st.segmentID}, false)
}
func (st *statsTask) SetQueueTime(t time.Time) {
st.queueTime = t
}
func (st *statsTask) GetQueueTime() time.Time {
return st.queueTime
}
func (st *statsTask) SetStartTime(t time.Time) {
st.startTime = t
}
func (st *statsTask) GetStartTime() time.Time {
return st.startTime
}
func (st *statsTask) SetEndTime(t time.Time) {
st.endTime = t
}
func (st *statsTask) GetEndTime() time.Time {
return st.endTime
}
func (st *statsTask) GetTaskType() string {
return indexpb.JobType_JobTypeStatsJob.String()
}
func (st *statsTask) CheckTaskHealthy(mt *meta) bool {
seg := mt.GetHealthySegment(st.segmentID)
return seg != nil
}
func (st *statsTask) SetState(state indexpb.JobState, failReason string) {
st.taskInfo.State = state
st.taskInfo.FailReason = failReason
}
func (st *statsTask) GetState() indexpb.JobState {
return st.taskInfo.GetState()
}
func (st *statsTask) GetFailReason() string {
return st.taskInfo.GetFailReason()
}
func (st *statsTask) UpdateVersion(ctx context.Context, meta *meta) error {
// mark compacting
if exist, canDo := meta.CheckAndSetSegmentsCompacting([]UniqueID{st.segmentID}); !exist || !canDo {
log.Warn("segment is not exist or is compacting, skip stats",
zap.Bool("exist", exist), zap.Bool("canDo", canDo))
st.SetState(indexpb.JobState_JobStateNone, "segment is not healthy")
return fmt.Errorf("mark segment compacting failed, isCompacting: %v", !canDo)
}
return meta.statsTaskMeta.UpdateVersion(st.taskID)
}
func (st *statsTask) UpdateMetaBuildingState(nodeID int64, meta *meta) error {
st.nodeID = nodeID
return meta.statsTaskMeta.UpdateBuildingTask(st.taskID, nodeID)
}
func (st *statsTask) PreCheck(ctx context.Context, dependency *taskScheduler) bool {
// set segment compacting
log := log.Ctx(ctx).With(zap.Int64("taskID", st.taskID), zap.Int64("segmentID", st.segmentID))
segment := dependency.meta.GetHealthySegment(st.segmentID)
if segment == nil {
log.Warn("segment is node healthy, skip stats")
st.SetState(indexpb.JobState_JobStateNone, "segment is not healthy")
return false
}
if segment.GetIsSorted() {
log.Info("stats task is marked as sorted, skip stats")
st.SetState(indexpb.JobState_JobStateNone, "segment is marked as sorted")
return false
}
collInfo, err := dependency.handler.GetCollection(ctx, segment.GetCollectionID())
if err != nil {
log.Warn("stats task get collection info failed", zap.Int64("collectionID",
segment.GetCollectionID()), zap.Error(err))
st.SetState(indexpb.JobState_JobStateInit, err.Error())
return false
}
collTtl, err := getCollectionTTL(collInfo.Properties)
if err != nil {
log.Warn("stats task get collection ttl failed", zap.Int64("collectionID", segment.GetCollectionID()), zap.Error(err))
st.SetState(indexpb.JobState_JobStateInit, err.Error())
return false
}
start, end, err := dependency.allocator.AllocN(segment.getSegmentSize() / Params.DataNodeCfg.BinLogMaxSize.GetAsInt64() * int64(len(collInfo.Schema.GetFields())) * 2)
if err != nil {
log.Warn("stats task alloc logID failed", zap.Int64("collectionID", segment.GetCollectionID()), zap.Error(err))
st.SetState(indexpb.JobState_JobStateInit, err.Error())
return false
}
st.req = &workerpb.CreateStatsRequest{
ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
TaskID: st.GetTaskID(),
CollectionID: segment.GetCollectionID(),
PartitionID: segment.GetPartitionID(),
InsertChannel: segment.GetInsertChannel(),
SegmentID: segment.GetID(),
InsertLogs: segment.GetBinlogs(),
DeltaLogs: segment.GetDeltalogs(),
StorageConfig: createStorageConfig(),
Schema: collInfo.Schema,
TargetSegmentID: st.targetSegmentID,
StartLogID: start,
EndLogID: end,
NumRows: segment.GetNumOfRows(),
CollectionTtl: collTtl.Nanoseconds(),
CurrentTs: tsoutil.GetCurrentTime(),
BinlogMaxSize: Params.DataNodeCfg.BinLogMaxSize.GetAsUint64(),
}
return true
}
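The AllocN call in PreCheck reserves the worker's entire binlog ID budget up front: roughly (segment size / BinLogMaxSize) output files per field, times two, handed to the worker as the [StartLogID, EndLogID] range so the sort never has to come back to the allocator. A rough worked example with illustrative numbers only:

// Hypothetical sizing: a 1 GiB segment, 64 MiB BinLogMaxSize, 4 fields.
segmentSize := int64(1024 << 20)
binlogMaxSize := int64(64 << 20)
numFields := int64(4)
logIDBudget := segmentSize / binlogMaxSize * numFields * 2 // (1024/64) * 4 * 2 = 128 log IDs
_ = logIDBudget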
func (st *statsTask) AssignTask(ctx context.Context, client types.IndexNodeClient) bool {
ctx, cancel := context.WithTimeout(ctx, reqTimeoutInterval)
defer cancel()
resp, err := client.CreateJobV2(ctx, &workerpb.CreateJobV2Request{
ClusterID: st.req.GetClusterID(),
TaskID: st.req.GetTaskID(),
JobType: indexpb.JobType_JobTypeStatsJob,
Request: &workerpb.CreateJobV2Request_StatsRequest{
StatsRequest: st.req,
},
})
if err := merr.CheckRPCCall(resp, err); err != nil {
log.Ctx(ctx).Warn("assign stats task failed", zap.Int64("taskID", st.taskID),
zap.Int64("segmentID", st.segmentID), zap.Error(err))
st.SetState(indexpb.JobState_JobStateRetry, err.Error())
return false
}
log.Ctx(ctx).Info("assign stats task success", zap.Int64("taskID", st.taskID), zap.Int64("segmentID", st.segmentID))
st.SetState(indexpb.JobState_JobStateInProgress, "")
return true
}
func (st *statsTask) QueryResult(ctx context.Context, client types.IndexNodeClient) {
resp, err := client.QueryJobsV2(ctx, &workerpb.QueryJobsV2Request{
ClusterID: st.req.GetClusterID(),
TaskIDs: []int64{st.GetTaskID()},
JobType: indexpb.JobType_JobTypeStatsJob,
})
if err := merr.CheckRPCCall(resp, err); err != nil {
log.Ctx(ctx).Warn("query stats task result failed", zap.Int64("taskID", st.GetTaskID()),
zap.Int64("segmentID", st.segmentID), zap.Error(err))
st.SetState(indexpb.JobState_JobStateRetry, err.Error())
return
}
for _, result := range resp.GetStatsJobResults().GetResults() {
if result.GetTaskID() == st.GetTaskID() {
log.Ctx(ctx).Info("query stats task result success", zap.Int64("taskID", st.GetTaskID()),
zap.Int64("segmentID", st.segmentID), zap.String("result state", result.GetState().String()),
zap.String("failReason", result.GetFailReason()))
if result.GetState() == indexpb.JobState_JobStateFinished || result.GetState() == indexpb.JobState_JobStateRetry ||
result.GetState() == indexpb.JobState_JobStateFailed {
st.setResult(result)
} else if result.GetState() == indexpb.JobState_JobStateNone {
st.SetState(indexpb.JobState_JobStateRetry, "stats task state is none in info response")
}
// inProgress or unissued/init, keep InProgress state
return
}
}
log.Ctx(ctx).Warn("query stats task result failed, indexNode does not have task info",
zap.Int64("taskID", st.GetTaskID()), zap.Int64("segmentID", st.segmentID))
st.SetState(indexpb.JobState_JobStateRetry, "stats task is not in info response")
}
func (st *statsTask) DropTaskOnWorker(ctx context.Context, client types.IndexNodeClient) bool {
resp, err := client.DropJobsV2(ctx, &workerpb.DropJobsV2Request{
ClusterID: st.req.GetClusterID(),
TaskIDs: []int64{st.GetTaskID()},
JobType: indexpb.JobType_JobTypeStatsJob,
})
if err := merr.CheckRPCCall(resp, err); err != nil {
log.Ctx(ctx).Warn("notify worker drop the stats task failed", zap.Int64("taskID", st.GetTaskID()),
zap.Int64("segmentID", st.segmentID), zap.Error(err))
return false
}
log.Ctx(ctx).Info("drop stats task success", zap.Int64("taskID", st.GetTaskID()),
zap.Int64("segmentID", st.segmentID))
return true
}
func (st *statsTask) SetJobInfo(meta *meta) error {
// first update segment
metricMutation, err := meta.SaveStatsResultSegment(st.segmentID, st.taskInfo)
if err != nil {
log.Warn("save stats result failed", zap.Int64("taskID", st.taskID),
zap.Int64("segmentID", st.segmentID), zap.Error(err))
return err
}
// second update the task meta
if err = meta.statsTaskMeta.FinishTask(st.taskID, st.taskInfo); err != nil {
log.Warn("save stats result failed", zap.Int64("taskID", st.taskID), zap.Error(err))
return err
}
metricMutation.commit()
log.Info("SetJobInfo for stats task success", zap.Int64("taskID", st.taskID),
zap.Int64("oldSegmentID", st.segmentID), zap.Int64("targetSegmentID", st.taskInfo.GetSegmentID()))
if st.buildIndexCh != nil {
select {
case st.buildIndexCh <- st.taskInfo.GetSegmentID():
default:
}
}
return nil
}
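End to end, a stats task moves through the same Task interface as index and analyze tasks: createStatsSegmentTask enqueues it, PreCheck builds the CreateStatsRequest, UpdateVersion marks the segment as compacting, AssignTask ships the request to an indexnode as a JobTypeStatsJob, QueryResult polls the worker, and SetJobInfo persists the sorted segment and optionally kicks index building through buildIndexCh. A compressed sketch of one pass (the function name is hypothetical; the real scheduler spreads these steps across processInit/processInProgress/processFinished and handles errors and retries):

func driveStatsTaskOnce(ctx context.Context, s *taskScheduler, st *statsTask, node types.IndexNodeClient) {
	if !st.PreCheck(ctx, s) { // builds the CreateStatsRequest from segment and collection info
		return
	}
	if err := st.UpdateVersion(ctx, s.meta); err != nil { // marks the segment as compacting
		return
	}
	if !st.AssignTask(ctx, node) { // CreateJobV2 with JobType_JobTypeStatsJob
		return
	}
	st.QueryResult(ctx, node) // QueryJobsV2; copies the worker's terminal state into taskInfo
	if st.GetState() == indexpb.JobState_JobStateFinished {
		_ = st.SetJobInfo(s.meta) // saves the sorted segment and finishes the task meta
	}
}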

View File

@ -0,0 +1,570 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"context"
"fmt"
"sync"
"testing"
"time"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/datacoord/allocator"
catalogmocks "github.com/milvus-io/milvus/internal/metastore/mocks"
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/pkg/common"
)
type statsTaskSuite struct {
suite.Suite
mt *meta
segID int64
taskID int64
targetID int64
}
func Test_statsTaskSuite(t *testing.T) {
suite.Run(t, new(statsTaskSuite))
}
func (s *statsTaskSuite) SetupSuite() {
s.taskID = 1178
s.segID = 1179
s.targetID = 1180
s.mt = &meta{
segments: &SegmentsInfo{
segments: map[int64]*SegmentInfo{
s.segID: {
SegmentInfo: &datapb.SegmentInfo{
ID: s.segID,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "ch1",
NumOfRows: 65535,
State: commonpb.SegmentState_Flushed,
MaxRowNum: 65535,
},
},
},
secondaryIndexes: segmentInfoIndexes{
coll2Segments: map[UniqueID]map[UniqueID]*SegmentInfo{
collID: {
s.segID: {
SegmentInfo: &datapb.SegmentInfo{
ID: s.segID,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "ch1",
NumOfRows: 65535,
State: commonpb.SegmentState_Flushed,
MaxRowNum: 65535,
},
},
},
},
channel2Segments: map[string]map[UniqueID]*SegmentInfo{
"ch1": {
s.segID: {
SegmentInfo: &datapb.SegmentInfo{
ID: s.segID,
CollectionID: collID,
PartitionID: partID,
InsertChannel: "ch1",
NumOfRows: 65535,
State: commonpb.SegmentState_Flushed,
MaxRowNum: 65535,
},
},
},
},
},
compactionTo: map[UniqueID]UniqueID{},
},
statsTaskMeta: &statsTaskMeta{
RWMutex: sync.RWMutex{},
ctx: context.Background(),
catalog: nil,
tasks: map[int64]*indexpb.StatsTask{
s.taskID: {
CollectionID: 1,
PartitionID: 2,
SegmentID: s.segID,
InsertChannel: "ch1",
TaskID: s.taskID,
Version: 0,
NodeID: 0,
State: indexpb.JobState_JobStateInit,
FailReason: "",
},
},
segmentStatsTaskIndex: map[int64]*indexpb.StatsTask{
s.segID: {
CollectionID: 1,
PartitionID: 2,
SegmentID: s.segID,
InsertChannel: "ch1",
TaskID: s.taskID,
Version: 0,
NodeID: 0,
State: indexpb.JobState_JobStateInit,
FailReason: "",
},
},
},
}
}
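The fixture above also shows why statsTaskMeta keeps two maps: tasks is keyed by task ID for scheduler lookups, and segmentStatsTaskIndex is keyed by segment ID so AddStatsTask can reject a second task for the same segment (createStatsSegmentTask treats merr.ErrTaskDuplicate as success). A minimal sketch of that duplicate check, with the method body assumed rather than taken from this diff:

// Assumed shape of the duplicate check behind AddStatsTask: a second
// task for a segment that already has one is rejected with ErrTaskDuplicate.
func (m *statsTaskMeta) hasTaskForSegment(segmentID int64) bool {
	m.RLock()
	defer m.RUnlock()
	_, ok := m.segmentStatsTaskIndex[segmentID]
	return ok
}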
func (s *statsTaskSuite) TestTaskStats_PreCheck() {
st := newStatsTask(s.taskID, s.segID, s.targetID, nil)
s.Equal(s.taskID, st.GetTaskID())
s.Run("queue time", func() {
t := time.Now()
st.SetQueueTime(t)
s.Equal(t, st.GetQueueTime())
})
s.Run("start time", func() {
t := time.Now()
st.SetStartTime(t)
s.Equal(t, st.GetStartTime())
})
s.Run("end time", func() {
t := time.Now()
st.SetEndTime(t)
s.Equal(t, st.GetEndTime())
})
s.Run("CheckTaskHealthy", func() {
s.True(st.CheckTaskHealthy(s.mt))
s.mt.segments.segments[s.segID].State = commonpb.SegmentState_Dropped
s.False(st.CheckTaskHealthy(s.mt))
})
s.Run("UpdateVersion", func() {
catalog := catalogmocks.NewDataCoordCatalog(s.T())
s.mt.statsTaskMeta.catalog = catalog
s.Run("segment is compacting", func() {
s.mt.segments.segments[s.segID].isCompacting = true
s.Error(st.UpdateVersion(context.Background(), s.mt))
})
s.Run("normal case", func() {
s.mt.segments.segments[s.segID].isCompacting = false
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(nil).Once()
s.NoError(st.UpdateVersion(context.Background(), s.mt))
})
s.Run("failed case", func() {
s.mt.segments.segments[s.segID].isCompacting = false
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(fmt.Errorf("error")).Once()
s.Error(st.UpdateVersion(context.Background(), s.mt))
})
})
s.Run("UpdateMetaBuildingState", func() {
catalog := catalogmocks.NewDataCoordCatalog(s.T())
s.mt.statsTaskMeta.catalog = catalog
s.Run("normal case", func() {
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(nil).Once()
s.NoError(st.UpdateMetaBuildingState(1, s.mt))
})
s.Run("update error", func() {
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(fmt.Errorf("error")).Once()
s.Error(st.UpdateMetaBuildingState(1, s.mt))
})
})
s.Run("PreCheck", func() {
catalog := catalogmocks.NewDataCoordCatalog(s.T())
s.mt.statsTaskMeta.catalog = catalog
s.Run("segment not healthy", func() {
s.mt.segments.segments[s.segID].State = commonpb.SegmentState_Dropped
checkPass := st.PreCheck(context.Background(), &taskScheduler{
meta: s.mt,
})
s.False(checkPass)
})
s.Run("segment is sorted", func() {
s.mt.segments.segments[s.segID].State = commonpb.SegmentState_Flushed
s.mt.segments.segments[s.segID].IsSorted = true
checkPass := st.PreCheck(context.Background(), &taskScheduler{
meta: s.mt,
})
s.False(checkPass)
})
s.Run("get collection failed", func() {
s.mt.segments.segments[s.segID].IsSorted = false
handler := NewNMockHandler(s.T())
handler.EXPECT().GetCollection(context.Background(), collID).Return(nil, fmt.Errorf("mock error")).Once()
checkPass := st.PreCheck(context.Background(), &taskScheduler{
meta: s.mt,
handler: handler,
})
s.False(checkPass)
})
s.Run("get collection ttl failed", func() {
handler := NewNMockHandler(s.T())
handler.EXPECT().GetCollection(context.Background(), collID).Return(&collectionInfo{
ID: collID,
Schema: &schemapb.CollectionSchema{
Name: "test_1",
Fields: []*schemapb.FieldSchema{
{
FieldID: 100,
Name: "pk",
IsPrimaryKey: true,
DataType: schemapb.DataType_Int64,
AutoID: true,
},
{
FieldID: 101,
Name: "embedding",
IsPrimaryKey: true,
DataType: schemapb.DataType_FloatVector,
AutoID: true,
TypeParams: []*commonpb.KeyValuePair{
{Key: "dim", Value: "8"},
},
},
},
},
Properties: map[string]string{common.CollectionTTLConfigKey: "false"},
}, nil).Once()
checkPass := st.PreCheck(context.Background(), &taskScheduler{
meta: s.mt,
handler: handler,
})
s.False(checkPass)
})
s.Run("alloc failed", func() {
alloc := allocator.NewMockAllocator(s.T())
alloc.EXPECT().AllocN(mock.Anything).Return(0, 0, fmt.Errorf("mock error"))
handler := NewNMockHandler(s.T())
handler.EXPECT().GetCollection(context.Background(), collID).Return(&collectionInfo{
ID: collID,
Schema: &schemapb.CollectionSchema{
Name: "test_1",
Fields: []*schemapb.FieldSchema{
{
FieldID: 100,
Name: "pk",
IsPrimaryKey: true,
DataType: schemapb.DataType_Int64,
AutoID: true,
},
{
FieldID: 101,
Name: "embedding",
IsPrimaryKey: true,
DataType: schemapb.DataType_FloatVector,
AutoID: true,
TypeParams: []*commonpb.KeyValuePair{
{Key: "dim", Value: "8"},
},
},
},
},
Properties: map[string]string{common.CollectionTTLConfigKey: "100"},
}, nil)
checkPass := st.PreCheck(context.Background(), &taskScheduler{
meta: s.mt,
handler: handler,
allocator: alloc,
})
s.False(checkPass)
})
s.Run("normal case", func() {
alloc := allocator.NewMockAllocator(s.T())
alloc.EXPECT().AllocN(mock.Anything).Return(1, 100, nil)
handler := NewNMockHandler(s.T())
handler.EXPECT().GetCollection(context.Background(), collID).Return(&collectionInfo{
ID: collID,
Schema: &schemapb.CollectionSchema{
Name: "test_1",
Fields: []*schemapb.FieldSchema{
{
FieldID: 100,
Name: "pk",
IsPrimaryKey: true,
DataType: schemapb.DataType_Int64,
AutoID: true,
},
{
FieldID: 101,
Name: "embedding",
IsPrimaryKey: true,
DataType: schemapb.DataType_FloatVector,
AutoID: true,
TypeParams: []*commonpb.KeyValuePair{
{Key: "dim", Value: "8"},
},
},
},
},
Properties: map[string]string{common.CollectionTTLConfigKey: "100"},
}, nil)
checkPass := st.PreCheck(context.Background(), &taskScheduler{
meta: s.mt,
handler: handler,
allocator: alloc,
})
s.True(checkPass)
})
})
s.Run("AssignTask", func() {
s.Run("assign failed", func() {
in := mocks.NewMockIndexNodeClient(s.T())
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).Return(&commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: "mock error",
}, nil)
s.False(st.AssignTask(context.Background(), in))
})
s.Run("assign success", func() {
in := mocks.NewMockIndexNodeClient(s.T())
in.EXPECT().CreateJobV2(mock.Anything, mock.Anything).Return(&commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
Reason: "",
}, nil)
s.True(st.AssignTask(context.Background(), in))
})
})
s.Run("QueryResult", func() {
s.Run("query failed", func() {
in := mocks.NewMockIndexNodeClient(s.T())
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).Return(&workerpb.QueryJobsV2Response{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: "mock failed",
},
}, nil)
st.QueryResult(context.Background(), in)
})
s.Run("state finished", func() {
in := mocks.NewMockIndexNodeClient(s.T())
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).Return(&workerpb.QueryJobsV2Response{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
},
Result: &workerpb.QueryJobsV2Response_StatsJobResults{
StatsJobResults: &workerpb.StatsResults{
Results: []*workerpb.StatsResult{
{
TaskID: s.taskID,
State: indexpb.JobState_JobStateFinished,
FailReason: "",
CollectionID: collID,
PartitionID: partID,
SegmentID: s.segID,
Channel: "ch1",
InsertLogs: nil,
StatsLogs: nil,
DeltaLogs: nil,
TextStatsLogs: nil,
NumRows: 65535,
},
},
},
},
}, nil)
st.QueryResult(context.Background(), in)
s.Equal(indexpb.JobState_JobStateFinished, st.taskInfo.State)
})
s.Run("task none", func() {
in := mocks.NewMockIndexNodeClient(s.T())
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).Return(&workerpb.QueryJobsV2Response{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
},
Result: &workerpb.QueryJobsV2Response_StatsJobResults{
StatsJobResults: &workerpb.StatsResults{
Results: []*workerpb.StatsResult{
{
TaskID: s.taskID,
State: indexpb.JobState_JobStateNone,
FailReason: "",
CollectionID: collID,
PartitionID: partID,
SegmentID: s.segID,
NumRows: 65535,
},
},
},
},
}, nil)
st.QueryResult(context.Background(), in)
s.Equal(indexpb.JobState_JobStateRetry, st.taskInfo.State)
})
s.Run("task not exist", func() {
in := mocks.NewMockIndexNodeClient(s.T())
in.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).Return(&workerpb.QueryJobsV2Response{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
},
Result: &workerpb.QueryJobsV2Response_StatsJobResults{
StatsJobResults: &workerpb.StatsResults{
Results: []*workerpb.StatsResult{},
},
},
}, nil)
st.QueryResult(context.Background(), in)
s.Equal(indexpb.JobState_JobStateRetry, st.taskInfo.State)
})
})
s.Run("DropTaskOnWorker", func() {
s.Run("drop failed", func() {
in := mocks.NewMockIndexNodeClient(s.T())
in.EXPECT().DropJobsV2(mock.Anything, mock.Anything).Return(&commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: "mock error",
}, nil)
s.False(st.DropTaskOnWorker(context.Background(), in))
})
s.Run("drop success", func() {
in := mocks.NewMockIndexNodeClient(s.T())
in.EXPECT().DropJobsV2(mock.Anything, mock.Anything).Return(&commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
Reason: "",
}, nil)
s.True(st.DropTaskOnWorker(context.Background(), in))
})
})
s.Run("SetJobInfo", func() {
st.taskInfo = &workerpb.StatsResult{
TaskID: s.taskID,
State: indexpb.JobState_JobStateFinished,
FailReason: "",
CollectionID: collID,
PartitionID: partID,
SegmentID: s.segID + 1,
Channel: "ch1",
InsertLogs: []*datapb.FieldBinlog{
{
FieldID: 100,
Binlogs: []*datapb.Binlog{{LogID: 1000}, {LogID: 1002}},
},
{
FieldID: 101,
Binlogs: []*datapb.Binlog{{LogID: 1001}, {LogID: 1003}},
},
},
StatsLogs: []*datapb.FieldBinlog{
{
FieldID: 100,
Binlogs: []*datapb.Binlog{{LogID: 1004}},
},
},
TextStatsLogs: map[int64]*datapb.TextIndexStats{
101: {
FieldID: 101,
Version: 1,
Files: []string{"file1", "file2"},
LogSize: 100,
MemorySize: 100,
},
},
NumRows: 65500,
}
s.Run("set target segment failed", func() {
catalog := catalogmocks.NewDataCoordCatalog(s.T())
s.mt.catalog = catalog
catalog.EXPECT().AlterSegments(mock.Anything, mock.Anything, mock.Anything).Return(fmt.Errorf("mock error"))
s.Error(st.SetJobInfo(s.mt))
})
s.Run("update stats task failed", func() {
catalog := catalogmocks.NewDataCoordCatalog(s.T())
s.mt.catalog = catalog
s.mt.statsTaskMeta.catalog = catalog
catalog.EXPECT().AlterSegments(mock.Anything, mock.Anything, mock.Anything).Return(nil)
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(fmt.Errorf("mock error"))
s.Error(st.SetJobInfo(s.mt))
})
s.Run("normal case", func() {
catalog := catalogmocks.NewDataCoordCatalog(s.T())
s.mt.catalog = catalog
s.mt.statsTaskMeta.catalog = catalog
catalog.EXPECT().AlterSegments(mock.Anything, mock.Anything, mock.Anything).Return(nil)
catalog.EXPECT().SaveStatsTask(mock.Anything, mock.Anything).Return(nil)
s.NoError(st.SetJobInfo(s.mt))
s.NotNil(s.mt.GetHealthySegment(s.segID + 1))
s.Equal(indexpb.JobState_JobStateFinished, s.mt.statsTaskMeta.tasks[s.taskID].GetState())
})
})
}

View File

@ -27,7 +27,7 @@ import (
type Task interface {
GetTaskID() int64
GetNodeID() int64
ResetNodeID()
ResetTask(mt *meta)
PreCheck(ctx context.Context, dependency *taskScheduler) bool
CheckTaskHealthy(mt *meta) bool
SetState(state indexpb.JobState, failReason string)

View File

@ -29,6 +29,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
@ -318,3 +319,33 @@ func CheckAllChannelsWatched(meta *meta, channelManager ChannelManager) error {
}
return nil
}
func createStorageConfig() *indexpb.StorageConfig {
var storageConfig *indexpb.StorageConfig
if Params.CommonCfg.StorageType.GetValue() == "local" {
storageConfig = &indexpb.StorageConfig{
RootPath: Params.LocalStorageCfg.Path.GetValue(),
StorageType: Params.CommonCfg.StorageType.GetValue(),
}
} else {
storageConfig = &indexpb.StorageConfig{
Address: Params.MinioCfg.Address.GetValue(),
AccessKeyID: Params.MinioCfg.AccessKeyID.GetValue(),
SecretAccessKey: Params.MinioCfg.SecretAccessKey.GetValue(),
UseSSL: Params.MinioCfg.UseSSL.GetAsBool(),
SslCACert: Params.MinioCfg.SslCACert.GetValue(),
BucketName: Params.MinioCfg.BucketName.GetValue(),
RootPath: Params.MinioCfg.RootPath.GetValue(),
UseIAM: Params.MinioCfg.UseIAM.GetAsBool(),
IAMEndpoint: Params.MinioCfg.IAMEndpoint.GetValue(),
StorageType: Params.CommonCfg.StorageType.GetValue(),
Region: Params.MinioCfg.Region.GetValue(),
UseVirtualHost: Params.MinioCfg.UseVirtualHost.GetAsBool(),
CloudProvider: Params.MinioCfg.CloudProvider.GetValue(),
RequestTimeoutMs: Params.MinioCfg.RequestTimeoutMs.GetAsInt64(),
}
}
return storageConfig
}
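As a rough usage sketch (illustrative only, not part of this patch; it assumes the package's existing log/zap imports), the resolved config would typically be built once and attached to outgoing stats/index job requests, relying on the generated protobuf getters of indexpb.StorageConfig:

	// Local mode carries only a root path; remote mode carries the full MinIO/S3 settings.
	storageConfig := createStorageConfig()
	log.Info("resolved storage config for worker jobs",
		zap.String("storageType", storageConfig.GetStorageType()),
		zap.String("rootPath", storageConfig.GetRootPath()))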

View File

@ -1012,6 +1012,7 @@ func (t *clusteringCompactionTask) scalarAnalyze(ctx context.Context) (map[inter
Level: segment.Level,
CollectionID: segment.CollectionID,
PartitionID: segment.PartitionID,
IsSorted: segment.IsSorted,
}
future := t.mappingPool.Submit(func() (any, error) {
analyzeResult, err := t.scalarAnalyzeSegment(ctx, segmentClone)

View File

@ -183,7 +183,7 @@ func (s *ClusteringCompactionTaskSuite) TestScalarCompactionNormal() {
err = segWriter.Write(&v)
s.Require().NoError(err)
}
segWriter.writer.Flush()
segWriter.FlushAndIsFull()
kvs, fBinlogs, err := serializeWrite(context.TODO(), s.mockAlloc, segWriter)
s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).Return(lo.Values(kvs), nil)
@ -315,7 +315,7 @@ func (s *ClusteringCompactionTaskSuite) TestGeneratePkStats() {
err = segWriter.Write(&v)
s.Require().NoError(err)
}
segWriter.writer.Flush()
segWriter.FlushAndIsFull()
kvs, _, err := serializeWrite(context.TODO(), s.mockAlloc, segWriter)
s.NoError(err)

View File

@ -205,7 +205,7 @@ func (t *LevelZeroCompactionTask) serializeUpload(ctx context.Context, segmentWr
return nil, err
}
blobKey, _ := binlog.BuildLogPath(storage.DeleteBinlog, writer.collectionID, writer.partitionID, writer.segmentID, -1, logID)
blobKey, _ := binlog.BuildLogPath(storage.DeleteBinlog, writer.GetCollectionID(), writer.GetPartitionID(), writer.GetSegmentID(), -1, logID)
allBlobs[blobKey] = blob.GetValue()
deltalog := &datapb.Binlog{

View File

@ -477,7 +477,6 @@ func (s *LevelZeroCompactionTaskSuite) TestSerializeUpload() {
s.SetupTest()
s.task.plan = plan
s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil)
writer := NewSegmentDeltaWriter(100, 10, 1)
writer.WriteBatch(s.dData.Pks, s.dData.Tss)
writers := map[int64]*SegmentDeltaWriter{100: writer}

View File

@ -0,0 +1,157 @@
package compaction
import (
"container/heap"
"context"
sio "io"
"math"
"github.com/samber/lo"
"go.opentelemetry.io/otel"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/allocator"
"github.com/milvus-io/milvus/internal/flushcommon/io"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/timerecord"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
func mergeSortMultipleSegments(ctx context.Context,
plan *datapb.CompactionPlan,
collectionID, partitionID, maxRows int64,
binlogIO io.BinlogIO,
binlogs []*datapb.CompactionSegmentBinlogs,
delta map[interface{}]typeutil.Timestamp,
tr *timerecord.TimeRecorder,
currentTs typeutil.Timestamp,
collectionTtl int64,
) ([]*datapb.CompactionSegment, error) {
_ = tr.RecordSpan()
ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "mergeSortMultipleSegments")
defer span.End()
log := log.With(zap.Int64("planID", plan.GetPlanID()))
segIDAlloc := allocator.NewLocalAllocator(plan.GetPreAllocatedSegments().GetBegin(), plan.GetPreAllocatedSegments().GetEnd())
logIDAlloc := allocator.NewLocalAllocator(plan.GetBeginLogID(), math.MaxInt64)
compAlloc := NewCompactionAllocator(segIDAlloc, logIDAlloc)
mWriter := NewMultiSegmentWriter(binlogIO, compAlloc, plan, maxRows, partitionID, collectionID)
var (
expiredRowCount int64 // the number of expired entities
deletedRowCount int64
)
isValueDeleted := func(v *storage.Value) bool {
ts, ok := delta[v.PK.GetValue()]
// an upsert produces an insert and a delete with the same ts,
// so use < instead of <= here to avoid dropping the upserted row during compaction
if ok && uint64(v.Timestamp) < ts {
return true
}
return false
}
pkField, err := typeutil.GetPrimaryFieldSchema(plan.GetSchema())
if err != nil {
log.Warn("failed to get pk field from schema")
return nil, err
}
//SegmentDeserializeReaderTest(binlogPaths, t.binlogIO, writer.GetPkID())
segmentReaders := make([]*SegmentDeserializeReader, len(binlogs))
for i, s := range binlogs {
var binlogBatchCount int
for _, b := range s.GetFieldBinlogs() {
if b != nil {
binlogBatchCount = len(b.GetBinlogs())
break
}
}
if binlogBatchCount == 0 {
log.Warn("compacting empty segment", zap.Int64("segmentID", s.GetSegmentID()))
continue
}
binlogPaths := make([][]string, binlogBatchCount)
for idx := 0; idx < binlogBatchCount; idx++ {
var batchPaths []string
for _, f := range s.GetFieldBinlogs() {
batchPaths = append(batchPaths, f.GetBinlogs()[idx].GetLogPath())
}
binlogPaths[idx] = batchPaths
}
segmentReaders[i] = NewSegmentDeserializeReader(ctx, binlogPaths, binlogIO, pkField.GetFieldID())
}
pq := make(PriorityQueue, 0)
heap.Init(&pq)
for i, r := range segmentReaders {
if v, err := r.Next(); err == nil {
heap.Push(&pq, &PQItem{
Value: v,
Index: i,
})
}
}
for pq.Len() > 0 {
smallest := heap.Pop(&pq).(*PQItem)
v := smallest.Value
if isValueDeleted(v) {
deletedRowCount++
continue
}
// Filtering expired entity
if isExpiredEntity(collectionTtl, currentTs, typeutil.Timestamp(v.Timestamp)) {
expiredRowCount++
continue
}
err := mWriter.Write(v)
if err != nil {
log.Warn("compact wrong, failed to write row", zap.Error(err))
return nil, err
}
v, err = segmentReaders[smallest.Index].Next()
if err != nil && err != sio.EOF {
return nil, err
}
if err == nil {
next := &PQItem{
Value: v,
Index: smallest.Index,
}
heap.Push(&pq, next)
}
}
res, err := mWriter.Finish()
if err != nil {
log.Warn("compact wrong, failed to finish writer", zap.Error(err))
return nil, err
}
for _, seg := range res {
seg.IsSorted = true
}
totalElapse := tr.RecordSpan()
log.Info("compact mergeSortMultipleSegments end",
zap.Int64s("mergeSplit to segments", lo.Keys(mWriter.cachedMeta)),
zap.Int64("deleted row count", deletedRowCount),
zap.Int64("expired entities", expiredRowCount),
zap.Duration("total elapse", totalElapse))
return res, nil
}
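mergeSortMultipleSegments is a classic k-way merge: every input segment is already sorted by primary key, so a min-heap keyed on PK always exposes the globally smallest remaining row, deleted and expired rows are skipped, and the survivors stream into the multi-segment writer in order. The standalone sketch below (not Milvus code, standard library only) isolates that pop-then-refill pattern over plain int slices.

	package main

	import (
		"container/heap"
		"fmt"
	)

	// item is one head-of-stream element in the k-way merge.
	type item struct {
		val int // the value itself
		src int // index of the source slice it came from
	}

	// minHeap orders items by value, mirroring the PK ordering used above.
	type minHeap []item

	func (h minHeap) Len() int            { return len(h) }
	func (h minHeap) Less(i, j int) bool  { return h[i].val < h[j].val }
	func (h minHeap) Swap(i, j int)       { h[i], h[j] = h[j], h[i] }
	func (h *minHeap) Push(x interface{}) { *h = append(*h, x.(item)) }
	func (h *minHeap) Pop() interface{} {
		old := *h
		n := len(old)
		x := old[n-1]
		*h = old[:n-1]
		return x
	}

	// mergeSorted merges k already-sorted slices in O(n log k): pop the smallest
	// head, emit it, then refill the heap from the slice it came from.
	func mergeSorted(srcs [][]int) []int {
		h := &minHeap{}
		pos := make([]int, len(srcs)) // next unread index per source slice
		for i, s := range srcs {
			if len(s) > 0 {
				heap.Push(h, item{val: s[0], src: i})
				pos[i] = 1
			}
		}
		var out []int
		for h.Len() > 0 {
			top := heap.Pop(h).(item)
			out = append(out, top.val)
			if pos[top.src] < len(srcs[top.src]) {
				heap.Push(h, item{val: srcs[top.src][pos[top.src]], src: top.src})
				pos[top.src]++
			}
		}
		return out
	}

	func main() {
		fmt.Println(mergeSorted([][]int{{1, 4, 7, 10}, {2, 3, 5, 6}}))
		// Output: [1 2 3 4 5 6 7 10]
	}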

View File

@ -259,10 +259,29 @@ func (t *mixCompactionTask) Compact() (*datapb.CompactionPlanResult, error) {
return nil, err
}
res, err := t.mergeSplit(ctxTimeout, allBatchPaths, deltaPk2Ts)
if err != nil {
log.Warn("compact wrong, failed to mergeSplit", zap.Error(err))
return nil, err
allSorted := true
for _, segment := range t.plan.GetSegmentBinlogs() {
if !segment.GetIsSorted() {
allSorted = false
break
}
}
var res []*datapb.CompactionSegment
if allSorted && len(t.plan.GetSegmentBinlogs()) > 1 {
log.Info("all segments are sorted, use merge sort")
res, err = mergeSortMultipleSegments(ctxTimeout, t.plan, t.collectionID, t.partitionID, t.maxRows, t.binlogIO,
t.plan.GetSegmentBinlogs(), deltaPk2Ts, t.tr, t.currentTs, t.plan.GetCollectionTtl())
if err != nil {
log.Warn("compact wrong, fail to merge sort segments", zap.Error(err))
return nil, err
}
} else {
res, err = t.mergeSplit(ctxTimeout, allBatchPaths, deltaPk2Ts)
if err != nil {
log.Warn("compact wrong, failed to mergeSplit", zap.Error(err))
return nil, err
}
}
log.Info("compact done", zap.Duration("compact elapse", time.Since(compactStart)))
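For reference, a hedged sketch of the gate applied above (the helper name is hypothetical, not part of this patch): the merge-sort path is taken only when there is more than one input segment and every input is already sorted by primary key; otherwise the task falls back to the general mergeSplit path.

	// shouldMergeSort is a hypothetical helper capturing the condition checked above.
	func shouldMergeSort(binlogs []*datapb.CompactionSegmentBinlogs) bool {
		if len(binlogs) <= 1 {
			return false
		}
		for _, s := range binlogs {
			if !s.GetIsSorted() {
				return false
			}
		}
		return true
	}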

View File

@ -130,7 +130,7 @@ func (s *MixCompactionTaskSuite) TestCompactDupPK() {
Value: row,
}
err := s.segWriter.Write(v)
s.segWriter.writer.Flush()
s.segWriter.FlushAndIsFull()
s.Require().NoError(err)
kvs, fBinlogs, err := serializeWrite(context.TODO(), alloc, s.segWriter)
@ -210,6 +210,43 @@ func (s *MixCompactionTaskSuite) TestCompactTwoToOne() {
s.Empty(segment.Deltalogs)
}
func (s *MixCompactionTaskSuite) TestCompactSortedSegment() {
segments := []int64{1001, 1002, 1003}
alloc := allocator.NewLocalAllocator(100, math.MaxInt64)
s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil)
s.task.plan.SegmentBinlogs = make([]*datapb.CompactionSegmentBinlogs, 0)
for _, segID := range segments {
s.initMultiRowsSegBuffer(segID, 100, 3)
kvs, fBinlogs, err := serializeWrite(context.TODO(), alloc, s.segWriter)
s.Require().NoError(err)
s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.MatchedBy(func(keys []string) bool {
left, right := lo.Difference(keys, lo.Keys(kvs))
return len(left) == 0 && len(right) == 0
})).Return(lo.Values(kvs), nil).Once()
s.plan.SegmentBinlogs = append(s.plan.SegmentBinlogs, &datapb.CompactionSegmentBinlogs{
SegmentID: segID,
FieldBinlogs: lo.Values(fBinlogs),
IsSorted: true,
})
}
result, err := s.task.Compact()
s.NoError(err)
s.NotNil(result)
s.Equal(s.task.plan.GetPlanID(), result.GetPlanID())
s.Equal(1, len(result.GetSegments()))
s.True(result.GetSegments()[0].GetIsSorted())
segment := result.GetSegments()[0]
s.EqualValues(19531, segment.GetSegmentID())
s.EqualValues(300, segment.GetNumOfRows())
s.NotEmpty(segment.InsertLogs)
s.NotEmpty(segment.Field2StatslogPaths)
s.Empty(segment.Deltalogs)
}
func (s *MixCompactionTaskSuite) TestSplitMergeEntityExpired() {
s.initSegBuffer(3)
collTTL := 864000 // 10 days
@ -497,6 +534,25 @@ func getRow(magic int64) map[int64]interface{} {
}
}
func (s *MixCompactionTaskSuite) initMultiRowsSegBuffer(magic, numRows, step int64) {
segWriter, err := NewSegmentWriter(s.meta.GetSchema(), 65535, magic, PartitionID, CollectionID)
s.Require().NoError(err)
for i := int64(0); i < numRows; i++ {
v := storage.Value{
PK: storage.NewInt64PrimaryKey(magic + i*step),
Timestamp: int64(tsoutil.ComposeTSByTime(getMilvusBirthday(), 0)),
Value: getRow(magic + i*step),
}
err = segWriter.Write(&v)
s.Require().NoError(err)
}
segWriter.FlushAndIsFull()
s.segWriter = segWriter
}
func (s *MixCompactionTaskSuite) initSegBuffer(magic int64) {
segWriter, err := NewSegmentWriter(s.meta.GetSchema(), 100, magic, PartitionID, CollectionID)
s.Require().NoError(err)
@ -508,7 +564,7 @@ func (s *MixCompactionTaskSuite) initSegBuffer(magic int64) {
}
err = segWriter.Write(&v)
s.Require().NoError(err)
segWriter.writer.Flush()
segWriter.FlushAndIsFull()
s.segWriter = segWriter
}

View File

@ -0,0 +1,40 @@
package compaction
import "github.com/milvus-io/milvus/internal/storage"
type PQItem struct {
Value *storage.Value
Index int
Pos int
}
type PriorityQueue []*PQItem
func (pq PriorityQueue) Len() int { return len(pq) }
func (pq PriorityQueue) Less(i, j int) bool {
return pq[i].Value.PK.LT(pq[j].Value.PK)
}
func (pq PriorityQueue) Swap(i, j int) {
pq[i], pq[j] = pq[j], pq[i]
pq[i].Pos = i
pq[j].Pos = j
}
func (pq *PriorityQueue) Push(x interface{}) {
n := len(*pq)
item := x.(*PQItem)
item.Pos = n
*pq = append(*pq, item)
}
func (pq *PriorityQueue) Pop() interface{} {
old := *pq
n := len(old)
item := old[n-1]
old[n-1] = nil
item.Pos = -1
*pq = old[0 : n-1]
return item
}

View File

@ -0,0 +1,126 @@
package compaction
import (
"container/heap"
"testing"
"github.com/milvus-io/milvus/internal/storage"
"github.com/stretchr/testify/suite"
)
type PriorityQueueSuite struct {
suite.Suite
}
func (s *PriorityQueueSuite) TestPriorityQueueMergeSort() {
slices := [][]*storage.Value{
{
{
ID: 1,
PK: &storage.Int64PrimaryKey{
Value: 1,
},
Timestamp: 0,
IsDeleted: false,
Value: 1,
},
{
ID: 4,
PK: &storage.Int64PrimaryKey{
Value: 4,
},
Timestamp: 0,
IsDeleted: false,
Value: 4,
},
{
ID: 7,
PK: &storage.Int64PrimaryKey{
Value: 7,
},
Timestamp: 0,
IsDeleted: false,
Value: 7,
},
{
ID: 10,
PK: &storage.Int64PrimaryKey{
Value: 10,
},
Timestamp: 0,
IsDeleted: false,
Value: 10,
},
},
{
{
ID: 2,
PK: &storage.Int64PrimaryKey{
Value: 2,
},
Timestamp: 0,
IsDeleted: false,
Value: 2,
},
{
ID: 3,
PK: &storage.Int64PrimaryKey{
Value: 3,
},
Timestamp: 0,
IsDeleted: false,
Value: 3,
},
{
ID: 5,
PK: &storage.Int64PrimaryKey{
Value: 5,
},
Timestamp: 0,
IsDeleted: false,
Value: 5,
},
{
ID: 6,
PK: &storage.Int64PrimaryKey{
Value: 6,
},
Timestamp: 0,
IsDeleted: false,
Value: 6,
},
},
}
var result []*storage.Value
pq := make(PriorityQueue, 0)
heap.Init(&pq)
// PQItem.Pos is maintained by the heap itself (Push/Pop/Swap rewrite it), so
// track the next unread index of each slice separately instead of reusing Pos.
nextPos := make([]int, len(slices))
for i, s := range slices {
if len(s) > 0 {
heap.Push(&pq, &PQItem{
Value: s[0],
Index: i,
})
nextPos[i] = 1
}
}
for pq.Len() > 0 {
smallest := heap.Pop(&pq).(*PQItem)
result = append(result, smallest.Value)
idx := smallest.Index
if nextPos[idx] < len(slices[idx]) {
heap.Push(&pq, &PQItem{
Value: slices[idx][nextPos[idx]],
Index: idx,
})
nextPos[idx]++
}
}
// the merged result should be globally sorted by primary key
for i := 1; i < len(result); i++ {
s.True(!result[i].PK.LT(result[i-1].PK))
}
}
func TestNewPriorityQueueSuite(t *testing.T) {
suite.Run(t, new(PriorityQueueSuite))
}

View File

@ -0,0 +1,83 @@
package compaction
import (
"context"
"io"
"github.com/samber/lo"
"go.uber.org/zap"
binlogIO "github.com/milvus-io/milvus/internal/flushcommon/io"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
)
type SegmentDeserializeReader struct {
ctx context.Context
binlogIO binlogIO.BinlogIO
reader *storage.DeserializeReader[*storage.Value]
pos int
PKFieldID int64
binlogPaths [][]string
binlogPathPos int
}
func NewSegmentDeserializeReader(ctx context.Context, binlogPaths [][]string, binlogIO binlogIO.BinlogIO, PKFieldID int64) *SegmentDeserializeReader {
return &SegmentDeserializeReader{
ctx: ctx,
binlogIO: binlogIO,
reader: nil,
pos: 0,
PKFieldID: PKFieldID,
binlogPaths: binlogPaths,
binlogPathPos: 0,
}
}
func (r *SegmentDeserializeReader) initDeserializeReader() error {
if r.binlogPathPos >= len(r.binlogPaths) {
return io.EOF
}
allValues, err := r.binlogIO.Download(r.ctx, r.binlogPaths[r.binlogPathPos])
if err != nil {
log.Warn("compact wrong, fail to download insertLogs", zap.Error(err))
return err
}
blobs := lo.Map(allValues, func(v []byte, i int) *storage.Blob {
return &storage.Blob{Key: r.binlogPaths[r.binlogPathPos][i], Value: v}
})
r.reader, err = storage.NewBinlogDeserializeReader(blobs, r.PKFieldID)
if err != nil {
log.Warn("compact wrong, failed to new insert binlogs reader", zap.Error(err))
return err
}
r.binlogPathPos++
return nil
}
func (r *SegmentDeserializeReader) Next() (*storage.Value, error) {
if r.reader == nil {
if err := r.initDeserializeReader(); err != nil {
return nil, err
}
}
if err := r.reader.Next(); err != nil {
if err == io.EOF {
r.reader.Close()
if err := r.initDeserializeReader(); err != nil {
return nil, err
}
err = r.reader.Next()
return r.reader.Value(), err
}
return nil, err
}
return r.reader.Value(), nil
}
func (r *SegmentDeserializeReader) Close() {
r.reader.Close()
}
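A minimal caller sketch (hypothetical function; ctx, the per-batch binlog paths, the BinlogIO implementation and the PK field ID are assumed to come from the surrounding compaction task): the reader yields rows in order within the segment and signals exhaustion with io.EOF, which is what the merge loop above relies on when refilling its heap.

	func drainSegment(ctx context.Context, paths [][]string, bio binlogIO.BinlogIO, pkFieldID int64) error {
		reader := NewSegmentDeserializeReader(ctx, paths, bio, pkFieldID)
		for {
			v, err := reader.Next()
			if err == io.EOF {
				return nil // every binlog batch of this segment has been consumed
			}
			if err != nil {
				return err
			}
			_ = v // process one row, e.g. push it into a merge heap
		}
	}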

View File

@ -350,6 +350,11 @@ func (w *SegmentWriter) FlushAndIsFull() bool {
return w.writer.WrittenMemorySize() > paramtable.Get().DataNodeCfg.BinLogMaxSize.GetAsUint64()
}
func (w *SegmentWriter) FlushAndIsFullWithBinlogMaxSize(binLogMaxSize uint64) bool {
w.writer.Flush()
return w.writer.WrittenMemorySize() > binLogMaxSize
}
func (w *SegmentWriter) IsEmpty() bool {
return w.writer.WrittenMemorySize() == 0
}

View File

@ -33,9 +33,9 @@ import (
"google.golang.org/grpc/credentials/insecure"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/proto/rootcoordpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/util/sessionutil"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/tracer"
@ -59,7 +59,7 @@ type ConnectionManager struct {
queryNodesMu sync.RWMutex
dataNodes map[int64]datapb.DataNodeClient
dataNodesMu sync.RWMutex
indexNodes map[int64]indexpb.IndexNodeClient
indexNodes map[int64]workerpb.IndexNodeClient
indexNodesMu sync.RWMutex
taskMu sync.RWMutex
@ -81,7 +81,7 @@ func NewConnectionManager(session *sessionutil.Session) *ConnectionManager {
queryNodes: make(map[int64]querypb.QueryNodeClient),
dataNodes: make(map[int64]datapb.DataNodeClient),
indexNodes: make(map[int64]indexpb.IndexNodeClient),
indexNodes: make(map[int64]workerpb.IndexNodeClient),
buildTasks: make(map[int64]*buildClientTask),
notify: make(chan int64),
@ -187,7 +187,7 @@ func (cm *ConnectionManager) GetDataNodeClients() (map[int64]datapb.DataNodeClie
return cm.dataNodes, true
}
func (cm *ConnectionManager) GetIndexNodeClients() (map[int64]indexpb.IndexNodeClient, bool) {
func (cm *ConnectionManager) GetIndexNodeClients() (map[int64]workerpb.IndexNodeClient, bool) {
cm.indexNodesMu.RLock()
defer cm.indexNodesMu.RUnlock()
_, ok := cm.dependencies[typeutil.IndexNodeRole]
@ -295,7 +295,7 @@ func (cm *ConnectionManager) buildClients(session *sessionutil.Session, connecti
case typeutil.IndexNodeRole:
cm.indexNodesMu.Lock()
defer cm.indexNodesMu.Unlock()
cm.indexNodes[session.ServerID] = indexpb.NewIndexNodeClient(connection)
cm.indexNodes[session.ServerID] = workerpb.NewIndexNodeClient(connection)
}
}

View File

@ -32,9 +32,9 @@ import (
"google.golang.org/grpc"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/proto/rootcoordpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/util/sessionutil"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/etcd"
@ -168,7 +168,7 @@ func TestConnectionManager(t *testing.T) {
indexNode := &testIndexNode{}
grpcServer := grpc.NewServer()
defer grpcServer.Stop()
indexpb.RegisterIndexNodeServer(grpcServer, indexNode)
workerpb.RegisterIndexNodeServer(grpcServer, indexNode)
go grpcServer.Serve(lis)
session.Init(typeutil.IndexNodeRole, lis.Addr().String(), true, false)
session.Register()
@ -266,7 +266,7 @@ type testDataNode struct {
}
type testIndexNode struct {
indexpb.IndexNodeServer
workerpb.IndexNodeServer
}
func initSession(ctx context.Context) *sessionutil.Session {

View File

@ -25,8 +25,8 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/internal/util/grpcclient"
"github.com/milvus-io/milvus/internal/util/sessionutil"
@ -41,7 +41,7 @@ var Params *paramtable.ComponentParam = paramtable.Get()
// Client is the grpc client of IndexNode.
type Client struct {
grpcClient grpcclient.GrpcClient[indexpb.IndexNodeClient]
grpcClient grpcclient.GrpcClient[workerpb.IndexNodeClient]
addr string
sess *sessionutil.Session
}
@ -60,7 +60,7 @@ func NewClient(ctx context.Context, addr string, nodeID int64, encryption bool)
config := &Params.IndexNodeGrpcClientCfg
client := &Client{
addr: addr,
grpcClient: grpcclient.NewClientBase[indexpb.IndexNodeClient](config, "milvus.proto.index.IndexNode"),
grpcClient: grpcclient.NewClientBase[workerpb.IndexNodeClient](config, "milvus.proto.index.IndexNode"),
sess: sess,
}
// node shall specify node id
@ -80,16 +80,16 @@ func (c *Client) Close() error {
return c.grpcClient.Close()
}
func (c *Client) newGrpcClient(cc *grpc.ClientConn) indexpb.IndexNodeClient {
return indexpb.NewIndexNodeClient(cc)
func (c *Client) newGrpcClient(cc *grpc.ClientConn) workerpb.IndexNodeClient {
return workerpb.NewIndexNodeClient(cc)
}
func (c *Client) getAddr() (string, error) {
return c.addr, nil
}
func wrapGrpcCall[T any](ctx context.Context, c *Client, call func(indexClient indexpb.IndexNodeClient) (*T, error)) (*T, error) {
ret, err := c.grpcClient.ReCall(ctx, func(client indexpb.IndexNodeClient) (any, error) {
func wrapGrpcCall[T any](ctx context.Context, c *Client, call func(indexClient workerpb.IndexNodeClient) (*T, error)) (*T, error) {
ret, err := c.grpcClient.ReCall(ctx, func(client workerpb.IndexNodeClient) (any, error) {
if !funcutil.CheckCtxValid(ctx) {
return nil, ctx.Err()
}
@ -103,41 +103,41 @@ func wrapGrpcCall[T any](ctx context.Context, c *Client, call func(indexClient i
// GetComponentStates gets the component states of IndexNode.
func (c *Client) GetComponentStates(ctx context.Context, req *milvuspb.GetComponentStatesRequest, opts ...grpc.CallOption) (*milvuspb.ComponentStates, error) {
return wrapGrpcCall(ctx, c, func(client indexpb.IndexNodeClient) (*milvuspb.ComponentStates, error) {
return wrapGrpcCall(ctx, c, func(client workerpb.IndexNodeClient) (*milvuspb.ComponentStates, error) {
return client.GetComponentStates(ctx, &milvuspb.GetComponentStatesRequest{})
})
}
func (c *Client) GetStatisticsChannel(ctx context.Context, req *internalpb.GetStatisticsChannelRequest, opts ...grpc.CallOption) (*milvuspb.StringResponse, error) {
return wrapGrpcCall(ctx, c, func(client indexpb.IndexNodeClient) (*milvuspb.StringResponse, error) {
return wrapGrpcCall(ctx, c, func(client workerpb.IndexNodeClient) (*milvuspb.StringResponse, error) {
return client.GetStatisticsChannel(ctx, &internalpb.GetStatisticsChannelRequest{})
})
}
// CreateJob sends the build index request to IndexNode.
func (c *Client) CreateJob(ctx context.Context, req *indexpb.CreateJobRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
return wrapGrpcCall(ctx, c, func(client indexpb.IndexNodeClient) (*commonpb.Status, error) {
func (c *Client) CreateJob(ctx context.Context, req *workerpb.CreateJobRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
return wrapGrpcCall(ctx, c, func(client workerpb.IndexNodeClient) (*commonpb.Status, error) {
return client.CreateJob(ctx, req)
})
}
// QueryJobs queries the task info of the index tasks.
func (c *Client) QueryJobs(ctx context.Context, req *indexpb.QueryJobsRequest, opts ...grpc.CallOption) (*indexpb.QueryJobsResponse, error) {
return wrapGrpcCall(ctx, c, func(client indexpb.IndexNodeClient) (*indexpb.QueryJobsResponse, error) {
func (c *Client) QueryJobs(ctx context.Context, req *workerpb.QueryJobsRequest, opts ...grpc.CallOption) (*workerpb.QueryJobsResponse, error) {
return wrapGrpcCall(ctx, c, func(client workerpb.IndexNodeClient) (*workerpb.QueryJobsResponse, error) {
return client.QueryJobs(ctx, req)
})
}
// DropJobs drops the index build jobs.
func (c *Client) DropJobs(ctx context.Context, req *indexpb.DropJobsRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
return wrapGrpcCall(ctx, c, func(client indexpb.IndexNodeClient) (*commonpb.Status, error) {
func (c *Client) DropJobs(ctx context.Context, req *workerpb.DropJobsRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
return wrapGrpcCall(ctx, c, func(client workerpb.IndexNodeClient) (*commonpb.Status, error) {
return client.DropJobs(ctx, req)
})
}
// GetJobStats gets the job statistics of the IndexNode.
func (c *Client) GetJobStats(ctx context.Context, req *indexpb.GetJobStatsRequest, opts ...grpc.CallOption) (*indexpb.GetJobStatsResponse, error) {
return wrapGrpcCall(ctx, c, func(client indexpb.IndexNodeClient) (*indexpb.GetJobStatsResponse, error) {
func (c *Client) GetJobStats(ctx context.Context, req *workerpb.GetJobStatsRequest, opts ...grpc.CallOption) (*workerpb.GetJobStatsResponse, error) {
return wrapGrpcCall(ctx, c, func(client workerpb.IndexNodeClient) (*workerpb.GetJobStatsResponse, error) {
return client.GetJobStats(ctx, req)
})
}
@ -148,7 +148,7 @@ func (c *Client) ShowConfigurations(ctx context.Context, req *internalpb.ShowCon
commonpbutil.UpdateMsgBase(
req.GetBase(),
commonpbutil.FillMsgBaseFromClient(paramtable.GetNodeID()))
return wrapGrpcCall(ctx, c, func(client indexpb.IndexNodeClient) (*internalpb.ShowConfigurationsResponse, error) {
return wrapGrpcCall(ctx, c, func(client workerpb.IndexNodeClient) (*internalpb.ShowConfigurationsResponse, error) {
return client.ShowConfigurations(ctx, req)
})
}
@ -159,25 +159,25 @@ func (c *Client) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest
commonpbutil.UpdateMsgBase(
req.GetBase(),
commonpbutil.FillMsgBaseFromClient(paramtable.GetNodeID()))
return wrapGrpcCall(ctx, c, func(client indexpb.IndexNodeClient) (*milvuspb.GetMetricsResponse, error) {
return wrapGrpcCall(ctx, c, func(client workerpb.IndexNodeClient) (*milvuspb.GetMetricsResponse, error) {
return client.GetMetrics(ctx, req)
})
}
func (c *Client) CreateJobV2(ctx context.Context, req *indexpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
return wrapGrpcCall(ctx, c, func(client indexpb.IndexNodeClient) (*commonpb.Status, error) {
func (c *Client) CreateJobV2(ctx context.Context, req *workerpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
return wrapGrpcCall(ctx, c, func(client workerpb.IndexNodeClient) (*commonpb.Status, error) {
return client.CreateJobV2(ctx, req)
})
}
func (c *Client) QueryJobsV2(ctx context.Context, req *indexpb.QueryJobsV2Request, opts ...grpc.CallOption) (*indexpb.QueryJobsV2Response, error) {
return wrapGrpcCall(ctx, c, func(client indexpb.IndexNodeClient) (*indexpb.QueryJobsV2Response, error) {
func (c *Client) QueryJobsV2(ctx context.Context, req *workerpb.QueryJobsV2Request, opts ...grpc.CallOption) (*workerpb.QueryJobsV2Response, error) {
return wrapGrpcCall(ctx, c, func(client workerpb.IndexNodeClient) (*workerpb.QueryJobsV2Response, error) {
return client.QueryJobsV2(ctx, req)
})
}
func (c *Client) DropJobsV2(ctx context.Context, req *indexpb.DropJobsV2Request, opt ...grpc.CallOption) (*commonpb.Status, error) {
return wrapGrpcCall(ctx, c, func(client indexpb.IndexNodeClient) (*commonpb.Status, error) {
func (c *Client) DropJobsV2(ctx context.Context, req *workerpb.DropJobsV2Request, opt ...grpc.CallOption) (*commonpb.Status, error) {
return wrapGrpcCall(ctx, c, func(client workerpb.IndexNodeClient) (*commonpb.Status, error) {
return client.DropJobsV2(ctx, req)
})
}

View File

@ -18,170 +18,162 @@ package grpcindexnodeclient
import (
"context"
"math/rand"
"os"
"strings"
"testing"
"time"
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/assert"
"google.golang.org/grpc"
"github.com/stretchr/testify/mock"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/util/mock"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/etcd"
"github.com/milvus-io/milvus/pkg/util/metricsinfo"
"github.com/milvus-io/milvus/pkg/util/paramtable"
)
func Test_NewClient(t *testing.T) {
func TestMain(m *testing.M) {
// init embed etcd
embedetcdServer, tempDir, err := etcd.StartTestEmbedEtcdServer()
if err != nil {
log.Fatal("failed to start embed etcd server", zap.Error(err))
}
defer os.RemoveAll(tempDir)
defer embedetcdServer.Close()
addrs := etcd.GetEmbedEtcdEndpoints(embedetcdServer)
paramtable.Init()
paramtable.Get().Save(Params.EtcdCfg.Endpoints.Key, strings.Join(addrs, ","))
rand.Seed(time.Now().UnixNano())
os.Exit(m.Run())
}
func Test_NewClient(t *testing.T) {
ctx := context.Background()
client, err := NewClient(ctx, "", 1, false)
assert.Nil(t, client)
assert.Error(t, err)
client, err = NewClient(ctx, "test", 2, false)
assert.NoError(t, err)
client, err = NewClient(ctx, "localhost:1234", 1, false)
assert.NotNil(t, client)
checkFunc := func(retNotNil bool) {
retCheck := func(notNil bool, ret interface{}, err error) {
if notNil {
assert.NotNil(t, ret)
assert.NoError(t, err)
} else {
assert.Nil(t, ret)
assert.Error(t, err)
}
}
r1, err := client.GetComponentStates(ctx, nil)
retCheck(retNotNil, r1, err)
r3, err := client.GetStatisticsChannel(ctx, nil)
retCheck(retNotNil, r3, err)
r4, err := client.CreateJob(ctx, nil)
retCheck(retNotNil, r4, err)
r5, err := client.GetMetrics(ctx, nil)
retCheck(retNotNil, r5, err)
r6, err := client.QueryJobs(ctx, nil)
retCheck(retNotNil, r6, err)
r7, err := client.DropJobs(ctx, nil)
retCheck(retNotNil, r7, err)
}
client.(*Client).grpcClient = &mock.GRPCClientBase[indexpb.IndexNodeClient]{
GetGrpcClientErr: errors.New("dummy"),
}
newFunc1 := func(cc *grpc.ClientConn) indexpb.IndexNodeClient {
return &mock.GrpcIndexNodeClient{Err: nil}
}
client.(*Client).grpcClient.SetNewGrpcClientFunc(newFunc1)
checkFunc(false)
client.(*Client).grpcClient = &mock.GRPCClientBase[indexpb.IndexNodeClient]{
GetGrpcClientErr: nil,
}
newFunc2 := func(cc *grpc.ClientConn) indexpb.IndexNodeClient {
return &mock.GrpcIndexNodeClient{Err: errors.New("dummy")}
}
client.(*Client).grpcClient.SetNewGrpcClientFunc(newFunc2)
checkFunc(false)
client.(*Client).grpcClient = &mock.GRPCClientBase[indexpb.IndexNodeClient]{
GetGrpcClientErr: nil,
}
newFunc3 := func(cc *grpc.ClientConn) indexpb.IndexNodeClient {
return &mock.GrpcIndexNodeClient{Err: nil}
}
client.(*Client).grpcClient.SetNewGrpcClientFunc(newFunc3)
checkFunc(true)
assert.NoError(t, err)
err = client.Close()
assert.NoError(t, err)
}
func TestIndexNodeClient(t *testing.T) {
inc := &mock.GrpcIndexNodeClient{Err: nil}
assert.NotNil(t, inc)
ctx := context.Background()
client, err := NewClient(ctx, "localhost:1234", 1, false)
assert.NoError(t, err)
assert.NotNil(t, client)
mockIN := mocks.NewMockIndexNodeClient(t)
mockGrpcClient := mocks.NewMockGrpcClient[workerpb.IndexNodeClient](t)
mockGrpcClient.EXPECT().Close().Return(nil)
mockGrpcClient.EXPECT().ReCall(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, f func(nodeClient workerpb.IndexNodeClient) (interface{}, error)) (interface{}, error) {
return f(mockIN)
})
client.(*Client).grpcClient = mockGrpcClient
ctx := context.TODO()
t.Run("GetComponentStates", func(t *testing.T) {
_, err := inc.GetComponentStates(ctx, nil)
mockIN.EXPECT().GetComponentStates(mock.Anything, mock.Anything).Return(nil, nil)
_, err := client.GetComponentStates(ctx, nil)
assert.NoError(t, err)
})
t.Run("GetStatisticsChannel", func(t *testing.T) {
_, err := inc.GetStatisticsChannel(ctx, nil)
mockIN.EXPECT().GetStatisticsChannel(mock.Anything, mock.Anything).Return(nil, nil)
_, err := client.GetStatisticsChannel(ctx, nil)
assert.NoError(t, err)
})
t.Run("CreateJob", func(t *testing.T) {
req := &indexpb.CreateJobRequest{
mockIN.EXPECT().CreateJob(mock.Anything, mock.Anything).Return(nil, nil)
req := &workerpb.CreateJobRequest{
ClusterID: "0",
BuildID: 0,
}
_, err := inc.CreateJob(ctx, req)
_, err := client.CreateJob(ctx, req)
assert.NoError(t, err)
})
t.Run("QueryJob", func(t *testing.T) {
req := &indexpb.QueryJobsRequest{}
_, err := inc.QueryJobs(ctx, req)
mockIN.EXPECT().QueryJobs(mock.Anything, mock.Anything).Return(nil, nil)
req := &workerpb.QueryJobsRequest{}
_, err := client.QueryJobs(ctx, req)
assert.NoError(t, err)
})
t.Run("DropJob", func(t *testing.T) {
req := &indexpb.DropJobsRequest{}
_, err := inc.DropJobs(ctx, req)
mockIN.EXPECT().DropJobs(mock.Anything, mock.Anything).Return(nil, nil)
req := &workerpb.DropJobsRequest{}
_, err := client.DropJobs(ctx, req)
assert.NoError(t, err)
})
t.Run("ShowConfigurations", func(t *testing.T) {
mockIN.EXPECT().ShowConfigurations(mock.Anything, mock.Anything).Return(nil, nil)
req := &internalpb.ShowConfigurationsRequest{
Pattern: "",
}
_, err := inc.ShowConfigurations(ctx, req)
_, err := client.ShowConfigurations(ctx, req)
assert.NoError(t, err)
})
t.Run("GetMetrics", func(t *testing.T) {
mockIN.EXPECT().GetMetrics(mock.Anything, mock.Anything).Return(nil, nil)
req, err := metricsinfo.ConstructRequestByMetricType(metricsinfo.SystemInfoMetrics)
assert.NoError(t, err)
_, err = inc.GetMetrics(ctx, req)
_, err = client.GetMetrics(ctx, req)
assert.NoError(t, err)
})
t.Run("GetJobStats", func(t *testing.T) {
req := &indexpb.GetJobStatsRequest{}
_, err := inc.GetJobStats(ctx, req)
mockIN.EXPECT().GetJobStats(mock.Anything, mock.Anything).Return(nil, nil)
req := &workerpb.GetJobStatsRequest{}
_, err := client.GetJobStats(ctx, req)
assert.NoError(t, err)
})
t.Run("CreateJobV2", func(t *testing.T) {
req := &indexpb.CreateJobV2Request{}
_, err := inc.CreateJobV2(ctx, req)
mockIN.EXPECT().CreateJobV2(mock.Anything, mock.Anything).Return(nil, nil)
req := &workerpb.CreateJobV2Request{}
_, err := client.CreateJobV2(ctx, req)
assert.NoError(t, err)
})
t.Run("QueryJobsV2", func(t *testing.T) {
req := &indexpb.QueryJobsV2Request{}
_, err := inc.QueryJobsV2(ctx, req)
mockIN.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).Return(nil, nil)
req := &workerpb.QueryJobsV2Request{}
_, err := client.QueryJobsV2(ctx, req)
assert.NoError(t, err)
})
t.Run("DropJobsV2", func(t *testing.T) {
req := &indexpb.DropJobsV2Request{}
_, err := inc.DropJobsV2(ctx, req)
mockIN.EXPECT().DropJobsV2(mock.Anything, mock.Anything).Return(nil, nil)
req := &workerpb.DropJobsV2Request{}
_, err := client.DropJobsV2(ctx, req)
assert.NoError(t, err)
})
err := inc.Close()
err = client.Close()
assert.NoError(t, err)
}

View File

@ -36,8 +36,8 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus/internal/distributed/utils"
"github.com/milvus-io/milvus/internal/indexnode"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/internal/util/dependency"
_ "github.com/milvus-io/milvus/internal/util/grpcclient"
@ -133,7 +133,7 @@ func (s *Server) startGrpcLoop(grpcPort int) {
return s.serverID.Load()
}),
)))
indexpb.RegisterIndexNodeServer(s.grpcServer, s)
workerpb.RegisterIndexNodeServer(s.grpcServer, s)
go funcutil.CheckGrpcReady(ctx, s.grpcErrChan)
if err := s.grpcServer.Serve(lis); err != nil {
s.grpcErrChan <- err
@ -261,22 +261,22 @@ func (s *Server) GetStatisticsChannel(ctx context.Context, req *internalpb.GetSt
}
// CreateJob sends the create index request to IndexNode.
func (s *Server) CreateJob(ctx context.Context, req *indexpb.CreateJobRequest) (*commonpb.Status, error) {
func (s *Server) CreateJob(ctx context.Context, req *workerpb.CreateJobRequest) (*commonpb.Status, error) {
return s.indexnode.CreateJob(ctx, req)
}
// QueryJobs queries index job statuses
func (s *Server) QueryJobs(ctx context.Context, req *indexpb.QueryJobsRequest) (*indexpb.QueryJobsResponse, error) {
func (s *Server) QueryJobs(ctx context.Context, req *workerpb.QueryJobsRequest) (*workerpb.QueryJobsResponse, error) {
return s.indexnode.QueryJobs(ctx, req)
}
// DropJobs drops index build jobs
func (s *Server) DropJobs(ctx context.Context, req *indexpb.DropJobsRequest) (*commonpb.Status, error) {
func (s *Server) DropJobs(ctx context.Context, req *workerpb.DropJobsRequest) (*commonpb.Status, error) {
return s.indexnode.DropJobs(ctx, req)
}
// GetJobStats gets the indexnode's job statistics
func (s *Server) GetJobStats(ctx context.Context, req *indexpb.GetJobStatsRequest) (*indexpb.GetJobStatsResponse, error) {
func (s *Server) GetJobStats(ctx context.Context, req *workerpb.GetJobStatsRequest) (*workerpb.GetJobStatsResponse, error) {
return s.indexnode.GetJobStats(ctx, req)
}
@ -290,15 +290,15 @@ func (s *Server) GetMetrics(ctx context.Context, request *milvuspb.GetMetricsReq
return s.indexnode.GetMetrics(ctx, request)
}
func (s *Server) CreateJobV2(ctx context.Context, request *indexpb.CreateJobV2Request) (*commonpb.Status, error) {
func (s *Server) CreateJobV2(ctx context.Context, request *workerpb.CreateJobV2Request) (*commonpb.Status, error) {
return s.indexnode.CreateJobV2(ctx, request)
}
func (s *Server) QueryJobsV2(ctx context.Context, request *indexpb.QueryJobsV2Request) (*indexpb.QueryJobsV2Response, error) {
func (s *Server) QueryJobsV2(ctx context.Context, request *workerpb.QueryJobsV2Request) (*workerpb.QueryJobsV2Response, error) {
return s.indexnode.QueryJobsV2(ctx, request)
}
func (s *Server) DropJobsV2(ctx context.Context, request *indexpb.DropJobsV2Request) (*commonpb.Status, error) {
func (s *Server) DropJobsV2(ctx context.Context, request *workerpb.DropJobsV2Request) (*commonpb.Status, error) {
return s.indexnode.DropJobsV2(ctx, request)
}

View File

@ -26,8 +26,8 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/util/dependency"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/metricsinfo"
@ -79,7 +79,7 @@ func TestIndexNodeServer(t *testing.T) {
t.Run("CreateJob", func(t *testing.T) {
inm.EXPECT().CreateJob(mock.Anything, mock.Anything).Return(merr.Success(), nil)
req := &indexpb.CreateJobRequest{
req := &workerpb.CreateJobRequest{
ClusterID: "",
BuildID: 0,
IndexID: 0,
@ -91,10 +91,10 @@ func TestIndexNodeServer(t *testing.T) {
})
t.Run("QueryJob", func(t *testing.T) {
inm.EXPECT().QueryJobs(mock.Anything, mock.Anything).Return(&indexpb.QueryJobsResponse{
inm.EXPECT().QueryJobs(mock.Anything, mock.Anything).Return(&workerpb.QueryJobsResponse{
Status: merr.Success(),
}, nil)
req := &indexpb.QueryJobsRequest{}
req := &workerpb.QueryJobsRequest{}
resp, err := server.QueryJobs(ctx, req)
assert.NoError(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.GetStatus().GetErrorCode())
@ -102,7 +102,7 @@ func TestIndexNodeServer(t *testing.T) {
t.Run("DropJobs", func(t *testing.T) {
inm.EXPECT().DropJobs(mock.Anything, mock.Anything).Return(merr.Success(), nil)
req := &indexpb.DropJobsRequest{}
req := &workerpb.DropJobsRequest{}
resp, err := server.DropJobs(ctx, req)
assert.NoError(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.ErrorCode)
@ -132,10 +132,10 @@ func TestIndexNodeServer(t *testing.T) {
})
t.Run("GetTaskSlots", func(t *testing.T) {
inm.EXPECT().GetJobStats(mock.Anything, mock.Anything).Return(&indexpb.GetJobStatsResponse{
inm.EXPECT().GetJobStats(mock.Anything, mock.Anything).Return(&workerpb.GetJobStatsResponse{
Status: merr.Success(),
}, nil)
req := &indexpb.GetJobStatsRequest{}
req := &workerpb.GetJobStatsRequest{}
resp, err := server.GetJobStats(ctx, req)
assert.NoError(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.GetStatus().GetErrorCode())
@ -143,17 +143,17 @@ func TestIndexNodeServer(t *testing.T) {
t.Run("CreateJobV2", func(t *testing.T) {
inm.EXPECT().CreateJobV2(mock.Anything, mock.Anything).Return(merr.Success(), nil)
req := &indexpb.CreateJobV2Request{}
req := &workerpb.CreateJobV2Request{}
resp, err := server.CreateJobV2(ctx, req)
assert.NoError(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.GetErrorCode())
})
t.Run("QueryJobsV2", func(t *testing.T) {
inm.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).Return(&indexpb.QueryJobsV2Response{
inm.EXPECT().QueryJobsV2(mock.Anything, mock.Anything).Return(&workerpb.QueryJobsV2Response{
Status: merr.Success(),
}, nil)
req := &indexpb.QueryJobsV2Request{}
req := &workerpb.QueryJobsV2Request{}
resp, err := server.QueryJobsV2(ctx, req)
assert.NoError(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.GetStatus().GetErrorCode())
@ -161,7 +161,7 @@ func TestIndexNodeServer(t *testing.T) {
t.Run("DropJobsV2", func(t *testing.T) {
inm.EXPECT().DropJobsV2(mock.Anything, mock.Anything).Return(merr.Success(), nil)
req := &indexpb.DropJobsV2Request{}
req := &workerpb.DropJobsV2Request{}
resp, err := server.DropJobsV2(ctx, req)
assert.NoError(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.GetErrorCode())

View File

@ -26,30 +26,30 @@ func generateTestSchema() *schemapb.CollectionSchema {
schema := &schemapb.CollectionSchema{Fields: []*schemapb.FieldSchema{
{FieldID: common.TimeStampField, Name: "ts", DataType: schemapb.DataType_Int64},
{FieldID: common.RowIDField, Name: "rowid", DataType: schemapb.DataType_Int64},
{FieldID: 10, Name: "bool", DataType: schemapb.DataType_Bool},
{FieldID: 11, Name: "int8", DataType: schemapb.DataType_Int8},
{FieldID: 12, Name: "int16", DataType: schemapb.DataType_Int16},
{FieldID: 13, Name: "int64", DataType: schemapb.DataType_Int64},
{FieldID: 14, Name: "float", DataType: schemapb.DataType_Float},
{FieldID: 15, Name: "double", DataType: schemapb.DataType_Double},
{FieldID: 16, Name: "varchar", DataType: schemapb.DataType_VarChar},
{FieldID: 17, Name: "string", DataType: schemapb.DataType_String},
{FieldID: 18, Name: "array", DataType: schemapb.DataType_Array},
{FieldID: 19, Name: "string", DataType: schemapb.DataType_JSON},
{FieldID: 101, Name: "int32", DataType: schemapb.DataType_Int32},
{FieldID: 102, Name: "floatVector", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{
{FieldID: 100, Name: "bool", DataType: schemapb.DataType_Bool},
{FieldID: 101, Name: "int8", DataType: schemapb.DataType_Int8},
{FieldID: 102, Name: "int16", DataType: schemapb.DataType_Int16},
{FieldID: 103, Name: "int64", DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
{FieldID: 104, Name: "float", DataType: schemapb.DataType_Float},
{FieldID: 105, Name: "double", DataType: schemapb.DataType_Double},
{FieldID: 106, Name: "varchar", DataType: schemapb.DataType_VarChar},
{FieldID: 107, Name: "string", DataType: schemapb.DataType_String},
{FieldID: 108, Name: "array", DataType: schemapb.DataType_Array},
{FieldID: 109, Name: "json", DataType: schemapb.DataType_JSON},
{FieldID: 110, Name: "int32", DataType: schemapb.DataType_Int32},
{FieldID: 111, Name: "floatVector", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{
{Key: common.DimKey, Value: "8"},
}},
{FieldID: 103, Name: "binaryVector", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{
{FieldID: 112, Name: "binaryVector", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{
{Key: common.DimKey, Value: "8"},
}},
{FieldID: 104, Name: "float16Vector", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{
{FieldID: 113, Name: "float16Vector", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{
{Key: common.DimKey, Value: "8"},
}},
{FieldID: 105, Name: "bf16Vector", DataType: schemapb.DataType_BFloat16Vector, TypeParams: []*commonpb.KeyValuePair{
{FieldID: 114, Name: "bf16Vector", DataType: schemapb.DataType_BFloat16Vector, TypeParams: []*commonpb.KeyValuePair{
{Key: common.DimKey, Value: "8"},
}},
{FieldID: 106, Name: "sparseFloatVector", DataType: schemapb.DataType_SparseFloatVector, TypeParams: []*commonpb.KeyValuePair{
{FieldID: 115, Name: "sparseFloatVector", DataType: schemapb.DataType_SparseFloatVector, TypeParams: []*commonpb.KeyValuePair{
{Key: common.DimKey, Value: "28433"},
}},
}}
@ -128,34 +128,34 @@ func generateTestData(collID, partID, segID int64, num int) ([]*Blob, error) {
common.RowIDField: &storage.Int64FieldData{Data: field0},
common.TimeStampField: &storage.Int64FieldData{Data: field1},
10: &storage.BoolFieldData{Data: field10},
11: &storage.Int8FieldData{Data: field11},
12: &storage.Int16FieldData{Data: field12},
13: &storage.Int64FieldData{Data: field13},
14: &storage.FloatFieldData{Data: field14},
15: &storage.DoubleFieldData{Data: field15},
16: &storage.StringFieldData{Data: field16},
17: &storage.StringFieldData{Data: field17},
18: &storage.ArrayFieldData{Data: field18},
19: &storage.JSONFieldData{Data: field19},
101: &storage.Int32FieldData{Data: field101},
102: &storage.FloatVectorFieldData{
100: &storage.BoolFieldData{Data: field10},
101: &storage.Int8FieldData{Data: field11},
102: &storage.Int16FieldData{Data: field12},
103: &storage.Int64FieldData{Data: field13},
104: &storage.FloatFieldData{Data: field14},
105: &storage.DoubleFieldData{Data: field15},
106: &storage.StringFieldData{Data: field16},
107: &storage.StringFieldData{Data: field17},
108: &storage.ArrayFieldData{Data: field18},
109: &storage.JSONFieldData{Data: field19},
110: &storage.Int32FieldData{Data: field101},
111: &storage.FloatVectorFieldData{
Data: field102,
Dim: 8,
},
103: &storage.BinaryVectorFieldData{
112: &storage.BinaryVectorFieldData{
Data: field103,
Dim: 8,
},
104: &storage.Float16VectorFieldData{
113: &storage.Float16VectorFieldData{
Data: field104,
Dim: 8,
},
105: &storage.BFloat16VectorFieldData{
114: &storage.BFloat16VectorFieldData{
Data: field105,
Dim: 8,
},
106: &storage.SparseFloatVectorFieldData{
115: &storage.SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 28433,
Contents: field106,

View File

@ -44,6 +44,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus/internal/flushcommon/io"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/internal/util/dependency"
@ -83,7 +84,7 @@ func getCurrentIndexVersion(v int32) int32 {
type taskKey struct {
ClusterID string
BuildID UniqueID
TaskID UniqueID
}
// IndexNode is a component that executes the task of building indexes.
@ -105,10 +106,13 @@ type IndexNode struct {
etcdCli *clientv3.Client
address string
binlogIO io.BinlogIO
initOnce sync.Once
stateLock sync.Mutex
indexTasks map[taskKey]*indexTaskInfo
analyzeTasks map[taskKey]*analyzeTaskInfo
statsTasks map[taskKey]*statsTaskInfo
}
// NewIndexNode creates a new IndexNode component.
@ -123,6 +127,7 @@ func NewIndexNode(ctx context.Context, factory dependency.Factory) *IndexNode {
storageFactory: NewChunkMgrFactory(),
indexTasks: make(map[taskKey]*indexTaskInfo),
analyzeTasks: make(map[taskKey]*analyzeTaskInfo),
statsTasks: make(map[taskKey]*statsTaskInfo),
lifetime: lifetime.NewLifetime(commonpb.StateCode_Abnormal),
}
sc := NewTaskScheduler(b.loopCtx)
@ -236,6 +241,27 @@ func (i *IndexNode) Start() error {
return startErr
}
func (i *IndexNode) deleteAllTasks() {
deletedIndexTasks := i.deleteAllIndexTasks()
for _, t := range deletedIndexTasks {
if t.cancel != nil {
t.cancel()
}
}
deletedAnalyzeTasks := i.deleteAllAnalyzeTasks()
for _, t := range deletedAnalyzeTasks {
if t.cancel != nil {
t.cancel()
}
}
deletedStatsTasks := i.deleteAllStatsTasks()
for _, t := range deletedStatsTasks {
if t.cancel != nil {
t.cancel()
}
}
}
// Stop closes the server.
func (i *IndexNode) Stop() error {
i.stopOnce.Do(func() {
@ -253,18 +279,8 @@ func (i *IndexNode) Stop() error {
i.lifetime.Wait()
log.Info("Index node abnormal")
// cleanup all running tasks
deletedIndexTasks := i.deleteAllIndexTasks()
for _, t := range deletedIndexTasks {
if t.cancel != nil {
t.cancel()
}
}
deletedAnalyzeTasks := i.deleteAllAnalyzeTasks()
for _, t := range deletedAnalyzeTasks {
if t.cancel != nil {
t.cancel()
}
}
i.deleteAllTasks()
if i.sched != nil {
i.sched.Close()
}

View File

@ -1,319 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package indexnode
import (
"context"
"fmt"
"github.com/cockroachdb/errors"
clientv3 "go.etcd.io/etcd/client/v3"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/pkg/util/hardware"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/metricsinfo"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
// Mock is an alternative to IndexNode, it will return specific results based on specific parameters.
type Mock struct {
types.IndexNode
CallInit func() error
CallStart func() error
CallStop func() error
CallGetComponentStates func(ctx context.Context) (*milvuspb.ComponentStates, error)
CallGetStatisticsChannel func(ctx context.Context) (*milvuspb.StringResponse, error)
CallRegister func() error
CallSetAddress func(address string)
CallSetEtcdClient func(etcdClient *clientv3.Client)
CallUpdateStateCode func(stateCode commonpb.StateCode)
CallCreateJob func(ctx context.Context, req *indexpb.CreateJobRequest) (*commonpb.Status, error)
CallQueryJobs func(ctx context.Context, in *indexpb.QueryJobsRequest) (*indexpb.QueryJobsResponse, error)
CallDropJobs func(ctx context.Context, in *indexpb.DropJobsRequest) (*commonpb.Status, error)
CallGetJobStats func(ctx context.Context, in *indexpb.GetJobStatsRequest) (*indexpb.GetJobStatsResponse, error)
CallCreateJobV2 func(ctx context.Context, req *indexpb.CreateJobV2Request) (*commonpb.Status, error)
CallQueryJobV2 func(ctx context.Context, req *indexpb.QueryJobsV2Request) (*indexpb.QueryJobsV2Response, error)
CallDropJobV2 func(ctx context.Context, req *indexpb.DropJobsV2Request) (*commonpb.Status, error)
CallGetMetrics func(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error)
CallShowConfigurations func(ctx context.Context, req *internalpb.ShowConfigurationsRequest) (*internalpb.ShowConfigurationsResponse, error)
}
func NewIndexNodeMock() *Mock {
return &Mock{
CallInit: func() error {
return nil
},
CallStart: func() error {
return nil
},
CallRegister: func() error {
return nil
},
CallStop: func() error {
return nil
},
CallSetAddress: func(address string) {
},
CallSetEtcdClient: func(etcdClient *clientv3.Client) {
},
CallUpdateStateCode: func(stateCode commonpb.StateCode) {
},
CallGetComponentStates: func(ctx context.Context) (*milvuspb.ComponentStates, error) {
return &milvuspb.ComponentStates{
State: &milvuspb.ComponentInfo{
NodeID: 1,
Role: typeutil.IndexNodeRole,
StateCode: commonpb.StateCode_Healthy,
},
SubcomponentStates: nil,
Status: merr.Success(),
}, nil
},
CallGetStatisticsChannel: func(ctx context.Context) (*milvuspb.StringResponse, error) {
return &milvuspb.StringResponse{
Status: merr.Success(),
}, nil
},
CallCreateJob: func(ctx context.Context, req *indexpb.CreateJobRequest) (*commonpb.Status, error) {
return merr.Success(), nil
},
CallQueryJobs: func(ctx context.Context, in *indexpb.QueryJobsRequest) (*indexpb.QueryJobsResponse, error) {
indexInfos := make([]*indexpb.IndexTaskInfo, 0)
for _, buildID := range in.BuildIDs {
indexInfos = append(indexInfos, &indexpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Finished,
IndexFileKeys: []string{"file1", "file2"},
})
}
return &indexpb.QueryJobsResponse{
Status: merr.Success(),
ClusterID: in.ClusterID,
IndexInfos: indexInfos,
}, nil
},
CallDropJobs: func(ctx context.Context, in *indexpb.DropJobsRequest) (*commonpb.Status, error) {
return merr.Success(), nil
},
CallCreateJobV2: func(ctx context.Context, req *indexpb.CreateJobV2Request) (*commonpb.Status, error) {
return merr.Success(), nil
},
CallQueryJobV2: func(ctx context.Context, req *indexpb.QueryJobsV2Request) (*indexpb.QueryJobsV2Response, error) {
switch req.GetJobType() {
case indexpb.JobType_JobTypeIndexJob:
results := make([]*indexpb.IndexTaskInfo, 0)
for _, buildID := range req.GetTaskIDs() {
results = append(results, &indexpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_Finished,
IndexFileKeys: []string{},
SerializedSize: 1024,
FailReason: "",
CurrentIndexVersion: 1,
IndexStoreVersion: 1,
})
}
return &indexpb.QueryJobsV2Response{
Status: merr.Success(),
ClusterID: req.GetClusterID(),
Result: &indexpb.QueryJobsV2Response_IndexJobResults{
IndexJobResults: &indexpb.IndexJobResults{
Results: results,
},
},
}, nil
case indexpb.JobType_JobTypeAnalyzeJob:
results := make([]*indexpb.AnalyzeResult, 0)
for _, taskID := range req.GetTaskIDs() {
results = append(results, &indexpb.AnalyzeResult{
TaskID: taskID,
State: indexpb.JobState_JobStateFinished,
CentroidsFile: fmt.Sprintf("%d/stats_file", taskID),
FailReason: "",
})
}
return &indexpb.QueryJobsV2Response{
Status: merr.Success(),
ClusterID: req.GetClusterID(),
Result: &indexpb.QueryJobsV2Response_AnalyzeJobResults{
AnalyzeJobResults: &indexpb.AnalyzeResults{
Results: results,
},
},
}, nil
default:
return &indexpb.QueryJobsV2Response{
Status: merr.Status(errors.New("unknown job type")),
ClusterID: req.GetClusterID(),
}, nil
}
},
CallDropJobV2: func(ctx context.Context, req *indexpb.DropJobsV2Request) (*commonpb.Status, error) {
return merr.Success(), nil
},
CallGetJobStats: func(ctx context.Context, in *indexpb.GetJobStatsRequest) (*indexpb.GetJobStatsResponse, error) {
return &indexpb.GetJobStatsResponse{
Status: merr.Success(),
TotalJobNum: 1,
EnqueueJobNum: 0,
InProgressJobNum: 1,
TaskSlots: 1,
JobInfos: []*indexpb.JobInfo{
{
NumRows: 1024,
Dim: 128,
StartTime: 1,
EndTime: 10,
PodID: 1,
},
},
}, nil
},
CallGetMetrics: func(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
return getMockSystemInfoMetrics(ctx, req, nil)
},
CallShowConfigurations: func(ctx context.Context, req *internalpb.ShowConfigurationsRequest) (*internalpb.ShowConfigurationsResponse, error) {
return &internalpb.ShowConfigurationsResponse{
Status: merr.Success(),
}, nil
},
}
}
func (m *Mock) Init() error {
return m.CallInit()
}
func (m *Mock) Start() error {
return m.CallStart()
}
func (m *Mock) Stop() error {
return m.CallStop()
}
func (m *Mock) GetComponentStates(ctx context.Context, req *milvuspb.GetComponentStatesRequest) (*milvuspb.ComponentStates, error) {
return m.CallGetComponentStates(ctx)
}
func (m *Mock) GetStatisticsChannel(ctx context.Context, req *internalpb.GetStatisticsChannelRequest) (*milvuspb.StringResponse, error) {
return m.CallGetStatisticsChannel(ctx)
}
func (m *Mock) Register() error {
return m.CallRegister()
}
func (m *Mock) SetAddress(address string) {
m.CallSetAddress(address)
}
func (m *Mock) GetAddress() string {
return ""
}
func (m *Mock) SetEtcdClient(etcdClient *clientv3.Client) {
}
func (m *Mock) UpdateStateCode(stateCode commonpb.StateCode) {
}
func (m *Mock) CreateJob(ctx context.Context, req *indexpb.CreateJobRequest) (*commonpb.Status, error) {
return m.CallCreateJob(ctx, req)
}
func (m *Mock) QueryJobs(ctx context.Context, req *indexpb.QueryJobsRequest) (*indexpb.QueryJobsResponse, error) {
return m.CallQueryJobs(ctx, req)
}
func (m *Mock) DropJobs(ctx context.Context, req *indexpb.DropJobsRequest) (*commonpb.Status, error) {
return m.CallDropJobs(ctx, req)
}
func (m *Mock) GetJobStats(ctx context.Context, req *indexpb.GetJobStatsRequest) (*indexpb.GetJobStatsResponse, error) {
return m.CallGetJobStats(ctx, req)
}
func (m *Mock) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
return m.CallGetMetrics(ctx, req)
}
func (m *Mock) CreateJobV2(ctx context.Context, req *indexpb.CreateJobV2Request) (*commonpb.Status, error) {
return m.CallCreateJobV2(ctx, req)
}
func (m *Mock) QueryJobsV2(ctx context.Context, req *indexpb.QueryJobsV2Request) (*indexpb.QueryJobsV2Response, error) {
return m.CallQueryJobV2(ctx, req)
}
func (m *Mock) DropJobsV2(ctx context.Context, req *indexpb.DropJobsV2Request) (*commonpb.Status, error) {
return m.CallDropJobV2(ctx, req)
}
// ShowConfigurations returns the configurations of Mock indexNode matching req.Pattern
func (m *Mock) ShowConfigurations(ctx context.Context, req *internalpb.ShowConfigurationsRequest) (*internalpb.ShowConfigurationsResponse, error) {
return m.CallShowConfigurations(ctx, req)
}
func getMockSystemInfoMetrics(
ctx context.Context,
req *milvuspb.GetMetricsRequest,
node *Mock,
) (*milvuspb.GetMetricsResponse, error) {
// TODO(dragondriver): add more metrics
nodeInfos := metricsinfo.IndexNodeInfos{
BaseComponentInfos: metricsinfo.BaseComponentInfos{
Name: metricsinfo.ConstructComponentName(typeutil.IndexNodeRole, paramtable.GetNodeID()),
HardwareInfos: metricsinfo.HardwareMetrics{
CPUCoreCount: hardware.GetCPUNum(),
CPUCoreUsage: hardware.GetCPUUsage(),
Memory: 1000,
MemoryUsage: hardware.GetUsedMemoryCount(),
Disk: hardware.GetDiskCount(),
DiskUsage: hardware.GetDiskUsage(),
},
SystemInfo: metricsinfo.DeployMetrics{},
CreatedTime: paramtable.GetCreateTime().String(),
UpdatedTime: paramtable.GetUpdateTime().String(),
Type: typeutil.IndexNodeRole,
},
SystemConfigurations: metricsinfo.IndexNodeConfiguration{
MinioBucketName: Params.MinioCfg.BucketName.GetValue(),
SimdType: Params.CommonCfg.SimdType.GetValue(),
},
}
metricsinfo.FillDeployMetricsWithEnv(&nodeInfos.SystemInfo)
resp, _ := metricsinfo.MarshalComponentInfos(nodeInfos)
return &milvuspb.GetMetricsResponse{
Status: merr.Success(),
Response: resp,
ComponentName: metricsinfo.ConstructComponentName(typeutil.IndexNodeRole, paramtable.GetNodeID()),
}, nil
}

View File

@ -28,18 +28,19 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus/internal/flushcommon/io"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/metricsinfo"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/timerecord"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
func (i *IndexNode) CreateJob(ctx context.Context, req *indexpb.CreateJobRequest) (*commonpb.Status, error) {
func (i *IndexNode) CreateJob(ctx context.Context, req *workerpb.CreateJobRequest) (*commonpb.Status, error) {
log := log.Ctx(ctx).With(
zap.String("clusterID", req.GetClusterID()),
zap.Int64("indexBuildID", req.GetBuildID()),
@ -93,7 +94,7 @@ func (i *IndexNode) CreateJob(ctx context.Context, req *indexpb.CreateJobRequest
zap.String("accessKey", req.GetStorageConfig().GetAccessKeyID()),
zap.Error(err),
)
i.deleteIndexTaskInfos(ctx, []taskKey{{ClusterID: req.GetClusterID(), BuildID: req.GetBuildID()}})
i.deleteIndexTaskInfos(ctx, []taskKey{{ClusterID: req.GetClusterID(), TaskID: req.GetBuildID()}})
metrics.IndexNodeBuildIndexTaskCounter.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metrics.FailLabel).Inc()
return merr.Status(err), nil
}
@ -112,13 +113,13 @@ func (i *IndexNode) CreateJob(ctx context.Context, req *indexpb.CreateJobRequest
return ret, nil
}
func (i *IndexNode) QueryJobs(ctx context.Context, req *indexpb.QueryJobsRequest) (*indexpb.QueryJobsResponse, error) {
func (i *IndexNode) QueryJobs(ctx context.Context, req *workerpb.QueryJobsRequest) (*workerpb.QueryJobsResponse, error) {
log := log.Ctx(ctx).With(
zap.String("clusterID", req.GetClusterID()),
).WithRateGroup("in.queryJobs", 1, 60)
if err := i.lifetime.Add(merr.IsHealthyOrStopping); err != nil {
log.Warn("index node not ready", zap.Error(err))
return &indexpb.QueryJobsResponse{
return &workerpb.QueryJobsResponse{
Status: merr.Status(err),
}, nil
}
@ -136,13 +137,13 @@ func (i *IndexNode) QueryJobs(ctx context.Context, req *indexpb.QueryJobsRequest
}
}
})
ret := &indexpb.QueryJobsResponse{
ret := &workerpb.QueryJobsResponse{
Status: merr.Success(),
ClusterID: req.GetClusterID(),
IndexInfos: make([]*indexpb.IndexTaskInfo, 0, len(req.GetBuildIDs())),
IndexInfos: make([]*workerpb.IndexTaskInfo, 0, len(req.GetBuildIDs())),
}
for i, buildID := range req.GetBuildIDs() {
ret.IndexInfos = append(ret.IndexInfos, &indexpb.IndexTaskInfo{
ret.IndexInfos = append(ret.IndexInfos, &workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_IndexStateNone,
IndexFileKeys: nil,
@ -165,7 +166,7 @@ func (i *IndexNode) QueryJobs(ctx context.Context, req *indexpb.QueryJobsRequest
return ret, nil
}
func (i *IndexNode) DropJobs(ctx context.Context, req *indexpb.DropJobsRequest) (*commonpb.Status, error) {
func (i *IndexNode) DropJobs(ctx context.Context, req *workerpb.DropJobsRequest) (*commonpb.Status, error) {
log.Ctx(ctx).Info("drop index build jobs",
zap.String("clusterID", req.ClusterID),
zap.Int64s("indexBuildIDs", req.BuildIDs),
@ -177,7 +178,7 @@ func (i *IndexNode) DropJobs(ctx context.Context, req *indexpb.DropJobsRequest)
defer i.lifetime.Done()
keys := make([]taskKey, 0, len(req.GetBuildIDs()))
for _, buildID := range req.GetBuildIDs() {
keys = append(keys, taskKey{ClusterID: req.GetClusterID(), BuildID: buildID})
keys = append(keys, taskKey{ClusterID: req.GetClusterID(), TaskID: buildID})
}
infos := i.deleteIndexTaskInfos(ctx, keys)
for _, info := range infos {
@ -191,10 +192,10 @@ func (i *IndexNode) DropJobs(ctx context.Context, req *indexpb.DropJobsRequest)
}
// GetJobStats should be GetSlots
func (i *IndexNode) GetJobStats(ctx context.Context, req *indexpb.GetJobStatsRequest) (*indexpb.GetJobStatsResponse, error) {
func (i *IndexNode) GetJobStats(ctx context.Context, req *workerpb.GetJobStatsRequest) (*workerpb.GetJobStatsResponse, error) {
if err := i.lifetime.Add(merr.IsHealthyOrStopping); err != nil {
log.Ctx(ctx).Warn("index node not ready", zap.Error(err))
return &indexpb.GetJobStatsResponse{
return &workerpb.GetJobStatsResponse{
Status: merr.Status(err),
}, nil
}
@ -210,7 +211,7 @@ func (i *IndexNode) GetJobStats(ctx context.Context, req *indexpb.GetJobStatsReq
zap.Int("active", active),
zap.Int("slot", slots),
)
return &indexpb.GetJobStatsResponse{
return &workerpb.GetJobStatsResponse{
Status: merr.Success(),
TotalJobNum: int64(active) + int64(unissued),
InProgressJobNum: int64(active),
@ -269,9 +270,9 @@ func (i *IndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequ
}, nil
}
func (i *IndexNode) CreateJobV2(ctx context.Context, req *indexpb.CreateJobV2Request) (*commonpb.Status, error) {
func (i *IndexNode) CreateJobV2(ctx context.Context, req *workerpb.CreateJobV2Request) (*commonpb.Status, error) {
log := log.Ctx(ctx).With(
zap.String("clusterID", req.GetClusterID()), zap.Int64("taskID", req.GetTaskID()),
zap.String("clusterID", req.GetClusterID()), zap.Int64("TaskID", req.GetTaskID()),
zap.String("jobType", req.GetJobType().String()),
)
@ -289,8 +290,9 @@ func (i *IndexNode) CreateJobV2(ctx context.Context, req *indexpb.CreateJobV2Req
case indexpb.JobType_JobTypeIndexJob:
indexRequest := req.GetIndexRequest()
log.Info("IndexNode building index ...",
zap.Int64("indexID", indexRequest.GetIndexID()),
zap.String("indexName", indexRequest.GetIndexName()),
zap.Int64("collectionID", indexRequest.CollectionID),
zap.Int64("partitionID", indexRequest.PartitionID),
zap.Int64("segmentID", indexRequest.SegmentID),
zap.String("indexFilePrefix", indexRequest.GetIndexFilePrefix()),
zap.Int64("indexVersion", indexRequest.GetIndexVersion()),
zap.Strings("dataPaths", indexRequest.GetDataPaths()),
@ -301,13 +303,18 @@ func (i *IndexNode) CreateJobV2(ctx context.Context, req *indexpb.CreateJobV2Req
zap.String("storePath", indexRequest.GetStorePath()),
zap.Int64("storeVersion", indexRequest.GetStoreVersion()),
zap.String("indexStorePath", indexRequest.GetIndexStorePath()),
zap.Int64("dim", indexRequest.GetDim()))
zap.Int64("dim", indexRequest.GetDim()),
zap.Int64("fieldID", indexRequest.GetFieldID()),
zap.String("fieldType", indexRequest.GetFieldType().String()),
zap.Any("field", indexRequest.GetField()),
)
taskCtx, taskCancel := context.WithCancel(i.loopCtx)
if oldInfo := i.loadOrStoreIndexTask(indexRequest.GetClusterID(), indexRequest.GetBuildID(), &indexTaskInfo{
cancel: taskCancel,
state: commonpb.IndexState_InProgress,
}); oldInfo != nil {
err := merr.WrapErrIndexDuplicate(indexRequest.GetIndexName(), "building index task existed")
err := merr.WrapErrTaskDuplicate(req.GetJobType().String(),
fmt.Sprintf("building index task existed with %s-%d", req.GetClusterID(), req.GetTaskID()))
log.Warn("duplicated index build task", zap.Error(err))
metrics.IndexNodeBuildIndexTaskCounter.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metrics.FailLabel).Inc()
return merr.Status(err), nil
@ -318,7 +325,7 @@ func (i *IndexNode) CreateJobV2(ctx context.Context, req *indexpb.CreateJobV2Req
zap.String("accessKey", indexRequest.GetStorageConfig().GetAccessKeyID()),
zap.Error(err),
)
i.deleteIndexTaskInfos(ctx, []taskKey{{ClusterID: indexRequest.GetClusterID(), BuildID: indexRequest.GetBuildID()}})
i.deleteIndexTaskInfos(ctx, []taskKey{{ClusterID: indexRequest.GetClusterID(), TaskID: indexRequest.GetBuildID()}})
metrics.IndexNodeBuildIndexTaskCounter.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), metrics.FailLabel).Inc()
return merr.Status(err), nil
}
@ -352,18 +359,12 @@ func (i *IndexNode) CreateJobV2(ctx context.Context, req *indexpb.CreateJobV2Req
cancel: taskCancel,
state: indexpb.JobState_JobStateInProgress,
}); oldInfo != nil {
err := merr.WrapErrIndexDuplicate("", "analyze task already existed")
err := merr.WrapErrTaskDuplicate(req.GetJobType().String(),
fmt.Sprintf("analyze task already existed with %s-%d", req.GetClusterID(), req.GetTaskID()))
log.Warn("duplicated analyze task", zap.Error(err))
return merr.Status(err), nil
}
t := &analyzeTask{
ident: fmt.Sprintf("%s/%d", analyzeRequest.GetClusterID(), analyzeRequest.GetTaskID()),
ctx: taskCtx,
cancel: taskCancel,
req: analyzeRequest,
node: i,
tr: timerecord.NewTimeRecorder(fmt.Sprintf("ClusterID: %s, IndexBuildID: %d", req.GetClusterID(), req.GetTaskID())),
}
t := newAnalyzeTask(taskCtx, taskCancel, analyzeRequest, i)
ret := merr.Success()
if err := i.sched.TaskQueue.Enqueue(t); err != nil {
log.Warn("IndexNode failed to schedule", zap.Error(err))
@ -372,20 +373,60 @@ func (i *IndexNode) CreateJobV2(ctx context.Context, req *indexpb.CreateJobV2Req
}
log.Info("IndexNode analyze job enqueued successfully")
return ret, nil
case indexpb.JobType_JobTypeStatsJob:
statsRequest := req.GetStatsRequest()
log.Info("receive stats job", zap.Int64("collectionID", statsRequest.GetCollectionID()),
zap.Int64("partitionID", statsRequest.GetPartitionID()),
zap.Int64("segmentID", statsRequest.GetSegmentID()),
zap.Int64("targetSegmentID", statsRequest.GetTargetSegmentID()),
zap.Int64("startLogID", statsRequest.GetStartLogID()),
zap.Int64("endLogID", statsRequest.GetEndLogID()),
)
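// Register the stats task before doing any I/O so that a duplicate request for the same cluster/task ID is rejected early.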
taskCtx, taskCancel := context.WithCancel(i.loopCtx)
if oldInfo := i.loadOrStoreStatsTask(statsRequest.GetClusterID(), statsRequest.GetTaskID(), &statsTaskInfo{
cancel: taskCancel,
state: indexpb.JobState_JobStateInProgress,
}); oldInfo != nil {
err := merr.WrapErrTaskDuplicate(req.GetJobType().String(),
fmt.Sprintf("stats task already existed with %s-%d", req.GetClusterID(), req.GetTaskID()))
log.Warn("duplicated stats task", zap.Error(err))
return merr.Status(err), nil
}
cm, err := i.storageFactory.NewChunkManager(i.loopCtx, statsRequest.GetStorageConfig())
if err != nil {
log.Error("create chunk manager failed", zap.String("bucket", statsRequest.GetStorageConfig().GetBucketName()),
zap.String("accessKey", statsRequest.GetStorageConfig().GetAccessKeyID()),
zap.Error(err),
)
i.deleteStatsTaskInfos(ctx, []taskKey{{ClusterID: req.GetClusterID(), TaskID: req.GetTaskID()}})
return merr.Status(err), nil
}
t := newStatsTask(taskCtx, taskCancel, statsRequest, i, io.NewBinlogIO(cm))
ret := merr.Success()
if err := i.sched.TaskQueue.Enqueue(t); err != nil {
log.Warn("IndexNode failed to schedule", zap.Error(err))
ret = merr.Status(err)
return ret, nil
}
log.Info("IndexNode stats job enqueued successfully")
return ret, nil
default:
log.Warn("IndexNode receive unknown type job")
return merr.Status(fmt.Errorf("IndexNode receive unknown type job with taskID: %d", req.GetTaskID())), nil
return merr.Status(fmt.Errorf("IndexNode receive unknown type job with TaskID: %d", req.GetTaskID())), nil
}
}
func (i *IndexNode) QueryJobsV2(ctx context.Context, req *indexpb.QueryJobsV2Request) (*indexpb.QueryJobsV2Response, error) {
func (i *IndexNode) QueryJobsV2(ctx context.Context, req *workerpb.QueryJobsV2Request) (*workerpb.QueryJobsV2Response, error) {
log := log.Ctx(ctx).With(
zap.String("clusterID", req.GetClusterID()), zap.Int64s("taskIDs", req.GetTaskIDs()),
).WithRateGroup("QueryResult", 1, 60)
if err := i.lifetime.Add(merr.IsHealthyOrStopping); err != nil {
log.Warn("IndexNode not ready", zap.Error(err))
return &indexpb.QueryJobsV2Response{
return &workerpb.QueryJobsV2Response{
Status: merr.Status(err),
}, nil
}
@ -406,9 +447,9 @@ func (i *IndexNode) QueryJobsV2(ctx context.Context, req *indexpb.QueryJobsV2Req
}
}
})
results := make([]*indexpb.IndexTaskInfo, 0, len(req.GetTaskIDs()))
results := make([]*workerpb.IndexTaskInfo, 0, len(req.GetTaskIDs()))
for i, buildID := range req.GetTaskIDs() {
results = append(results, &indexpb.IndexTaskInfo{
results = append(results, &workerpb.IndexTaskInfo{
BuildID: buildID,
State: commonpb.IndexState_IndexStateNone,
IndexFileKeys: nil,
@ -424,21 +465,21 @@ func (i *IndexNode) QueryJobsV2(ctx context.Context, req *indexpb.QueryJobsV2Req
}
}
log.Debug("query index jobs result success", zap.Any("results", results))
return &indexpb.QueryJobsV2Response{
return &workerpb.QueryJobsV2Response{
Status: merr.Success(),
ClusterID: req.GetClusterID(),
Result: &indexpb.QueryJobsV2Response_IndexJobResults{
IndexJobResults: &indexpb.IndexJobResults{
Result: &workerpb.QueryJobsV2Response_IndexJobResults{
IndexJobResults: &workerpb.IndexJobResults{
Results: results,
},
},
}, nil
case indexpb.JobType_JobTypeAnalyzeJob:
results := make([]*indexpb.AnalyzeResult, 0, len(req.GetTaskIDs()))
results := make([]*workerpb.AnalyzeResult, 0, len(req.GetTaskIDs()))
for _, taskID := range req.GetTaskIDs() {
info := i.getAnalyzeTaskInfo(req.GetClusterID(), taskID)
if info != nil {
results = append(results, &indexpb.AnalyzeResult{
results = append(results, &workerpb.AnalyzeResult{
TaskID: taskID,
State: info.state,
FailReason: info.failReason,
@ -447,24 +488,55 @@ func (i *IndexNode) QueryJobsV2(ctx context.Context, req *indexpb.QueryJobsV2Req
}
}
log.Debug("query analyze jobs result success", zap.Any("results", results))
return &indexpb.QueryJobsV2Response{
return &workerpb.QueryJobsV2Response{
Status: merr.Success(),
ClusterID: req.GetClusterID(),
Result: &indexpb.QueryJobsV2Response_AnalyzeJobResults{
AnalyzeJobResults: &indexpb.AnalyzeResults{
Result: &workerpb.QueryJobsV2Response_AnalyzeJobResults{
AnalyzeJobResults: &workerpb.AnalyzeResults{
Results: results,
},
},
}, nil
case indexpb.JobType_JobTypeStatsJob:
results := make([]*workerpb.StatsResult, 0, len(req.GetTaskIDs()))
for _, taskID := range req.GetTaskIDs() {
info := i.getStatsTaskInfo(req.GetClusterID(), taskID)
if info != nil {
results = append(results, &workerpb.StatsResult{
TaskID: taskID,
State: info.state,
FailReason: info.failReason,
CollectionID: info.collID,
PartitionID: info.partID,
SegmentID: info.segID,
Channel: info.insertChannel,
InsertLogs: info.insertLogs,
StatsLogs: info.statsLogs,
DeltaLogs: nil,
TextStatsLogs: info.textStatsLogs,
NumRows: info.numRows,
})
}
}
log.Debug("query stats job result success", zap.Any("results", results))
return &workerpb.QueryJobsV2Response{
Status: merr.Success(),
ClusterID: req.GetClusterID(),
Result: &workerpb.QueryJobsV2Response_StatsJobResults{
StatsJobResults: &workerpb.StatsResults{
Results: results,
},
},
}, nil
default:
log.Warn("IndexNode receive querying unknown type jobs")
return &indexpb.QueryJobsV2Response{
return &workerpb.QueryJobsV2Response{
Status: merr.Status(fmt.Errorf("IndexNode receive querying unknown type jobs")),
}, nil
}
}
func (i *IndexNode) DropJobsV2(ctx context.Context, req *indexpb.DropJobsV2Request) (*commonpb.Status, error) {
func (i *IndexNode) DropJobsV2(ctx context.Context, req *workerpb.DropJobsV2Request) (*commonpb.Status, error) {
log := log.Ctx(ctx).With(zap.String("clusterID", req.GetClusterID()),
zap.Int64s("taskIDs", req.GetTaskIDs()),
zap.String("jobType", req.GetJobType().String()),
@ -482,7 +554,7 @@ func (i *IndexNode) DropJobsV2(ctx context.Context, req *indexpb.DropJobsV2Reque
case indexpb.JobType_JobTypeIndexJob:
keys := make([]taskKey, 0, len(req.GetTaskIDs()))
for _, buildID := range req.GetTaskIDs() {
keys = append(keys, taskKey{ClusterID: req.GetClusterID(), BuildID: buildID})
keys = append(keys, taskKey{ClusterID: req.GetClusterID(), TaskID: buildID})
}
infos := i.deleteIndexTaskInfos(ctx, keys)
for _, info := range infos {
@ -495,7 +567,7 @@ func (i *IndexNode) DropJobsV2(ctx context.Context, req *indexpb.DropJobsV2Reque
case indexpb.JobType_JobTypeAnalyzeJob:
keys := make([]taskKey, 0, len(req.GetTaskIDs()))
for _, taskID := range req.GetTaskIDs() {
keys = append(keys, taskKey{ClusterID: req.GetClusterID(), BuildID: taskID})
keys = append(keys, taskKey{ClusterID: req.GetClusterID(), TaskID: taskID})
}
infos := i.deleteAnalyzeTaskInfos(ctx, keys)
for _, info := range infos {
@ -505,6 +577,19 @@ func (i *IndexNode) DropJobsV2(ctx context.Context, req *indexpb.DropJobsV2Reque
}
log.Info("drop analyze jobs success")
return merr.Success(), nil
case indexpb.JobType_JobTypeStatsJob:
keys := make([]taskKey, 0, len(req.GetTaskIDs()))
for _, taskID := range req.GetTaskIDs() {
keys = append(keys, taskKey{ClusterID: req.GetClusterID(), TaskID: taskID})
}
infos := i.deleteStatsTaskInfos(ctx, keys)
for _, info := range infos {
if info.cancel != nil {
info.cancel()
}
}
log.Info("drop stats jobs success")
return merr.Success(), nil
default:
log.Warn("IndexNode receive dropping unknown type jobs")
return merr.Status(fmt.Errorf("IndexNode receive dropping unknown type jobs")), nil

View File

@ -27,6 +27,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/metricsinfo"
)
@ -36,19 +37,19 @@ func TestAbnormalIndexNode(t *testing.T) {
assert.NoError(t, err)
assert.Nil(t, in.Stop())
ctx := context.TODO()
status, err := in.CreateJob(ctx, &indexpb.CreateJobRequest{})
status, err := in.CreateJob(ctx, &workerpb.CreateJobRequest{})
assert.NoError(t, err)
assert.ErrorIs(t, merr.Error(status), merr.ErrServiceNotReady)
qresp, err := in.QueryJobs(ctx, &indexpb.QueryJobsRequest{})
qresp, err := in.QueryJobs(ctx, &workerpb.QueryJobsRequest{})
assert.NoError(t, err)
assert.ErrorIs(t, merr.Error(qresp.GetStatus()), merr.ErrServiceNotReady)
status, err = in.DropJobs(ctx, &indexpb.DropJobsRequest{})
status, err = in.DropJobs(ctx, &workerpb.DropJobsRequest{})
assert.NoError(t, err)
assert.ErrorIs(t, merr.Error(status), merr.ErrServiceNotReady)
jobNumRsp, err := in.GetJobStats(ctx, &indexpb.GetJobStatsRequest{})
jobNumRsp, err := in.GetJobStats(ctx, &workerpb.GetJobStatsRequest{})
assert.NoError(t, err)
assert.ErrorIs(t, merr.Error(jobNumRsp.GetStatus()), merr.ErrServiceNotReady)
@ -127,19 +128,19 @@ func (suite *IndexNodeServiceSuite) Test_AbnormalIndexNode() {
suite.Nil(in.Stop())
ctx := context.TODO()
status, err := in.CreateJob(ctx, &indexpb.CreateJobRequest{})
status, err := in.CreateJob(ctx, &workerpb.CreateJobRequest{})
suite.NoError(err)
suite.ErrorIs(merr.Error(status), merr.ErrServiceNotReady)
qresp, err := in.QueryJobs(ctx, &indexpb.QueryJobsRequest{})
qresp, err := in.QueryJobs(ctx, &workerpb.QueryJobsRequest{})
suite.NoError(err)
suite.ErrorIs(merr.Error(qresp.GetStatus()), merr.ErrServiceNotReady)
status, err = in.DropJobs(ctx, &indexpb.DropJobsRequest{})
status, err = in.DropJobs(ctx, &workerpb.DropJobsRequest{})
suite.NoError(err)
suite.ErrorIs(merr.Error(status), merr.ErrServiceNotReady)
jobNumRsp, err := in.GetJobStats(ctx, &indexpb.GetJobStatsRequest{})
jobNumRsp, err := in.GetJobStats(ctx, &workerpb.GetJobStatsRequest{})
suite.NoError(err)
suite.ErrorIs(merr.Error(jobNumRsp.GetStatus()), merr.ErrServiceNotReady)
@ -151,15 +152,15 @@ func (suite *IndexNodeServiceSuite) Test_AbnormalIndexNode() {
err = merr.CheckRPCCall(configurationResp, err)
suite.ErrorIs(err, merr.ErrServiceNotReady)
status, err = in.CreateJobV2(ctx, &indexpb.CreateJobV2Request{})
status, err = in.CreateJobV2(ctx, &workerpb.CreateJobV2Request{})
err = merr.CheckRPCCall(status, err)
suite.ErrorIs(err, merr.ErrServiceNotReady)
queryAnalyzeResultResp, err := in.QueryJobsV2(ctx, &indexpb.QueryJobsV2Request{})
queryAnalyzeResultResp, err := in.QueryJobsV2(ctx, &workerpb.QueryJobsV2Request{})
err = merr.CheckRPCCall(queryAnalyzeResultResp, err)
suite.ErrorIs(err, merr.ErrServiceNotReady)
dropAnalyzeTasksResp, err := in.DropJobsV2(ctx, &indexpb.DropJobsV2Request{})
dropAnalyzeTasksResp, err := in.DropJobsV2(ctx, &workerpb.DropJobsV2Request{})
err = merr.CheckRPCCall(dropAnalyzeTasksResp, err)
suite.ErrorIs(err, merr.ErrServiceNotReady)
}
@ -173,7 +174,7 @@ func (suite *IndexNodeServiceSuite) Test_Method() {
in.UpdateStateCode(commonpb.StateCode_Healthy)
suite.Run("CreateJobV2", func() {
req := &indexpb.AnalyzeRequest{
req := &workerpb.AnalyzeRequest{
ClusterID: suite.cluster,
TaskID: suite.taskID,
CollectionID: suite.collectionID,
@ -190,11 +191,11 @@ func (suite *IndexNodeServiceSuite) Test_Method() {
StorageConfig: nil,
}
resp, err := in.CreateJobV2(ctx, &indexpb.CreateJobV2Request{
resp, err := in.CreateJobV2(ctx, &workerpb.CreateJobV2Request{
ClusterID: suite.cluster,
TaskID: suite.taskID,
JobType: indexpb.JobType_JobTypeAnalyzeJob,
Request: &indexpb.CreateJobV2Request_AnalyzeRequest{
Request: &workerpb.CreateJobV2Request_AnalyzeRequest{
AnalyzeRequest: req,
},
})
@ -203,7 +204,7 @@ func (suite *IndexNodeServiceSuite) Test_Method() {
})
suite.Run("QueryJobsV2", func() {
req := &indexpb.QueryJobsV2Request{
req := &workerpb.QueryJobsV2Request{
ClusterID: suite.cluster,
TaskIDs: []int64{suite.taskID},
JobType: indexpb.JobType_JobTypeIndexJob,
@ -215,7 +216,7 @@ func (suite *IndexNodeServiceSuite) Test_Method() {
})
suite.Run("DropJobsV2", func() {
req := &indexpb.DropJobsV2Request{
req := &workerpb.DropJobsV2Request{
ClusterID: suite.cluster,
TaskIDs: []int64{suite.taskID},
JobType: indexpb.JobType_JobTypeIndexJob,

View File

@ -30,7 +30,9 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/metastore/kv/binlog"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/paramtable"
@ -181,6 +183,7 @@ type IndexNodeSuite struct {
segID int64
fieldID int64
logID int64
numRows int64
data []*Blob
in *IndexNode
storageConfig *indexpb.StorageConfig
@ -195,13 +198,14 @@ func (s *IndexNodeSuite) SetupTest() {
s.collID = 1
s.partID = 2
s.segID = 3
s.fieldID = 102
s.fieldID = 111
s.logID = 10000
s.numRows = 3000
paramtable.Init()
Params.MinioCfg.RootPath.SwapTempValue("indexnode-ut")
var err error
s.data, err = generateTestData(s.collID, s.partID, s.segID, 1025)
s.data, err = generateTestData(s.collID, s.partID, s.segID, 3000)
s.NoError(err)
s.storageConfig = &indexpb.StorageConfig{
@ -264,7 +268,7 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_Compatibility() {
buildID := int64(1)
dataPath, err := binlog.BuildLogPath(storage.InsertBinlog, s.collID, s.partID, s.segID, s.fieldID, s.logID+13)
s.NoError(err)
req := &indexpb.CreateJobRequest{
req := &workerpb.CreateJobRequest{
ClusterID: "cluster1",
IndexFilePrefix: "indexnode-ut/index_files",
BuildID: buildID,
@ -290,7 +294,7 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_Compatibility() {
Key: "dim", Value: "8",
},
},
NumRows: 1025,
NumRows: s.numRows,
}
status, err := s.in.CreateJob(ctx, req)
@ -299,7 +303,7 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_Compatibility() {
s.NoError(err)
for {
resp, err := s.in.QueryJobs(ctx, &indexpb.QueryJobsRequest{
resp, err := s.in.QueryJobs(ctx, &workerpb.QueryJobsRequest{
ClusterID: "cluster1",
BuildIDs: []int64{buildID},
})
@ -314,7 +318,7 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_Compatibility() {
time.Sleep(time.Second)
}
status, err = s.in.DropJobs(ctx, &indexpb.DropJobsRequest{
status, err = s.in.DropJobs(ctx, &workerpb.DropJobsRequest{
ClusterID: "cluster1",
BuildIDs: []int64{buildID},
})
@ -325,7 +329,7 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_Compatibility() {
s.Run("v2.4.x", func() {
buildID := int64(2)
req := &indexpb.CreateJobRequest{
req := &workerpb.CreateJobRequest{
ClusterID: "cluster1",
IndexFilePrefix: "indexnode-ut/index_files",
BuildID: buildID,
@ -351,7 +355,7 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_Compatibility() {
Key: "dim", Value: "8",
},
},
NumRows: 1025,
NumRows: s.numRows,
CurrentIndexVersion: 0,
CollectionID: s.collID,
PartitionID: s.partID,
@ -368,7 +372,7 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_Compatibility() {
s.NoError(err)
for {
resp, err := s.in.QueryJobs(ctx, &indexpb.QueryJobsRequest{
resp, err := s.in.QueryJobs(ctx, &workerpb.QueryJobsRequest{
ClusterID: "cluster1",
BuildIDs: []int64{buildID},
})
@ -383,7 +387,7 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_Compatibility() {
time.Sleep(time.Second)
}
status, err = s.in.DropJobs(ctx, &indexpb.DropJobsRequest{
status, err = s.in.DropJobs(ctx, &workerpb.DropJobsRequest{
ClusterID: "cluster1",
BuildIDs: []int64{buildID},
})
@ -394,7 +398,7 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_Compatibility() {
s.Run("v2.5.x", func() {
buildID := int64(3)
req := &indexpb.CreateJobRequest{
req := &workerpb.CreateJobRequest{
ClusterID: "cluster1",
IndexFilePrefix: "indexnode-ut/index_files",
BuildID: buildID,
@ -419,7 +423,7 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_Compatibility() {
Key: "dim", Value: "8",
},
},
NumRows: 1025,
NumRows: s.numRows,
CurrentIndexVersion: 0,
CollectionID: s.collID,
PartitionID: s.partID,
@ -442,7 +446,7 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_Compatibility() {
s.NoError(err)
for {
resp, err := s.in.QueryJobs(ctx, &indexpb.QueryJobsRequest{
resp, err := s.in.QueryJobs(ctx, &workerpb.QueryJobsRequest{
ClusterID: "cluster1",
BuildIDs: []int64{buildID},
})
@ -457,7 +461,7 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_Compatibility() {
time.Sleep(time.Second)
}
status, err = s.in.DropJobs(ctx, &indexpb.DropJobsRequest{
status, err = s.in.DropJobs(ctx, &workerpb.DropJobsRequest{
ClusterID: "cluster1",
BuildIDs: []int64{buildID},
})
@ -473,10 +477,10 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_ScalarIndex() {
s.Run("int64 inverted", func() {
buildID := int64(10)
fieldID := int64(13)
dataPath, err := binlog.BuildLogPath(storage.InsertBinlog, s.collID, s.partID, s.segID, s.fieldID, s.logID+13)
fieldID := int64(103)
dataPath, err := binlog.BuildLogPath(storage.InsertBinlog, s.collID, s.partID, s.segID, fieldID, s.logID+5)
s.NoError(err)
req := &indexpb.CreateJobRequest{
req := &workerpb.CreateJobRequest{
ClusterID: "cluster1",
IndexFilePrefix: "indexnode-ut/index_files",
BuildID: buildID,
@ -489,8 +493,8 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_ScalarIndex() {
},
},
TypeParams: nil,
NumRows: 1025,
DataIds: []int64{s.logID + 13},
NumRows: s.numRows,
DataIds: []int64{s.logID + 5},
Field: &schemapb.FieldSchema{
FieldID: fieldID,
Name: "int64",
@ -504,7 +508,7 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_ScalarIndex() {
s.NoError(err)
for {
resp, err := s.in.QueryJobs(ctx, &indexpb.QueryJobsRequest{
resp, err := s.in.QueryJobs(ctx, &workerpb.QueryJobsRequest{
ClusterID: "cluster1",
BuildIDs: []int64{buildID},
})
@ -515,11 +519,11 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_ScalarIndex() {
if resp.GetIndexInfos()[0].GetState() == commonpb.IndexState_Finished {
break
}
require.Equal(s.T(), resp.GetIndexInfos()[0].GetState(), commonpb.IndexState_InProgress)
require.Equal(s.T(), commonpb.IndexState_InProgress, resp.GetIndexInfos()[0].GetState())
time.Sleep(time.Second)
}
status, err = s.in.DropJobs(ctx, &indexpb.DropJobsRequest{
status, err = s.in.DropJobs(ctx, &workerpb.DropJobsRequest{
ClusterID: "cluster1",
BuildIDs: []int64{buildID},
})
@ -528,3 +532,157 @@ func (s *IndexNodeSuite) Test_CreateIndexJob_ScalarIndex() {
s.NoError(err)
})
}
func (s *IndexNodeSuite) Test_CreateAnalyzeTask() {
ctx := context.Background()
s.Run("normal case", func() {
taskID := int64(200)
req := &workerpb.AnalyzeRequest{
ClusterID: "cluster1",
TaskID: taskID,
CollectionID: s.collID,
PartitionID: s.partID,
FieldID: s.fieldID,
FieldName: "floatVector",
FieldType: schemapb.DataType_FloatVector,
SegmentStats: map[int64]*indexpb.SegmentStats{
s.segID: {
ID: s.segID,
NumRows: s.numRows,
LogIDs: []int64{s.logID + 13},
},
},
Version: 1,
StorageConfig: s.storageConfig,
Dim: 8,
MaxTrainSizeRatio: 0.8,
NumClusters: 1,
MinClusterSizeRatio: 0.01,
MaxClusterSizeRatio: 10,
MaxClusterSize: 5 * 1024 * 1024 * 1024,
}
status, err := s.in.CreateJobV2(ctx, &workerpb.CreateJobV2Request{
ClusterID: "cluster1",
TaskID: taskID,
JobType: indexpb.JobType_JobTypeAnalyzeJob,
Request: &workerpb.CreateJobV2Request_AnalyzeRequest{
AnalyzeRequest: req,
},
})
s.NoError(err)
err = merr.Error(status)
s.NoError(err)
for {
resp, err := s.in.QueryJobsV2(ctx, &workerpb.QueryJobsV2Request{
ClusterID: "cluster1",
TaskIDs: []int64{taskID},
JobType: indexpb.JobType_JobTypeAnalyzeJob,
})
s.NoError(err)
err = merr.Error(resp.GetStatus())
s.NoError(err)
s.Equal(1, len(resp.GetAnalyzeJobResults().GetResults()))
if resp.GetAnalyzeJobResults().GetResults()[0].GetState() == indexpb.JobState_JobStateFinished {
s.Equal("", resp.GetAnalyzeJobResults().GetResults()[0].GetCentroidsFile())
break
}
s.Equal(indexpb.JobState_JobStateInProgress, resp.GetAnalyzeJobResults().GetResults()[0].GetState())
time.Sleep(time.Second)
}
status, err = s.in.DropJobsV2(ctx, &workerpb.DropJobsV2Request{
ClusterID: "cluster1",
TaskIDs: []int64{taskID},
JobType: indexpb.JobType_JobTypeAnalyzeJob,
})
s.NoError(err)
err = merr.Error(status)
s.NoError(err)
})
}
func (s *IndexNodeSuite) Test_CreateStatsTask() {
ctx := context.Background()
fieldBinlogs := make([]*datapb.FieldBinlog, 0)
for i, field := range generateTestSchema().GetFields() {
fieldBinlogs = append(fieldBinlogs, &datapb.FieldBinlog{
FieldID: field.GetFieldID(),
Binlogs: []*datapb.Binlog{{
LogID: s.logID + int64(i),
}},
})
}
s.Run("normal case", func() {
taskID := int64(100)
req := &workerpb.CreateStatsRequest{
ClusterID: "cluster2",
TaskID: taskID,
CollectionID: s.collID,
PartitionID: s.partID,
InsertChannel: "ch1",
SegmentID: s.segID,
InsertLogs: fieldBinlogs,
DeltaLogs: nil,
StorageConfig: s.storageConfig,
Schema: generateTestSchema(),
TargetSegmentID: s.segID + 1,
StartLogID: s.logID + 100,
EndLogID: s.logID + 200,
NumRows: s.numRows,
BinlogMaxSize: 131000,
}
status, err := s.in.CreateJobV2(ctx, &workerpb.CreateJobV2Request{
ClusterID: "cluster2",
TaskID: taskID,
JobType: indexpb.JobType_JobTypeStatsJob,
Request: &workerpb.CreateJobV2Request_StatsRequest{
StatsRequest: req,
},
})
s.NoError(err)
err = merr.Error(status)
s.NoError(err)
for {
resp, err := s.in.QueryJobsV2(ctx, &workerpb.QueryJobsV2Request{
ClusterID: "cluster2",
TaskIDs: []int64{taskID},
JobType: indexpb.JobType_JobTypeStatsJob,
})
s.NoError(err)
err = merr.Error(resp.GetStatus())
s.NoError(err)
s.Equal(1, len(resp.GetStatsJobResults().GetResults()))
if resp.GetStatsJobResults().GetResults()[0].GetState() == indexpb.JobState_JobStateFinished {
s.NotZero(len(resp.GetStatsJobResults().GetResults()[0].GetInsertLogs()))
s.NotZero(len(resp.GetStatsJobResults().GetResults()[0].GetStatsLogs()))
s.Zero(len(resp.GetStatsJobResults().GetResults()[0].GetDeltaLogs()))
s.Equal(s.numRows, resp.GetStatsJobResults().GetResults()[0].GetNumRows())
break
}
s.Equal(indexpb.JobState_JobStateInProgress, resp.GetStatsJobResults().GetResults()[0].GetState())
time.Sleep(time.Second)
}
slotResp, err := s.in.GetJobStats(ctx, &workerpb.GetJobStatsRequest{})
s.NoError(err)
err = merr.Error(slotResp.GetStatus())
s.NoError(err)
s.Equal(int64(1), slotResp.GetTaskSlots())
status, err = s.in.DropJobsV2(ctx, &workerpb.DropJobsV2Request{
ClusterID: "cluster2",
TaskIDs: []int64{taskID},
JobType: indexpb.JobType_JobTypeStatsJob,
})
s.NoError(err)
err = merr.Error(status)
s.NoError(err)
})
}

View File

@ -18,6 +18,7 @@ package indexnode
import (
"context"
"fmt"
"time"
"go.uber.org/zap"
@ -25,6 +26,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/clusteringpb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/util/analyzecgowrapper"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/hardware"
@ -32,19 +34,33 @@ import (
"github.com/milvus-io/milvus/pkg/util/timerecord"
)
var _ task = (*analyzeTask)(nil)
type analyzeTask struct {
ident string
ctx context.Context
cancel context.CancelFunc
req *indexpb.AnalyzeRequest
req *workerpb.AnalyzeRequest
tr *timerecord.TimeRecorder
queueDur time.Duration
node *IndexNode
analyze analyzecgowrapper.CodecAnalyze
}
startTime int64
endTime int64
func newAnalyzeTask(ctx context.Context,
cancel context.CancelFunc,
req *workerpb.AnalyzeRequest,
node *IndexNode,
) *analyzeTask {
return &analyzeTask{
ident: fmt.Sprintf("%s/%d", req.GetClusterID(), req.GetTaskID()),
ctx: ctx,
cancel: cancel,
req: req,
node: node,
tr: timerecord.NewTimeRecorder(fmt.Sprintf("ClusterID: %s, TaskID: %d", req.GetClusterID(), req.GetTaskID())),
}
}
func (at *analyzeTask) Ctx() context.Context {
@ -58,7 +74,7 @@ func (at *analyzeTask) Name() string {
func (at *analyzeTask) PreExecute(ctx context.Context) error {
at.queueDur = at.tr.RecordSpan()
log := log.Ctx(ctx).With(zap.String("clusterID", at.req.GetClusterID()),
zap.Int64("taskID", at.req.GetTaskID()), zap.Int64("Collection", at.req.GetCollectionID()),
zap.Int64("TaskID", at.req.GetTaskID()), zap.Int64("Collection", at.req.GetCollectionID()),
zap.Int64("partitionID", at.req.GetPartitionID()), zap.Int64("fieldID", at.req.GetFieldID()))
log.Info("Begin to prepare analyze task")
@ -70,7 +86,7 @@ func (at *analyzeTask) Execute(ctx context.Context) error {
var err error
log := log.Ctx(ctx).With(zap.String("clusterID", at.req.GetClusterID()),
zap.Int64("taskID", at.req.GetTaskID()), zap.Int64("Collection", at.req.GetCollectionID()),
zap.Int64("TaskID", at.req.GetTaskID()), zap.Int64("Collection", at.req.GetCollectionID()),
zap.Int64("partitionID", at.req.GetPartitionID()), zap.Int64("fieldID", at.req.GetFieldID()))
log.Info("Begin to build analyze task")
@ -148,7 +164,7 @@ func (at *analyzeTask) Execute(ctx context.Context) error {
func (at *analyzeTask) PostExecute(ctx context.Context) error {
log := log.Ctx(ctx).With(zap.String("clusterID", at.req.GetClusterID()),
zap.Int64("taskID", at.req.GetTaskID()), zap.Int64("Collection", at.req.GetCollectionID()),
zap.Int64("TaskID", at.req.GetTaskID()), zap.Int64("Collection", at.req.GetCollectionID()),
zap.Int64("partitionID", at.req.GetPartitionID()), zap.Int64("fieldID", at.req.GetFieldID()))
gc := func() {
if err := at.analyze.Delete(); err != nil {
@ -164,7 +180,6 @@ func (at *analyzeTask) PostExecute(ctx context.Context) error {
}
log.Info("analyze result", zap.String("centroidsFile", centroidsFile))
at.endTime = time.Now().UnixMicro()
at.node.storeAnalyzeFilesAndStatistic(at.req.GetClusterID(),
at.req.GetTaskID(),
centroidsFile)
@ -176,9 +191,9 @@ func (at *analyzeTask) PostExecute(ctx context.Context) error {
func (at *analyzeTask) OnEnqueue(ctx context.Context) error {
at.queueDur = 0
at.tr.RecordSpan()
at.startTime = time.Now().UnixMicro()
log.Ctx(ctx).Info("IndexNode analyzeTask enqueued", zap.String("clusterID", at.req.GetClusterID()),
zap.Int64("taskID", at.req.GetTaskID()))
zap.Int64("TaskID", at.req.GetTaskID()))
return nil
}
@ -198,6 +213,4 @@ func (at *analyzeTask) Reset() {
at.tr = nil
at.queueDur = 0
at.node = nil
at.startTime = 0
at.endTime = 0
}

View File

@ -30,6 +30,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/indexcgopb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/indexcgowrapper"
"github.com/milvus-io/milvus/pkg/common"
@ -51,7 +52,7 @@ type indexBuildTask struct {
cm storage.ChunkManager
index indexcgowrapper.CodecIndex
req *indexpb.CreateJobRequest
req *workerpb.CreateJobRequest
newTypeParams map[string]string
newIndexParams map[string]string
tr *timerecord.TimeRecorder
@ -61,7 +62,7 @@ type indexBuildTask struct {
func newIndexBuildTask(ctx context.Context,
cancel context.CancelFunc,
req *indexpb.CreateJobRequest,
req *workerpb.CreateJobRequest,
cm storage.ChunkManager,
node *IndexNode,
) *indexBuildTask {
@ -198,7 +199,8 @@ func (it *indexBuildTask) PreExecute(ctx context.Context) error {
it.req.CurrentIndexVersion = getCurrentIndexVersion(it.req.GetCurrentIndexVersion())
log.Ctx(ctx).Info("Successfully prepare indexBuildTask", zap.Int64("buildID", it.req.GetBuildID()),
zap.Int64("collectionID", it.req.GetCollectionID()), zap.Int64("segmentID", it.req.GetSegmentID()))
zap.Int64("collectionID", it.req.GetCollectionID()), zap.Int64("segmentID", it.req.GetSegmentID()),
zap.Int64("currentIndexVersion", it.req.GetIndexVersion()))
return nil
}

View File

@ -27,4 +27,5 @@ func TestTaskState_String(t *testing.T) {
assert.Equal(t, TaskStateAbandon.String(), "Abandon")
assert.Equal(t, TaskStateRetry.String(), "Retry")
assert.Equal(t, TaskStateFailed.String(), "Failed")
assert.Equal(t, TaskState(100).String(), "None")
}

View File

@ -0,0 +1,589 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package indexnode
import (
"context"
"fmt"
sio "io"
"sort"
"strconv"
"time"
"github.com/samber/lo"
"go.opentelemetry.io/otel"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/datanode/compaction"
iter "github.com/milvus-io/milvus/internal/datanode/iterators"
"github.com/milvus-io/milvus/internal/flushcommon/io"
"github.com/milvus-io/milvus/internal/metastore/kv/binlog"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
_ "github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/timerecord"
"github.com/milvus-io/milvus/pkg/util/tsoutil"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
var _ task = (*statsTask)(nil)
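// statsTask sorts a sealed segment by primary key on the IndexNode: it rewrites the insert binlogs in PK order,
// writes a new stats binlog for the primary key, and records placeholders for text indexes on VARCHAR fields.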
type statsTask struct {
ident string
ctx context.Context
cancel context.CancelFunc
req *workerpb.CreateStatsRequest
tr *timerecord.TimeRecorder
queueDur time.Duration
node *IndexNode
binlogIO io.BinlogIO
insertLogs [][]string
deltaLogs []string
logIDOffset int64
}
func newStatsTask(ctx context.Context,
cancel context.CancelFunc,
req *workerpb.CreateStatsRequest,
node *IndexNode,
binlogIO io.BinlogIO,
) *statsTask {
return &statsTask{
ident: fmt.Sprintf("%s/%d", req.GetClusterID(), req.GetTaskID()),
ctx: ctx,
cancel: cancel,
req: req,
node: node,
binlogIO: binlogIO,
tr: timerecord.NewTimeRecorder(fmt.Sprintf("ClusterID: %s, TaskID: %d", req.GetClusterID(), req.GetTaskID())),
logIDOffset: 0,
}
}
func (st *statsTask) Ctx() context.Context {
return st.ctx
}
func (st *statsTask) Name() string {
return st.ident
}
func (st *statsTask) OnEnqueue(ctx context.Context) error {
st.queueDur = 0
st.tr.RecordSpan()
log.Ctx(ctx).Info("statsTask enqueue", zap.Int64("collectionID", st.req.GetCollectionID()),
zap.Int64("partitionID", st.req.GetPartitionID()),
zap.Int64("segmentID", st.req.GetSegmentID()))
return nil
}
func (st *statsTask) SetState(state indexpb.JobState, failReason string) {
st.node.storeStatsTaskState(st.req.GetClusterID(), st.req.GetTaskID(), state, failReason)
}
func (st *statsTask) GetState() indexpb.JobState {
return st.node.getStatsTaskState(st.req.GetClusterID(), st.req.GetTaskID())
}
func (st *statsTask) PreExecute(ctx context.Context) error {
ctx, span := otel.Tracer(typeutil.IndexNodeRole).Start(ctx, fmt.Sprintf("Stats-PreExecute-%s-%d", st.req.GetClusterID(), st.req.GetTaskID()))
defer span.End()
st.queueDur = st.tr.RecordSpan()
log.Ctx(ctx).Info("Begin to prepare stats task",
zap.String("clusterID", st.req.GetClusterID()),
zap.Int64("taskID", st.req.GetTaskID()),
zap.Int64("collectionID", st.req.GetCollectionID()),
zap.Int64("partitionID", st.req.GetPartitionID()),
zap.Int64("segmentID", st.req.GetSegmentID()),
)
if err := binlog.DecompressBinLog(storage.InsertBinlog, st.req.GetCollectionID(), st.req.GetPartitionID(),
st.req.GetSegmentID(), st.req.GetInsertLogs()); err != nil {
log.Warn("Decompress insert binlog error", zap.Error(err))
return err
}
if err := binlog.DecompressBinLog(storage.DeleteBinlog, st.req.GetCollectionID(), st.req.GetPartitionID(),
st.req.GetSegmentID(), st.req.GetDeltaLogs()); err != nil {
log.Warn("Decompress delta binlog error", zap.Error(err))
return err
}
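// Regroup the insert binlogs by batch: batch i holds the i-th binlog of every field, so each batch can be deserialized into complete rows.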
st.insertLogs = make([][]string, 0)
binlogNum := len(st.req.GetInsertLogs()[0].GetBinlogs())
for idx := 0; idx < binlogNum; idx++ {
var batchPaths []string
for _, f := range st.req.GetInsertLogs() {
batchPaths = append(batchPaths, f.GetBinlogs()[idx].GetLogPath())
}
st.insertLogs = append(st.insertLogs, batchPaths)
}
for _, d := range st.req.GetDeltaLogs() {
for _, l := range d.GetBinlogs() {
st.deltaLogs = append(st.deltaLogs, l.GetLogPath())
}
}
return nil
}
func (st *statsTask) Execute(ctx context.Context) error {
// Execute sorts the segment by primary key and checks whether a text index needs to be built.
ctx, span := otel.Tracer(typeutil.IndexNodeRole).Start(ctx, fmt.Sprintf("Stats-Execute-%s-%d", st.req.GetClusterID(), st.req.GetTaskID()))
defer span.End()
log := log.Ctx(ctx).With(
zap.String("clusterID", st.req.GetClusterID()),
zap.Int64("taskID", st.req.GetTaskID()),
zap.Int64("collectionID", st.req.GetCollectionID()),
zap.Int64("partitionID", st.req.GetPartitionID()),
zap.Int64("segmentID", st.req.GetSegmentID()),
)
numRows := st.req.GetNumRows()
writer, err := compaction.NewSegmentWriter(st.req.GetSchema(), numRows, st.req.GetTargetSegmentID(), st.req.GetPartitionID(), st.req.GetCollectionID())
if err != nil {
log.Warn("sort segment wrong, unable to init segment writer", zap.Error(err))
return err
}
var (
flushBatchCount int // binlog batch count
unFlushedRowCount int64 = 0
// all binlog metadata of the segment
allBinlogs = make(map[typeutil.UniqueID]*datapb.FieldBinlog)
)
serWriteTimeCost := time.Duration(0)
uploadTimeCost := time.Duration(0)
sortTimeCost := time.Duration(0)
values, err := st.downloadData(ctx, numRows, writer.GetPkID())
if err != nil {
log.Warn("download data failed", zap.Error(err))
return err
}
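// Sort the remaining rows by primary key in memory before rewriting them into the target segment.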
sortStart := time.Now()
sort.Slice(values, func(i, j int) bool {
return values[i].PK.LT(values[j].PK)
})
sortTimeCost += time.Since(sortStart)
for _, v := range values {
err := writer.Write(v)
if err != nil {
log.Warn("write value wrong, failed to writer row", zap.Error(err))
return err
}
unFlushedRowCount++
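// Roughly every 100 rows, check whether the accumulated batch has reached the binlog size limit; if so, serialize and upload it as one binlog batch.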
if (unFlushedRowCount+1)%100 == 0 && writer.FlushAndIsFullWithBinlogMaxSize(st.req.GetBinlogMaxSize()) {
serWriteStart := time.Now()
binlogNum, kvs, partialBinlogs, err := serializeWrite(ctx, st.req.GetStartLogID()+st.logIDOffset, writer)
if err != nil {
log.Warn("stats wrong, failed to serialize writer", zap.Error(err))
return err
}
serWriteTimeCost += time.Since(serWriteStart)
uploadStart := time.Now()
if err := st.binlogIO.Upload(ctx, kvs); err != nil {
log.Warn("stats wrong, failed to upload kvs", zap.Error(err))
return err
}
uploadTimeCost += time.Since(uploadStart)
mergeFieldBinlogs(allBinlogs, partialBinlogs)
flushBatchCount++
unFlushedRowCount = 0
st.logIDOffset += binlogNum
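// Log IDs are pre-allocated in [StartLogID, EndLogID); abort if the sorted segment needs more binlogs than the allocation allows.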
if st.req.GetStartLogID()+st.logIDOffset >= st.req.GetEndLogID() {
log.Warn("binlog files too much, log is not enough",
zap.Int64("binlog num", binlogNum), zap.Int64("startLogID", st.req.GetStartLogID()),
zap.Int64("endLogID", st.req.GetEndLogID()), zap.Int64("logIDOffset", st.logIDOffset))
return fmt.Errorf("binlog files too much, log is not enough")
}
}
}
if !writer.FlushAndIsEmpty() {
serWriteStart := time.Now()
binlogNum, kvs, partialBinlogs, err := serializeWrite(ctx, st.req.GetStartLogID()+st.logIDOffset, writer)
if err != nil {
log.Warn("stats wrong, failed to serialize writer", zap.Error(err))
return err
}
serWriteTimeCost += time.Since(serWriteStart)
st.logIDOffset += binlogNum
uploadStart := time.Now()
if err := st.binlogIO.Upload(ctx, kvs); err != nil {
return err
}
uploadTimeCost += time.Since(uploadStart)
mergeFieldBinlogs(allBinlogs, partialBinlogs)
flushBatchCount++
}
serWriteStart := time.Now()
binlogNums, sPath, err := statSerializeWrite(ctx, st.binlogIO, st.req.GetStartLogID()+st.logIDOffset, writer, numRows)
if err != nil {
log.Warn("stats wrong, failed to serialize write segment stats",
zap.Int64("remaining row count", numRows), zap.Error(err))
return err
}
serWriteTimeCost += time.Since(serWriteStart)
st.logIDOffset += binlogNums
totalElapse := st.tr.RecordSpan()
insertLogs := lo.Values(allBinlogs)
if err := binlog.CompressFieldBinlogs(insertLogs); err != nil {
return err
}
statsLogs := []*datapb.FieldBinlog{sPath}
if err := binlog.CompressFieldBinlogs(statsLogs); err != nil {
return err
}
log.Info("sort segment end",
zap.Int64("target segmentID", st.req.GetTargetSegmentID()),
zap.Int64("old rows", numRows),
zap.Int("valid rows", len(values)),
zap.Int("binlog batch count", flushBatchCount),
zap.Duration("upload binlogs elapse", uploadTimeCost),
zap.Duration("sort elapse", sortTimeCost),
zap.Duration("serWrite elapse", serWriteTimeCost),
zap.Duration("total elapse", totalElapse))
textIndexStatsLogs, err := st.createTextIndex(ctx,
st.req.GetStorageConfig(),
st.req.GetCollectionID(),
st.req.GetPartitionID(),
st.req.GetTargetSegmentID(),
st.req.GetTaskVersion(),
lo.Values(allBinlogs))
if err != nil {
log.Warn("stats wrong, failed to create text index", zap.Error(err))
return err
}
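// Publish the result (new insert binlogs, PK stats log and text-index placeholders) so QueryJobsV2 can report the sorted segment back to the caller.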
st.node.storeStatsResult(st.req.GetClusterID(),
st.req.GetTaskID(),
st.req.GetCollectionID(),
st.req.GetPartitionID(),
st.req.GetTargetSegmentID(),
st.req.GetInsertChannel(),
int64(len(values)), insertLogs, statsLogs, textIndexStatsLogs)
return nil
}
func (st *statsTask) PostExecute(ctx context.Context) error {
return nil
}
func (st *statsTask) Reset() {
st.ident = ""
st.ctx = nil
st.req = nil
st.cancel = nil
st.tr = nil
st.node = nil
}
func (st *statsTask) downloadData(ctx context.Context, numRows int64, PKFieldID int64) ([]*storage.Value, error) {
log := log.Ctx(ctx).With(
zap.String("clusterID", st.req.GetClusterID()),
zap.Int64("taskID", st.req.GetTaskID()),
zap.Int64("collectionID", st.req.GetCollectionID()),
zap.Int64("partitionID", st.req.GetPartitionID()),
zap.Int64("segmentID", st.req.GetSegmentID()),
)
deletePKs, err := st.loadDeltalogs(ctx, st.deltaLogs)
if err != nil {
log.Warn("load deletePKs failed", zap.Error(err))
return nil, err
}
var (
remainingRowCount int64 // the number of remaining entities
expiredRowCount int64 // the number of expired entities
)
isValueDeleted := func(v *storage.Value) bool {
ts, ok := deletePKs[v.PK.GetValue()]
// An upsert writes the insert and its delete with the same timestamp,
// so the comparison must be strict (<) rather than <=,
// otherwise the upserted row would be wrongly dropped after compaction.
if ok && uint64(v.Timestamp) < ts {
return true
}
return false
}
downloadTimeCost := time.Duration(0)
values := make([]*storage.Value, 0, numRows)
for _, paths := range st.insertLogs {
log := log.With(zap.Strings("paths", paths))
downloadStart := time.Now()
allValues, err := st.binlogIO.Download(ctx, paths)
if err != nil {
log.Warn("download wrong, fail to download insertLogs", zap.Error(err))
return nil, err
}
downloadTimeCost += time.Since(downloadStart)
blobs := lo.Map(allValues, func(v []byte, i int) *storage.Blob {
return &storage.Blob{Key: paths[i], Value: v}
})
iter, err := storage.NewBinlogDeserializeReader(blobs, PKFieldID)
if err != nil {
log.Warn("downloadData wrong, failed to new insert binlogs reader", zap.Error(err))
return nil, err
}
for {
err := iter.Next()
if err != nil {
if err == sio.EOF {
break
} else {
log.Warn("downloadData wrong, failed to iter through data", zap.Error(err))
return nil, err
}
}
v := iter.Value()
if isValueDeleted(v) {
continue
}
// Filter out expired entities
if st.isExpiredEntity(typeutil.Timestamp(v.Timestamp)) {
expiredRowCount++
continue
}
values = append(values, iter.Value())
remainingRowCount++
}
}
log.Info("download data success",
zap.Int64("old rows", numRows),
zap.Int64("remainingRowCount", remainingRowCount),
zap.Int64("expiredRowCount", expiredRowCount),
zap.Duration("download binlogs elapse", downloadTimeCost),
)
return values, nil
}
func (st *statsTask) loadDeltalogs(ctx context.Context, dpaths []string) (map[interface{}]typeutil.Timestamp, error) {
st.tr.RecordSpan()
ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "loadDeltalogs")
defer span.End()
log := log.Ctx(ctx).With(
zap.String("clusterID", st.req.GetClusterID()),
zap.Int64("taskID", st.req.GetTaskID()),
zap.Int64("collectionID", st.req.GetCollectionID()),
zap.Int64("partitionID", st.req.GetPartitionID()),
zap.Int64("segmentID", st.req.GetSegmentID()),
)
pk2ts := make(map[interface{}]typeutil.Timestamp)
if len(dpaths) == 0 {
log.Info("compact with no deltalogs, skip merge deltalogs")
return pk2ts, nil
}
blobs, err := st.binlogIO.Download(ctx, dpaths)
if err != nil {
log.Warn("compact wrong, fail to download deltalogs", zap.Error(err))
return nil, err
}
deltaIter := iter.NewDeltalogIterator(blobs, nil)
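// Keep the newest delete timestamp seen for each primary key; a row is only dropped if its own timestamp is strictly older.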
for deltaIter.HasNext() {
labeled, _ := deltaIter.Next()
ts := labeled.GetTimestamp()
if lastTs, ok := pk2ts[labeled.GetPk().GetValue()]; ok && lastTs > ts {
ts = lastTs
}
pk2ts[labeled.GetPk().GetValue()] = ts
}
log.Info("compact loadDeltalogs end",
zap.Int("deleted pk counts", len(pk2ts)),
zap.Duration("elapse", st.tr.RecordSpan()))
return pk2ts, nil
}
func (st *statsTask) isExpiredEntity(ts typeutil.Timestamp) bool {
now := st.req.GetCurrentTs()
// entity expiration is disabled when the collection TTL is <= 0
if st.req.GetCollectionTtl() <= 0 {
return false
}
entityT, _ := tsoutil.ParseTS(ts)
nowT, _ := tsoutil.ParseTS(now)
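// An entity is expired when its write time plus the collection TTL falls before the task's current timestamp.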
return entityT.Add(time.Duration(st.req.GetCollectionTtl())).Before(nowT)
}
func mergeFieldBinlogs(base, paths map[typeutil.UniqueID]*datapb.FieldBinlog) {
for fID, fpath := range paths {
if _, ok := base[fID]; !ok {
base[fID] = &datapb.FieldBinlog{FieldID: fID, Binlogs: make([]*datapb.Binlog, 0)}
}
base[fID].Binlogs = append(base[fID].Binlogs, fpath.GetBinlogs()...)
}
}
func serializeWrite(ctx context.Context, startID int64, writer *compaction.SegmentWriter) (binlogNum int64, kvs map[string][]byte, fieldBinlogs map[int64]*datapb.FieldBinlog, err error) {
_, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "serializeWrite")
defer span.End()
blobs, tr, err := writer.SerializeYield()
if err != nil {
return 0, nil, nil, err
}
binlogNum = int64(len(blobs))
kvs = make(map[string][]byte)
fieldBinlogs = make(map[int64]*datapb.FieldBinlog)
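// Each serialized blob belongs to one field; its blob key is the field ID, and each blob is assigned the next pre-allocated log ID in sequence.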
for i := range blobs {
// Blob Key is generated by Serialize from int64 fieldID in collection schema, which won't raise error in ParseInt
fID, _ := strconv.ParseInt(blobs[i].GetKey(), 10, 64)
key, _ := binlog.BuildLogPath(storage.InsertBinlog, writer.GetCollectionID(), writer.GetPartitionID(), writer.GetSegmentID(), fID, startID+int64(i))
kvs[key] = blobs[i].GetValue()
fieldBinlogs[fID] = &datapb.FieldBinlog{
FieldID: fID,
Binlogs: []*datapb.Binlog{
{
LogSize: int64(len(blobs[i].GetValue())),
MemorySize: blobs[i].GetMemorySize(),
LogPath: key,
EntriesNum: blobs[i].RowNum,
TimestampFrom: tr.GetMinTimestamp(),
TimestampTo: tr.GetMaxTimestamp(),
},
},
}
}
return
}
func statSerializeWrite(ctx context.Context, io io.BinlogIO, startID int64, writer *compaction.SegmentWriter, finalRowCount int64) (int64, *datapb.FieldBinlog, error) {
ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "statslog serializeWrite")
defer span.End()
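// Serialize the primary-key statistics of the sorted segment into a single stats binlog stored under the PK field.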
sblob, err := writer.Finish()
if err != nil {
return 0, nil, err
}
binlogNum := int64(1)
key, _ := binlog.BuildLogPath(storage.StatsBinlog, writer.GetCollectionID(), writer.GetPartitionID(), writer.GetSegmentID(), writer.GetPkID(), startID)
kvs := map[string][]byte{key: sblob.GetValue()}
statFieldLog := &datapb.FieldBinlog{
FieldID: writer.GetPkID(),
Binlogs: []*datapb.Binlog{
{
LogSize: int64(len(sblob.GetValue())),
MemorySize: int64(len(sblob.GetValue())),
LogPath: key,
EntriesNum: finalRowCount,
},
},
}
if err := io.Upload(ctx, kvs); err != nil {
log.Warn("failed to upload insert log", zap.Error(err))
return binlogNum, nil, err
}
return binlogNum, statFieldLog, nil
}
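// buildTextLogPrefix composes the object-storage prefix for a field's text-index files, scoped by collection, partition, segment, field and index version.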
func buildTextLogPrefix(rootPath string, collID, partID, segID, fieldID, version int64) string {
return fmt.Sprintf("%s/%s/%d/%d/%d/%d/%d", rootPath, common.TextIndexPath, collID, partID, segID, fieldID, version)
}
func (st *statsTask) createTextIndex(ctx context.Context,
storageConfig *indexpb.StorageConfig,
collectionID int64,
partitionID int64,
segmentID int64,
version int64,
insertBinlogs []*datapb.FieldBinlog,
) (map[int64]*datapb.TextIndexStats, error) {
log := log.Ctx(ctx).With(
zap.String("clusterID", st.req.GetClusterID()),
zap.Int64("taskID", st.req.GetTaskID()),
zap.Int64("collectionID", st.req.GetCollectionID()),
zap.Int64("partitionID", st.req.GetPartitionID()),
zap.Int64("segmentID", st.req.GetSegmentID()),
)
fieldStatsLogs := make(map[int64]*datapb.TextIndexStats)
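// Only VARCHAR fields are candidates for a text index; the actual CGO build is not wired up yet, so only a versioned placeholder is recorded per field.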
for _, field := range st.req.GetSchema().GetFields() {
if field.GetDataType() == schemapb.DataType_VarChar {
for _, binlog := range insertBinlogs {
if binlog.GetFieldID() == field.GetFieldID() {
// build the text index for this varchar field; the actual CGO call is not wired up yet (see the TODO log below)
_ = buildTextLogPrefix(storageConfig.GetRootPath(), collectionID, partitionID, segmentID, field.GetFieldID(), version)
fieldStatsLogs[field.GetFieldID()] = &datapb.TextIndexStats{
Version: version,
Files: nil,
}
log.Info("TODO: call CGO CreateTextIndex", zap.Int64("fieldID", field.GetFieldID()))
break
}
}
}
}
totalElapse := st.tr.RecordSpan()
log.Info("create text index done",
zap.Int64("target segmentID", st.req.GetTargetSegmentID()),
zap.Duration("total elapse", totalElapse))
return fieldStatsLogs, nil
}

View File

@ -26,6 +26,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/etcdpb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/dependency"
"github.com/milvus-io/milvus/pkg/common"
@ -90,7 +91,7 @@ func (suite *IndexBuildTaskSuite) serializeData() ([]*storage.Blob, error) {
func (suite *IndexBuildTaskSuite) TestBuildMemoryIndex() {
ctx, cancel := context.WithCancel(context.Background())
req := &indexpb.CreateJobRequest{
req := &workerpb.CreateJobRequest{
BuildID: 1,
IndexVersion: 1,
DataPaths: []string{suite.dataPath},
@ -184,7 +185,7 @@ func (suite *AnalyzeTaskSuite) serializeData() ([]*storage.Blob, error) {
func (suite *AnalyzeTaskSuite) TestAnalyze() {
ctx, cancel := context.WithCancel(context.Background())
req := &indexpb.AnalyzeRequest{
req := &workerpb.AnalyzeRequest{
ClusterID: "test",
TaskID: 1,
CollectionID: suite.collectionID,

View File

@ -1,3 +1,19 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package indexnode
import (
@ -7,6 +23,7 @@ import (
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
@ -28,7 +45,7 @@ type indexTaskInfo struct {
func (i *IndexNode) loadOrStoreIndexTask(ClusterID string, buildID UniqueID, info *indexTaskInfo) *indexTaskInfo {
i.stateLock.Lock()
defer i.stateLock.Unlock()
key := taskKey{ClusterID: ClusterID, BuildID: buildID}
key := taskKey{ClusterID: ClusterID, TaskID: buildID}
oldInfo, ok := i.indexTasks[key]
if ok {
return oldInfo
@ -38,7 +55,7 @@ func (i *IndexNode) loadOrStoreIndexTask(ClusterID string, buildID UniqueID, inf
}
func (i *IndexNode) loadIndexTaskState(ClusterID string, buildID UniqueID) commonpb.IndexState {
key := taskKey{ClusterID: ClusterID, BuildID: buildID}
key := taskKey{ClusterID: ClusterID, TaskID: buildID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
task, ok := i.indexTasks[key]
@ -49,7 +66,7 @@ func (i *IndexNode) loadIndexTaskState(ClusterID string, buildID UniqueID) commo
}
func (i *IndexNode) storeIndexTaskState(ClusterID string, buildID UniqueID, state commonpb.IndexState, failReason string) {
key := taskKey{ClusterID: ClusterID, BuildID: buildID}
key := taskKey{ClusterID: ClusterID, TaskID: buildID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
if task, ok := i.indexTasks[key]; ok {
@ -64,7 +81,7 @@ func (i *IndexNode) foreachIndexTaskInfo(fn func(ClusterID string, buildID Uniqu
i.stateLock.Lock()
defer i.stateLock.Unlock()
for key, info := range i.indexTasks {
fn(key.ClusterID, key.BuildID, info)
fn(key.ClusterID, key.TaskID, info)
}
}
@ -75,7 +92,7 @@ func (i *IndexNode) storeIndexFilesAndStatistic(
serializedSize uint64,
currentIndexVersion int32,
) {
key := taskKey{ClusterID: ClusterID, BuildID: buildID}
key := taskKey{ClusterID: ClusterID, TaskID: buildID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
if info, ok := i.indexTasks[key]; ok {
@ -94,7 +111,7 @@ func (i *IndexNode) storeIndexFilesAndStatisticV2(
currentIndexVersion int32,
indexStoreVersion int64,
) {
key := taskKey{ClusterID: ClusterID, BuildID: buildID}
key := taskKey{ClusterID: ClusterID, TaskID: buildID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
if info, ok := i.indexTasks[key]; ok {
@ -116,7 +133,7 @@ func (i *IndexNode) deleteIndexTaskInfos(ctx context.Context, keys []taskKey) []
deleted = append(deleted, info)
delete(i.indexTasks, key)
log.Ctx(ctx).Info("delete task infos",
zap.String("cluster_id", key.ClusterID), zap.Int64("build_id", key.BuildID))
zap.String("cluster_id", key.ClusterID), zap.Int64("build_id", key.TaskID))
}
}
return deleted
@ -145,7 +162,7 @@ type analyzeTaskInfo struct {
func (i *IndexNode) loadOrStoreAnalyzeTask(clusterID string, taskID UniqueID, info *analyzeTaskInfo) *analyzeTaskInfo {
i.stateLock.Lock()
defer i.stateLock.Unlock()
key := taskKey{ClusterID: clusterID, BuildID: taskID}
key := taskKey{ClusterID: clusterID, TaskID: taskID}
oldInfo, ok := i.analyzeTasks[key]
if ok {
return oldInfo
@ -155,7 +172,7 @@ func (i *IndexNode) loadOrStoreAnalyzeTask(clusterID string, taskID UniqueID, in
}
func (i *IndexNode) loadAnalyzeTaskState(clusterID string, taskID UniqueID) indexpb.JobState {
key := taskKey{ClusterID: clusterID, BuildID: taskID}
key := taskKey{ClusterID: clusterID, TaskID: taskID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
task, ok := i.analyzeTasks[key]
@ -166,11 +183,11 @@ func (i *IndexNode) loadAnalyzeTaskState(clusterID string, taskID UniqueID) inde
}
func (i *IndexNode) storeAnalyzeTaskState(clusterID string, taskID UniqueID, state indexpb.JobState, failReason string) {
key := taskKey{ClusterID: clusterID, BuildID: taskID}
key := taskKey{ClusterID: clusterID, TaskID: taskID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
if task, ok := i.analyzeTasks[key]; ok {
log.Info("IndexNode store analyze task state", zap.String("clusterID", clusterID), zap.Int64("taskID", taskID),
log.Info("IndexNode store analyze task state", zap.String("clusterID", clusterID), zap.Int64("TaskID", taskID),
zap.String("state", state.String()), zap.String("fail reason", failReason))
task.state = state
task.failReason = failReason
@ -181,7 +198,7 @@ func (i *IndexNode) foreachAnalyzeTaskInfo(fn func(clusterID string, taskID Uniq
i.stateLock.Lock()
defer i.stateLock.Unlock()
for key, info := range i.analyzeTasks {
fn(key.ClusterID, key.BuildID, info)
fn(key.ClusterID, key.TaskID, info)
}
}
@ -190,7 +207,7 @@ func (i *IndexNode) storeAnalyzeFilesAndStatistic(
taskID UniqueID,
centroidsFile string,
) {
key := taskKey{ClusterID: ClusterID, BuildID: taskID}
key := taskKey{ClusterID: ClusterID, TaskID: taskID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
if info, ok := i.analyzeTasks[key]; ok {
@ -203,7 +220,15 @@ func (i *IndexNode) getAnalyzeTaskInfo(clusterID string, taskID UniqueID) *analy
i.stateLock.Lock()
defer i.stateLock.Unlock()
return i.analyzeTasks[taskKey{ClusterID: clusterID, BuildID: taskID}]
if info, ok := i.analyzeTasks[taskKey{ClusterID: clusterID, TaskID: taskID}]; ok {
return &analyzeTaskInfo{
cancel: info.cancel,
state: info.state,
failReason: info.failReason,
centroidsFile: info.centroidsFile,
}
}
return nil
}
func (i *IndexNode) deleteAnalyzeTaskInfos(ctx context.Context, keys []taskKey) []*analyzeTaskInfo {
@ -216,7 +241,7 @@ func (i *IndexNode) deleteAnalyzeTaskInfos(ctx context.Context, keys []taskKey)
deleted = append(deleted, info)
delete(i.analyzeTasks, key)
log.Ctx(ctx).Info("delete analyze task infos",
zap.String("clusterID", key.ClusterID), zap.Int64("taskID", key.BuildID))
zap.String("clusterID", key.ClusterID), zap.Int64("TaskID", key.TaskID))
}
}
return deleted
@ -285,3 +310,131 @@ func (i *IndexNode) waitTaskFinish() {
}
}
}
type statsTaskInfo struct {
cancel context.CancelFunc
state indexpb.JobState
failReason string
collID UniqueID
partID UniqueID
segID UniqueID
insertChannel string
numRows int64
insertLogs []*datapb.FieldBinlog
statsLogs []*datapb.FieldBinlog
textStatsLogs map[int64]*datapb.TextIndexStats
}
func (i *IndexNode) loadOrStoreStatsTask(clusterID string, taskID UniqueID, info *statsTaskInfo) *statsTaskInfo {
i.stateLock.Lock()
defer i.stateLock.Unlock()
key := taskKey{ClusterID: clusterID, TaskID: taskID}
oldInfo, ok := i.statsTasks[key]
if ok {
return oldInfo
}
i.statsTasks[key] = info
return nil
}
func (i *IndexNode) getStatsTaskState(clusterID string, taskID UniqueID) indexpb.JobState {
key := taskKey{ClusterID: clusterID, TaskID: taskID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
task, ok := i.statsTasks[key]
if !ok {
return indexpb.JobState_JobStateNone
}
return task.state
}
func (i *IndexNode) storeStatsTaskState(clusterID string, taskID UniqueID, state indexpb.JobState, failReason string) {
key := taskKey{ClusterID: clusterID, TaskID: taskID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
if task, ok := i.statsTasks[key]; ok {
log.Info("IndexNode store stats task state", zap.String("clusterID", clusterID), zap.Int64("TaskID", taskID),
zap.String("state", state.String()), zap.String("fail reason", failReason))
task.state = state
task.failReason = failReason
}
}
func (i *IndexNode) storeStatsResult(
ClusterID string,
taskID UniqueID,
collID UniqueID,
partID UniqueID,
segID UniqueID,
channel string,
numRows int64,
insertLogs []*datapb.FieldBinlog,
statsLogs []*datapb.FieldBinlog,
fieldStatsLogs map[int64]*datapb.TextIndexStats,
) {
key := taskKey{ClusterID: ClusterID, TaskID: taskID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
if info, ok := i.statsTasks[key]; ok {
info.collID = collID
info.partID = partID
info.segID = segID
info.insertChannel = channel
info.numRows = numRows
info.insertLogs = insertLogs
info.statsLogs = statsLogs
info.textStatsLogs = fieldStatsLogs
return
}
}
func (i *IndexNode) getStatsTaskInfo(clusterID string, taskID UniqueID) *statsTaskInfo {
i.stateLock.Lock()
defer i.stateLock.Unlock()
if info, ok := i.statsTasks[taskKey{ClusterID: clusterID, TaskID: taskID}]; ok {
return &statsTaskInfo{
cancel: info.cancel,
state: info.state,
failReason: info.failReason,
collID: info.collID,
partID: info.partID,
segID: info.segID,
insertChannel: info.insertChannel,
numRows: info.numRows,
insertLogs: info.insertLogs,
statsLogs: info.statsLogs,
textStatsLogs: info.textStatsLogs,
}
}
return nil
}
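// Note (illustrative, not part of this diff): like getAnalyzeTaskInfo above,
// getStatsTaskInfo hands back a copy of the stored struct instead of the pointer
// kept in the map, so callers can read the snapshot after stateLock is released
// without racing concurrent storeStatsTaskState/storeStatsResult updates.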
func (i *IndexNode) deleteStatsTaskInfos(ctx context.Context, keys []taskKey) []*statsTaskInfo {
i.stateLock.Lock()
defer i.stateLock.Unlock()
deleted := make([]*statsTaskInfo, 0, len(keys))
for _, key := range keys {
info, ok := i.statsTasks[key]
if ok {
deleted = append(deleted, info)
delete(i.statsTasks, key)
log.Ctx(ctx).Info("delete stats task infos",
zap.String("clusterID", key.ClusterID), zap.Int64("TaskID", key.TaskID))
}
}
return deleted
}
func (i *IndexNode) deleteAllStatsTasks() []*statsTaskInfo {
i.stateLock.Lock()
deletedTasks := i.statsTasks
i.statsTasks = make(map[taskKey]*statsTaskInfo)
i.stateLock.Unlock()
deleted := make([]*statsTaskInfo, 0, len(deletedTasks))
for _, info := range deletedTasks {
deleted = append(deleted, info)
}
return deleted
}
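// Note (illustrative, not part of this diff): deleteAllStatsTasks swaps the whole
// task map out while holding stateLock and only flattens it into a slice after
// the lock is released, keeping the critical section to a single map swap even
// when many stats tasks are torn down at once.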

View File

@ -0,0 +1,110 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package indexnode
import (
"context"
"testing"
"github.com/stretchr/testify/suite"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
)
type statsTaskInfoSuite struct {
suite.Suite
ctx context.Context
node *IndexNode
cluster string
taskID int64
}
func Test_statsTaskInfoSuite(t *testing.T) {
suite.Run(t, new(statsTaskInfoSuite))
}
func (s *statsTaskInfoSuite) SetupSuite() {
s.node = &IndexNode{
loopCtx: context.Background(),
statsTasks: make(map[taskKey]*statsTaskInfo),
}
s.cluster = "test"
s.taskID = 100
}
func (s *statsTaskInfoSuite) Test_Methods() {
s.Run("loadOrStoreStatsTask", func() {
_, cancel := context.WithCancel(s.node.loopCtx)
info := &statsTaskInfo{
cancel: cancel,
state: indexpb.JobState_JobStateInProgress,
}
reInfo := s.node.loadOrStoreStatsTask(s.cluster, s.taskID, info)
s.Nil(reInfo)
reInfo = s.node.loadOrStoreStatsTask(s.cluster, s.taskID, info)
s.Equal(indexpb.JobState_JobStateInProgress, reInfo.state)
})
s.Run("getStatsTaskState", func() {
s.Equal(indexpb.JobState_JobStateInProgress, s.node.getStatsTaskState(s.cluster, s.taskID))
s.Equal(indexpb.JobState_JobStateNone, s.node.getStatsTaskState(s.cluster, s.taskID+1))
})
s.Run("storeStatsTaskState", func() {
s.node.storeStatsTaskState(s.cluster, s.taskID, indexpb.JobState_JobStateFinished, "finished")
s.Equal(indexpb.JobState_JobStateFinished, s.node.getStatsTaskState(s.cluster, s.taskID))
})
s.Run("storeStatsResult", func() {
s.node.storeStatsResult(s.cluster, s.taskID, 1, 2, 3, "ch1", 65535,
[]*datapb.FieldBinlog{{FieldID: 100, Binlogs: []*datapb.Binlog{{LogID: 1}}}},
[]*datapb.FieldBinlog{{FieldID: 100, Binlogs: []*datapb.Binlog{{LogID: 2}}}},
map[int64]*datapb.TextIndexStats{
100: {
FieldID: 100,
Version: 1,
Files: []string{"file1"},
LogSize: 1024,
MemorySize: 1024,
},
},
)
})
s.Run("getStatsTaskInfo", func() {
taskInfo := s.node.getStatsTaskInfo(s.cluster, s.taskID)
s.Equal(indexpb.JobState_JobStateFinished, taskInfo.state)
s.Equal(int64(1), taskInfo.collID)
s.Equal(int64(2), taskInfo.partID)
s.Equal(int64(3), taskInfo.segID)
s.Equal("ch1", taskInfo.insertChannel)
s.Equal(int64(65535), taskInfo.numRows)
})
s.Run("deleteStatsTaskInfos", func() {
s.node.deleteStatsTaskInfos(s.ctx, []taskKey{{ClusterID: s.cluster, TaskID: s.taskID}})
s.Nil(s.node.getStatsTaskInfo(s.cluster, s.taskID))
})
}

View File

@ -170,6 +170,10 @@ type DataCoordCatalog interface {
SaveCurrentPartitionStatsVersion(ctx context.Context, collID, partID int64, vChannel string, currentVersion int64) error
GetCurrentPartitionStatsVersion(ctx context.Context, collID, partID int64, vChannel string) (int64, error)
DropCurrentPartitionStatsVersion(ctx context.Context, collID, partID int64, vChannel string) error
ListStatsTasks(ctx context.Context) ([]*indexpb.StatsTask, error)
SaveStatsTask(ctx context.Context, task *indexpb.StatsTask) error
DropStatsTask(ctx context.Context, taskID typeutil.UniqueID) error
}
type QueryCoordCatalog interface {

View File

@ -31,6 +31,7 @@ const (
AnalyzeTaskPrefix = MetaPrefix + "/analyze-task"
PartitionStatsInfoPrefix = MetaPrefix + "/partition-stats"
PartitionStatsCurrentVersionPrefix = MetaPrefix + "/current-partition-stats-version"
StatsTaskPrefix = MetaPrefix + "/stats-task"
NonRemoveFlagTomestone = "non-removed"
RemoveFlagTomestone = "removed"

View File

@ -923,3 +923,40 @@ func (kc *Catalog) DropCurrentPartitionStatsVersion(ctx context.Context, collID,
key := buildCurrentPartitionStatsVersionPath(collID, partID, vChannel)
return kc.MetaKv.Remove(key)
}
func (kc *Catalog) ListStatsTasks(ctx context.Context) ([]*indexpb.StatsTask, error) {
tasks := make([]*indexpb.StatsTask, 0)
_, values, err := kc.MetaKv.LoadWithPrefix(StatsTaskPrefix)
if err != nil {
return nil, err
}
for _, value := range values {
task := &indexpb.StatsTask{}
err = proto.Unmarshal([]byte(value), task)
if err != nil {
return nil, err
}
tasks = append(tasks, task)
}
return tasks, nil
}
func (kc *Catalog) SaveStatsTask(ctx context.Context, task *indexpb.StatsTask) error {
key := buildStatsTaskKey(task.TaskID)
value, err := proto.Marshal(task)
if err != nil {
return err
}
err = kc.MetaKv.Save(key, string(value))
if err != nil {
return err
}
return nil
}
func (kc *Catalog) DropStatsTask(ctx context.Context, taskID typeutil.UniqueID) error {
key := buildStatsTaskKey(taskID)
return kc.MetaKv.Remove(key)
}
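// Illustrative sketch (not part of this diff): a save/list/drop roundtrip against
// the catalog, assuming kc is a *Catalog backed by a live MetaKv and ctx is a
// caller-supplied context.
//
//	task := &indexpb.StatsTask{TaskID: 4, CollectionID: 1, SegmentID: 3, State: indexpb.JobState_JobStateInProgress}
//	if err := kc.SaveStatsTask(ctx, task); err != nil { /* handle error */ }
//	tasks, _ := kc.ListStatsTasks(ctx)     // reloads everything under StatsTaskPrefix
//	_ = kc.DropStatsTask(ctx, task.TaskID) // removes <StatsTaskPrefix>/<taskID>
//	_ = tasks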

View File

@ -34,6 +34,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
"github.com/milvus-io/milvus/internal/kv/mocks"
"github.com/milvus-io/milvus/internal/metastore"
@ -1518,3 +1519,345 @@ func TestCatalog_Import(t *testing.T) {
assert.Error(t, err)
})
}
func TestCatalog_AnalyzeTask(t *testing.T) {
kc := &Catalog{}
mockErr := errors.New("mock error")
t.Run("ListAnalyzeTasks", func(t *testing.T) {
txn := mocks.NewMetaKv(t)
txn.EXPECT().LoadWithPrefix(mock.Anything).Return(nil, nil, mockErr)
kc.MetaKv = txn
tasks, err := kc.ListAnalyzeTasks(context.Background())
assert.Error(t, err)
assert.Nil(t, tasks)
task := &indexpb.AnalyzeTask{
CollectionID: 1,
PartitionID: 2,
FieldID: 3,
FieldName: "vector",
FieldType: schemapb.DataType_FloatVector,
TaskID: 4,
Version: 1,
SegmentIDs: nil,
NodeID: 1,
State: indexpb.JobState_JobStateFinished,
FailReason: "",
Dim: 8,
CentroidsFile: "centroids",
}
value, err := proto.Marshal(task)
assert.NoError(t, err)
txn = mocks.NewMetaKv(t)
txn.EXPECT().LoadWithPrefix(mock.Anything).Return([]string{"key1"}, []string{
string(value),
}, nil)
kc.MetaKv = txn
tasks, err = kc.ListAnalyzeTasks(context.Background())
assert.NoError(t, err)
assert.Equal(t, 1, len(tasks))
txn = mocks.NewMetaKv(t)
txn.EXPECT().LoadWithPrefix(mock.Anything).Return([]string{"key1"}, []string{"1234"}, nil)
kc.MetaKv = txn
tasks, err = kc.ListAnalyzeTasks(context.Background())
assert.Error(t, err)
assert.Nil(t, tasks)
})
t.Run("SaveAnalyzeTask", func(t *testing.T) {
task := &indexpb.AnalyzeTask{
CollectionID: 1,
PartitionID: 2,
FieldID: 3,
FieldName: "vector",
FieldType: schemapb.DataType_FloatVector,
TaskID: 4,
Version: 1,
SegmentIDs: nil,
NodeID: 1,
State: indexpb.JobState_JobStateFinished,
FailReason: "",
Dim: 8,
CentroidsFile: "centroids",
}
txn := mocks.NewMetaKv(t)
txn.EXPECT().Save(mock.Anything, mock.Anything).Return(nil)
kc.MetaKv = txn
err := kc.SaveAnalyzeTask(context.Background(), task)
assert.NoError(t, err)
txn = mocks.NewMetaKv(t)
txn.EXPECT().Save(mock.Anything, mock.Anything).Return(mockErr)
kc.MetaKv = txn
err = kc.SaveAnalyzeTask(context.Background(), task)
assert.Error(t, err)
})
t.Run("DropAnalyzeTask", func(t *testing.T) {
txn := mocks.NewMetaKv(t)
txn.EXPECT().Remove(mock.Anything).Return(nil)
kc.MetaKv = txn
err := kc.DropAnalyzeTask(context.Background(), 1)
assert.NoError(t, err)
txn = mocks.NewMetaKv(t)
txn.EXPECT().Remove(mock.Anything).Return(mockErr)
kc.MetaKv = txn
err = kc.DropAnalyzeTask(context.Background(), 1)
assert.Error(t, err)
})
}
func Test_PartitionStatsInfo(t *testing.T) {
kc := &Catalog{}
mockErr := errors.New("mock error")
t.Run("ListPartitionStatsInfo", func(t *testing.T) {
txn := mocks.NewMetaKv(t)
txn.EXPECT().LoadWithPrefix(mock.Anything).Return(nil, nil, mockErr)
kc.MetaKv = txn
infos, err := kc.ListPartitionStatsInfos(context.Background())
assert.Error(t, err)
assert.Nil(t, infos)
info := &datapb.PartitionStatsInfo{
CollectionID: 1,
PartitionID: 2,
VChannel: "ch1",
Version: 1,
SegmentIDs: nil,
AnalyzeTaskID: 3,
CommitTime: 10,
}
value, err := proto.Marshal(info)
assert.NoError(t, err)
txn = mocks.NewMetaKv(t)
txn.EXPECT().LoadWithPrefix(mock.Anything).Return([]string{"key1"}, []string{string(value)}, nil)
kc.MetaKv = txn
infos, err = kc.ListPartitionStatsInfos(context.Background())
assert.NoError(t, err)
assert.Equal(t, 1, len(infos))
txn = mocks.NewMetaKv(t)
txn.EXPECT().LoadWithPrefix(mock.Anything).Return([]string{"key1"}, []string{"1234"}, nil)
kc.MetaKv = txn
infos, err = kc.ListPartitionStatsInfos(context.Background())
assert.Error(t, err)
assert.Nil(t, infos)
})
t.Run("SavePartitionStatsInfo", func(t *testing.T) {
txn := mocks.NewMetaKv(t)
txn.EXPECT().MultiSave(mock.Anything).Return(mockErr)
kc.MetaKv = txn
info := &datapb.PartitionStatsInfo{
CollectionID: 1,
PartitionID: 2,
VChannel: "ch1",
Version: 1,
SegmentIDs: nil,
AnalyzeTaskID: 3,
CommitTime: 10,
}
err := kc.SavePartitionStatsInfo(context.Background(), info)
assert.Error(t, err)
txn = mocks.NewMetaKv(t)
txn.EXPECT().MultiSave(mock.Anything).Return(nil)
kc.MetaKv = txn
err = kc.SavePartitionStatsInfo(context.Background(), info)
assert.NoError(t, err)
})
t.Run("DropPartitionStatsInfo", func(t *testing.T) {
txn := mocks.NewMetaKv(t)
txn.EXPECT().Remove(mock.Anything).Return(mockErr)
kc.MetaKv = txn
info := &datapb.PartitionStatsInfo{
CollectionID: 1,
PartitionID: 2,
VChannel: "ch1",
Version: 1,
SegmentIDs: nil,
AnalyzeTaskID: 3,
CommitTime: 10,
}
err := kc.DropPartitionStatsInfo(context.Background(), info)
assert.Error(t, err)
txn = mocks.NewMetaKv(t)
txn.EXPECT().Remove(mock.Anything).Return(nil)
kc.MetaKv = txn
err = kc.DropPartitionStatsInfo(context.Background(), info)
assert.NoError(t, err)
})
}
func Test_CurrentPartitionStatsVersion(t *testing.T) {
kc := &Catalog{}
mockErr := errors.New("mock error")
collID := int64(1)
partID := int64(2)
vChannel := "ch1"
currentVersion := int64(1)
t.Run("SaveCurrentPartitionStatsVersion", func(t *testing.T) {
txn := mocks.NewMetaKv(t)
txn.EXPECT().Save(mock.Anything, mock.Anything).Return(mockErr)
kc.MetaKv = txn
err := kc.SaveCurrentPartitionStatsVersion(context.Background(), collID, partID, vChannel, currentVersion)
assert.Error(t, err)
txn = mocks.NewMetaKv(t)
txn.EXPECT().Save(mock.Anything, mock.Anything).Return(nil)
kc.MetaKv = txn
err = kc.SaveCurrentPartitionStatsVersion(context.Background(), collID, partID, vChannel, currentVersion)
assert.NoError(t, err)
})
t.Run("GetCurrentPartitionStatsVersion", func(t *testing.T) {
txn := mocks.NewMetaKv(t)
txn.EXPECT().Load(mock.Anything).Return("", mockErr)
kc.MetaKv = txn
version, err := kc.GetCurrentPartitionStatsVersion(context.Background(), collID, partID, vChannel)
assert.Error(t, err)
assert.Equal(t, int64(0), version)
txn = mocks.NewMetaKv(t)
txn.EXPECT().Load(mock.Anything).Return("1", nil)
kc.MetaKv = txn
version, err = kc.GetCurrentPartitionStatsVersion(context.Background(), collID, partID, vChannel)
assert.NoError(t, err)
assert.Equal(t, int64(1), version)
})
t.Run("DropCurrentPartitionStatsVersion", func(t *testing.T) {
txn := mocks.NewMetaKv(t)
txn.EXPECT().Remove(mock.Anything).Return(mockErr)
kc.MetaKv = txn
err := kc.DropCurrentPartitionStatsVersion(context.Background(), collID, partID, vChannel)
assert.Error(t, err)
txn = mocks.NewMetaKv(t)
txn.EXPECT().Remove(mock.Anything).Return(nil)
kc.MetaKv = txn
err = kc.DropCurrentPartitionStatsVersion(context.Background(), collID, partID, vChannel)
assert.NoError(t, err)
})
}
func Test_StatsTasks(t *testing.T) {
kc := &Catalog{}
mockErr := errors.New("mock error")
t.Run("ListStatsTasks", func(t *testing.T) {
txn := mocks.NewMetaKv(t)
txn.EXPECT().LoadWithPrefix(mock.Anything).Return(nil, nil, mockErr)
kc.MetaKv = txn
tasks, err := kc.ListStatsTasks(context.Background())
assert.Error(t, err)
assert.Nil(t, tasks)
task := &indexpb.StatsTask{
CollectionID: 1,
PartitionID: 2,
SegmentID: 3,
InsertChannel: "ch1",
TaskID: 4,
Version: 1,
NodeID: 1,
State: indexpb.JobState_JobStateFinished,
FailReason: "",
}
value, err := proto.Marshal(task)
assert.NoError(t, err)
txn = mocks.NewMetaKv(t)
txn.EXPECT().LoadWithPrefix(mock.Anything).Return([]string{"key1"}, []string{string(value)}, nil)
kc.MetaKv = txn
tasks, err = kc.ListStatsTasks(context.Background())
assert.NoError(t, err)
assert.Equal(t, 1, len(tasks))
txn = mocks.NewMetaKv(t)
txn.EXPECT().LoadWithPrefix(mock.Anything).Return([]string{"key1"}, []string{"1234"}, nil)
kc.MetaKv = txn
tasks, err = kc.ListStatsTasks(context.Background())
assert.Error(t, err)
assert.Nil(t, tasks)
})
t.Run("SaveStatsTask", func(t *testing.T) {
txn := mocks.NewMetaKv(t)
txn.EXPECT().Save(mock.Anything, mock.Anything).Return(mockErr)
kc.MetaKv = txn
task := &indexpb.StatsTask{
CollectionID: 1,
PartitionID: 2,
SegmentID: 3,
InsertChannel: "ch1",
TaskID: 4,
Version: 1,
NodeID: 1,
State: indexpb.JobState_JobStateFinished,
FailReason: "",
}
err := kc.SaveStatsTask(context.Background(), task)
assert.Error(t, err)
txn = mocks.NewMetaKv(t)
txn.EXPECT().Save(mock.Anything, mock.Anything).Return(nil)
kc.MetaKv = txn
err = kc.SaveStatsTask(context.Background(), task)
assert.NoError(t, err)
})
t.Run("DropStatsTask", func(t *testing.T) {
txn := mocks.NewMetaKv(t)
txn.EXPECT().Remove(mock.Anything).Return(mockErr)
kc.MetaKv = txn
err := kc.DropStatsTask(context.Background(), 1)
assert.Error(t, err)
txn = mocks.NewMetaKv(t)
txn.EXPECT().Remove(mock.Anything).Return(nil)
kc.MetaKv = txn
err = kc.DropStatsTask(context.Background(), 1)
assert.NoError(t, err)
})
}

View File

@ -350,3 +350,7 @@ func buildPreImportTaskKey(taskID int64) string {
func buildAnalyzeTaskKey(taskID int64) string {
return fmt.Sprintf("%s/%d", AnalyzeTaskPrefix, taskID)
}
func buildStatsTaskKey(taskID int64) string {
return fmt.Sprintf("%s/%d", StatsTaskPrefix, taskID)
}
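// For example (illustrative, not part of this diff), taskID 4 yields
// "<StatsTaskPrefix>/4", i.e. "<MetaPrefix>/stats-task/4", which is the prefix
// ListStatsTasks scans and the exact key SaveStatsTask and DropStatsTask write
// and remove.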

View File

@ -865,6 +865,49 @@ func (_c *DataCoordCatalog_DropSegmentIndex_Call) RunAndReturn(run func(context.
return _c
}
// DropStatsTask provides a mock function with given fields: ctx, taskID
func (_m *DataCoordCatalog) DropStatsTask(ctx context.Context, taskID int64) error {
ret := _m.Called(ctx, taskID)
var r0 error
if rf, ok := ret.Get(0).(func(context.Context, int64) error); ok {
r0 = rf(ctx, taskID)
} else {
r0 = ret.Error(0)
}
return r0
}
// DataCoordCatalog_DropStatsTask_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DropStatsTask'
type DataCoordCatalog_DropStatsTask_Call struct {
*mock.Call
}
// DropStatsTask is a helper method to define mock.On call
// - ctx context.Context
// - taskID int64
func (_e *DataCoordCatalog_Expecter) DropStatsTask(ctx interface{}, taskID interface{}) *DataCoordCatalog_DropStatsTask_Call {
return &DataCoordCatalog_DropStatsTask_Call{Call: _e.mock.On("DropStatsTask", ctx, taskID)}
}
func (_c *DataCoordCatalog_DropStatsTask_Call) Run(run func(ctx context.Context, taskID int64)) *DataCoordCatalog_DropStatsTask_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(int64))
})
return _c
}
func (_c *DataCoordCatalog_DropStatsTask_Call) Return(_a0 error) *DataCoordCatalog_DropStatsTask_Call {
_c.Call.Return(_a0)
return _c
}
func (_c *DataCoordCatalog_DropStatsTask_Call) RunAndReturn(run func(context.Context, int64) error) *DataCoordCatalog_DropStatsTask_Call {
_c.Call.Return(run)
return _c
}
// GcConfirm provides a mock function with given fields: ctx, collectionID, partitionID
func (_m *DataCoordCatalog) GcConfirm(ctx context.Context, collectionID int64, partitionID int64) bool {
ret := _m.Called(ctx, collectionID, partitionID)
@ -1501,6 +1544,60 @@ func (_c *DataCoordCatalog_ListSegments_Call) RunAndReturn(run func(context.Cont
return _c
}
// ListStatsTasks provides a mock function with given fields: ctx
func (_m *DataCoordCatalog) ListStatsTasks(ctx context.Context) ([]*indexpb.StatsTask, error) {
ret := _m.Called(ctx)
var r0 []*indexpb.StatsTask
var r1 error
if rf, ok := ret.Get(0).(func(context.Context) ([]*indexpb.StatsTask, error)); ok {
return rf(ctx)
}
if rf, ok := ret.Get(0).(func(context.Context) []*indexpb.StatsTask); ok {
r0 = rf(ctx)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]*indexpb.StatsTask)
}
}
if rf, ok := ret.Get(1).(func(context.Context) error); ok {
r1 = rf(ctx)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// DataCoordCatalog_ListStatsTasks_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ListStatsTasks'
type DataCoordCatalog_ListStatsTasks_Call struct {
*mock.Call
}
// ListStatsTasks is a helper method to define mock.On call
// - ctx context.Context
func (_e *DataCoordCatalog_Expecter) ListStatsTasks(ctx interface{}) *DataCoordCatalog_ListStatsTasks_Call {
return &DataCoordCatalog_ListStatsTasks_Call{Call: _e.mock.On("ListStatsTasks", ctx)}
}
func (_c *DataCoordCatalog_ListStatsTasks_Call) Run(run func(ctx context.Context)) *DataCoordCatalog_ListStatsTasks_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context))
})
return _c
}
func (_c *DataCoordCatalog_ListStatsTasks_Call) Return(_a0 []*indexpb.StatsTask, _a1 error) *DataCoordCatalog_ListStatsTasks_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *DataCoordCatalog_ListStatsTasks_Call) RunAndReturn(run func(context.Context) ([]*indexpb.StatsTask, error)) *DataCoordCatalog_ListStatsTasks_Call {
_c.Call.Return(run)
return _c
}
// MarkChannelAdded provides a mock function with given fields: ctx, channel
func (_m *DataCoordCatalog) MarkChannelAdded(ctx context.Context, channel string) error {
ret := _m.Called(ctx, channel)
@ -2018,6 +2115,49 @@ func (_c *DataCoordCatalog_SavePreImportTask_Call) RunAndReturn(run func(*datapb
return _c
}
// SaveStatsTask provides a mock function with given fields: ctx, task
func (_m *DataCoordCatalog) SaveStatsTask(ctx context.Context, task *indexpb.StatsTask) error {
ret := _m.Called(ctx, task)
var r0 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.StatsTask) error); ok {
r0 = rf(ctx, task)
} else {
r0 = ret.Error(0)
}
return r0
}
// DataCoordCatalog_SaveStatsTask_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SaveStatsTask'
type DataCoordCatalog_SaveStatsTask_Call struct {
*mock.Call
}
// SaveStatsTask is a helper method to define mock.On call
// - ctx context.Context
// - task *indexpb.StatsTask
func (_e *DataCoordCatalog_Expecter) SaveStatsTask(ctx interface{}, task interface{}) *DataCoordCatalog_SaveStatsTask_Call {
return &DataCoordCatalog_SaveStatsTask_Call{Call: _e.mock.On("SaveStatsTask", ctx, task)}
}
func (_c *DataCoordCatalog_SaveStatsTask_Call) Run(run func(ctx context.Context, task *indexpb.StatsTask)) *DataCoordCatalog_SaveStatsTask_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(*indexpb.StatsTask))
})
return _c
}
func (_c *DataCoordCatalog_SaveStatsTask_Call) Return(_a0 error) *DataCoordCatalog_SaveStatsTask_Call {
_c.Call.Return(_a0)
return _c
}
func (_c *DataCoordCatalog_SaveStatsTask_Call) RunAndReturn(run func(context.Context, *indexpb.StatsTask) error) *DataCoordCatalog_SaveStatsTask_Call {
_c.Call.Return(run)
return _c
}
// ShouldDropChannel provides a mock function with given fields: ctx, channel
func (_m *DataCoordCatalog) ShouldDropChannel(ctx context.Context, channel string) bool {
ret := _m.Called(ctx, channel)

View File

@ -8,13 +8,13 @@ import (
commonpb "github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
clientv3 "go.etcd.io/etcd/client/v3"
indexpb "github.com/milvus-io/milvus/internal/proto/indexpb"
internalpb "github.com/milvus-io/milvus/internal/proto/internalpb"
milvuspb "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
mock "github.com/stretchr/testify/mock"
workerpb "github.com/milvus-io/milvus/internal/proto/workerpb"
)
// MockIndexNode is an autogenerated mock type for the IndexNodeComponent type
@ -31,15 +31,15 @@ func (_m *MockIndexNode) EXPECT() *MockIndexNode_Expecter {
}
// CreateJob provides a mock function with given fields: _a0, _a1
func (_m *MockIndexNode) CreateJob(_a0 context.Context, _a1 *indexpb.CreateJobRequest) (*commonpb.Status, error) {
func (_m *MockIndexNode) CreateJob(_a0 context.Context, _a1 *workerpb.CreateJobRequest) (*commonpb.Status, error) {
ret := _m.Called(_a0, _a1)
var r0 *commonpb.Status
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.CreateJobRequest) (*commonpb.Status, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.CreateJobRequest) (*commonpb.Status, error)); ok {
return rf(_a0, _a1)
}
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.CreateJobRequest) *commonpb.Status); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.CreateJobRequest) *commonpb.Status); ok {
r0 = rf(_a0, _a1)
} else {
if ret.Get(0) != nil {
@ -47,7 +47,7 @@ func (_m *MockIndexNode) CreateJob(_a0 context.Context, _a1 *indexpb.CreateJobRe
}
}
if rf, ok := ret.Get(1).(func(context.Context, *indexpb.CreateJobRequest) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, *workerpb.CreateJobRequest) error); ok {
r1 = rf(_a0, _a1)
} else {
r1 = ret.Error(1)
@ -63,14 +63,14 @@ type MockIndexNode_CreateJob_Call struct {
// CreateJob is a helper method to define mock.On call
// - _a0 context.Context
// - _a1 *indexpb.CreateJobRequest
// - _a1 *workerpb.CreateJobRequest
func (_e *MockIndexNode_Expecter) CreateJob(_a0 interface{}, _a1 interface{}) *MockIndexNode_CreateJob_Call {
return &MockIndexNode_CreateJob_Call{Call: _e.mock.On("CreateJob", _a0, _a1)}
}
func (_c *MockIndexNode_CreateJob_Call) Run(run func(_a0 context.Context, _a1 *indexpb.CreateJobRequest)) *MockIndexNode_CreateJob_Call {
func (_c *MockIndexNode_CreateJob_Call) Run(run func(_a0 context.Context, _a1 *workerpb.CreateJobRequest)) *MockIndexNode_CreateJob_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(*indexpb.CreateJobRequest))
run(args[0].(context.Context), args[1].(*workerpb.CreateJobRequest))
})
return _c
}
@ -80,21 +80,21 @@ func (_c *MockIndexNode_CreateJob_Call) Return(_a0 *commonpb.Status, _a1 error)
return _c
}
func (_c *MockIndexNode_CreateJob_Call) RunAndReturn(run func(context.Context, *indexpb.CreateJobRequest) (*commonpb.Status, error)) *MockIndexNode_CreateJob_Call {
func (_c *MockIndexNode_CreateJob_Call) RunAndReturn(run func(context.Context, *workerpb.CreateJobRequest) (*commonpb.Status, error)) *MockIndexNode_CreateJob_Call {
_c.Call.Return(run)
return _c
}
// CreateJobV2 provides a mock function with given fields: _a0, _a1
func (_m *MockIndexNode) CreateJobV2(_a0 context.Context, _a1 *indexpb.CreateJobV2Request) (*commonpb.Status, error) {
func (_m *MockIndexNode) CreateJobV2(_a0 context.Context, _a1 *workerpb.CreateJobV2Request) (*commonpb.Status, error) {
ret := _m.Called(_a0, _a1)
var r0 *commonpb.Status
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.CreateJobV2Request) (*commonpb.Status, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.CreateJobV2Request) (*commonpb.Status, error)); ok {
return rf(_a0, _a1)
}
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.CreateJobV2Request) *commonpb.Status); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.CreateJobV2Request) *commonpb.Status); ok {
r0 = rf(_a0, _a1)
} else {
if ret.Get(0) != nil {
@ -102,7 +102,7 @@ func (_m *MockIndexNode) CreateJobV2(_a0 context.Context, _a1 *indexpb.CreateJob
}
}
if rf, ok := ret.Get(1).(func(context.Context, *indexpb.CreateJobV2Request) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, *workerpb.CreateJobV2Request) error); ok {
r1 = rf(_a0, _a1)
} else {
r1 = ret.Error(1)
@ -118,14 +118,14 @@ type MockIndexNode_CreateJobV2_Call struct {
// CreateJobV2 is a helper method to define mock.On call
// - _a0 context.Context
// - _a1 *indexpb.CreateJobV2Request
// - _a1 *workerpb.CreateJobV2Request
func (_e *MockIndexNode_Expecter) CreateJobV2(_a0 interface{}, _a1 interface{}) *MockIndexNode_CreateJobV2_Call {
return &MockIndexNode_CreateJobV2_Call{Call: _e.mock.On("CreateJobV2", _a0, _a1)}
}
func (_c *MockIndexNode_CreateJobV2_Call) Run(run func(_a0 context.Context, _a1 *indexpb.CreateJobV2Request)) *MockIndexNode_CreateJobV2_Call {
func (_c *MockIndexNode_CreateJobV2_Call) Run(run func(_a0 context.Context, _a1 *workerpb.CreateJobV2Request)) *MockIndexNode_CreateJobV2_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(*indexpb.CreateJobV2Request))
run(args[0].(context.Context), args[1].(*workerpb.CreateJobV2Request))
})
return _c
}
@ -135,21 +135,21 @@ func (_c *MockIndexNode_CreateJobV2_Call) Return(_a0 *commonpb.Status, _a1 error
return _c
}
func (_c *MockIndexNode_CreateJobV2_Call) RunAndReturn(run func(context.Context, *indexpb.CreateJobV2Request) (*commonpb.Status, error)) *MockIndexNode_CreateJobV2_Call {
func (_c *MockIndexNode_CreateJobV2_Call) RunAndReturn(run func(context.Context, *workerpb.CreateJobV2Request) (*commonpb.Status, error)) *MockIndexNode_CreateJobV2_Call {
_c.Call.Return(run)
return _c
}
// DropJobs provides a mock function with given fields: _a0, _a1
func (_m *MockIndexNode) DropJobs(_a0 context.Context, _a1 *indexpb.DropJobsRequest) (*commonpb.Status, error) {
func (_m *MockIndexNode) DropJobs(_a0 context.Context, _a1 *workerpb.DropJobsRequest) (*commonpb.Status, error) {
ret := _m.Called(_a0, _a1)
var r0 *commonpb.Status
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.DropJobsRequest) (*commonpb.Status, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.DropJobsRequest) (*commonpb.Status, error)); ok {
return rf(_a0, _a1)
}
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.DropJobsRequest) *commonpb.Status); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.DropJobsRequest) *commonpb.Status); ok {
r0 = rf(_a0, _a1)
} else {
if ret.Get(0) != nil {
@ -157,7 +157,7 @@ func (_m *MockIndexNode) DropJobs(_a0 context.Context, _a1 *indexpb.DropJobsRequ
}
}
if rf, ok := ret.Get(1).(func(context.Context, *indexpb.DropJobsRequest) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, *workerpb.DropJobsRequest) error); ok {
r1 = rf(_a0, _a1)
} else {
r1 = ret.Error(1)
@ -173,14 +173,14 @@ type MockIndexNode_DropJobs_Call struct {
// DropJobs is a helper method to define mock.On call
// - _a0 context.Context
// - _a1 *indexpb.DropJobsRequest
// - _a1 *workerpb.DropJobsRequest
func (_e *MockIndexNode_Expecter) DropJobs(_a0 interface{}, _a1 interface{}) *MockIndexNode_DropJobs_Call {
return &MockIndexNode_DropJobs_Call{Call: _e.mock.On("DropJobs", _a0, _a1)}
}
func (_c *MockIndexNode_DropJobs_Call) Run(run func(_a0 context.Context, _a1 *indexpb.DropJobsRequest)) *MockIndexNode_DropJobs_Call {
func (_c *MockIndexNode_DropJobs_Call) Run(run func(_a0 context.Context, _a1 *workerpb.DropJobsRequest)) *MockIndexNode_DropJobs_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(*indexpb.DropJobsRequest))
run(args[0].(context.Context), args[1].(*workerpb.DropJobsRequest))
})
return _c
}
@ -190,21 +190,21 @@ func (_c *MockIndexNode_DropJobs_Call) Return(_a0 *commonpb.Status, _a1 error) *
return _c
}
func (_c *MockIndexNode_DropJobs_Call) RunAndReturn(run func(context.Context, *indexpb.DropJobsRequest) (*commonpb.Status, error)) *MockIndexNode_DropJobs_Call {
func (_c *MockIndexNode_DropJobs_Call) RunAndReturn(run func(context.Context, *workerpb.DropJobsRequest) (*commonpb.Status, error)) *MockIndexNode_DropJobs_Call {
_c.Call.Return(run)
return _c
}
// DropJobsV2 provides a mock function with given fields: _a0, _a1
func (_m *MockIndexNode) DropJobsV2(_a0 context.Context, _a1 *indexpb.DropJobsV2Request) (*commonpb.Status, error) {
func (_m *MockIndexNode) DropJobsV2(_a0 context.Context, _a1 *workerpb.DropJobsV2Request) (*commonpb.Status, error) {
ret := _m.Called(_a0, _a1)
var r0 *commonpb.Status
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.DropJobsV2Request) (*commonpb.Status, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.DropJobsV2Request) (*commonpb.Status, error)); ok {
return rf(_a0, _a1)
}
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.DropJobsV2Request) *commonpb.Status); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.DropJobsV2Request) *commonpb.Status); ok {
r0 = rf(_a0, _a1)
} else {
if ret.Get(0) != nil {
@ -212,7 +212,7 @@ func (_m *MockIndexNode) DropJobsV2(_a0 context.Context, _a1 *indexpb.DropJobsV2
}
}
if rf, ok := ret.Get(1).(func(context.Context, *indexpb.DropJobsV2Request) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, *workerpb.DropJobsV2Request) error); ok {
r1 = rf(_a0, _a1)
} else {
r1 = ret.Error(1)
@ -228,14 +228,14 @@ type MockIndexNode_DropJobsV2_Call struct {
// DropJobsV2 is a helper method to define mock.On call
// - _a0 context.Context
// - _a1 *indexpb.DropJobsV2Request
// - _a1 *workerpb.DropJobsV2Request
func (_e *MockIndexNode_Expecter) DropJobsV2(_a0 interface{}, _a1 interface{}) *MockIndexNode_DropJobsV2_Call {
return &MockIndexNode_DropJobsV2_Call{Call: _e.mock.On("DropJobsV2", _a0, _a1)}
}
func (_c *MockIndexNode_DropJobsV2_Call) Run(run func(_a0 context.Context, _a1 *indexpb.DropJobsV2Request)) *MockIndexNode_DropJobsV2_Call {
func (_c *MockIndexNode_DropJobsV2_Call) Run(run func(_a0 context.Context, _a1 *workerpb.DropJobsV2Request)) *MockIndexNode_DropJobsV2_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(*indexpb.DropJobsV2Request))
run(args[0].(context.Context), args[1].(*workerpb.DropJobsV2Request))
})
return _c
}
@ -245,7 +245,7 @@ func (_c *MockIndexNode_DropJobsV2_Call) Return(_a0 *commonpb.Status, _a1 error)
return _c
}
func (_c *MockIndexNode_DropJobsV2_Call) RunAndReturn(run func(context.Context, *indexpb.DropJobsV2Request) (*commonpb.Status, error)) *MockIndexNode_DropJobsV2_Call {
func (_c *MockIndexNode_DropJobsV2_Call) RunAndReturn(run func(context.Context, *workerpb.DropJobsV2Request) (*commonpb.Status, error)) *MockIndexNode_DropJobsV2_Call {
_c.Call.Return(run)
return _c
}
@ -347,23 +347,23 @@ func (_c *MockIndexNode_GetComponentStates_Call) RunAndReturn(run func(context.C
}
// GetJobStats provides a mock function with given fields: _a0, _a1
func (_m *MockIndexNode) GetJobStats(_a0 context.Context, _a1 *indexpb.GetJobStatsRequest) (*indexpb.GetJobStatsResponse, error) {
func (_m *MockIndexNode) GetJobStats(_a0 context.Context, _a1 *workerpb.GetJobStatsRequest) (*workerpb.GetJobStatsResponse, error) {
ret := _m.Called(_a0, _a1)
var r0 *indexpb.GetJobStatsResponse
var r0 *workerpb.GetJobStatsResponse
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.GetJobStatsRequest) (*indexpb.GetJobStatsResponse, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.GetJobStatsRequest) (*workerpb.GetJobStatsResponse, error)); ok {
return rf(_a0, _a1)
}
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.GetJobStatsRequest) *indexpb.GetJobStatsResponse); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.GetJobStatsRequest) *workerpb.GetJobStatsResponse); ok {
r0 = rf(_a0, _a1)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*indexpb.GetJobStatsResponse)
r0 = ret.Get(0).(*workerpb.GetJobStatsResponse)
}
}
if rf, ok := ret.Get(1).(func(context.Context, *indexpb.GetJobStatsRequest) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, *workerpb.GetJobStatsRequest) error); ok {
r1 = rf(_a0, _a1)
} else {
r1 = ret.Error(1)
@ -379,24 +379,24 @@ type MockIndexNode_GetJobStats_Call struct {
// GetJobStats is a helper method to define mock.On call
// - _a0 context.Context
// - _a1 *indexpb.GetJobStatsRequest
// - _a1 *workerpb.GetJobStatsRequest
func (_e *MockIndexNode_Expecter) GetJobStats(_a0 interface{}, _a1 interface{}) *MockIndexNode_GetJobStats_Call {
return &MockIndexNode_GetJobStats_Call{Call: _e.mock.On("GetJobStats", _a0, _a1)}
}
func (_c *MockIndexNode_GetJobStats_Call) Run(run func(_a0 context.Context, _a1 *indexpb.GetJobStatsRequest)) *MockIndexNode_GetJobStats_Call {
func (_c *MockIndexNode_GetJobStats_Call) Run(run func(_a0 context.Context, _a1 *workerpb.GetJobStatsRequest)) *MockIndexNode_GetJobStats_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(*indexpb.GetJobStatsRequest))
run(args[0].(context.Context), args[1].(*workerpb.GetJobStatsRequest))
})
return _c
}
func (_c *MockIndexNode_GetJobStats_Call) Return(_a0 *indexpb.GetJobStatsResponse, _a1 error) *MockIndexNode_GetJobStats_Call {
func (_c *MockIndexNode_GetJobStats_Call) Return(_a0 *workerpb.GetJobStatsResponse, _a1 error) *MockIndexNode_GetJobStats_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *MockIndexNode_GetJobStats_Call) RunAndReturn(run func(context.Context, *indexpb.GetJobStatsRequest) (*indexpb.GetJobStatsResponse, error)) *MockIndexNode_GetJobStats_Call {
func (_c *MockIndexNode_GetJobStats_Call) RunAndReturn(run func(context.Context, *workerpb.GetJobStatsRequest) (*workerpb.GetJobStatsResponse, error)) *MockIndexNode_GetJobStats_Call {
_c.Call.Return(run)
return _c
}
@ -553,23 +553,23 @@ func (_c *MockIndexNode_Init_Call) RunAndReturn(run func() error) *MockIndexNode
}
// QueryJobs provides a mock function with given fields: _a0, _a1
func (_m *MockIndexNode) QueryJobs(_a0 context.Context, _a1 *indexpb.QueryJobsRequest) (*indexpb.QueryJobsResponse, error) {
func (_m *MockIndexNode) QueryJobs(_a0 context.Context, _a1 *workerpb.QueryJobsRequest) (*workerpb.QueryJobsResponse, error) {
ret := _m.Called(_a0, _a1)
var r0 *indexpb.QueryJobsResponse
var r0 *workerpb.QueryJobsResponse
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.QueryJobsRequest) (*indexpb.QueryJobsResponse, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.QueryJobsRequest) (*workerpb.QueryJobsResponse, error)); ok {
return rf(_a0, _a1)
}
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.QueryJobsRequest) *indexpb.QueryJobsResponse); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.QueryJobsRequest) *workerpb.QueryJobsResponse); ok {
r0 = rf(_a0, _a1)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*indexpb.QueryJobsResponse)
r0 = ret.Get(0).(*workerpb.QueryJobsResponse)
}
}
if rf, ok := ret.Get(1).(func(context.Context, *indexpb.QueryJobsRequest) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, *workerpb.QueryJobsRequest) error); ok {
r1 = rf(_a0, _a1)
} else {
r1 = ret.Error(1)
@ -585,46 +585,46 @@ type MockIndexNode_QueryJobs_Call struct {
// QueryJobs is a helper method to define mock.On call
// - _a0 context.Context
// - _a1 *indexpb.QueryJobsRequest
// - _a1 *workerpb.QueryJobsRequest
func (_e *MockIndexNode_Expecter) QueryJobs(_a0 interface{}, _a1 interface{}) *MockIndexNode_QueryJobs_Call {
return &MockIndexNode_QueryJobs_Call{Call: _e.mock.On("QueryJobs", _a0, _a1)}
}
func (_c *MockIndexNode_QueryJobs_Call) Run(run func(_a0 context.Context, _a1 *indexpb.QueryJobsRequest)) *MockIndexNode_QueryJobs_Call {
func (_c *MockIndexNode_QueryJobs_Call) Run(run func(_a0 context.Context, _a1 *workerpb.QueryJobsRequest)) *MockIndexNode_QueryJobs_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(*indexpb.QueryJobsRequest))
run(args[0].(context.Context), args[1].(*workerpb.QueryJobsRequest))
})
return _c
}
func (_c *MockIndexNode_QueryJobs_Call) Return(_a0 *indexpb.QueryJobsResponse, _a1 error) *MockIndexNode_QueryJobs_Call {
func (_c *MockIndexNode_QueryJobs_Call) Return(_a0 *workerpb.QueryJobsResponse, _a1 error) *MockIndexNode_QueryJobs_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *MockIndexNode_QueryJobs_Call) RunAndReturn(run func(context.Context, *indexpb.QueryJobsRequest) (*indexpb.QueryJobsResponse, error)) *MockIndexNode_QueryJobs_Call {
func (_c *MockIndexNode_QueryJobs_Call) RunAndReturn(run func(context.Context, *workerpb.QueryJobsRequest) (*workerpb.QueryJobsResponse, error)) *MockIndexNode_QueryJobs_Call {
_c.Call.Return(run)
return _c
}
// QueryJobsV2 provides a mock function with given fields: _a0, _a1
func (_m *MockIndexNode) QueryJobsV2(_a0 context.Context, _a1 *indexpb.QueryJobsV2Request) (*indexpb.QueryJobsV2Response, error) {
func (_m *MockIndexNode) QueryJobsV2(_a0 context.Context, _a1 *workerpb.QueryJobsV2Request) (*workerpb.QueryJobsV2Response, error) {
ret := _m.Called(_a0, _a1)
var r0 *indexpb.QueryJobsV2Response
var r0 *workerpb.QueryJobsV2Response
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.QueryJobsV2Request) (*indexpb.QueryJobsV2Response, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.QueryJobsV2Request) (*workerpb.QueryJobsV2Response, error)); ok {
return rf(_a0, _a1)
}
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.QueryJobsV2Request) *indexpb.QueryJobsV2Response); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.QueryJobsV2Request) *workerpb.QueryJobsV2Response); ok {
r0 = rf(_a0, _a1)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*indexpb.QueryJobsV2Response)
r0 = ret.Get(0).(*workerpb.QueryJobsV2Response)
}
}
if rf, ok := ret.Get(1).(func(context.Context, *indexpb.QueryJobsV2Request) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, *workerpb.QueryJobsV2Request) error); ok {
r1 = rf(_a0, _a1)
} else {
r1 = ret.Error(1)
@ -640,24 +640,24 @@ type MockIndexNode_QueryJobsV2_Call struct {
// QueryJobsV2 is a helper method to define mock.On call
// - _a0 context.Context
// - _a1 *indexpb.QueryJobsV2Request
// - _a1 *workerpb.QueryJobsV2Request
func (_e *MockIndexNode_Expecter) QueryJobsV2(_a0 interface{}, _a1 interface{}) *MockIndexNode_QueryJobsV2_Call {
return &MockIndexNode_QueryJobsV2_Call{Call: _e.mock.On("QueryJobsV2", _a0, _a1)}
}
func (_c *MockIndexNode_QueryJobsV2_Call) Run(run func(_a0 context.Context, _a1 *indexpb.QueryJobsV2Request)) *MockIndexNode_QueryJobsV2_Call {
func (_c *MockIndexNode_QueryJobsV2_Call) Run(run func(_a0 context.Context, _a1 *workerpb.QueryJobsV2Request)) *MockIndexNode_QueryJobsV2_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(*indexpb.QueryJobsV2Request))
run(args[0].(context.Context), args[1].(*workerpb.QueryJobsV2Request))
})
return _c
}
func (_c *MockIndexNode_QueryJobsV2_Call) Return(_a0 *indexpb.QueryJobsV2Response, _a1 error) *MockIndexNode_QueryJobsV2_Call {
func (_c *MockIndexNode_QueryJobsV2_Call) Return(_a0 *workerpb.QueryJobsV2Response, _a1 error) *MockIndexNode_QueryJobsV2_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *MockIndexNode_QueryJobsV2_Call) RunAndReturn(run func(context.Context, *indexpb.QueryJobsV2Request) (*indexpb.QueryJobsV2Response, error)) *MockIndexNode_QueryJobsV2_Call {
func (_c *MockIndexNode_QueryJobsV2_Call) RunAndReturn(run func(context.Context, *workerpb.QueryJobsV2Request) (*workerpb.QueryJobsV2Response, error)) *MockIndexNode_QueryJobsV2_Call {
_c.Call.Return(run)
return _c
}

View File

@ -9,13 +9,13 @@ import (
grpc "google.golang.org/grpc"
indexpb "github.com/milvus-io/milvus/internal/proto/indexpb"
internalpb "github.com/milvus-io/milvus/internal/proto/internalpb"
milvuspb "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
mock "github.com/stretchr/testify/mock"
workerpb "github.com/milvus-io/milvus/internal/proto/workerpb"
)
// MockIndexNodeClient is an autogenerated mock type for the IndexNodeClient type
@ -73,7 +73,7 @@ func (_c *MockIndexNodeClient_Close_Call) RunAndReturn(run func() error) *MockIn
}
// CreateJob provides a mock function with given fields: ctx, in, opts
func (_m *MockIndexNodeClient) CreateJob(ctx context.Context, in *indexpb.CreateJobRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
func (_m *MockIndexNodeClient) CreateJob(ctx context.Context, in *workerpb.CreateJobRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
_va := make([]interface{}, len(opts))
for _i := range opts {
_va[_i] = opts[_i]
@ -85,10 +85,10 @@ func (_m *MockIndexNodeClient) CreateJob(ctx context.Context, in *indexpb.Create
var r0 *commonpb.Status
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.CreateJobRequest, ...grpc.CallOption) (*commonpb.Status, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.CreateJobRequest, ...grpc.CallOption) (*commonpb.Status, error)); ok {
return rf(ctx, in, opts...)
}
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.CreateJobRequest, ...grpc.CallOption) *commonpb.Status); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.CreateJobRequest, ...grpc.CallOption) *commonpb.Status); ok {
r0 = rf(ctx, in, opts...)
} else {
if ret.Get(0) != nil {
@ -96,7 +96,7 @@ func (_m *MockIndexNodeClient) CreateJob(ctx context.Context, in *indexpb.Create
}
}
if rf, ok := ret.Get(1).(func(context.Context, *indexpb.CreateJobRequest, ...grpc.CallOption) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, *workerpb.CreateJobRequest, ...grpc.CallOption) error); ok {
r1 = rf(ctx, in, opts...)
} else {
r1 = ret.Error(1)
@ -112,14 +112,14 @@ type MockIndexNodeClient_CreateJob_Call struct {
// CreateJob is a helper method to define mock.On call
// - ctx context.Context
// - in *indexpb.CreateJobRequest
// - in *workerpb.CreateJobRequest
// - opts ...grpc.CallOption
func (_e *MockIndexNodeClient_Expecter) CreateJob(ctx interface{}, in interface{}, opts ...interface{}) *MockIndexNodeClient_CreateJob_Call {
return &MockIndexNodeClient_CreateJob_Call{Call: _e.mock.On("CreateJob",
append([]interface{}{ctx, in}, opts...)...)}
}
func (_c *MockIndexNodeClient_CreateJob_Call) Run(run func(ctx context.Context, in *indexpb.CreateJobRequest, opts ...grpc.CallOption)) *MockIndexNodeClient_CreateJob_Call {
func (_c *MockIndexNodeClient_CreateJob_Call) Run(run func(ctx context.Context, in *workerpb.CreateJobRequest, opts ...grpc.CallOption)) *MockIndexNodeClient_CreateJob_Call {
_c.Call.Run(func(args mock.Arguments) {
variadicArgs := make([]grpc.CallOption, len(args)-2)
for i, a := range args[2:] {
@ -127,7 +127,7 @@ func (_c *MockIndexNodeClient_CreateJob_Call) Run(run func(ctx context.Context,
variadicArgs[i] = a.(grpc.CallOption)
}
}
run(args[0].(context.Context), args[1].(*indexpb.CreateJobRequest), variadicArgs...)
run(args[0].(context.Context), args[1].(*workerpb.CreateJobRequest), variadicArgs...)
})
return _c
}
@ -137,13 +137,13 @@ func (_c *MockIndexNodeClient_CreateJob_Call) Return(_a0 *commonpb.Status, _a1 e
return _c
}
func (_c *MockIndexNodeClient_CreateJob_Call) RunAndReturn(run func(context.Context, *indexpb.CreateJobRequest, ...grpc.CallOption) (*commonpb.Status, error)) *MockIndexNodeClient_CreateJob_Call {
func (_c *MockIndexNodeClient_CreateJob_Call) RunAndReturn(run func(context.Context, *workerpb.CreateJobRequest, ...grpc.CallOption) (*commonpb.Status, error)) *MockIndexNodeClient_CreateJob_Call {
_c.Call.Return(run)
return _c
}
// CreateJobV2 provides a mock function with given fields: ctx, in, opts
func (_m *MockIndexNodeClient) CreateJobV2(ctx context.Context, in *indexpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
func (_m *MockIndexNodeClient) CreateJobV2(ctx context.Context, in *workerpb.CreateJobV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
_va := make([]interface{}, len(opts))
for _i := range opts {
_va[_i] = opts[_i]
@ -155,10 +155,10 @@ func (_m *MockIndexNodeClient) CreateJobV2(ctx context.Context, in *indexpb.Crea
var r0 *commonpb.Status
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.CreateJobV2Request, ...grpc.CallOption) (*commonpb.Status, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.CreateJobV2Request, ...grpc.CallOption) (*commonpb.Status, error)); ok {
return rf(ctx, in, opts...)
}
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.CreateJobV2Request, ...grpc.CallOption) *commonpb.Status); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.CreateJobV2Request, ...grpc.CallOption) *commonpb.Status); ok {
r0 = rf(ctx, in, opts...)
} else {
if ret.Get(0) != nil {
@ -166,7 +166,7 @@ func (_m *MockIndexNodeClient) CreateJobV2(ctx context.Context, in *indexpb.Crea
}
}
if rf, ok := ret.Get(1).(func(context.Context, *indexpb.CreateJobV2Request, ...grpc.CallOption) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, *workerpb.CreateJobV2Request, ...grpc.CallOption) error); ok {
r1 = rf(ctx, in, opts...)
} else {
r1 = ret.Error(1)
@ -182,14 +182,14 @@ type MockIndexNodeClient_CreateJobV2_Call struct {
// CreateJobV2 is a helper method to define mock.On call
// - ctx context.Context
// - in *indexpb.CreateJobV2Request
// - in *workerpb.CreateJobV2Request
// - opts ...grpc.CallOption
func (_e *MockIndexNodeClient_Expecter) CreateJobV2(ctx interface{}, in interface{}, opts ...interface{}) *MockIndexNodeClient_CreateJobV2_Call {
return &MockIndexNodeClient_CreateJobV2_Call{Call: _e.mock.On("CreateJobV2",
append([]interface{}{ctx, in}, opts...)...)}
}
func (_c *MockIndexNodeClient_CreateJobV2_Call) Run(run func(ctx context.Context, in *indexpb.CreateJobV2Request, opts ...grpc.CallOption)) *MockIndexNodeClient_CreateJobV2_Call {
func (_c *MockIndexNodeClient_CreateJobV2_Call) Run(run func(ctx context.Context, in *workerpb.CreateJobV2Request, opts ...grpc.CallOption)) *MockIndexNodeClient_CreateJobV2_Call {
_c.Call.Run(func(args mock.Arguments) {
variadicArgs := make([]grpc.CallOption, len(args)-2)
for i, a := range args[2:] {
@ -197,7 +197,7 @@ func (_c *MockIndexNodeClient_CreateJobV2_Call) Run(run func(ctx context.Context
variadicArgs[i] = a.(grpc.CallOption)
}
}
run(args[0].(context.Context), args[1].(*indexpb.CreateJobV2Request), variadicArgs...)
run(args[0].(context.Context), args[1].(*workerpb.CreateJobV2Request), variadicArgs...)
})
return _c
}
@ -207,13 +207,13 @@ func (_c *MockIndexNodeClient_CreateJobV2_Call) Return(_a0 *commonpb.Status, _a1
return _c
}
func (_c *MockIndexNodeClient_CreateJobV2_Call) RunAndReturn(run func(context.Context, *indexpb.CreateJobV2Request, ...grpc.CallOption) (*commonpb.Status, error)) *MockIndexNodeClient_CreateJobV2_Call {
func (_c *MockIndexNodeClient_CreateJobV2_Call) RunAndReturn(run func(context.Context, *workerpb.CreateJobV2Request, ...grpc.CallOption) (*commonpb.Status, error)) *MockIndexNodeClient_CreateJobV2_Call {
_c.Call.Return(run)
return _c
}
// DropJobs provides a mock function with given fields: ctx, in, opts
func (_m *MockIndexNodeClient) DropJobs(ctx context.Context, in *indexpb.DropJobsRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
func (_m *MockIndexNodeClient) DropJobs(ctx context.Context, in *workerpb.DropJobsRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
_va := make([]interface{}, len(opts))
for _i := range opts {
_va[_i] = opts[_i]
@ -225,10 +225,10 @@ func (_m *MockIndexNodeClient) DropJobs(ctx context.Context, in *indexpb.DropJob
var r0 *commonpb.Status
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.DropJobsRequest, ...grpc.CallOption) (*commonpb.Status, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.DropJobsRequest, ...grpc.CallOption) (*commonpb.Status, error)); ok {
return rf(ctx, in, opts...)
}
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.DropJobsRequest, ...grpc.CallOption) *commonpb.Status); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.DropJobsRequest, ...grpc.CallOption) *commonpb.Status); ok {
r0 = rf(ctx, in, opts...)
} else {
if ret.Get(0) != nil {
@ -236,7 +236,7 @@ func (_m *MockIndexNodeClient) DropJobs(ctx context.Context, in *indexpb.DropJob
}
}
if rf, ok := ret.Get(1).(func(context.Context, *indexpb.DropJobsRequest, ...grpc.CallOption) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, *workerpb.DropJobsRequest, ...grpc.CallOption) error); ok {
r1 = rf(ctx, in, opts...)
} else {
r1 = ret.Error(1)
@ -252,14 +252,14 @@ type MockIndexNodeClient_DropJobs_Call struct {
// DropJobs is a helper method to define mock.On call
// - ctx context.Context
// - in *indexpb.DropJobsRequest
// - in *workerpb.DropJobsRequest
// - opts ...grpc.CallOption
func (_e *MockIndexNodeClient_Expecter) DropJobs(ctx interface{}, in interface{}, opts ...interface{}) *MockIndexNodeClient_DropJobs_Call {
return &MockIndexNodeClient_DropJobs_Call{Call: _e.mock.On("DropJobs",
append([]interface{}{ctx, in}, opts...)...)}
}
func (_c *MockIndexNodeClient_DropJobs_Call) Run(run func(ctx context.Context, in *indexpb.DropJobsRequest, opts ...grpc.CallOption)) *MockIndexNodeClient_DropJobs_Call {
func (_c *MockIndexNodeClient_DropJobs_Call) Run(run func(ctx context.Context, in *workerpb.DropJobsRequest, opts ...grpc.CallOption)) *MockIndexNodeClient_DropJobs_Call {
_c.Call.Run(func(args mock.Arguments) {
variadicArgs := make([]grpc.CallOption, len(args)-2)
for i, a := range args[2:] {
@ -267,7 +267,7 @@ func (_c *MockIndexNodeClient_DropJobs_Call) Run(run func(ctx context.Context, i
variadicArgs[i] = a.(grpc.CallOption)
}
}
run(args[0].(context.Context), args[1].(*indexpb.DropJobsRequest), variadicArgs...)
run(args[0].(context.Context), args[1].(*workerpb.DropJobsRequest), variadicArgs...)
})
return _c
}
@ -277,13 +277,13 @@ func (_c *MockIndexNodeClient_DropJobs_Call) Return(_a0 *commonpb.Status, _a1 er
return _c
}
func (_c *MockIndexNodeClient_DropJobs_Call) RunAndReturn(run func(context.Context, *indexpb.DropJobsRequest, ...grpc.CallOption) (*commonpb.Status, error)) *MockIndexNodeClient_DropJobs_Call {
func (_c *MockIndexNodeClient_DropJobs_Call) RunAndReturn(run func(context.Context, *workerpb.DropJobsRequest, ...grpc.CallOption) (*commonpb.Status, error)) *MockIndexNodeClient_DropJobs_Call {
_c.Call.Return(run)
return _c
}
// DropJobsV2 provides a mock function with given fields: ctx, in, opts
func (_m *MockIndexNodeClient) DropJobsV2(ctx context.Context, in *indexpb.DropJobsV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
func (_m *MockIndexNodeClient) DropJobsV2(ctx context.Context, in *workerpb.DropJobsV2Request, opts ...grpc.CallOption) (*commonpb.Status, error) {
_va := make([]interface{}, len(opts))
for _i := range opts {
_va[_i] = opts[_i]
@ -295,10 +295,10 @@ func (_m *MockIndexNodeClient) DropJobsV2(ctx context.Context, in *indexpb.DropJ
var r0 *commonpb.Status
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.DropJobsV2Request, ...grpc.CallOption) (*commonpb.Status, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.DropJobsV2Request, ...grpc.CallOption) (*commonpb.Status, error)); ok {
return rf(ctx, in, opts...)
}
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.DropJobsV2Request, ...grpc.CallOption) *commonpb.Status); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.DropJobsV2Request, ...grpc.CallOption) *commonpb.Status); ok {
r0 = rf(ctx, in, opts...)
} else {
if ret.Get(0) != nil {
@ -306,7 +306,7 @@ func (_m *MockIndexNodeClient) DropJobsV2(ctx context.Context, in *indexpb.DropJ
}
}
if rf, ok := ret.Get(1).(func(context.Context, *indexpb.DropJobsV2Request, ...grpc.CallOption) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, *workerpb.DropJobsV2Request, ...grpc.CallOption) error); ok {
r1 = rf(ctx, in, opts...)
} else {
r1 = ret.Error(1)
@ -322,14 +322,14 @@ type MockIndexNodeClient_DropJobsV2_Call struct {
// DropJobsV2 is a helper method to define mock.On call
// - ctx context.Context
// - in *indexpb.DropJobsV2Request
// - in *workerpb.DropJobsV2Request
// - opts ...grpc.CallOption
func (_e *MockIndexNodeClient_Expecter) DropJobsV2(ctx interface{}, in interface{}, opts ...interface{}) *MockIndexNodeClient_DropJobsV2_Call {
return &MockIndexNodeClient_DropJobsV2_Call{Call: _e.mock.On("DropJobsV2",
append([]interface{}{ctx, in}, opts...)...)}
}
func (_c *MockIndexNodeClient_DropJobsV2_Call) Run(run func(ctx context.Context, in *indexpb.DropJobsV2Request, opts ...grpc.CallOption)) *MockIndexNodeClient_DropJobsV2_Call {
func (_c *MockIndexNodeClient_DropJobsV2_Call) Run(run func(ctx context.Context, in *workerpb.DropJobsV2Request, opts ...grpc.CallOption)) *MockIndexNodeClient_DropJobsV2_Call {
_c.Call.Run(func(args mock.Arguments) {
variadicArgs := make([]grpc.CallOption, len(args)-2)
for i, a := range args[2:] {
@ -337,7 +337,7 @@ func (_c *MockIndexNodeClient_DropJobsV2_Call) Run(run func(ctx context.Context,
variadicArgs[i] = a.(grpc.CallOption)
}
}
run(args[0].(context.Context), args[1].(*indexpb.DropJobsV2Request), variadicArgs...)
run(args[0].(context.Context), args[1].(*workerpb.DropJobsV2Request), variadicArgs...)
})
return _c
}
@ -347,7 +347,7 @@ func (_c *MockIndexNodeClient_DropJobsV2_Call) Return(_a0 *commonpb.Status, _a1
return _c
}
func (_c *MockIndexNodeClient_DropJobsV2_Call) RunAndReturn(run func(context.Context, *indexpb.DropJobsV2Request, ...grpc.CallOption) (*commonpb.Status, error)) *MockIndexNodeClient_DropJobsV2_Call {
func (_c *MockIndexNodeClient_DropJobsV2_Call) RunAndReturn(run func(context.Context, *workerpb.DropJobsV2Request, ...grpc.CallOption) (*commonpb.Status, error)) *MockIndexNodeClient_DropJobsV2_Call {
_c.Call.Return(run)
return _c
}
@ -423,7 +423,7 @@ func (_c *MockIndexNodeClient_GetComponentStates_Call) RunAndReturn(run func(con
}
// GetJobStats provides a mock function with given fields: ctx, in, opts
func (_m *MockIndexNodeClient) GetJobStats(ctx context.Context, in *indexpb.GetJobStatsRequest, opts ...grpc.CallOption) (*indexpb.GetJobStatsResponse, error) {
func (_m *MockIndexNodeClient) GetJobStats(ctx context.Context, in *workerpb.GetJobStatsRequest, opts ...grpc.CallOption) (*workerpb.GetJobStatsResponse, error) {
_va := make([]interface{}, len(opts))
for _i := range opts {
_va[_i] = opts[_i]
@ -433,20 +433,20 @@ func (_m *MockIndexNodeClient) GetJobStats(ctx context.Context, in *indexpb.GetJ
_ca = append(_ca, _va...)
ret := _m.Called(_ca...)
var r0 *indexpb.GetJobStatsResponse
var r0 *workerpb.GetJobStatsResponse
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.GetJobStatsRequest, ...grpc.CallOption) (*indexpb.GetJobStatsResponse, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.GetJobStatsRequest, ...grpc.CallOption) (*workerpb.GetJobStatsResponse, error)); ok {
return rf(ctx, in, opts...)
}
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.GetJobStatsRequest, ...grpc.CallOption) *indexpb.GetJobStatsResponse); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.GetJobStatsRequest, ...grpc.CallOption) *workerpb.GetJobStatsResponse); ok {
r0 = rf(ctx, in, opts...)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*indexpb.GetJobStatsResponse)
r0 = ret.Get(0).(*workerpb.GetJobStatsResponse)
}
}
if rf, ok := ret.Get(1).(func(context.Context, *indexpb.GetJobStatsRequest, ...grpc.CallOption) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, *workerpb.GetJobStatsRequest, ...grpc.CallOption) error); ok {
r1 = rf(ctx, in, opts...)
} else {
r1 = ret.Error(1)
@ -462,14 +462,14 @@ type MockIndexNodeClient_GetJobStats_Call struct {
// GetJobStats is a helper method to define mock.On call
// - ctx context.Context
// - in *indexpb.GetJobStatsRequest
// - in *workerpb.GetJobStatsRequest
// - opts ...grpc.CallOption
func (_e *MockIndexNodeClient_Expecter) GetJobStats(ctx interface{}, in interface{}, opts ...interface{}) *MockIndexNodeClient_GetJobStats_Call {
return &MockIndexNodeClient_GetJobStats_Call{Call: _e.mock.On("GetJobStats",
append([]interface{}{ctx, in}, opts...)...)}
}
func (_c *MockIndexNodeClient_GetJobStats_Call) Run(run func(ctx context.Context, in *indexpb.GetJobStatsRequest, opts ...grpc.CallOption)) *MockIndexNodeClient_GetJobStats_Call {
func (_c *MockIndexNodeClient_GetJobStats_Call) Run(run func(ctx context.Context, in *workerpb.GetJobStatsRequest, opts ...grpc.CallOption)) *MockIndexNodeClient_GetJobStats_Call {
_c.Call.Run(func(args mock.Arguments) {
variadicArgs := make([]grpc.CallOption, len(args)-2)
for i, a := range args[2:] {
@ -477,17 +477,17 @@ func (_c *MockIndexNodeClient_GetJobStats_Call) Run(run func(ctx context.Context
variadicArgs[i] = a.(grpc.CallOption)
}
}
run(args[0].(context.Context), args[1].(*indexpb.GetJobStatsRequest), variadicArgs...)
run(args[0].(context.Context), args[1].(*workerpb.GetJobStatsRequest), variadicArgs...)
})
return _c
}
func (_c *MockIndexNodeClient_GetJobStats_Call) Return(_a0 *indexpb.GetJobStatsResponse, _a1 error) *MockIndexNodeClient_GetJobStats_Call {
func (_c *MockIndexNodeClient_GetJobStats_Call) Return(_a0 *workerpb.GetJobStatsResponse, _a1 error) *MockIndexNodeClient_GetJobStats_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *MockIndexNodeClient_GetJobStats_Call) RunAndReturn(run func(context.Context, *indexpb.GetJobStatsRequest, ...grpc.CallOption) (*indexpb.GetJobStatsResponse, error)) *MockIndexNodeClient_GetJobStats_Call {
func (_c *MockIndexNodeClient_GetJobStats_Call) RunAndReturn(run func(context.Context, *workerpb.GetJobStatsRequest, ...grpc.CallOption) (*workerpb.GetJobStatsResponse, error)) *MockIndexNodeClient_GetJobStats_Call {
_c.Call.Return(run)
return _c
}
@ -633,7 +633,7 @@ func (_c *MockIndexNodeClient_GetStatisticsChannel_Call) RunAndReturn(run func(c
}
// QueryJobs provides a mock function with given fields: ctx, in, opts
func (_m *MockIndexNodeClient) QueryJobs(ctx context.Context, in *indexpb.QueryJobsRequest, opts ...grpc.CallOption) (*indexpb.QueryJobsResponse, error) {
func (_m *MockIndexNodeClient) QueryJobs(ctx context.Context, in *workerpb.QueryJobsRequest, opts ...grpc.CallOption) (*workerpb.QueryJobsResponse, error) {
_va := make([]interface{}, len(opts))
for _i := range opts {
_va[_i] = opts[_i]
@ -643,20 +643,20 @@ func (_m *MockIndexNodeClient) QueryJobs(ctx context.Context, in *indexpb.QueryJ
_ca = append(_ca, _va...)
ret := _m.Called(_ca...)
var r0 *indexpb.QueryJobsResponse
var r0 *workerpb.QueryJobsResponse
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.QueryJobsRequest, ...grpc.CallOption) (*indexpb.QueryJobsResponse, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.QueryJobsRequest, ...grpc.CallOption) (*workerpb.QueryJobsResponse, error)); ok {
return rf(ctx, in, opts...)
}
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.QueryJobsRequest, ...grpc.CallOption) *indexpb.QueryJobsResponse); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.QueryJobsRequest, ...grpc.CallOption) *workerpb.QueryJobsResponse); ok {
r0 = rf(ctx, in, opts...)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*indexpb.QueryJobsResponse)
r0 = ret.Get(0).(*workerpb.QueryJobsResponse)
}
}
if rf, ok := ret.Get(1).(func(context.Context, *indexpb.QueryJobsRequest, ...grpc.CallOption) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, *workerpb.QueryJobsRequest, ...grpc.CallOption) error); ok {
r1 = rf(ctx, in, opts...)
} else {
r1 = ret.Error(1)
@ -672,14 +672,14 @@ type MockIndexNodeClient_QueryJobs_Call struct {
// QueryJobs is a helper method to define mock.On call
// - ctx context.Context
// - in *indexpb.QueryJobsRequest
// - in *workerpb.QueryJobsRequest
// - opts ...grpc.CallOption
func (_e *MockIndexNodeClient_Expecter) QueryJobs(ctx interface{}, in interface{}, opts ...interface{}) *MockIndexNodeClient_QueryJobs_Call {
return &MockIndexNodeClient_QueryJobs_Call{Call: _e.mock.On("QueryJobs",
append([]interface{}{ctx, in}, opts...)...)}
}
func (_c *MockIndexNodeClient_QueryJobs_Call) Run(run func(ctx context.Context, in *indexpb.QueryJobsRequest, opts ...grpc.CallOption)) *MockIndexNodeClient_QueryJobs_Call {
func (_c *MockIndexNodeClient_QueryJobs_Call) Run(run func(ctx context.Context, in *workerpb.QueryJobsRequest, opts ...grpc.CallOption)) *MockIndexNodeClient_QueryJobs_Call {
_c.Call.Run(func(args mock.Arguments) {
variadicArgs := make([]grpc.CallOption, len(args)-2)
for i, a := range args[2:] {
@ -687,23 +687,23 @@ func (_c *MockIndexNodeClient_QueryJobs_Call) Run(run func(ctx context.Context,
variadicArgs[i] = a.(grpc.CallOption)
}
}
run(args[0].(context.Context), args[1].(*indexpb.QueryJobsRequest), variadicArgs...)
run(args[0].(context.Context), args[1].(*workerpb.QueryJobsRequest), variadicArgs...)
})
return _c
}
func (_c *MockIndexNodeClient_QueryJobs_Call) Return(_a0 *indexpb.QueryJobsResponse, _a1 error) *MockIndexNodeClient_QueryJobs_Call {
func (_c *MockIndexNodeClient_QueryJobs_Call) Return(_a0 *workerpb.QueryJobsResponse, _a1 error) *MockIndexNodeClient_QueryJobs_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *MockIndexNodeClient_QueryJobs_Call) RunAndReturn(run func(context.Context, *indexpb.QueryJobsRequest, ...grpc.CallOption) (*indexpb.QueryJobsResponse, error)) *MockIndexNodeClient_QueryJobs_Call {
func (_c *MockIndexNodeClient_QueryJobs_Call) RunAndReturn(run func(context.Context, *workerpb.QueryJobsRequest, ...grpc.CallOption) (*workerpb.QueryJobsResponse, error)) *MockIndexNodeClient_QueryJobs_Call {
_c.Call.Return(run)
return _c
}
// QueryJobsV2 provides a mock function with given fields: ctx, in, opts
func (_m *MockIndexNodeClient) QueryJobsV2(ctx context.Context, in *indexpb.QueryJobsV2Request, opts ...grpc.CallOption) (*indexpb.QueryJobsV2Response, error) {
func (_m *MockIndexNodeClient) QueryJobsV2(ctx context.Context, in *workerpb.QueryJobsV2Request, opts ...grpc.CallOption) (*workerpb.QueryJobsV2Response, error) {
_va := make([]interface{}, len(opts))
for _i := range opts {
_va[_i] = opts[_i]
@ -713,20 +713,20 @@ func (_m *MockIndexNodeClient) QueryJobsV2(ctx context.Context, in *indexpb.Quer
_ca = append(_ca, _va...)
ret := _m.Called(_ca...)
var r0 *indexpb.QueryJobsV2Response
var r0 *workerpb.QueryJobsV2Response
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.QueryJobsV2Request, ...grpc.CallOption) (*indexpb.QueryJobsV2Response, error)); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.QueryJobsV2Request, ...grpc.CallOption) (*workerpb.QueryJobsV2Response, error)); ok {
return rf(ctx, in, opts...)
}
if rf, ok := ret.Get(0).(func(context.Context, *indexpb.QueryJobsV2Request, ...grpc.CallOption) *indexpb.QueryJobsV2Response); ok {
if rf, ok := ret.Get(0).(func(context.Context, *workerpb.QueryJobsV2Request, ...grpc.CallOption) *workerpb.QueryJobsV2Response); ok {
r0 = rf(ctx, in, opts...)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*indexpb.QueryJobsV2Response)
r0 = ret.Get(0).(*workerpb.QueryJobsV2Response)
}
}
if rf, ok := ret.Get(1).(func(context.Context, *indexpb.QueryJobsV2Request, ...grpc.CallOption) error); ok {
if rf, ok := ret.Get(1).(func(context.Context, *workerpb.QueryJobsV2Request, ...grpc.CallOption) error); ok {
r1 = rf(ctx, in, opts...)
} else {
r1 = ret.Error(1)
@ -742,14 +742,14 @@ type MockIndexNodeClient_QueryJobsV2_Call struct {
// QueryJobsV2 is a helper method to define mock.On call
// - ctx context.Context
// - in *indexpb.QueryJobsV2Request
// - in *workerpb.QueryJobsV2Request
// - opts ...grpc.CallOption
func (_e *MockIndexNodeClient_Expecter) QueryJobsV2(ctx interface{}, in interface{}, opts ...interface{}) *MockIndexNodeClient_QueryJobsV2_Call {
return &MockIndexNodeClient_QueryJobsV2_Call{Call: _e.mock.On("QueryJobsV2",
append([]interface{}{ctx, in}, opts...)...)}
}
func (_c *MockIndexNodeClient_QueryJobsV2_Call) Run(run func(ctx context.Context, in *indexpb.QueryJobsV2Request, opts ...grpc.CallOption)) *MockIndexNodeClient_QueryJobsV2_Call {
func (_c *MockIndexNodeClient_QueryJobsV2_Call) Run(run func(ctx context.Context, in *workerpb.QueryJobsV2Request, opts ...grpc.CallOption)) *MockIndexNodeClient_QueryJobsV2_Call {
_c.Call.Run(func(args mock.Arguments) {
variadicArgs := make([]grpc.CallOption, len(args)-2)
for i, a := range args[2:] {
@ -757,17 +757,17 @@ func (_c *MockIndexNodeClient_QueryJobsV2_Call) Run(run func(ctx context.Context
variadicArgs[i] = a.(grpc.CallOption)
}
}
run(args[0].(context.Context), args[1].(*indexpb.QueryJobsV2Request), variadicArgs...)
run(args[0].(context.Context), args[1].(*workerpb.QueryJobsV2Request), variadicArgs...)
})
return _c
}
func (_c *MockIndexNodeClient_QueryJobsV2_Call) Return(_a0 *indexpb.QueryJobsV2Response, _a1 error) *MockIndexNodeClient_QueryJobsV2_Call {
func (_c *MockIndexNodeClient_QueryJobsV2_Call) Return(_a0 *workerpb.QueryJobsV2Response, _a1 error) *MockIndexNodeClient_QueryJobsV2_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *MockIndexNodeClient_QueryJobsV2_Call) RunAndReturn(run func(context.Context, *indexpb.QueryJobsV2Request, ...grpc.CallOption) (*indexpb.QueryJobsV2Response, error)) *MockIndexNodeClient_QueryJobsV2_Call {
func (_c *MockIndexNodeClient_QueryJobsV2_Call) RunAndReturn(run func(context.Context, *workerpb.QueryJobsV2Request, ...grpc.CallOption) (*workerpb.QueryJobsV2Response, error)) *MockIndexNodeClient_QueryJobsV2_Call {
_c.Call.Return(run)
return _c
}
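The regenerated mock above now targets the workerpb request/response types. A minimal test sketch, assuming the mockery expecter API (NewMockIndexNodeClient, EXPECT()) and the internal/mocks package path, which are conventions rather than anything confirmed by this diff:

package example

import (
	"context"
	"testing"

	"github.com/stretchr/testify/mock"

	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus/internal/mocks"
	"github.com/milvus-io/milvus/internal/proto/workerpb"
)

// TestCreateJobV2Stub shows how a test could stub the regenerated mock with
// the new workerpb request type after the indexpb -> workerpb migration.
// NewMockIndexNodeClient, EXPECT() and the mocks package path are assumptions
// based on mockery's usual output.
func TestCreateJobV2Stub(t *testing.T) {
	cli := mocks.NewMockIndexNodeClient(t)

	cli.EXPECT().
		CreateJobV2(mock.Anything, mock.Anything).
		Return(&commonpb.Status{}, nil)

	status, err := cli.CreateJobV2(context.Background(), &workerpb.CreateJobV2Request{})
	if err != nil || status == nil {
		t.Fatalf("unexpected result: %v, %v", status, err)
	}
}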

View File

@ -350,6 +350,12 @@ message SegmentInfo {
SegmentLevel last_level = 23;
// used in major compaction; if compaction fails, the partition stats version should be reverted to the last value
int64 last_partition_stats_version = 24;
// used to indicate whether the segment is sorted by primary key.
bool is_sorted = 25;
// textStatsLogs is used to record the tokenization (text) index for each field.
map<int64, TextIndexStats> textStatsLogs = 26;
}
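A brief sketch of how downstream Go code might consume the two new SegmentInfo fields; GetIsSorted appears later in this diff, while GetTextStatsLogs and GetFiles are assumed generated getters:

package example

import "github.com/milvus-io/milvus/internal/proto/datapb"

// needsStatsTask reports whether a sealed segment still has to be sorted by
// primary key; segments already sorted can skip the stats task.
func needsStatsTask(seg *datapb.SegmentInfo) bool {
	return !seg.GetIsSorted()
}

// textIndexFiles collects the text-index files recorded in the new
// textStatsLogs map (assumed getters GetTextStatsLogs / GetFiles).
func textIndexFiles(seg *datapb.SegmentInfo) []string {
	var files []string
	for _, stats := range seg.GetTextStatsLogs() {
		files = append(files, stats.GetFiles()...)
	}
	return files
}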
message SegmentStartPosition {
@ -419,6 +425,7 @@ message SegmentBinlogs {
repeated FieldBinlog statslogs = 4;
repeated FieldBinlog deltalogs = 5;
string insert_channel = 6;
map<int64, TextIndexStats> textStatsLogs = 7;
}
message FieldBinlog{
@ -426,6 +433,14 @@ message FieldBinlog{
repeated Binlog binlogs = 2;
}
message TextIndexStats {
int64 fieldID = 1;
int64 version = 2;
repeated string files = 3;
int64 log_size = 4;
int64 memory_size = 5;
}
message Binlog {
int64 entries_num = 1;
uint64 timestamp_from = 2;
@ -570,6 +585,7 @@ message CompactionSegmentBinlogs {
SegmentLevel level = 6;
int64 collectionID = 7;
int64 partitionID = 8;
bool is_sorted = 9;
}
message CompactionPlan {
@ -603,6 +619,7 @@ message CompactionSegment {
repeated FieldBinlog field2StatslogPaths = 5;
repeated FieldBinlog deltalogs = 6;
string channel = 7;
bool is_sorted = 8;
}
message CompactionPlanResult {
@ -919,6 +936,7 @@ enum CompactionTaskState {
indexing = 7;
cleaned = 8;
meta_saved = 9;
statistic = 10;
}
message CompactionTask{
@ -948,6 +966,7 @@ message CompactionTask{
int64 analyzeVersion = 24;
int64 lastStateStartTime = 25;
int64 max_size = 26;
repeated int64 tmpSegments = 27;
}
message PartitionStatsInfo {

View File

@ -37,38 +37,6 @@ service IndexCoord {
}
}
service IndexNode {
rpc GetComponentStates(milvus.GetComponentStatesRequest)
returns (milvus.ComponentStates) {
}
rpc GetStatisticsChannel(internal.GetStatisticsChannelRequest)
returns (milvus.StringResponse) {
}
rpc CreateJob(CreateJobRequest) returns (common.Status) {
}
rpc QueryJobs(QueryJobsRequest) returns (QueryJobsResponse) {
}
rpc DropJobs(DropJobsRequest) returns (common.Status) {
}
rpc GetJobStats(GetJobStatsRequest) returns (GetJobStatsResponse) {
}
rpc ShowConfigurations(internal.ShowConfigurationsRequest)
returns (internal.ShowConfigurationsResponse) {
}
// https://wiki.lfaidata.foundation/display/MIL/MEP+8+--+Add+metrics+for+proxy
rpc GetMetrics(milvus.GetMetricsRequest)
returns (milvus.GetMetricsResponse) {
}
rpc CreateJobV2(CreateJobV2Request) returns (common.Status) {
}
rpc QueryJobsV2(QueryJobsV2Request) returns (QueryJobsV2Response) {
}
rpc DropJobsV2(DropJobsV2Request) returns (common.Status) {
}
}
message IndexInfo {
int64 collectionID = 1;
int64 fieldID = 2;
@ -261,61 +229,6 @@ message OptionalFieldInfo {
repeated int64 data_ids = 5;
}
message CreateJobRequest {
string clusterID = 1;
string index_file_prefix = 2;
int64 buildID = 3;
repeated string data_paths = 4;
int64 index_version = 5;
int64 indexID = 6;
string index_name = 7;
StorageConfig storage_config = 8;
repeated common.KeyValuePair index_params = 9;
repeated common.KeyValuePair type_params = 10;
int64 num_rows = 11;
int32 current_index_version = 12;
int64 collectionID = 13;
int64 partitionID = 14;
int64 segmentID = 15;
int64 fieldID = 16;
string field_name = 17;
schema.DataType field_type = 18;
string store_path = 19;
int64 store_version = 20;
string index_store_path = 21;
int64 dim = 22;
repeated int64 data_ids = 23;
repeated OptionalFieldInfo optional_scalar_fields = 24;
schema.FieldSchema field = 25;
bool partition_key_isolation = 26;
}
message QueryJobsRequest {
string clusterID = 1;
repeated int64 buildIDs = 2;
}
message IndexTaskInfo {
int64 buildID = 1;
common.IndexState state = 2;
repeated string index_file_keys = 3;
uint64 serialized_size = 4;
string fail_reason = 5;
int32 current_index_version = 6;
int64 index_store_version = 7;
}
message QueryJobsResponse {
common.Status status = 1;
string clusterID = 2;
repeated IndexTaskInfo index_infos = 3;
}
message DropJobsRequest {
string clusterID = 1;
repeated int64 buildIDs = 2;
}
message JobInfo {
int64 num_rows = 1;
int64 dim = 2;
@ -325,19 +238,6 @@ message JobInfo {
int64 podID = 6;
}
message GetJobStatsRequest {
}
message GetJobStatsResponse {
common.Status status = 1;
int64 total_job_num = 2;
int64 in_progress_job_num = 3;
int64 enqueue_job_num = 4;
int64 task_slots = 5;
repeated JobInfo job_infos = 6;
bool enable_disk = 7;
}
message GetIndexStatisticsRequest {
int64 collectionID = 1;
string index_name = 2;
@ -379,80 +279,18 @@ message SegmentStats {
repeated int64 logIDs = 3;
}
message AnalyzeRequest {
string clusterID = 1;
int64 taskID = 2;
int64 collectionID = 3;
int64 partitionID = 4;
int64 fieldID = 5;
string fieldName = 6;
schema.DataType field_type = 7;
map<int64, SegmentStats> segment_stats = 8;
int64 version = 9;
StorageConfig storage_config = 10;
int64 dim = 11;
double max_train_size_ratio = 12;
int64 num_clusters = 13;
schema.FieldSchema field = 14;
double min_cluster_size_ratio = 15;
double max_cluster_size_ratio = 16;
int64 max_cluster_size = 17;
}
message AnalyzeResult {
int64 taskID = 1;
JobState state = 2;
string fail_reason = 3;
string centroids_file = 4;
}
message FieldLogPath {
int64 fieldID = 1;
repeated string file_paths = 2;
}
enum JobType {
JobTypeNone = 0;
JobTypeIndexJob = 1;
JobTypeAnalyzeJob = 2;
JobTypeStatsJob = 3;
}
message CreateJobV2Request {
string clusterID = 1;
int64 taskID = 2;
JobType job_type = 3;
oneof request {
AnalyzeRequest analyze_request = 4;
CreateJobRequest index_request = 5;
}
// JobDescriptor job = 3;
}
message QueryJobsV2Request {
string clusterID = 1;
repeated int64 taskIDs = 2;
JobType job_type = 3;
}
message IndexJobResults {
repeated IndexTaskInfo results = 1;
}
message AnalyzeResults {
repeated AnalyzeResult results = 1;
}
message QueryJobsV2Response {
common.Status status = 1;
string clusterID = 2;
oneof result {
IndexJobResults index_job_results = 3;
AnalyzeResults analyze_job_results = 4;
}
}
message DropJobsV2Request {
string clusterID = 1;
repeated int64 taskIDs = 2;
JobType job_type = 3;
}
enum JobState {
JobStateNone = 0;
JobStateInit = 1;
@ -461,3 +299,16 @@ enum JobState {
JobStateFailed = 4;
JobStateRetry = 5;
}
message StatsTask {
int64 collectionID = 1;
int64 partitionID = 2;
int64 segmentID = 3;
string insert_channel = 4;
int64 taskID = 5;
int64 version = 6;
int64 nodeID = 7;
JobState state = 8;
string fail_reason = 9;
int64 target_segmentID = 10;
}
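The new StatsTask message is the datacoord-side bookkeeping record for a sort-by-PK job. A minimal sketch of how a scheduler might create one; field and enum names follow standard protobuf Go codegen and are assumptions, as is the helper itself:

package example

import "github.com/milvus-io/milvus/internal/proto/indexpb"

// newStatsTask builds the metadata record that tracks one stats (sort) job.
// targetSegmentID is the ID reserved for the sorted segment that replaces the
// original one once the job succeeds.
func newStatsTask(taskID, collID, partID, segID, targetSegmentID int64, channel string) *indexpb.StatsTask {
	return &indexpb.StatsTask{
		TaskID:          taskID,
		CollectionID:    collID,
		PartitionID:     partID,
		SegmentID:       segID,
		InsertChannel:   channel,
		TargetSegmentID: targetSegmentID,
		State:           indexpb.JobState_JobStateInit,
	}
}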

View File

@ -363,6 +363,7 @@ message SegmentLoadInfo {
int64 readableVersion = 16;
data.SegmentLevel level = 17;
int64 storageVersion = 18;
bool is_sorted = 19;
}
message FieldIndexInfo {

internal/proto/worker.proto Normal file (221 lines)
View File

@ -0,0 +1,221 @@
syntax = "proto3";
package milvus.proto.worker;
option go_package = "github.com/milvus-io/milvus/internal/proto/workerpb";
import "common.proto";
import "internal.proto";
import "milvus.proto";
import "schema.proto";
import "data_coord.proto";
import "index_coord.proto";
service IndexNode {
rpc GetComponentStates(milvus.GetComponentStatesRequest)
returns (milvus.ComponentStates) {
}
rpc GetStatisticsChannel(internal.GetStatisticsChannelRequest)
returns (milvus.StringResponse) {
}
rpc CreateJob(CreateJobRequest) returns (common.Status) {
}
rpc QueryJobs(QueryJobsRequest) returns (QueryJobsResponse) {
}
rpc DropJobs(DropJobsRequest) returns (common.Status) {
}
rpc GetJobStats(GetJobStatsRequest) returns (GetJobStatsResponse) {
}
rpc ShowConfigurations(internal.ShowConfigurationsRequest)
returns (internal.ShowConfigurationsResponse) {
}
// https://wiki.lfaidata.foundation/display/MIL/MEP+8+--+Add+metrics+for+proxy
rpc GetMetrics(milvus.GetMetricsRequest)
returns (milvus.GetMetricsResponse) {
}
rpc CreateJobV2(CreateJobV2Request) returns (common.Status) {
}
rpc QueryJobsV2(QueryJobsV2Request) returns (QueryJobsV2Response) {
}
rpc DropJobsV2(DropJobsV2Request) returns (common.Status) {
}
}
message CreateJobRequest {
string clusterID = 1;
string index_file_prefix = 2;
int64 buildID = 3;
repeated string data_paths = 4;
int64 index_version = 5;
int64 indexID = 6;
string index_name = 7;
index.StorageConfig storage_config = 8;
repeated common.KeyValuePair index_params = 9;
repeated common.KeyValuePair type_params = 10;
int64 num_rows = 11;
int32 current_index_version = 12;
int64 collectionID = 13;
int64 partitionID = 14;
int64 segmentID = 15;
int64 fieldID = 16;
string field_name = 17;
schema.DataType field_type = 18;
string store_path = 19;
int64 store_version = 20;
string index_store_path = 21;
int64 dim = 22;
repeated int64 data_ids = 23;
repeated index.OptionalFieldInfo optional_scalar_fields = 24;
schema.FieldSchema field = 25;
bool partition_key_isolation = 26;
}
message QueryJobsRequest {
string clusterID = 1;
repeated int64 buildIDs = 2;
}
message QueryJobsResponse {
common.Status status = 1;
string clusterID = 2;
repeated IndexTaskInfo index_infos = 3;
}
message DropJobsRequest {
string clusterID = 1;
repeated int64 buildIDs = 2;
}
message GetJobStatsRequest {
}
message GetJobStatsResponse {
common.Status status = 1;
int64 total_job_num = 2;
int64 in_progress_job_num = 3;
int64 enqueue_job_num = 4;
int64 task_slots = 5;
repeated index.JobInfo job_infos = 6;
bool enable_disk = 7;
}
message AnalyzeRequest {
string clusterID = 1;
int64 taskID = 2;
int64 collectionID = 3;
int64 partitionID = 4;
int64 fieldID = 5;
string fieldName = 6;
schema.DataType field_type = 7;
map<int64, index.SegmentStats> segment_stats = 8;
int64 version = 9;
index.StorageConfig storage_config = 10;
int64 dim = 11;
double max_train_size_ratio = 12;
int64 num_clusters = 13;
schema.FieldSchema field = 14;
double min_cluster_size_ratio = 15;
double max_cluster_size_ratio = 16;
int64 max_cluster_size = 17;
}
message CreateStatsRequest {
string clusterID = 1;
int64 taskID = 2;
int64 collectionID = 3;
int64 partitionID = 4;
string insert_channel = 5;
int64 segmentID = 6;
repeated data.FieldBinlog insert_logs = 7;
repeated data.FieldBinlog delta_logs = 8;
index.StorageConfig storage_config = 9;
schema.CollectionSchema schema = 10;
int64 targetSegmentID = 11;
int64 startLogID = 12;
int64 endLogID = 13;
int64 num_rows = 14;
int64 collection_ttl = 15;
uint64 current_ts = 16;
int64 task_version = 17;
uint64 binlogMaxSize = 18;
}
message CreateJobV2Request {
string clusterID = 1;
int64 taskID = 2;
index.JobType job_type = 3;
oneof request {
AnalyzeRequest analyze_request = 4;
CreateJobRequest index_request = 5;
CreateStatsRequest stats_request = 6;
}
}
message QueryJobsV2Request {
string clusterID = 1;
repeated int64 taskIDs = 2;
index.JobType job_type = 3;
}
message IndexTaskInfo {
int64 buildID = 1;
common.IndexState state = 2;
repeated string index_file_keys = 3;
uint64 serialized_size = 4;
string fail_reason = 5;
int32 current_index_version = 6;
int64 index_store_version = 7;
}
message IndexJobResults {
repeated IndexTaskInfo results = 1;
}
message AnalyzeResult {
int64 taskID = 1;
index.JobState state = 2;
string fail_reason = 3;
string centroids_file = 4;
}
message AnalyzeResults {
repeated AnalyzeResult results = 1;
}
message StatsResult {
int64 taskID = 1;
index.JobState state = 2;
string fail_reason = 3;
int64 collectionID = 4;
int64 partitionID = 5;
int64 segmentID = 6;
string channel = 7;
repeated data.FieldBinlog insert_logs = 8;
repeated data.FieldBinlog stats_logs = 9;
repeated data.FieldBinlog delta_logs = 10;
map<int64, data.TextIndexStats> text_stats_logs = 11;
int64 num_rows = 12;
}
message StatsResults {
repeated StatsResult results = 1;
}
message QueryJobsV2Response {
common.Status status = 1;
string clusterID = 2;
oneof result {
IndexJobResults index_job_results = 3;
AnalyzeResults analyze_job_results = 4;
StatsResults stats_job_results = 5;
}
}
message DropJobsV2Request {
string clusterID = 1;
repeated int64 taskIDs = 2;
index.JobType job_type = 3;
}
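To tie the new worker.proto messages together, here is a hedged sketch of how datacoord might wrap a CreateStatsRequest into CreateJobV2 when dispatching a sort-by-PK job to an indexnode. The oneof wrapper name CreateJobV2Request_StatsRequest, the generated field and getter names, and the helper itself are assumptions based on standard protobuf Go codegen:

package example

import (
	"context"

	"github.com/milvus-io/milvus/internal/proto/indexpb"
	"github.com/milvus-io/milvus/internal/proto/workerpb"
	"github.com/milvus-io/milvus/internal/types"
)

// dispatchStatsJob sends a stats (segment sort) job to an indexnode worker
// through the V2 job API.
func dispatchStatsJob(ctx context.Context, cli types.IndexNodeClient, req *workerpb.CreateStatsRequest) error {
	jobReq := &workerpb.CreateJobV2Request{
		ClusterID: req.GetClusterID(),
		TaskID:    req.GetTaskID(),
		JobType:   indexpb.JobType_JobTypeStatsJob,
		Request: &workerpb.CreateJobV2Request_StatsRequest{
			StatsRequest: req,
		},
	}
	status, err := cli.CreateJobV2(ctx, jobReq)
	if err != nil {
		return err
	}
	_ = status // callers would normally convert a non-success status into an error
	return nil
}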

View File

@ -86,6 +86,7 @@ func PackSegmentLoadInfo(segment *datapb.SegmentInfo, channelCheckpoint *msgpb.M
DeltaPosition: channelCheckpoint,
Level: segment.GetLevel(),
StorageVersion: segment.GetStorageVersion(),
IsSorted: segment.GetIsSorted(),
}
return loadInfo
}

View File

@ -291,7 +291,7 @@ func NewSegment(ctx context.Context,
var newPtr C.CSegmentInterface
_, err = GetDynamicPool().Submit(func() (any, error) {
status := C.NewSegment(collection.collectionPtr, cSegType, C.int64_t(loadInfo.GetSegmentID()), &newPtr)
status := C.NewSegment(collection.collectionPtr, cSegType, C.int64_t(loadInfo.GetSegmentID()), &newPtr, C.bool(loadInfo.GetIsSorted()))
err := HandleCStatus(ctx, &status, "NewSegmentFailed",
zap.Int64("collectionID", loadInfo.GetCollectionID()),
zap.Int64("partitionID", loadInfo.GetPartitionID()),

View File

@ -655,7 +655,7 @@ func (loader *segmentLoader) loadSealedSegment(ctx context.Context, loadInfo *qu
if err != nil {
return err
}
if !typeutil.IsVectorType(field.GetDataType()) && !segment.HasRawData(fieldID) {
if (!typeutil.IsVectorType(field.GetDataType()) && !segment.HasRawData(fieldID)) || field.GetIsPrimaryKey() {
log.Info("field index doesn't include raw data, load binlog...",
zap.Int64("fieldID", fieldID),
zap.String("index", info.IndexInfo.GetIndexName()),

View File

@ -410,6 +410,7 @@ func (node *QueryNode) LoadSegments(ctx context.Context, req *querypb.LoadSegmen
zap.Int64("segmentID", segment.GetSegmentID()),
zap.String("level", segment.GetLevel().String()),
zap.Int64("currentNodeID", node.GetNodeID()),
zap.Bool("isSorted", segment.GetIsSorted()),
)
log.Info("received load segments request",

View File

@ -32,6 +32,7 @@ import (
"github.com/milvus-io/milvus/internal/proto/proxypb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/proto/rootcoordpb"
"github.com/milvus-io/milvus/internal/proto/workerpb"
)
// Limiter defines the interface to perform request rate limiting.
@ -105,6 +106,7 @@ type DataNodeComponent interface {
type DataCoordClient interface {
io.Closer
datapb.DataCoordClient
indexpb.IndexCoordClient
}
// DataCoord is the interface `datacoord` package implements
@ -141,13 +143,13 @@ type DataCoordComponent interface {
// IndexNodeClient is the client interface for indexnode server
type IndexNodeClient interface {
io.Closer
indexpb.IndexNodeClient
workerpb.IndexNodeClient
}
// IndexNode is the interface `indexnode` package implements
type IndexNode interface {
Component
indexpb.IndexNodeServer
workerpb.IndexNodeServer
}
// IndexNodeComponent is used by grpc server of IndexNode

View File

@ -89,5 +89,5 @@ func GetFileType(file *internalpb.ImportFile) (FileType, error) {
}
return CSV, nil
}
return Invalid, merr.WrapErrImportFailed(fmt.Sprintf("unexpect file type, files=%v", file.GetPaths()))
return Invalid, merr.WrapErrImportFailed(fmt.Sprintf("unexpected file type, files=%v", file.GetPaths()))
}

View File

@ -1,86 +0,0 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mock
import (
"context"
"google.golang.org/grpc"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
)
var _ indexpb.IndexNodeClient = &GrpcIndexNodeClient{}
type GrpcIndexNodeClient struct {
Err error
}
func (m *GrpcIndexNodeClient) GetComponentStates(ctx context.Context, in *milvuspb.GetComponentStatesRequest, opts ...grpc.CallOption) (*milvuspb.ComponentStates, error) {
return &milvuspb.ComponentStates{}, m.Err
}
//func (m *GrpcIndexNodeClient) GetTimeTickChannel(ctx context.Context, in *internalpb.GetTimeTickChannelRequest, opts ...grpc.CallOption) (*milvuspb.StringResponse, error) {
// return &milvuspb.StringResponse{}, m.Err
//}
func (m *GrpcIndexNodeClient) GetStatisticsChannel(ctx context.Context, in *internalpb.GetStatisticsChannelRequest, opts ...grpc.CallOption) (*milvuspb.StringResponse, error) {
return &milvuspb.StringResponse{}, m.Err
}
func (m *GrpcIndexNodeClient) CreateJob(ctx context.Context, in *indexpb.CreateJobRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
return &commonpb.Status{}, m.Err
}
func (m *GrpcIndexNodeClient) QueryJobs(ctx context.Context, in *indexpb.QueryJobsRequest, opts ...grpc.CallOption) (*indexpb.QueryJobsResponse, error) {
return &indexpb.QueryJobsResponse{}, m.Err
}
func (m *GrpcIndexNodeClient) DropJobs(ctx context.Context, in *indexpb.DropJobsRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
return &commonpb.Status{}, m.Err
}
func (m *GrpcIndexNodeClient) GetJobStats(ctx context.Context, in *indexpb.GetJobStatsRequest, opts ...grpc.CallOption) (*indexpb.GetJobStatsResponse, error) {
return &indexpb.GetJobStatsResponse{}, m.Err
}
func (m *GrpcIndexNodeClient) GetMetrics(ctx context.Context, in *milvuspb.GetMetricsRequest, opts ...grpc.CallOption) (*milvuspb.GetMetricsResponse, error) {
return &milvuspb.GetMetricsResponse{}, m.Err
}
func (m *GrpcIndexNodeClient) ShowConfigurations(ctx context.Context, in *internalpb.ShowConfigurationsRequest, opts ...grpc.CallOption) (*internalpb.ShowConfigurationsResponse, error) {
return &internalpb.ShowConfigurationsResponse{}, m.Err
}
func (m *GrpcIndexNodeClient) CreateJobV2(ctx context.Context, in *indexpb.CreateJobV2Request, opt ...grpc.CallOption) (*commonpb.Status, error) {
return &commonpb.Status{}, m.Err
}
func (m *GrpcIndexNodeClient) QueryJobsV2(ctx context.Context, in *indexpb.QueryJobsV2Request, opt ...grpc.CallOption) (*indexpb.QueryJobsV2Response, error) {
return &indexpb.QueryJobsV2Response{}, m.Err
}
func (m *GrpcIndexNodeClient) DropJobsV2(ctx context.Context, in *indexpb.DropJobsV2Request, opt ...grpc.CallOption) (*commonpb.Status, error) {
return &commonpb.Status{}, m.Err
}
func (m *GrpcIndexNodeClient) Close() error {
return m.Err
}

View File

@ -103,6 +103,9 @@ const (
AnalyzeStatsPath = `analyze_stats`
OffsetMapping = `offset_mapping`
Centroids = "centroids"
// TextIndexPath storage path const for text index
TextIndexPath = "text_log"
)
// Search, Index parameter keys

View File

@ -321,6 +321,15 @@ var (
taskTypeLabel,
statusLabelName,
})
// TaskNum records the number of tasks of each type.
TaskNum = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: milvusNamespace,
Subsystem: typeutil.DataCoordRole,
Name: "task_count",
Help: "number of index tasks of each type",
}, []string{collectionIDLabelName, taskTypeLabel, taskStateLabel})
)
// RegisterDataCoord registers DataCoord metrics
@ -349,6 +358,7 @@ func RegisterDataCoord(registry *prometheus.Registry) {
registry.MustRegister(GarbageCollectorFileScanDuration)
registry.MustRegister(GarbageCollectorRunCount)
registry.MustRegister(DataCoordTaskExecuteLatency)
registry.MustRegister(TaskNum)
}
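A small sketch of how a datacoord task scheduler might update the new TaskNum gauge; the pkg/metrics import path and the label values are assumptions for illustration, while the label order matches the GaugeVec definition above (collection ID, task type, task state):

package example

import (
	"strconv"

	"github.com/milvus-io/milvus/pkg/metrics"
)

// recordStatsTaskState bumps the per-(collection, type, state) task gauge.
func recordStatsTaskState(collectionID int64, state string) {
	metrics.TaskNum.WithLabelValues(
		strconv.FormatInt(collectionID, 10),
		"stats", // task type label (illustrative value)
		state,   // e.g. "InProgress" or "Finished" (illustrative values)
	).Inc()
}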
func CleanupDataCoordSegmentMetrics(dbName string, collectionID int64, segmentID int64) {

View File

@ -116,7 +116,8 @@ const (
LoadedLabel = "loaded"
NumEntitiesAllLabel = "all"
taskTypeLabel = "task_type"
taskTypeLabel = "task_type"
taskStateLabel = "task_state"
)
var (

View File

@ -110,6 +110,7 @@ var (
ErrIndexNotFound = newMilvusError("index not found", 700, false)
ErrIndexNotSupported = newMilvusError("index type not supported", 701, false)
ErrIndexDuplicate = newMilvusError("index duplicates", 702, false)
ErrTaskDuplicate = newMilvusError("task duplicates", 703, false)
// Database related
ErrDatabaseNotFound = newMilvusError("database not found", 800, false)

View File

@ -810,6 +810,14 @@ func WrapErrIndexDuplicate(indexName string, msg ...string) error {
return err
}
func WrapErrTaskDuplicate(taskType string, msg ...string) error {
err := wrapFields(ErrTaskDuplicate, value("taskType", taskType))
if len(msg) > 0 {
err = errors.Wrap(err, strings.Join(msg, "->"))
}
return err
}
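A usage sketch for the new error wrapper, for example when a scheduler detects that a stats task already exists for a segment; the surrounding helper and message text are illustrative:

package example

import (
	"fmt"

	"github.com/milvus-io/milvus/pkg/util/merr"
)

// checkDuplicate returns a wrapped ErrTaskDuplicate when a stats task has
// already been registered for the given segment.
func checkDuplicate(exists bool, segmentID int64) error {
	if exists {
		return merr.WrapErrTaskDuplicate("stats",
			fmt.Sprintf("stats task already exists for segment %d", segmentID))
	}
	return nil
}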
// Node related
func WrapErrNodeNotFound(id int64, msg ...string) error {
err := wrapFields(ErrNodeNotFound, value("node", id))

View File

@ -3230,6 +3230,9 @@ type dataCoordConfig struct {
ClusteringCompactionSlotUsage ParamItem `refreshable:"true"`
MixCompactionSlotUsage ParamItem `refreshable:"true"`
L0DeleteCompactionSlotUsage ParamItem `refreshable:"true"`
EnableStatsTask ParamItem `refreshable:"true"`
TaskCheckInterval ParamItem `refreshable:"true"`
}
func (p *dataCoordConfig) init(base *BaseTable) {
@ -3854,7 +3857,7 @@ During compaction, the size of segment # of rows is able to exceed segment max #
p.IndexTaskSchedulerInterval = ParamItem{
Key: "indexCoord.scheduler.interval",
Version: "2.0.0",
DefaultValue: "1000",
DefaultValue: "100",
}
p.IndexTaskSchedulerInterval.Init(base.mgr)
@ -4033,6 +4036,26 @@ During compaction, the size of segment # of rows is able to exceed segment max #
Export: true,
}
p.L0DeleteCompactionSlotUsage.Init(base.mgr)
p.EnableStatsTask = ParamItem{
Key: "dataCoord.statsTask.enable",
Version: "2.5.0",
Doc: "enable stats task",
DefaultValue: "true",
PanicIfEmpty: false,
Export: false,
}
p.EnableStatsTask.Init(base.mgr)
p.TaskCheckInterval = ParamItem{
Key: "dataCoord.taskCheckInterval",
Version: "2.5.0",
Doc: "task check interval in seconds",
DefaultValue: "1",
PanicIfEmpty: false,
Export: false,
}
p.TaskCheckInterval.Init(base.mgr)
}
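A hedged sketch of reading the two new parameters from Go code; the accessor names (GetAsBool, GetAsDuration) follow the existing ParamItem API, and the paramtable.Get() entry point is assumed:

package example

import (
	"time"

	"github.com/milvus-io/milvus/pkg/util/paramtable"
)

// statsTaskSettings reads the two knobs added above.
func statsTaskSettings() (enabled bool, checkInterval time.Duration) {
	params := paramtable.Get()
	enabled = params.DataCoordCfg.EnableStatsTask.GetAsBool()
	// dataCoord.taskCheckInterval is expressed in seconds.
	checkInterval = params.DataCoordCfg.TaskCheckInterval.GetAsDuration(time.Second)
	return enabled, checkInterval
}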
// /////////////////////////////////////////////////////////////////////////////

Some files were not shown because too many files have changed in this diff.