mirror of https://github.com/milvus-io/milvus.git
enhance: Add json key inverted index in stats for optimization (#38039)
Add JSON key inverted index in stats for optimization. Issue: https://github.com/milvus-io/milvus/issues/36995
Signed-off-by: Xianhui.Lin <xianhui.lin@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
parent
a308d2c886
commit
3bc24c264f
|
@ -414,6 +414,7 @@ queryNode:
|
|||
buildParallelRate: 0.5 # the ratio of CPU cores used to build the interim index in parallel
|
||||
multipleChunkedEnable: true # Enable multiple chunked search
|
||||
knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic
|
||||
jsonKeyStatsCommitInterval: 200 # the commit interval for JSON key stats
|
||||
loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments
|
||||
enableDisk: false # enable querynode load disk index, and search on disk index
|
||||
maxDiskUsagePercentage: 95
|
||||
|
@ -636,6 +637,10 @@ dataCoord:
|
|||
indexTaskSlotUsage: 64 # slot usage of an index task per 512 MB
|
||||
statsTaskSlotUsage: 8 # slot usage of a stats task per 512 MB
|
||||
analyzeTaskSlotUsage: 65535 # slot usage of analyze task
|
||||
jsonStatsTriggerCount: 10 # number of JSON key stats tasks per trigger
|
||||
jsonStatsTriggerInterval: 10 # trigger interval for JSON key stats tasks
|
||||
enabledJSONKeyStatsInSort: false # Indicates whether to enable the JSON key stats task together with sort
|
||||
jsonKeyStatsMemoryBudgetInTantivy: 16777216 # the memory budget, in bytes, for the JSON key index in Tantivy
|
||||
ip: # TCP/IP address of dataCoord. If not specified, use the first unicastable address
|
||||
port: 13333 # TCP port of dataCoord
|
||||
grpc:
|
||||
|
@ -891,6 +896,8 @@ common:
|
|||
sync:
|
||||
taskPoolReleaseTimeoutSeconds: 60 # The maximum time to wait for the task to finish and release resources in the pool
|
||||
enabledOptimizeExpr: true # Indicates whether to enable expression optimization
|
||||
enabledJSONKeyStats: false # Indicates whether to enable JSON key stats for sealed segments
|
||||
enabledGrowingSegmentJSONKeyStats: false # Indicates whether to enable JSON key stats for growing segments
|
||||
|
||||
# QuotaConfig, configurations of Milvus quota and limits.
|
||||
# By default, we enable:
|
||||
|
|
|
@ -30,6 +30,9 @@ int CPU_NUM = DEFAULT_CPU_NUM;
|
|||
int64_t EXEC_EVAL_EXPR_BATCH_SIZE = DEFAULT_EXEC_EVAL_EXPR_BATCH_SIZE;
|
||||
bool OPTIMIZE_EXPR_ENABLED = DEFAULT_OPTIMIZE_EXPR_ENABLED;
|
||||
|
||||
int64_t JSON_KEY_STATS_COMMIT_INTERVAL = DEFAULT_JSON_KEY_STATS_COMMIT_INTERVAL;
|
||||
bool GROWING_JSON_KEY_STATS_ENABLED = DEFAULT_GROWING_JSON_KEY_STATS_ENABLED;
|
||||
|
||||
void
|
||||
SetIndexSliceSize(const int64_t size) {
|
||||
FILE_SLICE_SIZE = size << 20;
|
||||
|
@ -74,4 +77,18 @@ SetDefaultOptimizeExprEnable(bool val) {
|
|||
LOG_INFO("set default optimize expr enabled: {}", OPTIMIZE_EXPR_ENABLED);
|
||||
}
|
||||
|
||||
void
|
||||
SetDefaultJSONKeyStatsCommitInterval(int64_t val) {
|
||||
JSON_KEY_STATS_COMMIT_INTERVAL = val;
|
||||
LOG_INFO("set default json key Stats commit interval: {}",
|
||||
JSON_KEY_STATS_COMMIT_INTERVAL);
|
||||
}
|
||||
|
||||
void
|
||||
SetDefaultGrowingJSONKeyStatsEnable(bool val) {
|
||||
GROWING_JSON_KEY_STATS_ENABLED = val;
|
||||
LOG_INFO("set default growing json key index enable: {}",
|
||||
GROWING_JSON_KEY_STATS_ENABLED);
|
||||
}
|
||||
|
||||
} // namespace milvus
|
||||
|
|
|
@ -29,8 +29,9 @@ extern int64_t MIDDLE_PRIORITY_THREAD_CORE_COEFFICIENT;
|
|||
extern int64_t LOW_PRIORITY_THREAD_CORE_COEFFICIENT;
|
||||
extern int CPU_NUM;
|
||||
extern int64_t EXEC_EVAL_EXPR_BATCH_SIZE;
|
||||
extern int64_t JSON_KEY_STATS_COMMIT_INTERVAL;
|
||||
extern bool OPTIMIZE_EXPR_ENABLED;
|
||||
|
||||
extern bool GROWING_JSON_KEY_STATS_ENABLED;
|
||||
void
|
||||
SetIndexSliceSize(const int64_t size);
|
||||
|
||||
|
@ -52,6 +53,12 @@ SetDefaultExecEvalExprBatchSize(int64_t val);
|
|||
void
|
||||
SetDefaultOptimizeExprEnable(bool val);
|
||||
|
||||
void
|
||||
SetDefaultJSONKeyStatsCommitInterval(int64_t val);
|
||||
|
||||
void
|
||||
SetDefaultGrowingJSONKeyStatsEnable(bool val);
|
||||
|
||||
struct BufferView {
|
||||
struct Element {
|
||||
const char* data_;
|
||||
|
|
|
@ -49,6 +49,7 @@ const char PAGE_RETAIN_ORDER[] = "page_retain_order";
|
|||
const char TEXT_LOG_ROOT_PATH[] = "text_log";
|
||||
const char ITERATIVE_FILTER[] = "iterative_filter";
|
||||
const char HINTS[] = "hints";
|
||||
const char JSON_KEY_INDEX_LOG_ROOT_PATH[] = "json_key_index_log";
|
||||
|
||||
const char DEFAULT_PLANNODE_ID[] = "0";
|
||||
const char DEAFULT_QUERY_ID[] = "0";
|
||||
|
@ -82,3 +83,6 @@ const std::string JSON_CAST_TYPE = "json_cast_type";
|
|||
const std::string JSON_PATH = "json_path";
|
||||
const bool DEFAULT_OPTIMIZE_EXPR_ENABLED = true;
|
||||
const int64_t DEFAULT_CONVERT_OR_TO_IN_NUMERIC_LIMIT = 150;
|
||||
const int64_t DEFAULT_JSON_INDEX_MEMORY_BUDGET = 16777216; // bytes, 16MB
|
||||
const bool DEFAULT_GROWING_JSON_KEY_STATS_ENABLED = false;
|
||||
const int64_t DEFAULT_JSON_KEY_STATS_COMMIT_INTERVAL = 200;
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#include "common/FieldMeta.h"
|
||||
#include "common/SystemProperty.h"
|
||||
#include "common/protobuf_utils.h"
|
||||
|
||||
#include "common/Common.h"
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <optional>
|
||||
|
||||
|
@ -39,6 +39,11 @@ FieldMeta::enable_match() const {
|
|||
return string_info_->enable_match;
|
||||
}
|
||||
|
||||
bool
|
||||
FieldMeta::enable_growing_jsonStats() const {
|
||||
return IsJsonDataType(type_) && GROWING_JSON_KEY_STATS_ENABLED;
|
||||
}
|
||||
|
||||
bool
|
||||
FieldMeta::enable_analyzer() const {
|
||||
if (!IsStringDataType(type_)) {
|
||||
|
|
|
@ -148,6 +148,9 @@ class FieldMeta {
|
|||
bool
|
||||
enable_analyzer() const;
|
||||
|
||||
bool
|
||||
enable_growing_jsonStats() const;
|
||||
|
||||
TokenizerParams
|
||||
get_analyzer_params() const;
|
||||
|
||||
|
|
|
@ -149,6 +149,25 @@ class Json {
|
|||
return doc;
|
||||
}
|
||||
|
||||
value_result<document>
|
||||
doc(uint16_t offset, uint16_t length) const {
|
||||
thread_local simdjson::ondemand::parser parser;
|
||||
|
||||
// it's always safe to add the padding,
|
||||
// as we have allocated the memory with this padding
|
||||
auto doc = parser.iterate(
|
||||
data_.data() + offset, length, length + simdjson::SIMDJSON_PADDING);
|
||||
AssertInfo(doc.error() == simdjson::SUCCESS,
|
||||
"failed to parse the json {} offset {}, length {}: {}, "
|
||||
"total_json:{}",
|
||||
std::string(data_.data() + offset, length),
|
||||
offset,
|
||||
length,
|
||||
simdjson::error_message(doc.error()),
|
||||
data_);
|
||||
return doc;
|
||||
}
|
||||
|
||||
value_result<simdjson::dom::element>
|
||||
dom_doc() const {
|
||||
if (data_.size() == 0) {
|
||||
|
@ -166,6 +185,20 @@ class Json {
|
|||
return doc;
|
||||
}
|
||||
|
||||
value_result<simdjson::dom::element>
|
||||
dom_doc(uint16_t offset, uint16_t length) const {
|
||||
thread_local simdjson::dom::parser parser;
|
||||
|
||||
// it's always safe to add the padding,
|
||||
// as we have allocated the memory with this padding
|
||||
auto doc = parser.parse(data_.data() + offset, length);
|
||||
AssertInfo(doc.error() == simdjson::SUCCESS,
|
||||
"failed to parse the json {}: {}",
|
||||
std::string(data_.data() + offset, length),
|
||||
simdjson::error_message(doc.error()));
|
||||
return doc;
|
||||
}
|
||||
|
||||
bool
|
||||
exist(std::string_view pointer) const {
|
||||
auto doc = this->doc();
|
||||
|
@ -207,6 +240,22 @@ class Json {
|
|||
return doc().at_pointer(pointer).get<T>();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
value_result<T>
|
||||
at(uint16_t offset, uint16_t length) const {
|
||||
return doc(offset, length).get<T>();
|
||||
}
|
||||
|
||||
std::string_view
|
||||
at_string(uint16_t offset, uint16_t length) const {
|
||||
return std::string_view(data_.data() + offset, length);
|
||||
}
|
||||
|
||||
value_result<simdjson::dom::array>
|
||||
array_at(uint16_t offset, uint16_t length) const {
|
||||
return dom_doc(offset, length).get_array();
|
||||
}
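
The offset/length overloads added above let a caller parse a single value in place, given the byte span that the JSON key stats index records for a row. An illustrative helper (not part of this diff; only the accessor calls are taken from it):

    // Hypothetical helper: (offset, size) describe one value inside this row's JSON.
    inline bool value_in_range(const milvus::Json& json,
                               uint16_t offset, uint16_t size,
                               int64_t lo, int64_t hi) {
        // json.at_string(offset, size) would return the raw bytes unparsed, and
        // json.array_at(offset, size) a dom array; here we parse one scalar.
        auto v = json.at<int64_t>(offset, size);
        return !v.error() && lo <= v.value() && v.value() <= hi;
    }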
|
||||
|
||||
// get dom array by JSON pointer,
|
||||
// call `size()` to get array size,
|
||||
// call `at()` to get array element by index,
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
#include "common/Tracer.h"
|
||||
#include "log/Log.h"
|
||||
|
||||
std::once_flag flag1, flag2, flag3, flag4, flag5, flag6, flag7;
|
||||
std::once_flag flag1, flag2, flag3, flag4, flag5, flag6, flag7, flag8, flag9;
|
||||
std::once_flag traceFlag;
|
||||
|
||||
void
|
||||
|
@ -86,6 +86,22 @@ InitDefaultOptimizeExprEnable(bool val) {
|
|||
val);
|
||||
}
|
||||
|
||||
void
|
||||
InitDefaultJSONKeyStatsCommitInterval(int64_t val) {
|
||||
std::call_once(
|
||||
flag8,
|
||||
[](int val) { milvus::SetDefaultJSONKeyStatsCommitInterval(val); },
|
||||
val);
|
||||
}
|
||||
|
||||
void
|
||||
InitDefaultGrowingJSONKeyStatsEnable(bool val) {
|
||||
std::call_once(
|
||||
flag9,
|
||||
[](bool val) { milvus::SetDefaultGrowingJSONKeyStatsEnable(val); },
|
||||
val);
|
||||
}
|
||||
|
||||
void
|
||||
InitTrace(CTraceConfig* config) {
|
||||
auto traceConfig = milvus::tracer::TraceConfig{config->exporter,
|
||||
|
|
|
@ -51,6 +51,12 @@ SetTrace(CTraceConfig* config);
|
|||
void
|
||||
InitDefaultOptimizeExprEnable(bool val);
|
||||
|
||||
void
|
||||
InitDefaultJSONKeyStatsCommitInterval(int64_t val);
|
||||
|
||||
void
|
||||
InitDefaultGrowingJSONKeyStatsEnable(bool val);
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
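
A sketch of how the two new C entry points declared above would be wired; the call site is not part of this diff, so the location and the wrapper name are assumptions, and the values shown simply mirror the yaml defaults earlier in the change:

    // Hypothetical wrapper, assumed to run once during segcore initialization.
    static void InitJsonKeyStatsDefaults() {
        InitDefaultJSONKeyStatsCommitInterval(200);   // queryNode.jsonKeyStatsCommitInterval
        InitDefaultGrowingJSONKeyStatsEnable(false);  // common.enabledGrowingSegmentJSONKeyStats
    }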
|
||||
|
|
|
@ -0,0 +1,509 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
/*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2010 Serge Zaitsev
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
#ifndef JSMN_H
|
||||
#define JSMN_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define JSMN_STATIC
|
||||
#ifdef JSMN_STATIC
|
||||
#define JSMN_API static
|
||||
#else
|
||||
#define JSMN_API extern
|
||||
#endif
|
||||
|
||||
/**
|
||||
* JSON type identifier. Basic types are:
|
||||
* o Object
|
||||
* o Array
|
||||
* o String
|
||||
* o Other primitive: number, boolean (true/false) or null
|
||||
*/
|
||||
typedef enum {
|
||||
JSMN_UNDEFINED = 0,
|
||||
JSMN_OBJECT = 1 << 0,
|
||||
JSMN_ARRAY = 1 << 1,
|
||||
JSMN_STRING = 1 << 2,
|
||||
JSMN_PRIMITIVE = 1 << 3
|
||||
} jsmntype_t;
|
||||
|
||||
enum jsmnerr {
|
||||
/* Not enough tokens were provided */
|
||||
JSMN_ERROR_NOMEM = -1,
|
||||
/* Invalid character inside JSON string */
|
||||
JSMN_ERROR_INVAL = -2,
|
||||
/* The string is not a full JSON packet, more bytes expected */
|
||||
JSMN_ERROR_PART = -3
|
||||
};
|
||||
|
||||
/**
|
||||
* JSON token description.
|
||||
* type type (object, array, string etc.)
|
||||
* start start position in JSON data string
|
||||
* end end position in JSON data string
|
||||
*/
|
||||
typedef struct jsmntok {
|
||||
jsmntype_t type;
|
||||
int start;
|
||||
int end;
|
||||
int size;
|
||||
#ifdef JSMN_PARENT_LINKS
|
||||
int parent;
|
||||
#endif
|
||||
} jsmntok_t;
|
||||
|
||||
/**
|
||||
* JSON parser. Contains an array of token blocks available. Also stores
|
||||
* the string being parsed now and current position in that string.
|
||||
*/
|
||||
typedef struct jsmn_parser {
|
||||
unsigned int pos; /* offset in the JSON string */
|
||||
unsigned int toknext; /* next token to allocate */
|
||||
int toksuper; /* superior token node, e.g. parent object or array */
|
||||
} jsmn_parser;
|
||||
|
||||
/**
|
||||
* Create JSON parser over an array of tokens
|
||||
*/
|
||||
JSMN_API void
|
||||
jsmn_init(jsmn_parser* parser);
|
||||
|
||||
/**
|
||||
* Run JSON parser. It parses a JSON data string into and array of tokens, each
 * Run JSON parser. It parses a JSON data string into an array of tokens, each
|
||||
* describing
|
||||
* a single JSON object.
|
||||
*/
|
||||
JSMN_API int
|
||||
jsmn_parse(jsmn_parser* parser,
|
||||
const char* js,
|
||||
const size_t len,
|
||||
jsmntok_t* tokens,
|
||||
const unsigned int num_tokens);
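
For orientation, the vendored tokenizer's two-call API declared above can be used like this (illustrative sketch, not code from this change):

    // Tokenize js[0..len) into a fixed token pool and return the token count.
    static int count_tokens(const char* js, size_t len) {
        jsmn_parser p;
        jsmntok_t tokens[64];
        jsmn_init(&p);
        int n = jsmn_parse(&p, js, len, tokens, 64);
        // n < 0 is a jsmnerr (NOMEM / INVAL / PART); otherwise it is the number
        // of tokens filled, each carrying a type plus start/end offsets into js.
        return n;
    }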
|
||||
|
||||
#ifndef JSMN_HEADER
|
||||
/**
|
||||
* Allocates a fresh unused token from the token pool.
|
||||
*/
|
||||
static jsmntok_t*
|
||||
jsmn_alloc_token(jsmn_parser* parser,
|
||||
jsmntok_t* tokens,
|
||||
const size_t num_tokens) {
|
||||
jsmntok_t* tok;
|
||||
if (parser->toknext >= num_tokens) {
|
||||
return NULL;
|
||||
}
|
||||
tok = &tokens[parser->toknext++];
|
||||
tok->start = tok->end = -1;
|
||||
tok->size = 0;
|
||||
#ifdef JSMN_PARENT_LINKS
|
||||
tok->parent = -1;
|
||||
#endif
|
||||
return tok;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fills token type and boundaries.
|
||||
*/
|
||||
static void
|
||||
jsmn_fill_token(jsmntok_t* token,
|
||||
const jsmntype_t type,
|
||||
const int start,
|
||||
const int end) {
|
||||
token->type = type;
|
||||
token->start = start;
|
||||
token->end = end;
|
||||
token->size = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fills next available token with JSON primitive.
|
||||
*/
|
||||
static int
|
||||
jsmn_parse_primitive(jsmn_parser* parser,
|
||||
const char* js,
|
||||
const size_t len,
|
||||
jsmntok_t* tokens,
|
||||
const size_t num_tokens) {
|
||||
jsmntok_t* token;
|
||||
int start;
|
||||
|
||||
start = parser->pos;
|
||||
|
||||
for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) {
|
||||
switch (js[parser->pos]) {
|
||||
#ifndef JSMN_STRICT
|
||||
/* In strict mode primitive must be followed by "," or "}" or "]" */
|
||||
case ':':
|
||||
#endif
|
||||
case '\t':
|
||||
case '\r':
|
||||
case '\n':
|
||||
case ' ':
|
||||
case ',':
|
||||
case ']':
|
||||
case '}':
|
||||
goto found;
|
||||
default:
|
||||
/* to quiet a warning from gcc*/
|
||||
break;
|
||||
}
|
||||
if (js[parser->pos] < 32 || js[parser->pos] >= 127) {
|
||||
parser->pos = start;
|
||||
return JSMN_ERROR_INVAL;
|
||||
}
|
||||
}
|
||||
#ifdef JSMN_STRICT
|
||||
/* In strict mode primitive must be followed by a comma/object/array */
|
||||
parser->pos = start;
|
||||
return JSMN_ERROR_PART;
|
||||
#endif
|
||||
|
||||
found:
|
||||
if (tokens == NULL) {
|
||||
parser->pos--;
|
||||
return 0;
|
||||
}
|
||||
token = jsmn_alloc_token(parser, tokens, num_tokens);
|
||||
if (token == NULL) {
|
||||
parser->pos = start;
|
||||
return JSMN_ERROR_NOMEM;
|
||||
}
|
||||
jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos);
|
||||
#ifdef JSMN_PARENT_LINKS
|
||||
token->parent = parser->toksuper;
|
||||
#endif
|
||||
parser->pos--;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fills next token with JSON string.
|
||||
*/
|
||||
static int
|
||||
jsmn_parse_string(jsmn_parser* parser,
|
||||
const char* js,
|
||||
const size_t len,
|
||||
jsmntok_t* tokens,
|
||||
const size_t num_tokens) {
|
||||
jsmntok_t* token;
|
||||
|
||||
int start = parser->pos;
|
||||
|
||||
/* Skip starting quote */
|
||||
parser->pos++;
|
||||
|
||||
for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) {
|
||||
char c = js[parser->pos];
|
||||
|
||||
/* Quote: end of string */
|
||||
if (c == '\"') {
|
||||
if (tokens == NULL) {
|
||||
return 0;
|
||||
}
|
||||
token = jsmn_alloc_token(parser, tokens, num_tokens);
|
||||
if (token == NULL) {
|
||||
parser->pos = start;
|
||||
return JSMN_ERROR_NOMEM;
|
||||
}
|
||||
jsmn_fill_token(token, JSMN_STRING, start + 1, parser->pos);
|
||||
#ifdef JSMN_PARENT_LINKS
|
||||
token->parent = parser->toksuper;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Backslash: Quoted symbol expected */
|
||||
if (c == '\\' && parser->pos + 1 < len) {
|
||||
int i;
|
||||
parser->pos++;
|
||||
switch (js[parser->pos]) {
|
||||
/* Allowed escaped symbols */
|
||||
case '\"':
|
||||
case '/':
|
||||
case '\\':
|
||||
case 'b':
|
||||
case 'f':
|
||||
case 'r':
|
||||
case 'n':
|
||||
case 't':
|
||||
break;
|
||||
/* Allows escaped symbol \uXXXX */
|
||||
case 'u':
|
||||
parser->pos++;
|
||||
for (i = 0;
|
||||
i < 4 && parser->pos < len && js[parser->pos] != '\0';
|
||||
i++) {
|
||||
/* If it isn't a hex character we have an error */
|
||||
if (!((js[parser->pos] >= 48 &&
|
||||
js[parser->pos] <= 57) || /* 0-9 */
|
||||
(js[parser->pos] >= 65 &&
|
||||
js[parser->pos] <= 70) || /* A-F */
|
||||
(js[parser->pos] >= 97 &&
|
||||
js[parser->pos] <= 102))) { /* a-f */
|
||||
parser->pos = start;
|
||||
return JSMN_ERROR_INVAL;
|
||||
}
|
||||
parser->pos++;
|
||||
}
|
||||
parser->pos--;
|
||||
break;
|
||||
/* Unexpected symbol */
|
||||
default:
|
||||
parser->pos = start;
|
||||
return JSMN_ERROR_INVAL;
|
||||
}
|
||||
}
|
||||
}
|
||||
parser->pos = start;
|
||||
return JSMN_ERROR_PART;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse JSON string and fill tokens.
|
||||
*/
|
||||
JSMN_API int
|
||||
jsmn_parse(jsmn_parser* parser,
|
||||
const char* js,
|
||||
const size_t len,
|
||||
jsmntok_t* tokens,
|
||||
const unsigned int num_tokens) {
|
||||
int r;
|
||||
int i;
|
||||
jsmntok_t* token;
|
||||
int count = parser->toknext;
|
||||
|
||||
for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) {
|
||||
char c;
|
||||
jsmntype_t type;
|
||||
|
||||
c = js[parser->pos];
|
||||
switch (c) {
|
||||
case '{':
|
||||
case '[':
|
||||
count++;
|
||||
if (tokens == NULL) {
|
||||
break;
|
||||
}
|
||||
token = jsmn_alloc_token(parser, tokens, num_tokens);
|
||||
if (token == NULL) {
|
||||
return JSMN_ERROR_NOMEM;
|
||||
}
|
||||
if (parser->toksuper != -1) {
|
||||
jsmntok_t* t = &tokens[parser->toksuper];
|
||||
#ifdef JSMN_STRICT
|
||||
/* In strict mode an object or array can't become a key */
|
||||
if (t->type == JSMN_OBJECT) {
|
||||
return JSMN_ERROR_INVAL;
|
||||
}
|
||||
#endif
|
||||
t->size++;
|
||||
#ifdef JSMN_PARENT_LINKS
|
||||
token->parent = parser->toksuper;
|
||||
#endif
|
||||
}
|
||||
token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY);
|
||||
token->start = parser->pos;
|
||||
parser->toksuper = parser->toknext - 1;
|
||||
break;
|
||||
case '}':
|
||||
case ']':
|
||||
if (tokens == NULL) {
|
||||
break;
|
||||
}
|
||||
type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY);
|
||||
#ifdef JSMN_PARENT_LINKS
|
||||
if (parser->toknext < 1) {
|
||||
return JSMN_ERROR_INVAL;
|
||||
}
|
||||
token = &tokens[parser->toknext - 1];
|
||||
for (;;) {
|
||||
if (token->start != -1 && token->end == -1) {
|
||||
if (token->type != type) {
|
||||
return JSMN_ERROR_INVAL;
|
||||
}
|
||||
token->end = parser->pos + 1;
|
||||
parser->toksuper = token->parent;
|
||||
break;
|
||||
}
|
||||
if (token->parent == -1) {
|
||||
if (token->type != type || parser->toksuper == -1) {
|
||||
return JSMN_ERROR_INVAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
token = &tokens[token->parent];
|
||||
}
|
||||
#else
|
||||
for (i = parser->toknext - 1; i >= 0; i--) {
|
||||
token = &tokens[i];
|
||||
if (token->start != -1 && token->end == -1) {
|
||||
if (token->type != type) {
|
||||
return JSMN_ERROR_INVAL;
|
||||
}
|
||||
parser->toksuper = -1;
|
||||
token->end = parser->pos + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* Error if unmatched closing bracket */
|
||||
if (i == -1) {
|
||||
return JSMN_ERROR_INVAL;
|
||||
}
|
||||
for (; i >= 0; i--) {
|
||||
token = &tokens[i];
|
||||
if (token->start != -1 && token->end == -1) {
|
||||
parser->toksuper = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
case '\"':
|
||||
r = jsmn_parse_string(parser, js, len, tokens, num_tokens);
|
||||
if (r < 0) {
|
||||
return r;
|
||||
}
|
||||
count++;
|
||||
if (parser->toksuper != -1 && tokens != NULL) {
|
||||
tokens[parser->toksuper].size++;
|
||||
}
|
||||
break;
|
||||
case '\t':
|
||||
case '\r':
|
||||
case '\n':
|
||||
case ' ':
|
||||
break;
|
||||
case ':':
|
||||
parser->toksuper = parser->toknext - 1;
|
||||
break;
|
||||
case ',':
|
||||
if (tokens != NULL && parser->toksuper != -1 &&
|
||||
tokens[parser->toksuper].type != JSMN_ARRAY &&
|
||||
tokens[parser->toksuper].type != JSMN_OBJECT) {
|
||||
#ifdef JSMN_PARENT_LINKS
|
||||
parser->toksuper = tokens[parser->toksuper].parent;
|
||||
#else
|
||||
for (i = parser->toknext - 1; i >= 0; i--) {
|
||||
if (tokens[i].type == JSMN_ARRAY ||
|
||||
tokens[i].type == JSMN_OBJECT) {
|
||||
if (tokens[i].start != -1 && tokens[i].end == -1) {
|
||||
parser->toksuper = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
#ifdef JSMN_STRICT
|
||||
/* In strict mode primitives are: numbers and booleans */
|
||||
case '-':
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
case 't':
|
||||
case 'f':
|
||||
case 'n':
|
||||
/* And they must not be keys of the object */
|
||||
if (tokens != NULL && parser->toksuper != -1) {
|
||||
const jsmntok_t* t = &tokens[parser->toksuper];
|
||||
if (t->type == JSMN_OBJECT ||
|
||||
(t->type == JSMN_STRING && t->size != 0)) {
|
||||
return JSMN_ERROR_INVAL;
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* In non-strict mode every unquoted value is a primitive */
|
||||
default:
|
||||
#endif
|
||||
r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens);
|
||||
if (r < 0) {
|
||||
return r;
|
||||
}
|
||||
count++;
|
||||
if (parser->toksuper != -1 && tokens != NULL) {
|
||||
tokens[parser->toksuper].size++;
|
||||
}
|
||||
break;
|
||||
|
||||
#ifdef JSMN_STRICT
|
||||
/* Unexpected char in strict mode */
|
||||
default:
|
||||
return JSMN_ERROR_INVAL;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
if (tokens != NULL) {
|
||||
for (i = parser->toknext - 1; i >= 0; i--) {
|
||||
/* Unmatched opened object or array */
|
||||
if (tokens[i].start != -1 && tokens[i].end == -1) {
|
||||
return JSMN_ERROR_PART;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new parser based over a given buffer with an array of tokens
|
||||
* available.
|
||||
*/
|
||||
JSMN_API void
|
||||
jsmn_init(jsmn_parser* parser) {
|
||||
parser->pos = 0;
|
||||
parser->toknext = 0;
|
||||
parser->toksuper = -1;
|
||||
}
|
||||
|
||||
#endif /* JSMN_HEADER */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* JSMN_H */
|
|
@ -176,6 +176,7 @@ class QueryContext : public Context {
|
|||
const milvus::segcore::SegmentInternalInterface* segment,
|
||||
int64_t active_count,
|
||||
milvus::Timestamp timestamp,
|
||||
int32_t consistency_level = 0,
|
||||
std::shared_ptr<QueryConfig> query_config =
|
||||
std::make_shared<QueryConfig>(),
|
||||
folly::Executor* executor = nullptr,
|
||||
|
@ -187,7 +188,8 @@ class QueryContext : public Context {
|
|||
active_count_(active_count),
|
||||
query_timestamp_(timestamp),
|
||||
query_config_(query_config),
|
||||
executor_(executor) {
|
||||
executor_(executor),
|
||||
consistency_level_(consistency_level) {
|
||||
}
|
||||
|
||||
folly::Executor*
|
||||
|
@ -270,6 +272,11 @@ class QueryContext : public Context {
|
|||
return std::move(retrieve_result_);
|
||||
}
|
||||
|
||||
int32_t
|
||||
get_consistency_level() {
|
||||
return consistency_level_;
|
||||
}
|
||||
|
||||
private:
|
||||
folly::Executor* executor_;
|
||||
//folly::Executor::KeepAlive<> executor_keepalive_;
|
||||
|
@ -291,6 +298,8 @@ class QueryContext : public Context {
|
|||
// used for store segment search/retrieve result
|
||||
milvus::SearchResult search_result_;
|
||||
milvus::RetrieveResult retrieve_result_;
|
||||
|
||||
int32_t consistency_level_ = 0;
|
||||
};
|
||||
|
||||
// Represent the state of one thread of query execution.
|
||||
|
|
|
@ -449,7 +449,8 @@ class PhyBinaryArithOpEvalRangeExpr : public SegmentExpr {
|
|||
const std::string& name,
|
||||
const segcore::SegmentInternalInterface* segment,
|
||||
int64_t active_count,
|
||||
int64_t batch_size)
|
||||
int64_t batch_size,
|
||||
int32_t consistency_level)
|
||||
: SegmentExpr(std::move(input),
|
||||
name,
|
||||
segment,
|
||||
|
@ -457,7 +458,8 @@ class PhyBinaryArithOpEvalRangeExpr : public SegmentExpr {
|
|||
expr->column_.nested_path_,
|
||||
DataType::NONE,
|
||||
active_count,
|
||||
batch_size),
|
||||
batch_size,
|
||||
consistency_level),
|
||||
expr_(expr) {
|
||||
}
|
||||
|
||||
|
|
|
@ -385,6 +385,9 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(EvalCtx& context) {
|
|||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
auto* input = context.get_offset_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (CanUseJsonKeyIndex(field_id) && !has_offset_input_) {
|
||||
return ExecRangeVisitorImplForJsonForIndex<ValueType>();
|
||||
}
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
|
@ -505,6 +508,246 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(EvalCtx& context) {
|
|||
return res_vec;
|
||||
}
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJsonForIndex() {
|
||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||
std::string_view,
|
||||
ValueType>;
|
||||
auto real_batch_size = current_data_chunk_pos_ + batch_size_ > active_count_
|
||||
? active_count_ - current_data_chunk_pos_
|
||||
: batch_size_;
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
#define BinaryRangeJSONIndexCompare(cmp) \
|
||||
do { \
|
||||
auto val = json.at<GetType>(offset, size); \
|
||||
if (val.error()) { \
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||
auto val = json.at<double>(offset, size); \
|
||||
return !val.error() && (cmp); \
|
||||
} \
|
||||
return false; \
|
||||
} \
|
||||
return (cmp); \
|
||||
} while (false)
|
||||
#define BinaryRangeJSONTypeCompare(cmp) \
|
||||
do { \
|
||||
if constexpr (std::is_same_v<GetType, std::string_view>) { \
|
||||
if (type == uint8_t(milvus::index::JSONType::STRING)) { \
|
||||
auto val = json.at_string(offset, size); \
|
||||
return (cmp); \
|
||||
} else { \
|
||||
return false; \
|
||||
} \
|
||||
} else if constexpr (std::is_same_v<GetType, double>) { \
|
||||
if (type == uint8_t(milvus::index::JSONType::INT64)) { \
|
||||
auto val = \
|
||||
std::stoll(std::string(json.at_string(offset, size))); \
|
||||
return (cmp); \
|
||||
} else if (type == uint8_t(milvus::index::JSONType::DOUBLE)) { \
|
||||
auto val = \
|
||||
std::stod(std::string(json.at_string(offset, size))); \
|
||||
return (cmp); \
|
||||
} else { \
|
||||
return false; \
|
||||
} \
|
||||
} else if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||
if (type == uint8_t(milvus::index::JSONType::INT64)) { \
|
||||
auto val = \
|
||||
std::stoll(std::string(json.at_string(offset, size))); \
|
||||
return (cmp); \
|
||||
} else if (type == uint8_t(milvus::index::JSONType::DOUBLE)) { \
|
||||
auto val = \
|
||||
std::stod(std::string(json.at_string(offset, size))); \
|
||||
return (cmp); \
|
||||
} else { \
|
||||
return false; \
|
||||
} \
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
#define BinaryRangeJSONTypeCompareWithValue(cmp) \
|
||||
do { \
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||
if (type == uint8_t(milvus::index::JSONType::FLOAT)) { \
|
||||
float val = *reinterpret_cast<float*>(&value); \
|
||||
return (cmp); \
|
||||
} else { \
|
||||
int64_t val = value; \
|
||||
return (cmp); \
|
||||
} \
|
||||
} else if constexpr (std::is_same_v<GetType, double>) { \
|
||||
if (type == uint8_t(milvus::index::JSONType::FLOAT)) { \
|
||||
float val = *reinterpret_cast<float*>(&value); \
|
||||
return (cmp); \
|
||||
} else { \
|
||||
int64_t val = value; \
|
||||
return (cmp); \
|
||||
} \
|
||||
} else if constexpr (std::is_same_v<GetType, bool>) { \
|
||||
bool val = *reinterpret_cast<bool*>(&value); \
|
||||
return (cmp); \
|
||||
} \
|
||||
} while (false)
|
||||
bool lower_inclusive = expr_->lower_inclusive_;
|
||||
bool upper_inclusive = expr_->upper_inclusive_;
|
||||
ValueType val1 = GetValueFromProto<ValueType>(expr_->lower_val_);
|
||||
ValueType val2 = GetValueFromProto<ValueType>(expr_->upper_val_);
|
||||
if (cached_index_chunk_id_ != 0) {
|
||||
const segcore::SegmentInternalInterface* segment = nullptr;
|
||||
if (segment_->type() == SegmentType::Growing) {
|
||||
segment =
|
||||
dynamic_cast<const segcore::SegmentGrowingImpl*>(segment_);
|
||||
} else if (segment_->type() == SegmentType::Sealed) {
|
||||
segment = dynamic_cast<const segcore::SegmentSealed*>(segment_);
|
||||
}
|
||||
auto field_id = expr_->column_.field_id_;
|
||||
auto* index = segment->GetJsonKeyIndex(field_id);
|
||||
Assert(index != nullptr);
|
||||
auto filter_func = [segment,
|
||||
&field_id,
|
||||
val1,
|
||||
val2,
|
||||
lower_inclusive,
|
||||
upper_inclusive](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
if (valid) {
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) {
|
||||
if (type != uint8_t(milvus::index::JSONType::INT32) &&
|
||||
type != uint8_t(milvus::index::JSONType::INT64) &&
|
||||
type != uint8_t(milvus::index::JSONType::FLOAT) &&
|
||||
type != uint8_t(milvus::index::JSONType::DOUBLE)) {
|
||||
return false;
|
||||
}
|
||||
} else if constexpr (std::is_same_v<GetType,
|
||||
std::string_view>) {
|
||||
if (type != uint8_t(milvus::index::JSONType::STRING) &&
|
||||
type !=
|
||||
uint8_t(milvus::index::JSONType::STRING_ESCAPE)) {
|
||||
return false;
|
||||
}
|
||||
} else if constexpr (std::is_same_v<GetType, double>) {
|
||||
if (type != uint8_t(milvus::index::JSONType::INT32) &&
|
||||
type != uint8_t(milvus::index::JSONType::INT64) &&
|
||||
type != uint8_t(milvus::index::JSONType::FLOAT) &&
|
||||
type != uint8_t(milvus::index::JSONType::DOUBLE)) {
|
||||
return false;
|
||||
}
|
||||
} else if constexpr (std::is_same_v<GetType, bool>) {
|
||||
if (type != uint8_t(milvus::index::JSONType::BOOL)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (lower_inclusive && upper_inclusive) {
|
||||
if (type == uint8_t(milvus::index::JSONType::FLOAT)) {
|
||||
BinaryRangeJSONTypeCompareWithValue(
|
||||
static_cast<float>(val1) <= val &&
|
||||
val <= static_cast<float>(val2));
|
||||
} else {
|
||||
BinaryRangeJSONTypeCompareWithValue(val1 <= val &&
|
||||
val <= val2);
|
||||
}
|
||||
} else if (lower_inclusive && !upper_inclusive) {
|
||||
if (type == uint8_t(milvus::index::JSONType::FLOAT)) {
|
||||
BinaryRangeJSONTypeCompareWithValue(
|
||||
static_cast<float>(val1) <= val &&
|
||||
val < static_cast<float>(val2));
|
||||
} else {
|
||||
BinaryRangeJSONTypeCompareWithValue(val1 <= val &&
|
||||
val < val2);
|
||||
}
|
||||
} else if (!lower_inclusive && upper_inclusive) {
|
||||
if (type == uint8_t(milvus::index::JSONType::FLOAT)) {
|
||||
BinaryRangeJSONTypeCompareWithValue(
|
||||
static_cast<float>(val1) < val &&
|
||||
val <= static_cast<float>(val2));
|
||||
} else {
|
||||
BinaryRangeJSONTypeCompareWithValue(val1 < val &&
|
||||
val <= val2);
|
||||
}
|
||||
} else {
|
||||
if (type == uint8_t(milvus::index::JSONType::FLOAT)) {
|
||||
BinaryRangeJSONTypeCompareWithValue(
|
||||
static_cast<float>(val1) < val &&
|
||||
val < static_cast<float>(val2));
|
||||
} else {
|
||||
BinaryRangeJSONTypeCompareWithValue(val1 < val &&
|
||||
val < val2);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto json_pair = segment->GetJsonData(field_id, row_id);
|
||||
if (!json_pair.second) {
|
||||
return false;
|
||||
}
|
||||
auto json = milvus::Json(json_pair.first.data(),
|
||||
json_pair.first.size());
|
||||
if (lower_inclusive && upper_inclusive) {
|
||||
if (type == uint8_t(milvus::index::JSONType::STRING) ||
|
||||
type == uint8_t(milvus::index::JSONType::DOUBLE) ||
|
||||
type == uint8_t(milvus::index::JSONType::INT64)) {
|
||||
BinaryRangeJSONTypeCompare(val1 <= val && val <= val2);
|
||||
} else {
|
||||
BinaryRangeJSONIndexCompare(
|
||||
val1 <= ValueType(val.value()) &&
|
||||
ValueType(val.value()) <= val2);
|
||||
}
|
||||
} else if (lower_inclusive && !upper_inclusive) {
|
||||
if (type == uint8_t(milvus::index::JSONType::STRING) ||
|
||||
type == uint8_t(milvus::index::JSONType::DOUBLE) ||
|
||||
type == uint8_t(milvus::index::JSONType::INT64)) {
|
||||
BinaryRangeJSONTypeCompare(val1 <= val && val < val2);
|
||||
} else {
|
||||
BinaryRangeJSONIndexCompare(
|
||||
val1 <= ValueType(val.value()) &&
|
||||
ValueType(val.value()) < val2);
|
||||
}
|
||||
} else if (!lower_inclusive && upper_inclusive) {
|
||||
if (type == uint8_t(milvus::index::JSONType::STRING) ||
|
||||
type == uint8_t(milvus::index::JSONType::DOUBLE) ||
|
||||
type == uint8_t(milvus::index::JSONType::INT64)) {
|
||||
BinaryRangeJSONTypeCompare(val1 < val && val <= val2);
|
||||
} else {
|
||||
BinaryRangeJSONIndexCompare(
|
||||
val1 < ValueType(val.value()) &&
|
||||
ValueType(val.value()) <= val2);
|
||||
}
|
||||
} else {
|
||||
if (type == uint8_t(milvus::index::JSONType::STRING) ||
|
||||
type == uint8_t(milvus::index::JSONType::DOUBLE) ||
|
||||
type == uint8_t(milvus::index::JSONType::INT64)) {
|
||||
BinaryRangeJSONTypeCompare(val1 < val && val < val2);
|
||||
} else {
|
||||
BinaryRangeJSONIndexCompare(
|
||||
val1 < ValueType(val.value()) &&
|
||||
ValueType(val.value()) < val2);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
bool is_growing = segment_->type() == SegmentType::Growing;
|
||||
bool is_strong_consistency = consistency_level_ == 0;
|
||||
cached_index_chunk_res_ = index
|
||||
->FilterByPath(pointer,
|
||||
active_count_,
|
||||
is_growing,
|
||||
is_strong_consistency,
|
||||
filter_func)
|
||||
.clone();
|
||||
cached_index_chunk_id_ = 0;
|
||||
}
|
||||
TargetBitmap result;
|
||||
result.append(
|
||||
cached_index_chunk_res_, current_data_chunk_pos_, real_batch_size);
|
||||
current_data_chunk_pos_ += real_batch_size;
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
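
For reference, the shape of the callback that FilterByPath consumes above (and in the other *ByKeyIndex methods in this diff). The parameter meanings are inferred from the call sites, so treat this as a descriptive sketch rather than the index's documented contract:

    // Minimal callback matching the six-argument contract used above.
    auto accept_positive_int = [](bool valid, uint8_t type, uint32_t /*row_id*/,
                                  uint16_t /*offset*/, uint16_t /*size*/,
                                  int32_t value) -> bool {
        // valid == true : the scalar is inlined in `value`, tagged by `type`;
        // valid == false: the caller fetches the row's JSON via GetJsonData and
        //                 parses the span at (offset, size) instead.
        return valid && type == uint8_t(milvus::index::JSONType::INT64) && value > 0;
    };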
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(EvalCtx& context) {
|
||||
|
|
|
@ -245,7 +245,8 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
|||
const std::string& name,
|
||||
const segcore::SegmentInternalInterface* segment,
|
||||
int64_t active_count,
|
||||
int64_t batch_size)
|
||||
int64_t batch_size,
|
||||
int32_t consistency_level)
|
||||
: SegmentExpr(std::move(input),
|
||||
name,
|
||||
segment,
|
||||
|
@ -253,7 +254,8 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
|||
expr->column_.nested_path_,
|
||||
DataType::NONE,
|
||||
active_count,
|
||||
batch_size),
|
||||
batch_size,
|
||||
consistency_level),
|
||||
expr_(expr) {
|
||||
}
|
||||
|
||||
|
@ -308,6 +310,10 @@ class PhyBinaryRangeFilterExpr : public SegmentExpr {
|
|||
VectorPtr
|
||||
ExecRangeVisitorImplForJson(EvalCtx& context);
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
ExecRangeVisitorImplForJsonForIndex();
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
ExecRangeVisitorImplForArray(EvalCtx& context);
|
||||
|
|
|
@ -98,6 +98,9 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(EvalCtx& context) {
|
|||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (CanUseJsonKeyIndex(field_id) && !has_offset_input_) {
|
||||
return EvalJsonExistsForDataSegmentForIndex();
|
||||
}
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
|
@ -159,5 +162,49 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(EvalCtx& context) {
|
|||
return res_vec;
|
||||
}
|
||||
|
||||
VectorPtr
|
||||
PhyExistsFilterExpr::EvalJsonExistsForDataSegmentForIndex() {
|
||||
auto real_batch_size = current_data_chunk_pos_ + batch_size_ > active_count_
|
||||
? active_count_ - current_data_chunk_pos_
|
||||
: batch_size_;
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
if (cached_index_chunk_id_ != 0) {
|
||||
const segcore::SegmentInternalInterface* segment = nullptr;
|
||||
if (segment_->type() == SegmentType::Growing) {
|
||||
segment =
|
||||
dynamic_cast<const segcore::SegmentGrowingImpl*>(segment_);
|
||||
} else if (segment_->type() == SegmentType::Sealed) {
|
||||
segment = dynamic_cast<const segcore::SegmentSealed*>(segment_);
|
||||
}
|
||||
auto field_id = expr_->column_.field_id_;
|
||||
auto* index = segment->GetJsonKeyIndex(field_id);
|
||||
Assert(index != nullptr);
|
||||
auto filter_func = [segment, field_id, pointer](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
uint32_t value) {
|
||||
return true;
|
||||
};
|
||||
bool is_growing = segment_->type() == SegmentType::Growing;
|
||||
bool is_strong_consistency = consistency_level_ == 0;
|
||||
cached_index_chunk_res_ = index
|
||||
->FilterByPath(pointer,
|
||||
active_count_,
|
||||
is_growing,
|
||||
is_strong_consistency,
|
||||
filter_func)
|
||||
.clone();
|
||||
cached_index_chunk_id_ = 0;
|
||||
}
|
||||
TargetBitmap result;
|
||||
result.append(
|
||||
cached_index_chunk_res_, current_data_chunk_pos_, real_batch_size);
|
||||
current_data_chunk_pos_ += real_batch_size;
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
|
||||
} //namespace exec
|
||||
} // namespace milvus
|
||||
|
|
|
@ -42,7 +42,8 @@ class PhyExistsFilterExpr : public SegmentExpr {
|
|||
const std::string& name,
|
||||
const segcore::SegmentInternalInterface* segment,
|
||||
int64_t active_count,
|
||||
int64_t batch_size)
|
||||
int64_t batch_size,
|
||||
int32_t consistency_level)
|
||||
: SegmentExpr(std::move(input),
|
||||
name,
|
||||
segment,
|
||||
|
@ -51,7 +52,8 @@ class PhyExistsFilterExpr : public SegmentExpr {
|
|||
DataType::NONE,
|
||||
active_count,
|
||||
batch_size,
|
||||
true),
|
||||
true,
|
||||
consistency_level),
|
||||
expr_(expr) {
|
||||
}
|
||||
|
||||
|
@ -80,6 +82,9 @@ class PhyExistsFilterExpr : public SegmentExpr {
|
|||
VectorPtr
|
||||
EvalJsonExistsForIndex();
|
||||
|
||||
VectorPtr
|
||||
EvalJsonExistsForDataSegmentForIndex();
|
||||
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::ExistsExpr> expr_;
|
||||
};
|
||||
|
|
|
@ -154,7 +154,6 @@ CompileExpression(const expr::TypedExprPtr& expr,
|
|||
const std::unordered_set<std::string>& flatten_candidates,
|
||||
bool enable_constant_folding) {
|
||||
ExprPtr result;
|
||||
|
||||
auto compiled_inputs = CompileInputs(expr, context, flatten_candidates);
|
||||
|
||||
auto GetTypes = [](const std::vector<ExprPtr>& exprs) {
|
||||
|
@ -183,7 +182,8 @@ CompileExpression(const expr::TypedExprPtr& expr,
|
|||
"PhyUnaryRangeFilterExpr",
|
||||
context->get_segment(),
|
||||
context->get_active_count(),
|
||||
context->query_config()->get_expr_batch_size());
|
||||
context->query_config()->get_expr_batch_size(),
|
||||
context->get_consistency_level());
|
||||
} else if (auto casted_expr = std::dynamic_pointer_cast<
|
||||
const milvus::expr::LogicalUnaryExpr>(expr)) {
|
||||
result = std::make_shared<PhyLogicalUnaryExpr>(
|
||||
|
@ -197,7 +197,8 @@ CompileExpression(const expr::TypedExprPtr& expr,
|
|||
context->get_segment(),
|
||||
context->get_active_count(),
|
||||
context->get_query_timestamp(),
|
||||
context->query_config()->get_expr_batch_size());
|
||||
context->query_config()->get_expr_batch_size(),
|
||||
context->get_consistency_level());
|
||||
} else if (auto casted_expr = std::dynamic_pointer_cast<
|
||||
const milvus::expr::LogicalBinaryExpr>(expr)) {
|
||||
if (casted_expr->op_type_ ==
|
||||
|
@ -220,7 +221,8 @@ CompileExpression(const expr::TypedExprPtr& expr,
|
|||
"PhyBinaryRangeFilterExpr",
|
||||
context->get_segment(),
|
||||
context->get_active_count(),
|
||||
context->query_config()->get_expr_batch_size());
|
||||
context->query_config()->get_expr_batch_size(),
|
||||
context->get_consistency_level());
|
||||
} else if (auto casted_expr = std::dynamic_pointer_cast<
|
||||
const milvus::expr::AlwaysTrueExpr>(expr)) {
|
||||
result = std::make_shared<PhyAlwaysTrueExpr>(
|
||||
|
@ -238,7 +240,8 @@ CompileExpression(const expr::TypedExprPtr& expr,
|
|||
"PhyBinaryArithOpEvalRangeExpr",
|
||||
context->get_segment(),
|
||||
context->get_active_count(),
|
||||
context->query_config()->get_expr_batch_size());
|
||||
context->query_config()->get_expr_batch_size(),
|
||||
context->get_consistency_level());
|
||||
} else if (auto casted_expr =
|
||||
std::dynamic_pointer_cast<const milvus::expr::CompareExpr>(
|
||||
expr)) {
|
||||
|
@ -258,7 +261,8 @@ CompileExpression(const expr::TypedExprPtr& expr,
|
|||
"PhyExistsFilterExpr",
|
||||
context->get_segment(),
|
||||
context->get_active_count(),
|
||||
context->query_config()->get_expr_batch_size());
|
||||
context->query_config()->get_expr_batch_size(),
|
||||
context->get_consistency_level());
|
||||
} else if (auto casted_expr = std::dynamic_pointer_cast<
|
||||
const milvus::expr::JsonContainsExpr>(expr)) {
|
||||
result = std::make_shared<PhyJsonContainsFilterExpr>(
|
||||
|
@ -267,7 +271,8 @@ CompileExpression(const expr::TypedExprPtr& expr,
|
|||
"PhyJsonContainsFilterExpr",
|
||||
context->get_segment(),
|
||||
context->get_active_count(),
|
||||
context->query_config()->get_expr_batch_size());
|
||||
context->query_config()->get_expr_batch_size(),
|
||||
context->get_consistency_level());
|
||||
} else if (auto value_expr =
|
||||
std::dynamic_pointer_cast<const milvus::expr::ValueExpr>(
|
||||
expr)) {
|
||||
|
@ -298,7 +303,8 @@ CompileExpression(const expr::TypedExprPtr& expr,
|
|||
"PhyNullExpr",
|
||||
context->get_segment(),
|
||||
context->get_active_count(),
|
||||
context->query_config()->get_expr_batch_size());
|
||||
context->query_config()->get_expr_batch_size(),
|
||||
context->get_consistency_level());
|
||||
} else {
|
||||
PanicInfo(ExprInvalid, "unsupport expr: ", expr->ToString());
|
||||
}
|
||||
|
@ -481,7 +487,8 @@ ConvertMultiOrToInExpr(std::vector<std::shared_ptr<Expr>>& exprs,
|
|||
query_context->get_segment(),
|
||||
query_context->get_active_count(),
|
||||
query_context->get_query_timestamp(),
|
||||
query_context->query_config()->get_expr_batch_size());
|
||||
query_context->query_config()->get_expr_batch_size(),
|
||||
query_context->get_consistency_level());
|
||||
}
|
||||
|
||||
inline void
|
||||
|
|
|
@ -31,7 +31,9 @@
|
|||
#include "expr/ITypeExpr.h"
|
||||
#include "log/Log.h"
|
||||
#include "query/PlanProto.h"
|
||||
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
#include "segcore/SegmentInterface.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
namespace milvus {
|
||||
namespace exec {
|
||||
|
||||
|
@ -138,7 +140,9 @@ class SegmentExpr : public Expr {
|
|||
const DataType value_type,
|
||||
int64_t active_count,
|
||||
int64_t batch_size,
|
||||
int32_t consistency_level,
|
||||
bool allow_any_json_cast_type = false)
|
||||
|
||||
: Expr(DataType::BOOL, std::move(input), name),
|
||||
segment_(segment),
|
||||
field_id_(field_id),
|
||||
|
@ -146,7 +150,8 @@ class SegmentExpr : public Expr {
|
|||
value_type_(value_type),
|
||||
allow_any_json_cast_type_(allow_any_json_cast_type),
|
||||
active_count_(active_count),
|
||||
batch_size_(batch_size) {
|
||||
batch_size_(batch_size),
|
||||
consistency_level_(consistency_level) {
|
||||
size_per_chunk_ = segment_->size_per_chunk();
|
||||
AssertInfo(
|
||||
batch_size_ > 0,
|
||||
|
@ -1219,6 +1224,23 @@ class SegmentExpr : public Expr {
|
|||
use_index_ = false;
|
||||
}
|
||||
|
||||
bool
|
||||
CanUseJsonKeyIndex(FieldId field_id) const {
|
||||
if (segment_->type() == SegmentType::Sealed) {
|
||||
auto sealed_seg =
|
||||
dynamic_cast<const segcore::SegmentSealed*>(segment_);
|
||||
Assert(sealed_seg != nullptr);
|
||||
if (sealed_seg->GetJsonKeyIndex(field_id) != nullptr) {
|
||||
return true;
|
||||
}
|
||||
} else if (segment_->type() == SegmentType ::Growing) {
|
||||
if (segment_->GetJsonKeyIndex(field_id) != nullptr) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
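
How the call sites in this diff use the helper above, excerpted from ExecRangeVisitorImplForJson (the other Exec* methods follow the same guard):

    FieldId field_id = expr_->column_.field_id_;
    if (CanUseJsonKeyIndex(field_id) && !has_offset_input_) {
        return ExecRangeVisitorImplForJsonForIndex<ValueType>();
    }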
|
||||
|
||||
protected:
|
||||
const segcore::SegmentInternalInterface* segment_;
|
||||
const FieldId field_id_;
|
||||
|
@ -1255,6 +1277,7 @@ class SegmentExpr : public Expr {
|
|||
|
||||
// Cache for text match.
|
||||
std::shared_ptr<TargetBitmap> cached_match_res_{nullptr};
|
||||
int32_t consistency_level_{0};
|
||||
};
|
||||
|
||||
bool
|
||||
|
|
|
@ -259,6 +259,11 @@ PhyJsonContainsFilterExpr::ExecJsonContains(EvalCtx& context) {
|
|||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (CanUseJsonKeyIndex(field_id) && !has_offset_input_) {
|
||||
return ExecJsonContainsByKeyIndex<ExprValueType>();
|
||||
}
|
||||
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
|
@ -349,10 +354,99 @@ PhyJsonContainsFilterExpr::ExecJsonContains(EvalCtx& context) {
|
|||
return res_vec;
|
||||
}
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsByKeyIndex() {
|
||||
using GetType =
|
||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||
std::string_view,
|
||||
ExprValueType>;
|
||||
auto real_batch_size = current_data_chunk_pos_ + batch_size_ > active_count_
|
||||
? active_count_ - current_data_chunk_pos_
|
||||
: batch_size_;
|
||||
std::unordered_set<GetType> elements;
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
if (!arg_inited_) {
|
||||
arg_set_ = std::make_shared<SortVectorElement<GetType>>(expr_->vals_);
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
if (arg_set_->Empty()) {
|
||||
MoveCursor();
|
||||
return std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
if (cached_index_chunk_id_ != 0) {
|
||||
const segcore::SegmentInternalInterface* segment = nullptr;
|
||||
if (segment_->type() == SegmentType::Growing) {
|
||||
segment =
|
||||
dynamic_cast<const segcore::SegmentGrowingImpl*>(segment_);
|
||||
} else if (segment_->type() == SegmentType::Sealed) {
|
||||
segment = dynamic_cast<const segcore::SegmentSealed*>(segment_);
|
||||
}
|
||||
auto field_id = expr_->column_.field_id_;
|
||||
auto* index = segment->GetJsonKeyIndex(field_id);
|
||||
Assert(index != nullptr);
|
||||
auto filter_func = [this, segment, &field_id](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
if (valid) {
|
||||
return false;
|
||||
} else {
|
||||
auto json_pair = segment->GetJsonData(field_id, row_id);
|
||||
if (!json_pair.second) {
|
||||
return false;
|
||||
}
|
||||
auto json = milvus::Json(json_pair.first.data(),
|
||||
json_pair.first.size());
|
||||
auto array = json.array_at(offset, size);
|
||||
|
||||
if (array.error()) {
|
||||
return false;
|
||||
}
|
||||
for (auto&& it : array) {
|
||||
auto val = it.template get<GetType>();
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
if (this->arg_set_->In(val.value())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
bool is_growing = segment_->type() == SegmentType::Growing;
|
||||
bool is_strong_consistency = consistency_level_ == 0;
|
||||
cached_index_chunk_res_ = index
|
||||
->FilterByPath(pointer,
|
||||
active_count_,
|
||||
is_growing,
|
||||
is_strong_consistency,
|
||||
filter_func)
|
||||
.clone();
|
||||
cached_index_chunk_id_ = 0;
|
||||
}
|
||||
TargetBitmap result;
|
||||
result.append(
|
||||
cached_index_chunk_res_, current_data_chunk_pos_, real_batch_size);
|
||||
current_data_chunk_pos_ += real_batch_size;
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsArray(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (CanUseJsonKeyIndex(field_id) && !has_offset_input_) {
|
||||
return ExecJsonContainsArrayByKeyIndex();
|
||||
}
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
|
@ -452,6 +546,85 @@ PhyJsonContainsFilterExpr::ExecJsonContainsArray(EvalCtx& context) {
|
|||
return res_vec;
|
||||
}
|
||||
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsArrayByKeyIndex() {
|
||||
auto real_batch_size = current_data_chunk_pos_ + batch_size_ > active_count_
|
||||
? active_count_ - current_data_chunk_pos_
|
||||
: batch_size_;
|
||||
std::vector<proto::plan::Array> elements;
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements.emplace_back(GetValueFromProto<proto::plan::Array>(element));
|
||||
}
|
||||
if (elements.empty()) {
|
||||
MoveCursor();
|
||||
return std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
if (cached_index_chunk_id_ != 0) {
|
||||
const segcore::SegmentInternalInterface* segment = nullptr;
|
||||
if (segment_->type() == SegmentType::Growing) {
|
||||
segment =
|
||||
dynamic_cast<const segcore::SegmentGrowingImpl*>(segment_);
|
||||
} else if (segment_->type() == SegmentType::Sealed) {
|
||||
segment = dynamic_cast<const segcore::SegmentSealed*>(segment_);
|
||||
}
|
||||
auto field_id = expr_->column_.field_id_;
|
||||
auto* index = segment->GetJsonKeyIndex(field_id);
|
||||
Assert(index != nullptr);
|
||||
auto filter_func = [segment, &elements, &field_id](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
if (valid) {
|
||||
return false;
|
||||
} else {
|
||||
auto json_pair = segment->GetJsonData(field_id, row_id);
|
||||
if (!json_pair.second) {
|
||||
return false;
|
||||
}
|
||||
auto json = milvus::Json(json_pair.first.data(),
|
||||
json_pair.first.size());
|
||||
auto array = json.array_at(offset, size);
|
||||
if (array.error()) {
|
||||
return false;
|
||||
}
|
||||
for (auto&& it : array) {
|
||||
auto val = it.get_array();
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
for (auto const& element : elements) {
|
||||
if (CompareTwoJsonArray(val, element)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
bool is_growing = segment_->type() == SegmentType::Growing;
|
||||
bool is_strong_consistency = consistency_level_ == 0;
|
||||
cached_index_chunk_res_ = index
|
||||
->FilterByPath(pointer,
|
||||
active_count_,
|
||||
is_growing,
|
||||
is_strong_consistency,
|
||||
filter_func)
|
||||
.clone();
|
||||
cached_index_chunk_id_ = 0;
|
||||
}
|
||||
TargetBitmap result;
|
||||
result.append(
|
||||
cached_index_chunk_res_, current_data_chunk_pos_, real_batch_size);
|
||||
current_data_chunk_pos_ += real_batch_size;
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecArrayContainsAll(EvalCtx& context) {
|
||||
|
@ -519,7 +692,6 @@ PhyJsonContainsFilterExpr::ExecArrayContainsAll(EvalCtx& context) {
|
|||
}
|
||||
processed_cursor += size;
|
||||
};
|
||||
|
||||
int64_t processed_size;
|
||||
if (has_offset_input_) {
|
||||
processed_size =
|
||||
|
@ -550,6 +722,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) {
|
|||
ExprValueType>;
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (CanUseJsonKeyIndex(field_id) && !has_offset_input_) {
|
||||
return ExecJsonContainsAllByKeyIndex<ExprValueType>();
|
||||
}
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
|
@ -643,10 +820,98 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) {
|
|||
return res_vec;
|
||||
}
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsAllByKeyIndex() {
|
||||
using GetType =
|
||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||
std::string_view,
|
||||
ExprValueType>;
|
||||
auto real_batch_size = current_data_chunk_pos_ + batch_size_ > active_count_
|
||||
? active_count_ - current_data_chunk_pos_
|
||||
: batch_size_;
|
||||
std::set<GetType> elements;
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements.insert(GetValueFromProto<GetType>(element));
|
||||
}
|
||||
if (elements.empty()) {
|
||||
MoveCursor();
|
||||
return std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
if (cached_index_chunk_id_ != 0) {
|
||||
const segcore::SegmentInternalInterface* segment = nullptr;
|
||||
if (segment_->type() == SegmentType::Growing) {
|
||||
segment =
|
||||
dynamic_cast<const segcore::SegmentGrowingImpl*>(segment_);
|
||||
} else if (segment_->type() == SegmentType::Sealed) {
|
||||
segment = dynamic_cast<const segcore::SegmentSealed*>(segment_);
|
||||
}
|
||||
auto field_id = expr_->column_.field_id_;
|
||||
auto* index = segment->GetJsonKeyIndex(field_id);
|
||||
Assert(index != nullptr);
|
||||
auto filter_func = [segment, &elements, &field_id](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
if (valid) {
|
||||
return false;
|
||||
} else {
|
||||
auto json_pair = segment->GetJsonData(field_id, row_id);
|
||||
if (!json_pair.second) {
|
||||
return false;
|
||||
}
|
||||
auto json = milvus::Json(json_pair.first.data(),
|
||||
json_pair.first.size());
|
||||
auto array = json.array_at(offset, size);
|
||||
if (array.error()) {
|
||||
return false;
|
||||
}
|
||||
std::set<GetType> tmp_elements(elements);
|
||||
for (auto&& it : array) {
|
||||
auto val = it.template get<GetType>();
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
tmp_elements.erase(val.value());
|
||||
if (tmp_elements.size() == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return tmp_elements.empty();
|
||||
}
|
||||
};
|
||||
bool is_growing = segment_->type() == SegmentType::Growing;
|
||||
bool is_strong_consistency = consistency_level_ == 0;
|
||||
cached_index_chunk_res_ = index
|
||||
->FilterByPath(pointer,
|
||||
active_count_,
|
||||
is_growing,
|
||||
is_strong_consistency,
|
||||
filter_func)
|
||||
.clone();
|
||||
cached_index_chunk_id_ = 0;
|
||||
}
|
||||
TargetBitmap result;
|
||||
result.append(
|
||||
cached_index_chunk_res_, current_data_chunk_pos_, real_batch_size);
|
||||
current_data_chunk_pos_ += real_batch_size;
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
|
||||
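The filter callback above decides, per row, whether a JSON array contains every requested element: it copies the target set, erases each value it sees while iterating the array, and accepts the row once the copy is empty. A minimal standalone sketch of that erase-until-empty pattern (plain C++ with hypothetical names, not the Milvus API):

#include <set>
#include <string>
#include <vector>

// Returns true if array_values contains every element of targets.
// Mirrors the shrinking-set loop used by the key-index filter above.
static bool ContainsAll(const std::vector<std::string>& array_values,
                        const std::set<std::string>& targets) {
    std::set<std::string> remaining(targets);  // per-row working copy
    for (const auto& v : array_values) {
        remaining.erase(v);
        if (remaining.empty()) {
            return true;  // early exit once every target was seen
        }
    }
    return remaining.empty();
}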
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (CanUseJsonKeyIndex(field_id) && !has_offset_input_) {
|
||||
return ExecJsonContainsAllWithDiffTypeByKeyIndex();
|
||||
}
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
|
@ -805,10 +1070,157 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(EvalCtx& context) {
|
|||
return res_vec;
|
||||
}
|
||||
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffTypeByKeyIndex() {
|
||||
auto real_batch_size = current_data_chunk_pos_ + batch_size_ > active_count_
|
||||
? active_count_ - current_data_chunk_pos_
|
||||
: batch_size_;
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
auto elements = expr_->vals_;
|
||||
std::set<int> elements_index;
|
||||
int i = 0;
|
||||
for (auto& element : elements) {
|
||||
elements_index.insert(i);
|
||||
i++;
|
||||
}
|
||||
if (elements.empty()) {
|
||||
MoveCursor();
|
||||
return std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
if (cached_index_chunk_id_ != 0) {
|
||||
const segcore::SegmentInternalInterface* segment = nullptr;
|
||||
if (segment_->type() == SegmentType::Growing) {
|
||||
segment =
|
||||
dynamic_cast<const segcore::SegmentGrowingImpl*>(segment_);
|
||||
} else if (segment_->type() == SegmentType::Sealed) {
|
||||
segment = dynamic_cast<const segcore::SegmentSealed*>(segment_);
|
||||
}
|
||||
auto field_id = expr_->column_.field_id_;
|
||||
auto* index = segment->GetJsonKeyIndex(field_id);
|
||||
Assert(index != nullptr);
|
||||
auto filter_func = [segment, &elements, &elements_index, &field_id](
|
||||
bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
if (valid) {
|
||||
return false;
|
||||
} else {
|
||||
auto json_pair = segment->GetJsonData(field_id, row_id);
|
||||
if (!json_pair.second) {
|
||||
return false;
|
||||
}
|
||||
auto json = milvus::Json(json_pair.first.data(),
|
||||
json_pair.first.size());
|
||||
std::set<int> tmp_elements_index(elements_index);
|
||||
auto array = json.array_at(offset, size);
|
||||
if (array.error()) {
|
||||
return false;
|
||||
}
|
||||
for (auto&& it : array) {
|
||||
int i = -1;
|
||||
for (auto& element : elements) {
|
||||
i++;
|
||||
switch (element.val_case()) {
|
||||
case proto::plan::GenericValue::kBoolVal: {
|
||||
auto val = it.template get<bool>();
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
if (val.value() == element.bool_val()) {
|
||||
tmp_elements_index.erase(i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::kInt64Val: {
|
||||
auto val = it.template get<int64_t>();
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
if (val.value() == element.int64_val()) {
|
||||
tmp_elements_index.erase(i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::kFloatVal: {
|
||||
auto val = it.template get<double>();
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
if (val.value() == element.float_val()) {
|
||||
tmp_elements_index.erase(i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::kStringVal: {
|
||||
auto val = it.template get<std::string_view>();
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
if (val.value() == element.string_val()) {
|
||||
tmp_elements_index.erase(i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::kArrayVal: {
|
||||
auto val = it.get_array();
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
if (CompareTwoJsonArray(val,
|
||||
element.array_val())) {
|
||||
tmp_elements_index.erase(i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
PanicInfo(
|
||||
DataTypeInvalid,
|
||||
fmt::format("unsupported data type {}",
|
||||
element.val_case()));
|
||||
}
|
||||
if (tmp_elements_index.size() == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (tmp_elements_index.size() == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return tmp_elements_index.size() == 0;
|
||||
}
|
||||
};
|
||||
bool is_growing = segment_->type() == SegmentType::Growing;
|
||||
bool is_strong_consistency = consistency_level_ == 0;
|
||||
cached_index_chunk_res_ = index
|
||||
->FilterByPath(pointer,
|
||||
active_count_,
|
||||
is_growing,
|
||||
is_strong_consistency,
|
||||
filter_func)
|
||||
.clone();
|
||||
cached_index_chunk_id_ = 0;
|
||||
}
|
||||
TargetBitmap result;
|
||||
result.append(
|
||||
cached_index_chunk_res_, current_data_chunk_pos_, real_batch_size);
|
||||
current_data_chunk_pos_ += real_batch_size;
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
|
||||
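Each of these *ByKeyIndex methods evaluates the whole segment once (guarded by cached_index_chunk_id_) and then serves the result batch by batch, appending a slice that starts at current_data_chunk_pos_. A rough sketch of that build-once, consume-in-batches pattern, using std::vector<bool> in place of TargetBitmap (illustrative only):

#include <algorithm>
#include <cstdint>
#include <vector>

struct CachedFilter {
    std::vector<bool> cached;  // whole-segment result, built once
    int64_t pos = 0;           // next row to hand out
    bool built = false;

    // Returns the next batch of at most batch_size cached bits.
    std::vector<bool> NextBatch(int64_t batch_size,
                                const std::vector<bool>& full_result) {
        if (!built) {  // analogous to the cached_index_chunk_id_ != 0 guard
            cached = full_result;
            built = true;
        }
        int64_t n = std::min<int64_t>(
            batch_size, static_cast<int64_t>(cached.size()) - pos);
        std::vector<bool> batch(cached.begin() + pos, cached.begin() + pos + n);
        pos += n;
        return batch;
    }
};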
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (CanUseJsonKeyIndex(field_id) && !has_offset_input_) {
|
||||
return ExecJsonContainsAllArrayByKeyIndex();
|
||||
}
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
|
@ -914,10 +1326,97 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllArray(EvalCtx& context) {
|
|||
return res_vec;
|
||||
}
|
||||
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsAllArrayByKeyIndex() {
|
||||
auto real_batch_size = current_data_chunk_pos_ + batch_size_ > active_count_
|
||||
? active_count_ - current_data_chunk_pos_
|
||||
: batch_size_;
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
std::vector<proto::plan::Array> elements;
|
||||
for (auto const& element : expr_->vals_) {
|
||||
elements.emplace_back(GetValueFromProto<proto::plan::Array>(element));
|
||||
}
|
||||
if (elements.empty()) {
|
||||
MoveCursor();
|
||||
return std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
if (cached_index_chunk_id_ != 0) {
|
||||
const segcore::SegmentInternalInterface* segment = nullptr;
|
||||
if (segment_->type() == SegmentType::Growing) {
|
||||
segment =
|
||||
dynamic_cast<const segcore::SegmentGrowingImpl*>(segment_);
|
||||
} else if (segment_->type() == SegmentType::Sealed) {
|
||||
segment = dynamic_cast<const segcore::SegmentSealed*>(segment_);
|
||||
}
|
||||
auto field_id = expr_->column_.field_id_;
|
||||
auto* index = segment->GetJsonKeyIndex(field_id);
|
||||
Assert(index != nullptr);
|
||||
auto filter_func = [segment, &elements, &field_id](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
if (valid) {
|
||||
return false;
|
||||
} else {
|
||||
auto json_pair = segment->GetJsonData(field_id, row_id);
|
||||
if (!json_pair.second) {
|
||||
return false;
|
||||
}
|
||||
auto json = milvus::Json(json_pair.first.data(),
|
||||
json_pair.first.size());
|
||||
auto array = json.array_at(offset, size);
|
||||
if (array.error()) {
|
||||
return false;
|
||||
}
|
||||
std::set<int> exist_elements_index;
|
||||
for (auto&& it : array) {
|
||||
auto json_array = it.get_array();
|
||||
if (json_array.error()) {
|
||||
continue;
|
||||
}
|
||||
for (int index = 0; index < elements.size(); ++index) {
|
||||
if (CompareTwoJsonArray(json_array, elements[index])) {
|
||||
exist_elements_index.insert(index);
|
||||
}
|
||||
}
|
||||
if (exist_elements_index.size() == elements.size()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return exist_elements_index.size() == elements.size();
|
||||
}
|
||||
};
|
||||
bool is_growing = segment_->type() == SegmentType::Growing;
|
||||
bool is_strong_consistency = consistency_level_ == 0;
|
||||
cached_index_chunk_res_ = index
|
||||
->FilterByPath(pointer,
|
||||
active_count_,
|
||||
is_growing,
|
||||
is_strong_consistency,
|
||||
filter_func)
|
||||
.clone();
|
||||
cached_index_chunk_id_ = 0;
|
||||
}
|
||||
TargetBitmap result;
|
||||
result.append(
|
||||
cached_index_chunk_res_, current_data_chunk_pos_, real_batch_size);
|
||||
current_data_chunk_pos_ += real_batch_size;
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(EvalCtx& context) {
|
||||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (CanUseJsonKeyIndex(field_id) && !has_offset_input_) {
|
||||
return ExecJsonContainsWithDiffTypeByKeyIndex();
|
||||
}
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
|
@ -1066,6 +1565,134 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(EvalCtx& context) {
|
|||
return res_vec;
|
||||
}
|
||||
|
||||
VectorPtr
|
||||
PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffTypeByKeyIndex() {
|
||||
auto real_batch_size = current_data_chunk_pos_ + batch_size_ > active_count_
|
||||
? active_count_ - current_data_chunk_pos_
|
||||
: batch_size_;
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
auto elements = expr_->vals_;
|
||||
if (elements.empty()) {
|
||||
MoveCursor();
|
||||
return std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
if (cached_index_chunk_id_ != 0) {
|
||||
const segcore::SegmentInternalInterface* segment = nullptr;
|
||||
if (segment_->type() == SegmentType::Growing) {
|
||||
segment =
|
||||
dynamic_cast<const segcore::SegmentGrowingImpl*>(segment_);
|
||||
} else if (segment_->type() == SegmentType::Sealed) {
|
||||
segment = dynamic_cast<const segcore::SegmentSealed*>(segment_);
|
||||
}
|
||||
auto field_id = expr_->column_.field_id_;
|
||||
auto* index = segment->GetJsonKeyIndex(field_id);
|
||||
Assert(index != nullptr);
|
||||
auto filter_func = [segment, &elements, &field_id](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
if (valid) {
|
||||
return false;
|
||||
} else {
|
||||
auto json_pair = segment->GetJsonData(field_id, row_id);
|
||||
if (!json_pair.second) {
|
||||
return false;
|
||||
}
|
||||
auto json = milvus::Json(json_pair.first.data(),
|
||||
json_pair.first.size());
|
||||
auto array = json.array_at(offset, size);
|
||||
if (array.error()) {
|
||||
return false;
|
||||
}
|
||||
// Note: array can only be iterated once
|
||||
for (auto&& it : array) {
|
||||
for (auto const& element : elements) {
|
||||
switch (element.val_case()) {
|
||||
case proto::plan::GenericValue::kBoolVal: {
|
||||
auto val = it.template get<bool>();
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
if (val.value() == element.bool_val()) {
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::kInt64Val: {
|
||||
auto val = it.template get<int64_t>();
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
if (val.value() == element.int64_val()) {
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::kFloatVal: {
|
||||
auto val = it.template get<double>();
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
if (val.value() == element.float_val()) {
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::kStringVal: {
|
||||
auto val = it.template get<std::string_view>();
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
if (val.value() == element.string_val()) {
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::kArrayVal: {
|
||||
auto val = it.get_array();
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
if (CompareTwoJsonArray(val,
|
||||
element.array_val())) {
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
PanicInfo(
|
||||
DataTypeInvalid,
|
||||
fmt::format("unsupported data type {}",
|
||||
element.val_case()));
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
bool is_growing = segment_->type() == SegmentType::Growing;
|
||||
bool is_strong_consistency = consistency_level_ == 0;
|
||||
cached_index_chunk_res_ = index
|
||||
->FilterByPath(pointer,
|
||||
active_count_,
|
||||
is_growing,
|
||||
is_strong_consistency,
|
||||
filter_func)
|
||||
.clone();
|
||||
cached_index_chunk_id_ = 0;
|
||||
}
|
||||
TargetBitmap result;
|
||||
result.append(
|
||||
cached_index_chunk_res_, current_data_chunk_pos_, real_batch_size);
|
||||
current_data_chunk_pos_ += real_batch_size;
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
|
||||
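The mixed-type "contains" callback above accepts a row as soon as any array element matches any requested value, dispatching on each candidate's protobuf val_case(). A small std::variant analogue of that mixed-type contains-any check (a sketch, not the proto API):

#include <string>
#include <variant>
#include <vector>

using Candidate = std::variant<bool, int64_t, double, std::string>;

// True if any element of array equals any candidate; std::variant's
// operator== only matches when both hold the same alternative, which
// plays the role of the val_case() switch above.
static bool ContainsAny(const std::vector<Candidate>& array,
                        const std::vector<Candidate>& candidates) {
    for (const auto& elem : array) {
        for (const auto& cand : candidates) {
            if (elem == cand) {
                return true;  // first match wins
            }
        }
    }
    return false;
}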
VectorPtr
|
||||
PhyJsonContainsFilterExpr::EvalArrayContainsForIndexSegment() {
|
||||
switch (expr_->column_.element_type_) {
|
||||
|
|
|
@ -36,7 +36,8 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
|
|||
const std::string& name,
|
||||
const segcore::SegmentInternalInterface* segment,
|
||||
int64_t active_count,
|
||||
int64_t batch_size)
|
||||
int64_t batch_size,
|
||||
int32_t consistency_level)
|
||||
: SegmentExpr(std::move(input),
|
||||
name,
|
||||
segment,
|
||||
|
@ -44,7 +45,8 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
|
|||
expr->column_.nested_path_,
|
||||
DataType::NONE,
|
||||
active_count,
|
||||
batch_size),
|
||||
batch_size,
|
||||
consistency_level),
|
||||
expr_(expr) {
|
||||
}
|
||||
|
||||
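This constructor hunk, like the PhyNullExpr, PhyTermFilterExpr and PhyUnaryRangeFilterExpr hunks further down, has the same shape: a new int32_t consistency_level parameter is accepted and forwarded to the SegmentExpr base, and the stored value is later read as consistency_level_ == 0 to mean strong consistency. A minimal sketch of that forwarding pattern with placeholder class names (not the real SegmentExpr signature):

#include <cstdint>
#include <string>
#include <utility>

class BaseExpr {
 public:
    BaseExpr(std::string name, int64_t batch_size, int32_t consistency_level)
        : name_(std::move(name)),
          batch_size_(batch_size),
          consistency_level_(consistency_level) {
    }

 protected:
    std::string name_;
    int64_t batch_size_;
    int32_t consistency_level_ = 0;  // 0 is treated as strong consistency
};

class DerivedFilterExpr : public BaseExpr {
 public:
    DerivedFilterExpr(std::string name,
                      int64_t batch_size,
                      int32_t consistency_level)
        : BaseExpr(std::move(name), batch_size, consistency_level) {
    }
};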
|
@ -74,6 +76,10 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
|
|||
VectorPtr
|
||||
ExecJsonContains(EvalCtx& context);
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
ExecJsonContainsByKeyIndex();
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
ExecArrayContains(EvalCtx& context);
|
||||
|
@ -82,6 +88,10 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
|
|||
VectorPtr
|
||||
ExecJsonContainsAll(EvalCtx& context);
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
ExecJsonContainsAllByKeyIndex();
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
ExecArrayContainsAll(EvalCtx& context);
|
||||
|
@ -89,15 +99,27 @@ class PhyJsonContainsFilterExpr : public SegmentExpr {
|
|||
VectorPtr
|
||||
ExecJsonContainsArray(EvalCtx& context);
|
||||
|
||||
VectorPtr
|
||||
ExecJsonContainsArrayByKeyIndex();
|
||||
|
||||
VectorPtr
|
||||
ExecJsonContainsAllArray(EvalCtx& context);
|
||||
|
||||
VectorPtr
|
||||
ExecJsonContainsAllArrayByKeyIndex();
|
||||
|
||||
VectorPtr
|
||||
ExecJsonContainsAllWithDiffType(EvalCtx& context);
|
||||
|
||||
VectorPtr
|
||||
ExecJsonContainsAllWithDiffTypeByKeyIndex();
|
||||
|
||||
VectorPtr
|
||||
ExecJsonContainsWithDiffType(EvalCtx& context);
|
||||
|
||||
VectorPtr
|
||||
ExecJsonContainsWithDiffTypeByKeyIndex();
|
||||
|
||||
VectorPtr
|
||||
EvalArrayContainsForIndexSegment();
|
||||
|
||||
|
|
|
@ -35,7 +35,8 @@ class PhyNullExpr : public SegmentExpr {
|
|||
const std::string& name,
|
||||
const segcore::SegmentInternalInterface* segment,
|
||||
int64_t active_count,
|
||||
int64_t batch_size)
|
||||
int64_t batch_size,
|
||||
int32_t consistency_level)
|
||||
: SegmentExpr(std::move(input),
|
||||
name,
|
||||
segment,
|
||||
|
@ -43,7 +44,8 @@ class PhyNullExpr : public SegmentExpr {
|
|||
expr->column_.nested_path_,
|
||||
DataType::NONE,
|
||||
active_count,
|
||||
batch_size),
|
||||
batch_size,
|
||||
consistency_level),
|
||||
expr_(expr) {
|
||||
}
|
||||
|
||||
|
|
|
@ -539,6 +539,153 @@ PhyTermFilterExpr::ExecTermJsonVariableInField(EvalCtx& context) {
|
|||
return res_vec;
|
||||
}
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
PhyTermFilterExpr::ExecJsonInVariableByKeyIndex() {
|
||||
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
|
||||
std::string_view,
|
||||
ValueType>;
|
||||
auto real_batch_size = GetNextBatchSize();
|
||||
|
||||
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
if (!arg_inited_) {
|
||||
arg_set_ = std::make_shared<SortVectorElement<ValueType>>(expr_->vals_);
|
||||
if constexpr (std::is_same_v<GetType, double>) {
|
||||
arg_set_float_ =
|
||||
std::make_shared<SortVectorElement<float>>(expr_->vals_);
|
||||
}
|
||||
arg_inited_ = true;
|
||||
}
|
||||
|
||||
if (arg_set_->Empty()) {
|
||||
MoveCursor();
|
||||
return std::make_shared<ColumnVector>(
|
||||
TargetBitmap(real_batch_size, false),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
|
||||
if (cached_index_chunk_id_ != 0) {
|
||||
const segcore::SegmentInternalInterface* segment = nullptr;
|
||||
if (segment_->type() == SegmentType::Growing) {
|
||||
segment =
|
||||
dynamic_cast<const segcore::SegmentGrowingImpl*>(segment_);
|
||||
} else if (segment_->type() == SegmentType::Sealed) {
|
||||
segment = dynamic_cast<const segcore::SegmentSealed*>(segment_);
|
||||
}
|
||||
auto field_id = expr_->column_.field_id_;
|
||||
auto* index = segment->GetJsonKeyIndex(field_id);
|
||||
auto vals = expr_->vals_;
|
||||
|
||||
Assert(index != nullptr);
|
||||
|
||||
auto filter_func = [this, segment, &field_id](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
if (valid) {
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) {
|
||||
if (type != uint8_t(milvus::index::JSONType::INT32) &&
|
||||
type != uint8_t(milvus::index::JSONType::INT64) &&
|
||||
type != uint8_t(milvus::index::JSONType::FLOAT) &&
|
||||
type != uint8_t(milvus::index::JSONType::DOUBLE)) {
|
||||
return false;
|
||||
}
|
||||
} else if constexpr (std::is_same_v<GetType,
|
||||
std::string_view>) {
|
||||
if (type != uint8_t(milvus::index::JSONType::STRING) &&
|
||||
type !=
|
||||
uint8_t(milvus::index::JSONType::STRING_ESCAPE)) {
|
||||
return false;
|
||||
}
|
||||
} else if constexpr (std::is_same_v<GetType, double>) {
|
||||
if (type != uint8_t(milvus::index::JSONType::INT32) &&
|
||||
type != uint8_t(milvus::index::JSONType::INT64) &&
|
||||
type != uint8_t(milvus::index::JSONType::FLOAT) &&
|
||||
type != uint8_t(milvus::index::JSONType::DOUBLE)) {
|
||||
return false;
|
||||
}
|
||||
} else if constexpr (std::is_same_v<GetType, bool>) {
|
||||
if (type != uint8_t(milvus::index::JSONType::BOOL)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) {
|
||||
return this->arg_set_->In(value);
|
||||
} else if constexpr (std::is_same_v<GetType, double>) {
|
||||
float restoredValue = *reinterpret_cast<float*>(&value);
|
||||
return this->arg_set_float_->In(restoredValue);
|
||||
} else if constexpr (std::is_same_v<GetType, bool>) {
|
||||
bool restoredValue = *reinterpret_cast<bool*>(&value);
|
||||
return this->arg_set_->In(restoredValue);
|
||||
}
|
||||
} else {
|
||||
auto json_pair = segment->GetJsonData(field_id, row_id);
|
||||
if (!json_pair.second) {
|
||||
return false;
|
||||
}
|
||||
auto json = milvus::Json(json_pair.first.data(),
|
||||
json_pair.first.size());
|
||||
if (type == uint8_t(milvus::index::JSONType::STRING) ||
|
||||
type == uint8_t(milvus::index::JSONType::DOUBLE) ||
|
||||
type == uint8_t(milvus::index::JSONType::INT64)) {
|
||||
if (type == uint8_t(milvus::index::JSONType::STRING)) {
|
||||
if constexpr (std::is_same_v<GetType,
|
||||
std::string_view>) {
|
||||
auto val = json.at_string(offset, size);
|
||||
return this->arg_set_->In(ValueType(val));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else if (type ==
|
||||
uint8_t(milvus::index::JSONType::DOUBLE)) {
|
||||
if constexpr (std::is_same_v<GetType, double>) {
|
||||
auto val = std::stod(
|
||||
std::string(json.at_string(offset, size)));
|
||||
return this->arg_set_->In(ValueType(val));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else if (type ==
|
||||
uint8_t(milvus::index::JSONType::INT64)) {
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) {
|
||||
auto val = std::stoll(
|
||||
std::string(json.at_string(offset, size)));
|
||||
return this->arg_set_->In(ValueType(val));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto val = json.at<GetType>(offset, size);
|
||||
if (val.error()) {
|
||||
return false;
|
||||
}
|
||||
return this->arg_set_->In(ValueType(val.value()));
|
||||
}
|
||||
}
|
||||
};
|
||||
bool is_growing = segment_->type() == SegmentType::Growing;
|
||||
bool is_strong_consistency = consistency_level_ == 0;
|
||||
cached_index_chunk_res_ = index
|
||||
->FilterByPath(pointer,
|
||||
active_count_,
|
||||
is_growing,
|
||||
is_strong_consistency,
|
||||
filter_func)
|
||||
.clone();
|
||||
cached_index_chunk_id_ = 0;
|
||||
}
|
||||
|
||||
TargetBitmap result;
|
||||
result.append(
|
||||
cached_index_chunk_res_, current_data_chunk_pos_, real_batch_size);
|
||||
current_data_chunk_pos_ += real_batch_size;
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
|
||||
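For numeric keys the stats entry carries the value inline as an int32_t; float values are stored as their raw bit pattern and restored in the callback above with *reinterpret_cast<float*>(&value). A standalone sketch of that round trip; std::memcpy is the strict-aliasing-safe spelling of the same conversion the diff performs with reinterpret_cast:

#include <cstdint>
#include <cstring>

// Pack a float's bit pattern into an int32_t (what the stats entry stores).
static int32_t FloatBitsToInt32(float f) {
    int32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    return bits;
}

// Restore the float from the stored bits (what the filter callback does).
static float Int32ToFloatBits(int32_t bits) {
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}
// Example: Int32ToFloatBits(FloatBitsToInt32(3.5f)) == 3.5f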
template <typename ValueType>
|
||||
VectorPtr
|
||||
PhyTermFilterExpr::ExecTermJsonFieldInVariable(EvalCtx& context) {
|
||||
|
@ -548,6 +695,9 @@ PhyTermFilterExpr::ExecTermJsonFieldInVariable(EvalCtx& context) {
|
|||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
if (CanUseJsonKeyIndex(field_id) && !has_offset_input_) {
|
||||
return ExecJsonInVariableByKeyIndex<ValueType>();
|
||||
}
|
||||
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
|
|
|
@ -57,7 +57,8 @@ class PhyTermFilterExpr : public SegmentExpr {
|
|||
const segcore::SegmentInternalInterface* segment,
|
||||
int64_t active_count,
|
||||
milvus::Timestamp timestamp,
|
||||
int64_t batch_size)
|
||||
int64_t batch_size,
|
||||
int32_t consistency_level)
|
||||
: SegmentExpr(std::move(input),
|
||||
name,
|
||||
segment,
|
||||
|
@ -67,7 +68,8 @@ class PhyTermFilterExpr : public SegmentExpr {
|
|||
? DataType::NONE
|
||||
: FromValCase(expr->vals_[0].val_case()),
|
||||
active_count,
|
||||
batch_size),
|
||||
batch_size,
|
||||
consistency_level),
|
||||
expr_(expr),
|
||||
query_timestamp_(timestamp) {
|
||||
}
|
||||
|
@ -137,6 +139,10 @@ class PhyTermFilterExpr : public SegmentExpr {
|
|||
VectorPtr
|
||||
ExecTermArrayFieldInVariable(EvalCtx& context);
|
||||
|
||||
template <typename ValueType>
|
||||
VectorPtr
|
||||
ExecJsonInVariableByKeyIndex();
|
||||
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::TermFilterExpr> expr_;
|
||||
milvus::Timestamp query_timestamp_;
|
||||
|
@ -144,7 +150,9 @@ class PhyTermFilterExpr : public SegmentExpr {
|
|||
TargetBitmap cached_bits_;
|
||||
bool arg_inited_{false};
|
||||
std::shared_ptr<MultiElement> arg_set_;
|
||||
std::shared_ptr<MultiElement> arg_set_float_;
|
||||
SingleElement arg_val_;
|
||||
int32_t consistency_level_ = 0;
|
||||
};
|
||||
} //namespace exec
|
||||
} // namespace milvus
|
||||
|
|
|
@ -21,9 +21,9 @@
|
|||
#include "common/type_c.h"
|
||||
#include "log/Log.h"
|
||||
|
||||
#include <boost/regex.hpp>
|
||||
namespace milvus {
|
||||
namespace exec {
|
||||
|
||||
template <typename T>
|
||||
bool
|
||||
PhyUnaryRangeFilterExpr::CanUseIndexForArray() {
|
||||
|
@ -617,6 +617,11 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(EvalCtx& context) {
|
|||
auto* input = context.get_offset_input();
|
||||
const auto& bitmap_input = context.get_bitmap_input();
|
||||
FieldId field_id = expr_->column_.field_id_;
|
||||
|
||||
if (CanUseJsonKeyIndex(field_id) && !has_offset_input_) {
|
||||
return ExecRangeVisitorImplJsonForIndex<ExprValueType>();
|
||||
}
|
||||
|
||||
auto real_batch_size =
|
||||
has_offset_input_ ? input->size() : GetNextBatchSize();
|
||||
if (real_batch_size == 0) {
|
||||
|
@ -898,6 +903,506 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson(EvalCtx& context) {
|
|||
return res_vec;
|
||||
}
|
||||
|
||||
std::pair<std::string, std::string>
PhyUnaryRangeFilterExpr::SplitAtFirstSlashDigit(std::string input) {
    boost::regex rgx("/\\d+");
    boost::smatch match;
    if (boost::regex_search(input, match, rgx)) {
        std::string firstPart = input.substr(0, match.position());
        std::string secondPart = input.substr(match.position());
        return {firstPart, secondPart};
    } else {
        return {input, ""};
    }
}
|
||||
|
||||
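SplitAtFirstSlashDigit cuts a JSON pointer at the first numeric path segment, so the leading part can be looked up in the key index and the numeric tail can be applied to the stored array. For a pointer like "/a/b/0/c" it yields {"/a/b", "/0/c"}, and a pointer with no numeric segment comes back unchanged with an empty tail. An equivalent sketch with std::regex (the diff itself uses boost::regex):

#include <regex>
#include <string>
#include <utility>

// Split "/a/b/0/c" into {"/a/b", "/0/c"}; "/a/b" becomes {"/a/b", ""}.
static std::pair<std::string, std::string>
SplitAtFirstSlashDigit(const std::string& input) {
    static const std::regex rgx("/\\d+");
    std::smatch match;
    if (std::regex_search(input, match, rgx)) {
        return {input.substr(0, match.position(0)),
                input.substr(match.position(0))};
    }
    return {input, ""};
}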
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJsonForIndex() {
|
||||
using GetType =
|
||||
std::conditional_t<std::is_same_v<ExprValueType, std::string>,
|
||||
std::string_view,
|
||||
ExprValueType>;
|
||||
auto real_batch_size = current_data_chunk_pos_ + batch_size_ > active_count_
|
||||
? active_count_ - current_data_chunk_pos_
|
||||
: batch_size_;
|
||||
auto pointerpath = milvus::Json::pointer(expr_->column_.nested_path_);
|
||||
auto pointerpair = SplitAtFirstSlashDigit(pointerpath);
|
||||
std::string pointer = pointerpair.first;
|
||||
std::string arrayIndex = pointerpair.second;
|
||||
|
||||
#define UnaryRangeJSONIndexCompare(cmp) \
|
||||
do { \
|
||||
auto x = json.at<GetType>(offset, size); \
|
||||
if (x.error()) { \
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||
auto x = json.at<double>(offset, size); \
|
||||
return !x.error() && (cmp); \
|
||||
} \
|
||||
return false; \
|
||||
} \
|
||||
return (cmp); \
|
||||
} while (false)
|
||||
|
||||
#define UnaryJSONTypeCompare(cmp) \
|
||||
do { \
|
||||
if constexpr (std::is_same_v<GetType, std::string_view>) { \
|
||||
if (type == uint8_t(milvus::index::JSONType::STRING)) { \
|
||||
auto x = json.at_string(offset, size); \
|
||||
return (cmp); \
|
||||
} else { \
|
||||
return false; \
|
||||
} \
|
||||
} else if constexpr (std::is_same_v<GetType, double>) { \
|
||||
if (type == uint8_t(milvus::index::JSONType::INT64)) { \
|
||||
auto x = \
|
||||
std::stoll(std::string(json.at_string(offset, size))); \
|
||||
return (cmp); \
|
||||
} else if (type == uint8_t(milvus::index::JSONType::DOUBLE)) { \
|
||||
auto x = std::stod(std::string(json.at_string(offset, size))); \
|
||||
return (cmp); \
|
||||
} else { \
|
||||
return false; \
|
||||
} \
|
||||
} else if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||
if (type == uint8_t(milvus::index::JSONType::INT64)) { \
|
||||
auto x = \
|
||||
std::stoll(std::string(json.at_string(offset, size))); \
|
||||
return (cmp); \
|
||||
} else if (type == uint8_t(milvus::index::JSONType::DOUBLE)) { \
|
||||
auto x = std::stod(std::string(json.at_string(offset, size))); \
|
||||
return (cmp); \
|
||||
} else { \
|
||||
return false; \
|
||||
} \
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
#define UnaryJSONTypeCompareWithValue(cmp) \
|
||||
do { \
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||
if (type == uint8_t(milvus::index::JSONType::FLOAT)) { \
|
||||
float x = *reinterpret_cast<float*>(&value); \
|
||||
return (cmp); \
|
||||
} else { \
|
||||
int64_t x = value; \
|
||||
return (cmp); \
|
||||
} \
|
||||
} else if constexpr (std::is_same_v<GetType, double>) { \
|
||||
if (type == uint8_t(milvus::index::JSONType::FLOAT)) { \
|
||||
float x = *reinterpret_cast<float*>(&value); \
|
||||
return (cmp); \
|
||||
} else { \
|
||||
int64_t x = value; \
|
||||
return (cmp); \
|
||||
} \
|
||||
} else if constexpr (std::is_same_v<GetType, bool>) { \
|
||||
bool x = *reinterpret_cast<bool*>(&value); \
|
||||
return (cmp); \
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
#define CompareValueWithOpType(type, value, val, op_type) \
|
||||
switch (op_type) { \
|
||||
case proto::plan::GreaterThan: \
|
||||
if (type == uint8_t(milvus::index::JSONType::FLOAT)) { \
|
||||
UnaryJSONTypeCompareWithValue(x > static_cast<float>(val)); \
|
||||
} else { \
|
||||
UnaryJSONTypeCompareWithValue(x > val); \
|
||||
} \
|
||||
break; \
|
||||
case proto::plan::GreaterEqual: \
|
||||
if (type == uint8_t(milvus::index::JSONType::FLOAT)) { \
|
||||
UnaryJSONTypeCompareWithValue(x >= static_cast<float>(val)); \
|
||||
} else { \
|
||||
UnaryJSONTypeCompareWithValue(x >= val); \
|
||||
} \
|
||||
break; \
|
||||
case proto::plan::LessThan: \
|
||||
if (type == uint8_t(milvus::index::JSONType::FLOAT)) { \
|
||||
UnaryJSONTypeCompareWithValue(x < static_cast<float>(val)); \
|
||||
} else { \
|
||||
UnaryJSONTypeCompareWithValue(x < val); \
|
||||
} \
|
||||
break; \
|
||||
case proto::plan::LessEqual: \
|
||||
if (type == uint8_t(milvus::index::JSONType::FLOAT)) { \
|
||||
UnaryJSONTypeCompareWithValue(x <= static_cast<float>(val)); \
|
||||
} else { \
|
||||
UnaryJSONTypeCompareWithValue(x <= val); \
|
||||
} \
|
||||
break; \
|
||||
case proto::plan::Equal: \
|
||||
if (type == uint8_t(milvus::index::JSONType::FLOAT)) { \
|
||||
UnaryJSONTypeCompareWithValue(x == static_cast<float>(val)); \
|
||||
} else { \
|
||||
UnaryJSONTypeCompareWithValue(x == val); \
|
||||
} \
|
||||
break; \
|
||||
case proto::plan::NotEqual: \
|
||||
if (type == uint8_t(milvus::index::JSONType::FLOAT)) { \
|
||||
UnaryJSONTypeCompareWithValue(x != static_cast<float>(val)); \
|
||||
} else { \
|
||||
UnaryJSONTypeCompareWithValue(x != val); \
|
||||
} \
|
||||
break; \
|
||||
default: \
|
||||
return false; \
|
||||
}
|
||||
|
||||
#define UnaryRangeJSONIndexCompareWithArrayIndex(cmp) \
|
||||
do { \
|
||||
if (type != uint8_t(milvus::index::JSONType::UNKNOWN)) { \
|
||||
return false; \
|
||||
} \
|
||||
auto array = json.array_at(offset, size); \
|
||||
if (array.error()) { \
|
||||
return false; \
|
||||
} \
|
||||
auto value = array.at_pointer(arrayIndex); \
|
||||
if (value.error()) { \
|
||||
return false; \
|
||||
} \
|
||||
if constexpr (std::is_same_v<GetType, int64_t> || \
|
||||
std::is_same_v<GetType, double>) { \
|
||||
if (!value.is_number()) { \
|
||||
return false; \
|
||||
} \
|
||||
} else if constexpr (std::is_same_v<GetType, std::string_view>) { \
|
||||
if (!value.is_string()) { \
|
||||
return false; \
|
||||
} \
|
||||
} else if constexpr (std::is_same_v<GetType, bool>) { \
|
||||
if (!value.is_bool()) { \
|
||||
return false; \
|
||||
} \
|
||||
} \
|
||||
auto x = value.get<GetType>(); \
|
||||
if (x.error()) { \
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||
auto x = value.get<double>(); \
|
||||
return !x.error() && (cmp); \
|
||||
} \
|
||||
} \
|
||||
return (cmp); \
|
||||
} while (false)
|
||||
|
||||
#define UnaryRangeJSONIndexCompareNotEqual(cmp) \
|
||||
do { \
|
||||
auto x = json.at<GetType>(offset, size); \
|
||||
if (x.error()) { \
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||
auto x = json.at<double>(offset, size); \
|
||||
return x.error() || (cmp); \
|
||||
} \
|
||||
return true; \
|
||||
} \
|
||||
return (cmp); \
|
||||
} while (false)
|
||||
#define UnaryRangeJSONIndexCompareNotEqualWithArrayIndex(cmp) \
|
||||
do { \
|
||||
auto array = json.array_at(offset, size); \
|
||||
if (array.error()) { \
|
||||
return false; \
|
||||
} \
|
||||
auto value = array.at_pointer(arrayIndex); \
|
||||
if (value.error()) { \
|
||||
return false; \
|
||||
} \
|
||||
if constexpr (std::is_same_v<GetType, int64_t> || \
|
||||
std::is_same_v<GetType, double>) { \
|
||||
if (!value.is_number()) { \
|
||||
return false; \
|
||||
} \
|
||||
} else if constexpr (std::is_same_v<GetType, std::string_view>) { \
|
||||
if (!value.is_string()) { \
|
||||
return false; \
|
||||
} \
|
||||
} else if constexpr (std::is_same_v<GetType, bool>) { \
|
||||
if (!value.is_bool()) { \
|
||||
return false; \
|
||||
} \
|
||||
} \
|
||||
auto x = value.get<GetType>(); \
|
||||
if (x.error()) { \
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||
auto x = value.get<double>(); \
|
||||
return x.error() || (cmp); \
|
||||
} \
|
||||
} \
|
||||
return (cmp); \
|
||||
} while (false)
|
||||
|
||||
#define CHECKISJSONTYPEWITHOFFSET(type) \
|
||||
(type == uint8_t(milvus::index::JSONType::STRING) || \
|
||||
type == uint8_t(milvus::index::JSONType::DOUBLE) || \
|
||||
type == uint8_t(milvus::index::JSONType::INT64))
|
||||
|
||||
#define CHECKJSONTYPEISNUMBER(type) \
|
||||
if ((type != uint8_t(milvus::index::JSONType::INT32)) && \
|
||||
(type != uint8_t(milvus::index::JSONType::INT64)) && \
|
||||
(type != uint8_t(milvus::index::JSONType::FLOAT)) && \
|
||||
(type != uint8_t(milvus::index::JSONType::DOUBLE))) { \
|
||||
return false; \
|
||||
}
|
||||
|
||||
#define ISVALIDJSONTYPE(type, GetType) \
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) { \
|
||||
CHECKJSONTYPEISNUMBER(type) \
|
||||
} else if constexpr (std::is_same_v<GetType, std::string_view>) { \
|
||||
if ((type != uint8_t(milvus::index::JSONType::STRING)) && \
|
||||
(type != uint8_t(milvus::index::JSONType::STRING_ESCAPE))) { \
|
||||
return false; \
|
||||
} \
|
||||
} else if constexpr (std::is_same_v<GetType, double>) { \
|
||||
CHECKJSONTYPEISNUMBER(type) \
|
||||
} else if constexpr (std::is_same_v<GetType, bool>) { \
|
||||
if (type != uint8_t(milvus::index::JSONType::BOOL)) { \
|
||||
return false; \
|
||||
} \
|
||||
}
|
||||
|
||||
ExprValueType val = GetValueFromProto<ExprValueType>(expr_->val_);
|
||||
auto op_type = expr_->op_type_;
|
||||
|
||||
if (cached_index_chunk_id_ != 0) {
|
||||
const segcore::SegmentInternalInterface* segment = nullptr;
|
||||
if (segment_->type() == SegmentType::Growing) {
|
||||
segment =
|
||||
dynamic_cast<const segcore::SegmentGrowingImpl*>(segment_);
|
||||
} else if (segment_->type() == SegmentType::Sealed) {
|
||||
segment = dynamic_cast<const segcore::SegmentSealed*>(segment_);
|
||||
}
|
||||
auto field_id = expr_->column_.field_id_;
|
||||
auto* index = segment->GetJsonKeyIndex(field_id);
|
||||
Assert(index != nullptr);
|
||||
Assert(segment != nullptr);
|
||||
auto filter_func = [segment,
|
||||
field_id,
|
||||
op_type,
|
||||
val,
|
||||
arrayIndex,
|
||||
pointer](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
if (valid) {
|
||||
if (type == uint8_t(milvus::index::JSONType::UNKNOWN) ||
|
||||
!arrayIndex.empty()) {
|
||||
return false;
|
||||
}
|
||||
ISVALIDJSONTYPE(type, GetType);
|
||||
switch (op_type) {
|
||||
case proto::plan::GreaterThan:
|
||||
CompareValueWithOpType(type, value, val, op_type);
|
||||
case proto::plan::GreaterEqual:
|
||||
CompareValueWithOpType(type, value, val, op_type);
|
||||
case proto::plan::LessThan:
|
||||
CompareValueWithOpType(type, value, val, op_type);
|
||||
case proto::plan::LessEqual:
|
||||
CompareValueWithOpType(type, value, val, op_type);
|
||||
case proto::plan::Equal:
|
||||
CompareValueWithOpType(type, value, val, op_type);
|
||||
case proto::plan::NotEqual:
|
||||
CompareValueWithOpType(type, value, val, op_type);
|
||||
case proto::plan::PrefixMatch:
|
||||
case proto::plan::Match:
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
auto json_pair = segment->GetJsonData(field_id, row_id);
|
||||
if (!json_pair.second) {
|
||||
return false;
|
||||
}
|
||||
auto json = milvus::Json(json_pair.first.data(),
|
||||
json_pair.first.size());
|
||||
switch (op_type) {
|
||||
case proto::plan::GreaterThan:
|
||||
if constexpr (std::is_same_v<GetType,
|
||||
proto::plan::Array>) {
|
||||
return false;
|
||||
} else {
|
||||
if (!arrayIndex.empty()) {
|
||||
UnaryRangeJSONIndexCompareWithArrayIndex(
|
||||
ExprValueType(x.value()) > val);
|
||||
} else {
|
||||
if (CHECKISJSONTYPEWITHOFFSET(type)) {
|
||||
UnaryJSONTypeCompare(x > val);
|
||||
} else {
|
||||
UnaryRangeJSONIndexCompare(
|
||||
ExprValueType(x.value()) > val);
|
||||
}
|
||||
}
|
||||
}
|
||||
case proto::plan::GreaterEqual:
|
||||
if constexpr (std::is_same_v<GetType,
|
||||
proto::plan::Array>) {
|
||||
return false;
|
||||
} else {
|
||||
if (!arrayIndex.empty()) {
|
||||
UnaryRangeJSONIndexCompareWithArrayIndex(
|
||||
ExprValueType(x.value()) >= val);
|
||||
} else {
|
||||
if (CHECKISJSONTYPEWITHOFFSET(type)) {
|
||||
UnaryJSONTypeCompare(x >= val);
|
||||
} else {
|
||||
UnaryRangeJSONIndexCompare(
|
||||
ExprValueType(x.value()) >= val);
|
||||
}
|
||||
}
|
||||
}
|
||||
case proto::plan::LessThan:
|
||||
if constexpr (std::is_same_v<GetType,
|
||||
proto::plan::Array>) {
|
||||
return false;
|
||||
} else {
|
||||
if (!arrayIndex.empty()) {
|
||||
UnaryRangeJSONIndexCompareWithArrayIndex(
|
||||
ExprValueType(x.value()) < val);
|
||||
} else {
|
||||
if (CHECKISJSONTYPEWITHOFFSET(type)) {
|
||||
UnaryJSONTypeCompare(x < val);
|
||||
} else {
|
||||
UnaryRangeJSONIndexCompare(
|
||||
ExprValueType(x.value()) < val);
|
||||
}
|
||||
}
|
||||
}
|
||||
case proto::plan::LessEqual:
|
||||
if constexpr (std::is_same_v<GetType,
|
||||
proto::plan::Array>) {
|
||||
return false;
|
||||
} else {
|
||||
if (!arrayIndex.empty()) {
|
||||
UnaryRangeJSONIndexCompareWithArrayIndex(
|
||||
ExprValueType(x.value()) <= val);
|
||||
} else {
|
||||
if (CHECKISJSONTYPEWITHOFFSET(type)) {
|
||||
UnaryJSONTypeCompare(x <= val);
|
||||
} else {
|
||||
UnaryRangeJSONIndexCompare(
|
||||
ExprValueType(x.value()) <= val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
case proto::plan::Equal:
|
||||
if constexpr (std::is_same_v<GetType,
|
||||
proto::plan::Array>) {
|
||||
if (type !=
|
||||
uint8_t(milvus::index::JSONType::UNKNOWN)) {
|
||||
return false;
|
||||
}
|
||||
auto array = json.array_at(offset, size);
|
||||
if (array.error()) {
|
||||
return false;
|
||||
}
|
||||
return CompareTwoJsonArray(array.value(), val);
|
||||
} else {
|
||||
if (!arrayIndex.empty()) {
|
||||
UnaryRangeJSONIndexCompareWithArrayIndex(
|
||||
ExprValueType(x.value()) == val);
|
||||
} else {
|
||||
if (CHECKISJSONTYPEWITHOFFSET(type)) {
|
||||
UnaryJSONTypeCompare(x == val);
|
||||
} else {
|
||||
UnaryRangeJSONIndexCompare(
|
||||
ExprValueType(x.value()) == val);
|
||||
}
|
||||
}
|
||||
}
|
||||
case proto::plan::NotEqual:
|
||||
if constexpr (std::is_same_v<GetType,
|
||||
proto::plan::Array>) {
|
||||
if (type !=
|
||||
uint8_t(milvus::index::JSONType::UNKNOWN)) {
|
||||
return false;
|
||||
}
|
||||
auto array = json.array_at(offset, size);
|
||||
if (array.error()) {
|
||||
return false;
|
||||
}
|
||||
return !CompareTwoJsonArray(array.value(), val);
|
||||
} else {
|
||||
if (!arrayIndex.empty()) {
|
||||
UnaryRangeJSONIndexCompareNotEqualWithArrayIndex(
|
||||
ExprValueType(x.value()) != val);
|
||||
} else {
|
||||
if (CHECKISJSONTYPEWITHOFFSET(type)) {
|
||||
UnaryJSONTypeCompare(x != val);
|
||||
} else {
|
||||
UnaryRangeJSONIndexCompareNotEqual(
|
||||
ExprValueType(x.value()) != val);
|
||||
}
|
||||
}
|
||||
}
|
||||
case proto::plan::PrefixMatch:
|
||||
if constexpr (std::is_same_v<GetType,
|
||||
proto::plan::Array>) {
|
||||
return false;
|
||||
} else {
|
||||
if (!arrayIndex.empty()) {
|
||||
UnaryRangeJSONIndexCompareWithArrayIndex(
|
||||
milvus::query::Match(
|
||||
ExprValueType(x.value()),
|
||||
val,
|
||||
op_type));
|
||||
} else {
|
||||
if (CHECKISJSONTYPEWITHOFFSET(type)) {
|
||||
UnaryJSONTypeCompare(
|
||||
milvus::query::Match(x, val, op_type));
|
||||
} else {
|
||||
UnaryRangeJSONIndexCompare(
|
||||
milvus::query::Match(
|
||||
ExprValueType(x.value()),
|
||||
val,
|
||||
op_type));
|
||||
}
|
||||
}
|
||||
}
|
||||
case proto::plan::Match:
|
||||
if constexpr (std::is_same_v<GetType,
|
||||
proto::plan::Array>) {
|
||||
return false;
|
||||
} else {
|
||||
PatternMatchTranslator translator;
|
||||
auto regex_pattern = translator(val);
|
||||
RegexMatcher matcher(regex_pattern);
|
||||
if (!arrayIndex.empty()) {
|
||||
UnaryRangeJSONIndexCompareWithArrayIndex(
|
||||
matcher(ExprValueType(x.value())));
|
||||
} else {
|
||||
UnaryRangeJSONIndexCompare(
|
||||
matcher(ExprValueType(x.value())));
|
||||
}
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
bool is_growing = segment_->type() == SegmentType::Growing;
|
||||
bool is_strong_consistency = consistency_level_ == 0;
|
||||
cached_index_chunk_res_ = index
|
||||
->FilterByPath(pointer,
|
||||
active_count_,
|
||||
is_growing,
|
||||
is_strong_consistency,
|
||||
filter_func)
|
||||
.clone();
|
||||
cached_index_chunk_id_ = 0;
|
||||
}
|
||||
TargetBitmap result;
|
||||
result.append(
|
||||
cached_index_chunk_res_, current_data_chunk_pos_, real_batch_size);
|
||||
current_data_chunk_pos_ += real_batch_size;
|
||||
return std::make_shared<ColumnVector>(std::move(result),
|
||||
TargetBitmap(real_batch_size, true));
|
||||
}
|
||||
|
||||
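The UnaryRangeJSONIndexCompare macro above tries the requested type first and, when a typed read fails for an int64 comparison, retries the same document location as a double, so integer-valued filters still match JSON numbers written in floating-point form. A hedged standalone sketch of that fallback, using std::optional in place of the Json accessor results:

#include <cstdint>
#include <optional>

// Compare a stored JSON number against an int64 bound, falling back to a
// double read when the integer read fails (mirrors the macro's fallback).
static bool GreaterThanWithFallback(std::optional<int64_t> as_int,
                                    std::optional<double> as_double,
                                    int64_t bound) {
    if (as_int.has_value()) {
        return *as_int > bound;
    }
    if (as_double.has_value()) {
        return *as_double > static_cast<double>(bound);
    }
    return false;  // value missing or not numeric
}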
template <typename T>
|
||||
VectorPtr
|
||||
PhyUnaryRangeFilterExpr::ExecRangeVisitorImpl(EvalCtx& context) {
|
||||
|
|
|
@ -335,7 +335,8 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
|
|||
const std::string& name,
|
||||
const segcore::SegmentInternalInterface* segment,
|
||||
int64_t active_count,
|
||||
int64_t batch_size)
|
||||
int64_t batch_size,
|
||||
int32_t consistency_level)
|
||||
: SegmentExpr(std::move(input),
|
||||
name,
|
||||
segment,
|
||||
|
@ -343,7 +344,8 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
|
|||
expr->column_.nested_path_,
|
||||
FromValCase(expr->val_.val_case()),
|
||||
active_count,
|
||||
batch_size),
|
||||
batch_size,
|
||||
consistency_level),
|
||||
expr_(expr) {
|
||||
}
|
||||
|
||||
|
@ -411,6 +413,10 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
|
|||
VectorPtr
|
||||
ExecRangeVisitorImplJson(EvalCtx& context);
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
ExecRangeVisitorImplJsonForIndex();
|
||||
|
||||
template <typename ExprValueType>
|
||||
VectorPtr
|
||||
ExecRangeVisitorImplArray(EvalCtx& context);
|
||||
|
@ -442,6 +448,9 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
|
|||
VectorPtr
|
||||
ExecTextMatch();
|
||||
|
||||
std::pair<std::string, std::string>
|
||||
SplitAtFirstSlashDigit(std::string input);
|
||||
|
||||
private:
|
||||
std::shared_ptr<const milvus::expr::UnaryRangeFilterExpr> expr_;
|
||||
int64_t overflow_check_pos_{0};
|
||||
|
|
|
@ -64,6 +64,16 @@ CompareTwoJsonArray(T arr1, const proto::plan::Array& arr2) {
|
|||
simdjson::ondemand::value>>>) {
|
||||
json_array_length = arr1.size();
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<
|
||||
T,
|
||||
simdjson::simdjson_result<simdjson::dom::array>>) {
|
||||
json_array_length = arr1.size();
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, simdjson::dom::array>) {
|
||||
json_array_length = arr1.size();
|
||||
}
|
||||
if (arr2.array_size() != json_array_length) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -218,7 +218,7 @@ InvertedIndexTantivy<T>::Load(milvus::tracer::TraceContext ctx,
|
|||
std::vector<std::string> null_offset_files;
|
||||
std::shared_ptr<FieldDataBase> null_offset_data;
|
||||
|
||||
auto find_file = [&](const std::string& target) -> auto{
|
||||
auto find_file = [&](const std::string& target) -> auto {
|
||||
return std::find_if(inverted_index_files.begin(),
|
||||
inverted_index_files.end(),
|
||||
[&](const std::string& filename) {
|
||||
|
|
|
@ -0,0 +1,476 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <boost/uuid/random_generator.hpp>
|
||||
#include <boost/uuid/uuid_io.hpp>
|
||||
|
||||
#include "index/JsonKeyStatsInvertedIndex.h"
|
||||
#include "index/InvertedIndexUtil.h"
|
||||
#include "index/Utils.h"
|
||||
#include "storage/MmapManager.h"
|
||||
namespace milvus::index {
|
||||
constexpr const char* TMP_JSON_INVERTED_LOG_PREFIX =
|
||||
"/tmp/milvus/json-key-inverted-index-log/";
|
||||
|
||||
void
JsonKeyStatsInvertedIndex::AddJSONEncodeValue(
    const std::vector<std::string>& paths,
    uint8_t flag,
    uint8_t type,
    uint32_t row_id,
    uint16_t offset,
    uint16_t length,
    int32_t value,
    std::map<std::string, std::vector<int64_t>>& mp) {
    std::string key = "";
    if (!paths.empty()) {
        key = std::string("/") + Join(paths, "/");
    }
    LOG_DEBUG(
        "insert inverted key: {}, flag: {}, type: {}, row_id: {}, offset: "
        "{}, length:{}, value:{}",
        key,
        flag,
        type,
        row_id,
        offset,
        length,
        value);
    int64_t combine_id = 0;

    if (flag) {
        combine_id = EncodeValue(flag, type, row_id, value);
    } else {
        combine_id = EncodeOffset(flag, type, row_id, offset, length);
    }

    mp[key].push_back(combine_id);
}
|
||||
|
||||
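EncodeValue and EncodeOffset are not part of this hunk; they pack the flag, type, row id and either an inline value or an offset/length pair into the single int64_t combine_id that is posted under the key. The exact bit layout lives elsewhere in the index code, so the sketch below is only an assumed layout for illustration (the field widths are hypothetical, though a 32-bit payload is consistent with the int32_t value and the 16-bit offset/length used above):

#include <cstdint>

// Assumed layout, illustration only: 1-bit flag | 7-bit type |
// 24-bit row id | 32-bit payload (inline value, or offset<<16 | length).
static int64_t EncodeValueSketch(uint8_t flag, uint8_t type,
                                 uint32_t row_id, int32_t value) {
    uint64_t packed = (static_cast<uint64_t>(flag & 0x1) << 63) |
                      (static_cast<uint64_t>(type & 0x7F) << 56) |
                      (static_cast<uint64_t>(row_id & 0xFFFFFF) << 32) |
                      static_cast<uint32_t>(value);
    return static_cast<int64_t>(packed);
}

static int64_t EncodeOffsetSketch(uint8_t flag, uint8_t type, uint32_t row_id,
                                  uint16_t offset, uint16_t length) {
    int32_t payload = (static_cast<int32_t>(offset) << 16) | length;
    return EncodeValueSketch(flag, type, row_id, payload);
}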
void
|
||||
JsonKeyStatsInvertedIndex::AddInvertedRecord(
|
||||
std::map<std::string, std::vector<int64_t>>& mp) {
|
||||
for (auto& iter : mp) {
|
||||
for (auto value : iter.second) {
|
||||
wrapper_->add_array_data<std::string>(&iter.first, 1, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
JsonKeyStatsInvertedIndex::TravelJson(
|
||||
const char* json,
|
||||
jsmntok* tokens,
|
||||
int& index,
|
||||
std::vector<std::string>& path,
|
||||
int32_t offset,
|
||||
std::map<std::string, std::vector<int64_t>>& mp) {
|
||||
jsmntok current = tokens[0];
|
||||
Assert(current.type != JSMN_UNDEFINED);
|
||||
if (current.type == JSMN_OBJECT) {
|
||||
if (!path.empty()) {
|
||||
AddJSONEncodeValue(path,
|
||||
0,
|
||||
0,
|
||||
offset,
|
||||
current.start,
|
||||
current.end - current.start,
|
||||
0,
|
||||
mp);
|
||||
}
|
||||
int j = 1;
|
||||
for (int i = 0; i < current.size; i++) {
|
||||
Assert(tokens[j].type == JSMN_STRING && tokens[j].size != 0);
|
||||
std::string key(json + tokens[j].start,
|
||||
tokens[j].end - tokens[j].start);
|
||||
path.push_back(key);
|
||||
j++;
|
||||
int consumed = 0;
|
||||
TravelJson(json, tokens + j, consumed, path, offset, mp);
|
||||
path.pop_back();
|
||||
j += consumed;
|
||||
}
|
||||
index = j;
|
||||
} else if (current.type == JSMN_PRIMITIVE) {
|
||||
std::string value(json + current.start, current.end - current.start);
|
||||
auto type = getType(value);
|
||||
|
||||
if (type == JSONType::INT32) {
|
||||
AddJSONEncodeValue(path,
|
||||
1,
|
||||
static_cast<uint8_t>(JSONType::INT32),
|
||||
offset,
|
||||
current.start,
|
||||
current.end - current.start,
|
||||
stoi(value),
|
||||
mp);
|
||||
} else if (type == JSONType::INT64) {
|
||||
AddJSONEncodeValue(path,
|
||||
0,
|
||||
static_cast<uint8_t>(JSONType::INT64),
|
||||
offset,
|
||||
current.start,
|
||||
current.end - current.start,
|
||||
0,
|
||||
mp);
|
||||
} else if (type == JSONType::FLOAT) {
|
||||
auto fvalue = stof(value);
|
||||
int32_t valueBits = *reinterpret_cast<int32_t*>(&fvalue);
|
||||
AddJSONEncodeValue(path,
|
||||
1,
|
||||
static_cast<uint8_t>(JSONType::FLOAT),
|
||||
offset,
|
||||
current.start,
|
||||
current.end - current.start,
|
||||
valueBits,
|
||||
mp);
|
||||
} else if (type == JSONType::DOUBLE) {
|
||||
AddJSONEncodeValue(path,
|
||||
0,
|
||||
static_cast<uint8_t>(JSONType::DOUBLE),
|
||||
offset,
|
||||
current.start,
|
||||
current.end - current.start,
|
||||
0,
|
||||
mp);
|
||||
} else if (type == JSONType::BOOL) {
|
||||
AddJSONEncodeValue(path,
|
||||
1,
|
||||
static_cast<uint8_t>(JSONType::BOOL),
|
||||
offset,
|
||||
current.start,
|
||||
current.end - current.start,
|
||||
value == "true" ? 1 : 0,
|
||||
mp);
|
||||
}
|
||||
|
||||
index++;
|
||||
} else if (current.type == JSMN_ARRAY) {
|
||||
AddJSONEncodeValue(path,
|
||||
0,
|
||||
static_cast<uint8_t>(JSONType::UNKNOWN),
|
||||
offset,
|
||||
current.start,
|
||||
current.end - current.start,
|
||||
0,
|
||||
mp);
|
||||
// skip array parse
|
||||
int count = current.size;
|
||||
int j = 1;
|
||||
while (count > 0) {
|
||||
count--;
|
||||
if (tokens[j].size != 0) {
|
||||
count += tokens[j].size;
|
||||
}
|
||||
j++;
|
||||
}
|
||||
index = j;
|
||||
} else if (current.type == JSMN_STRING) {
|
||||
Assert(current.size == 0);
|
||||
std::string value(json + current.start, current.end - current.start);
|
||||
if (has_escape_sequence(value)) {
|
||||
AddJSONEncodeValue(path,
|
||||
0,
|
||||
static_cast<uint8_t>(JSONType::STRING_ESCAPE),
|
||||
offset,
|
||||
current.start - 1,
|
||||
current.end - current.start + 2,
|
||||
0,
|
||||
mp);
|
||||
} else {
|
||||
AddJSONEncodeValue(path,
|
||||
0,
|
||||
static_cast<uint8_t>(JSONType::STRING),
|
||||
offset,
|
||||
current.start,
|
||||
current.end - current.start,
|
||||
0,
|
||||
mp);
|
||||
}
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
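In the JSMN_ARRAY branch of TravelJson above, the array itself gets one index entry and its children are skipped rather than descended into. The skip loop works because a jsmn token's size field is its number of direct children, so adding it to the pending count walks the whole subtree. A small standalone helper showing the same skip logic over a jsmn token stream (jsmn.h assumed available, as in this file):

#include "jsmn.h"

// Returns the index one past the subtree rooted at tokens[start].
// tokens[i].size is the number of direct children of token i.
static int SkipSubtree(const jsmntok_t* tokens, int start) {
    int pending = 1;  // the root token itself
    int i = start;
    while (pending > 0) {
        pending--;
        pending += tokens[i].size;  // enqueue this token's children
        i++;
    }
    return i;
}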
void
JsonKeyStatsInvertedIndex::AddJson(
    const char* json,
    int64_t offset,
    std::map<std::string, std::vector<int64_t>>& mp) {
    jsmn_parser parser;
    jsmntok_t* tokens = (jsmntok_t*)malloc(16 * sizeof(jsmntok_t));
    if (!tokens) {
        PanicInfo(ErrorCode::UnexpectedError, "alloc jsmn token failed");
        return;
    }
    int num_tokens = 0;
    int token_capacity = 16;

    jsmn_init(&parser);

    while (1) {
        int r = jsmn_parse(&parser, json, strlen(json), tokens, token_capacity);
        if (r < 0) {
            if (r == JSMN_ERROR_NOMEM) {
                // Reallocate tokens array if not enough space
                token_capacity *= 2;
                tokens = (jsmntok_t*)realloc(
                    tokens, token_capacity * sizeof(jsmntok_t));
                if (!tokens) {
                    PanicInfo(ErrorCode::UnexpectedError, "realloc failed");
                }
                continue;
            } else {
                free(tokens);
                PanicInfo(ErrorCode::UnexpectedError,
                          "Failed to parse Json: {}, error: {}",
                          json,
                          int(r));
            }
        }
        num_tokens = r;
        break;
    }

    int index = 0;
    std::vector<std::string> paths;
    TravelJson(json, tokens, index, paths, offset, mp);
    free(tokens);
}
|
||||
|
||||
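For orientation, a hypothetical row like {"a": 1, "b": {"c": "x"}, "d": [1, 2]} would leave mp with one posting list per pointer that TravelJson visits: "/a" as an inline INT32 (flag 1), "/b" as an object span (flag 0, type UNKNOWN), "/b/c" as a STRING offset/length entry, and "/d" as an array span (flag 0, type UNKNOWN); arrays are not descended into. A tiny sketch of that expected shape, with placeholder ids standing in for the real encoded values:

#include <cstdint>
#include <map>
#include <string>
#include <vector>

// Shape of mp for {"a": 1, "b": {"c": "x"}, "d": [1, 2]} (one row).
// The 0 placeholders stand in for EncodeValue/EncodeOffset results.
static std::map<std::string, std::vector<int64_t>> ExpectedPostings() {
    return {
        {"/a", {0}},    // inline INT32 value, flag = 1
        {"/b", {0}},    // nested-object span, flag = 0, type UNKNOWN
        {"/b/c", {0}},  // STRING offset/length, flag = 0
        {"/d", {0}},    // array span, flag = 0, type UNKNOWN
    };
}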
JsonKeyStatsInvertedIndex::JsonKeyStatsInvertedIndex(
|
||||
const storage::FileManagerContext& ctx,
|
||||
bool is_load,
|
||||
int64_t json_stats_tantivy_memory_budget,
|
||||
uint32_t tantivy_index_version)
|
||||
: commit_interval_in_ms_(std::numeric_limits<int64_t>::max()),
|
||||
last_commit_time_(stdclock::now()) {
|
||||
LOG_INFO("json_stats_tantivy_memory_budget:{}",
|
||||
json_stats_tantivy_memory_budget);
|
||||
schema_ = ctx.fieldDataMeta.field_schema;
|
||||
field_id_ = ctx.fieldDataMeta.field_id;
|
||||
mem_file_manager_ = std::make_shared<MemFileManager>(ctx);
|
||||
disk_file_manager_ = std::make_shared<DiskFileManager>(ctx);
|
||||
if (is_load) {
|
||||
auto prefix = disk_file_manager_->GetLocalJsonKeyIndexPrefix();
|
||||
path_ = prefix;
|
||||
} else {
|
||||
auto prefix = disk_file_manager_->GetJsonKeyIndexIdentifier();
|
||||
path_ = std::string(TMP_JSON_INVERTED_LOG_PREFIX) + prefix;
|
||||
boost::filesystem::create_directories(path_);
|
||||
std::string field_name =
|
||||
std::to_string(disk_file_manager_->GetFieldDataMeta().field_id);
|
||||
d_type_ = TantivyDataType::Keyword;
|
||||
wrapper_ = std::make_shared<TantivyIndexWrapper>(
|
||||
field_name.c_str(),
|
||||
d_type_,
|
||||
path_.c_str(),
|
||||
tantivy_index_version,
|
||||
false,
|
||||
false,
|
||||
1,
|
||||
json_stats_tantivy_memory_budget);
|
||||
}
|
||||
}
|
||||
|
||||
JsonKeyStatsInvertedIndex::JsonKeyStatsInvertedIndex(
|
||||
int64_t commit_interval_in_ms, const char* unique_id)
|
||||
: commit_interval_in_ms_(commit_interval_in_ms),
|
||||
last_commit_time_(stdclock::now()) {
|
||||
d_type_ = TantivyDataType::Keyword;
|
||||
wrapper_ = std::make_shared<TantivyIndexWrapper>(
|
||||
unique_id, d_type_, "", TANTIVY_INDEX_LATEST_VERSION, false, true);
|
||||
}
|
||||
|
||||
JsonKeyStatsInvertedIndex::JsonKeyStatsInvertedIndex(
|
||||
int64_t commit_interval_in_ms,
|
||||
const char* unique_id,
|
||||
const std::string& path)
|
||||
: commit_interval_in_ms_(commit_interval_in_ms),
|
||||
last_commit_time_(stdclock::now()) {
|
||||
boost::filesystem::path prefix = path;
|
||||
boost::filesystem::path sub_path = unique_id;
|
||||
path_ = (prefix / sub_path).string();
|
||||
boost::filesystem::create_directories(path_);
|
||||
d_type_ = TantivyDataType::Keyword;
|
||||
wrapper_ = std::make_shared<TantivyIndexWrapper>(
|
||||
unique_id, d_type_, path_.c_str(), TANTIVY_INDEX_LATEST_VERSION);
|
||||
}
|
||||
|
||||
IndexStatsPtr
|
||||
JsonKeyStatsInvertedIndex::Upload(const Config& config) {
|
||||
finish();
|
||||
boost::filesystem::path p(path_);
|
||||
boost::filesystem::directory_iterator end_iter;
|
||||
|
||||
for (boost::filesystem::directory_iterator iter(p); iter != end_iter;
|
||||
iter++) {
|
||||
if (boost::filesystem::is_directory(*iter)) {
|
||||
LOG_WARN("{} is a directory", iter->path().string());
|
||||
} else {
|
||||
LOG_INFO("trying to add json key inverted index log: {}",
|
||||
iter->path().string());
|
||||
AssertInfo(
|
||||
disk_file_manager_->AddJsonKeyIndexLog(iter->path().string()),
|
||||
"failed to add json key inverted index log: {}",
|
||||
iter->path().string());
|
||||
LOG_INFO("json key inverted index log: {} added",
|
||||
iter->path().string());
|
||||
}
|
||||
}
|
||||
|
||||
auto remote_paths_to_size = disk_file_manager_->GetRemotePathsToFileSize();
|
||||
|
||||
auto binary_set = Serialize(config);
|
||||
mem_file_manager_->AddFile(binary_set);
|
||||
auto remote_mem_path_to_size =
|
||||
mem_file_manager_->GetRemotePathsToFileSize();
|
||||
|
||||
std::vector<SerializedIndexFileInfo> index_files;
|
||||
index_files.reserve(remote_paths_to_size.size() +
|
||||
remote_mem_path_to_size.size());
|
||||
for (auto& file : remote_paths_to_size) {
|
||||
index_files.emplace_back(disk_file_manager_->GetFileName(file.first),
|
||||
file.second);
|
||||
}
|
||||
for (auto& file : remote_mem_path_to_size) {
|
||||
index_files.emplace_back(file.first, file.second);
|
||||
}
|
||||
return IndexStats::New(mem_file_manager_->GetAddedTotalMemSize() +
|
||||
disk_file_manager_->GetAddedTotalFileSize(),
|
||||
std::move(index_files));
|
||||
}
|
||||
|
||||
void
|
||||
JsonKeyStatsInvertedIndex::Load(milvus::tracer::TraceContext ctx,
|
||||
const Config& config) {
|
||||
auto index_files =
|
||||
GetValueFromConfig<std::vector<std::string>>(config, "index_files");
|
||||
AssertInfo(index_files.has_value(),
|
||||
"index file paths is empty when load json key index");
|
||||
for (auto& index_file : index_files.value()) {
|
||||
boost::filesystem::path p(index_file);
|
||||
if (!p.has_parent_path()) {
|
||||
auto remote_prefix =
|
||||
disk_file_manager_->GetRemoteJsonKeyLogPrefix();
|
||||
index_file = remote_prefix + "/" + index_file;
|
||||
}
|
||||
}
|
||||
disk_file_manager_->CacheJsonKeyIndexToDisk(index_files.value());
|
||||
AssertInfo(
|
||||
tantivy_index_exist(path_.c_str()), "index not exist: {}", path_);
|
||||
wrapper_ = std::make_shared<TantivyIndexWrapper>(path_.c_str());
|
||||
LOG_INFO("load json key index done for field id:{} with dir:{}",
|
||||
field_id_,
|
||||
path_);
|
||||
}

void
JsonKeyStatsInvertedIndex::BuildWithFieldData(
    const std::vector<FieldDataPtr>& field_datas) {
    AssertInfo(schema_.data_type() == proto::schema::DataType::JSON,
               "schema data type is {}",
               schema_.data_type());
    BuildWithFieldData(field_datas, schema_.nullable());
}

void
JsonKeyStatsInvertedIndex::BuildWithFieldData(
    const std::vector<FieldDataPtr>& field_datas, bool nullable) {
    int64_t offset = 0;
    std::map<std::string, std::vector<int64_t>> mp;
    if (nullable) {
        for (const auto& data : field_datas) {
            auto n = data->get_num_rows();
            for (int i = 0; i < n; i++) {
                if (!data->is_valid(i)) {
                    continue;
                }
                AddJson(static_cast<const milvus::Json*>(data->RawValue(i))
                            ->data()
                            .data(),
                        offset++,
                        mp);
            }
        }
    } else {
        for (const auto& data : field_datas) {
            auto n = data->get_num_rows();
            for (int i = 0; i < n; i++) {
                AddJson(static_cast<const milvus::Json*>(data->RawValue(i))
                            ->data()
                            .data(),
                        offset++,
                        mp);
            }
        }
    }
    AddInvertedRecord(mp);
    LOG_INFO("build json key index done for field id:{}", field_id_);
}

void
JsonKeyStatsInvertedIndex::AddJSONDatas(size_t n,
                                        const std::string* jsonDatas,
                                        const bool* valids,
                                        int64_t offset_begin) {
    std::map<std::string, std::vector<int64_t>> mp;
    for (int i = 0; i < n; i++) {
        auto offset = i + offset_begin;
        if (valids != nullptr && !valids[i]) {
            continue;
        }
        AddJson(jsonDatas[i].c_str(), offset, mp);
    }
    AddInvertedRecord(mp);
    is_data_uncommitted_ = true;
    LOG_INFO("build json key index done for AddJSONDatas");
    if (shouldTriggerCommit()) {
        Commit();
    }
}

void
JsonKeyStatsInvertedIndex::Finish() {
    finish();
}

bool
JsonKeyStatsInvertedIndex::shouldTriggerCommit() {
    auto span = (std::chrono::duration<double, std::milli>(
                     stdclock::now() - last_commit_time_.load()))
                    .count();
    return span > commit_interval_in_ms_;
}

void
JsonKeyStatsInvertedIndex::Commit() {
    std::unique_lock<std::mutex> lck(mtx_, std::defer_lock);
    if (lck.try_lock()) {
        is_data_uncommitted_ = false;
        wrapper_->commit();
        last_commit_time_.store(stdclock::now());
    }
}

void
JsonKeyStatsInvertedIndex::Reload() {
    std::unique_lock<std::mutex> lck(mtx_, std::defer_lock);
    if (lck.try_lock()) {
        wrapper_->reload();
    }
}

void
JsonKeyStatsInvertedIndex::CreateReader() {
    wrapper_->create_reader();
}

} // namespace milvus::index
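
The growing-segment write path above batches postings and only flushes them to Tantivy when shouldTriggerCommit() reports that commit_interval_in_ms_ has elapsed, and Commit()/Reload() take the mutex with try_lock so concurrent writers never block on each other. The following is a minimal, standalone sketch of that time-based, non-blocking commit throttle; the class name ThrottledCommitter and its members are illustrative and not part of the Milvus tree.

#include <atomic>
#include <chrono>
#include <mutex>

// Illustrative only: mirrors the commit-throttling pattern used by
// JsonKeyStatsInvertedIndex, not code from the repository.
class ThrottledCommitter {
 public:
    explicit ThrottledCommitter(int64_t interval_ms)
        : interval_ms_(interval_ms),
          last_commit_(std::chrono::high_resolution_clock::now()) {
    }

    // Returns true when the commit actually ran.
    template <typename F>
    bool
    MaybeCommit(F&& do_commit) {
        auto elapsed = std::chrono::duration<double, std::milli>(
                           std::chrono::high_resolution_clock::now() -
                           last_commit_.load())
                           .count();
        if (elapsed <= interval_ms_) {
            return false;  // interval not reached yet, keep batching
        }
        std::unique_lock<std::mutex> lck(mtx_, std::defer_lock);
        if (!lck.try_lock()) {
            return false;  // another writer is committing, do not block
        }
        do_commit();
        last_commit_.store(std::chrono::high_resolution_clock::now());
        return true;
    }

 private:
    int64_t interval_ms_;
    std::mutex mtx_;
    std::atomic<std::chrono::high_resolution_clock::time_point> last_commit_;
};

A writer would call MaybeCommit after each batch insert, which matches how AddJSONDatas above calls Commit() only when shouldTriggerCommit() is true.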
|
|
@ -0,0 +1,298 @@
|
|||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <boost/filesystem.hpp>
|
||||
|
||||
#include "index/InvertedIndexTantivy.h"
|
||||
#include "common/jsmn.h"
|
||||
namespace milvus::index {
|
||||
enum class JSONType {
|
||||
UNKNOWN,
|
||||
BOOL,
|
||||
INT32,
|
||||
INT64,
|
||||
FLOAT,
|
||||
DOUBLE,
|
||||
STRING,
|
||||
STRING_ESCAPE
|
||||
};
|
||||
using stdclock = std::chrono::high_resolution_clock;
|
||||
class JsonKeyStatsInvertedIndex : public InvertedIndexTantivy<std::string> {
|
||||
public:
|
||||
explicit JsonKeyStatsInvertedIndex(
|
||||
const storage::FileManagerContext& ctx,
|
||||
bool is_load,
|
||||
int64_t json_stats_tantivy_memory_budget = 16777216,
|
||||
uint32_t tantivy_index_version = TANTIVY_INDEX_LATEST_VERSION);
|
||||
|
||||
explicit JsonKeyStatsInvertedIndex(int64_t commit_interval_in_ms,
|
||||
const char* unique_id);
|
||||
|
||||
explicit JsonKeyStatsInvertedIndex(int64_t commit_interval_in_ms,
|
||||
const char* unique_id,
|
||||
const std::string& path);
|
||||
|
||||
~JsonKeyStatsInvertedIndex() override{};
|
||||
|
||||
public:
|
||||
IndexStatsPtr
|
||||
Upload(const Config& config) override;
|
||||
|
||||
void
|
||||
Load(milvus::tracer::TraceContext ctx, const Config& config) override;
|
||||
|
||||
void
|
||||
BuildWithFieldData(const std::vector<FieldDataPtr>& datas) override;
|
||||
|
||||
void
|
||||
BuildWithFieldData(const std::vector<FieldDataPtr>& datas, bool nullable);
|
||||
|
||||
    const TargetBitmap
    FilterByPath(
        const std::string& path,
        int32_t row,
        bool is_growing,
        bool is_strong_consistency,
        std::function<bool(
            bool, uint8_t, uint32_t, uint16_t, uint16_t, int32_t)> filter) {
        auto processArray = [this, &path, row, &filter]() {
            TargetBitmap bitset(row);
            auto array = wrapper_->term_query_i64(path);
            LOG_INFO("json key filter size:{}", array.array_.len);
            for (size_t j = 0; j < array.array_.len; j++) {
                auto the_offset = array.array_.array[j];

                if (DecodeValid(the_offset)) {
                    auto tuple = DecodeValue(the_offset);
                    auto row_id = std::get<1>(tuple);
                    if (row_id >= row) {
                        continue;
                    }
                    bitset[row_id] = filter(true,
                                            std::get<0>(tuple),
                                            std::get<1>(tuple),
                                            0,
                                            0,
                                            std::get<2>(tuple));
                } else {
                    auto tuple = DecodeOffset(the_offset);
                    auto row_id = std::get<1>(tuple);
                    if (row_id >= row) {
                        continue;
                    }
                    bitset[row_id] = filter(false,
                                            std::get<0>(tuple),
                                            std::get<1>(tuple),
                                            std::get<2>(tuple),
                                            std::get<3>(tuple),
                                            0);
                }
            }

            return bitset;
        };

        if (is_growing) {
            if (shouldTriggerCommit() || is_strong_consistency) {
                if (is_data_uncommitted_) {
                    Commit();
                }
                Reload();
                return processArray();
            } else {
                return processArray();
            }
        } else {
            return processArray();
        }
    }
|
||||
|
||||
void
|
||||
AddJSONDatas(size_t n,
|
||||
const std::string* jsonDatas,
|
||||
const bool* valids,
|
||||
int64_t offset_begin);
|
||||
|
||||
void
|
||||
Finish();
|
||||
|
||||
void
|
||||
Commit();
|
||||
|
||||
void
|
||||
Reload();
|
||||
|
||||
void
|
||||
CreateReader();
|
||||
|
||||
bool
|
||||
has_escape_sequence(const std::string& str) {
|
||||
for (size_t i = 0; i < str.size(); ++i) {
|
||||
if (str[i] == '\\' && i + 1 < str.size()) {
|
||||
char next = str[i + 1];
|
||||
if (next == 'n' || next == 't' || next == 'r' || next == 'b' ||
|
||||
next == 'f' || next == 'v' || next == '\\' ||
|
||||
next == '\"' || next == '\'' || next == '0' ||
|
||||
next == 'u' || next == '/') {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
void
|
||||
AddJson(const char* json,
|
||||
int64_t offset,
|
||||
std::map<std::string, std::vector<int64_t>>& mp);
|
||||
|
||||
void
|
||||
TravelJson(const char* json,
|
||||
jsmntok* tokens,
|
||||
int& index,
|
||||
std::vector<std::string>& path,
|
||||
int32_t offset,
|
||||
std::map<std::string, std::vector<int64_t>>& mp);
|
||||
|
||||
void
|
||||
AddJSONEncodeValue(const std::vector<std::string>& paths,
|
||||
uint8_t flag,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t length,
|
||||
int32_t value,
|
||||
std::map<std::string, std::vector<int64_t>>& mp);
|
||||
|
||||
    int64_t
    EncodeOffset(uint8_t flag,
                 uint8_t type,
                 uint32_t row_id,
                 uint16_t row_offset,
                 uint16_t size) {
        row_id &= 0x0FFFFFFF;
        return static_cast<int64_t>(flag) << 63 |
               static_cast<int64_t>(type) << 60 |
               static_cast<int64_t>(row_id) << 32 |
               static_cast<int64_t>(row_offset) << 16 |
               static_cast<int64_t>(size);
    }

    int64_t
    EncodeValue(uint8_t flag, uint8_t type, uint32_t row_id, int32_t value) {
        row_id &= 0x0FFFFFFF;
        return static_cast<int64_t>(flag) << 63 |
               static_cast<int64_t>(type) << 60 |
               static_cast<int64_t>(row_id) << 32 |
               static_cast<uint32_t>(value);
    }

    bool
    DecodeValid(int64_t encode_offset) {
        return (encode_offset >> 63) & 1;
    }

    std::tuple<uint8_t, uint32_t, int32_t>
    DecodeValue(int64_t encode_offset) {
        uint8_t type = (encode_offset >> 60) & 0x7;
        uint32_t row_id = (encode_offset >> 32) & 0x0FFFFFFF;
        int32_t value = static_cast<int32_t>(encode_offset & 0xFFFFFFFF);
        return std::make_tuple(type, row_id, value);
    }

    std::tuple<uint8_t, uint32_t, uint16_t, uint16_t>
    DecodeOffset(int64_t encode_offset) {
        uint8_t type = (encode_offset >> 60) & 0x7;
        uint32_t row_id = (encode_offset >> 32) & 0x0FFFFFFF;
        uint16_t row_offset = (encode_offset >> 16) & 0xFFFF;
        uint16_t size = encode_offset & 0xFFFF;
        return std::make_tuple(type, row_id, row_offset, size);
    }
|
||||
|
||||
bool
|
||||
shouldTriggerCommit();
|
||||
|
||||
bool
|
||||
isBoolean(const std::string& str) {
|
||||
return str == "true" || str == "false";
|
||||
}
|
||||
|
||||
bool
|
||||
isInt32(const std::string& str) {
|
||||
std::istringstream iss(str);
|
||||
int64_t num;
|
||||
iss >> num;
|
||||
|
||||
return !iss.fail() && iss.eof() &&
|
||||
num >= std::numeric_limits<int32_t>::min() &&
|
||||
num <= std::numeric_limits<int32_t>::max();
|
||||
}
|
||||
|
||||
bool
|
||||
isInt64(const std::string& str) {
|
||||
std::istringstream iss(str);
|
||||
int64_t num;
|
||||
iss >> num;
|
||||
|
||||
return !iss.fail() && iss.eof();
|
||||
}
|
||||
|
||||
bool
|
||||
isFloat(const std::string& str) {
|
||||
try {
|
||||
float d = std::stof(str);
|
||||
return true;
|
||||
} catch (...) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
isDouble(const std::string& str) {
|
||||
try {
|
||||
double d = std::stod(str);
|
||||
return true;
|
||||
} catch (...) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
JSONType
|
||||
getType(const std::string& str) {
|
||||
if (isBoolean(str)) {
|
||||
return JSONType::BOOL;
|
||||
} else if (isInt32(str)) {
|
||||
return JSONType::INT32;
|
||||
} else if (isInt64(str)) {
|
||||
return JSONType::INT64;
|
||||
} else if (isFloat(str)) {
|
||||
return JSONType::FLOAT;
|
||||
} else if (isDouble(str)) {
|
||||
return JSONType::DOUBLE;
|
||||
}
|
||||
return JSONType::UNKNOWN;
|
||||
}
|
||||
|
||||
void
|
||||
AddInvertedRecord(std::map<std::string, std::vector<int64_t>>& mp);
|
||||
|
||||
private:
|
||||
int64_t field_id_;
|
||||
mutable std::mutex mtx_;
|
||||
std::atomic<stdclock::time_point> last_commit_time_;
|
||||
int64_t commit_interval_in_ms_;
|
||||
std::atomic<bool> is_data_uncommitted_ = false;
|
||||
};
|
||||
} // namespace milvus::index
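
Each posting stored for a JSON key is a single int64 whose bits pack, from high to low: a 1-bit flag (bit 63) selecting the encoding, a 3-bit JSONType (bits 60-62), a 28-bit row id (bits 32-59), and then either a 16-bit in-row offset plus 16-bit length, or a 32-bit inline value, depending on that flag. FilterByPath decodes every posting and hands the fields to the caller's six-argument predicate. Below is a minimal, self-contained sketch of the round trip and of a caller-side predicate, assuming only what the class above shows; encode_offset is an illustrative free-function rewrite of the EncodeOffset helper, not an additional Milvus API, and type 6 corresponds to STRING only under the default enum numbering of JSONType.

#include <cassert>
#include <cstdint>
#include <functional>

// Pack a row-relative offset/length posting, mirroring EncodeOffset above.
int64_t
encode_offset(uint8_t flag, uint8_t type, uint32_t row_id,
              uint16_t row_offset, uint16_t size) {
    row_id &= 0x0FFFFFFF;  // row id is limited to 28 bits
    return static_cast<int64_t>(flag) << 63 |
           static_cast<int64_t>(type) << 60 |
           static_cast<int64_t>(row_id) << 32 |
           static_cast<int64_t>(row_offset) << 16 | static_cast<int64_t>(size);
}

int
main() {
    // flag=0 means "decode as offset/length"; type 6 is STRING in JSONType
    // order, assuming default enum numbering.
    auto posting = encode_offset(/*flag=*/0, /*type=*/6, /*row_id=*/42,
                                 /*row_offset=*/128, /*size=*/16);

    // The same masks DecodeOffset applies.
    assert(((posting >> 63) & 1) == 0);
    assert(((posting >> 60) & 0x7) == 6);
    assert(((posting >> 32) & 0x0FFFFFFF) == 42);
    assert(((posting >> 16) & 0xFFFF) == 128);
    assert((posting & 0xFFFF) == 16);

    // Shape of the predicate FilterByPath expects:
    // (is_inline_value, type, row_id, row_offset, length, inline_value)
    std::function<bool(bool, uint8_t, uint32_t, uint16_t, uint16_t, int32_t)>
        filter = [](bool is_inline_value, uint8_t type, uint32_t row_id,
                    uint16_t row_offset, uint16_t length, int32_t value) {
            // Hypothetical rule: keep offset-encoded keys shorter than 32 bytes.
            return !is_inline_value && length < 32;
        };
    return filter(false, 6, 42, 128, 16, 0) ? 0 : 1;
}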
|
|
@ -34,6 +34,7 @@
|
|||
#include "pb/index_cgo_msg.pb.h"
|
||||
#include "storage/Util.h"
|
||||
#include "index/Meta.h"
|
||||
#include "index/JsonKeyStatsInvertedIndex.h"
|
||||
|
||||
using namespace milvus;
|
||||
CStatus
|
||||
|
@ -225,6 +226,81 @@ CreateIndex(CIndex* res_index,
|
|||
}
|
||||
}
|
||||
|
||||
CStatus
|
||||
BuildJsonKeyIndex(ProtoLayoutInterface result,
|
||||
const uint8_t* serialized_build_index_info,
|
||||
const uint64_t len) {
|
||||
try {
|
||||
auto build_index_info =
|
||||
std::make_unique<milvus::proto::indexcgo::BuildIndexInfo>();
|
||||
auto res =
|
||||
build_index_info->ParseFromArray(serialized_build_index_info, len);
|
||||
AssertInfo(res, "Unmarshall build index info failed");
|
||||
|
||||
auto field_type =
|
||||
static_cast<DataType>(build_index_info->field_schema().data_type());
|
||||
|
||||
auto storage_config =
|
||||
get_storage_config(build_index_info->storage_config());
|
||||
auto config = get_config(build_index_info);
|
||||
|
||||
// init file manager
|
||||
milvus::storage::FieldDataMeta field_meta{
|
||||
build_index_info->collectionid(),
|
||||
build_index_info->partitionid(),
|
||||
build_index_info->segmentid(),
|
||||
build_index_info->field_schema().fieldid(),
|
||||
build_index_info->field_schema()};
|
||||
|
||||
milvus::storage::IndexMeta index_meta{
|
||||
build_index_info->segmentid(),
|
||||
build_index_info->field_schema().fieldid(),
|
||||
build_index_info->buildid(),
|
||||
build_index_info->index_version(),
|
||||
"",
|
||||
build_index_info->field_schema().name(),
|
||||
field_type,
|
||||
build_index_info->dim(),
|
||||
};
|
||||
|
||||
uint32_t tantivy_index_version =
|
||||
milvus::index::GetValueFromConfig<int32_t>(
|
||||
config, milvus::index::TANTIVY_INDEX_VERSION)
|
||||
.value_or(milvus::index::TANTIVY_INDEX_LATEST_VERSION);
|
||||
auto chunk_manager =
|
||||
milvus::storage::CreateChunkManager(storage_config);
|
||||
|
||||
milvus::storage::FileManagerContext fileManagerContext(
|
||||
field_meta, index_meta, chunk_manager);
|
||||
|
||||
auto field_schema =
|
||||
FieldMeta::ParseFrom(build_index_info->field_schema());
|
||||
auto index = std::make_unique<index::JsonKeyStatsInvertedIndex>(
|
||||
fileManagerContext,
|
||||
false,
|
||||
build_index_info->json_key_stats_tantivy_memory(),
|
||||
tantivy_index_version);
|
||||
index->Build(config);
|
||||
auto create_index_result = index->Upload(config);
|
||||
create_index_result->SerializeAt(
|
||||
reinterpret_cast<milvus::ProtoLayout*>(result));
|
||||
auto status = CStatus();
|
||||
status.error_code = Success;
|
||||
status.error_msg = "";
|
||||
return status;
|
||||
} catch (SegcoreError& e) {
|
||||
auto status = CStatus();
|
||||
status.error_code = e.get_error_code();
|
||||
status.error_msg = strdup(e.what());
|
||||
return status;
|
||||
} catch (std::exception& e) {
|
||||
auto status = CStatus();
|
||||
status.error_code = UnexpectedError;
|
||||
status.error_msg = strdup(e.what());
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
CStatus
|
||||
BuildTextIndex(ProtoLayoutInterface result,
|
||||
const uint8_t* serialized_build_index_info,
|
||||
|
|
|
@ -36,6 +36,11 @@ CreateIndex(CIndex* res_index,
|
|||
CStatus
|
||||
DeleteIndex(CIndex index);
|
||||
|
||||
CStatus
|
||||
BuildJsonKeyIndex(ProtoLayoutInterface c_binary_set,
|
||||
const uint8_t* serialized_build_index_info,
|
||||
const uint64_t len);
|
||||
|
||||
CStatus
|
||||
BuildTextIndex(ProtoLayoutInterface c_binary_set,
|
||||
const uint8_t* serialized_build_index_info,
|
||||
|
|
|
@ -147,6 +147,12 @@ class ChunkedColumnBase : public ColumnBase {
|
|||
"GetBatchBuffer only supported for VariableColumn");
|
||||
}
|
||||
|
||||
virtual std::string_view
|
||||
RawAt(const size_t i) const {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"RawAt only supported for VariableColumn");
|
||||
}
|
||||
|
||||
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
|
||||
StringViews(int64_t chunk_id,
|
||||
std::optional<std::pair<int64_t, int64_t>> offset_len) const {
|
||||
|
@ -387,7 +393,7 @@ class ChunkedVariableColumn : public ChunkedColumnBase {
|
|||
}
|
||||
|
||||
std::string_view
|
||||
RawAt(const int i) const {
|
||||
RawAt(const size_t i) const {
|
||||
return std::string_view((*this)[i]);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -333,6 +333,12 @@ class SingleChunkColumnBase : public ColumnBase {
|
|||
"viewsbyoffsets only supported for VariableColumn");
|
||||
}
|
||||
|
||||
virtual std::string_view
|
||||
RawAt(const size_t i) const {
|
||||
PanicInfo(ErrorCode::Unsupported,
|
||||
"RawAt only supported for VariableColumn");
|
||||
}
|
||||
|
||||
virtual void
|
||||
AppendBatch(const FieldDataPtr data) override {
|
||||
size_t required_size = data_size_ + data->DataSize();
|
||||
|
@ -801,7 +807,7 @@ class SingleChunkVariableColumn : public SingleChunkColumnBase {
|
|||
}
|
||||
|
||||
std::string_view
|
||||
RawAt(const int i) const {
|
||||
RawAt(const size_t i) const {
|
||||
return std::string_view((*this)[i]);
|
||||
}
|
||||
|
||||
|
|
|
@ -33,10 +33,12 @@ class ExecPlanNodeVisitor : PlanNodeVisitor {
|
|||
public:
|
||||
ExecPlanNodeVisitor(const segcore::SegmentInterface& segment,
|
||||
Timestamp timestamp,
|
||||
const PlaceholderGroup& placeholder_group)
|
||||
const PlaceholderGroup& placeholder_group,
|
||||
int32_t consystency_level)
|
||||
: segment_(segment),
|
||||
timestamp_(timestamp),
|
||||
placeholder_group_(placeholder_group) {
|
||||
placeholder_group_(placeholder_group),
|
||||
consystency_level_(consystency_level) {
|
||||
}
|
||||
|
||||
SearchResult
|
||||
|
@ -60,6 +62,7 @@ class ExecPlanNodeVisitor : PlanNodeVisitor {
|
|||
const PlaceholderGroup& placeholder_group_;
|
||||
|
||||
SearchResultOpt search_result_opt_;
|
||||
int32_t consystency_level_ = 0;
|
||||
};
|
||||
} // namespace impl
|
||||
|
||||
|
@ -80,7 +83,6 @@ ExecPlanNodeVisitor::ExecuteTask(
|
|||
plan.plan_node_->ToString(),
|
||||
query_context->get_active_count(),
|
||||
query_context->get_query_timestamp());
|
||||
|
||||
auto task =
|
||||
milvus::exec::Task::Create(DEFAULT_TASK_ID, plan, 0, query_context);
|
||||
int64_t processed_num = 0;
|
||||
|
@ -127,8 +129,12 @@ ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) {
|
|||
auto plan = plan::PlanFragment(node.plannodes_);
|
||||
|
||||
// Set query context
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
DEAFULT_QUERY_ID, segment, active_count, timestamp_);
|
||||
auto query_context =
|
||||
std::make_shared<milvus::exec::QueryContext>(DEAFULT_QUERY_ID,
|
||||
segment,
|
||||
active_count,
|
||||
timestamp_,
|
||||
consystency_level_);
|
||||
query_context->set_search_info(node.search_info_);
|
||||
query_context->set_placeholder_group(placeholder_group_);
|
||||
|
||||
|
@ -178,8 +184,12 @@ ExecPlanNodeVisitor::visit(RetrievePlanNode& node) {
|
|||
auto plan = plan::PlanFragment(node.plannodes_);
|
||||
|
||||
// Set query context
|
||||
auto query_context = std::make_shared<milvus::exec::QueryContext>(
|
||||
DEAFULT_QUERY_ID, segment, active_count, timestamp_);
|
||||
auto query_context =
|
||||
std::make_shared<milvus::exec::QueryContext>(DEAFULT_QUERY_ID,
|
||||
segment,
|
||||
active_count,
|
||||
timestamp_,
|
||||
consystency_level_);
|
||||
|
||||
// Do task execution
|
||||
auto bitset_holder = ExecuteTask(plan, query_context);
|
||||
|
|
|
@ -46,15 +46,20 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor {
|
|||
public:
|
||||
ExecPlanNodeVisitor(const segcore::SegmentInterface& segment,
|
||||
Timestamp timestamp,
|
||||
const PlaceholderGroup* placeholder_group)
|
||||
const PlaceholderGroup* placeholder_group,
|
||||
int32_t consystency_level = 0)
|
||||
: segment_(segment),
|
||||
timestamp_(timestamp),
|
||||
placeholder_group_(placeholder_group) {
|
||||
placeholder_group_(placeholder_group),
|
||||
consystency_level_(consystency_level) {
|
||||
}
|
||||
|
||||
ExecPlanNodeVisitor(const segcore::SegmentInterface& segment,
|
||||
Timestamp timestamp)
|
||||
: segment_(segment), timestamp_(timestamp) {
|
||||
Timestamp timestamp,
|
||||
int32_t consystency_level = 0)
|
||||
: segment_(segment),
|
||||
timestamp_(timestamp),
|
||||
consystency_level_(consystency_level) {
|
||||
placeholder_group_ = nullptr;
|
||||
}
|
||||
|
||||
|
@ -108,6 +113,7 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor {
|
|||
SearchResultOpt search_result_opt_;
|
||||
RetrieveResultOpt retrieve_result_opt_;
|
||||
bool expr_use_pk_index_ = false;
|
||||
int32_t consystency_level_ = 0;
|
||||
};
|
||||
|
||||
// for test use only
|
||||
|
|
|
@ -97,6 +97,31 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
|
|||
void
|
||||
LoadTextIndex(FieldId field_id,
|
||||
std::unique_ptr<index::TextMatchIndex> index) override;
|
||||
void
|
||||
LoadJsonKeyIndex(
|
||||
FieldId field_id,
|
||||
std::unique_ptr<index::JsonKeyStatsInvertedIndex> index) override {
|
||||
std::unique_lock lck(mutex_);
|
||||
const auto& field_meta = schema_->operator[](field_id);
|
||||
json_key_indexes_[field_id] = std::move(index);
|
||||
}
|
||||
|
||||
index::JsonKeyStatsInvertedIndex*
|
||||
GetJsonKeyIndex(FieldId field_id) const override {
|
||||
std::shared_lock lck(mutex_);
|
||||
auto iter = json_key_indexes_.find(field_id);
|
||||
if (iter == json_key_indexes_.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return iter->second.get();
|
||||
}
|
||||
|
||||
std::pair<std::string_view, bool>
|
||||
GetJsonData(FieldId field_id, size_t offset) const override {
|
||||
auto column = fields_.at(field_id);
|
||||
bool is_valid = column->IsValid(offset);
|
||||
return std::make_pair(std::move(column->RawAt(offset)), is_valid);
|
||||
}
|
||||
|
||||
public:
|
||||
size_t
|
||||
|
@ -406,6 +431,10 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
|
|||
|
||||
// whether the segment is sorted by the pk
|
||||
bool is_sorted_by_pk_ = false;
|
||||
// used for json expr optimization
|
||||
std::unordered_map<FieldId,
|
||||
std::unique_ptr<index::JsonKeyStatsInvertedIndex>>
|
||||
json_key_indexes_;
|
||||
};
|
||||
|
||||
} // namespace milvus::segcore
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "common/EasyAssert.h"
|
||||
#include "common/FieldData.h"
|
||||
#include "common/Types.h"
|
||||
#include "common/Common.h"
|
||||
#include "fmt/format.h"
|
||||
#include "log/Log.h"
|
||||
#include "nlohmann/json.hpp"
|
||||
|
@ -170,6 +171,33 @@ SegmentGrowingImpl::Insert(int64_t reserved_offset,
|
|||
reserved_offset);
|
||||
}
|
||||
|
||||
// index json.
|
||||
if (field_meta.enable_growing_jsonStats()) {
|
||||
std::vector<std::string> jsonDatas(
|
||||
insert_record_proto->fields_data(data_offset)
|
||||
.scalars()
|
||||
.json_data()
|
||||
.data()
|
||||
.begin(),
|
||||
insert_record_proto->fields_data(data_offset)
|
||||
.scalars()
|
||||
.json_data()
|
||||
.data()
|
||||
.end());
|
||||
FixedVector<bool> jsonDatas_valid_data(
|
||||
insert_record_proto->fields_data(data_offset)
|
||||
.valid_data()
|
||||
.begin(),
|
||||
insert_record_proto->fields_data(data_offset)
|
||||
.valid_data()
|
||||
.end());
|
||||
AddJSONDatas(field_id,
|
||||
jsonDatas.data(),
|
||||
jsonDatas_valid_data.data(),
|
||||
num_rows,
|
||||
reserved_offset);
|
||||
}
|
||||
|
||||
// update average row data size
|
||||
auto field_data_size = GetRawDataSizeOfDataArray(
|
||||
&insert_record_proto->fields_data(data_offset),
|
||||
|
@ -318,6 +346,15 @@ SegmentGrowingImpl::LoadFieldData(const LoadFieldDataInfo& infos) {
|
|||
index->Reload();
|
||||
}
|
||||
|
||||
// build json match index
|
||||
if (field_meta.enable_growing_jsonStats()) {
|
||||
auto index = GetJsonKeyIndex(field_id);
|
||||
index->BuildWithFieldData(field_data, field_meta.is_nullable());
|
||||
index->Commit();
|
||||
// Reload reader so that the index can be read immediately
|
||||
index->Reload();
|
||||
}
|
||||
|
||||
// update the mem size
|
||||
stats_.mem_size += storage::GetByteSizeOfFieldDatas(field_data);
|
||||
|
||||
|
@ -939,4 +976,56 @@ SegmentGrowingImpl::AddTexts(milvus::FieldId field_id,
|
|||
iter->second->AddTexts(n, texts, texts_valid_data, offset_begin);
|
||||
}
|
||||
|
||||
void
|
||||
SegmentGrowingImpl::AddJSONDatas(FieldId field_id,
|
||||
const std::string* jsondatas,
|
||||
const bool* jsondatas_valid_data,
|
||||
size_t n,
|
||||
int64_t offset_begin) {
|
||||
std::unique_lock lock(mutex_);
|
||||
auto iter = json_indexes_.find(field_id);
|
||||
AssertInfo(iter != json_indexes_.end(), "json index not found");
|
||||
iter->second->AddJSONDatas(
|
||||
n, jsondatas, jsondatas_valid_data, offset_begin);
|
||||
}
|
||||
|
||||
void
|
||||
SegmentGrowingImpl::CreateJSONIndexes() {
|
||||
for (auto [field_id, field_meta] : schema_->get_fields()) {
|
||||
if (field_meta.enable_growing_jsonStats()) {
|
||||
CreateJSONIndex(FieldId(field_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
SegmentGrowingImpl::CreateJSONIndex(FieldId field_id) {
|
||||
std::unique_lock lock(mutex_);
|
||||
const auto& field_meta = schema_->operator[](field_id);
|
||||
AssertInfo(IsJsonDataType(field_meta.get_data_type()),
|
||||
"cannot create json index on non-json type");
|
||||
std::string unique_id = GetUniqueFieldId(field_meta.get_id().get());
|
||||
auto index = std::make_unique<index::JsonKeyStatsInvertedIndex>(
|
||||
JSON_KEY_STATS_COMMIT_INTERVAL, unique_id.c_str());
|
||||
|
||||
index->Commit();
|
||||
index->CreateReader();
|
||||
|
||||
json_indexes_[field_id] = std::move(index);
|
||||
}
|
||||
|
||||
std::pair<std::string_view, bool>
|
||||
SegmentGrowingImpl::GetJsonData(FieldId field_id, size_t offset) const {
|
||||
auto vec_ptr = dynamic_cast<const ConcurrentVector<Json>*>(
|
||||
insert_record_.get_data_base(field_id));
|
||||
auto& src = *vec_ptr;
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
if (field_meta.is_nullable()) {
|
||||
auto valid_data_ptr = insert_record_.get_valid_data(field_id);
|
||||
return std::make_pair(std::string_view(src[offset]),
|
||||
valid_data_ptr->is_valid(offset));
|
||||
}
|
||||
return std::make_pair(std::string_view(src[offset]), true);
|
||||
}
|
||||
|
||||
} // namespace milvus::segcore
|
||||
|
|
|
@ -226,6 +226,9 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
int64_t count,
|
||||
const std::vector<std::string>& dynamic_field_names) const override;
|
||||
|
||||
virtual std::pair<std::string_view, bool>
|
||||
GetJsonData(FieldId field_id, size_t offset) const override;
|
||||
|
||||
public:
|
||||
friend std::unique_ptr<SegmentGrowing>
|
||||
CreateGrowingSegment(SchemaPtr schema,
|
||||
|
@ -264,6 +267,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
mcm->Register(mmap_descriptor_);
|
||||
}
|
||||
this->CreateTextIndexes();
|
||||
this->CreateJSONIndexes();
|
||||
}
|
||||
|
||||
~SegmentGrowingImpl() {
|
||||
|
@ -414,6 +418,19 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||
void
|
||||
CreateTextIndexes();
|
||||
|
||||
void
|
||||
AddJSONDatas(FieldId field_id,
|
||||
const std::string* jsondatas,
|
||||
const bool* jsondatas_valid_data,
|
||||
size_t n,
|
||||
int64_t offset_begin);
|
||||
|
||||
void
|
||||
CreateJSONIndexes();
|
||||
|
||||
void
|
||||
CreateJSONIndex(FieldId field_id);
|
||||
|
||||
private:
|
||||
storage::MmapChunkDescriptorPtr mmap_descriptor_ = nullptr;
|
||||
SegcoreConfig segcore_config_;
|
||||
|
|
|
@ -83,11 +83,13 @@ std::unique_ptr<SearchResult>
|
|||
SegmentInternalInterface::Search(
|
||||
const query::Plan* plan,
|
||||
const query::PlaceholderGroup* placeholder_group,
|
||||
Timestamp timestamp) const {
|
||||
Timestamp timestamp,
|
||||
int32_t consistency_level) const {
|
||||
std::shared_lock lck(mutex_);
|
||||
milvus::tracer::AddEvent("obtained_segment_lock_mutex");
|
||||
check_search(plan);
|
||||
query::ExecPlanNodeVisitor visitor(*this, timestamp, placeholder_group);
|
||||
query::ExecPlanNodeVisitor visitor(
|
||||
*this, timestamp, placeholder_group, consistency_level);
|
||||
auto results = std::make_unique<SearchResult>();
|
||||
*results = visitor.get_moved_result(*plan->plan_node_);
|
||||
results->segment_ = (void*)this;
|
||||
|
@ -99,11 +101,12 @@ SegmentInternalInterface::Retrieve(tracer::TraceContext* trace_ctx,
|
|||
const query::RetrievePlan* plan,
|
||||
Timestamp timestamp,
|
||||
int64_t limit_size,
|
||||
bool ignore_non_pk) const {
|
||||
bool ignore_non_pk,
|
||||
int32_t consistency_level) const {
|
||||
std::shared_lock lck(mutex_);
|
||||
tracer::AutoSpan span("Retrieve", tracer::GetRootSpan());
|
||||
auto results = std::make_unique<proto::segcore::RetrieveResults>();
|
||||
query::ExecPlanNodeVisitor visitor(*this, timestamp);
|
||||
query::ExecPlanNodeVisitor visitor(*this, timestamp, consistency_level);
|
||||
auto retrieve_results = visitor.get_retrieve_result(*plan->plan_node_);
|
||||
retrieve_results.segment_ = (void*)this;
|
||||
results->set_has_more_result(retrieve_results.has_more_result);
|
||||
|
@ -292,7 +295,8 @@ SegmentInternalInterface::get_real_count() const {
|
|||
milvus::plan::GetNextPlanNodeId(), sources);
|
||||
plan->plan_node_->plannodes_ = plannode;
|
||||
plan->plan_node_->is_count_ = true;
|
||||
auto res = Retrieve(nullptr, plan.get(), MAX_TIMESTAMP, INT64_MAX, false);
|
||||
auto res =
|
||||
Retrieve(nullptr, plan.get(), MAX_TIMESTAMP, INT64_MAX, false, 0);
|
||||
AssertInfo(res->fields_data().size() == 1,
|
||||
"count result should only have one column");
|
||||
AssertInfo(res->fields_data()[0].has_scalars(),
|
||||
|
@ -528,4 +532,13 @@ SegmentInternalInterface::bulk_subscript_not_exist_field(
|
|||
return result;
|
||||
}
|
||||
|
||||
index::JsonKeyStatsInvertedIndex*
|
||||
SegmentInternalInterface::GetJsonKeyIndex(FieldId field_id) const {
|
||||
std::shared_lock lock(mutex_);
|
||||
auto iter = json_indexes_.find(field_id);
|
||||
if (iter == json_indexes_.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return iter->second.get();
|
||||
}
|
||||
} // namespace milvus::segcore
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#include "index/SkipIndex.h"
|
||||
#include "mmap/Column.h"
|
||||
#include "index/TextMatchIndex.h"
|
||||
#include "index/JsonKeyStatsInvertedIndex.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
|
@ -64,14 +65,16 @@ class SegmentInterface {
|
|||
virtual std::unique_ptr<SearchResult>
|
||||
Search(const query::Plan* Plan,
|
||||
const query::PlaceholderGroup* placeholder_group,
|
||||
Timestamp timestamp) const = 0;
|
||||
Timestamp timestamp,
|
||||
int32_t consistency_level = 0) const = 0;
|
||||
|
||||
virtual std::unique_ptr<proto::segcore::RetrieveResults>
|
||||
Retrieve(tracer::TraceContext* trace_ctx,
|
||||
const query::RetrievePlan* Plan,
|
||||
Timestamp timestamp,
|
||||
int64_t limit_size,
|
||||
bool ignore_non_pk) const = 0;
|
||||
bool ignore_non_pk,
|
||||
int32_t consistency_level = 0) const = 0;
|
||||
|
||||
virtual std::unique_ptr<proto::segcore::RetrieveResults>
|
||||
Retrieve(tracer::TraceContext* trace_ctx,
|
||||
|
@ -139,6 +142,11 @@ class SegmentInterface {
|
|||
GetJsonIndex(FieldId field_id, std::string path) const {
|
||||
return nullptr;
|
||||
}
|
||||
virtual index::JsonKeyStatsInvertedIndex*
|
||||
GetJsonKeyIndex(FieldId field_id) const = 0;
|
||||
|
||||
virtual std::pair<std::string_view, bool>
|
||||
GetJsonData(FieldId field_id, size_t offset) const = 0;
|
||||
};
|
||||
|
||||
// internal API for DSL calculation
|
||||
|
@ -247,7 +255,8 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
std::unique_ptr<SearchResult>
|
||||
Search(const query::Plan* Plan,
|
||||
const query::PlaceholderGroup* placeholder_group,
|
||||
Timestamp timestamp) const override;
|
||||
Timestamp timestamp,
|
||||
int32_t consistency_level = 0) const override;
|
||||
|
||||
void
|
||||
FillPrimaryKeys(const query::Plan* plan,
|
||||
|
@ -262,7 +271,8 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
const query::RetrievePlan* Plan,
|
||||
Timestamp timestamp,
|
||||
int64_t limit_size,
|
||||
bool ignore_non_pk) const override;
|
||||
bool ignore_non_pk,
|
||||
int32_t consistency_level = 0) const override;
|
||||
|
||||
std::unique_ptr<proto::segcore::RetrieveResults>
|
||||
Retrieve(tracer::TraceContext* trace_ctx,
|
||||
|
@ -325,6 +335,9 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
index::TextMatchIndex*
|
||||
GetTextIndex(FieldId field_id) const override;
|
||||
|
||||
virtual index::JsonKeyStatsInvertedIndex*
|
||||
GetJsonKeyIndex(FieldId field_id) const override;
|
||||
|
||||
public:
|
||||
virtual void
|
||||
vector_search(SearchInfo& search_info,
|
||||
|
@ -519,6 +532,10 @@ class SegmentInternalInterface : public SegmentInterface {
|
|||
// text-indexes used to do match.
|
||||
std::unordered_map<FieldId, std::unique_ptr<index::TextMatchIndex>>
|
||||
text_indexes_;
|
||||
|
||||
std::unordered_map<FieldId,
|
||||
std::unique_ptr<index::JsonKeyStatsInvertedIndex>>
|
||||
json_indexes_;
|
||||
};
|
||||
|
||||
} // namespace milvus::segcore
|
||||
|
|
|
@ -69,6 +69,17 @@ class SegmentSealed : public SegmentInternalInterface {
|
|||
return index->second.get();
|
||||
}
|
||||
|
||||
virtual void
|
||||
LoadJsonKeyIndex(
|
||||
FieldId field_id,
|
||||
std::unique_ptr<index::JsonKeyStatsInvertedIndex> index) = 0;
|
||||
|
||||
virtual index::JsonKeyStatsInvertedIndex*
|
||||
GetJsonKeyIndex(FieldId field_id) const = 0;
|
||||
|
||||
virtual std::pair<std::string_view, bool>
|
||||
GetJsonData(FieldId field_id, size_t offset) const = 0;
|
||||
|
||||
SegmentType
|
||||
type() const override {
|
||||
return SegmentType::Sealed;
|
||||
|
|
|
@ -2147,4 +2147,29 @@ SegmentSealedImpl::LoadTextIndex(FieldId field_id,
|
|||
text_indexes_[field_id] = std::move(index);
|
||||
}
|
||||
|
||||
void
|
||||
SegmentSealedImpl::LoadJsonKeyIndex(
|
||||
FieldId field_id, std::unique_ptr<index::JsonKeyStatsInvertedIndex> index) {
|
||||
std::unique_lock lck(mutex_);
|
||||
const auto& field_meta = schema_->operator[](field_id);
|
||||
json_key_indexes_[field_id] = std::move(index);
|
||||
}
|
||||
|
||||
index::JsonKeyStatsInvertedIndex*
|
||||
SegmentSealedImpl::GetJsonKeyIndex(FieldId field_id) const {
|
||||
std::shared_lock lck(mutex_);
|
||||
auto iter = json_key_indexes_.find(field_id);
|
||||
if (iter == json_key_indexes_.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return iter->second.get();
|
||||
}
|
||||
|
||||
std::pair<std::string_view, bool>
|
||||
SegmentSealedImpl::GetJsonData(FieldId field_id, size_t offset) const {
|
||||
auto column = fields_.at(field_id);
|
||||
bool is_valid = column->IsValid(offset);
|
||||
return std::make_pair(std::move(column->RawAt(offset)), is_valid);
|
||||
}
|
||||
|
||||
} // namespace milvus::segcore
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include "common/Types.h"
|
||||
#include "common/IndexMeta.h"
|
||||
#include "index/TextMatchIndex.h"
|
||||
#include "index/JsonKeyStatsInvertedIndex.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
|
@ -100,6 +101,17 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
LoadTextIndex(FieldId field_id,
|
||||
std::unique_ptr<index::TextMatchIndex> index) override;
|
||||
|
||||
void
|
||||
LoadJsonKeyIndex(
|
||||
FieldId field_id,
|
||||
std::unique_ptr<index::JsonKeyStatsInvertedIndex> index) override;
|
||||
|
||||
index::JsonKeyStatsInvertedIndex*
|
||||
GetJsonKeyIndex(FieldId field_id) const override;
|
||||
|
||||
std::pair<std::string_view, bool>
|
||||
GetJsonData(FieldId field_id, size_t offset) const override;
|
||||
|
||||
public:
|
||||
size_t
|
||||
GetMemoryUsageInBytes() const override {
|
||||
|
@ -412,6 +424,11 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||
|
||||
// whether the segment is sorted by the pk
|
||||
bool is_sorted_by_pk_ = false;
|
||||
|
||||
// used for json expr optimization
|
||||
std::unordered_map<FieldId,
|
||||
std::unique_ptr<index::JsonKeyStatsInvertedIndex>>
|
||||
json_key_indexes_;
|
||||
};
|
||||
|
||||
inline SegmentSealedUPtr
|
||||
|
|
|
@ -111,7 +111,8 @@ AsyncSearch(CTraceContext c_trace,
|
|||
CSegmentInterface c_segment,
|
||||
CSearchPlan c_plan,
|
||||
CPlaceholderGroup c_placeholder_group,
|
||||
uint64_t timestamp) {
|
||||
uint64_t timestamp,
|
||||
int32_t consistency_level) {
|
||||
auto segment = (milvus::segcore::SegmentInterface*)c_segment;
|
||||
auto plan = (milvus::query::Plan*)c_plan;
|
||||
auto phg_ptr = reinterpret_cast<const milvus::query::PlaceholderGroup*>(
|
||||
|
@ -120,7 +121,7 @@ AsyncSearch(CTraceContext c_trace,
|
|||
auto future = milvus::futures::Future<milvus::SearchResult>::async(
|
||||
milvus::futures::getGlobalCPUExecutor(),
|
||||
milvus::futures::ExecutePriority::HIGH,
|
||||
[c_trace, segment, plan, phg_ptr, timestamp](
|
||||
[c_trace, segment, plan, phg_ptr, timestamp, consistency_level](
|
||||
milvus::futures::CancellationToken cancel_token) {
|
||||
// save trace context into search_info
|
||||
auto& trace_ctx = plan->plan_node_->search_info_.trace_ctx_;
|
||||
|
@ -131,7 +132,8 @@ AsyncSearch(CTraceContext c_trace,
|
|||
auto span = milvus::tracer::StartSpan("SegCoreSearch", &trace_ctx);
|
||||
milvus::tracer::SetRootSpan(span);
|
||||
|
||||
auto search_result = segment->Search(plan, phg_ptr, timestamp);
|
||||
auto search_result =
|
||||
segment->Search(plan, phg_ptr, timestamp, consistency_level);
|
||||
if (!milvus::PositivelyRelated(
|
||||
plan->plan_node_->search_info_.metric_type_)) {
|
||||
for (auto& dis : search_result->distances_) {
|
||||
|
@ -179,21 +181,31 @@ AsyncRetrieve(CTraceContext c_trace,
|
|||
CRetrievePlan c_plan,
|
||||
uint64_t timestamp,
|
||||
int64_t limit_size,
|
||||
bool ignore_non_pk) {
|
||||
bool ignore_non_pk,
|
||||
int32_t consistency_level) {
|
||||
auto segment = static_cast<milvus::segcore::SegmentInterface*>(c_segment);
|
||||
auto plan = static_cast<const milvus::query::RetrievePlan*>(c_plan);
|
||||
|
||||
auto future = milvus::futures::Future<CRetrieveResult>::async(
|
||||
milvus::futures::getGlobalCPUExecutor(),
|
||||
milvus::futures::ExecutePriority::HIGH,
|
||||
[c_trace, segment, plan, timestamp, limit_size, ignore_non_pk](
|
||||
milvus::futures::CancellationToken cancel_token) {
|
||||
[c_trace,
|
||||
segment,
|
||||
plan,
|
||||
timestamp,
|
||||
limit_size,
|
||||
ignore_non_pk,
|
||||
consistency_level](milvus::futures::CancellationToken cancel_token) {
|
||||
auto trace_ctx = milvus::tracer::TraceContext{
|
||||
c_trace.traceID, c_trace.spanID, c_trace.traceFlags};
|
||||
milvus::tracer::AutoSpan span("SegCoreRetrieve", &trace_ctx, true);
|
||||
|
||||
auto retrieve_result = segment->Retrieve(
|
||||
&trace_ctx, plan, timestamp, limit_size, ignore_non_pk);
|
||||
auto retrieve_result = segment->Retrieve(&trace_ctx,
|
||||
plan,
|
||||
timestamp,
|
||||
limit_size,
|
||||
ignore_non_pk,
|
||||
consistency_level);
|
||||
|
||||
return CreateLeakedCRetrieveResultFromProto(
|
||||
std::move(retrieve_result));
|
||||
|
@ -479,6 +491,60 @@ LoadTextIndex(CSegmentInterface c_segment,
|
|||
}
|
||||
}
|
||||
|
||||
CStatus
|
||||
LoadJsonKeyIndex(CTraceContext c_trace,
|
||||
CSegmentInterface c_segment,
|
||||
const uint8_t* serialized_load_json_key_index_info,
|
||||
const uint64_t len) {
|
||||
try {
|
||||
auto ctx = milvus::tracer::TraceContext{
|
||||
c_trace.traceID, c_trace.spanID, c_trace.traceFlags};
|
||||
auto segment_interface =
|
||||
reinterpret_cast<milvus::segcore::SegmentInterface*>(c_segment);
|
||||
auto segment =
|
||||
dynamic_cast<milvus::segcore::SegmentSealed*>(segment_interface);
|
||||
AssertInfo(segment != nullptr, "segment conversion failed");
|
||||
|
||||
auto info_proto =
|
||||
std::make_unique<milvus::proto::indexcgo::LoadJsonKeyIndexInfo>();
|
||||
info_proto->ParseFromArray(serialized_load_json_key_index_info, len);
|
||||
|
||||
milvus::storage::FieldDataMeta field_meta{info_proto->collectionid(),
|
||||
info_proto->partitionid(),
|
||||
segment->get_segment_id(),
|
||||
info_proto->fieldid(),
|
||||
info_proto->schema()};
|
||||
milvus::storage::IndexMeta index_meta{segment->get_segment_id(),
|
||||
info_proto->fieldid(),
|
||||
info_proto->buildid(),
|
||||
info_proto->version()};
|
||||
auto remote_chunk_manager =
|
||||
milvus::storage::RemoteChunkManagerSingleton::GetInstance()
|
||||
.GetRemoteChunkManager();
|
||||
|
||||
milvus::Config config;
|
||||
std::vector<std::string> files;
|
||||
for (const auto& f : info_proto->files()) {
|
||||
files.push_back(f);
|
||||
}
|
||||
config["index_files"] = files;
|
||||
|
||||
milvus::storage::FileManagerContext file_ctx(
|
||||
field_meta, index_meta, remote_chunk_manager);
|
||||
|
||||
auto index = std::make_unique<milvus::index::JsonKeyStatsInvertedIndex>(
|
||||
file_ctx, true);
|
||||
index->Load(ctx, config);
|
||||
|
||||
segment->LoadJsonKeyIndex(milvus::FieldId(info_proto->fieldid()),
|
||||
std::move(index));
|
||||
|
||||
return milvus::SuccessCStatus();
|
||||
} catch (std::exception& e) {
|
||||
return milvus::FailureCStatus(&e);
|
||||
}
|
||||
}
|
||||
|
||||
CStatus
|
||||
UpdateFieldRawDataSize(CSegmentInterface c_segment,
|
||||
int64_t field_id,
|
||||
|
|
|
@ -50,7 +50,8 @@ AsyncSearch(CTraceContext c_trace,
|
|||
CSegmentInterface c_segment,
|
||||
CSearchPlan c_plan,
|
||||
CPlaceholderGroup c_placeholder_group,
|
||||
uint64_t timestamp);
|
||||
uint64_t timestamp,
|
||||
int32_t consistency_level);
|
||||
|
||||
void
|
||||
DeleteRetrieveResult(CRetrieveResult* retrieve_result);
|
||||
|
@ -61,7 +62,8 @@ AsyncRetrieve(CTraceContext c_trace,
|
|||
CRetrievePlan c_plan,
|
||||
uint64_t timestamp,
|
||||
int64_t limit_size,
|
||||
bool ignore_non_pk);
|
||||
bool ignore_non_pk,
|
||||
int32_t consistency_level);
|
||||
|
||||
CFuture* // Future<CRetrieveResult>
|
||||
AsyncRetrieveByOffsets(CTraceContext c_trace,
|
||||
|
@ -122,6 +124,12 @@ LoadTextIndex(CSegmentInterface c_segment,
|
|||
const uint8_t* serialized_load_text_index_info,
|
||||
const uint64_t len);
|
||||
|
||||
CStatus
|
||||
LoadJsonKeyIndex(CTraceContext c_trace,
|
||||
CSegmentInterface c_segment,
|
||||
const uint8_t* serialied_load_json_key_index_info,
|
||||
const uint64_t len);
|
||||
|
||||
CStatus
|
||||
UpdateFieldRawDataSize(CSegmentInterface c_segment,
|
||||
int64_t field_id,
|
||||
|
|
|
@ -79,8 +79,18 @@ DiskFileManagerImpl::GetRemoteTextLogPath(const std::string& file_name,
|
|||
return remote_prefix + "/" + file_name + "_" + std::to_string(slice_num);
|
||||
}
|
||||
|
||||
std::string
|
||||
DiskFileManagerImpl::GetRemoteJsonKeyIndexPath(const std::string& file_name,
|
||||
int64_t slice_num) {
|
||||
auto remote_prefix = GetRemoteJsonKeyLogPrefix();
|
||||
return remote_prefix + "/" + file_name + "_" + std::to_string(slice_num);
|
||||
}
|
||||
|
||||
bool
|
||||
DiskFileManagerImpl::AddFile(const std::string& file) noexcept {
|
||||
DiskFileManagerImpl::AddFileInternal(
|
||||
const std::string& file,
|
||||
const std::function<std::string(const std::string&, int)>&
|
||||
get_remote_path) noexcept {
|
||||
auto local_chunk_manager =
|
||||
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
|
||||
FILEMANAGER_TRY
|
||||
|
@ -116,8 +126,7 @@ DiskFileManagerImpl::AddFile(const std::string& file) noexcept {
|
|||
}
|
||||
|
||||
auto batch_size = std::min(FILE_SLICE_SIZE, int64_t(fileSize) - offset);
|
||||
batch_remote_files.emplace_back(
|
||||
GetRemoteIndexPath(fileName, slice_num));
|
||||
batch_remote_files.emplace_back(get_remote_path(fileName, slice_num));
|
||||
remote_file_sizes.emplace_back(batch_size);
|
||||
local_file_offsets.emplace_back(offset);
|
||||
offset += batch_size;
|
||||
|
@ -132,58 +141,29 @@ DiskFileManagerImpl::AddFile(const std::string& file) noexcept {
|
|||
return true;
|
||||
} // namespace knowhere
|
||||
|
||||
bool
DiskFileManagerImpl::AddFile(const std::string& file) noexcept {
    return AddFileInternal(file,
                           [this](const std::string& file_name, int slice_num) {
                               return GetRemoteIndexPath(file_name, slice_num);
                           });
}

bool
DiskFileManagerImpl::AddJsonKeyIndexLog(const std::string& file) noexcept {
    return AddFileInternal(
        file, [this](const std::string& file_name, int slice_num) {
            return GetRemoteJsonKeyIndexPath(file_name, slice_num);
        });
}
|
||||
|
||||
bool
|
||||
DiskFileManagerImpl::AddTextLog(const std::string& file) noexcept {
|
||||
auto local_chunk_manager =
|
||||
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
|
||||
FILEMANAGER_TRY
|
||||
if (!local_chunk_manager->Exist(file)) {
|
||||
LOG_ERROR("local file {} not exists", file);
|
||||
return false;
|
||||
}
|
||||
|
||||
// record local file path
|
||||
local_paths_.emplace_back(file);
|
||||
|
||||
auto fileName = GetFileName(file);
|
||||
auto fileSize = local_chunk_manager->Size(file);
|
||||
added_total_file_size_ += fileSize;
|
||||
|
||||
std::vector<std::string> batch_remote_files;
|
||||
std::vector<int64_t> remote_file_sizes;
|
||||
std::vector<int64_t> local_file_offsets;
|
||||
|
||||
int slice_num = 0;
|
||||
auto parallel_degree =
|
||||
uint64_t(DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE);
|
||||
for (int64_t offset = 0; offset < fileSize; slice_num++) {
|
||||
if (batch_remote_files.size() >= parallel_degree) {
|
||||
AddBatchIndexFiles(file,
|
||||
local_file_offsets,
|
||||
batch_remote_files,
|
||||
|
||||
remote_file_sizes);
|
||||
batch_remote_files.clear();
|
||||
remote_file_sizes.clear();
|
||||
local_file_offsets.clear();
|
||||
}
|
||||
|
||||
auto batch_size = std::min(FILE_SLICE_SIZE, int64_t(fileSize) - offset);
|
||||
batch_remote_files.emplace_back(
|
||||
GetRemoteTextLogPath(fileName, slice_num));
|
||||
remote_file_sizes.emplace_back(batch_size);
|
||||
local_file_offsets.emplace_back(offset);
|
||||
offset += batch_size;
|
||||
}
|
||||
if (batch_remote_files.size() > 0) {
|
||||
AddBatchIndexFiles(
|
||||
file, local_file_offsets, batch_remote_files, remote_file_sizes);
|
||||
}
|
||||
FILEMANAGER_CATCH
|
||||
FILEMANAGER_END
|
||||
|
||||
return true;
|
||||
} // namespace knowhere
|
||||
    return AddFileInternal(
        file, [this](const std::string& file_name, int slice_num) {
            return GetRemoteTextLogPath(file_name, slice_num);
        });
}
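
After this refactor AddFile, AddTextLog, and AddJsonKeyIndexLog differ only in how a slice's remote path is formed, so the slicing and upload loop lives once in AddFileInternal and receives the naming scheme as a callable. A reduced sketch of that shape follows; upload_sliced and the path scheme are illustrative stand-ins, not repository functions.

#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

// Illustrative reduction of the AddFileInternal design: the shared loop walks
// the file in fixed slices and asks the callable for each slice's remote name.
bool
upload_sliced(const std::string& local_file,
              int64_t file_size,
              int64_t slice_size,
              const std::function<std::string(const std::string&, int)>&
                  remote_path_for) {
    int slice_num = 0;
    for (int64_t offset = 0; offset < file_size; offset += slice_size) {
        std::cout << local_file << " [" << offset << ", "
                  << std::min(offset + slice_size, file_size) << ") -> "
                  << remote_path_for(local_file, slice_num++) << "\n";
    }
    return true;
}

int
main() {
    // Equivalent in spirit to AddJsonKeyIndexLog: prefix + "_" + slice number.
    auto json_key_scheme = [](const std::string& name, int slice) {
        return "remote/json_key_index/" + name + "_" + std::to_string(slice);
    };
    return upload_sliced("tantivy.idx", 3 << 20, 1 << 20, json_key_scheme) ? 0
                                                                           : 1;
}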
|
||||
|
||||
void
|
||||
DiskFileManagerImpl::AddBatchIndexFiles(
|
||||
|
@ -238,8 +218,9 @@ DiskFileManagerImpl::AddBatchIndexFiles(
|
|||
}
|
||||
|
||||
void
|
||||
DiskFileManagerImpl::CacheIndexToDisk(
|
||||
const std::vector<std::string>& remote_files) {
|
||||
DiskFileManagerImpl::CacheIndexToDiskInternal(
|
||||
const std::vector<std::string>& remote_files,
|
||||
const std::function<std::string()>& get_local_index_prefix) noexcept {
|
||||
auto local_chunk_manager =
|
||||
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
|
||||
|
||||
|
@ -265,7 +246,7 @@ DiskFileManagerImpl::CacheIndexToDisk(
|
|||
for (auto& slices : index_slices) {
|
||||
auto prefix = slices.first;
|
||||
auto local_index_file_name =
|
||||
GetLocalIndexObjectPrefix() +
|
||||
get_local_index_prefix() +
|
||||
prefix.substr(prefix.find_last_of('/') + 1);
|
||||
local_chunk_manager->CreateFile(local_index_file_name);
|
||||
auto file =
|
||||
|
@ -305,58 +286,25 @@ DiskFileManagerImpl::CacheIndexToDisk(
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
DiskFileManagerImpl::CacheIndexToDisk(
|
||||
const std::vector<std::string>& remote_files) {
|
||||
return CacheIndexToDiskInternal(
|
||||
remote_files, [this]() { return GetLocalIndexObjectPrefix(); });
|
||||
}
|
||||
|
||||
void
|
||||
DiskFileManagerImpl::CacheTextLogToDisk(
|
||||
const std::vector<std::string>& remote_files) {
|
||||
auto local_chunk_manager =
|
||||
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
|
||||
return CacheIndexToDiskInternal(
|
||||
remote_files, [this]() { return GetLocalTextIndexPrefix(); });
|
||||
}
|
||||
|
||||
std::map<std::string, std::vector<int>> index_slices;
|
||||
for (auto& file_path : remote_files) {
|
||||
auto pos = file_path.find_last_of("_");
|
||||
AssertInfo(pos > 0, "invalided index file path:{}", file_path);
|
||||
try {
|
||||
auto idx = std::stoi(file_path.substr(pos + 1));
|
||||
index_slices[file_path.substr(0, pos)].emplace_back(idx);
|
||||
} catch (const std::logic_error& e) {
|
||||
auto err_message = fmt::format(
|
||||
"invalided text log path:{}, error:{}", file_path, e.what());
|
||||
LOG_ERROR(err_message);
|
||||
throw std::logic_error(err_message);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& slices : index_slices) {
|
||||
std::sort(slices.second.begin(), slices.second.end());
|
||||
}
|
||||
|
||||
for (auto& slices : index_slices) {
|
||||
auto prefix = slices.first;
|
||||
auto local_index_file_name =
|
||||
GetLocalTextIndexPrefix() + "/" +
|
||||
prefix.substr(prefix.find_last_of('/') + 1);
|
||||
local_chunk_manager->CreateFile(local_index_file_name);
|
||||
auto file =
|
||||
File::Open(local_index_file_name, O_CREAT | O_RDWR | O_TRUNC);
|
||||
|
||||
// Get the remote files
|
||||
std::vector<std::string> batch_remote_files;
|
||||
batch_remote_files.reserve(slices.second.size());
|
||||
for (int& iter : slices.second) {
|
||||
auto origin_file = prefix + "_" + std::to_string(iter);
|
||||
batch_remote_files.push_back(origin_file);
|
||||
}
|
||||
|
||||
auto index_chunks = GetObjectData(rcm_.get(), batch_remote_files);
|
||||
for (auto& chunk : index_chunks) {
|
||||
auto index_data = chunk.get()->GetFieldData();
|
||||
auto index_size = index_data->Size();
|
||||
auto chunk_data = reinterpret_cast<uint8_t*>(
|
||||
const_cast<void*>(index_data->Data()));
|
||||
file.Write(chunk_data, index_size);
|
||||
}
|
||||
local_paths_.emplace_back(local_index_file_name);
|
||||
}
|
||||
void
DiskFileManagerImpl::CacheJsonKeyIndexToDisk(
    const std::vector<std::string>& remote_files) {
    return CacheIndexToDiskInternal(
        remote_files, [this]() { return GetLocalJsonKeyIndexPrefix(); });
}
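
Remote slices are named "<prefix>_<slice_number>", so when an index (vector, text, or JSON key) is cached back to local disk, the manager groups paths by prefix, sorts the numeric suffixes, and concatenates the downloaded chunks into one local file. A minimal sketch of just that grouping step, assuming only the naming convention shown above:

#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <vector>

int
main() {
    // Example remote paths following the "<prefix>_<slice_num>" convention.
    std::vector<std::string> remote_files = {
        "a/b/index_2", "a/b/index_0", "a/b/index_1", "a/b/other_0"};

    std::map<std::string, std::vector<int>> index_slices;
    for (auto& file_path : remote_files) {
        auto pos = file_path.find_last_of('_');
        index_slices[file_path.substr(0, pos)].emplace_back(
            std::stoi(file_path.substr(pos + 1)));
    }
    for (auto& slices : index_slices) {
        std::sort(slices.second.begin(), slices.second.end());
        // Download the slices in this order and append them to one local file.
        std::cout << slices.first << ": " << slices.second.size()
                  << " slices\n";
    }
    return 0;
}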
|
||||
|
||||
template <typename DataType>
|
||||
|
@ -649,6 +597,12 @@ DiskFileManagerImpl::GetFileName(const std::string& localfile) {
|
|||
return localPath.filename().string();
|
||||
}
|
||||
|
||||
std::string
|
||||
DiskFileManagerImpl::GetIndexIdentifier() {
|
||||
return GenIndexPathIdentifier(index_meta_.build_id,
|
||||
index_meta_.index_version);
|
||||
}
|
||||
|
||||
std::string
|
||||
DiskFileManagerImpl::GetLocalIndexObjectPrefix() {
|
||||
auto local_chunk_manager =
|
||||
|
@ -657,6 +611,14 @@ DiskFileManagerImpl::GetLocalIndexObjectPrefix() {
|
|||
local_chunk_manager, index_meta_.build_id, index_meta_.index_version);
|
||||
}
|
||||
|
||||
std::string
|
||||
DiskFileManagerImpl::GetTextIndexIdentifier() {
|
||||
return std::to_string(index_meta_.build_id) + "/" +
|
||||
std::to_string(index_meta_.index_version) + "/" +
|
||||
std::to_string(field_meta_.segment_id) + "/" +
|
||||
std::to_string(field_meta_.field_id);
|
||||
}
|
||||
|
||||
std::string
|
||||
DiskFileManagerImpl::GetLocalTextIndexPrefix() {
|
||||
auto local_chunk_manager =
|
||||
|
@ -669,17 +631,37 @@ DiskFileManagerImpl::GetLocalTextIndexPrefix() {
|
|||
}
|
||||
|
||||
std::string
|
||||
DiskFileManagerImpl::GetIndexIdentifier() {
|
||||
return GenIndexPathIdentifier(index_meta_.build_id,
|
||||
index_meta_.index_version);
|
||||
DiskFileManagerImpl::GetJsonKeyIndexIdentifier() {
|
||||
return GenJsonKeyIndexPathIdentifier(index_meta_.build_id,
|
||||
index_meta_.index_version,
|
||||
field_meta_.collection_id,
|
||||
field_meta_.partition_id,
|
||||
field_meta_.segment_id,
|
||||
field_meta_.field_id);
|
||||
}
|
||||
|
||||
std::string
|
||||
DiskFileManagerImpl::GetTextIndexIdentifier() {
|
||||
return std::to_string(index_meta_.build_id) + "/" +
|
||||
std::to_string(index_meta_.index_version) + "/" +
|
||||
std::to_string(field_meta_.segment_id) +
|
||||
std::to_string(field_meta_.field_id);
|
||||
DiskFileManagerImpl::GetLocalJsonKeyIndexPrefix() {
|
||||
auto local_chunk_manager =
|
||||
LocalChunkManagerSingleton::GetInstance().GetChunkManager();
|
||||
return GenJsonKeyIndexPathPrefix(local_chunk_manager,
|
||||
index_meta_.build_id,
|
||||
index_meta_.index_version,
|
||||
field_meta_.collection_id,
|
||||
field_meta_.partition_id,
|
||||
field_meta_.segment_id,
|
||||
field_meta_.field_id);
|
||||
}
|
||||
|
||||
std::string
|
||||
DiskFileManagerImpl::GetRemoteJsonKeyLogPrefix() {
|
||||
return GenJsonKeyIndexPathPrefix(rcm_,
|
||||
index_meta_.build_id,
|
||||
index_meta_.index_version,
|
||||
field_meta_.collection_id,
|
||||
field_meta_.partition_id,
|
||||
field_meta_.segment_id,
|
||||
field_meta_.field_id);
|
||||
}
|
||||
|
||||
std::string
|
||||
|
|
|
@ -51,28 +51,43 @@ class DiskFileManagerImpl : public FileManagerImpl {
|
|||
bool
|
||||
AddTextLog(const std::string& filename) noexcept;
|
||||
|
||||
bool
|
||||
AddJsonKeyIndexLog(const std::string& filename) noexcept;
|
||||
|
||||
public:
|
||||
std::string
|
||||
GetName() const override {
|
||||
return "DiskFileManagerImpl";
|
||||
}
|
||||
|
||||
std::string
|
||||
GetLocalIndexObjectPrefix();
|
||||
|
||||
// Similar to GetTextIndexIdentifier, segment_id and field_id is also required.
|
||||
std::string
|
||||
GetLocalTextIndexPrefix();
|
||||
|
||||
std::string
|
||||
GetIndexIdentifier();
|
||||
|
||||
std::string
|
||||
GetLocalIndexObjectPrefix();
|
||||
|
||||
// Different from user index, a text index task may have multiple text fields sharing same build_id/task_id. So
|
||||
// segment_id and field_id are required to identify a unique text index, in case that we support multiple index task
|
||||
// in the same indexnode at the same time later.
|
||||
std::string
|
||||
GetTextIndexIdentifier();
|
||||
|
||||
// Similar to GetTextIndexIdentifier, segment_id and field_id is also required.
|
||||
std::string
|
||||
GetLocalTextIndexPrefix();
|
||||
|
||||
// Used for building index, using this index identifier mode to construct tmp building-index dir.
|
||||
std::string
|
||||
GetJsonKeyIndexIdentifier();
|
||||
|
||||
// Used for loading index, using this index prefix dir to store index.
|
||||
std::string
|
||||
GetLocalJsonKeyIndexPrefix();
|
||||
|
||||
// Used for upload index to remote storage, using this index prefix dir as remote storage directory
|
||||
std::string
|
||||
GetRemoteJsonKeyLogPrefix();
|
||||
|
||||
std::string
|
||||
GetLocalRawDataObjectPrefix();
|
||||
|
||||
|
@ -92,6 +107,9 @@ class DiskFileManagerImpl : public FileManagerImpl {
|
|||
void
|
||||
CacheTextLogToDisk(const std::vector<std::string>& remote_files);
|
||||
|
||||
void
|
||||
CacheJsonKeyIndexToDisk(const std::vector<std::string>& remote_files);
|
||||
|
||||
void
|
||||
AddBatchIndexFiles(const std::string& local_file_name,
|
||||
const std::vector<int64_t>& local_file_offsets,
|
||||
|
@ -115,21 +133,34 @@ class DiskFileManagerImpl : public FileManagerImpl {
|
|||
return added_total_file_size_;
|
||||
}
|
||||
|
||||
std::string
|
||||
GetFileName(const std::string& localfile);
|
||||
|
||||
private:
|
||||
int64_t
|
||||
GetIndexBuildId() {
|
||||
return index_meta_.build_id;
|
||||
}
|
||||
|
||||
std::string
|
||||
GetFileName(const std::string& localfile);
|
||||
|
||||
std::string
|
||||
GetRemoteIndexPath(const std::string& file_name, int64_t slice_num) const;
|
||||
|
||||
std::string
|
||||
GetRemoteTextLogPath(const std::string& file_name, int64_t slice_num) const;
|
||||
|
||||
std::string
|
||||
GetRemoteJsonKeyIndexPath(const std::string& file_name, int64_t slice_num);
|
||||
|
||||
bool
|
||||
AddFileInternal(const std::string& file_name,
|
||||
const std::function<std::string(const std::string&, int)>&
|
||||
get_remote_path) noexcept;
|
||||
|
||||
void
|
||||
CacheIndexToDiskInternal(
|
||||
const std::vector<std::string>& remote_files,
|
||||
const std::function<std::string()>& get_local_index_prefix) noexcept;
|
||||
|
||||
private:
|
||||
// local file path (abs path)
|
||||
std::vector<std::string> local_paths_;
|
||||
|
|
|
@ -549,6 +549,37 @@ GenTextIndexPathPrefix(ChunkManagerPtr cm,
|
|||
return (prefix / path / path1).string();
|
||||
}
|
||||
|
||||
std::string
GenJsonKeyIndexPathIdentifier(int64_t build_id,
                              int64_t index_version,
                              int64_t collection_id,
                              int64_t partition_id,
                              int64_t segment_id,
                              int64_t field_id) {
    return std::to_string(build_id) + "/" + std::to_string(index_version) +
           "/" + std::to_string(collection_id) + "/" +
           std::to_string(partition_id) + "/" + std::to_string(segment_id) +
           "/" + std::to_string(field_id) + "/";
}

std::string
GenJsonKeyIndexPathPrefix(ChunkManagerPtr cm,
                          int64_t build_id,
                          int64_t index_version,
                          int64_t collection_id,
                          int64_t partition_id,
                          int64_t segment_id,
                          int64_t field_id) {
    return cm->GetRootPath() + "/" + std::string(JSON_KEY_INDEX_LOG_ROOT_PATH) +
           "/" +
           GenJsonKeyIndexPathIdentifier(build_id,
                                         index_version,
                                         collection_id,
                                         partition_id,
                                         segment_id,
                                         field_id);
}
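
A JSON key index log therefore lives under "<root>/<JSON_KEY_INDEX_LOG_ROOT_PATH>/<build_id>/<index_version>/<collection_id>/<partition_id>/<segment_id>/<field_id>/"; the literal value of JSON_KEY_INDEX_LOG_ROOT_PATH is defined elsewhere in the tree. A quick illustration of how the identifier part expands, using made-up ids:

#include <cstdint>
#include <iostream>
#include <string>

int
main() {
    // Hypothetical ids, purely to show the directory shape produced by
    // GenJsonKeyIndexPathIdentifier.
    int64_t build_id = 7001, index_version = 1, collection_id = 449,
            partition_id = 450, segment_id = 451, field_id = 102;
    std::string identifier = std::to_string(build_id) + "/" +
                             std::to_string(index_version) + "/" +
                             std::to_string(collection_id) + "/" +
                             std::to_string(partition_id) + "/" +
                             std::to_string(segment_id) + "/" +
                             std::to_string(field_id) + "/";
    std::cout << identifier << "\n";  // prints "7001/1/449/450/451/102/"
    return 0;
}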
|
||||
|
||||
std::string
|
||||
GetIndexPathPrefixWithBuildID(ChunkManagerPtr cm, int64_t build_id) {
|
||||
boost::filesystem::path prefix = cm->GetRootPath();
|
||||
|
|
|
@ -92,6 +92,23 @@ GenTextIndexPathPrefix(ChunkManagerPtr cm,
|
|||
int64_t segment_id,
|
||||
int64_t field_id);
|
||||
|
||||
std::string
|
||||
GenJsonKeyIndexPathIdentifier(int64_t build_id,
|
||||
int64_t index_version,
|
||||
int64_t collection_id,
|
||||
int64_t partition_id,
|
||||
int64_t segment_id,
|
||||
int64_t field_id);
|
||||
|
||||
std::string
|
||||
GenJsonKeyIndexPathPrefix(ChunkManagerPtr cm,
|
||||
int64_t build_id,
|
||||
int64_t index_version,
|
||||
int64_t collection_id,
|
||||
int64_t partition_id,
|
||||
int64_t segment_id,
|
||||
int64_t field_id);
|
||||
|
||||
std::string
|
||||
GenFieldRawDataPathPrefix(ChunkManagerPtr cm,
|
||||
int64_t segment_id,
|
||||
|
|
|
@ -69,6 +69,68 @@ struct RustArrayWrapper {
|
|||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct RustArrayI64Wrapper {
|
||||
NO_COPY_OR_ASSIGN(RustArrayI64Wrapper);
|
||||
|
||||
explicit RustArrayI64Wrapper(RustArrayI64&& array) {
|
||||
array_.array = array.array;
|
||||
array_.len = array.len;
|
||||
array_.cap = array.cap;
|
||||
array.array = nullptr;
|
||||
array.len = 0;
|
||||
array.cap = 0;
|
||||
}
|
||||
|
||||
RustArrayI64Wrapper(RustArrayI64Wrapper&& other) noexcept {
|
||||
array_.array = other.array_.array;
|
||||
array_.len = other.array_.len;
|
||||
array_.cap = other.array_.cap;
|
||||
other.array_.array = nullptr;
|
||||
other.array_.len = 0;
|
||||
other.array_.cap = 0;
|
||||
}
|
||||
|
||||
RustArrayI64Wrapper&
|
||||
operator=(RustArrayI64Wrapper&& other) noexcept {
|
||||
if (this != &other) {
|
||||
free();
|
||||
array_.array = other.array_.array;
|
||||
array_.len = other.array_.len;
|
||||
array_.cap = other.array_.cap;
|
||||
other.array_.array = nullptr;
|
||||
other.array_.len = 0;
|
||||
other.array_.cap = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
~RustArrayI64Wrapper() {
|
||||
free();
|
||||
}
|
||||
|
||||
void
|
||||
debug() {
|
||||
std::stringstream ss;
|
||||
ss << "[ ";
|
||||
for (int i = 0; i < array_.len; i++) {
|
||||
ss << array_.array[i] << " ";
|
||||
}
|
||||
ss << "]";
|
||||
std::cout << ss.str() << std::endl;
|
||||
}
|
||||
|
||||
RustArrayI64 array_;
|
||||
|
||||
private:
|
||||
void
|
||||
free() {
|
||||
if (array_.array != nullptr) {
|
||||
free_rust_array_i64(array_);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct RustResultWrapper {
|
||||
NO_COPY_OR_ASSIGN(RustResultWrapper);
|
||||
|
||||
|
|
|
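RustArrayI64Wrapper follows the same move-only RAII pattern as RustArrayWrapper above: it takes ownership of the Rust-allocated i64 buffer, nulls out the source on move, and calls free_rust_array_i64 exactly once in its destructor, so results such as the term_query_i64 hits added later in this change can be returned by value without double frees.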
@ -149,6 +149,8 @@ RustResult tantivy_term_query_bool(void *ptr, bool term);
|
|||
|
||||
RustResult tantivy_term_query_keyword(void *ptr, const char *term);
|
||||
|
||||
RustResult tantivy_term_query_keyword_i64(void *ptr, const char *term);
|
||||
|
||||
RustResult tantivy_lower_bound_range_query_keyword(void *ptr,
|
||||
const char *lower_bound,
|
||||
bool inclusive);
|
||||
|
@ -180,7 +182,8 @@ RustResult tantivy_create_index(const char *field_name,
|
|||
const char *path,
|
||||
uint32_t tantivy_index_version,
|
||||
uintptr_t num_threads,
|
||||
uintptr_t overall_memory_budget_in_bytes);
|
||||
uintptr_t overall_memory_budget_in_bytes,
|
||||
bool in_ram);
|
||||
|
||||
RustResult tantivy_create_index_with_single_segment(const char *field_name,
|
||||
TantivyDataType data_type,
|
||||
|
|
|
@ -120,7 +120,7 @@ macro_rules! impl_from_for_enum {
|
|||
};
|
||||
}
|
||||
|
||||
impl_from_for_enum!(Value, None => (), RustArrayI64 => RustArrayI64, RustArray => RustArray, RustArray => Vec<u32>, U32 => u32, Ptr => *mut c_void);
|
||||
impl_from_for_enum!(Value, None => (), RustArrayI64 => RustArrayI64, RustArrayI64 => Vec<i64>, RustArray => RustArray, RustArray => Vec<u32>, U32 => u32, Ptr => *mut c_void);
|
||||
|
||||
#[repr(C)]
|
||||
pub struct RustResult {
|
||||
|
@ -202,7 +202,7 @@ macro_rules! cstr_to_str {
|
|||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn test_enum_with_array() -> RustResult {
|
||||
let array = vec![1, 2, 3];
|
||||
let array: Vec<u32> = vec![1, 2, 3];
|
||||
RustResult::from(Result::Ok(array))
|
||||
}
|
||||
|
||||
|
|
|
@ -162,7 +162,7 @@ impl IndexReaderWrapper {
|
|||
}
|
||||
|
||||
pub fn term_query_f64(&self, term: f64) -> Result<Vec<u32>> {
|
||||
let q = TermQuery::new(
|
||||
let q: TermQuery = TermQuery::new(
|
||||
Term::from_field_f64(self.field, term),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
|
@ -235,6 +235,14 @@ impl IndexReaderWrapper {
|
|||
self.search(&q)
|
||||
}
|
||||
|
||||
pub fn term_query_keyword_i64(&self, term: &str) -> Result<Vec<i64>> {
|
||||
let q = TermQuery::new(
|
||||
Term::from_field_text(self.field, term),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
self.search_i64(&q)
|
||||
}
|
||||
|
||||
pub fn lower_bound_range_query_keyword(
|
||||
&self,
|
||||
lower_bound: &str,
|
||||
|
|
|
@ -192,6 +192,13 @@ pub extern "C" fn tantivy_term_query_keyword(ptr: *mut c_void, term: *const c_ch
|
|||
unsafe { (*real).term_query_keyword(term).into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_term_query_keyword_i64(ptr: *mut c_void, term: *const c_char) -> RustResult {
|
||||
let real = ptr as *mut IndexReaderWrapper;
|
||||
let term = cstr_to_str!(term);
|
||||
unsafe { (*real).term_query_keyword_i64(term).into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_lower_bound_range_query_keyword(
|
||||
ptr: *mut c_void,
|
||||
|
|
|
@ -11,6 +11,7 @@ impl IndexReaderWrapper {
|
|||
// split the query string into multiple tokens using index's default tokenizer,
|
||||
// and then execute the disconjunction of term query.
|
||||
pub(crate) fn match_query(&self, q: &str) -> Result<Vec<u32>> {
|
||||
// clone the tokenizer to make `match_query` thread-safe.
|
||||
let mut tokenizer = self
|
||||
.index
|
||||
.tokenizer_for_field(self.field)
|
||||
|
|
|
@ -29,6 +29,7 @@ impl IndexWriterWrapper {
|
|||
num_threads: usize,
|
||||
overall_memory_budget_in_bytes: usize,
|
||||
tanviy_index_version: TantivyIndexVersion,
|
||||
in_ram: bool,
|
||||
) -> Result<IndexWriterWrapper> {
|
||||
init_log();
|
||||
match tanviy_index_version {
|
||||
|
@ -39,6 +40,7 @@ impl IndexWriterWrapper {
|
|||
path,
|
||||
num_threads,
|
||||
overall_memory_budget_in_bytes,
|
||||
in_ram,
|
||||
)?;
|
||||
Ok(IndexWriterWrapper::V5(writer))
|
||||
}
|
||||
|
@ -49,12 +51,12 @@ impl IndexWriterWrapper {
|
|||
path,
|
||||
num_threads,
|
||||
overall_memory_budget_in_bytes,
|
||||
in_ram,
|
||||
)?;
|
||||
Ok(IndexWriterWrapper::V7(writer))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_with_single_segment(
|
||||
field_name: &str,
|
||||
data_type: TantivyDataType,
|
||||
|
|
|
@ -28,6 +28,7 @@ pub extern "C" fn tantivy_create_index(
|
|||
tantivy_index_version: u32,
|
||||
num_threads: usize,
|
||||
overall_memory_budget_in_bytes: usize,
|
||||
    in_ram: bool,
|
||||
) -> RustResult {
|
||||
let field_name_str = cstr_to_str!(field_name);
|
||||
let path_str = cstr_to_str!(path);
|
||||
|
@ -44,6 +45,7 @@ pub extern "C" fn tantivy_create_index(
|
|||
num_threads,
|
||||
overall_memory_budget_in_bytes,
|
||||
tantivy_index_version,
|
||||
in_ram,
|
||||
) {
|
||||
Ok(wrapper) => RustResult::from_ptr(create_binding(wrapper)),
|
||||
Err(e) => RustResult::from_error(e.to_string()),
|
||||
|
|
|
@ -104,6 +104,7 @@ impl IndexWriterWrapperImpl {
|
|||
path: String,
|
||||
num_threads: usize,
|
||||
overall_memory_budget_in_bytes: usize,
|
||||
in_ram: bool,
|
||||
) -> Result<IndexWriterWrapperImpl> {
|
||||
info!(
|
||||
"create index writer, field_name: {}, data_type: {:?}, tantivy_index_version 5",
|
||||
|
@ -114,7 +115,11 @@ impl IndexWriterWrapperImpl {
|
|||
// We cannot build direct connection from rows in multi-segments to milvus row data. So we have this doc_id field.
|
||||
let id_field = schema_builder.add_i64_field("doc_id", FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_dir(path.clone(), schema)?;
|
||||
let index = if in_ram {
|
||||
Index::create_in_ram(schema)
|
||||
} else {
|
||||
Index::create_in_dir(path.clone(), schema)?
|
||||
};
|
||||
let index_writer =
|
||||
index.writer_with_num_threads(num_threads, overall_memory_budget_in_bytes)?;
|
||||
Ok(IndexWriterWrapperImpl {
|
||||
|
|
|
@ -103,6 +103,7 @@ impl IndexWriterWrapperImpl {
|
|||
path: String,
|
||||
num_threads: usize,
|
||||
overall_memory_budget_in_bytes: usize,
|
||||
in_ram: bool,
|
||||
) -> Result<IndexWriterWrapperImpl> {
|
||||
info!(
|
||||
"create index writer, field_name: {}, data_type: {:?}, tantivy_index_version 7",
|
||||
|
@ -113,7 +114,11 @@ impl IndexWriterWrapperImpl {
|
|||
// We cannot build direct connection from rows in multi-segments to milvus row data. So we have this doc_id field.
|
||||
let id_field = schema_builder.add_i64_field("doc_id", FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_dir(path.clone(), schema)?;
|
||||
let index = if in_ram {
|
||||
Index::create_in_ram(schema)
|
||||
} else {
|
||||
Index::create_in_dir(path.clone(), schema)?
|
||||
};
|
||||
let index_writer =
|
||||
index.writer_with_num_threads(num_threads, overall_memory_budget_in_bytes)?;
|
||||
Ok(IndexWriterWrapperImpl {
|
||||
|
|
|
@ -84,6 +84,7 @@ struct TantivyIndexWrapper {
|
|||
const char* path,
|
||||
uint32_t tantivy_index_version,
|
||||
bool inverted_single_semgnent = false,
|
||||
bool in_ram = false,
|
||||
uintptr_t num_threads = DEFAULT_NUM_THREADS,
|
||||
uintptr_t overall_memory_budget_in_bytes =
|
||||
DEFAULT_OVERALL_MEMORY_BUDGET_IN_BYTES) {
|
||||
|
@ -101,7 +102,8 @@ struct TantivyIndexWrapper {
|
|||
path,
|
||||
tantivy_index_version,
|
||||
num_threads,
|
||||
overall_memory_budget_in_bytes));
|
||||
overall_memory_budget_in_bytes,
|
||||
in_ram));
|
||||
}
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to create index: {}",
|
||||
|
@ -146,7 +148,6 @@ struct TantivyIndexWrapper {
|
|||
writer_ = res.result_->value.ptr._0;
|
||||
path_ = std::string(path);
|
||||
}
|
||||
|
||||
// create reader.
|
||||
void
|
||||
create_reader() {
|
||||
|
@ -626,6 +627,22 @@ struct TantivyIndexWrapper {
|
|||
return RustArrayWrapper(std::move(res.result_->value.rust_array._0));
|
||||
}
|
||||
|
||||
RustArrayI64Wrapper
|
||||
term_query_i64(std::string term) {
|
||||
auto array = [&]() {
|
||||
return tantivy_term_query_keyword_i64(reader_, term.c_str());
|
||||
}();
|
||||
|
||||
auto res = RustResultWrapper(array);
|
||||
AssertInfo(res.result_->success,
|
||||
"TantivyIndexWrapper.term_query_i64: {}",
|
||||
res.result_->error);
|
||||
AssertInfo(res.result_->value.tag == Value::Tag::RustArrayI64,
|
||||
"TantivyIndexWrapper.term_query_i64: invalid result type");
|
||||
return RustArrayI64Wrapper(
|
||||
std::move(res.result_->value.rust_array_i64._0));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
RustArrayWrapper
|
||||
lower_bound_range_query(T lower_bound, bool inclusive) {
|
||||
|
|
|
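Putting the wrapper pieces together: the constructor now forwards an in_ram flag down to tantivy_create_index, and term_query_i64 returns the matching doc ids as an owned RustArrayI64Wrapper. A hedged sketch of how a caller might exercise both follows; the header name, the leading field-name/data-type constructor arguments, the namespace, and the queried key are assumptions inferred from surrounding code, not confirmed by this diff.

// Sketch only; not a definitive usage of the wrapper API.
#include "tantivy-wrapper.h"  // assumed header providing TantivyIndexWrapper

void json_key_stats_sketch() {
    using namespace milvus::tantivy;  // assumed namespace

    // Request an in-RAM index: the path is still passed, but Index::create_in_ram is used.
    TantivyIndexWrapper wrapper("json_key_stats",         // assumed field name
                                TantivyDataType::Keyword,  // assumed data type
                                "/tmp/json-key-stats",
                                7,      // tantivy_index_version
                                false,  // inverted_single_semgnent
                                true);  // in_ram
    // ... add keyed rows through the wrapper's add APIs (omitted) ...
    wrapper.create_reader();

    // Query a key and get back i64 doc ids; the wrapper frees the Rust buffer on destruction.
    auto hits = wrapper.term_query_i64("/int");
    hits.debug();
}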
@ -96,6 +96,7 @@ set(MILVUS_TEST_FILES
|
|||
test_cached_search_iterator.cpp
|
||||
test_random_sample.cpp
|
||||
test_json_index.cpp
|
||||
test_json_key_stats_index.cpp
|
||||
)
|
||||
|
||||
if ( INDEX_ENGINE STREQUAL "cardinal" )
|
||||
|
|
|
@ -93,7 +93,7 @@ Search_GrowingIndex(benchmark::State& state) {
|
|||
Timestamp ts = 10000000;
|
||||
|
||||
for (auto _ : state) {
|
||||
auto qr = segment->Search(search_plan.get(), ph_group.get(), ts);
|
||||
auto qr = segment->Search(search_plan.get(), ph_group.get(), ts, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -130,7 +130,7 @@ Search_Sealed(benchmark::State& state) {
|
|||
Timestamp ts = 10000000;
|
||||
|
||||
for (auto _ : state) {
|
||||
auto qr = segment->Search(search_plan.get(), ph_group.get(), ts);
|
||||
auto qr = segment->Search(search_plan.get(), ph_group.get(), ts, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -229,7 +229,7 @@ TEST_P(BinlogIndexTest, AccuracyWithLoadFieldData) {
|
|||
ph_group.get()};
|
||||
auto nlist = segcore_config.get_nlist();
|
||||
auto binlog_index_sr =
|
||||
segment->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||
segment->Search(plan.get(), ph_group.get(), 1L << 63, 0);
|
||||
ASSERT_EQ(binlog_index_sr->total_nq_, num_queries);
|
||||
EXPECT_EQ(binlog_index_sr->unity_topK_, topk);
|
||||
EXPECT_EQ(binlog_index_sr->distances_.size(), num_queries * topk);
|
||||
|
@ -262,7 +262,7 @@ TEST_P(BinlogIndexTest, AccuracyWithLoadFieldData) {
|
|||
EXPECT_TRUE(segment->HasIndex(vec_field_id));
|
||||
EXPECT_EQ(segment->get_row_count(), data_n);
|
||||
EXPECT_FALSE(segment->HasFieldData(vec_field_id));
|
||||
auto ivf_sr = segment->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||
auto ivf_sr = segment->Search(plan.get(), ph_group.get(), 1L << 63, 0);
|
||||
auto similary = GetKnnSearchRecall(num_queries,
|
||||
binlog_index_sr->seg_offsets_.data(),
|
||||
topk,
|
||||
|
@ -328,7 +328,7 @@ TEST_P(BinlogIndexTest, AccuracyWithMapFieldData) {
|
|||
ph_group.get()};
|
||||
auto nlist = segcore_config.get_nlist();
|
||||
auto binlog_index_sr =
|
||||
segment->Search(plan.get(), ph_group.get(), 1L << 63);
|
||||
segment->Search(plan.get(), ph_group.get(), 1L << 63, 0);
|
||||
ASSERT_EQ(binlog_index_sr->total_nq_, num_queries);
|
||||
EXPECT_EQ(binlog_index_sr->unity_topK_, topk);
|
||||
EXPECT_EQ(binlog_index_sr->distances_.size(), num_queries * topk);
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
#include "segcore/load_index_c.h"
|
||||
#include "test_utils/c_api_test_utils.h"
|
||||
#include "segcore/vector_index_c.h"
|
||||
#include "common/jsmn.h"
|
||||
|
||||
namespace chrono = std::chrono;
|
||||
|
||||
|
@ -69,7 +70,7 @@ CRetrieve(CSegmentInterface c_segment,
|
|||
uint64_t timestamp,
|
||||
CRetrieveResult** result) {
|
||||
auto future = AsyncRetrieve(
|
||||
{}, c_segment, c_plan, timestamp, DEFAULT_MAX_OUTPUT_SIZE, false);
|
||||
{}, c_segment, c_plan, timestamp, DEFAULT_MAX_OUTPUT_SIZE, false, 0);
|
||||
auto futurePtr = static_cast<milvus::futures::IFuture*>(
|
||||
static_cast<void*>(static_cast<CFuture*>(future)));
|
||||
|
||||
|
|
|
@ -157,6 +157,7 @@ TEST_P(TaskTest, CallExprEmpty) {
|
|||
segment_.get(),
|
||||
100000,
|
||||
MAX_TIMESTAMP,
|
||||
0,
|
||||
std::make_shared<milvus::exec::QueryConfig>(
|
||||
std::unordered_map<std::string, std::string>{}));
|
||||
|
||||
|
@ -194,6 +195,7 @@ TEST_P(TaskTest, UnaryExpr) {
|
|||
segment_.get(),
|
||||
100000,
|
||||
MAX_TIMESTAMP,
|
||||
0,
|
||||
std::make_shared<milvus::exec::QueryConfig>(
|
||||
std::unordered_map<std::string, std::string>{}));
|
||||
|
||||
|
@ -240,6 +242,7 @@ TEST_P(TaskTest, LogicalExpr) {
|
|||
segment_.get(),
|
||||
100000,
|
||||
MAX_TIMESTAMP,
|
||||
0,
|
||||
std::make_shared<milvus::exec::QueryConfig>(
|
||||
std::unordered_map<std::string, std::string>{}));
|
||||
|
||||
|
|
|
@ -59,14 +59,18 @@ using namespace milvus;
|
|||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
|
||||
class ExprTest : public ::testing::TestWithParam<
|
||||
std::pair<milvus::DataType, knowhere::MetricType>> {
|
||||
class ExprTest
|
||||
: public ::testing::TestWithParam<
|
||||
std::tuple<std::pair<milvus::DataType, knowhere::MetricType>, bool>> {
|
||||
public:
|
||||
void
|
||||
SetUp() override {
|
||||
auto param = GetParam();
|
||||
data_type = param.first;
|
||||
metric_type = param.second;
|
||||
data_type = std::get<0>(param).first; // Get the DataType from the pair
|
||||
metric_type =
|
||||
std::get<0>(param).second; // Get the MetricType from the pair
|
||||
GROWING_JSON_KEY_STATS_ENABLED =
|
||||
std::get<1>(param); // Get the bool parameter
|
||||
}
|
||||
|
||||
// replace the metric type in the plan string with the proper type
|
||||
|
@ -81,13 +85,29 @@ class ExprTest : public ::testing::TestWithParam<
|
|||
knowhere::MetricType metric_type;
|
||||
};
|
||||
|
||||
// Instantiate test suite with new bool parameter
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
ExprTestSuite,
|
||||
ExprTest,
|
||||
::testing::Values(
|
||||
std::pair(milvus::DataType::VECTOR_FLOAT, knowhere::metric::L2),
|
||||
std::pair(milvus::DataType::VECTOR_SPARSE_FLOAT, knowhere::metric::IP),
|
||||
std::pair(milvus::DataType::VECTOR_BINARY, knowhere::metric::JACCARD)));
|
||||
std::make_tuple(std::pair(milvus::DataType::VECTOR_FLOAT,
|
||||
knowhere::metric::L2),
|
||||
false),
|
||||
std::make_tuple(std::pair(milvus::DataType::VECTOR_SPARSE_FLOAT,
|
||||
knowhere::metric::IP),
|
||||
false),
|
||||
std::make_tuple(std::pair(milvus::DataType::VECTOR_BINARY,
|
||||
knowhere::metric::JACCARD),
|
||||
false),
|
||||
std::make_tuple(std::pair(milvus::DataType::VECTOR_FLOAT,
|
||||
knowhere::metric::L2),
|
||||
true),
|
||||
std::make_tuple(std::pair(milvus::DataType::VECTOR_SPARSE_FLOAT,
|
||||
knowhere::metric::IP),
|
||||
true),
|
||||
std::make_tuple(std::pair(milvus::DataType::VECTOR_BINARY,
|
||||
knowhere::metric::JACCARD),
|
||||
true)));
|
||||
|
||||
TEST_P(ExprTest, Range) {
|
||||
SUCCEED();
|
||||
|
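Worth noting: the extra bool in the ExprTest parameter tuple is written into GROWING_JSON_KEY_STATS_ENABLED during SetUp, so every expression test below now runs both with and without the growing-segment JSON key stats index, across all three vector type / metric combinations.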
@ -842,7 +862,7 @@ TEST_P(ExprTest, TestBinaryRangeJSON) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
for (auto testcase : testcases) {
|
||||
auto check = [&](int64_t value) {
|
||||
|
@ -966,7 +986,7 @@ TEST_P(ExprTest, TestBinaryRangeJSONNullable) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
for (auto testcase : testcases) {
|
||||
auto check = [&](int64_t value, bool valid) {
|
||||
|
@ -1085,7 +1105,7 @@ TEST_P(ExprTest, TestExistsJson) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
for (auto testcase : testcases) {
|
||||
|
@ -1162,7 +1182,7 @@ TEST_P(ExprTest, TestExistsJsonNullable) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
for (auto testcase : testcases) {
|
||||
|
@ -1245,16 +1265,13 @@ TEST_P(ExprTest, TestUnaryRangeJson) {
|
|||
int64_t val;
|
||||
std::vector<std::string> nested_path;
|
||||
};
|
||||
std::vector<Testcase> testcases{
|
||||
{10, {"int"}},
|
||||
{20, {"int"}},
|
||||
{30, {"int"}},
|
||||
{40, {"int"}},
|
||||
{10, {"double"}},
|
||||
{20, {"double"}},
|
||||
{30, {"double"}},
|
||||
{40, {"double"}},
|
||||
};
|
||||
std::vector<Testcase> testcases{{10, {"int"}},
|
||||
{20, {"int"}},
|
||||
{30, {"int"}},
|
||||
{40, {"int"}},
|
||||
{1, {"array", "0"}},
|
||||
{2, {"array", "1"}},
|
||||
{3, {"array", "2"}}};
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto i64_fid = schema->AddDebugField("id", DataType::INT64);
|
||||
|
@ -1278,7 +1295,7 @@ TEST_P(ExprTest, TestUnaryRangeJson) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
|
||||
std::vector<OpType> ops{
|
||||
|
@ -1356,13 +1373,16 @@ TEST_P(ExprTest, TestUnaryRangeJson) {
|
|||
|
||||
for (int i = 0; i < N * num_iters; ++i) {
|
||||
auto ans = final[i];
|
||||
if (testcase.nested_path[0] == "int") {
|
||||
if (testcase.nested_path[0] == "int" ||
|
||||
testcase.nested_path[0] == "array") {
|
||||
auto val =
|
||||
milvus::Json(simdjson::padded_string(json_col[i]))
|
||||
.template at<int64_t>(pointer)
|
||||
.value();
|
||||
|
||||
auto ref = f(val);
|
||||
ASSERT_EQ(ans, ref);
|
||||
ASSERT_EQ(ans, ref) << "@" << i << "op" << op;
|
||||
|
||||
if (i % 2 == 0) {
|
||||
ASSERT_EQ(view[int(i / 2)], ref);
|
||||
}
|
||||
|
@ -1381,6 +1401,272 @@ TEST_P(ExprTest, TestUnaryRangeJson) {
|
|||
}
|
||||
}
|
||||
|
||||
{
|
||||
struct Testcase {
|
||||
            double val;
|
||||
std::vector<std::string> nested_path;
|
||||
};
|
||||
std::vector<Testcase> testcases{{1.1, {"double"}},
|
||||
{2.2, {"double"}},
|
||||
{3.3, {"double"}},
|
||||
{4.4, {"double"}},
|
||||
{1e40, {"double"}}};
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto i64_fid = schema->AddDebugField("id", DataType::INT64);
|
||||
auto json_fid = schema->AddDebugField("json", DataType::JSON);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
auto seg = CreateGrowingSegment(schema, empty_index_meta);
|
||||
int N = 1000;
|
||||
std::vector<std::string> json_col;
|
||||
int num_iters = 1;
|
||||
for (int iter = 0; iter < num_iters; ++iter) {
|
||||
auto raw_data = DataGen(schema, N, iter);
|
||||
auto new_json_col = raw_data.get_col<std::string>(json_fid);
|
||||
|
||||
json_col.insert(
|
||||
json_col.end(), new_json_col.begin(), new_json_col.end());
|
||||
seg->PreInsert(N);
|
||||
seg->Insert(iter * N,
|
||||
N,
|
||||
raw_data.row_ids_.data(),
|
||||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
|
||||
std::vector<OpType> ops{
|
||||
OpType::Equal,
|
||||
OpType::NotEqual,
|
||||
OpType::GreaterThan,
|
||||
OpType::GreaterEqual,
|
||||
OpType::LessThan,
|
||||
OpType::LessEqual,
|
||||
};
|
||||
for (const auto& testcase : testcases) {
|
||||
auto check = [&](double value) { return value == testcase.val; };
|
||||
std::function<bool(double)> f = check;
|
||||
for (auto& op : ops) {
|
||||
switch (op) {
|
||||
case OpType::Equal: {
|
||||
f = [&](double value) { return value == testcase.val; };
|
||||
break;
|
||||
}
|
||||
case OpType::NotEqual: {
|
||||
f = [&](double value) { return value != testcase.val; };
|
||||
break;
|
||||
}
|
||||
case OpType::GreaterEqual: {
|
||||
f = [&](double value) { return value >= testcase.val; };
|
||||
break;
|
||||
}
|
||||
case OpType::GreaterThan: {
|
||||
f = [&](double value) { return value > testcase.val; };
|
||||
break;
|
||||
}
|
||||
case OpType::LessEqual: {
|
||||
f = [&](double value) { return value <= testcase.val; };
|
||||
break;
|
||||
}
|
||||
case OpType::LessThan: {
|
||||
f = [&](double value) { return value < testcase.val; };
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo(Unsupported, "unsupported range node");
|
||||
}
|
||||
}
|
||||
|
||||
auto pointer = milvus::Json::pointer(testcase.nested_path);
|
||||
proto::plan::GenericValue value;
|
||||
value.set_float_val(testcase.val);
|
||||
auto expr =
|
||||
std::make_shared<milvus::expr::UnaryRangeFilterExpr>(
|
||||
milvus::expr::ColumnInfo(
|
||||
json_fid, DataType::JSON, testcase.nested_path),
|
||||
op,
|
||||
value,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
auto plan = std::make_shared<plan::FilterBitsNode>(
|
||||
DEFAULT_PLANNODE_ID, expr);
|
||||
auto final = ExecuteQueryExpr(
|
||||
plan, seg_promote, N * num_iters, MAX_TIMESTAMP);
|
||||
EXPECT_EQ(final.size(), N * num_iters);
|
||||
|
||||
// specify some offsets and do scalar filtering on these offsets
|
||||
milvus::exec::OffsetVector offsets;
|
||||
offsets.reserve(N * num_iters / 2);
|
||||
for (auto i = 0; i < N * num_iters; ++i) {
|
||||
if (i % 2 == 0) {
|
||||
offsets.emplace_back(i);
|
||||
}
|
||||
}
|
||||
auto col_vec = milvus::test::gen_filter_res(plan.get(),
|
||||
seg_promote,
|
||||
N * num_iters,
|
||||
MAX_TIMESTAMP,
|
||||
&offsets);
|
||||
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||
|
||||
for (int i = 0; i < N * num_iters; ++i) {
|
||||
auto ans = final[i];
|
||||
|
||||
auto val =
|
||||
milvus::Json(simdjson::padded_string(json_col[i]))
|
||||
.template at<double>(pointer)
|
||||
.value();
|
||||
auto ref = f(val);
|
||||
ASSERT_EQ(ans, ref);
|
||||
if (i % 2 == 0) {
|
||||
ASSERT_EQ(view[int(i / 2)], ref);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
struct Testcase {
|
||||
std::string val;
|
||||
std::vector<std::string> nested_path;
|
||||
};
|
||||
std::vector<Testcase> testcases{
|
||||
{"abc", {"string"}},
|
||||
{"This is a line break\\nThis is a new line!", {"string"}}};
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto i64_fid = schema->AddDebugField("id", DataType::INT64);
|
||||
auto json_fid = schema->AddDebugField("json", DataType::JSON);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
auto seg = CreateGrowingSegment(schema, empty_index_meta);
|
||||
int N = 1000;
|
||||
std::vector<std::string> json_col;
|
||||
int num_iters = 1;
|
||||
for (int iter = 0; iter < num_iters; ++iter) {
|
||||
auto raw_data = DataGen(schema, N, iter);
|
||||
auto new_json_col = raw_data.get_col<std::string>(json_fid);
|
||||
|
||||
json_col.insert(
|
||||
json_col.end(), new_json_col.begin(), new_json_col.end());
|
||||
seg->PreInsert(N);
|
||||
seg->Insert(iter * N,
|
||||
N,
|
||||
raw_data.row_ids_.data(),
|
||||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
|
||||
std::vector<OpType> ops{
|
||||
OpType::Equal,
|
||||
OpType::NotEqual,
|
||||
OpType::GreaterThan,
|
||||
OpType::GreaterEqual,
|
||||
OpType::LessThan,
|
||||
OpType::LessEqual,
|
||||
};
|
||||
for (const auto& testcase : testcases) {
|
||||
auto check = [&](std::string_view value) {
|
||||
return value == testcase.val;
|
||||
};
|
||||
std::function<bool(std::string_view)> f = check;
|
||||
for (auto& op : ops) {
|
||||
switch (op) {
|
||||
case OpType::Equal: {
|
||||
f = [&](std::string_view value) {
|
||||
return value == testcase.val;
|
||||
};
|
||||
break;
|
||||
}
|
||||
case OpType::NotEqual: {
|
||||
f = [&](std::string_view value) {
|
||||
return value != testcase.val;
|
||||
};
|
||||
break;
|
||||
}
|
||||
case OpType::GreaterEqual: {
|
||||
f = [&](std::string_view value) {
|
||||
return value >= testcase.val;
|
||||
};
|
||||
break;
|
||||
}
|
||||
case OpType::GreaterThan: {
|
||||
f = [&](std::string_view value) {
|
||||
return value > testcase.val;
|
||||
};
|
||||
break;
|
||||
}
|
||||
case OpType::LessEqual: {
|
||||
f = [&](std::string_view value) {
|
||||
return value <= testcase.val;
|
||||
};
|
||||
break;
|
||||
}
|
||||
case OpType::LessThan: {
|
||||
f = [&](std::string_view value) {
|
||||
return value < testcase.val;
|
||||
};
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo(Unsupported, "unsupported range node");
|
||||
}
|
||||
}
|
||||
|
||||
auto pointer = milvus::Json::pointer(testcase.nested_path);
|
||||
proto::plan::GenericValue value;
|
||||
value.set_string_val(testcase.val);
|
||||
auto expr =
|
||||
std::make_shared<milvus::expr::UnaryRangeFilterExpr>(
|
||||
milvus::expr::ColumnInfo(
|
||||
json_fid, DataType::JSON, testcase.nested_path),
|
||||
op,
|
||||
value,
|
||||
std::vector<proto::plan::GenericValue>{});
|
||||
auto plan = std::make_shared<plan::FilterBitsNode>(
|
||||
DEFAULT_PLANNODE_ID, expr);
|
||||
auto final = ExecuteQueryExpr(
|
||||
plan, seg_promote, N * num_iters, MAX_TIMESTAMP);
|
||||
EXPECT_EQ(final.size(), N * num_iters);
|
||||
|
||||
// specify some offsets and do scalar filtering on these offsets
|
||||
milvus::exec::OffsetVector offsets;
|
||||
offsets.reserve(N * num_iters / 2);
|
||||
for (auto i = 0; i < N * num_iters; ++i) {
|
||||
if (i % 2 == 0) {
|
||||
offsets.emplace_back(i);
|
||||
}
|
||||
}
|
||||
auto col_vec = milvus::test::gen_filter_res(plan.get(),
|
||||
seg_promote,
|
||||
N * num_iters,
|
||||
MAX_TIMESTAMP,
|
||||
&offsets);
|
||||
BitsetTypeView view(col_vec->GetRawData(), col_vec->size());
|
||||
EXPECT_EQ(view.size(), N * num_iters / 2);
|
||||
|
||||
for (int i = 0; i < N * num_iters; ++i) {
|
||||
auto ans = final[i];
|
||||
|
||||
auto val =
|
||||
milvus::Json(simdjson::padded_string(json_col[i]))
|
||||
.template at<std::string_view>(pointer)
|
||||
.value();
|
||||
auto ref = f(val);
|
||||
ASSERT_EQ(ans, ref);
|
||||
if (i % 2 == 0) {
|
||||
ASSERT_EQ(view[int(i / 2)], ref);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct TestArrayCase {
|
||||
proto::plan::GenericValue val;
|
||||
std::vector<std::string> nested_path;
|
||||
|
@ -1457,16 +1743,13 @@ TEST_P(ExprTest, TestUnaryRangeJsonNullable) {
|
|||
int64_t val;
|
||||
std::vector<std::string> nested_path;
|
||||
};
|
||||
std::vector<Testcase> testcases{
|
||||
{10, {"int"}},
|
||||
{20, {"int"}},
|
||||
{30, {"int"}},
|
||||
{40, {"int"}},
|
||||
{10, {"double"}},
|
||||
{20, {"double"}},
|
||||
{30, {"double"}},
|
||||
{40, {"double"}},
|
||||
};
|
||||
std::vector<Testcase> testcases{{10, {"int"}},
|
||||
{20, {"int"}},
|
||||
{30, {"int"}},
|
||||
{40, {"int"}},
|
||||
{1, {"array", "0"}},
|
||||
{2, {"array", "1"}},
|
||||
{3, {"array", "2"}}};
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto i64_fid = schema->AddDebugField("id", DataType::INT64);
|
||||
|
@ -1492,7 +1775,7 @@ TEST_P(ExprTest, TestUnaryRangeJsonNullable) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
std::vector<OpType> ops{
|
||||
|
@ -1717,7 +2000,7 @@ TEST_P(ExprTest, TestTermJson) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
for (auto testcase : testcases) {
|
||||
|
@ -1810,7 +2093,7 @@ TEST_P(ExprTest, TestTermJsonNullable) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
for (auto testcase : testcases) {
|
||||
|
@ -11573,7 +11856,7 @@ TEST_P(ExprTest, TestUnaryRangeWithJSON) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
int offset = 0;
|
||||
|
@ -11833,7 +12116,7 @@ TEST_P(ExprTest, TestUnaryRangeWithJSONNullable) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
int offset = 0;
|
||||
|
@ -12139,7 +12422,7 @@ TEST_P(ExprTest, TestTermWithJSON) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
int offset = 0;
|
||||
|
@ -12372,7 +12655,7 @@ TEST_P(ExprTest, TestTermWithJSONNullable) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
int offset = 0;
|
||||
|
@ -12550,7 +12833,7 @@ TEST_P(ExprTest, TestExistsWithJSON) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
int offset = 0;
|
||||
|
@ -12778,7 +13061,7 @@ TEST_P(ExprTest, TestExistsWithJSONNullable) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
int offset = 0;
|
||||
|
@ -13661,7 +13944,7 @@ TEST_P(ExprTest, TestJsonContainsAny) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
|
||||
|
@ -13951,7 +14234,7 @@ TEST_P(ExprTest, TestJsonContainsAnyNullable) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
|
||||
|
@ -14252,7 +14535,7 @@ TEST_P(ExprTest, TestJsonContainsAll) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
|
||||
std::vector<Testcase<bool>> bool_testcases{{{true, true}, {"bool"}},
|
||||
|
@ -14566,7 +14849,7 @@ TEST_P(ExprTest, TestJsonContainsAllNullable) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
|
||||
std::vector<Testcase<bool>> bool_testcases{{{true, true}, {"bool"}},
|
||||
|
@ -14890,7 +15173,7 @@ TEST_P(ExprTest, TestJsonContainsArray) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
|
||||
|
@ -15278,7 +15561,7 @@ TEST_P(ExprTest, TestJsonContainsArrayNullable) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
|
||||
proto::plan::GenericValue generic_a;
|
||||
|
@ -15702,7 +15985,7 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeArray) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
|
||||
proto::plan::GenericValue int_value;
|
||||
|
@ -15833,7 +16116,7 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeArrayNullable) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
|
||||
|
@ -15968,7 +16251,7 @@ TEST_P(ExprTest, TestJsonContainsDiffType) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
|
||||
proto::plan::GenericValue int_val;
|
||||
|
@ -16103,7 +16386,7 @@ TEST_P(ExprTest, TestJsonContainsDiffTypeNullable) {
|
|||
raw_data.timestamps_.data(),
|
||||
raw_data.raw_);
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
query::ExecPlanNodeVisitor visitor(*seg_promote, MAX_TIMESTAMP);
|
||||
|
||||
|
|
|
@ -0,0 +1,588 @@
|
|||
// Copyright (C) 2019 - 2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <functional>
|
||||
#include <boost/filesystem.hpp>
|
||||
#include <unordered_set>
|
||||
#include <memory>
|
||||
|
||||
#include "common/Tracer.h"
|
||||
#include "index/BitmapIndex.h"
|
||||
#include "storage/Util.h"
|
||||
#include "storage/InsertData.h"
|
||||
#include "indexbuilder/IndexFactory.h"
|
||||
#include "index/IndexFactory.h"
|
||||
#include "test_utils/indexbuilder_test_utils.h"
|
||||
#include "index/Meta.h"
|
||||
#include "index/JsonKeyStatsInvertedIndex.h"
|
||||
#include "common/Json.h"
|
||||
#include "common/Types.h"
|
||||
using namespace milvus::index;
|
||||
using namespace milvus::indexbuilder;
|
||||
using namespace milvus;
|
||||
using namespace milvus::index;
|
||||
|
||||
static std::vector<milvus::Json>
|
||||
GenerateJsons(int size) {
|
||||
std::vector<Json> jsons;
|
||||
std::default_random_engine random(42);
|
||||
std::normal_distribution<> distr(0, 1);
|
||||
for (int i = 0; i < size; i++) {
|
||||
auto str = R"({"int":)" + std::to_string(random()) + R"(,"double":)" +
|
||||
std::to_string(static_cast<double>(random())) +
|
||||
R"(,"string":")" + std::to_string(random()) +
|
||||
R"(","bool": true)" + R"(, "array": [1,2,3])" + "}";
|
||||
jsons.push_back(milvus::Json(simdjson::padded_string(str)));
|
||||
}
|
||||
return jsons;
|
||||
}
|
||||
|
||||
class JsonKeyStatsIndexTest : public ::testing::TestWithParam<bool> {
|
||||
protected:
|
||||
void
|
||||
Init(int64_t collection_id,
|
||||
int64_t partition_id,
|
||||
int64_t segment_id,
|
||||
int64_t field_id,
|
||||
int64_t index_build_id,
|
||||
int64_t index_version,
|
||||
int64_t size) {
|
||||
proto::schema::FieldSchema field_schema;
|
||||
field_schema.set_data_type(proto::schema::DataType::JSON);
|
||||
field_schema.set_nullable(nullable_);
|
||||
auto field_meta = storage::FieldDataMeta{
|
||||
collection_id, partition_id, segment_id, field_id, field_schema};
|
||||
auto index_meta = storage::IndexMeta{
|
||||
segment_id, field_id, index_build_id, index_version};
|
||||
|
||||
data_ = std::move(GenerateJsons(size));
|
||||
auto field_data = storage::CreateFieldData(DataType::JSON, nullable_);
|
||||
if (nullable_) {
|
||||
valid_data.reserve(size_);
|
||||
for (size_t i = 0; i < size_; i++) {
|
||||
valid_data.push_back(false);
|
||||
}
|
||||
}
|
||||
if (nullable_) {
|
||||
int byteSize = (size_ + 7) / 8;
|
||||
uint8_t* valid_data_ = new uint8_t[byteSize];
|
||||
for (int i = 0; i < size_; i++) {
|
||||
bool value = valid_data[i];
|
||||
int byteIndex = i / 8;
|
||||
int bitIndex = i % 8;
|
||||
if (value) {
|
||||
valid_data_[byteIndex] |= (1 << bitIndex);
|
||||
} else {
|
||||
valid_data_[byteIndex] &= ~(1 << bitIndex);
|
||||
}
|
||||
}
|
||||
field_data->FillFieldData(data_.data(), valid_data_, data_.size());
|
||||
delete[] valid_data_;
|
||||
} else {
|
||||
field_data->FillFieldData(data_.data(), data_.size());
|
||||
}
|
||||
|
||||
storage::InsertData insert_data(field_data);
|
||||
insert_data.SetFieldDataMeta(field_meta);
|
||||
insert_data.SetTimestamps(0, 100);
|
||||
|
||||
auto serialized_bytes = insert_data.Serialize(storage::Remote);
|
||||
|
||||
auto log_path = fmt::format("/{}/{}/{}/{}/{}/{}",
|
||||
"/tmp/test-jsonkey-index/",
|
||||
collection_id,
|
||||
partition_id,
|
||||
segment_id,
|
||||
field_id,
|
||||
0);
|
||||
chunk_manager_->Write(
|
||||
log_path, serialized_bytes.data(), serialized_bytes.size());
|
||||
|
||||
storage::FileManagerContext ctx(field_meta, index_meta, chunk_manager_);
|
||||
std::vector<std::string> index_files;
|
||||
|
||||
Config config;
|
||||
config["insert_files"] = std::vector<std::string>{log_path};
|
||||
|
||||
auto build_index =
|
||||
std::make_shared<JsonKeyStatsInvertedIndex>(ctx, false);
|
||||
build_index->Build(config);
|
||||
|
||||
auto create_index_result = build_index->Upload(config);
|
||||
auto memSize = create_index_result->GetMemSize();
|
||||
auto serializedSize = create_index_result->GetSerializedSize();
|
||||
ASSERT_GT(memSize, 0);
|
||||
ASSERT_GT(serializedSize, 0);
|
||||
index_files = create_index_result->GetIndexFiles();
|
||||
|
||||
index::CreateIndexInfo index_info{};
|
||||
config["index_files"] = index_files;
|
||||
|
||||
index_ = std::make_shared<JsonKeyStatsInvertedIndex>(ctx, true);
|
||||
index_->Load(milvus::tracer::TraceContext{}, config);
|
||||
}
|
||||
|
||||
void
|
||||
SetUp() override {
|
||||
nullable_ = GetParam();
|
||||
type_ = DataType::JSON;
|
||||
int64_t collection_id = 1;
|
||||
int64_t partition_id = 2;
|
||||
int64_t segment_id = 3;
|
||||
int64_t field_id = 101;
|
||||
int64_t index_build_id = 1000;
|
||||
int64_t index_version = 10000;
|
||||
size_ = 1;
|
||||
std::string root_path = "/tmp/test-jsonkey-index/";
|
||||
|
||||
storage::StorageConfig storage_config;
|
||||
storage_config.storage_type = "local";
|
||||
storage_config.root_path = root_path;
|
||||
chunk_manager_ = storage::CreateChunkManager(storage_config);
|
||||
|
||||
Init(collection_id,
|
||||
partition_id,
|
||||
segment_id,
|
||||
field_id,
|
||||
index_build_id,
|
||||
index_version,
|
||||
size_);
|
||||
}
|
||||
|
||||
virtual ~JsonKeyStatsIndexTest() override {
|
||||
boost::filesystem::remove_all(chunk_manager_->GetRootPath());
|
||||
}
|
||||
|
||||
public:
|
||||
std::shared_ptr<JsonKeyStatsInvertedIndex> index_;
|
||||
DataType type_;
|
||||
bool nullable_;
|
||||
size_t size_;
|
||||
FixedVector<bool> valid_data;
|
||||
std::vector<milvus::Json> data_;
|
||||
std::vector<std::string> json_col;
|
||||
std::shared_ptr<storage::ChunkManager> chunk_manager_;
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(JsonKeyStatsIndexTestSuite,
|
||||
JsonKeyStatsIndexTest,
|
||||
::testing::Values(true, false));
|
||||
|
||||
TEST_P(JsonKeyStatsIndexTest, HasEscapeSequence) {
|
||||
EXPECT_TRUE(index_->has_escape_sequence("Hello\\nWorld"));
|
||||
EXPECT_TRUE(index_->has_escape_sequence("Tab\\tCharacter"));
|
||||
EXPECT_TRUE(index_->has_escape_sequence("Carriage\\rReturn"));
|
||||
EXPECT_TRUE(index_->has_escape_sequence("Backspace\\bTest"));
|
||||
EXPECT_TRUE(index_->has_escape_sequence("FormFeed\\fTest"));
|
||||
EXPECT_TRUE(index_->has_escape_sequence("Vertical\\vTab"));
|
||||
EXPECT_TRUE(index_->has_escape_sequence("Backslash\\\\Test"));
|
||||
EXPECT_TRUE(index_->has_escape_sequence("Quote\\\"Test"));
|
||||
EXPECT_TRUE(index_->has_escape_sequence("SingleQuote\\'Test"));
|
||||
|
||||
EXPECT_FALSE(index_->has_escape_sequence("No escape sequence here"));
|
||||
EXPECT_FALSE(index_->has_escape_sequence("Just a backslash \\"));
|
||||
EXPECT_FALSE(index_->has_escape_sequence(""));
|
||||
}
|
||||
|
||||
TEST_P(JsonKeyStatsIndexTest, TestTermInFunc) {
|
||||
struct Testcase {
|
||||
std::vector<int64_t> term;
|
||||
std::vector<std::string> nested_path;
|
||||
};
|
||||
std::vector<Testcase> testcases{
|
||||
{{1, 2, 3, 4}, {"int"}},
|
||||
{{10, 100, 1000, 10000}, {"int"}},
|
||||
{{100, 10000, 9999, 444}, {"int"}},
|
||||
{{23, 42, 66, 17, 25}, {"int"}},
|
||||
};
|
||||
for (auto testcase : testcases) {
|
||||
auto check = [&](int64_t value) {
|
||||
std::unordered_set<int64_t> term_set(testcase.term.begin(),
|
||||
testcase.term.end());
|
||||
return term_set.find(value) != term_set.end();
|
||||
};
|
||||
std::unordered_set<int64_t> term_set(testcase.term.begin(),
|
||||
testcase.term.end());
|
||||
auto filter_func = [&term_set, this](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
return term_set.find(int64_t(value)) != term_set.end();
|
||||
};
|
||||
auto pointer = milvus::Json::pointer(testcase.nested_path);
|
||||
auto bitset =
|
||||
index_->FilterByPath(pointer, size_, false, true, filter_func);
|
||||
ASSERT_EQ(bitset.size(), size_);
|
||||
for (int i = 0; i < bitset.size(); ++i) {
|
||||
if (nullable_ && !valid_data[i]) {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
} else {
|
||||
auto val = data_[i].template at<int64_t>(pointer).value();
|
||||
auto ans = bitset[i];
|
||||
auto ref = check(val);
|
||||
ASSERT_EQ(ans, ref);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(JsonKeyStatsIndexTest, TestUnaryRangeInFunc) {
|
||||
struct Testcase {
|
||||
int64_t val;
|
||||
std::vector<std::string> nested_path;
|
||||
};
|
||||
std::vector<Testcase> testcases{
|
||||
{10, {"int"}},
|
||||
{20, {"int"}},
|
||||
{30, {"int"}},
|
||||
{40, {"int"}},
|
||||
};
|
||||
std::vector<OpType> ops{
|
||||
OpType::Equal,
|
||||
OpType::NotEqual,
|
||||
OpType::GreaterThan,
|
||||
OpType::GreaterEqual,
|
||||
OpType::LessThan,
|
||||
OpType::LessEqual,
|
||||
};
|
||||
for (const auto& testcase : testcases) {
|
||||
auto check = [&](int64_t value) { return value == testcase.val; };
|
||||
std::function<bool(int64_t)> f = check;
|
||||
for (auto& op : ops) {
|
||||
switch (op) {
|
||||
case OpType::Equal: {
|
||||
f = [&](int64_t value) { return value == testcase.val; };
|
||||
break;
|
||||
}
|
||||
case OpType::NotEqual: {
|
||||
f = [&](int64_t value) { return value != testcase.val; };
|
||||
break;
|
||||
}
|
||||
case OpType::GreaterEqual: {
|
||||
f = [&](int64_t value) { return value >= testcase.val; };
|
||||
break;
|
||||
}
|
||||
case OpType::GreaterThan: {
|
||||
f = [&](int64_t value) { return value > testcase.val; };
|
||||
break;
|
||||
}
|
||||
case OpType::LessEqual: {
|
||||
f = [&](int64_t value) { return value <= testcase.val; };
|
||||
break;
|
||||
}
|
||||
case OpType::LessThan: {
|
||||
f = [&](int64_t value) { return value < testcase.val; };
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo(Unsupported, "unsupported range node");
|
||||
}
|
||||
}
|
||||
|
||||
auto filter_func = [&op, &testcase, this](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
switch (op) {
|
||||
case OpType::GreaterThan:
|
||||
return int64_t(value) > testcase.val;
|
||||
case OpType::GreaterEqual:
|
||||
return int64_t(value) >= testcase.val;
|
||||
case OpType::LessThan:
|
||||
return int64_t(value) < testcase.val;
|
||||
case OpType::LessEqual:
|
||||
return int64_t(value) <= testcase.val;
|
||||
case OpType::Equal:
|
||||
return int64_t(value) == testcase.val;
|
||||
case OpType::NotEqual:
|
||||
return int64_t(value) != testcase.val;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
};
|
||||
auto pointer = milvus::Json::pointer(testcase.nested_path);
|
||||
auto bitset =
|
||||
index_->FilterByPath(pointer, size_, false, true, filter_func);
|
||||
ASSERT_EQ(bitset.size(), size_);
|
||||
for (int i = 0; i < bitset.size(); ++i) {
|
||||
if (nullable_ && !valid_data[i]) {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
} else {
|
||||
auto ans = bitset[i];
|
||||
if (testcase.nested_path[0] == "int") {
|
||||
auto val =
|
||||
data_[i].template at<int64_t>(pointer).value();
|
||||
auto ref = f(val);
|
||||
ASSERT_EQ(ans, ref);
|
||||
} else {
|
||||
auto val =
|
||||
data_[i].template at<double>(pointer).value();
|
||||
auto ref = f(val);
|
||||
ASSERT_EQ(ans, ref);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(JsonKeyStatsIndexTest, TestBinaryRangeInFunc) {
|
||||
struct Testcase {
|
||||
bool lower_inclusive;
|
||||
bool upper_inclusive;
|
||||
int64_t lower;
|
||||
int64_t upper;
|
||||
std::vector<std::string> nested_path;
|
||||
};
|
||||
std::vector<Testcase> testcases{
|
||||
{true, false, 10, 20, {"int"}},
|
||||
{true, true, 20, 30, {"int"}},
|
||||
{false, true, 30, 40, {"int"}},
|
||||
{false, false, 40, 50, {"int"}},
|
||||
{true, false, 10, 20, {"double"}},
|
||||
{true, true, 20, 30, {"double"}},
|
||||
{false, true, 30, 40, {"double"}},
|
||||
{false, false, 40, 50, {"double"}},
|
||||
};
|
||||
for (const auto& testcase : testcases) {
|
||||
auto check = [&](int64_t value) {
|
||||
if (testcase.lower_inclusive && testcase.upper_inclusive) {
|
||||
return testcase.lower <= value && value <= testcase.upper;
|
||||
} else if (testcase.lower_inclusive && !testcase.upper_inclusive) {
|
||||
return testcase.lower <= value && value < testcase.upper;
|
||||
} else if (!testcase.lower_inclusive && testcase.upper_inclusive) {
|
||||
return testcase.lower < value && value <= testcase.upper;
|
||||
} else {
|
||||
return testcase.lower < value && value < testcase.upper;
|
||||
}
|
||||
};
|
||||
|
||||
auto filter_func = [&testcase, this](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
if (valid) {
|
||||
if (testcase.lower_inclusive && testcase.upper_inclusive) {
|
||||
return testcase.lower <= int64_t(value) &&
|
||||
int64_t(value) <= testcase.upper;
|
||||
} else if (testcase.lower_inclusive &&
|
||||
!testcase.upper_inclusive) {
|
||||
return testcase.lower <= int64_t(value) &&
|
||||
int64_t(value) < testcase.upper;
|
||||
} else if (!testcase.lower_inclusive &&
|
||||
testcase.upper_inclusive) {
|
||||
return testcase.lower < int64_t(value) &&
|
||||
int64_t(value) <= testcase.upper;
|
||||
} else {
|
||||
return testcase.lower < int64_t(value) &&
|
||||
int64_t(value) < testcase.upper;
|
||||
}
|
||||
} else {
|
||||
auto val =
|
||||
this->data_[row_id].template at<int64_t>(offset, size);
|
||||
if (val.error()) {
|
||||
return false;
|
||||
}
|
||||
if (testcase.lower_inclusive && testcase.upper_inclusive) {
|
||||
return testcase.lower <= int64_t(val.value()) &&
|
||||
int64_t(val.value()) <= testcase.upper;
|
||||
} else if (testcase.lower_inclusive &&
|
||||
!testcase.upper_inclusive) {
|
||||
return testcase.lower <= int64_t(val.value()) &&
|
||||
int64_t(val.value()) < testcase.upper;
|
||||
} else if (!testcase.lower_inclusive &&
|
||||
testcase.upper_inclusive) {
|
||||
return testcase.lower < int64_t(val.value()) &&
|
||||
int64_t(val.value()) <= testcase.upper;
|
||||
} else {
|
||||
return testcase.lower < int64_t(val.value()) &&
|
||||
int64_t(val.value()) < testcase.upper;
|
||||
}
|
||||
}
|
||||
};
|
||||
auto pointer = milvus::Json::pointer(testcase.nested_path);
|
||||
auto bitset =
|
||||
index_->FilterByPath(pointer, size_, false, true, filter_func);
|
||||
ASSERT_EQ(bitset.size(), size_);
|
||||
for (int i = 0; i < bitset.size(); ++i) {
|
||||
if (nullable_ && !valid_data[i]) {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
} else {
|
||||
auto ans = bitset[i];
|
||||
if (testcase.nested_path[0] == "int") {
|
||||
auto val = data_[i].template at<int64_t>(pointer).value();
|
||||
auto ref = check(val);
|
||||
ASSERT_EQ(ans, ref);
|
||||
} else {
|
||||
auto val = data_[i].template at<double>(pointer).value();
|
||||
auto ref = check(val);
|
||||
ASSERT_EQ(ans, ref);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(JsonKeyStatsIndexTest, TestExistInFunc) {
|
||||
struct Testcase {
|
||||
std::vector<std::string> nested_path;
|
||||
};
|
||||
std::vector<Testcase> testcases{
|
||||
{{"A"}},
|
||||
{{"int"}},
|
||||
{{"double"}},
|
||||
{{"B"}},
|
||||
};
|
||||
for (const auto& testcase : testcases) {
|
||||
auto pointer = milvus::Json::pointer(testcase.nested_path);
|
||||
auto filter_func = [&pointer, this](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
return this->data_[row_id].exist(pointer);
|
||||
};
|
||||
|
||||
auto bitset =
|
||||
index_->FilterByPath(pointer, size_, false, true, filter_func);
|
||||
ASSERT_EQ(bitset.size(), size_);
|
||||
for (int i = 0; i < bitset.size(); ++i) {
|
||||
if (nullable_ && !valid_data[i]) {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
} else {
|
||||
auto ans = bitset[i];
|
||||
auto val = data_[i].exist(pointer);
|
||||
ASSERT_EQ(ans, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
TEST_P(JsonKeyStatsIndexTest, TestJsonContainsAllFunc) {
|
||||
struct Testcase {
|
||||
std::vector<int64_t> term;
|
||||
std::vector<std::string> nested_path;
|
||||
};
|
||||
{
|
||||
std::vector<Testcase> testcases{
|
||||
{{1, 2, 3}, {"array"}},
|
||||
{{10, 100}, {"array"}},
|
||||
{{100, 1000}, {"array"}},
|
||||
};
|
||||
for (const auto& testcase : testcases) {
|
||||
auto check = [&](const std::vector<int64_t>& values) {
|
||||
for (auto const& e : testcase.term) {
|
||||
if (std::find(values.begin(), values.end(), e) ==
|
||||
values.end()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
auto pointer = milvus::Json::pointer(testcase.nested_path);
|
||||
std::unordered_set<int64_t> elements;
|
||||
for (auto const& element : testcase.term) {
|
||||
elements.insert(element);
|
||||
}
|
||||
auto filter_func = [&elements, this](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
auto array = this->data_[row_id].array_at(offset, size);
|
||||
std::unordered_set<int64_t> tmp_elements(elements);
|
||||
for (auto&& it : array) {
|
||||
auto val = it.template get<int64_t>();
|
||||
if (val.error()) {
|
||||
continue;
|
||||
}
|
||||
tmp_elements.erase(val.value());
|
||||
if (tmp_elements.size() == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return tmp_elements.empty();
|
||||
};
|
||||
|
||||
auto bitset =
|
||||
index_->FilterByPath(pointer, size_, false, true, filter_func);
|
||||
ASSERT_EQ(bitset.size(), size_);
|
||||
for (int i = 0; i < bitset.size(); ++i) {
|
||||
if (nullable_ && !valid_data[i]) {
|
||||
ASSERT_EQ(bitset[i], false);
|
||||
} else {
|
||||
auto ans = bitset[i];
|
||||
auto array = data_[i].array_at(pointer);
|
||||
std::vector<int64_t> res;
|
||||
for (const auto& element : array) {
|
||||
res.push_back(element.template get<int64_t>());
|
||||
}
|
||||
ASSERT_EQ(ans, check(res));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(GrowingJsonKeyStatsIndexTest, GrowingIndex) {
|
||||
using Index = index::JsonKeyStatsInvertedIndex;
|
||||
auto index = std::make_unique<Index>(std::numeric_limits<int64_t>::max(),
|
||||
"json",
|
||||
"/tmp/test-jsonkey-index/");
|
||||
auto str = R"({"int":)" + std::to_string(1) + R"(,"double":)" +
|
||||
std::to_string(static_cast<double>(1)) + R"(,"string":")" +
|
||||
std::to_string(1) + R"(","bool": true)" +
|
||||
R"(, "array": [1,2,3])" + "}";
|
||||
auto str1 = R"({"int":)" + std::to_string(2) + "}";
|
||||
auto str2 = R"({"int":)" + std::to_string(3) + "}";
|
||||
std::vector<std::string> jsonDatas;
|
||||
jsonDatas.push_back(str);
|
||||
jsonDatas.push_back(str1);
|
||||
jsonDatas.push_back(str2);
|
||||
std::vector<milvus::Json> jsons;
|
||||
for (const auto& jsonData : jsonDatas) {
|
||||
jsons.push_back(milvus::Json(simdjson::padded_string(jsonData)));
|
||||
}
|
||||
index->CreateReader();
|
||||
index->AddJSONDatas(jsonDatas.size(), jsonDatas.data(), nullptr, 0);
|
||||
index->Commit();
|
||||
index->Reload();
|
||||
int64_t checkVal = 1;
|
||||
auto filter_func = [jsons, checkVal](bool valid,
|
||||
uint8_t type,
|
||||
uint32_t row_id,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
int32_t value) {
|
||||
if (value == checkVal) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
auto pointer = milvus::Json::pointer({"int"});
|
||||
auto bitset =
|
||||
index->FilterByPath(pointer, jsonDatas.size(), true, true, filter_func);
|
||||
ASSERT_EQ(bitset.size(), jsonDatas.size());
|
||||
for (int i = 0; i < bitset.size(); ++i) {
|
||||
auto val = jsons[i].template at<int64_t>(pointer).value();
|
||||
auto ans = bitset[i];
|
||||
auto ref = val == checkVal;
|
||||
ASSERT_EQ(ans, ref);
|
||||
}
|
||||
}
|
|
@ -171,7 +171,7 @@ TEST_F(GrowingSegmentRegexQueryTest, RegexQueryOnJsonField) {
|
|||
auto typed_expr = parser.ParseExprs(*expr);
|
||||
auto parsed =
|
||||
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200) * 2);
|
||||
auto segpromote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
|
||||
BitsetType final;
|
||||
final = ExecuteQueryExpr(parsed, segpromote, N, MAX_TIMESTAMP);
|
||||
|
|
|
@ -187,7 +187,7 @@ CSearch(CSegmentInterface c_segment,
|
|||
uint64_t timestamp,
|
||||
CSearchResult* result) {
|
||||
auto future =
|
||||
AsyncSearch({}, c_segment, c_plan, c_placeholder_group, timestamp);
|
||||
AsyncSearch({}, c_segment, c_plan, c_placeholder_group, timestamp, 0);
|
||||
auto futurePtr = static_cast<milvus::futures::IFuture*>(
|
||||
static_cast<void*>(static_cast<CFuture*>(future)));
|
||||
|
||||
|
|
|
@ -54,7 +54,8 @@ get_default_mmap_config() {
|
|||
.disk_limit =
|
||||
uint64_t(2) * uint64_t(1024) * uint64_t(1024) * uint64_t(1024),
|
||||
.fix_file_size = uint64_t(4) * uint64_t(1024) * uint64_t(1024),
|
||||
.growing_enable_mmap = false};
|
||||
.growing_enable_mmap = false,
|
||||
};
|
||||
return mmap_config;
|
||||
}
|
||||
|
||||
|
|
|
@ -164,6 +164,7 @@ func (gc *garbageCollector) work(ctx context.Context) {
|
|||
gc.recycleUnusedSegIndexes(ctx)
|
||||
gc.recycleUnusedAnalyzeFiles(ctx)
|
||||
gc.recycleUnusedTextIndexFiles(ctx)
|
||||
gc.recycleUnusedJSONIndexFiles(ctx)
|
||||
})
|
||||
}()
|
||||
go func() {
|
||||
|
@ -470,11 +471,16 @@ func (gc *garbageCollector) recycleDroppedSegments(ctx context.Context) {
|
|||
logs[key] = struct{}{}
|
||||
}
|
||||
|
||||
for key := range getJSONKeyLogs(segment, gc) {
|
||||
logs[key] = struct{}{}
|
||||
}
|
||||
|
||||
log.Info("GC segment start...", zap.Int("insert_logs", len(segment.GetBinlogs())),
|
||||
zap.Int("delta_logs", len(segment.GetDeltalogs())),
|
||||
zap.Int("stats_logs", len(segment.GetStatslogs())),
|
||||
zap.Int("bm25_logs", len(segment.GetBm25Statslogs())),
|
||||
zap.Int("text_logs", len(segment.GetTextStatsLogs())))
|
||||
zap.Int("text_logs", len(segment.GetTextStatsLogs())),
|
||||
zap.Int("json_key_logs", len(segment.GetJsonKeyStats())))
|
||||
if err := gc.removeObjectFiles(ctx, logs); err != nil {
|
||||
log.Warn("GC segment remove logs failed", zap.Error(err))
|
||||
continue
|
||||
|
@ -585,6 +591,20 @@ func getTextLogs(sinfo *SegmentInfo) map[string]struct{} {
|
|||
return textLogs
|
||||
}

func getJSONKeyLogs(sinfo *SegmentInfo, gc *garbageCollector) map[string]struct{} {
    jsonkeyLogs := make(map[string]struct{})
    for _, flog := range sinfo.GetJsonKeyStats() {
        for _, file := range flog.GetFiles() {
            prefix := fmt.Sprintf("%s/%s/%d/%d/%d/%d/%d/%d", gc.option.cli.RootPath(), common.JSONIndexPath,
                flog.GetBuildID(), flog.GetVersion(), sinfo.GetCollectionID(), sinfo.GetPartitionID(), sinfo.GetID(), flog.GetFieldID())
            file = path.Join(prefix, file)
            jsonkeyLogs[file] = struct{}{}
        }
    }

    return jsonkeyLogs
}
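For orientation (illustrative only, not part of this commit): the prefix assembled above places each field's JSON key stats under a directory keyed by build ID, version, collection, partition, segment and field. A standalone sketch with made-up IDs, assuming common.JSONIndexPath resolves to a directory name such as "json-key-index":

    package main

    import (
        "fmt"
        "path"
    )

    func main() {
        // All values below are hypothetical.
        rootPath := "files"               // object-storage root path
        jsonIndexPath := "json-key-index" // stand-in for common.JSONIndexPath
        buildID, version := int64(4001), int64(2)
        collectionID, partitionID, segmentID, fieldID := int64(11), int64(12), int64(13), int64(105)

        prefix := fmt.Sprintf("%s/%s/%d/%d/%d/%d/%d/%d",
            rootPath, jsonIndexPath, buildID, version, collectionID, partitionID, segmentID, fieldID)
        fmt.Println(path.Join(prefix, "some_index_file"))
        // files/json-key-index/4001/2/11/12/13/105/some_index_file
    }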
|
||||
|
||||
// removeObjectFiles remove file from oss storage, return error if any log failed to remove.
|
||||
func (gc *garbageCollector) removeObjectFiles(ctx context.Context, filePaths map[string]struct{}) error {
|
||||
futures := make([]*conc.Future[struct{}], 0)
|
||||
|
@ -904,3 +924,64 @@ func (gc *garbageCollector) recycleUnusedTextIndexFiles(ctx context.Context) {
|
|||
|
||||
metrics.GarbageCollectorRunCount.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Add(1)
|
||||
}
|
||||
|
||||
// recycleUnusedJSONIndexFiles loads JSON key stats metadata, scans the matching object-storage prefixes,
// and removes index files left over from older (superseded) stats versions.
|
||||
func (gc *garbageCollector) recycleUnusedJSONIndexFiles(ctx context.Context) {
|
||||
start := time.Now()
|
||||
log := log.Ctx(ctx).With(zap.String("gcName", "recycleUnusedJSONIndexFiles"), zap.Time("startAt", start))
|
||||
log.Info("start recycleUnusedJSONIndexFiles...")
|
||||
defer func() { log.Info("recycleUnusedJSONIndexFiles done", zap.Duration("timeCost", time.Since(start))) }()
|
||||
|
||||
hasJSONIndexSegments := gc.meta.SelectSegments(ctx, SegmentFilterFunc(func(info *SegmentInfo) bool {
|
||||
return len(info.GetJsonKeyStats()) != 0
|
||||
}))
|
||||
fileNum := 0
|
||||
deletedFilesNum := atomic.NewInt32(0)
|
||||
|
||||
for _, seg := range hasJSONIndexSegments {
|
||||
for _, fieldStats := range seg.GetJsonKeyStats() {
|
||||
log := log.With(zap.Int64("segmentID", seg.GetID()), zap.Int64("fieldID", fieldStats.GetFieldID()))
|
||||
// clear low version task
|
||||
for i := int64(1); i < fieldStats.GetVersion(); i++ {
|
||||
prefix := fmt.Sprintf("%s/%s/%d/%d/%d/%d/%d/%d", gc.option.cli.RootPath(), common.JSONIndexPath,
|
||||
fieldStats.GetBuildID(), i, seg.GetCollectionID(), seg.GetPartitionID(), seg.GetID(), fieldStats.GetFieldID())
|
||||
futures := make([]*conc.Future[struct{}], 0)
|
||||
|
||||
err := gc.option.cli.WalkWithPrefix(ctx, prefix, true, func(files *storage.ChunkObjectInfo) bool {
|
||||
file := files.FilePath
|
||||
|
||||
future := gc.option.removeObjectPool.Submit(func() (struct{}, error) {
|
||||
log := log.With(zap.String("file", file))
|
||||
log.Info("garbageCollector recycleUnusedJSONIndexFiles remove file...")
|
||||
|
||||
if err := gc.option.cli.Remove(ctx, file); err != nil {
|
||||
log.Warn("garbageCollector recycleUnusedJSONIndexFiles remove file failed", zap.Error(err))
|
||||
return struct{}{}, err
|
||||
}
|
||||
deletedFilesNum.Inc()
|
||||
log.Info("garbageCollector recycleUnusedJSONIndexFiles remove file success")
|
||||
return struct{}{}, nil
|
||||
})
|
||||
futures = append(futures, future)
|
||||
return true
|
||||
})
|
||||
|
||||
// Wait for all remove tasks done.
|
||||
if err := conc.BlockOnAll(futures...); err != nil {
|
||||
// error is logged, and can be ignored here.
|
||||
log.Warn("some task failure in remove object pool", zap.Error(err))
|
||||
}
|
||||
|
||||
log = log.With(zap.Int("deleteJSONKeyIndexNum", int(deletedFilesNum.Load())), zap.Int("walkFileNum", fileNum))
|
||||
if err != nil {
|
||||
log.Warn("json index files recycle failed when walk with prefix", zap.Error(err))
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
log.Info("json index files recycle done")
|
||||
|
||||
metrics.GarbageCollectorRunCount.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Add(1)
|
||||
}
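A note on the retention rule above, as a small standalone sketch (illustrative only): for a field whose JSON key stats are currently at version V, every prefix carrying a version from 1 to V-1 is treated as stale and removed, while the current version is always kept.

    package gcsketch

    // staleVersions lists the stats versions the pass above would delete for a
    // field whose JSON key stats are currently at version `current`.
    func staleVersions(current int64) []int64 {
        stale := make([]int64, 0)
        for v := int64(1); v < current; v++ {
            stale = append(stale, v)
        }
        return stale // e.g. current = 3 -> [1 2]
    }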
|
||||
|
|
|
@ -440,7 +440,6 @@ func newSegmentIndexMeta(catalog metastore.DataCoordCatalog) *indexMeta {
|
|||
}
|
||||
|
||||
func TestMeta_CreateIndex(t *testing.T) {
|
||||
|
||||
indexParams := []*commonpb.KeyValuePair{
|
||||
{
|
||||
Key: common.IndexTypeKey,
|
||||
|
|
|
@ -73,6 +73,9 @@ func (jm *statsJobManager) triggerStatsTaskLoop() {
|
|||
|
||||
ticker := time.NewTicker(Params.DataCoordCfg.TaskCheckInterval.GetAsDuration(time.Second))
|
||||
defer ticker.Stop()
|
||||
|
||||
lastJSONStatsLastTrigger := time.Now().Unix()
|
||||
maxJSONStatsTaskCount := 0
|
||||
for {
|
||||
select {
|
||||
case <-jm.ctx.Done():
|
||||
|
@ -82,6 +85,7 @@ func (jm *statsJobManager) triggerStatsTaskLoop() {
|
|||
jm.triggerSortStatsTask()
|
||||
jm.triggerTextStatsTask()
|
||||
jm.triggerBM25StatsTask()
|
||||
lastJSONStatsLastTrigger, maxJSONStatsTaskCount = jm.triggerJsonKeyIndexStatsTask(lastJSONStatsLastTrigger, maxJSONStatsTaskCount)
|
||||
|
||||
case segID := <-getStatsTaskChSingleton():
|
||||
log.Info("receive new segment to trigger stats task", zap.Int64("segmentID", segID))
|
||||
|
@ -141,10 +145,21 @@ func needDoTextIndex(segment *SegmentInfo, fieldIDs []UniqueID) bool {
|
|||
}
|
||||
|
||||
for _, fieldID := range fieldIDs {
|
||||
if segment.GetTextStatsLogs() == nil {
|
||||
if segment.GetTextStatsLogs()[fieldID] == nil {
|
||||
return true
|
||||
}
|
||||
if segment.GetTextStatsLogs()[fieldID] == nil {
|
||||
}
|
||||
return false
|
||||
}
|
||||

func needDoJsonKeyIndex(segment *SegmentInfo, fieldIDs []UniqueID) bool {
    if !(isFlush(segment) && segment.GetLevel() != datapb.SegmentLevel_L0 &&
        segment.GetIsSorted()) {
        return false
    }

    for _, fieldID := range fieldIDs {
        if segment.GetJsonKeyStats()[fieldID] == nil {
            return true
        }
    }
|
||||
|
@ -182,6 +197,38 @@ func (jm *statsJobManager) triggerTextStatsTask() {
|
|||
}
|
||||
}
|
||||
|
||||
func (jm *statsJobManager) triggerJsonKeyIndexStatsTask(lastJSONStatsLastTrigger int64, maxJSONStatsTaskCount int) (int64, int) {
|
||||
collections := jm.mt.GetCollections()
|
||||
for _, collection := range collections {
|
||||
needTriggerFieldIDs := make([]UniqueID, 0)
|
||||
for _, field := range collection.Schema.GetFields() {
|
||||
h := typeutil.CreateFieldSchemaHelper(field)
|
||||
if h.EnableJSONKeyStatsIndex() && Params.CommonCfg.EnabledJSONKeyStats.GetAsBool() {
|
||||
needTriggerFieldIDs = append(needTriggerFieldIDs, field.GetFieldID())
|
||||
}
|
||||
}
|
||||
segments := jm.mt.SelectSegments(jm.ctx, WithCollection(collection.ID), SegmentFilterFunc(func(seg *SegmentInfo) bool {
|
||||
return needDoJsonKeyIndex(seg, needTriggerFieldIDs)
|
||||
}))
|
||||
if time.Now().Unix()-lastJSONStatsLastTrigger > int64(Params.DataCoordCfg.JSONStatsTriggerInterval.GetAsDuration(time.Minute).Seconds()) {
|
||||
lastJSONStatsLastTrigger = time.Now().Unix()
|
||||
maxJSONStatsTaskCount = 0
|
||||
}
|
||||
for _, segment := range segments {
|
||||
if maxJSONStatsTaskCount >= Params.DataCoordCfg.JSONStatsTriggerCount.GetAsInt() {
|
||||
break
|
||||
}
|
||||
if err := jm.SubmitStatsTask(segment.GetID(), segment.GetID(), indexpb.StatsSubJob_JsonKeyIndexJob, true); err != nil {
|
||||
log.Warn("create stats task with json key index for segment failed, wait for retry:",
|
||||
zap.Int64("segmentID", segment.GetID()), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
maxJSONStatsTaskCount++
|
||||
}
|
||||
}
|
||||
return lastJSONStatsLastTrigger, maxJSONStatsTaskCount
|
||||
}
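The loop above rate-limits task creation: the per-window counter resets once the configured interval has elapsed, and at most JSONStatsTriggerCount segments are submitted per window. A standalone sketch of that throttle (illustrative only; the real values come from the JSONStatsTriggerCount and JSONStatsTriggerInterval parameters used above):

    package main

    import (
        "fmt"
        "time"
    )

    // shouldSubmit mirrors the throttle above: once the interval has elapsed the
    // per-window counter resets, and at most maxCount segments are accepted per window.
    func shouldSubmit(now, lastTrigger int64, submitted int, interval time.Duration, maxCount int) (int64, int, bool) {
        if now-lastTrigger > int64(interval.Seconds()) {
            lastTrigger = now
            submitted = 0
        }
        if submitted >= maxCount {
            return lastTrigger, submitted, false
        }
        return lastTrigger, submitted + 1, true
    }

    func main() {
        last := time.Now().Unix()
        count := 0
        accepted := 0
        for i := 0; i < 12; i++ {
            var ok bool
            last, count, ok = shouldSubmit(time.Now().Unix(), last, count, 10*time.Minute, 10)
            if ok {
                accepted++
            }
        }
        fmt.Println(accepted) // 10: the last two requests in this window are rejected
    }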
|
||||
|
||||
func (jm *statsJobManager) triggerBM25StatsTask() {
|
||||
collections := jm.mt.GetCollections()
|
||||
for _, collection := range collections {
|
||||
|
|
|
@ -2158,6 +2158,7 @@ func (m *meta) SaveStatsResultSegment(oldSegmentID int64, result *workerpb.Stats
|
|||
Statslogs: result.GetStatsLogs(),
|
||||
TextStatsLogs: result.GetTextStatsLogs(),
|
||||
Bm25Statslogs: result.GetBm25Logs(),
|
||||
JsonKeyStats: result.GetJsonKeyStatsLogs(),
|
||||
Deltalogs: nil,
|
||||
CompactionFrom: []int64{oldSegmentID},
|
||||
IsSorted: true,
|
||||
|
|
|
@ -43,6 +43,18 @@ func SetTextIndexLogs(textIndexLogs map[int64]*datapb.TextIndexStats) SegmentOpe
|
|||
}
|
||||
}
|
||||

func SetJsonKeyIndexLogs(jsonKeyIndexLogs map[int64]*datapb.JsonKeyStats) SegmentOperator {
    return func(segment *SegmentInfo) bool {
        if segment.JsonKeyStats == nil {
            segment.JsonKeyStats = make(map[int64]*datapb.JsonKeyStats)
        }
        for field, logs := range jsonKeyIndexLogs {
            segment.JsonKeyStats[field] = logs
        }
        return true
    }
}
|
||||
|
||||
type segmentCriterion struct {
|
||||
collectionID int64
|
||||
channel string
|
||||
|
|
|
@ -2,7 +2,6 @@ package datacoord
|
|||
|
||||
import (
|
||||
"context"
|
||||
"github.com/milvus-io/milvus/pkg/v2/proto/indexpb"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
|
@ -33,6 +32,7 @@ import (
|
|||
"github.com/milvus-io/milvus/pkg/v2/log"
|
||||
"github.com/milvus-io/milvus/pkg/v2/mq/msgstream"
|
||||
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
|
||||
"github.com/milvus-io/milvus/pkg/v2/proto/indexpb"
|
||||
"github.com/milvus-io/milvus/pkg/v2/proto/internalpb"
|
||||
"github.com/milvus-io/milvus/pkg/v2/proto/workerpb"
|
||||
"github.com/milvus-io/milvus/pkg/v2/util/merr"
|
||||
|
|
|
@ -244,10 +244,14 @@ func (st *statsTask) PreCheck(ctx context.Context, dependency *taskScheduler) bo
|
|||
CollectionTtl: collTtl.Nanoseconds(),
|
||||
CurrentTs: tsoutil.GetCurrentTime(),
|
||||
// update version after check
|
||||
TaskVersion: statsMeta.GetVersion() + 1,
|
||||
BinlogMaxSize: Params.DataNodeCfg.BinLogMaxSize.GetAsUint64(),
|
||||
StorageVersion: segment.StorageVersion,
|
||||
TaskSlot: st.taskSlot,
|
||||
TaskVersion: statsMeta.GetVersion() + 1,
|
||||
BinlogMaxSize: Params.DataNodeCfg.BinLogMaxSize.GetAsUint64(),
|
||||
StorageVersion: segment.StorageVersion,
|
||||
TaskSlot: st.taskSlot,
|
||||
EnableJsonKeyStats: Params.CommonCfg.EnabledJSONKeyStats.GetAsBool(),
|
||||
JsonKeyStatsTantivyMemory: Params.DataCoordCfg.JSONKeyStatsMemoryBudgetInTantivy.GetAsInt64(),
|
||||
JsonKeyStatsDataFormat: 1,
|
||||
EnableJsonKeyStatsInSort: Params.DataCoordCfg.EnabledJSONKeyStatsInSort.GetAsBool(),
|
||||
}
|
||||
|
||||
log.Info("stats task pre check successfully", zap.String("subJobType", st.subJobType.String()),
|
||||
|
@ -373,6 +377,13 @@ func (st *statsTask) SetJobInfo(meta *meta) error {
|
|||
zap.Int64("segmentID", st.segmentID), zap.Error(err))
|
||||
return err
|
||||
}
|
||||
case indexpb.StatsSubJob_JsonKeyIndexJob:
|
||||
err := meta.UpdateSegment(st.taskInfo.GetSegmentID(), SetJsonKeyIndexLogs(st.taskInfo.GetJsonKeyStatsLogs()))
|
||||
if err != nil {
|
||||
log.Warn("save json key index stats result failed", zap.Int64("taskId", st.taskID),
|
||||
zap.Int64("segmentID", st.segmentID), zap.Error(err))
|
||||
return err
|
||||
}
|
||||
case indexpb.StatsSubJob_BM25Job:
|
||||
// TODO: support bm25 job
|
||||
}
|
||||
|
|
|
@ -22,10 +22,9 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"go.uber.org/atomic"
|
||||
|
||||
"github.com/stretchr/testify/mock"
|
||||
"github.com/stretchr/testify/suite"
|
||||
"go.uber.org/atomic"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
|
|
|
@ -312,18 +312,19 @@ func (m *TaskManager) WaitTaskFinish() {
|
|||
}
|
||||
|
||||
type StatsTaskInfo struct {
|
||||
Cancel context.CancelFunc
|
||||
State indexpb.JobState
|
||||
FailReason string
|
||||
CollID typeutil.UniqueID
|
||||
PartID typeutil.UniqueID
|
||||
SegID typeutil.UniqueID
|
||||
InsertChannel string
|
||||
NumRows int64
|
||||
InsertLogs []*datapb.FieldBinlog
|
||||
StatsLogs []*datapb.FieldBinlog
|
||||
TextStatsLogs map[int64]*datapb.TextIndexStats
|
||||
Bm25Logs []*datapb.FieldBinlog
|
||||
Cancel context.CancelFunc
|
||||
State indexpb.JobState
|
||||
FailReason string
|
||||
CollID typeutil.UniqueID
|
||||
PartID typeutil.UniqueID
|
||||
SegID typeutil.UniqueID
|
||||
InsertChannel string
|
||||
NumRows int64
|
||||
InsertLogs []*datapb.FieldBinlog
|
||||
StatsLogs []*datapb.FieldBinlog
|
||||
TextStatsLogs map[int64]*datapb.TextIndexStats
|
||||
Bm25Logs []*datapb.FieldBinlog
|
||||
JSONKeyStatsLogs map[int64]*datapb.JsonKeyStats
|
||||
}
|
||||
|
||||
func (m *TaskManager) LoadOrStoreStatsTask(clusterID string, taskID typeutil.UniqueID, info *StatsTaskInfo) *StatsTaskInfo {
|
||||
|
@ -410,24 +411,46 @@ func (m *TaskManager) StoreStatsTextIndexResult(
|
|||
}
|
||||
}
|
||||
|
||||
func (m *TaskManager) StoreJSONKeyStatsResult(
|
||||
clusterID string,
|
||||
taskID typeutil.UniqueID,
|
||||
collID typeutil.UniqueID,
|
||||
partID typeutil.UniqueID,
|
||||
segID typeutil.UniqueID,
|
||||
channel string,
|
||||
jsonKeyIndexLogs map[int64]*datapb.JsonKeyStats,
|
||||
) {
|
||||
key := Key{ClusterID: clusterID, TaskID: taskID}
|
||||
m.stateLock.Lock()
|
||||
defer m.stateLock.Unlock()
|
||||
if info, ok := m.statsTasks[key]; ok {
|
||||
info.JSONKeyStatsLogs = jsonKeyIndexLogs
|
||||
info.SegID = segID
|
||||
info.CollID = collID
|
||||
info.PartID = partID
|
||||
info.InsertChannel = channel
|
||||
}
|
||||
}
|
||||
|
||||
func (m *TaskManager) GetStatsTaskInfo(clusterID string, taskID typeutil.UniqueID) *StatsTaskInfo {
|
||||
m.stateLock.Lock()
|
||||
defer m.stateLock.Unlock()
|
||||
|
||||
if info, ok := m.statsTasks[Key{ClusterID: clusterID, TaskID: taskID}]; ok {
|
||||
return &StatsTaskInfo{
|
||||
Cancel: info.Cancel,
|
||||
State: info.State,
|
||||
FailReason: info.FailReason,
|
||||
CollID: info.CollID,
|
||||
PartID: info.PartID,
|
||||
SegID: info.SegID,
|
||||
InsertChannel: info.InsertChannel,
|
||||
NumRows: info.NumRows,
|
||||
InsertLogs: info.InsertLogs,
|
||||
StatsLogs: info.StatsLogs,
|
||||
TextStatsLogs: info.TextStatsLogs,
|
||||
Bm25Logs: info.Bm25Logs,
|
||||
Cancel: info.Cancel,
|
||||
State: info.State,
|
||||
FailReason: info.FailReason,
|
||||
CollID: info.CollID,
|
||||
PartID: info.PartID,
|
||||
SegID: info.SegID,
|
||||
InsertChannel: info.InsertChannel,
|
||||
NumRows: info.NumRows,
|
||||
InsertLogs: info.InsertLogs,
|
||||
StatsLogs: info.StatsLogs,
|
||||
TextStatsLogs: info.TextStatsLogs,
|
||||
Bm25Logs: info.Bm25Logs,
|
||||
JSONKeyStatsLogs: info.JSONKeyStatsLogs,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
|
|
@ -93,6 +93,20 @@ func (s *statsTaskInfoSuite) Test_Methods() {
|
|||
})
|
||||
})
|
||||
|
||||
s.Run("storeStatsJsonIndexResult", func() {
|
||||
s.manager.StoreJSONKeyStatsResult(s.cluster, s.taskID, 1, 2, 3, "ch1",
|
||||
map[int64]*datapb.JsonKeyStats{
|
||||
100: {
|
||||
FieldID: 100,
|
||||
Version: 1,
|
||||
Files: []string{"file1"},
|
||||
LogSize: 1024,
|
||||
MemorySize: 1024,
|
||||
JsonKeyStatsDataFormat: 1,
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
s.Run("getStatsTaskInfo", func() {
|
||||
taskInfo := s.manager.GetStatsTaskInfo(s.cluster, s.taskID)
|
||||
|
||||
|
|
|
@ -38,12 +38,14 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/util/indexcgowrapper"
|
||||
"github.com/milvus-io/milvus/pkg/v2/common"
|
||||
"github.com/milvus-io/milvus/pkg/v2/log"
|
||||
"github.com/milvus-io/milvus/pkg/v2/metrics"
|
||||
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
|
||||
"github.com/milvus-io/milvus/pkg/v2/proto/indexcgopb"
|
||||
"github.com/milvus-io/milvus/pkg/v2/proto/indexpb"
|
||||
"github.com/milvus-io/milvus/pkg/v2/proto/workerpb"
|
||||
_ "github.com/milvus-io/milvus/pkg/v2/util/funcutil"
|
||||
"github.com/milvus-io/milvus/pkg/v2/util/metautil"
|
||||
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
|
||||
"github.com/milvus-io/milvus/pkg/v2/util/timerecord"
|
||||
"github.com/milvus-io/milvus/pkg/v2/util/tsoutil"
|
||||
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
||||
|
@ -311,6 +313,26 @@ func (st *statsTask) Execute(ctx context.Context) error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
if (st.req.EnableJsonKeyStatsInSort && st.req.GetSubJobType() == indexpb.StatsSubJob_Sort) || st.req.GetSubJobType() == indexpb.StatsSubJob_JsonKeyIndexJob {
|
||||
if !st.req.GetEnableJsonKeyStats() {
|
||||
return nil
|
||||
}
|
||||
|
||||
err = st.createJSONKeyStats(ctx,
|
||||
st.req.GetStorageConfig(),
|
||||
st.req.GetCollectionID(),
|
||||
st.req.GetPartitionID(),
|
||||
st.req.GetTargetSegmentID(),
|
||||
st.req.GetTaskVersion(),
|
||||
st.req.GetTaskID(),
|
||||
st.req.GetJsonKeyStatsTantivyMemory(),
|
||||
st.req.GetJsonKeyStatsDataFormat(),
|
||||
insertLogs)
|
||||
if err != nil {
|
||||
log.Warn("stats wrong, failed to create json index", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
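A standalone restatement of the gate above (illustrative only): JSON key stats are produced either inline with the sort stats job, when EnableJsonKeyStatsInSort is set on the request, or by the dedicated JsonKeyIndexJob, and in both paths only while EnableJsonKeyStats is true.

    package statssketch

    // shouldBuildJSONKeyStats restates the condition used in Execute above.
    func shouldBuildJSONKeyStats(isSortJob, isJSONKeyIndexJob, inSortEnabled, featureEnabled bool) bool {
        inline := inSortEnabled && isSortJob
        return featureEnabled && (inline || isJSONKeyIndexJob)
    }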
|
||||
|
@ -466,3 +488,108 @@ func (st *statsTask) createTextIndex(ctx context.Context,
|
|||
textIndexLogs)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (st *statsTask) createJSONKeyStats(ctx context.Context,
|
||||
storageConfig *indexpb.StorageConfig,
|
||||
collectionID int64,
|
||||
partitionID int64,
|
||||
segmentID int64,
|
||||
version int64,
|
||||
taskID int64,
|
||||
tantivyMemory int64,
|
||||
jsonKeyStatsDataFormat int64,
|
||||
insertBinlogs []*datapb.FieldBinlog,
|
||||
) error {
|
||||
log := log.Ctx(ctx).With(
|
||||
zap.String("clusterID", st.req.GetClusterID()),
|
||||
zap.Int64("taskID", st.req.GetTaskID()),
|
||||
zap.Int64("collectionID", st.req.GetCollectionID()),
|
||||
zap.Int64("partitionID", st.req.GetPartitionID()),
|
||||
zap.Int64("segmentID", st.req.GetSegmentID()),
|
||||
zap.Any("statsJobType", st.req.GetSubJobType()),
|
||||
zap.Int64("jsonKeyStatsDataFormat", jsonKeyStatsDataFormat),
|
||||
)
|
||||
if jsonKeyStatsDataFormat != 1 {
|
||||
log.Info("create json key index failed dataformat invalid")
|
||||
return nil
|
||||
}
|
||||
fieldBinlogs := lo.GroupBy(insertBinlogs, func(binlog *datapb.FieldBinlog) int64 {
|
||||
return binlog.GetFieldID()
|
||||
})
|
||||
|
||||
getInsertFiles := func(fieldID int64) ([]string, error) {
|
||||
binlogs, ok := fieldBinlogs[fieldID]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("field binlog not found for field %d", fieldID)
|
||||
}
|
||||
result := make([]string, 0, len(binlogs))
|
||||
for _, binlog := range binlogs {
|
||||
for _, file := range binlog.GetBinlogs() {
|
||||
result = append(result, metautil.BuildInsertLogPath(storageConfig.GetRootPath(), collectionID, partitionID, segmentID, fieldID, file.GetLogID()))
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
newStorageConfig, err := ParseStorageConfig(storageConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
jsonKeyIndexStats := make(map[int64]*datapb.JsonKeyStats)
|
||||
for _, field := range st.req.GetSchema().GetFields() {
|
||||
h := typeutil.CreateFieldSchemaHelper(field)
|
||||
if !h.EnableJSONKeyStatsIndex() {
|
||||
continue
|
||||
}
|
||||
log.Info("field enable json key index, ready to create json key index", zap.Int64("field id", field.GetFieldID()))
|
||||
files, err := getInsertFiles(field.GetFieldID())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
buildIndexParams := &indexcgopb.BuildIndexInfo{
|
||||
BuildID: taskID,
|
||||
CollectionID: collectionID,
|
||||
PartitionID: partitionID,
|
||||
SegmentID: segmentID,
|
||||
IndexVersion: version,
|
||||
InsertFiles: files,
|
||||
FieldSchema: field,
|
||||
StorageConfig: newStorageConfig,
|
||||
JsonKeyStatsTantivyMemory: tantivyMemory,
|
||||
}
|
||||
|
||||
uploaded, err := indexcgowrapper.CreateJSONKeyStats(ctx, buildIndexParams)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
jsonKeyIndexStats[field.GetFieldID()] = &datapb.JsonKeyStats{
|
||||
FieldID: field.GetFieldID(),
|
||||
Version: version,
|
||||
BuildID: taskID,
|
||||
Files: lo.Keys(uploaded),
|
||||
JsonKeyStatsDataFormat: jsonKeyStatsDataFormat,
|
||||
}
|
||||
log.Info("field enable json key index, create json key index done",
|
||||
zap.Int64("field id", field.GetFieldID()),
|
||||
zap.Strings("files", lo.Keys(uploaded)),
|
||||
)
|
||||
}
|
||||
|
||||
totalElapse := st.tr.RecordSpan()
|
||||
|
||||
st.manager.StoreJSONKeyStatsResult(st.req.GetClusterID(),
|
||||
st.req.GetTaskID(),
|
||||
st.req.GetCollectionID(),
|
||||
st.req.GetPartitionID(),
|
||||
st.req.GetTargetSegmentID(),
|
||||
st.req.GetInsertChannel(),
|
||||
jsonKeyIndexStats)
|
||||
|
||||
metrics.DataNodeBuildJSONStatsLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(totalElapse.Seconds())
|
||||
log.Info("create json key index done",
|
||||
zap.Int64("target segmentID", st.req.GetTargetSegmentID()),
|
||||
zap.Duration("total elapse", totalElapse))
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -29,12 +29,12 @@ import "C"
|
|||
|
||||
import (
|
||||
"github.com/cockroachdb/errors"
|
||||
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
"github.com/milvus-io/milvus/pkg/v2/common"
|
||||
"github.com/milvus-io/milvus/pkg/v2/util/hardware"
|
||||
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
|
||||
)
|
||||
|
||||
func getCurrentIndexVersion(v int32) int32 {
|
||||
|
|
|
@ -462,18 +462,19 @@ func (node *DataNode) QueryJobsV2(ctx context.Context, req *workerpb.QueryJobsV2
|
|||
info := node.taskManager.GetStatsTaskInfo(req.GetClusterID(), taskID)
|
||||
if info != nil {
|
||||
results = append(results, &workerpb.StatsResult{
|
||||
TaskID: taskID,
|
||||
State: info.State,
|
||||
FailReason: info.FailReason,
|
||||
CollectionID: info.CollID,
|
||||
PartitionID: info.PartID,
|
||||
SegmentID: info.SegID,
|
||||
Channel: info.InsertChannel,
|
||||
InsertLogs: info.InsertLogs,
|
||||
StatsLogs: info.StatsLogs,
|
||||
TextStatsLogs: info.TextStatsLogs,
|
||||
Bm25Logs: info.Bm25Logs,
|
||||
NumRows: info.NumRows,
|
||||
TaskID: taskID,
|
||||
State: info.State,
|
||||
FailReason: info.FailReason,
|
||||
CollectionID: info.CollID,
|
||||
PartitionID: info.PartID,
|
||||
SegmentID: info.SegID,
|
||||
Channel: info.InsertChannel,
|
||||
InsertLogs: info.InsertLogs,
|
||||
StatsLogs: info.StatsLogs,
|
||||
TextStatsLogs: info.TextStatsLogs,
|
||||
Bm25Logs: info.Bm25Logs,
|
||||
NumRows: info.NumRows,
|
||||
JsonKeyStatsLogs: info.JSONKeyStatsLogs,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -540,22 +540,23 @@ func (s *IndexServiceSuite) Test_CreateStatsTask() {
|
|||
s.Run("normal case", func() {
|
||||
taskID := int64(100)
|
||||
req := &workerpb.CreateStatsRequest{
|
||||
ClusterID: "cluster2",
|
||||
TaskID: taskID,
|
||||
CollectionID: s.collID,
|
||||
PartitionID: s.partID,
|
||||
InsertChannel: "ch1",
|
||||
SegmentID: s.segID,
|
||||
InsertLogs: fieldBinlogs,
|
||||
DeltaLogs: nil,
|
||||
StorageConfig: s.storageConfig,
|
||||
Schema: generateTestSchema(),
|
||||
TargetSegmentID: s.segID + 1,
|
||||
StartLogID: s.logID + 100,
|
||||
EndLogID: s.logID + 200,
|
||||
NumRows: s.numRows,
|
||||
BinlogMaxSize: 131000,
|
||||
SubJobType: indexpb.StatsSubJob_Sort,
|
||||
ClusterID: "cluster2",
|
||||
TaskID: taskID,
|
||||
CollectionID: s.collID,
|
||||
PartitionID: s.partID,
|
||||
InsertChannel: "ch1",
|
||||
SegmentID: s.segID,
|
||||
InsertLogs: fieldBinlogs,
|
||||
DeltaLogs: nil,
|
||||
StorageConfig: s.storageConfig,
|
||||
Schema: generateTestSchema(),
|
||||
TargetSegmentID: s.segID + 1,
|
||||
StartLogID: s.logID + 100,
|
||||
EndLogID: s.logID + 200,
|
||||
NumRows: s.numRows,
|
||||
BinlogMaxSize: 131000,
|
||||
SubJobType: indexpb.StatsSubJob_Sort,
|
||||
EnableJsonKeyStats: false,
|
||||
}
|
||||
|
||||
status, err := s.in.CreateJobV2(ctx, &workerpb.CreateJobV2Request{
|
||||
|
|
|
@ -1240,7 +1240,7 @@ func GenSimpleRetrievePlan(collection *segcore.CCollection) (*segcore.RetrievePl
|
|||
return nil, err
|
||||
}
|
||||
|
||||
plan, err2 := segcore.NewRetrievePlan(collection, planBytes, timestamp, 100)
|
||||
plan, err2 := segcore.NewRetrievePlan(collection, planBytes, timestamp, 100, 0)
|
||||
return plan, err2
|
||||
}
|
||||
|
||||
|
|
|
@ -3825,7 +3825,8 @@ func (node *Proxy) Query(ctx context.Context, request *milvuspb.QueryRequest) (*
|
|||
commonpbutil.WithMsgType(commonpb.MsgType_Retrieve),
|
||||
commonpbutil.WithSourceID(paramtable.GetNodeID()),
|
||||
),
|
||||
ReqID: paramtable.GetNodeID(),
|
||||
ReqID: paramtable.GetNodeID(),
|
||||
ConsistencyLevel: request.ConsistencyLevel,
|
||||
},
|
||||
request: request,
|
||||
qc: node.queryCoord,
|
||||
|
|
|
@ -599,7 +599,7 @@ func (t *queryTask) queryShard(ctx context.Context, nodeID int64, qn types.Query
|
|||
retrieveReq.MvccTimestamp = mvccTs
|
||||
retrieveReq.GuaranteeTimestamp = mvccTs
|
||||
}
|
||||
|
||||
retrieveReq.ConsistencyLevel = t.ConsistencyLevel
|
||||
req := &querypb.QueryRequest{
|
||||
Req: retrieveReq,
|
||||
DmlChannels: []string{channel},
|
||||
|
|
|
@ -969,8 +969,9 @@ func (t *searchTask) Requery(span trace.Span) error {
|
|||
commonpbutil.WithMsgType(commonpb.MsgType_Retrieve),
|
||||
commonpbutil.WithSourceID(paramtable.GetNodeID()),
|
||||
),
|
||||
ReqID: paramtable.GetNodeID(),
|
||||
PartitionIDs: t.GetPartitionIDs(), // use search partitionIDs
|
||||
ReqID: paramtable.GetNodeID(),
|
||||
PartitionIDs: t.GetPartitionIDs(), // use search partitionIDs
|
||||
ConsistencyLevel: t.ConsistencyLevel,
|
||||
},
|
||||
request: queryReq,
|
||||
plan: plan,
|
||||
|
|
|
@ -23,6 +23,7 @@ import (
|
|||
"github.com/samber/lo"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||
|
@ -31,6 +32,7 @@ import (
|
|||
"github.com/milvus-io/milvus/pkg/v2/log"
|
||||
"github.com/milvus-io/milvus/pkg/v2/proto/indexpb"
|
||||
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
|
||||
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
|
||||
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
||||
)
|
||||
|
||||
|
@ -89,20 +91,28 @@ func (c *IndexChecker) Check(ctx context.Context) []task.Task {
|
|||
}
|
||||
|
||||
collection := c.meta.CollectionManager.GetCollection(ctx, collectionID)
|
||||
schema := c.meta.CollectionManager.GetCollectionSchema(ctx, collectionID)
|
||||
if collection == nil {
|
||||
log.Warn("collection released during check index", zap.Int64("collection", collectionID))
|
||||
continue
|
||||
}
|
||||
if schema == nil && paramtable.Get().CommonCfg.EnabledJSONKeyStats.GetAsBool() {
|
||||
collectionSchema, err1 := c.broker.DescribeCollection(ctx, collectionID)
|
||||
if err1 == nil {
|
||||
schema = collectionSchema.GetSchema()
|
||||
c.meta.PutCollectionSchema(ctx, collectionID, collectionSchema.GetSchema())
|
||||
}
|
||||
}
|
||||
replicas := c.meta.ReplicaManager.GetByCollection(ctx, collectionID)
|
||||
for _, replica := range replicas {
|
||||
tasks = append(tasks, c.checkReplica(ctx, collection, replica, indexInfos)...)
|
||||
tasks = append(tasks, c.checkReplica(ctx, collection, replica, indexInfos, schema)...)
|
||||
}
|
||||
}
|
||||
|
||||
return tasks
|
||||
}
|
||||
|
||||
func (c *IndexChecker) checkReplica(ctx context.Context, collection *meta.Collection, replica *meta.Replica, indexInfos []*indexpb.IndexInfo) []task.Task {
|
||||
func (c *IndexChecker) checkReplica(ctx context.Context, collection *meta.Collection, replica *meta.Replica, indexInfos []*indexpb.IndexInfo, schema *schemapb.CollectionSchema) []task.Task {
|
||||
log := log.Ctx(ctx).With(
|
||||
zap.Int64("collectionID", collection.GetCollectionID()),
|
||||
)
|
||||
|
@ -113,6 +123,9 @@ func (c *IndexChecker) checkReplica(ctx context.Context, collection *meta.Collec
|
|||
|
||||
roNodeSet := typeutil.NewUniqueSet(replica.GetRONodes()...)
|
||||
targets := make(map[int64][]int64) // segmentID => FieldID
|
||||
|
||||
idSegmentsStats := make(map[int64]*meta.Segment)
|
||||
targetsStats := make(map[int64][]int64) // segmentID => FieldID
|
||||
for _, segment := range segments {
|
||||
// skip update index in read only node
|
||||
if roNodeSet.Contain(segment.Node) {
|
||||
|
@ -120,9 +133,13 @@ func (c *IndexChecker) checkReplica(ctx context.Context, collection *meta.Collec
|
|||
}
|
||||
|
||||
missing := c.checkSegment(segment, indexInfos)
|
||||
missingStats := c.checkSegmentStats(segment, schema, collection.LoadFields)
|
||||
if len(missing) > 0 {
|
||||
targets[segment.GetID()] = missing
|
||||
idSegments[segment.GetID()] = segment
|
||||
} else if len(missingStats) > 0 {
|
||||
targetsStats[segment.GetID()] = missingStats
|
||||
idSegmentsStats[segment.GetID()] = segment
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -150,6 +167,29 @@ func (c *IndexChecker) checkReplica(ctx context.Context, collection *meta.Collec
|
|||
return c.createSegmentUpdateTask(ctx, idSegments[segmentID], replica)
|
||||
})
|
||||
|
||||
segmentsStatsToUpdate := typeutil.NewSet[int64]()
|
||||
for _, segmentIDs := range lo.Chunk(lo.Keys(idSegmentsStats), MaxSegmentNumPerGetIndexInfoRPC) {
|
||||
segmentInfos, err := c.broker.GetSegmentInfo(ctx, segmentIDs...)
|
||||
if err != nil {
|
||||
log.Warn("failed to get SegmentInfo for segments", zap.Int64s("segmentIDs", segmentIDs), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
for _, segmentInfo := range segmentInfos {
|
||||
fields := targetsStats[segmentInfo.ID]
|
||||
missingFields := typeutil.NewSet(fields...)
|
||||
for field := range segmentInfo.GetJsonKeyStats() {
|
||||
if missingFields.Contain(field) {
|
||||
segmentsStatsToUpdate.Insert(segmentInfo.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tasksStats := lo.FilterMap(segmentsStatsToUpdate.Collect(), func(segmentID int64, _ int) (task.Task, bool) {
|
||||
return c.createSegmentStatsUpdateTask(ctx, idSegmentsStats[segmentID], replica)
|
||||
})
|
||||
tasks = append(tasks, tasksStats...)
|
||||
|
||||
return tasks
|
||||
}
|
||||
|
||||
|
@ -193,3 +233,58 @@ func (c *IndexChecker) createSegmentUpdateTask(ctx context.Context, segment *met
|
|||
t.SetReason("missing index")
|
||||
return t, true
|
||||
}
|
||||

func (c *IndexChecker) checkSegmentStats(segment *meta.Segment, schema *schemapb.CollectionSchema, loadField []int64) (missFieldIDs []int64) {
    var result []int64

    if paramtable.Get().CommonCfg.EnabledJSONKeyStats.GetAsBool() {
        if schema == nil {
            log.Warn("schema released during check index", zap.Int64("collection", segment.GetCollectionID()))
            return result
        }
        loadFieldMap := make(map[int64]struct{})
        for _, v := range loadField {
            loadFieldMap[v] = struct{}{}
        }
        jsonStatsFieldMap := make(map[int64]struct{})
        for _, v := range segment.JSONIndexField {
            jsonStatsFieldMap[v] = struct{}{}
        }
        for _, field := range schema.GetFields() {
            // The field must be requested for load and must not yet have JSON key stats on this segment
            h := typeutil.CreateFieldSchemaHelper(field)
            if h.EnableJSONKeyStatsIndex() {
                if _, ok := loadFieldMap[field.FieldID]; ok {
                    if _, ok := jsonStatsFieldMap[field.FieldID]; !ok {
                        result = append(result, field.FieldID)
                    }
                }
            }
        }
    }
    return result
}
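The same selection rule, as a self-contained sketch (illustrative only): a field needs a stats-update task when it is configured for JSON key stats, appears in the collection's load-field list, and is not yet reported by the loaded segment.

    package checkersketch

    // missingStatsFields restates the rule implemented by checkSegmentStats above.
    func missingStatsFields(statsEnabledFields, loadFields, segmentStatsFields []int64) []int64 {
        load := make(map[int64]struct{}, len(loadFields))
        for _, f := range loadFields {
            load[f] = struct{}{}
        }
        have := make(map[int64]struct{}, len(segmentStatsFields))
        for _, f := range segmentStatsFields {
            have[f] = struct{}{}
        }
        missing := make([]int64, 0)
        for _, f := range statsEnabledFields {
            if _, inLoad := load[f]; !inLoad {
                continue
            }
            if _, reported := have[f]; !reported {
                missing = append(missing, f)
            }
        }
        return missing
    }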
|
||||
|
||||
func (c *IndexChecker) createSegmentStatsUpdateTask(ctx context.Context, segment *meta.Segment, replica *meta.Replica) (task.Task, bool) {
|
||||
action := task.NewSegmentActionWithScope(segment.Node, task.ActionTypeStatsUpdate, segment.GetInsertChannel(), segment.GetID(), querypb.DataScope_Historical, int(segment.GetNumOfRows()))
|
||||
t, err := task.NewSegmentTask(
|
||||
ctx,
|
||||
params.Params.QueryCoordCfg.SegmentTaskTimeout.GetAsDuration(time.Millisecond),
|
||||
c.ID(),
|
||||
segment.GetCollectionID(),
|
||||
replica,
|
||||
action,
|
||||
)
|
||||
if err != nil {
|
||||
log.Warn("create segment stats update task failed",
|
||||
zap.Int64("collection", segment.GetCollectionID()),
|
||||
zap.String("channel", segment.GetInsertChannel()),
|
||||
zap.Int64("node", segment.Node),
|
||||
zap.Error(err),
|
||||
)
|
||||
return nil, false
|
||||
}
|
||||
t.SetPriority(task.TaskPriorityLow)
|
||||
t.SetReason("missing json stats")
|
||||
return t, true
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import (
|
|||
"github.com/stretchr/testify/mock"
|
||||
"github.com/stretchr/testify/suite"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
|
||||
"github.com/milvus-io/milvus/internal/metastore/kv/querycoord"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||
|
@ -97,6 +98,12 @@ func (suite *IndexCheckerSuite) TestLoadIndex() {
|
|||
// meta
|
||||
coll := utils.CreateTestCollection(1, 1)
|
||||
coll.FieldIndexID = map[int64]int64{101: 1000}
|
||||
coll.Schema = &schemapb.CollectionSchema{
|
||||
Name: "test_loadJsonIndex",
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{FieldID: 101, DataType: schemapb.DataType_JSON, Name: "JSON"},
|
||||
},
|
||||
}
|
||||
checker.meta.CollectionManager.PutCollection(ctx, coll)
|
||||
checker.meta.ReplicaManager.Put(ctx, utils.CreateTestReplica(200, 1, []int64{1, 2}))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
|
||||
|
@ -133,6 +140,8 @@ func (suite *IndexCheckerSuite) TestLoadIndex() {
|
|||
},
|
||||
}, nil)
|
||||
|
||||
suite.broker.EXPECT().GetSegmentInfo(mock.Anything, mock.Anything).
|
||||
Return([]*datapb.SegmentInfo{}, nil).Maybe()
|
||||
tasks := checker.Check(context.Background())
|
||||
suite.Require().Len(tasks, 1)
|
||||
|
||||
|
@ -162,6 +171,12 @@ func (suite *IndexCheckerSuite) TestIndexInfoNotMatch() {
|
|||
// meta
|
||||
coll := utils.CreateTestCollection(1, 1)
|
||||
coll.FieldIndexID = map[int64]int64{101: 1000}
|
||||
coll.Schema = &schemapb.CollectionSchema{
|
||||
Name: "test_loadJsonIndex",
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{FieldID: 101, DataType: schemapb.DataType_JSON, Name: "JSON"},
|
||||
},
|
||||
}
|
||||
checker.meta.CollectionManager.PutCollection(ctx, coll)
|
||||
checker.meta.ReplicaManager.Put(ctx, utils.CreateTestReplica(200, 1, []int64{1, 2}))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
|
||||
|
@ -211,7 +226,8 @@ func (suite *IndexCheckerSuite) TestIndexInfoNotMatch() {
|
|||
IndexID: 1000,
|
||||
},
|
||||
}, nil)
|
||||
|
||||
suite.broker.EXPECT().GetSegmentInfo(mock.Anything, mock.Anything).
|
||||
Return([]*datapb.SegmentInfo{}, nil).Maybe()
|
||||
tasks := checker.Check(context.Background())
|
||||
suite.Require().Len(tasks, 0)
|
||||
}
|
||||
|
@ -223,6 +239,12 @@ func (suite *IndexCheckerSuite) TestGetIndexInfoFailed() {
|
|||
// meta
|
||||
coll := utils.CreateTestCollection(1, 1)
|
||||
coll.FieldIndexID = map[int64]int64{101: 1000}
|
||||
coll.Schema = &schemapb.CollectionSchema{
|
||||
Name: "test_loadJsonIndex",
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{FieldID: 101, DataType: schemapb.DataType_JSON, Name: "JSON"},
|
||||
},
|
||||
}
|
||||
checker.meta.CollectionManager.PutCollection(ctx, coll)
|
||||
checker.meta.ReplicaManager.Put(ctx, utils.CreateTestReplica(200, 1, []int64{1, 2}))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
|
||||
|
@ -251,7 +273,8 @@ func (suite *IndexCheckerSuite) TestGetIndexInfoFailed() {
|
|||
IndexID: 1000,
|
||||
},
|
||||
}, nil)
|
||||
|
||||
suite.broker.EXPECT().GetSegmentInfo(mock.Anything, mock.Anything).
|
||||
Return([]*datapb.SegmentInfo{}, nil).Maybe()
|
||||
tasks := checker.Check(context.Background())
|
||||
suite.Require().Len(tasks, 0)
|
||||
}
|
||||
|
@ -263,6 +286,12 @@ func (suite *IndexCheckerSuite) TestCreateNewIndex() {
|
|||
// meta
|
||||
coll := utils.CreateTestCollection(1, 1)
|
||||
coll.FieldIndexID = map[int64]int64{101: 1000}
|
||||
coll.Schema = &schemapb.CollectionSchema{
|
||||
Name: "test_loadJsonIndex",
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{FieldID: 101, DataType: schemapb.DataType_JSON, Name: "JSON"},
|
||||
},
|
||||
}
|
||||
checker.meta.CollectionManager.PutCollection(ctx, coll)
|
||||
checker.meta.ReplicaManager.Put(ctx, utils.CreateTestReplica(200, 1, []int64{1, 2}))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
|
||||
|
@ -317,13 +346,234 @@ func (suite *IndexCheckerSuite) TestCreateNewIndex() {
|
|||
IndexFilePaths: []string{"index"},
|
||||
},
|
||||
}}, nil)
|
||||
|
||||
suite.broker.EXPECT().GetSegmentInfo(mock.Anything, mock.Anything).
|
||||
Return([]*datapb.SegmentInfo{}, nil).Maybe()
|
||||
tasks := checker.Check(context.Background())
|
||||
suite.Len(tasks, 1)
|
||||
suite.Len(tasks[0].Actions(), 1)
|
||||
suite.Equal(tasks[0].Actions()[0].(*task.SegmentAction).Type(), task.ActionTypeUpdate)
|
||||
}
|
||||
|
||||
func (suite *IndexCheckerSuite) TestLoadJsonIndex() {
|
||||
checker := suite.checker
|
||||
ctx := context.Background()
|
||||
paramtable.Get().Save(paramtable.Get().CommonCfg.EnabledJSONKeyStats.Key, "true")
|
||||
defer paramtable.Get().Reset(paramtable.Get().CommonCfg.EnabledJSONKeyStats.Key)
|
||||
// meta
|
||||
coll := utils.CreateTestCollection(1, 1)
|
||||
coll.FieldIndexID = map[int64]int64{101: 1000}
|
||||
coll.Schema = &schemapb.CollectionSchema{
|
||||
Name: "test_loadJsonIndex",
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{FieldID: 101, DataType: schemapb.DataType_JSON, Name: "JSON"},
|
||||
},
|
||||
}
|
||||
coll.LoadFields = []int64{101}
|
||||
checker.meta.CollectionManager.PutCollection(ctx, coll)
|
||||
checker.meta.ReplicaManager.Put(ctx, utils.CreateTestReplica(200, 1, []int64{1, 2}))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
|
||||
NodeID: 1,
|
||||
Address: "localhost",
|
||||
Hostname: "localhost",
|
||||
}))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
|
||||
NodeID: 2,
|
||||
Address: "localhost",
|
||||
Hostname: "localhost",
|
||||
}))
|
||||
checker.meta.ResourceManager.HandleNodeUp(ctx, 1)
|
||||
checker.meta.ResourceManager.HandleNodeUp(ctx, 2)
|
||||
|
||||
// dist
|
||||
fieldIndexInfo := &querypb.FieldIndexInfo{
|
||||
FieldID: 101,
|
||||
IndexID: 1000,
|
||||
EnableIndex: true,
|
||||
}
|
||||
|
||||
indexInfo := make(map[int64]*querypb.FieldIndexInfo)
|
||||
indexInfo[fieldIndexInfo.IndexID] = fieldIndexInfo
|
||||
segment := utils.CreateTestSegment(1, 1, 2, 1, 1, "test-insert-channel")
|
||||
segment.IndexInfo = indexInfo
|
||||
checker.dist.SegmentDistManager.Update(1, segment)
|
||||
|
||||
// broker
|
||||
suite.broker.EXPECT().ListIndexes(mock.Anything, mock.Anything).Call.Return(
|
||||
func(ctx context.Context, collectionID int64) ([]*indexpb.IndexInfo, error) {
|
||||
return []*indexpb.IndexInfo{
|
||||
{
|
||||
FieldID: 101,
|
||||
IndexID: 1000,
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
)
|
||||
mockJSONKeyStats := map[int64]*datapb.JsonKeyStats{
|
||||
101: {
|
||||
FieldID: 101,
|
||||
},
|
||||
}
|
||||
suite.broker.EXPECT().GetSegmentInfo(mock.Anything, mock.Anything).
|
||||
Return([]*datapb.SegmentInfo{
|
||||
{
|
||||
ID: 2,
|
||||
JsonKeyStats: mockJSONKeyStats,
|
||||
},
|
||||
}, nil).Maybe()
|
||||
tasks := checker.Check(context.Background())
|
||||
suite.Require().Len(tasks, 1)
|
||||
|
||||
t := tasks[0]
|
||||
suite.Require().Len(t.Actions(), 1)
|
||||
|
||||
action, ok := t.Actions()[0].(*task.SegmentAction)
|
||||
suite.Require().True(ok)
|
||||
suite.EqualValues(200, t.ReplicaID())
|
||||
suite.Equal(task.ActionTypeStatsUpdate, action.Type())
|
||||
suite.EqualValues(2, action.GetSegmentID())
|
||||
|
||||
// test skip load json index for read only node
|
||||
suite.nodeMgr.Stopping(1)
|
||||
suite.nodeMgr.Stopping(2)
|
||||
suite.meta.ResourceManager.HandleNodeStopping(ctx, 1)
|
||||
suite.meta.ResourceManager.HandleNodeStopping(ctx, 2)
|
||||
utils.RecoverAllCollection(suite.meta)
|
||||
tasks = checker.Check(context.Background())
|
||||
suite.Require().Len(tasks, 0)
|
||||
}
|
||||
|
||||
func (suite *IndexCheckerSuite) TestJsonIndexNotMatch() {
|
||||
checker := suite.checker
|
||||
ctx := context.Background()
|
||||
paramtable.Get().Save(paramtable.Get().CommonCfg.EnabledJSONKeyStats.Key, "true")
|
||||
defer paramtable.Get().Reset(paramtable.Get().CommonCfg.EnabledJSONKeyStats.Key)
|
||||
// meta
|
||||
coll := utils.CreateTestCollection(1, 1)
|
||||
coll.FieldIndexID = map[int64]int64{101: 1000}
|
||||
coll.Schema = &schemapb.CollectionSchema{
|
||||
Name: "test_loadJsonIndex",
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{FieldID: 101, DataType: schemapb.DataType_JSON, Name: "JSON"},
|
||||
},
|
||||
}
|
||||
checker.meta.CollectionManager.PutCollection(ctx, coll)
|
||||
checker.meta.ReplicaManager.Put(ctx, utils.CreateTestReplica(200, 1, []int64{1, 2}))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
|
||||
NodeID: 1,
|
||||
Address: "localhost",
|
||||
Hostname: "localhost",
|
||||
}))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
|
||||
NodeID: 2,
|
||||
Address: "localhost",
|
||||
Hostname: "localhost",
|
||||
}))
|
||||
checker.meta.ResourceManager.HandleNodeUp(ctx, 1)
|
||||
checker.meta.ResourceManager.HandleNodeUp(ctx, 2)
|
||||
|
||||
// dist
|
||||
checker.dist.SegmentDistManager.Update(1, utils.CreateTestSegment(1, 1, 2, 1, 1, "test-insert-channel"))
|
||||
|
||||
// broker
|
||||
suite.broker.EXPECT().ListIndexes(mock.Anything, mock.Anything).Call.Return(
|
||||
func(ctx context.Context, collectionID int64) ([]*indexpb.IndexInfo, error) {
|
||||
return []*indexpb.IndexInfo{
|
||||
{
|
||||
FieldID: 101,
|
||||
IndexID: 1000,
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
)
|
||||
suite.broker.EXPECT().GetIndexInfo(mock.Anything, mock.Anything, mock.AnythingOfType("int64")).
|
||||
Return(map[int64][]*querypb.FieldIndexInfo{2: {
|
||||
{
|
||||
FieldID: 101,
|
||||
IndexID: 1000,
|
||||
EnableIndex: false,
|
||||
IndexFilePaths: []string{"index"},
|
||||
},
|
||||
}}, nil)
|
||||
suite.broker.EXPECT().GetSegmentInfo(mock.Anything, mock.Anything).
|
||||
Return([]*datapb.SegmentInfo{
|
||||
{},
|
||||
}, nil).Maybe()
|
||||
tasks := checker.Check(context.Background())
|
||||
suite.Require().Len(tasks, 0)
|
||||
}
|
||||
|
||||
func (suite *IndexCheckerSuite) TestCreateNewJsonIndex() {
|
||||
checker := suite.checker
|
||||
ctx := context.Background()
|
||||
paramtable.Get().Save(paramtable.Get().CommonCfg.EnabledJSONKeyStats.Key, "true")
|
||||
defer paramtable.Get().Reset(paramtable.Get().CommonCfg.EnabledJSONKeyStats.Key)
|
||||
// meta
|
||||
coll := utils.CreateTestCollection(1, 1)
|
||||
coll.FieldIndexID = map[int64]int64{101: 1000}
|
||||
coll.LoadFields = []int64{101}
|
||||
coll.Schema = &schemapb.CollectionSchema{
|
||||
Name: "test_loadJsonIndex",
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{FieldID: 101, DataType: schemapb.DataType_JSON, Name: "JSON"},
|
||||
},
|
||||
}
|
||||
checker.meta.CollectionManager.PutCollection(ctx, coll)
|
||||
checker.meta.ReplicaManager.Put(ctx, utils.CreateTestReplica(200, 1, []int64{1, 2}))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
|
||||
NodeID: 1,
|
||||
Address: "localhost",
|
||||
Hostname: "localhost",
|
||||
}))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
|
||||
NodeID: 2,
|
||||
Address: "localhost",
|
||||
Hostname: "localhost",
|
||||
}))
|
||||
checker.meta.ResourceManager.HandleNodeUp(ctx, 1)
|
||||
checker.meta.ResourceManager.HandleNodeUp(ctx, 2)
|
||||
|
||||
// dist
|
||||
fieldIndexInfo := &querypb.FieldIndexInfo{
|
||||
FieldID: 101,
|
||||
IndexID: 1000,
|
||||
EnableIndex: true,
|
||||
}
|
||||
|
||||
indexInfo := make(map[int64]*querypb.FieldIndexInfo)
|
||||
indexInfo[fieldIndexInfo.IndexID] = fieldIndexInfo
|
||||
segment := utils.CreateTestSegment(1, 1, 2, 1, 1, "test-insert-channel")
|
||||
segment.IndexInfo = indexInfo
|
||||
checker.dist.SegmentDistManager.Update(1, segment)
|
||||
|
||||
// broker
|
||||
suite.broker.EXPECT().ListIndexes(mock.Anything, mock.Anything).Call.Return(
|
||||
func(ctx context.Context, collectionID int64) ([]*indexpb.IndexInfo, error) {
|
||||
return []*indexpb.IndexInfo{
|
||||
{
|
||||
FieldID: 101,
|
||||
IndexID: 1000,
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
)
|
||||
mockJSONKeyStats := map[int64]*datapb.JsonKeyStats{
|
||||
101: {
|
||||
FieldID: 101,
|
||||
},
|
||||
}
|
||||
suite.broker.EXPECT().GetSegmentInfo(mock.Anything, mock.Anything).
|
||||
Return([]*datapb.SegmentInfo{
|
||||
{
|
||||
ID: 2,
|
||||
JsonKeyStats: mockJSONKeyStats,
|
||||
},
|
||||
}, nil).Maybe()
|
||||
tasks := checker.Check(context.Background())
|
||||
suite.Len(tasks, 1)
|
||||
suite.Len(tasks[0].Actions(), 1)
|
||||
suite.Equal(tasks[0].Actions()[0].(*task.SegmentAction).Type(), task.ActionTypeStatsUpdate)
|
||||
}
|
||||
|
||||
func TestIndexChecker(t *testing.T) {
|
||||
suite.Run(t, new(IndexCheckerSuite))
|
||||
}
|
||||
|
|
|
@ -173,6 +173,7 @@ func (dh *distHandler) updateSegmentsDistribution(ctx context.Context, resp *que
|
|||
Version: s.GetVersion(),
|
||||
LastDeltaTimestamp: s.GetLastDeltaTimestamp(),
|
||||
IndexInfo: s.GetIndexInfo(),
|
||||
JSONIndexField: s.GetFieldJsonIndexStats(),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
@ -222,6 +222,7 @@ func (job *LoadCollectionJob) Execute() error {
|
|||
},
|
||||
CreatedAt: time.Now(),
|
||||
LoadSpan: sp,
|
||||
Schema: job.collInfo.GetSchema(),
|
||||
}
|
||||
job.undo.IsNewCollection = true
|
||||
err = job.meta.CollectionManager.PutCollection(job.ctx, collection, partitions...)
|
||||
|
@ -426,6 +427,7 @@ func (job *LoadPartitionJob) Execute() error {
|
|||
},
|
||||
CreatedAt: time.Now(),
|
||||
LoadSpan: sp,
|
||||
Schema: job.collInfo.GetSchema(),
|
||||
}
|
||||
err = job.meta.CollectionManager.PutCollection(job.ctx, collection, partitions...)
|
||||
if err != nil {
|
||||
|
|
|
@ -50,6 +50,7 @@ type Collection struct {
|
|||
mut sync.RWMutex
|
||||
refreshNotifier chan struct{}
|
||||
LoadSpan trace.Span
|
||||
Schema *schemapb.CollectionSchema
|
||||
}
|
||||
|
||||
func (collection *Collection) SetRefreshNotifier(notifier chan struct{}) {
|
||||
|
@ -85,6 +86,7 @@ func (collection *Collection) Clone() *Collection {
|
|||
UpdatedAt: collection.UpdatedAt,
|
||||
refreshNotifier: collection.refreshNotifier,
|
||||
LoadSpan: collection.LoadSpan,
|
||||
Schema: collection.Schema,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -238,6 +240,7 @@ func (m *CollectionManager) upgradeLoadFields(ctx context.Context, collection *q
|
|||
err = m.putCollection(ctx, true, &Collection{
|
||||
CollectionLoadInfo: collection,
|
||||
LoadPercentage: 100,
|
||||
Schema: resp.GetSchema(),
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -253,6 +256,27 @@ func (m *CollectionManager) GetCollection(ctx context.Context, collectionID type
|
|||
return m.collections[collectionID]
|
||||
}
|
||||
|
||||
func (m *CollectionManager) GetCollectionSchema(ctx context.Context, collectionID typeutil.UniqueID) *schemapb.CollectionSchema {
|
||||
m.rwmutex.RLock()
|
||||
defer m.rwmutex.RUnlock()
|
||||
collection, ok := m.collections[collectionID]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return collection.Schema
|
||||
}
|
||||
|
||||
func (m *CollectionManager) PutCollectionSchema(ctx context.Context, collectionID typeutil.UniqueID, schema *schemapb.CollectionSchema) {
|
||||
m.rwmutex.Lock()
|
||||
defer m.rwmutex.Unlock()
|
||||
|
||||
collection, ok := m.collections[collectionID]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
collection.Schema = schema
|
||||
}
|
||||
|
||||
func (m *CollectionManager) GetPartition(ctx context.Context, partitionID typeutil.UniqueID) *Partition {
|
||||
m.rwmutex.RLock()
|
||||
defer m.rwmutex.RUnlock()
|
||||
|
|
|
@ -125,6 +125,7 @@ type Segment struct {
|
|||
Version int64 // Version is the timestamp of loading segment
|
||||
LastDeltaTimestamp uint64 // The timestamp of the last delta record
|
||||
IndexInfo map[int64]*querypb.FieldIndexInfo // index info of loaded segment, indexID -> FieldIndexInfo
|
||||
JSONIndexField []int64 // json index info of loaded segment
|
||||
}
|
||||
|
||||
func SegmentFromInfo(info *datapb.SegmentInfo) *Segment {
|
||||
|
|
|
@ -33,12 +33,14 @@ const (
|
|||
ActionTypeGrow ActionType = iota + 1
|
||||
ActionTypeReduce
|
||||
ActionTypeUpdate
|
||||
ActionTypeStatsUpdate
|
||||
)
|
||||
|
||||
var ActionTypeName = map[ActionType]string{
|
||||
ActionTypeGrow: "Grow",
|
||||
ActionTypeReduce: "Reduce",
|
||||
ActionTypeUpdate: "Update",
|
||||
ActionTypeGrow: "Grow",
|
||||
ActionTypeReduce: "Reduce",
|
||||
ActionTypeUpdate: "Update",
|
||||
ActionTypeStatsUpdate: "StatsUpdate",
|
||||
}
|
||||
|
||||
func (t ActionType) String() string {
|
||||
|
|
|
@ -156,7 +156,7 @@ func (ex *Executor) removeTask(task Task, step int) {
|
|||
|
||||
func (ex *Executor) executeSegmentAction(task *SegmentTask, step int) {
|
||||
switch task.Actions()[step].Type() {
|
||||
case ActionTypeGrow, ActionTypeUpdate:
|
||||
case ActionTypeGrow, ActionTypeUpdate, ActionTypeStatsUpdate:
|
||||
ex.loadSegment(task, step)
|
||||
|
||||
case ActionTypeReduce:
|
||||
|
@ -469,6 +469,9 @@ func (ex *Executor) executeLeaderAction(task *LeaderTask, step int) {
|
|||
|
||||
case ActionTypeUpdate:
|
||||
ex.updatePartStatsVersions(task, step)
|
||||
|
||||
case ActionTypeStatsUpdate:
|
||||
ex.updatePartStatsVersions(task, step)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -49,13 +49,15 @@ const (
|
|||
TaskTypeReduce
|
||||
TaskTypeMove
|
||||
TaskTypeUpdate
|
||||
TaskTypeStatsUpdate
|
||||
)
|
||||
|
||||
var TaskTypeName = map[Type]string{
|
||||
TaskTypeGrow: "Grow",
|
||||
TaskTypeReduce: "Reduce",
|
||||
TaskTypeMove: "Move",
|
||||
TaskTypeUpdate: "Update",
|
||||
TaskTypeGrow: "Grow",
|
||||
TaskTypeReduce: "Reduce",
|
||||
TaskTypeMove: "Move",
|
||||
TaskTypeUpdate: "Update",
|
||||
TaskTypeStatsUpdate: "StatsUpdate",
|
||||
}
|
||||
|
||||
type Type int32
|
||||
|
|
|
@ -95,6 +95,8 @@ func GetTaskType(task Task) Type {
|
|||
return TaskTypeReduce
|
||||
case task.Actions()[0].Type() == ActionTypeUpdate:
|
||||
return TaskTypeUpdate
|
||||
case task.Actions()[0].Type() == ActionTypeStatsUpdate:
|
||||
return TaskTypeStatsUpdate
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
@ -132,6 +134,10 @@ func packLoadSegmentRequest(
|
|||
loadScope = querypb.LoadScope_Index
|
||||
}
|
||||
|
||||
if action.Type() == ActionTypeStatsUpdate {
|
||||
loadScope = querypb.LoadScope_Stats
|
||||
}
|
||||
|
||||
if task.Source() == utils.LeaderChecker {
|
||||
loadScope = querypb.LoadScope_Delta
|
||||
}
|
||||
|
|
|
@ -74,22 +74,23 @@ func PackSegmentLoadInfo(segment *datapb.SegmentInfo, channelCheckpoint *msgpb.M
|
|||
zap.Duration("tsLag", tsLag))
|
||||
}
|
||||
loadInfo := &querypb.SegmentLoadInfo{
|
||||
SegmentID: segment.ID,
|
||||
PartitionID: segment.PartitionID,
|
||||
CollectionID: segment.CollectionID,
|
||||
BinlogPaths: segment.Binlogs,
|
||||
NumOfRows: segment.NumOfRows,
|
||||
Statslogs: segment.Statslogs,
|
||||
Deltalogs: segment.Deltalogs,
|
||||
Bm25Logs: segment.Bm25Statslogs,
|
||||
InsertChannel: segment.InsertChannel,
|
||||
IndexInfos: indexes,
|
||||
StartPosition: segment.GetStartPosition(),
|
||||
DeltaPosition: channelCheckpoint,
|
||||
Level: segment.GetLevel(),
|
||||
StorageVersion: segment.GetStorageVersion(),
|
||||
IsSorted: segment.GetIsSorted(),
|
||||
TextStatsLogs: segment.GetTextStatsLogs(),
|
||||
SegmentID: segment.ID,
|
||||
PartitionID: segment.PartitionID,
|
||||
CollectionID: segment.CollectionID,
|
||||
BinlogPaths: segment.Binlogs,
|
||||
NumOfRows: segment.NumOfRows,
|
||||
Statslogs: segment.Statslogs,
|
||||
Deltalogs: segment.Deltalogs,
|
||||
Bm25Logs: segment.Bm25Statslogs,
|
||||
InsertChannel: segment.InsertChannel,
|
||||
IndexInfos: indexes,
|
||||
StartPosition: segment.GetStartPosition(),
|
||||
DeltaPosition: channelCheckpoint,
|
||||
Level: segment.GetLevel(),
|
||||
StorageVersion: segment.GetStorageVersion(),
|
||||
IsSorted: segment.GetIsSorted(),
|
||||
TextStatsLogs: segment.GetTextStatsLogs(),
|
||||
JsonKeyStatsLogs: segment.GetJsonKeyStats(),
|
||||
}
|
||||
return loadInfo
|
||||
}
|
||||
|
|
|
@ -175,6 +175,45 @@ func (node *QueryNode) loadIndex(ctx context.Context, req *querypb.LoadSegmentsR
|
|||
return status
|
||||
}
|
||||
|
||||
func (node *QueryNode) loadStats(ctx context.Context, req *querypb.LoadSegmentsRequest) *commonpb.Status {
|
||||
log := log.Ctx(ctx).With(
|
||||
zap.Int64("collectionID", req.GetCollectionID()),
|
||||
zap.Int64s("segmentIDs", lo.Map(req.GetInfos(), func(info *querypb.SegmentLoadInfo, _ int) int64 { return info.GetSegmentID() })),
|
||||
)
|
||||
|
||||
status := merr.Success()
|
||||
log.Info("start to load stats")
|
||||
|
||||
for _, info := range req.GetInfos() {
|
||||
log := log.With(zap.Int64("segmentID", info.GetSegmentID()))
|
||||
segment := node.manager.Segment.GetSealed(info.GetSegmentID())
|
||||
if segment == nil {
|
||||
log.Warn("segment not found for load stats operation")
|
||||
continue
|
||||
}
|
||||
localSegment, ok := segment.(*segments.LocalSegment)
|
||||
if !ok {
|
||||
log.Warn("segment not local for load stats opeartion")
|
||||
continue
|
||||
}
|
||||
|
||||
if localSegment.IsLazyLoad() {
|
||||
localSegment.SetLoadInfo(info)
|
||||
localSegment.SetNeedUpdatedVersion(req.GetVersion())
|
||||
node.manager.DiskCache.MarkItemNeedReload(ctx, localSegment.ID())
|
||||
return nil
|
||||
}
|
||||
err := node.loader.LoadJSONIndex(ctx, localSegment, info)
|
||||
if err != nil {
|
||||
log.Warn("failed to load stats", zap.Error(err))
|
||||
status = merr.Status(err)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return status
|
||||
}
|
||||
|
||||
func (node *QueryNode) queryChannel(ctx context.Context, req *querypb.QueryRequest, channel string) (*internalpb.RetrieveResults, error) {
|
||||
msgID := req.Req.Base.GetMsgID()
|
||||
traceID := trace.SpanFromContext(ctx).SpanContext().TraceID()