mirror of https://github.com/milvus-io/milvus.git
test: add bitmap index cases (#35909)
Signed-off-by: wangting0128 <ting.wang@zilliz.com>
pull/35935/head
parent 74048ce34f
commit cb49b32358
@@ -1,5 +1,6 @@
import pytest
import sys
from typing import Dict, List
from pymilvus import DefaultConfig

from base.database_wrapper import ApiDatabaseWrapper

@@ -15,6 +16,7 @@ from base.high_level_api_wrapper import HighLevelApiWrapper
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_params import IndexPrams

from pymilvus import ResourceGroupInfo

@@ -395,3 +397,23 @@ class TestcaseBase(Base):
        return tmp_user, tmp_pwd, tmp_role

    def build_multi_index(self, index_params: Dict[str, IndexPrams], collection_obj: ApiCollectionWrapper = None):
        collection_obj = collection_obj or self.collection_wrap
        for k, v in index_params.items():
            collection_obj.create_index(field_name=k, index_params=v.to_dict, index_name=k)
        log.info(f"[TestcaseBase] Build all indexes done: {list(index_params.keys())}")
        return collection_obj

    def drop_multi_index(self, index_names: List[str], collection_obj: ApiCollectionWrapper = None,
                         check_task=None, check_items=None):
        collection_obj = collection_obj or self.collection_wrap
        for n in index_names:
            collection_obj.drop_index(index_name=n, check_task=check_task, check_items=check_items)
        log.info(f"[TestcaseBase] Drop all indexes done: {index_names}")
        return collection_obj

    def show_indexes(self, collection_obj: ApiCollectionWrapper = None):
        collection_obj = collection_obj or self.collection_wrap
        # list the indexes of the target wrapper (not unconditionally self.collection_wrap)
        indexes = {n.field_name: n.params for n in collection_obj.indexes}
        log.info("[TestcaseBase] Collection: `{0}` index: {1}".format(collection_obj.name, indexes))
        return indexes
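For orientation, a minimal sketch of how these new TestcaseBase helpers are meant to compose inside a test; it assumes a connected TestcaseBase subclass with an initialized self.collection_wrap, and the field names are purely illustrative:

    # hypothetical usage of build_multi_index / show_indexes / drop_multi_index
    from common.common_params import DefaultVectorIndexParams, DefaultScalarIndexParams

    index_params = {
        **DefaultVectorIndexParams.HNSW("float_vector"),                   # one vector index
        **DefaultScalarIndexParams.list_bitmap(["int64_1", "varchar_1"]),  # BITMAP on scalars
    }
    self.build_multi_index(index_params=index_params)   # one create_index call per field
    self.show_indexes()                                 # logs {field_name: index params}
    self.drop_multi_index(index_names=list(index_params.keys()))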
@@ -104,6 +104,10 @@ class ResponseChecker:
            # describe collection interface (high level api) response check
            result = self.check_describe_collection_property(self.response, self.func_name, self.check_items)

        elif self.check_task == CheckTasks.check_insert_result:
            # check `insert` interface response
            result = self.check_insert_response(check_items=self.check_items)

        # Add check_items here if something new needs to be verified

        return result

@@ -602,3 +606,18 @@ class ResponseChecker:
            log.error("[CheckFunc] Response of API is not an error: %s" % str(res))
            assert False
        return True

    def check_insert_response(self, check_items):
        # check that the request succeeded
        self.assert_succ(self.succ, True)

        # get the expected insert count
        real = check_items.get("insert_count", None) if isinstance(check_items, dict) else None
        if real is None:
            real = len(self.kwargs_dict.get("data", [[]])[0])

        # check insert count
        error_message = "[CheckFunc] Insert count does not meet expectations, response:{0} != expected:{1}"
        assert self.response.insert_count == real, error_message.format(self.response.insert_count, real)

        return True
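A short sketch of how the new check task is driven from a test, mirroring calls that appear later in this diff (`data` is assumed to be prepared column-wise, e.g. via cf.gen_values):

    # ResponseChecker dispatches CheckTasks.check_insert_result to check_insert_response
    self.collection_wrap.insert(
        data=data,
        check_task=CheckTasks.check_insert_result,
        check_items={"insert_count": 3000},  # optional; falls back to len(data[0])
    )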
@@ -32,3 +32,9 @@ class PartitionErrorMessage(ExceptionsMessage):

class IndexErrorMessage(ExceptionsMessage):
    WrongFieldName = "cannot create index on non-vector field: %s"
    DropLoadedIndex = "index cannot be dropped, collection is loaded, please release it first"
    CheckVectorIndex = "data type {0} can't build with this index {1}"
    SparseFloatVectorMetricType = "only IP is the supported metric type for sparse index"
    VectorMetricTypeExist = "metric type not set for vector index"
    CheckBitmapIndex = "bitmap index are only supported on bool, int, string and array field"
    CheckBitmapOnPK = "create bitmap index on primary key not supported"
@@ -14,7 +14,7 @@ from npy_append_array import NpyAppendArray
from faker import Faker
from pathlib import Path
from minio import Minio
-from pymilvus import DataType
+from pymilvus import DataType, CollectionSchema
from base.schema_wrapper import ApiCollectionSchemaWrapper, ApiFieldSchemaWrapper
from common import common_type as ct
from utils.util_log import test_log as log

@@ -24,6 +24,12 @@ fake = Faker()
""" Methods of processing data """


try:
    RNG = np.random.default_rng(seed=0)
except ValueError as e:
    RNG = None


@singledispatch
def to_serializable(val):
    """Used by default."""

@@ -1230,20 +1236,23 @@ def gen_data_by_collection_field(field, nb=None, start=None):
    if data_type == DataType.BFLOAT16_VECTOR:
        dim = field.params['dim']
        if nb is None:
-            raw_vector = [random.random() for _ in range(dim)]
-            bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist()
-            return bytes(bf16_vector)
-        bf16_vectors = []
-        for i in range(nb):
-            raw_vector = [random.random() for _ in range(dim)]
-            bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist()
-            bf16_vectors.append(bytes(bf16_vector))
-        return bf16_vectors
+            return RNG.uniform(size=dim).astype(bfloat16)
+        return [RNG.uniform(size=dim).astype(bfloat16) for _ in range(int(nb))]
+        # if nb is None:
+        #     raw_vector = [random.random() for _ in range(dim)]
+        #     bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist()
+        #     return bytes(bf16_vector)
+        # bf16_vectors = []
+        # for i in range(nb):
+        #     raw_vector = [random.random() for _ in range(dim)]
+        #     bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist()
+        #     bf16_vectors.append(bytes(bf16_vector))
+        # return bf16_vectors
    if data_type == DataType.FLOAT16_VECTOR:
        dim = field.params['dim']
        if nb is None:
-            return [random.random() for i in range(dim)]
-        return [[random.random() for i in range(dim)] for _ in range(nb)]
+            return np.array([random.random() for _ in range(int(dim))], dtype=np.float16)
+        return [np.array([random.random() for _ in range(int(dim))], dtype=np.float16) for _ in range(int(nb))]
    if data_type == DataType.BINARY_VECTOR:
        dim = field.params['dim']
        if nb is None:

@@ -1251,9 +1260,21 @@ def gen_data_by_collection_field(field, nb=None, start=None):
            binary_byte = bytes(np.packbits(raw_vector, axis=-1).tolist())
            return binary_byte
        return [bytes(np.packbits([random.randint(0, 1) for _ in range(dim)], axis=-1).tolist()) for _ in range(nb)]
    if data_type == DataType.SPARSE_FLOAT_VECTOR:
        if nb is None:
            return gen_sparse_vectors(nb=1)[0]
        return gen_sparse_vectors(nb=nb)
    if data_type == DataType.ARRAY:
        max_capacity = field.params['max_capacity']
        element_type = field.element_type
        if element_type == DataType.INT8:
            if nb is None:
                return [random.randint(-128, 127) for _ in range(max_capacity)]
            return [[random.randint(-128, 127) for _ in range(max_capacity)] for _ in range(nb)]
        if element_type == DataType.INT16:
            if nb is None:
                return [random.randint(-32768, 32767) for _ in range(max_capacity)]
            return [[random.randint(-32768, 32767) for _ in range(max_capacity)] for _ in range(nb)]
        if element_type == DataType.INT32:
            if nb is None:
                return [random.randint(-2147483648, 2147483647) for _ in range(max_capacity)]

@@ -1279,7 +1300,6 @@ def gen_data_by_collection_field(field, nb=None, start=None):
            if nb is None:
                return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)]
            return [["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)] for _ in range(nb)]

    return None

@@ -1296,6 +1316,25 @@ def gen_data_by_collection_schema(schema, nb, r=0):
    return data


def gen_varchar_values(nb: int, length: int = 0):
    return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(nb)]


def gen_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = {}):
    """
    Generate default values according to the collection fields;
    the value of any specified field can be overridden via `default_values`.
    """
    data = []
    for field in schema.fields:
        default_value = default_values.get(field.name, None)
        if default_value is not None:
            data.append(default_value)
        elif field.auto_id is False:
            data.append(gen_data_by_collection_field(field, nb, start_id * nb))
    return data


def gen_json_files_for_bulk_insert(data, schema, data_dir):
    for d in data:
        if len(d) > 0:

@@ -2288,3 +2327,71 @@ def gen_vectors_based_on_vector_type(num, dim, vector_data_type):
        vectors = gen_sparse_vectors(num, dim)

    return vectors


def field_types() -> dict:
    return dict(sorted(dict(DataType.__members__).items(), key=lambda item: item[0], reverse=True))


def get_array_element_type(data_type: str):
    if hasattr(DataType, "ARRAY") and data_type.startswith(DataType.ARRAY.name):
        # slice off the "ARRAY" prefix (str.lstrip strips a character set, not a prefix)
        element_type = data_type[len(DataType.ARRAY.name):].lstrip("_")
        for _field in field_types().keys():
            if str(element_type).upper().startswith(_field):
                return _field, getattr(DataType, _field)
        raise ValueError(f"[get_array_data_type] Can't find element type:{element_type} for array:{data_type}")
    raise ValueError(f"[get_array_data_type] Data type does not start with array: {data_type}")


def set_field_schema(field: str, params: dict):
    for k, v in field_types().items():
        if str(field).upper().startswith(k):
            _kwargs = {}

            _field_element, _data_type = k, DataType.NONE
            if hasattr(DataType, "ARRAY") and _field_element == DataType.ARRAY.name:
                _field_element, _data_type = get_array_element_type(field)
                _kwargs.update({"max_capacity": ct.default_max_capacity, "element_type": _data_type})

            if _field_element in [DataType.STRING.name, DataType.VARCHAR.name]:
                _kwargs.update({"max_length": ct.default_length})

            elif _field_element in [DataType.BINARY_VECTOR.name, DataType.FLOAT_VECTOR.name,
                                    DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name]:
                _kwargs.update({"dim": ct.default_dim})

            if isinstance(params, dict):
                _kwargs.update(params)
            else:
                raise ValueError(
                    f"[set_field_schema] Field `{field}` params is not a dict, type: {type(params)}, params: {params}")
            return ApiFieldSchemaWrapper().init_field_schema(name=field, dtype=v, **_kwargs)[0]
    raise ValueError(f"[set_field_schema] Can't set field:`{field}` schema: {params}")


def set_collection_schema(fields: list, field_params: dict = {}, **kwargs):
    """
    :param fields: List[str]
    :param field_params: {<field name>: <dict of field params>}, e.g.
        int64_1:
            is_primary: bool
            description: str
        varchar_1:
            is_primary: bool
            description: str
            max_length: int = 65535
        array_int8_1:
            max_capacity: int = 100
        array_varchar_1:
            max_capacity: int = 100
            max_length: int = 65535
        float_vector:
            dim: int = 128
    :param kwargs: params for the collection schema
        description: str
        primary_field: str
        auto_id: bool
        enable_dynamic_field: bool
    """
    field_schemas = [set_field_schema(field=field, params=field_params.get(field, {})) for field in fields]
    return ApiCollectionSchemaWrapper().init_collection_schema(fields=field_schemas, **kwargs)[0]
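A hedged usage sketch of the schema helpers above, grounded in the bitmap tests later in this diff: field names must start with a DataType member name so set_field_schema can infer the dtype, and array fields must use the uppercase ARRAY_<ELEMENT> form that get_array_element_type expects.

    # "int64_pk" -> INT64 primary key; "varchar_1" -> VARCHAR with default max_length;
    # "ARRAY_INT8" -> ARRAY of INT8 with default max_capacity (names are illustrative)
    schema = set_collection_schema(
        fields=["int64_pk", "FLOAT_VECTOR", "varchar_1", "ARRAY_INT8"],
        field_params={"int64_pk": {"is_primary": True}},  # same as FieldParams(is_primary=True).to_dict
        auto_id=False,
    )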
@@ -0,0 +1,365 @@
from dataclasses import dataclass
from typing import List, Dict

""" Define param names """


class IndexName:
    # Vector
    AUTOINDEX = "AUTOINDEX"
    FLAT = "FLAT"
    IVF_FLAT = "IVF_FLAT"
    IVF_SQ8 = "IVF_SQ8"
    IVF_PQ = "IVF_PQ"
    IVF_HNSW = "IVF_HNSW"
    HNSW = "HNSW"
    DISKANN = "DISKANN"
    SCANN = "SCANN"
    # Binary
    BIN_FLAT = "BIN_FLAT"
    BIN_IVF_FLAT = "BIN_IVF_FLAT"
    # Sparse
    SPARSE_WAND = "SPARSE_WAND"
    SPARSE_INVERTED_INDEX = "SPARSE_INVERTED_INDEX"
    # GPU
    GPU_IVF_FLAT = "GPU_IVF_FLAT"
    GPU_IVF_PQ = "GPU_IVF_PQ"
    GPU_CAGRA = "GPU_CAGRA"
    GPU_BRUTE_FORCE = "GPU_BRUTE_FORCE"

    # Scalar
    INVERTED = "INVERTED"
    BITMAP = "BITMAP"
    Trie = "Trie"
    STL_SORT = "STL_SORT"


class MetricType:
    L2 = "L2"
    IP = "IP"
    COSINE = "COSINE"
    JACCARD = "JACCARD"


""" Expressions """


@dataclass
class ExprBase:
    expr: str

    @property
    def subset(self):
        return f"({self.expr})"

    def __repr__(self):
        return self.expr


class Expr:
    # BooleanConstant: 'true' | 'True' | 'TRUE' | 'false' | 'False' | 'FALSE'

    @staticmethod
    def LT(left, right):
        return ExprBase(expr=f"{left} < {right}")

    @staticmethod
    def LE(left, right):
        return ExprBase(expr=f"{left} <= {right}")

    @staticmethod
    def GT(left, right):
        return ExprBase(expr=f"{left} > {right}")

    @staticmethod
    def GE(left, right):
        return ExprBase(expr=f"{left} >= {right}")

    @staticmethod
    def EQ(left, right):
        return ExprBase(expr=f"{left} == {right}")

    @staticmethod
    def NE(left, right):
        return ExprBase(expr=f"{left} != {right}")

    @staticmethod
    def like(left, right):
        return ExprBase(expr=f'{left} like "{right}"')

    @staticmethod
    def LIKE(left, right):
        return ExprBase(expr=f'{left} LIKE "{right}"')

    @staticmethod
    def exists(name):
        return ExprBase(expr=f'exists {name}')

    @staticmethod
    def EXISTS(name):
        return ExprBase(expr=f'EXISTS {name}')

    @staticmethod
    def ADD(left, right):
        return ExprBase(expr=f"{left} + {right}")

    @staticmethod
    def SUB(left, right):
        return ExprBase(expr=f"{left} - {right}")

    @staticmethod
    def MUL(left, right):
        return ExprBase(expr=f"{left} * {right}")

    @staticmethod
    def DIV(left, right):
        return ExprBase(expr=f"{left} / {right}")

    @staticmethod
    def MOD(left, right):
        return ExprBase(expr=f"{left} % {right}")

    @staticmethod
    def POW(left, right):
        return ExprBase(expr=f"{left} ** {right}")

    @staticmethod
    def SHL(left, right):
        # Note: not supported
        return ExprBase(expr=f"{left}<<{right}")

    @staticmethod
    def SHR(left, right):
        # Note: not supported
        return ExprBase(expr=f"{left}>>{right}")

    @staticmethod
    def BAND(left, right):
        # Note: not supported
        return ExprBase(expr=f"{left} & {right}")

    @staticmethod
    def BOR(left, right):
        # Note: not supported
        return ExprBase(expr=f"{left} | {right}")

    @staticmethod
    def BXOR(left, right):
        # Note: not supported
        return ExprBase(expr=f"{left} ^ {right}")

    @staticmethod
    def AND(left, right):
        return ExprBase(expr=f"{left} && {right}")

    @staticmethod
    def And(left, right):
        return ExprBase(expr=f"{left} and {right}")

    @staticmethod
    def OR(left, right):
        return ExprBase(expr=f"{left} || {right}")

    @staticmethod
    def Or(left, right):
        return ExprBase(expr=f"{left} or {right}")

    @staticmethod
    def BNOT(name):
        # Note: not supported
        return ExprBase(expr=f"~{name}")

    @staticmethod
    def NOT(name):
        return ExprBase(expr=f"!{name}")

    @staticmethod
    def Not(name):
        return ExprBase(expr=f"not {name}")

    @staticmethod
    def In(left, right):
        return ExprBase(expr=f"{left} in {right}")

    @staticmethod
    def Nin(left, right):
        return ExprBase(expr=f"{left} not in {right}")

    @staticmethod
    def json_contains(left, right):
        return ExprBase(expr=f"json_contains({left}, {right})")

    @staticmethod
    def JSON_CONTAINS(left, right):
        return ExprBase(expr=f"JSON_CONTAINS({left}, {right})")

    @staticmethod
    def json_contains_all(left, right):
        return ExprBase(expr=f"json_contains_all({left}, {right})")

    @staticmethod
    def JSON_CONTAINS_ALL(left, right):
        return ExprBase(expr=f"JSON_CONTAINS_ALL({left}, {right})")

    @staticmethod
    def json_contains_any(left, right):
        return ExprBase(expr=f"json_contains_any({left}, {right})")

    @staticmethod
    def JSON_CONTAINS_ANY(left, right):
        return ExprBase(expr=f"JSON_CONTAINS_ANY({left}, {right})")

    @staticmethod
    def array_contains(left, right):
        return ExprBase(expr=f"array_contains({left}, {right})")

    @staticmethod
    def ARRAY_CONTAINS(left, right):
        return ExprBase(expr=f"ARRAY_CONTAINS({left}, {right})")

    @staticmethod
    def array_contains_all(left, right):
        return ExprBase(expr=f"array_contains_all({left}, {right})")

    @staticmethod
    def ARRAY_CONTAINS_ALL(left, right):
        return ExprBase(expr=f"ARRAY_CONTAINS_ALL({left}, {right})")

    @staticmethod
    def array_contains_any(left, right):
        return ExprBase(expr=f"array_contains_any({left}, {right})")

    @staticmethod
    def ARRAY_CONTAINS_ANY(left, right):
        return ExprBase(expr=f"ARRAY_CONTAINS_ANY({left}, {right})")

    @staticmethod
    def array_length(name):
        return ExprBase(expr=f"array_length({name})")

    @staticmethod
    def ARRAY_LENGTH(name):
        return ExprBase(expr=f"ARRAY_LENGTH({name})")


""" Define pass-in params """


@dataclass
class BasePrams:

    @property
    def to_dict(self):
        return {k: v for k, v in vars(self).items() if v is not None}


@dataclass
class FieldParams(BasePrams):
    description: str = None

    # varchar
    max_length: int = None

    # array
    max_capacity: int = None

    # vector
    dim: int = None

    # scalar
    is_primary: bool = None
    # auto_id: bool = None
    is_partition_key: bool = None
    is_clustering_key: bool = None


@dataclass
class IndexPrams(BasePrams):
    index_type: str = None
    params: dict = None
    metric_type: str = None


""" Define default params """


class DefaultVectorIndexParams:

    @staticmethod
    def FLAT(field: str, metric_type=MetricType.L2):
        return {field: IndexPrams(index_type=IndexName.FLAT, params={}, metric_type=metric_type)}

    @staticmethod
    def IVF_FLAT(field: str, nlist: int = 1024, metric_type=MetricType.L2):
        return {
            field: IndexPrams(index_type=IndexName.IVF_FLAT, params={"nlist": nlist}, metric_type=metric_type)
        }

    @staticmethod
    def IVF_SQ8(field: str, nlist: int = 1024, metric_type=MetricType.L2):
        return {
            field: IndexPrams(index_type=IndexName.IVF_SQ8, params={"nlist": nlist}, metric_type=metric_type)
        }

    @staticmethod
    def HNSW(field: str, m: int = 8, ef: int = 200, metric_type=MetricType.L2):
        return {
            field: IndexPrams(index_type=IndexName.HNSW, params={"M": m, "efConstruction": ef}, metric_type=metric_type)
        }

    @staticmethod
    def DISKANN(field: str, metric_type=MetricType.L2):
        return {field: IndexPrams(index_type=IndexName.DISKANN, params={}, metric_type=metric_type)}

    @staticmethod
    def BIN_FLAT(field: str, nlist: int = 1024, metric_type=MetricType.JACCARD):
        return {
            field: IndexPrams(index_type=IndexName.BIN_FLAT, params={"nlist": nlist}, metric_type=metric_type)
        }

    @staticmethod
    def BIN_IVF_FLAT(field: str, nlist: int = 1024, metric_type=MetricType.JACCARD):
        return {
            field: IndexPrams(index_type=IndexName.BIN_IVF_FLAT, params={"nlist": nlist},
                              metric_type=metric_type)
        }

    @staticmethod
    def SPARSE_WAND(field: str, drop_ratio_build: float = 0.2, metric_type=MetricType.IP):
        return {
            field: IndexPrams(index_type=IndexName.SPARSE_WAND, params={"drop_ratio_build": drop_ratio_build},
                              metric_type=metric_type)
        }

    @staticmethod
    def SPARSE_INVERTED_INDEX(field: str, drop_ratio_build: float = 0.2, metric_type=MetricType.IP):
        return {
            field: IndexPrams(index_type=IndexName.SPARSE_INVERTED_INDEX, params={"drop_ratio_build": drop_ratio_build},
                              metric_type=metric_type)
        }


class DefaultScalarIndexParams:

    @staticmethod
    def Default(field: str):
        return {field: IndexPrams()}

    @staticmethod
    def Trie(field: str):
        return {field: IndexPrams(index_type=IndexName.Trie)}

    @staticmethod
    def STL_SORT(field: str):
        return {field: IndexPrams(index_type=IndexName.STL_SORT)}

    @staticmethod
    def INVERTED(field: str):
        return {field: IndexPrams(index_type=IndexName.INVERTED)}

    @staticmethod
    def BITMAP(field: str):
        return {field: IndexPrams(index_type=IndexName.BITMAP)}

    @staticmethod
    def list_bitmap(fields: List[str]) -> Dict[str, IndexPrams]:
        return {n: IndexPrams(index_type=IndexName.BITMAP) for n in fields}
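Two small examples of the pieces above working together; the asserted values follow directly from the definitions in this file:

    # BasePrams.to_dict drops unset (None) attributes, so scalar index params stay minimal
    assert IndexPrams(index_type=IndexName.BITMAP).to_dict == {"index_type": "BITMAP"}

    # Expr builders return ExprBase, whose repr is the raw expression string;
    # .subset parenthesizes an expression for composition
    expr = Expr.And(Expr.GE("int8_1", 0), Expr.In("varchar_1", ["a", "b"]).subset)
    assert str(expr) == "int8_1 >= 0 and (varchar_1 in ['a', 'b'])"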
@@ -286,6 +286,7 @@ class CheckTasks:
    check_value_equal = "check_value_equal"
    check_rg_property = "check_resource_group_property"
    check_describe_collection_property = "check_describe_collection_property"
    check_insert_result = "check_insert_result"


class BulkLoadStates:
@@ -13,6 +13,9 @@ from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from common.code_mapping import CollectionErrorMessage as clem
from common.code_mapping import IndexErrorMessage as iem
from common.common_params import (
    IndexName, FieldParams, IndexPrams, DefaultVectorIndexParams, DefaultScalarIndexParams, MetricType
)

from utils.util_pymilvus import *
from common.constants import *

@@ -336,7 +339,8 @@
        vec_field2 = cf.gen_float_vec_field(name="vec_field2", dim=32)
        str_field = cf.gen_string_field(name="str_field")
        str_field2 = cf.gen_string_field(name="str_field2")
-        schema, _ = self.collection_schema_wrap.init_collection_schema([id_field, vec_field, vec_field2, str_field, str_field2])
+        schema, _ = self.collection_schema_wrap.init_collection_schema(
+            [id_field, vec_field, vec_field2, str_field, str_field2])
        collection_w = self.init_collection_wrap(schema=schema)
        vec_index = ct.default_index
        vec_index_name = "my_index"

@@ -378,7 +382,7 @@
        cf.assert_equal_index(index, collection_w.collection.indexes[0])
        self.index_wrap.drop()
        assert len(collection_w.indexes) == 0

    @pytest.mark.tags(CaseLabel.L1)
    def test_index_drop_repeatedly(self):
        """

@@ -640,7 +644,8 @@
        collection_w = self.init_collection_wrap(name=c_name)
        data = cf.gen_default_list_data()
        collection_w.insert(data=data)
-        index_prams = [default_ivf_flat_index, {"metric_type": "L2", "index_type": "IVF_SQ8", "params": {"nlist": 1024}}]
+        index_prams = [default_ivf_flat_index,
+                       {"metric_type": "L2", "index_type": "IVF_SQ8", "params": {"nlist": 1024}}]
        for index in index_prams:
            index_name = cf.gen_unique_str("name")
            collection_w.create_index(default_float_vec_field_name, index, index_name=index_name)

@@ -1018,7 +1023,6 @@

@pytest.mark.tags(CaseLabel.GPU)
class TestNewIndexBinary(TestcaseBase):
    """
    ******************************************************************
      The following cases are used to test `create_index` function

@@ -1176,7 +1180,7 @@
    Test create / describe / drop index interfaces with invalid collection names
    """

-    @pytest.fixture(scope="function", params=["Trie", "STL_SORT", "INVERTED"])
+    @pytest.fixture(scope="function", params=["Trie", "STL_SORT", "INVERTED", IndexName.BITMAP])
    def scalar_index(self, request):
        yield request.param

@@ -1366,7 +1370,7 @@
        collection_w.alter_index("random_index_345", {'mmap.enabled': True},
                                 check_task=CheckTasks.err_res,
                                 check_items={ct.err_code: 65535,
                                              ct.err_msg: f"index not found"})

    @pytest.mark.tags(CaseLabel.L1)
    def test_load_mmap_index(self):

@@ -1460,8 +1464,8 @@
        params = {"index_type": index, "metric_type": metric_type, "params": param}
        error = {ct.err_code: 65535, ct.err_msg: "only IP is the supported metric type for sparse index"}
        index, _ = self.index_wrap.init_index(collection_w.collection, ct.default_sparse_vec_field_name, params,
                                              check_task=CheckTasks.err_res,
                                              check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("ratio", [-0.5, 1, 3])

@@ -1478,7 +1482,8 @@
        data = cf.gen_default_list_sparse_data()
        collection_w.insert(data=data)
        params = {"index_type": index, "metric_type": "IP", "params": {"drop_ratio_build": ratio}}
-        error = {ct.err_code: 1100, ct.err_msg: f"invalid drop_ratio_build: {ratio}, must be in range [0, 1): invalid parameter[expected=valid index params"}
+        error = {ct.err_code: 1100,
+                 ct.err_msg: f"invalid drop_ratio_build: {ratio}, must be in range [0, 1): invalid parameter[expected=valid index params"}
        index, _ = self.index_wrap.init_index(collection_w.collection, ct.default_sparse_vec_field_name, params,
                                              check_task=CheckTasks.err_res,
                                              check_items=error)

@@ -1605,7 +1610,8 @@
        index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name,
                                              default_string_index_params)
        cf.assert_equal_index(index, collection_w.indexes[0])
-        collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index, index_name="vector_flat")
+        collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index,
+                                  index_name="vector_flat")
        collection_w.load()
        assert collection_w.num_entities == default_nb

@@ -1621,7 +1627,8 @@
        collection_w = self.init_collection_wrap(name=c_name)
        data = cf.gen_default_list_data(ct.default_nb)
        collection_w.insert(data=data)
-        collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index, index_name="vector_flat")
+        collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index,
+                                  index_name="vector_flat")
        index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name,
                                              default_string_index_params)
        collection_w.load()

@@ -1753,7 +1760,7 @@
        collection_w.create_index(default_string_field_name, default_string_index_params, index_name=index_name2)
        collection_w.drop_index(index_name=index_name2)
        assert len(collection_w.indexes) == 0

    @pytest.mark.tags(CaseLabel.L1)
    def test_index_with_string_field_empty(self):
        """

@@ -1767,7 +1774,7 @@
        nb = 3000
        data = cf.gen_default_list_data(nb)
-        data[2] = [""for _ in range(nb)]
+        data[2] = ["" for _ in range(nb)]
        collection_w.insert(data=data)

        collection_w.create_index(default_string_field_name, default_string_index_params, index_name=index_name2)

@@ -1783,6 +1790,7 @@
        The following cases are used to test create index about diskann
    ******************************************************************
    """

    @pytest.fixture(scope="function", params=[False, True])
    def _async(self, request):
        yield request.param

@@ -1804,14 +1812,15 @@
        data = cf.gen_default_list_data()
        collection_w.insert(data=data)
        assert collection_w.num_entities == default_nb
-        index, _ = self.index_wrap.init_index(collection_w.collection, default_float_vec_field_name, ct.default_diskann_index)
+        index, _ = self.index_wrap.init_index(collection_w.collection, default_float_vec_field_name,
+                                              ct.default_diskann_index)
        log.info(self.index_wrap.params)
        cf.assert_equal_index(index, collection_w.indexes[0])
        collection_w.load()
        vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
        search_res, _ = collection_w.search(vectors[:default_nq], default_search_field,
                                            ct.default_diskann_search_params, default_limit,
                                            default_search_exp,
                                            check_task=CheckTasks.check_search_results,
                                            check_items={"nq": default_nq,
                                                         "limit": default_limit})

@@ -1854,11 +1863,11 @@
        vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
        search_res, _ = collection_w.search(vectors[:default_nq], default_search_field,
                                            ct.default_diskann_search_params, default_limit,
                                            default_search_exp,
                                            check_task=CheckTasks.check_search_results,
                                            check_items={"nq": default_nq,
                                                         "limit": default_limit})

    @pytest.mark.tags(CaseLabel.L2)
    def test_create_diskann_index_drop_with_async(self, _async):
        """

@@ -1901,7 +1910,7 @@
                                  index_name=field_name)
        collection_w.load()
        assert collection_w.has_index(index_name=field_name)[0] is True
        assert len(collection_w.indexes) == 1
        collection_w.release()
        collection_w.drop_index(index_name=field_name)
        assert collection_w.has_index(index_name=field_name)[0] is False

@@ -1926,7 +1935,7 @@
        collection_w.release()
        collection_w.drop_index(index_name=index_name1)
        assert collection_w.has_index(index_name=index_name1)[0] is False

    @pytest.mark.tags(CaseLabel.L2)
    def test_drop_diskann_index_and_create_again(self):
        """

@@ -1968,7 +1977,7 @@
        default_params = {}
        collection_w.create_index("float", default_params, index_name="c")
        assert collection_w.has_index(index_name="c")[0] == True

    @pytest.mark.tags(CaseLabel.L2)
    def test_drop_diskann_index_with_partition(self):
        """

@@ -2045,7 +2054,8 @@
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(c_name, schema=default_schema)
        collection_w.insert(cf.gen_default_list_data())
-        collection_w.create_index(default_float_vec_field_name, ct.default_diskann_index, index_name=ct.default_index_name)
+        collection_w.create_index(default_float_vec_field_name, ct.default_diskann_index,
+                                  index_name=ct.default_index_name)
        collection_w.set_properties({'mmap.enabled': True})
        desc, _ = collection_w.describe()
        pro = desc.get("properties")

@@ -2204,7 +2214,6 @@
    def vector_data_type(self, request):
        yield request.param

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("scalar_field_name", [ct.default_int8_field_name, ct.default_int16_field_name,
                                                   ct.default_int32_field_name, ct.default_int64_field_name,

@@ -2285,3 +2294,416 @@
            scalar_index_params = {"index_type": f"{scalar_index[i]}"}
            collection_w.create_index(scalar_fields[i], index_params=scalar_index_params, index_name=index_name)
            assert collection_w.has_index(index_name=index_name)[0] is True


class TestBitmapIndex(TestcaseBase):
    """
    Functional `BITMAP` index

    Author: Ting.Wang
    """

    def setup_method(self, method):
        super().setup_method(method)

        # connect to server before testing
        self._connect()

    @property
    def get_bitmap_support_dtype_names(self):
        dtypes = [DataType.BOOL, DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64, DataType.VARCHAR]
        dtype_names = [f"{n.name}" for n in dtypes] + [f"ARRAY_{n.name}" for n in dtypes]
        return dtype_names

    @property
    def get_bitmap_not_support_dtype_names(self):
        dtypes = [DataType.FLOAT, DataType.DOUBLE]
        dtype_names = [f"{n.name}" for n in dtypes] + [f"ARRAY_{n.name}" for n in dtypes] + [DataType.JSON.name]
        return dtype_names

    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("auto_id", [True, False])
    @pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
    def test_bitmap_on_primary_key_field(self, request, primary_field, auto_id):
        """
        target:
            1. build BITMAP index on primary key field
        method:
            1. create an empty collection
            2. build `BITMAP` index on primary key field
        expected:
            1. the primary key field does not support building a bitmap index
        """
        # init params
        collection_name = f"{request.function.__name__}_{primary_field}_{auto_id}"

        # create a collection with fields that can build `BITMAP` index
        self.collection_wrap.init_collection(
            name=collection_name,
            schema=cf.set_collection_schema(
                fields=[primary_field, DataType.FLOAT_VECTOR.name],
                field_params={primary_field: FieldParams(is_primary=True).to_dict},
                auto_id=auto_id
            )
        )

        # build `BITMAP` index on primary key field
        self.collection_wrap.create_index(
            field_name=primary_field, index_params={"index_type": IndexName.BITMAP}, index_name=primary_field,
            check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapOnPK})

    @pytest.mark.tags(CaseLabel.L0)
    def test_bitmap_on_not_supported_fields(self, request):
        """
        target:
            1. build `BITMAP` index on not supported fields
        method:
            1. create an empty collection with fields:
                [`varchar_pk`, `SPARSE_FLOAT_VECTOR`, `FLOAT`, `DOUBLE`, `JSON`, `ARRAY`, `ARRAY_FLOAT`, `ARRAY_DOUBLE`]
            2. build different `BITMAP` index params on not supported fields
        expected:
            1. check that building the index fails, asserting error code and message
        """
        # init params
        collection_name, primary_field = f"{request.function.__name__}", "varchar_pk"

        # create a collection with fields that can build `BITMAP` index
        self.collection_wrap.init_collection(
            name=collection_name,
            schema=cf.set_collection_schema(
                fields=[primary_field, DataType.SPARSE_FLOAT_VECTOR.name, *self.get_bitmap_not_support_dtype_names],
                field_params={primary_field: FieldParams(is_primary=True).to_dict}
            )
        )

        # build `BITMAP` index on sparse vector field
        for msg, index_params in {
            iem.VectorMetricTypeExist: IndexPrams(index_type=IndexName.BITMAP),
            iem.SparseFloatVectorMetricType: IndexPrams(index_type=IndexName.BITMAP, metric_type=MetricType.L2),
            iem.CheckVectorIndex.format(DataType.SPARSE_FLOAT_VECTOR, IndexName.BITMAP): IndexPrams(
                index_type=IndexName.BITMAP, metric_type=MetricType.IP)
        }.items():
            self.collection_wrap.create_index(
                field_name=DataType.SPARSE_FLOAT_VECTOR.name, index_params=index_params.to_dict,
                check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: msg}
            )

        # build `BITMAP` index on not supported scalar fields
        for _field_name in self.get_bitmap_not_support_dtype_names:
            self.collection_wrap.create_index(
                field_name=_field_name, index_params=IndexPrams(index_type=IndexName.BITMAP).to_dict,
                check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapIndex}
            )

    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("auto_id", [True, False])
    @pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
    def test_bitmap_on_empty_collection(self, request, primary_field, auto_id):
        """
        target:
            1. create `BITMAP` index on all supported fields
            2. build scalar index on loaded collection
        method:
            1. build and drop `BITMAP` index on an empty collection
            2. rebuild `BITMAP` index on loaded collection
            3. drop index on loaded collection, expecting an error
            4. re-build the same index on loaded collection
        expected:
            1. building and dropping indexes succeeds on an unloaded collection
            2. building an index succeeds on non-indexed and loaded fields
            3. an index cannot be dropped while the collection is loaded
        """
        # init params
        collection_name, nb = f"{request.function.__name__}_{primary_field}_{auto_id}", 3000

        # create a collection with fields that can build `BITMAP` index
        self.collection_wrap.init_collection(
            name=collection_name,
            schema=cf.set_collection_schema(
                fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
                field_params={primary_field: FieldParams(is_primary=True).to_dict},
                auto_id=auto_id
            )
        )

        # build `BITMAP` index on empty collection
        index_params = {
            **DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
            **DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
        }
        self.build_multi_index(index_params=index_params)
        assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())

        # drop scalars' index
        self.drop_multi_index(index_names=list(set(index_params.keys()) - {DataType.FLOAT_VECTOR.name}))
        assert len(self.collection_wrap.indexes) == 1

        # load collection
        self.collection_wrap.load()

        # build scalars' index after loading collection
        self.build_multi_index(index_params={k: v for k, v in index_params.items() if v.index_type == IndexName.BITMAP})

        # reload collection
        self.collection_wrap.load()

        # re-drop scalars' index
        self.drop_multi_index(index_names=list(set(index_params.keys()) - {DataType.FLOAT_VECTOR.name}),
                              check_task=CheckTasks.err_res,
                              check_items={ct.err_code: 65535, ct.err_msg: iem.DropLoadedIndex})

        # re-build loaded index
        self.build_multi_index(index_params=index_params)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("auto_id", [True, False])
    @pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
    def test_bitmap_insert_after_loading(self, request, primary_field, auto_id):
        """
        target:
            1. insert data after building `BITMAP` index and loading collection
        method:
            1. build index and load an empty collection
            2. insert 3k rows of data
            3. check that there is no indexed data
            4. flush collection, re-build index and refresh-load collection
            5. row count of indexed data equals inserted rows
        expected:
            1. insertion is successful
            2. segment row number == inserted rows
        """
        # init params
        collection_name, nb = f"{request.function.__name__}_{primary_field}_{auto_id}", 3000

        # create a collection with fields that can build `BITMAP` index
        self.collection_wrap.init_collection(
            name=collection_name,
            schema=cf.set_collection_schema(
                fields=[primary_field, DataType.FLOAT16_VECTOR.name, *self.get_bitmap_support_dtype_names],
                field_params={primary_field: FieldParams(is_primary=True).to_dict},
                auto_id=auto_id
            )
        )

        # build `BITMAP` index on empty collection
        index_params = {
            **DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name),
            **DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
        }
        self.build_multi_index(index_params=index_params)
        assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())

        # load collection
        self.collection_wrap.load()

        # prepare 3k rows of data (> 1024 triggers index building)
        self.collection_wrap.insert(data=cf.gen_values(self.collection_wrap.schema, nb=nb),
                                    check_task=CheckTasks.check_insert_result)

        # check no indexed segments
        res, _ = self.utility_wrap.get_query_segment_info(collection_name=collection_name)
        assert len(res) == 0

        # flush collection, segment sealed
        self.collection_wrap.flush()

        # re-build vector field index
        self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name))
        # refresh load, ensuring that indexed segments get loaded
        self.collection_wrap.load(_refresh=True)

        # check segment row number
        counts = [int(n.num_rows) for n in self.utility_wrap.get_query_segment_info(collection_name=collection_name)[0]]
        assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("auto_id", [True, False])
    @pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
    def test_bitmap_insert_before_loading(self, request, primary_field, auto_id):
        """
        target:
            1. insert data before building `BITMAP` index and loading collection
        method:
            1. insert data into an empty collection
            2. flush collection, build index and load collection
            3. the number of segments equals shards_num
        expected:
            1. insertion is successful
            2. the number of segments == shards_num
            3. segment row number == inserted rows
        """
        # init params
        collection_name, nb, shards_num = f"{request.function.__name__}_{primary_field}_{auto_id}", 3000, 16

        # create a collection with fields that can build `BITMAP` index
        self.collection_wrap.init_collection(
            name=collection_name,
            schema=cf.set_collection_schema(
                fields=[primary_field, DataType.BFLOAT16_VECTOR.name, *self.get_bitmap_support_dtype_names],
                field_params={primary_field: FieldParams(is_primary=True).to_dict},
                auto_id=auto_id
            ),
            shards_num=shards_num
        )

        # prepare data (> 1024 triggers index building)
        pk_type = "str" if primary_field.startswith(DataType.VARCHAR.name.lower()) else "int"
        default_values = {} if auto_id else {primary_field: [eval(f"{pk_type}({n})") for n in range(nb)]}
        self.collection_wrap.insert(
            data=cf.gen_values(self.collection_wrap.schema, nb=nb, default_values=default_values),
            check_task=CheckTasks.check_insert_result
        )

        # flush collection, segment sealed
        self.collection_wrap.flush()

        # build `BITMAP` index
        index_params = {
            **DefaultVectorIndexParams.DISKANN(DataType.BFLOAT16_VECTOR.name),
            **DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
        }
        self.build_multi_index(index_params=index_params)
        assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())

        # load collection
        self.collection_wrap.load()

        # get segment info
        segment_info, _ = self.utility_wrap.get_query_segment_info(collection_name=collection_name)

        # check segment counts == shards_num
        assert len(segment_info) == shards_num

        # check segment row number
        counts = [int(n.num_rows) for n in segment_info]
        assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
    @pytest.mark.parametrize("shards_num, nb", [(2, 3791), (16, 1600), (16, 10)])
    def test_bitmap_primary_field_data_repeated(self, request, primary_field, shards_num, nb):
        """
        target:
            1. the same pk value is inserted into the same shard
        method:
            1. generate the same pk value and insert data into an empty collection
            2. flush collection, build index and load collection
            3. the number of segments equals 1
            4. row count of indexed data equals inserted rows
        expected:
            1. insertion is successful
            2. the number of segments == 1
            3. segment row number == inserted rows
        """
        # init params
        collection_name = f"{request.function.__name__}_{primary_field}_{shards_num}_{nb}"

        # create a collection with fields that can build `BITMAP` index
        self.collection_wrap.init_collection(
            name=collection_name,
            schema=cf.set_collection_schema(
                fields=[primary_field, DataType.BINARY_VECTOR.name, *self.get_bitmap_support_dtype_names],
                field_params={primary_field: FieldParams(is_primary=True).to_dict},
            ),
            shards_num=shards_num
        )

        # prepare data (> 1024 triggers index building)
        pk_key = str(shards_num) if primary_field.startswith(DataType.VARCHAR.name.lower()) else shards_num
        self.collection_wrap.insert(
            data=cf.gen_values(self.collection_wrap.schema, nb=nb,
                               default_values={primary_field: [pk_key for _ in range(nb)]}),
            check_task=CheckTasks.check_insert_result
        )

        # flush collection, segment sealed
        self.collection_wrap.flush()

        # build `BITMAP` index
        index_params = {
            **DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name),
            **DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
        }
        self.build_multi_index(index_params=index_params)
        assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())

        # load collection
        self.collection_wrap.load()

        # get segment info
        segment_info, _ = self.utility_wrap.get_query_segment_info(collection_name=collection_name)

        # check segments count
        msg = f"`{collection_name}` Segments count:{len(segment_info)} != 1, pk field data is repeated."
        assert len(segment_info) == 1, msg

        # check segment row number
        counts = [int(n.num_rows) for n in segment_info]
        assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
    @pytest.mark.parametrize("shards_num, nb", [(1, 1000), (2, 3791), (16, 1600), (16, 10)])
    def test_bitmap_primary_field_data_not_repeated(self, request, primary_field, shards_num, nb):
        """
        target:
            1. different pk values are inserted into different shards
        method:
            1. generate different pk values and insert data into an empty collection
            2. flush collection, build index and load collection
            3. the number of segments equals shards_num or is less than inserted rows
            4. row count of indexed data equals inserted rows
        expected:
            1. insertion is successful
            2. the number of segments == shards_num, or <= inserted rows
            3. segment row number == inserted rows
        """
        # init params
        collection_name = f"{request.function.__name__}_{primary_field}_{shards_num}_{nb}"

        # create a collection with fields that can build `BITMAP` index
        self.collection_wrap.init_collection(
            name=collection_name,
            schema=cf.set_collection_schema(
                fields=[primary_field, DataType.BINARY_VECTOR.name, *self.get_bitmap_support_dtype_names],
                field_params={primary_field: FieldParams(is_primary=True).to_dict},
            ),
            shards_num=shards_num
        )

        # prepare data (> 1024 triggers index building)
        pk_type = "str" if primary_field.startswith(DataType.VARCHAR.name.lower()) else "int"
        self.collection_wrap.insert(
            data=cf.gen_values(self.collection_wrap.schema, nb=nb,
                               default_values={primary_field: [eval(f"{pk_type}({n})") for n in range(nb)]}),
            check_task=CheckTasks.check_insert_result
        )

        # flush collection, segment sealed
        self.collection_wrap.flush()

        # build `BITMAP` index
        index_params = {
            **DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name),
            **DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
        }
        self.build_multi_index(index_params=index_params)
        assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())

        # load collection
        self.collection_wrap.load()

        # get segment info
        segment_info, _ = self.utility_wrap.get_query_segment_info(collection_name=collection_name)

        # check segments count
        if shards_num > nb:
            msg = f"`{collection_name}` Segments count:{len(segment_info)} > insert data:{nb}"
            assert len(segment_info) <= nb, msg
        else:
            msg = f"`{collection_name}` Segments count:{len(segment_info)} != shards_num:{shards_num}"
            assert len(segment_info) == shards_num, msg

        # check segment row number
        counts = [int(n.num_rows) for n in segment_info]
        assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"