# Mirror of https://github.com/milvus-io/milvus.git (test file, Python, ~471 lines / 24 KiB)
import random

import pytest

from base.client_base import TestcaseBase
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_pymilvus import *
class TestFieldPartialLoad(TestcaseBase):
    """Happy-path tests for partial field loading.

    Covers ``load_fields`` on collections and partitions, and
    ``skip_load_dynamic_field``: fields excluded from the load list must be
    invisible to search expressions and ``output_fields``.
    """

    @pytest.mark.tags(CaseLabel.L0)
    def test_field_partial_load_default(self):
        """
        target: test field partial load
        method:
            1. create a collection with fields
            2. index/not index fields to be loaded; index/not index fields to be skipped
            3. load a part of the fields
        expected:
            1. verify the collection loaded successfully
            2. verify the loaded fields can be searched in expr and output_fields
            3. verify the skipped fields not loaded, and cannot search with them in expr or output_fields
        """
        self._connect()
        name = cf.gen_unique_str()
        dim = 128
        nb = 2000
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        load_int64_field = cf.gen_int64_field(name="int64_load")
        not_load_int64_field = cf.gen_int64_field(name="int64_not_load")
        load_string_field = cf.gen_string_field(name="string_load")
        not_load_string_field = cf.gen_string_field(name="string_not_load")
        vector_field = cf.gen_float_vec_field(dim=dim)
        schema = cf.gen_collection_schema(fields=[pk_field, load_int64_field, not_load_int64_field,
                                                  load_string_field, not_load_string_field, vector_field],
                                          auto_id=True)
        collection_w = self.init_collection_wrap(name=name, schema=schema)
        int_values = [i for i in range(nb)]
        string_values = [str(i) for i in range(nb)]
        # use cf.gen_vectors for consistency with the other tests in this file
        float_vec_values = cf.gen_vectors(nb, dim)
        collection_w.insert([int_values, int_values, string_values, string_values, float_vec_values])

        # build index on the vector field only; scalar fields stay unindexed
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
        collection_w.load(load_fields=[pk_field.name, vector_field.name,
                                       load_string_field.name, load_int64_field.name],
                          replica_number=1)
        # search: loaded fields must appear in the hit, skipped fields must not
        search_params = ct.default_search_params
        nq = 2
        search_vectors = float_vec_values[0:nq]
        res, _ = collection_w.search(data=search_vectors, anns_field=vector_field.name, param=search_params,
                                     limit=100, output_fields=["*"])
        assert pk_field.name in res[0][0].fields.keys() \
               and vector_field.name in res[0][0].fields.keys() \
               and load_string_field.name in res[0][0].fields.keys() \
               and load_int64_field.name in res[0][0].fields.keys() \
               and not_load_string_field.name not in res[0][0].fields.keys() \
               and not_load_int64_field.name not in res[0][0].fields.keys()

        # release and reload with the complementary field list; visibility flips
        collection_w.release()
        collection_w.load(load_fields=[pk_field.name, vector_field.name,
                                       not_load_string_field.name, not_load_int64_field.name])
        res, _ = collection_w.search(data=search_vectors, anns_field=vector_field.name, param=search_params,
                                     limit=100, output_fields=["*"])
        assert pk_field.name in res[0][0].fields.keys() \
               and vector_field.name in res[0][0].fields.keys() \
               and load_string_field.name not in res[0][0].fields.keys() \
               and load_int64_field.name not in res[0][0].fields.keys() \
               and not_load_string_field.name in res[0][0].fields.keys() \
               and not_load_int64_field.name in res[0][0].fields.keys()

    @pytest.mark.tags(CaseLabel.L1)
    def test_skip_load_dynamic_field(self):
        """
        target: test skip load dynamic field
        method:
            1. create a collection with dynamic field
            2. load
            3. search on dynamic field in expr and/or output_fields
            expected: search successfully
            4. release and reload with skip load dynamic field=true
            5. search on dynamic field in expr and/or output_fields
            expected: raise exception
        """
        self._connect()
        name = cf.gen_unique_str()
        dim = 128
        nb = 2000
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        load_int64_field = cf.gen_int64_field(name="int64_load")
        load_string_field = cf.gen_string_field(name="string_load")
        vector_field = cf.gen_float_vec_field(dim=dim)
        schema = cf.gen_collection_schema(fields=[pk_field, load_int64_field, load_string_field, vector_field],
                                          auto_id=True, enable_dynamic_field=True)
        collection_w = self.init_collection_wrap(name=name, schema=schema)
        data = []
        for i in range(nb):
            data.append({
                f"{load_int64_field.name}": i,
                f"{load_string_field.name}": str(i),
                f"{vector_field.name}": [random.uniform(-1, 1) for _ in range(dim)],
                # "color" and "tag" land in the dynamic field
                "color": i,
                "tag": i,
            })
        collection_w.insert(data)

        # build index
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
        collection_w.load()
        # search: with the dynamic field loaded, expr on "color" succeeds
        search_params = ct.default_search_params
        nq = 2
        search_vectors = cf.gen_vectors(nq, dim)
        res, _ = collection_w.search(data=search_vectors, anns_field=vector_field.name, param=search_params,
                                     expr="color > 0",
                                     limit=100, output_fields=["*"],
                                     check_task=CheckTasks.check_search_results,
                                     check_items={"nq": nq, "limit": 100})

        # reload skipping the dynamic field: both output_fields and expr on it must fail
        collection_w.release()
        collection_w.load(load_fields=[pk_field.name, vector_field.name, load_string_field.name],
                          skip_load_dynamic_field=True)
        error = {ct.err_code: 999, ct.err_msg: "field color cannot be returned since dynamic field not loaded"}
        collection_w.search(data=search_vectors, anns_field=vector_field.name, param=search_params,
                            limit=100, output_fields=["color"],
                            check_task=CheckTasks.err_res, check_items=error)
        error = {ct.err_code: 999, ct.err_msg: "field color is dynamic but dynamic field is not loaded"}
        collection_w.search(data=search_vectors, anns_field=vector_field.name, param=search_params,
                            expr="color > 0", limit=100, output_fields=["*"],
                            check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_skip_load_some_vector_fields(self):
        """
        target: test skip load some vector fields
        method:
            1. create a collection with multiple vector fields
            2. not create index for skip load vector fields
            2. load some vector fields
            3. search on vector fields in expr and/or output_fields
        expected: search successfully
        """
        self._connect()
        name = cf.gen_unique_str()
        dim = 128
        nb = 2000
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        load_string_field = cf.gen_string_field(name="string_load")
        vector_field = cf.gen_float_vec_field(name="vec_float32", dim=dim)
        sparse_vector_field = cf.gen_float_vec_field(name="sparse", vector_data_type="SPARSE_FLOAT_VECTOR")
        schema = cf.gen_collection_schema(fields=[pk_field, load_string_field, vector_field, sparse_vector_field],
                                          auto_id=True)
        collection_w = self.init_collection_wrap(name=name, schema=schema)
        string_values = [str(i) for i in range(nb)]
        float_vec_values = cf.gen_vectors(nb, dim)
        sparse_vec_values = cf.gen_vectors(nb, dim, vector_data_type="SPARSE_FLOAT_VECTOR")
        collection_w.insert([string_values, float_vec_values, sparse_vec_values])

        # build index on one of vector fields; the sparse field stays unindexed
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
        # not load sparse vector field
        collection_w.load(load_fields=[pk_field.name, vector_field.name, load_string_field.name])
        # search on the loaded dense vector field
        search_params = ct.default_search_params
        nq = 2
        search_vectors = float_vec_values[0:nq]
        res, _ = collection_w.search(data=search_vectors, anns_field=vector_field.name, param=search_params,
                                     limit=100, output_fields=["*"],
                                     check_task=CheckTasks.check_search_results,
                                     check_items={"nq": nq, "limit": 100})

    @pytest.mark.tags(CaseLabel.L1)
    def test_partial_load_with_partition(self):
        """
        target: test partial load with partitions
        method:
            1. create a collection with fields
            2. create 2 partitions: p1, p2
            3. partial load p1
            4. search on p1
            5. load p2 with different fields
            expected: p2 load fail
            6. load p2 with the same partial fields
            7. search on p2
            expected: search successfully
            8. load the collection with all fields
            expected: load fail
        """
        self._connect()
        name = cf.gen_unique_str()
        dim = 128
        nb = 2000
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        load_int64_field = cf.gen_int64_field(name="int64_load")
        not_load_int64_field = cf.gen_int64_field(name="int64_not_load")
        load_string_field = cf.gen_string_field(name="string_load")
        not_load_string_field = cf.gen_string_field(name="string_not_load")
        vector_field = cf.gen_float_vec_field(dim=dim)
        schema = cf.gen_collection_schema(fields=[pk_field, load_int64_field, not_load_int64_field,
                                                  load_string_field, not_load_string_field, vector_field],
                                          auto_id=True)
        collection_w = self.init_collection_wrap(name=name, schema=schema)
        p1 = self.init_partition_wrap(collection_w, name='p1')
        p2 = self.init_partition_wrap(collection_w, name='p2')
        int_values = [i for i in range(nb)]
        string_values = [str(i) for i in range(nb)]
        # use cf.gen_vectors for consistency with the other tests in this file
        float_vec_values = cf.gen_vectors(nb, dim)
        p1.insert([int_values, int_values, string_values, string_values, float_vec_values])
        p2.insert([int_values, int_values, string_values, string_values, float_vec_values])

        # build index
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
        # p1 load with partial fields
        p1.load(load_fields=[pk_field.name, vector_field.name, load_string_field.name, load_int64_field.name])
        # search on p1 (partition wrapper takes `params`, not `param`)
        search_params = ct.default_search_params
        nq = 2
        search_vectors = float_vec_values[0:nq]
        res, _ = p1.search(data=search_vectors, anns_field=vector_field.name, params=search_params,
                           limit=100, output_fields=["*"])
        assert pk_field.name in res[0][0].fields.keys() \
               and vector_field.name in res[0][0].fields.keys()
        # load p2 with different fields: the field list is fixed once loaded
        error = {ct.err_code: 999, ct.err_msg: "can't change the load field list for loaded collection"}
        p2.load(load_fields=[pk_field.name, vector_field.name, not_load_string_field.name, not_load_int64_field.name],
                check_task=CheckTasks.err_res, check_items=error)
        # load p2 with the same partial fields
        p2.load(load_fields=[pk_field.name, vector_field.name, load_string_field.name, load_int64_field.name])
        res, _ = p2.search(data=search_vectors, anns_field=vector_field.name, params=search_params,
                           limit=100, output_fields=["*"])
        assert pk_field.name in res[0][0].fields.keys() \
               and vector_field.name in res[0][0].fields.keys()

        # load the collection with all fields: rejected, but searching still works
        collection_w.load(check_task=CheckTasks.err_res, check_items=error)
        collection_w.search(data=search_vectors, anns_field=vector_field.name, param=search_params,
                            limit=100, output_fields=["*"],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": nq, "limit": 100})

    @pytest.mark.tags(CaseLabel.L2)
    def test_skip_load_on_all_scalar_field_types(self):
        """
        target: test skip load on all scalar field types
        method:
            1. create a collection with fields define skip load on all scalar field types
        expected:
            1. load and search successfully
        """
        prefix = "partial_load"
        collection_w = self.init_collection_general(prefix, insert_data=True, is_index=True,
                                                    is_all_data_type=True, with_json=True)[0]
        collection_w.release()
        # load with only pk field and vector field
        collection_w.load(load_fields=[ct.default_int64_field_name, ct.default_float_vec_field_name])
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        search_params = {"params": {}}
        # NOTE: was `check_tasks=` (typo) — the kwarg was ignored and the
        # result check never ran; fixed to `check_task=`.
        res = collection_w.search(data=search_vectors, anns_field=ct.default_float_vec_field_name,
                                  param=search_params, limit=10, output_fields=["*"],
                                  check_task=CheckTasks.check_search_results,
                                  check_items={"nq": 1, "limit": 10})[0]
        # only pk + vector were loaded, so each hit carries exactly 2 fields
        assert len(res[0][0].fields.keys()) == 2
|
|
|
class TestFieldPartialLoadInvalid(TestcaseBase):
    """Negative tests for partial field loading.

    Each case builds a collection, then attempts an invalid ``load_fields``
    list (missing pk/vector/partition-key/clustering-key field, changed list
    on reload, dynamic or non-existent field name) or a search touching an
    unloaded field, and asserts the expected server error.
    """

    @pytest.mark.tags(CaseLabel.L1)
    def test_skip_load_on_pk_field_or_vector_field(self):
        """
        target: test skip load on pk field
        method:
            1. create a collection with fields define skip load on pk field
        expected:
            1. raise exception
        """
        self._connect()
        name = cf.gen_unique_str()
        dim = 32
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        load_int64_field = cf.gen_int64_field(name="int64_load")
        vector_field = cf.gen_float_vec_field(dim=dim)
        schema = cf.gen_collection_schema(fields=[pk_field, load_int64_field, vector_field], auto_id=True)
        collection_w = self.init_collection_wrap(name=name, schema=schema)
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
        # load without pk field
        error = {ct.err_code: 999, ct.err_msg: f"does not contain primary key field {pk_field.name}"}
        collection_w.load(load_fields=[vector_field.name, load_int64_field.name],
                          check_task=CheckTasks.err_res, check_items=error)
        # load without any vector field
        error = {ct.err_code: 999, ct.err_msg: "does not contain vector field"}
        collection_w.load(load_fields=[pk_field.name, load_int64_field.name],
                          check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_skip_load_on_partition_key_field(self):
        """
        target: test skip load on partition key field
        method:
            1. create a collection with fields define skip load on partition key field
        expected:
            1. raise exception
        """
        self._connect()
        name = cf.gen_unique_str()
        dim = 32
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        partition_key_field = cf.gen_int64_field(name="int64_load", is_partition_key=True)
        vector_field = cf.gen_float_vec_field(dim=dim)
        schema = cf.gen_collection_schema(fields=[pk_field, partition_key_field, vector_field], auto_id=True)
        collection_w = self.init_collection_wrap(name=name, schema=schema)
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
        # load without the partition key field
        error = {ct.err_code: 999, ct.err_msg: f"does not contain partition key field {partition_key_field.name}"}
        collection_w.load(load_fields=[vector_field.name, pk_field.name],
                          check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_skip_load_on_clustering_key_field(self):
        """
        target: test skip load on clustering key field
        method:
            1. create a collection with fields define skip load on clustering key field
        expected:
            1. raise exception
        """
        self._connect()
        name = cf.gen_unique_str()
        dim = 32
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        clustering_key_field = cf.gen_int64_field(name="int64_load", is_clustering_key=True)
        vector_field = cf.gen_float_vec_field(dim=dim)
        schema = cf.gen_collection_schema(fields=[pk_field, clustering_key_field, vector_field], auto_id=True)
        collection_w = self.init_collection_wrap(name=name, schema=schema)
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
        # load without the clustering key field
        error = {ct.err_code: 999, ct.err_msg: f"does not contain clustering key field {clustering_key_field.name}"}
        collection_w.load(load_fields=[vector_field.name, pk_field.name],
                          check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_update_load_fields_list_when_reloading_collection(self):
        """
        target: test update load fields list when reloading collection
        method:
            1. create a collection with fields
            2. load a part of the fields
            3. update load fields list when reloading collection
        expected:
            1. raise exception
        """
        self._connect()
        name = cf.gen_unique_str()
        dim = 32
        nb = 2000
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        not_load_int64_field = cf.gen_int64_field(name="not_int64_load")
        load_string_field = cf.gen_string_field(name="string_load")
        vector_field = cf.gen_float_vec_field(dim=dim)
        schema = cf.gen_collection_schema(fields=[pk_field, not_load_int64_field, load_string_field, vector_field],
                                          auto_id=True, enable_dynamic_field=True)
        collection_w = self.init_collection_wrap(name=name, schema=schema)
        int_values = [i for i in range(nb)]
        string_values = [str(i) for i in range(nb)]
        float_vec_values = cf.gen_vectors(nb, dim)
        collection_w.insert([int_values, string_values, float_vec_values])

        # build index
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
        collection_w.load(load_fields=[pk_field.name, vector_field.name, load_string_field.name])
        # sanity search on the loaded fields
        search_params = ct.default_search_params
        nq = 1
        search_vectors = float_vec_values[0:nq]
        collection_w.search(data=search_vectors, anns_field=vector_field.name, param=search_params,
                            limit=10, output_fields=[load_string_field.name],
                            check_task=CheckTasks.check_search_results, check_items={"nq": nq, "limit": 10})
        # try to add more fields in load fields list when reloading
        error = {ct.err_code: 999, ct.err_msg: "can't change the load field list for loaded collection"}
        collection_w.load(load_fields=[pk_field.name, vector_field.name,
                                       load_string_field.name, not_load_int64_field.name],
                          check_task=CheckTasks.err_res, check_items=error)
        # try to remove fields in load fields list when reloading
        collection_w.load(load_fields=[pk_field.name, vector_field.name],
                          check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_one_of_dynamic_fields_in_load_fields_list(self):
        """
        target: test one of dynamic fields in load fields list
        method:
            1. create a collection with fields
            3. add one of dynamic fields in load fields list when loading
            expected: raise exception
            4. add non_existing field in load fields list when loading
            expected: raise exception
        """
        self._connect()
        name = cf.gen_unique_str()
        dim = 32
        nb = 2000
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        load_int64_field = cf.gen_int64_field(name="int64_load")
        load_string_field = cf.gen_string_field(name="string_load")
        vector_field = cf.gen_float_vec_field(dim=dim)
        schema = cf.gen_collection_schema(fields=[pk_field, load_int64_field, load_string_field, vector_field],
                                          auto_id=True, enable_dynamic_field=True)
        collection_w = self.init_collection_wrap(name=name, schema=schema)
        data = []
        for i in range(nb):
            data.append({
                f"{load_int64_field.name}": i,
                f"{load_string_field.name}": str(i),
                f"{vector_field.name}": [random.uniform(-1, 1) for _ in range(dim)],
                # "color" and "tag" land in the dynamic field
                "color": i,
                "tag": i,
            })
        collection_w.insert(data)
        # build index
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
        # add one of dynamic fields in load fields list: not a schema field
        error = {ct.err_code: 999,
                 ct.err_msg: "failed to get field schema by name: fieldName(color) not found"}
        collection_w.load(load_fields=[pk_field.name, vector_field.name, "color"],
                          check_task=CheckTasks.err_res, check_items=error)
        # add non_existing field in load fields list
        error = {ct.err_code: 999,
                 ct.err_msg: "failed to get field schema by name: fieldName(not_existing) not found"}
        collection_w.load(load_fields=[pk_field.name, vector_field.name, "not_existing"],
                          check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_search_on_not_loaded_fields(self):
        """
        target: test search on skipped fields
        method:
            1. create a collection with fields
            2. load a part of the fields
            3. search on skipped fields in expr and/or output_fields
        expected:
            1. raise exception
        """
        self._connect()
        name = cf.gen_unique_str()
        dim = 32
        nb = 2000
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        not_load_int64_field = cf.gen_int64_field(name="not_int64_load")
        load_string_field = cf.gen_string_field(name="string_load")
        vector_field = cf.gen_float_vec_field(dim=dim)
        schema = cf.gen_collection_schema(fields=[pk_field, not_load_int64_field, load_string_field, vector_field],
                                          auto_id=True, enable_dynamic_field=True)
        collection_w = self.init_collection_wrap(name=name, schema=schema)
        int_values = [i for i in range(nb)]
        string_values = [str(i) for i in range(nb)]
        float_vec_values = cf.gen_vectors(nb, dim)
        collection_w.insert([int_values, string_values, float_vec_values])

        # build index
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
        collection_w.load(load_fields=[pk_field.name, vector_field.name, load_string_field.name])
        # search with a skipped field in output_fields
        search_params = ct.default_search_params
        nq = 1
        search_vectors = float_vec_values[0:nq]
        error = {ct.err_code: 999, ct.err_msg: f"field {not_load_int64_field.name} is not loaded"}
        collection_w.search(data=search_vectors, anns_field=vector_field.name, param=search_params,
                            limit=10, output_fields=[not_load_int64_field.name, load_string_field.name],
                            check_task=CheckTasks.err_res, check_items=error)
        # search with a skipped field in the filter expression
        error = {ct.err_code: 999, ct.err_msg: "cannot parse expression"}
        collection_w.search(data=search_vectors, anns_field=vector_field.name, param=search_params,
                            expr=f"{not_load_int64_field.name} > 0",
                            limit=10, output_fields=[load_string_field.name],
                            check_task=CheckTasks.err_res, check_items=error)