mirror of https://github.com/milvus-io/milvus.git

test: Add nullable test cases for bulk writer (#37572)

issue: #36129

Signed-off-by: binbin lv <binbin.lv@zilliz.com>

branch: pull/37561/head
parent: c1eccce2fa
commit: 21b68029a0
@@ -1477,7 +1477,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
     @pytest.mark.parametrize("entities", [1000])  # 1000
     @pytest.mark.parametrize("enable_dynamic_field", [True, False])
     @pytest.mark.parametrize("sparse_format", ["doc", "coo"])
-    def test_with_all_field_json_with_bulk_writer(self, auto_id, dim, entities, enable_dynamic_field, sparse_format):
+    @pytest.mark.parametrize("nullable", [True, False])
+    def test_with_all_field_json_with_bulk_writer(self, auto_id, dim, entities, enable_dynamic_field, sparse_format, nullable):
         """
         collection schema 1: [pk, int64, float64, string float_vector]
         data file: vectors.npy and uid.npy,
@@ -1489,14 +1490,14 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
         self._connect()
         fields = [
             cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id),
-            cf.gen_int64_field(name=df.int_field),
-            cf.gen_float_field(name=df.float_field),
-            cf.gen_string_field(name=df.string_field),
-            cf.gen_json_field(name=df.json_field),
-            cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64),
-            cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT),
-            cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100),
-            cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL),
+            cf.gen_int64_field(name=df.int_field, nullable=nullable),
+            cf.gen_float_field(name=df.float_field, nullable=nullable),
+            cf.gen_string_field(name=df.string_field, nullable=nullable),
+            cf.gen_json_field(name=df.json_field, nullable=nullable),
+            cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64, nullable=nullable),
+            cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT, nullable=nullable),
+            cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100, nullable=nullable),
+            cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL, nullable=nullable),
             cf.gen_float_vec_field(name=df.float_vec_field, dim=dim),
             cf.gen_float16_vec_field(name=df.fp16_vec_field, dim=dim),
             cf.gen_bfloat16_vec_field(name=df.bf16_vec_field, dim=dim),
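The only per-field difference in the hunk above is the new nullable flag, which the cf.gen_*_field helpers presumably forward to the underlying pymilvus FieldSchema. A minimal sketch of the equivalent plain-pymilvus declaration, assuming a client build with nullable-field support; the field names below are illustrative, not taken from the test:

    from pymilvus import CollectionSchema, DataType, FieldSchema

    # Sketch only: an INT64 scalar that may hold nulls, next to a primary key and a vector field.
    pk = FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True)
    nullable_int = FieldSchema(name="int_scalar", dtype=DataType.INT64, nullable=True)
    vec = FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128)
    schema = CollectionSchema(fields=[pk, nullable_int, vec])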
@@ -1528,14 +1529,14 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
         for i in range(entities):
             row = {
                 df.pk_field: i,
-                df.int_field: 1,
-                df.float_field: 1.0,
-                df.string_field: "string",
-                df.json_field: json_value[i%len(json_value)],
-                df.array_int_field: [1, 2],
-                df.array_float_field: [1.0, 2.0],
-                df.array_string_field: ["string1", "string2"],
-                df.array_bool_field: [True, False],
+                df.int_field: 1 if not (nullable and random.random() < 0.5) else None,
+                df.float_field: 1.0 if not (nullable and random.random() < 0.5) else None,
+                df.string_field: "string" if not (nullable and random.random() < 0.5) else None,
+                df.json_field: json_value[i%len(json_value)] if not (nullable and random.random() < 0.5) else None,
+                df.array_int_field: [1, 2] if not (nullable and random.random() < 0.5) else None,
+                df.array_float_field: [1.0, 2.0] if not (nullable and random.random() < 0.5) else None,
+                df.array_string_field: ["string1", "string2"] if not (nullable and random.random() < 0.5) else None,
+                df.array_bool_field: [True, False] if not (nullable and random.random() < 0.5) else None,
                 df.float_vec_field: cf.gen_vectors(1, dim)[0],
                 df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type="FLOAT16_VECTOR")[0],
                 df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type="BFLOAT16_VECTOR")[0],
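The repeated expression value if not (nullable and random.random() < 0.5) else None sets each nullable field to None in roughly half of the generated rows when nullable is enabled. A hypothetical helper, not part of the test suite, that captures the same pattern:

    import random

    def maybe_null(value, nullable, null_ratio=0.5):
        # Return value unchanged, or None for roughly null_ratio of the rows
        # when the field is declared nullable.
        return None if nullable and random.random() < null_ratio else value

    # usage, e.g.: row[df.int_field] = maybe_null(1, nullable)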
@@ -1606,13 +1607,17 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
     @pytest.mark.parametrize("dim", [128])  # 128
     @pytest.mark.parametrize("entities", [1000])  # 1000
     @pytest.mark.parametrize("enable_dynamic_field", [True, False])
-    def test_with_all_field_numpy_with_bulk_writer(self, auto_id, dim, entities, enable_dynamic_field):
+    @pytest.mark.parametrize("nullable", [True, False])
+    def test_with_all_field_numpy_with_bulk_writer(self, auto_id, dim, entities, enable_dynamic_field, nullable):
         """
         """
+        if nullable is True:
+            pytest.skip("not support bulk writer numpy files in field(int_scalar) which has 'None' data")
+
         self._connect()
         fields = [
             cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id),
-            cf.gen_int64_field(name=df.int_field),
+            cf.gen_int64_field(name=df.int_field, nullable=nullable),
             cf.gen_float_field(name=df.float_field),
             cf.gen_string_field(name=df.string_field),
             cf.gen_json_field(name=df.json_field),
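The skip above likely reflects a NumPy limitation rather than a deliberate bulk-writer choice: fixed-width integer dtypes cannot represent None, so a column containing nulls degrades to object dtype, which a .npy file for an INT64 field cannot carry. A quick illustration:

    import numpy as np

    ints = np.array([1, 2, 3])          # dtype: int64
    with_null = np.array([1, None, 3])  # dtype: object -- no longer a fixed-width int64 column
    print(ints.dtype, with_null.dtype)  # prints: int64 object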
@@ -1646,7 +1651,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
         for i in range(entities):
             row = {
                 df.pk_field: i,
-                df.int_field: 1,
+                df.int_field: 1 if not (nullable and random.random() < 0.5) else None,
                 df.float_field: 1.0,
                 df.string_field: "string",
                 df.json_field: json_value[i%len(json_value)],
@@ -1720,20 +1725,21 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
     @pytest.mark.parametrize("entities", [1000])  # 1000
     @pytest.mark.parametrize("enable_dynamic_field", [True, False])
     @pytest.mark.parametrize("sparse_format", ["doc", "coo"])
-    def test_with_all_field_parquet_with_bulk_writer(self, auto_id, dim, entities, enable_dynamic_field, sparse_format):
+    @pytest.mark.parametrize("nullable", [True, False])
+    def test_with_all_field_parquet_with_bulk_writer(self, auto_id, dim, entities, enable_dynamic_field, sparse_format, nullable):
         """
         """
         self._connect()
         fields = [
             cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id),
-            cf.gen_int64_field(name=df.int_field),
-            cf.gen_float_field(name=df.float_field),
-            cf.gen_string_field(name=df.string_field),
-            cf.gen_json_field(name=df.json_field),
-            cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64),
-            cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT),
-            cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100),
-            cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL),
+            cf.gen_int64_field(name=df.int_field, nullable=nullable),
+            cf.gen_float_field(name=df.float_field, nullable=nullable),
+            cf.gen_string_field(name=df.string_field, nullable=nullable),
+            cf.gen_json_field(name=df.json_field, nullable=nullable),
+            cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64, nullable=nullable),
+            cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT, nullable=nullable),
+            cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100, nullable=nullable),
+            cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL, nullable=nullable),
             cf.gen_float_vec_field(name=df.float_vec_field, dim=dim),
             cf.gen_float16_vec_field(name=df.fp16_vec_field, dim=dim),
             cf.gen_bfloat16_vec_field(name=df.bf16_vec_field, dim=dim),
@@ -1765,14 +1771,14 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
         for i in range(entities):
             row = {
                 df.pk_field: i,
-                df.int_field: 1,
-                df.float_field: 1.0,
-                df.string_field: "string",
-                df.json_field: json_value[i%len(json_value)],
-                df.array_int_field: [1, 2],
-                df.array_float_field: [1.0, 2.0],
-                df.array_string_field: ["string1", "string2"],
-                df.array_bool_field: [True, False],
+                df.int_field: 1 if not (nullable and random.random() < 0.5) else None,
+                df.float_field: 1.0 if not (nullable and random.random() < 0.5) else None,
+                df.string_field: "string" if not (nullable and random.random() < 0.5) else None,
+                df.json_field: json_value[i%len(json_value)] if not (nullable and random.random() < 0.5) else None,
+                df.array_int_field: [1, 2] if not (nullable and random.random() < 0.5) else None,
+                df.array_float_field: [1.0, 2.0] if not (nullable and random.random() < 0.5) else None,
+                df.array_string_field: ["string1", "string2"] if not (nullable and random.random() < 0.5) else None,
+                df.array_bool_field: [True, False] if not (nullable and random.random() < 0.5) else None,
                 df.float_vec_field: cf.gen_vectors(1, dim)[0],
                 df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type="FLOAT16_VECTOR")[0],
                 df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type="BFLOAT16_VECTOR")[0],
@@ -4809,6 +4809,73 @@ class TestCollectionSearch(TestcaseBase):
                             check_task=CheckTasks.err_res,
                             check_items={"err_code": 999, "err_msg": f"invalid dimension: {dim}."})
 
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.skip(reason="issue #37547")
+    def test_search_verify_expr_cache(self, is_flush):
+        """
+        target: test search case to test expr cache
+        method: 1. create collection with a double datatype field
+                2. search with expr "doubleField == 0"
+                3. drop this collection
+                4. create collection with same collection name and same field name but modify the type of double field
+                   as varchar datatype
+                5. search with expr "doubleField == 0" again
+        expected: 1. search successfully with limit(topK) for the first collection
+                  2. report error for the second collection with the same name
+        """
+        # 1. initialize with data
+        collection_w, _, _, insert_ids, time_stamp = \
+            self.init_collection_general(prefix, True, is_flush=is_flush)[0:5]
+        collection_name = collection_w.name
+        # 2. generate search data
+        vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim)
+        # 3. search with expr "nullableFid == 0"
+        search_exp = f"{ct.default_float_field_name} == 0"
+        output_fields = [default_int64_field_name, default_float_field_name]
+        collection_w.search(vectors[:default_nq], default_search_field,
+                            default_search_params, default_limit,
+                            search_exp,
+                            output_fields=output_fields,
+                            check_task=CheckTasks.check_search_results,
+                            check_items={"nq": default_nq,
+                                         "ids": insert_ids,
+                                         "limit": 1,
+                                         "output_fields": output_fields})
+        # 4. drop collection
+        collection_w.drop()
+        # 5. create the same collection name with same field name but varchar field type
+        int64_field = cf.gen_int64_field(is_primary=True)
+        string_field = cf.gen_string_field(ct.default_float_field_name)
+        json_field = cf.gen_json_field()
+        float_vector_field = cf.gen_float_vec_field()
+        fields = [int64_field, string_field, json_field, float_vector_field]
+        schema = cf.gen_collection_schema(fields)
+        collection_w = self.init_collection_wrap(name=collection_name, schema=schema)
+        int64_values = pd.Series(data=[i for i in range(default_nb)])
+        string_values = pd.Series(data=[str(i) for i in range(default_nb)], dtype="string")
+        json_values = [{"number": i, "string": str(i), "bool": bool(i),
+                        "list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(default_nb)]
+        float_vec_values = cf.gen_vectors(default_nb, default_dim)
+        df = pd.DataFrame({
+            ct.default_int64_field_name: int64_values,
+            ct.default_float_field_name: string_values,
+            ct.default_json_field_name: json_values,
+            ct.default_float_vec_field_name: float_vec_values
+        })
+        collection_w.insert(df)
+        collection_w.create_index(ct.default_float_vec_field_name, ct.default_flat_index)
+        collection_w.load()
+        collection_w.flush()
+        collection_w.search(vectors[:default_nq], default_search_field,
+                            default_search_params, default_limit,
+                            search_exp,
+                            output_fields=output_fields,
+                            check_task=CheckTasks.err_res,
+                            check_items={"err_code": 1100,
+                                         "err_msg": "failed to create query plan: cannot parse expression: float == 0, "
+                                                    "error: comparisons between VarChar and Int64 are not supported: "
+                                                    "invalid parameter"})
+
 
 class TestSearchBase(TestcaseBase):
     @pytest.fixture(
@@ -13279,6 +13346,74 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase):
                                          "limit": default_limit,
                                          "output_fields": output_fields})
 
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.skip(reason="issue #37547")
+    def test_search_none_data_expr_cache(self, is_flush):
+        """
+        target: test search case with none data to test expr cache
+        method: 1. create collection with double datatype as nullable field
+                2. search with expr "nullableFid == 0"
+                3. drop this collection
+                4. create collection with same collection name and same field name but modify the type of nullable field
+                   as varchar datatype
+                5. search with expr "nullableFid == 0" again
+        expected: 1. search successfully with limit(topK) for the first collection
+                  2. report error for the second collection with the same name
+        """
+        # 1. initialize with data
+        collection_w, _, _, insert_ids, time_stamp = \
+            self.init_collection_general(prefix, True, is_flush=is_flush,
+                                         nullable_fields={ct.default_float_field_name: 0.5})[0:5]
+        collection_name = collection_w.name
+        # 2. generate search data
+        vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim)
+        # 3. search with expr "nullableFid == 0"
+        search_exp = f"{ct.default_float_field_name} == 0"
+        output_fields = [default_int64_field_name, default_float_field_name]
+        collection_w.search(vectors[:default_nq], default_search_field,
+                            default_search_params, default_limit,
+                            search_exp,
+                            output_fields=output_fields,
+                            check_task=CheckTasks.check_search_results,
+                            check_items={"nq": default_nq,
+                                         "ids": insert_ids,
+                                         "limit": 1,
+                                         "output_fields": output_fields})
+        # 4. drop collection
+        collection_w.drop()
+        # 5. create the same collection name with same field name but varchar field type
+        int64_field = cf.gen_int64_field(is_primary=True)
+        string_field = cf.gen_string_field(ct.default_float_field_name, nullable=True)
+        json_field = cf.gen_json_field()
+        float_vector_field = cf.gen_float_vec_field()
+        fields = [int64_field, string_field, json_field, float_vector_field]
+        schema = cf.gen_collection_schema(fields)
+        collection_w = self.init_collection_wrap(name=collection_name, schema=schema)
+        int64_values = pd.Series(data=[i for i in range(default_nb)])
+        string_values = pd.Series(data=[str(i) for i in range(default_nb)], dtype="string")
+        json_values = [{"number": i, "string": str(i), "bool": bool(i),
+                        "list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(default_nb)]
+        float_vec_values = cf.gen_vectors(default_nb, default_dim)
+        df = pd.DataFrame({
+            ct.default_int64_field_name: int64_values,
+            ct.default_float_field_name: None,
+            ct.default_json_field_name: json_values,
+            ct.default_float_vec_field_name: float_vec_values
+        })
+        collection_w.insert(df)
+        collection_w.create_index(ct.default_float_vec_field_name, ct.default_flat_index)
+        collection_w.load()
+        collection_w.flush()
+        collection_w.search(vectors[:default_nq], default_search_field,
+                            default_search_params, default_limit,
+                            search_exp,
+                            output_fields=output_fields,
+                            check_task=CheckTasks.err_res,
+                            check_items={"err_code": 1100,
+                                         "err_msg": "failed to create query plan: cannot parse expression: float == 0, "
+                                                    "error: comparisons between VarChar and Int64 are not supported: "
+                                                    "invalid parameter"})
+
 
 class TestSearchWithTextMatchFilter(TestcaseBase):
     """