"""Tests exercising insert, search, query, get and delete through the vector client."""
import json
import random
import sys
import time

import numpy as np
import pytest
from sklearn import preprocessing

from base.testbase import TestBase
from utils import constant
from utils.util_log import test_log as logger
from utils.utils import (gen_collection_name, get_common_fields_by_data,
                         get_data_by_payload)


def _log_body_size(payload):
    """Log the approximate size of the JSON-serialized *payload* in MB."""
    # sys.getsizeof reports the in-memory size of the str object (shallow),
    # so this is an approximation of the request body size, not the exact
    # number of bytes on the wire.
    body_size = sys.getsizeof(json.dumps(payload))
    logger.info(f"body size: {body_size / 1024 / 1024} MB")


def _random_unit_vector(dim):
    """Return a random vector of length *dim*, normalized by sklearn, as a list."""
    raw = np.array([random.random() for _ in range(dim)])
    return preprocessing.normalize([raw])[0].tolist()


def _uid_matches(expr, uid):
    """Evaluate the Python-compatible filter *expr* with ``uid`` bound to *uid*."""
    # eval is only acceptable here because every expression comes from a
    # hard-coded parametrize list in this module, never from external input.
    # An explicit namespace avoids relying on the caller's local variables.
    return eval(expr.strip(), {}, {"uid": uid})


def _median_name_prefix(data):
    """Return the first two characters of the median ``name`` value in *data*."""
    names = sorted(item.get("name") for item in data)
    logger.info(f"names: {names}")
    return names[len(names) // 2][0:2]


def _assert_name_matches(filter_expr, item_name, prefix):
    """Assert *item_name* satisfies the ``>`` / ``like`` varchar *filter_expr*."""
    if ">" in filter_expr:
        assert item_name > prefix
    if "like" in filter_expr:
        assert item_name.startswith(prefix)


def _assert_unique_and_ordered(res, metric_type):
    """Assert search hits have unique ids and are ordered for *metric_type*."""
    ids = [item['id'] for item in res]
    assert len(ids) == len(set(ids))
    distance = [item['distance'] for item in res]
    if metric_type == "L2":
        assert distance == sorted(distance)
    if metric_type == "IP":
        assert distance == sorted(distance, reverse=True)


def _query_ids_by_uid(vector_client, name, data, output_fields):
    """Query the rows whose ``uid`` appears in *data* and return their ``id`` values."""
    uids = [item.get("uid") for item in data]
    payload = {
        "collectionName": name,
        "outputFields": output_fields,
        "filter": f"uid in {uids}",
    }
    rsp = vector_client.vector_query(payload)
    assert rsp['code'] == 200
    res = rsp['data']
    logger.info(f"res: {len(res)}")
    ids = [r['id'] for r in res]
    logger.info(f"ids: {len(ids)}")
    return ids


@pytest.mark.L0
class TestInsertVector(TestBase):
    """Insert cases that are expected to succeed."""

    @pytest.mark.parametrize("insert_round", [2, 1])
    @pytest.mark.parametrize("nb", [100, 10, 1])
    @pytest.mark.parametrize("dim", [32, 128])
    @pytest.mark.parametrize("primary_field", ["id", "url"])
    @pytest.mark.parametrize("vector_field", ["vector", "embedding"])
    @pytest.mark.parametrize("db_name", ["prod", "default"])
    def test_insert_vector_with_simple_payload(self, db_name, vector_field, primary_field, nb, dim, insert_round):
        """
        Insert ``nb`` rows per round into a collection with custom field names
        """
        self.update_database(db_name=db_name)
        # create a collection
        name = gen_collection_name()
        collection_payload = {
            "collectionName": name,
            "dimension": dim,
            "primaryField": primary_field,
            "vectorField": vector_field,
        }
        rsp = self.collection_client.collection_create(collection_payload)
        assert rsp['code'] == 200
        rsp = self.collection_client.collection_describe(name)
        logger.info(f"rsp: {rsp}")
        assert rsp['code'] == 200
        # insert data
        for _ in range(insert_round):
            data = get_data_by_payload(collection_payload, nb)
            payload = {
                "collectionName": name,
                "data": data,
            }
            _log_body_size(payload)
            rsp = self.vector_client.vector_insert(payload)
            assert rsp['code'] == 200
            assert rsp['data']['insertCount'] == nb
        logger.info("finished")

    @pytest.mark.L0
    @pytest.mark.parametrize("insert_round", [10])
    def test_insert_vector_with_multi_round(self, insert_round):
        """
        Insert 300 rows per round, for several rounds, into a 768-dim collection
        """
        # create a collection
        name = gen_collection_name()
        collection_payload = {
            "collectionName": name,
            "dimension": 768,
        }
        rsp = self.collection_client.collection_create(collection_payload)
        assert rsp['code'] == 200
        rsp = self.collection_client.collection_describe(name)
        logger.info(f"rsp: {rsp}")
        assert rsp['code'] == 200
        # insert data
        nb = 300
        for _ in range(insert_round):
            data = get_data_by_payload(collection_payload, nb)
            payload = {
                "collectionName": name,
                "data": data,
            }
            _log_body_size(payload)
            rsp = self.vector_client.vector_insert(payload)
            assert rsp['code'] == 200
            assert rsp['data']['insertCount'] == nb


@pytest.mark.L1
class TestInsertVectorNegative(TestBase):
    """Insert cases that are expected to be rejected."""

    def test_insert_vector_with_invalid_api_key(self):
        """
        Insert a vector with invalid api key
        """
        # create a collection
        name = gen_collection_name()
        dim = 128
        payload = {
            "collectionName": name,
            "dimension": dim,
        }
        rsp = self.collection_client.collection_create(payload)
        assert rsp['code'] == 200
        rsp = self.collection_client.collection_describe(name)
        assert rsp['code'] == 200
        # insert data
        nb = 10
        data = [
            {
                "vector": [np.float64(random.random()) for _ in range(dim)],
            } for _ in range(nb)
        ]
        payload = {
            "collectionName": name,
            "data": data,
        }
        _log_body_size(payload)
        # NOTE(review): `client` is an alias of self.vector_client, not a copy,
        # so the invalid key stays set on that object after this call.
        client = self.vector_client
        client.api_key = "invalid_api_key"
        rsp = client.vector_insert(payload)
        assert rsp['code'] == 1800

    def test_insert_vector_with_invalid_collection_name(self):
        """
        Insert a vector with an invalid collection name
        """
        # create a collection
        name = gen_collection_name()
        dim = 128
        payload = {
            "collectionName": name,
            "dimension": dim,
        }
        rsp = self.collection_client.collection_create(payload)
        assert rsp['code'] == 200
        rsp = self.collection_client.collection_describe(name)
        assert rsp['code'] == 200
        # insert data
        nb = 100
        data = get_data_by_payload(payload, nb)
        payload = {
            "collectionName": "invalid_collection_name",
            "data": data,
        }
        _log_body_size(payload)
        rsp = self.vector_client.vector_insert(payload)
        assert rsp['code'] == 1

    def test_insert_vector_with_invalid_database_name(self):
        """
        Insert a vector with an invalid database name
        """
        # create a collection
        name = gen_collection_name()
        dim = 128
        payload = {
            "collectionName": name,
            "dimension": dim,
        }
        rsp = self.collection_client.collection_create(payload)
        assert rsp['code'] == 200
        rsp = self.collection_client.collection_describe(name)
        assert rsp['code'] == 200
        # insert data
        nb = 10
        data = get_data_by_payload(payload, nb)
        payload = {
            "collectionName": name,
            "data": data,
        }
        _log_body_size(payload)
        rsp = self.vector_client.vector_insert(payload, db_name="invalid_database")
        assert rsp['code'] == 800

    def test_insert_vector_with_mismatch_dim(self):
        """
        Insert a vector with mismatch dim
        """
        # create a collection
        name = gen_collection_name()
        dim = 32
        payload = {
            "collectionName": name,
            "dimension": dim,
        }
        rsp = self.collection_client.collection_create(payload)
        assert rsp['code'] == 200
        rsp = self.collection_client.collection_describe(name)
        assert rsp['code'] == 200
        # insert data: one element more than the collection dimension
        nb = 1
        data = [
            {
                "vector": [np.float64(random.random()) for _ in range(dim + 1)],
            } for _ in range(nb)
        ]
        payload = {
            "collectionName": name,
            "data": data,
        }
        _log_body_size(payload)
        rsp = self.vector_client.vector_insert(payload)
        assert rsp['code'] == 1804
        assert rsp['message'] == "fail to deal the insert data"


@pytest.mark.L0
class TestSearchVector(TestBase):
    """Search cases that are expected to succeed."""

    @pytest.mark.parametrize("metric_type", ["IP", "L2"])
    def test_search_vector_with_simple_payload(self, metric_type):
        """
        Search a vector with a simple payload
        """
        name = gen_collection_name()
        self.name = name
        self.init_collection(name, metric_type=metric_type)

        # search data
        dim = 128
        payload = {
            "collectionName": name,
            "vector": _random_unit_vector(dim),
        }
        rsp = self.vector_client.vector_search(payload)
        assert rsp['code'] == 200
        res = rsp['data']
        logger.info(f"res: {len(res)}")
        # the payload carries no "limit", so 100 results are expected
        limit = int(payload.get("limit", 100))
        assert len(res) == limit
        _assert_unique_and_ordered(res, metric_type)

    @pytest.mark.parametrize("sum_limit_offset", [16384, 16385])
    @pytest.mark.xfail(reason="")
    def test_search_vector_with_exceed_sum_limit_offset(self, sum_limit_offset):
        """
        Search with limit + offset at and just above constant.MAX_SUM_OFFSET_AND_LIMIT
        """
        max_search_sum_limit_offset = constant.MAX_SUM_OFFSET_AND_LIMIT
        name = gen_collection_name()
        self.name = name
        nb = sum_limit_offset + 2000
        metric_type = "IP"
        limit = 100
        self.init_collection(name, metric_type=metric_type, nb=nb, batch_size=2000)

        # search data
        dim = 128
        payload = {
            "collectionName": name,
            "vector": _random_unit_vector(dim),
            "limit": limit,
            "offset": sum_limit_offset - limit,
        }
        rsp = self.vector_client.vector_search(payload)
        if sum_limit_offset > max_search_sum_limit_offset:
            assert rsp['code'] == 65535
            return
        assert rsp['code'] == 200
        res = rsp['data']
        logger.info(f"res: {len(res)}")
        limit = int(payload.get("limit", 100))
        assert len(res) == limit
        _assert_unique_and_ordered(res, metric_type)

    @pytest.mark.parametrize("level", [0, 1, 2])
    @pytest.mark.parametrize("offset", [0, 10, 100])
    @pytest.mark.parametrize("limit", [1, 100])
    @pytest.mark.parametrize("metric_type", ["L2", "IP"])
    def test_search_vector_with_complex_payload(self, limit, offset, level, metric_type):
        """
        Search with output fields, a uid filter, limit and offset
        """
        name = gen_collection_name()
        self.name = name
        nb = limit + offset + 100
        dim = 128
        schema_payload, data = self.init_collection(name, dim=dim, nb=nb, metric_type=metric_type)
        vector_field = schema_payload.get("vectorField")
        # search data
        output_fields = get_common_fields_by_data(data, exclude_fields=[vector_field])
        payload = {
            "collectionName": name,
            "vector": _random_unit_vector(dim),
            "outputFields": output_fields,
            "filter": "uid >= 0",
            "limit": limit,
            "offset": offset,
        }
        rsp = self.vector_client.vector_search(payload)
        if offset + limit > constant.MAX_SUM_OFFSET_AND_LIMIT:
            assert rsp['code'] == 90126
            return
        assert rsp['code'] == 200
        res = rsp['data']
        logger.info(f"res: {len(res)}")
        assert len(res) == limit
        for item in res:
            assert item.get("uid") >= 0
            for field in output_fields:
                assert field in item

    @pytest.mark.parametrize("filter_expr", ["uid >= 0", "uid >= 0 and uid < 100", "uid in [1,2,3]"])
    def test_search_vector_with_complex_int_filter(self, filter_expr):
        """
        Search with an integer filter and verify every hit satisfies it
        """
        name = gen_collection_name()
        self.name = name
        nb = 200
        dim = 128
        limit = 100
        schema_payload, data = self.init_collection(name, dim=dim, nb=nb)
        vector_field = schema_payload.get("vectorField")
        # search data
        output_fields = get_common_fields_by_data(data, exclude_fields=[vector_field])
        payload = {
            "collectionName": name,
            "vector": _random_unit_vector(dim),
            "outputFields": output_fields,
            "filter": filter_expr,
            "limit": limit,
            "offset": 0,
        }
        rsp = self.vector_client.vector_search(payload)
        assert rsp['code'] == 200
        res = rsp['data']
        logger.info(f"res: {len(res)}")
        assert len(res) <= limit
        for item in res:
            # previously the expression was evaluated but its result discarded,
            # so a hit violating the filter would not fail the test
            assert _uid_matches(filter_expr, item.get("uid")) is True

    @pytest.mark.parametrize("filter_expr", ["name > \"placeholder\"", "name like \"placeholder%\""])
    def test_search_vector_with_complex_varchar_filter(self, filter_expr):
        """
        Search with a varchar filter built from the median name prefix
        """
        name = gen_collection_name()
        self.name = name
        nb = 200
        dim = 128
        limit = 100
        schema_payload, data = self.init_collection(name, dim=dim, nb=nb)
        prefix = _median_name_prefix(data)
        vector_field = schema_payload.get("vectorField")
        # search data
        output_fields = get_common_fields_by_data(data, exclude_fields=[vector_field])
        filter_expr = filter_expr.replace("placeholder", prefix)
        logger.info(f"filter_expr: {filter_expr}")
        payload = {
            "collectionName": name,
            "vector": _random_unit_vector(dim),
            "outputFields": output_fields,
            "filter": filter_expr,
            "limit": limit,
            "offset": 0,
        }
        rsp = self.vector_client.vector_search(payload)
        assert rsp['code'] == 200
        res = rsp['data']
        logger.info(f"res: {len(res)}")
        assert len(res) <= limit
        for item in res:
            item_name = item.get("name")
            logger.info(f"name: {item_name}")
            _assert_name_matches(filter_expr, item_name, prefix)

    @pytest.mark.parametrize("filter_expr", ["uid < 100 and name > \"placeholder\"",
                                             "uid < 100 and name like \"placeholder%\""
                                             ])
    def test_search_vector_with_complex_int64_varchar_and_filter(self, filter_expr):
        """
        Search with a combined integer and varchar filter joined by ``and``
        """
        name = gen_collection_name()
        self.name = name
        nb = 200
        dim = 128
        limit = 100
        schema_payload, data = self.init_collection(name, dim=dim, nb=nb)
        prefix = _median_name_prefix(data)
        vector_field = schema_payload.get("vectorField")
        # search data
        output_fields = get_common_fields_by_data(data, exclude_fields=[vector_field])
        filter_expr = filter_expr.replace("placeholder", prefix)
        logger.info(f"filter_expr: {filter_expr}")
        payload = {
            "collectionName": name,
            "vector": _random_unit_vector(dim),
            "outputFields": output_fields,
            "filter": filter_expr,
            "limit": limit,
            "offset": 0,
        }
        rsp = self.vector_client.vector_search(payload)
        assert rsp['code'] == 200
        res = rsp['data']
        logger.info(f"res: {len(res)}")
        assert len(res) <= limit
        # split once, outside the loop: the left side is the uid clause,
        # the right side is the varchar clause
        uid_expr, _, varchar_expr = filter_expr.partition(" and ")
        for item in res:
            item_name = item.get("name")
            logger.info(f"name: {item_name}")
            assert _uid_matches(uid_expr, item.get("uid")) is True
            _assert_name_matches(varchar_expr, item_name, prefix)


@pytest.mark.L1
class TestSearchVectorNegative(TestBase):
    """Search cases that are expected to be rejected."""

    @pytest.mark.parametrize("limit", [0, 16385])
    def test_search_vector_with_invalid_limit(self, limit):
        """
        Search a vector with an invalid limit
        """
        name = gen_collection_name()
        self.name = name
        dim = 128
        schema_payload, data = self.init_collection(name, dim=dim)
        vector_field = schema_payload.get("vectorField")
        # search data
        output_fields = get_common_fields_by_data(data, exclude_fields=[vector_field])
        payload = {
            "collectionName": name,
            "vector": _random_unit_vector(dim),
            "outputFields": output_fields,
            "filter": "uid >= 0",
            "limit": limit,
            "offset": 0,
        }
        rsp = self.vector_client.vector_search(payload)
        assert rsp['code'] == 1

    @pytest.mark.parametrize("offset", [-1, 100_001])
    def test_search_vector_with_invalid_offset(self, offset):
        """
        Search a vector with an invalid offset
        """
        name = gen_collection_name()
        self.name = name
        dim = 128
        schema_payload, data = self.init_collection(name, dim=dim)
        vector_field = schema_payload.get("vectorField")
        # search data
        output_fields = get_common_fields_by_data(data, exclude_fields=[vector_field])
        payload = {
            "collectionName": name,
            "vector": _random_unit_vector(dim),
            "outputFields": output_fields,
            "filter": "uid >= 0",
            "limit": 100,
            "offset": offset,
        }
        rsp = self.vector_client.vector_search(payload)
        assert rsp['code'] == 1

    # TODO: the tests below are placeholders with no body; they pass vacuously.

    def test_search_vector_with_illegal_api_key(self):
        """
        Search a vector with an illegal api key
        """
        pass

    def test_search_vector_with_invalid_collection_name(self):
        """
        Search a vector with an invalid collection name
        """
        pass

    def test_search_vector_with_invalid_output_field(self):
        """
        Search a vector with an invalid output field
        """
        pass

    @pytest.mark.parametrize("invalid_expr", ["invalid_field > 0", "12-s", "中文", "a", " "])
    def test_search_vector_with_invalid_expression(self, invalid_expr):
        """
        Search a vector with an invalid expression
        """
        pass

    def test_search_vector_with_invalid_vector_field(self):
        """
        Search a vector with an invalid vector field for ann search
        """
        pass

    @pytest.mark.parametrize("dim_offset", [1, -1])
    def test_search_vector_with_mismatch_vector_dim(self, dim_offset):
        """
        Search a vector with a mismatch vector dim
        """
        pass


@pytest.mark.L0
class TestQueryVector(TestBase):
    """Query cases that are expected to succeed."""

    # "uid > 0" used to be listed twice, which only repeated identical cases
    @pytest.mark.parametrize("expr", ["10+20 <= uid < 20+30", "uid in [1,2,3,4]",
                                      "uid > 0", "uid >= 0",
                                      "uid > -100 and uid < 100"])
    @pytest.mark.parametrize("include_output_fields", [True, False])
    @pytest.mark.parametrize("partial_fields", [True, False])
    def test_query_vector_with_int64_filter(self, expr, include_output_fields, partial_fields):
        """
        Query with an integer filter and verify every row satisfies it
        """
        name = gen_collection_name()
        self.name = name
        schema_payload, data = self.init_collection(name)
        output_fields = get_common_fields_by_data(data)
        if partial_fields:
            output_fields = output_fields[:len(output_fields) // 2]
            # uid is needed below to re-evaluate the filter on each row
            if "uid" not in output_fields:
                output_fields.append("uid")

        # query data
        payload = {
            "collectionName": name,
            "filter": expr,
            "limit": 100,
            "offset": 0,
            "outputFields": output_fields
        }
        if not include_output_fields:
            payload.pop("outputFields")
            # NOTE(review): the nesting of this check under the branch above
            # could not be recovered from the collapsed source - confirm.
            if 'vector' in output_fields:
                output_fields.remove("vector")
        time.sleep(5)
        rsp = self.vector_client.vector_query(payload)
        assert rsp['code'] == 200
        res = rsp['data']
        logger.info(f"res: {len(res)}")
        for r in res:
            assert _uid_matches(expr, r['uid']) is True
            for field in output_fields:
                assert field in r

    @pytest.mark.parametrize("filter_expr", ["name > \"placeholder\"", "name like \"placeholder%\""])
    @pytest.mark.parametrize("include_output_fields", [True, False])
    def test_query_vector_with_varchar_filter(self, filter_expr, include_output_fields):
        """
        Query with a varchar filter built from the median name prefix
        """
        name = gen_collection_name()
        self.name = name
        nb = 200
        dim = 128
        limit = 100
        schema_payload, data = self.init_collection(name, dim=dim, nb=nb)
        prefix = _median_name_prefix(data)
        # query data
        output_fields = get_common_fields_by_data(data)
        filter_expr = filter_expr.replace("placeholder", prefix)
        logger.info(f"filter_expr: {filter_expr}")
        payload = {
            "collectionName": name,
            "outputFields": output_fields,
            "filter": filter_expr,
            "limit": limit,
            "offset": 0,
        }
        if not include_output_fields:
            payload.pop("outputFields")
        rsp = self.vector_client.vector_query(payload)
        assert rsp['code'] == 200
        res = rsp['data']
        logger.info(f"res: {len(res)}")
        assert len(res) <= limit
        for item in res:
            item_name = item.get("name")
            logger.info(f"name: {item_name}")
            _assert_name_matches(filter_expr, item_name, prefix)

    @pytest.mark.parametrize("sum_of_limit_offset", [16384])
    def test_query_vector_with_large_sum_of_limit_offset(self, sum_of_limit_offset):
        """
        Query a vector with sum of limit and offset larger than max value
        """
        max_sum_of_limit_offset = 16384
        name = gen_collection_name()
        filter_expr = "name > \"placeholder\""
        self.name = name
        nb = 200
        dim = 128
        limit = 100
        offset = sum_of_limit_offset - limit
        schema_payload, data = self.init_collection(name, dim=dim, nb=nb)
        prefix = _median_name_prefix(data)
        # query data
        output_fields = get_common_fields_by_data(data)
        filter_expr = filter_expr.replace("placeholder", prefix)
        logger.info(f"filter_expr: {filter_expr}")
        payload = {
            "collectionName": name,
            "outputFields": output_fields,
            "filter": filter_expr,
            "limit": limit,
            "offset": offset,
        }
        rsp = self.vector_client.vector_query(payload)
        if sum_of_limit_offset > max_sum_of_limit_offset:
            assert rsp['code'] == 1
            return
        assert rsp['code'] == 200
        res = rsp['data']
        logger.info(f"res: {len(res)}")
        assert len(res) <= limit
        for item in res:
            item_name = item.get("name")
            logger.info(f"name: {item_name}")
            _assert_name_matches(filter_expr, item_name, prefix)


@pytest.mark.L0
class TestGetVector(TestBase):
    """Get-by-id cases that are expected to succeed."""

    def test_get_vector_with_simple_payload(self):
        """
        Search for ids, then get the first hit by its id
        """
        name = gen_collection_name()
        self.name = name
        self.init_collection(name)

        # search data
        dim = 128
        payload = {
            "collectionName": name,
            "vector": _random_unit_vector(dim),
        }
        rsp = self.vector_client.vector_search(payload)
        assert rsp['code'] == 200
        res = rsp['data']
        logger.info(f"res: {len(res)}")
        limit = int(payload.get("limit", 100))
        assert len(res) == limit
        ids = [item['id'] for item in res]
        assert len(ids) == len(set(ids))
        payload = {
            "collectionName": name,
            "outputFields": ["*"],
            "id": ids[0],
        }
        rsp = self.vector_client.vector_get(payload)
        assert rsp['code'] == 200
        res = rsp['data']
        logger.info(f"res: {res}")
        logger.info(f"res: {len(res)}")
        for item in res:
            assert item['id'] == ids[0]

    @pytest.mark.L0
    @pytest.mark.parametrize("id_field_type", ["list", "one"])
    @pytest.mark.parametrize("include_invalid_id", [True, False])
    @pytest.mark.parametrize("include_output_fields", [True, False])
    def test_get_vector_complex(self, id_field_type, include_output_fields, include_invalid_id):
        """
        Get by a single id or an id list, optionally containing an invalid id
        """
        name = gen_collection_name()
        self.name = name
        nb = 200
        dim = 128
        schema_payload, data = self.init_collection(name, dim=dim, nb=nb)
        output_fields = get_common_fields_by_data(data)
        ids = _query_ids_by_uid(self.vector_client, name, data, output_fields)
        id_to_get = None
        if id_field_type == "list":
            id_to_get = ids
        if id_field_type == "one":
            id_to_get = ids[0]
        if include_invalid_id:
            # id 0 is used as the invalid id
            if isinstance(id_to_get, list):
                id_to_get[-1] = 0
            else:
                id_to_get = 0
        # get by id list
        payload = {
            "collectionName": name,
            "outputFields": output_fields,
            "id": id_to_get
        }
        rsp = self.vector_client.vector_get(payload)
        assert rsp['code'] == 200
        res = rsp['data']
        if isinstance(id_to_get, list):
            if include_invalid_id:
                assert len(res) == len(id_to_get) - 1
            else:
                assert len(res) == len(id_to_get)
        else:
            if include_invalid_id:
                assert len(res) == 0
            else:
                assert len(res) == 1
        for r in rsp['data']:
            if isinstance(id_to_get, list):
                assert r['id'] in id_to_get
            else:
                assert r['id'] == id_to_get
            if include_output_fields:
                for field in output_fields:
                    assert field in r


@pytest.mark.L0
class TestDeleteVector(TestBase):
    """Delete cases that are expected to succeed."""

    @pytest.mark.parametrize("include_invalid_id", [True, False])
    @pytest.mark.parametrize("id_field_type", ["list", "one"])
    def test_delete_vector_default(self, id_field_type, include_invalid_id):
        """
        Delete by a single id or an id list, then verify the rows are gone
        """
        name = gen_collection_name()
        self.name = name
        nb = 200
        dim = 128
        schema_payload, data = self.init_collection(name, dim=dim, nb=nb)
        time.sleep(1)
        output_fields = get_common_fields_by_data(data)
        ids = _query_ids_by_uid(self.vector_client, name, data, output_fields)
        id_to_get = None
        if id_field_type == "list":
            id_to_get = ids
        if id_field_type == "one":
            id_to_get = ids[0]
        if include_invalid_id:
            # id 0 is used as the invalid id
            if isinstance(id_to_get, list):
                id_to_get.append(0)
            else:
                id_to_get = 0
        if isinstance(id_to_get, list):
            # delete at most the last 100 ids
            if len(id_to_get) >= 100:
                id_to_get = id_to_get[-100:]
        # delete by id list
        payload = {
            "collectionName": name,
            "id": id_to_get
        }
        rsp = self.vector_client.vector_delete(payload)
        assert rsp['code'] == 200
        logger.info(f"delete res: {rsp}")

        # verify data deleted
        if not isinstance(id_to_get, list):
            id_to_get = [id_to_get]
        payload = {
            "collectionName": name,
            "filter": f"id in {id_to_get}",
        }
        time.sleep(5)
        rsp = self.vector_client.vector_query(payload)
        assert rsp['code'] == 200
        assert len(rsp['data']) == 0


# This class used to be named TestDeleteVector as well, which rebound the
# module-level name and hid the L0 class above from test collection.
@pytest.mark.L1
class TestDeleteVectorNegative(TestBase):
    """Delete cases that are expected to be rejected or to delete nothing."""

    def test_delete_vector_with_invalid_api_key(self):
        """
        Delete a vector with an invalid api key
        """
        name = gen_collection_name()
        self.name = name
        nb = 200
        dim = 128
        schema_payload, data = self.init_collection(name, dim=dim, nb=nb)
        output_fields = get_common_fields_by_data(data)
        id_to_get = _query_ids_by_uid(self.vector_client, name, data, output_fields)
        # delete by id list
        payload = {
            "collectionName": name,
            "id": id_to_get
        }
        # NOTE(review): `client` is an alias of self.vector_client, not a copy,
        # so the invalid key stays set on that object after this call.
        client = self.vector_client
        client.api_key = "invalid_api_key"
        rsp = client.vector_delete(payload)
        assert rsp['code'] == 1800

    def test_delete_vector_with_invalid_collection_name(self):
        """
        Delete a vector with an invalid collection name
        """
        name = gen_collection_name()
        self.name = name
        self.init_collection(name, dim=128, nb=3000)

        # query data
        expr = "id > 0"
        payload = {
            "collectionName": name,
            "filter": expr,
            "limit": 3000,
            "offset": 0,
            "outputFields": ["id", "uid"]
        }
        rsp = self.vector_client.vector_query(payload)
        assert rsp['code'] == 200
        res = rsp['data']
        logger.info(f"res: {len(res)}")
        id_list = [r['id'] for r in res]
        delete_expr = f"id in {id_list[:10]}"
        # query data before delete
        payload = {
            "collectionName": name,
            "filter": delete_expr,
            "limit": 3000,
            "offset": 0,
            "outputFields": ["id", "uid"]
        }
        rsp = self.vector_client.vector_query(payload)
        assert rsp['code'] == 200
        res = rsp['data']
        logger.info(f"res: {len(res)}")
        # delete data
        payload = {
            "collectionName": name + "_invalid",
            "filter": delete_expr,
        }
        rsp = self.vector_client.vector_delete(payload)
        assert rsp['code'] == 1

    def test_delete_vector_with_non_primary_key(self):
        """
        Delete a vector with a non-primary key, expect no data were deleted
        """
        name = gen_collection_name()
        self.name = name
        self.init_collection(name, dim=128, nb=300)
        expr = "uid > 0"
        payload = {
            "collectionName": name,
            "filter": expr,
            "limit": 3000,
            "offset": 0,
            "outputFields": ["id", "uid"]
        }
        rsp = self.vector_client.vector_query(payload)
        assert rsp['code'] == 200
        res = rsp['data']
        logger.info(f"res: {len(res)}")
        uid_list = [r['uid'] for r in res]
        delete_expr = f"uid in {uid_list[:10]}"
        # query data before delete
        payload = {
            "collectionName": name,
            "filter": delete_expr,
            "limit": 3000,
            "offset": 0,
            "outputFields": ["id", "uid"]
        }
        rsp = self.vector_client.vector_query(payload)
        assert rsp['code'] == 200
        res = rsp['data']
        num_before_delete = len(res)
        logger.info(f"res: {len(res)}")
        # delete data; the delete response itself is not asserted, only the
        # unchanged row count below is checked
        payload = {
            "collectionName": name,
            "filter": delete_expr,
        }
        rsp = self.vector_client.vector_delete(payload)
        # query data after delete
        payload = {
            "collectionName": name,
            "filter": delete_expr,
            "limit": 3000,
            "offset": 0,
            "outputFields": ["id", "uid"]
        }
        time.sleep(1)
        rsp = self.vector_client.vector_query(payload)
        assert len(rsp["data"]) == num_before_delete