mirror of https://github.com/milvus-io/milvus.git
Add structure cases (#1675)
* enable nsg case
Signed-off-by: zhenwu <zw@zilliz.com>
* enable sub/superstructure
Signed-off-by: zhenwu <zw@zilliz.com>
pull/1680/head
parent
59dab6cb84
commit
21c7b8f09c
|
@ -203,5 +203,48 @@ def tanimoto_collection(request, connect):
|
|||
connect.drop_collection(collection_name)
|
||||
|
||||
request.addfinalizer(teardown)
|
||||
|
||||
return collection_name
|
||||
|
||||
@pytest.fixture(scope="function")
def substructure_collection(request, connect):
    '''
    Function-scoped fixture: create a collection that uses
    MetricType.SUBSTRUCTURE and return its name.

    The name prefix and the dimension are read from the requesting test
    module's `collection_id` and `dim` attributes, falling back to "test"
    and 128. If the collection cannot be created, the whole pytest run is
    stopped with pytest.exit. The registered finalizer drops every
    collection reported by show_collections(), not only the one created here.
    '''
    ori_collection_name = getattr(request.module, "collection_id", "test")
    collection_name = gen_unique_str(ori_collection_name)
    # The fallback must be an integer: the value is forwarded unchanged as
    # the collection dimension.
    dim = getattr(request.module, "dim", 128)
    param = {'collection_name': collection_name,
             'dimension': dim,
             'index_file_size': index_file_size,
             'metric_type': MetricType.SUBSTRUCTURE}
    status = connect.create_collection(param)
    if not status.OK():
        pytest.exit("collection can not be created, exit pytest ...")

    def teardown():
        # Use a distinct loop name so the fixture's own collection_name is
        # not visually shadowed.
        status, existing_names = connect.show_collections()
        for existing_name in existing_names:
            connect.drop_collection(existing_name)

    request.addfinalizer(teardown)
    return collection_name
|
||||
|
||||
@pytest.fixture(scope="function")
def superstructure_collection(request, connect):
    '''
    Function-scoped fixture: create a collection that uses
    MetricType.SUPERSTRUCTURE and return its name.

    The name prefix and the dimension are read from the requesting test
    module's `collection_id` and `dim` attributes, falling back to "test"
    and 128. If the collection cannot be created, the whole pytest run is
    stopped with pytest.exit. The registered finalizer drops every
    collection reported by show_collections(), not only the one created here.
    '''
    ori_collection_name = getattr(request.module, "collection_id", "test")
    collection_name = gen_unique_str(ori_collection_name)
    # The fallback must be an integer: the value is forwarded unchanged as
    # the collection dimension.
    dim = getattr(request.module, "dim", 128)
    param = {'collection_name': collection_name,
             'dimension': dim,
             'index_file_size': index_file_size,
             'metric_type': MetricType.SUPERSTRUCTURE}
    status = connect.create_collection(param)
    if not status.OK():
        pytest.exit("collection can not be created, exit pytest ...")

    def teardown():
        # Use a distinct loop name so the fixture's own collection_name is
        # not visually shadowed.
        status, existing_names = connect.show_collections()
        for existing_name in existing_names:
            connect.drop_collection(existing_name)

    request.addfinalizer(teardown)
    return collection_name
|
||||
|
|
|
@ -77,6 +77,34 @@ class TestCollection:
|
|||
status = connect.create_collection(param)
|
||||
assert status.OK()
|
||||
|
||||
def test_create_collection_substructure(self, connect):
    '''
    target: test creating a collection with the SUBSTRUCTURE metric
    method: call create_collection with a valid parameter dict
    expected: the returned status is OK
    '''
    unique_name = gen_unique_str("test_collection")
    create_args = {
        'collection_name': unique_name,
        'dimension': dim,
        'index_file_size': index_file_size,
        'metric_type': MetricType.SUBSTRUCTURE,
    }
    assert connect.create_collection(create_args).OK()
|
||||
|
||||
def test_create_collection_superstructure(self, connect):
    '''
    target: test creating a collection with the SUPERSTRUCTURE metric
    method: call create_collection with a valid parameter dict
    expected: the returned status is OK
    '''
    unique_name = gen_unique_str("test_collection")
    create_args = {
        'collection_name': unique_name,
        'dimension': dim,
        'index_file_size': index_file_size,
        'metric_type': MetricType.SUPERSTRUCTURE,
    }
    assert connect.create_collection(create_args).OK()
|
||||
|
||||
@pytest.mark.level(2)
|
||||
def test_create_collection_without_connection(self, dis_connect):
|
||||
'''
|
||||
|
@ -253,6 +281,38 @@ class TestCollection:
|
|||
assert res.collection_name == collection_name
|
||||
assert res.metric_type == MetricType.HAMMING
|
||||
|
||||
def test_collection_describe_collection_name_substructure(self, connect):
    '''
    target: test describe_collection on a SUBSTRUCTURE collection
    method: create the collection, then compare the fields returned by describe_collection
    expected: the described name and metric type match the values used at creation
    '''
    unique_name = gen_unique_str("test_collection")
    connect.create_collection({
        'collection_name': unique_name,
        'dimension': dim,
        'index_file_size': index_file_size,
        'metric_type': MetricType.SUBSTRUCTURE,
    })
    _, described = connect.describe_collection(unique_name)
    assert described.collection_name == unique_name
    assert described.metric_type == MetricType.SUBSTRUCTURE
|
||||
|
||||
def test_collection_describe_collection_name_superstructure(self, connect):
    '''
    target: test describe_collection on a SUPERSTRUCTURE collection
    method: create the collection, then compare the fields returned by describe_collection
    expected: the described name and metric type match the values used at creation
    '''
    unique_name = gen_unique_str("test_collection")
    connect.create_collection({
        'collection_name': unique_name,
        'dimension': dim,
        'index_file_size': index_file_size,
        'metric_type': MetricType.SUPERSTRUCTURE,
    })
    _, described = connect.describe_collection(unique_name)
    assert described.collection_name == unique_name
    assert described.metric_type == MetricType.SUPERSTRUCTURE
|
||||
|
||||
# TODO: enable
|
||||
@pytest.mark.level(2)
|
||||
def _test_collection_describe_collection_name_multiprocessing(self, connect, args):
|
||||
|
@ -658,6 +718,38 @@ class TestCollection:
|
|||
assert status.OK()
|
||||
assert collection_name in result
|
||||
|
||||
def test_show_collections_substructure(self, connect):
    '''
    target: test that show_collections lists a newly created SUBSTRUCTURE collection
    method: create the collection, then call show_collections
    expected: status is OK and the new collection name is in the returned list
    '''
    unique_name = gen_unique_str("test_collection")
    connect.create_collection({
        'collection_name': unique_name,
        'dimension': dim,
        'index_file_size': index_file_size,
        'metric_type': MetricType.SUBSTRUCTURE,
    })
    listing_status, listed_names = connect.show_collections()
    assert listing_status.OK()
    assert unique_name in listed_names
|
||||
|
||||
def test_show_collections_superstructure(self, connect):
    '''
    target: test that show_collections lists a newly created SUPERSTRUCTURE collection
    method: create the collection, then call show_collections
    expected: status is OK and the new collection name is in the returned list
    '''
    unique_name = gen_unique_str("test_collection")
    connect.create_collection({
        'collection_name': unique_name,
        'dimension': dim,
        'index_file_size': index_file_size,
        'metric_type': MetricType.SUPERSTRUCTURE,
    })
    listing_status, listed_names = connect.show_collections()
    assert listing_status.OK()
    assert unique_name in listed_names
|
||||
|
||||
@pytest.mark.level(2)
|
||||
def test_show_collections_without_connection(self, dis_connect):
|
||||
'''
|
||||
|
|
|
@ -485,7 +485,7 @@ class TestCollectionCountJAC:
|
|||
assert status.OK()
|
||||
assert res == nq
|
||||
|
||||
class TestCollectionCountHAM:
|
||||
class TestCollectionCountBinary:
|
||||
"""
|
||||
params means different nb, the nb value may trigger merge, or not
|
||||
"""
|
||||
|
@ -516,6 +516,28 @@ class TestCollectionCountHAM:
|
|||
else:
|
||||
pytest.skip("Skip index Temporary")
|
||||
|
||||
@pytest.fixture(scope="function", params=gen_simple_index())
def get_substructure_index(self, request, connect):
    '''
    Parametrized over gen_simple_index(); yields only the FLAT index
    configuration and skips the test for every other index type.
    '''
    candidate = request.param
    logging.getLogger().info(candidate)
    if candidate["index_type"] != IndexType.FLAT:
        pytest.skip("Skip index Temporary")
    return candidate
|
||||
|
||||
@pytest.fixture(scope="function", params=gen_simple_index())
def get_superstructure_index(self, request, connect):
    '''
    Parametrized over gen_simple_index(); yields only the FLAT index
    configuration and skips the test for every other index type.
    '''
    candidate = request.param
    logging.getLogger().info(candidate)
    if candidate["index_type"] != IndexType.FLAT:
        pytest.skip("Skip index Temporary")
    return candidate
|
||||
|
||||
def test_collection_rows_count(self, connect, ham_collection, add_vectors_nb):
|
||||
'''
|
||||
target: test collection rows_count is correct or not
|
||||
|
@ -530,6 +552,34 @@ class TestCollectionCountHAM:
|
|||
status, res = connect.count_collection(ham_collection)
|
||||
assert res == nb
|
||||
|
||||
def test_collection_rows_count_substructure(self, connect, substructure_collection, add_vectors_nb):
    '''
    target: test that count_collection is correct for a SUBSTRUCTURE collection
    method: add `add_vectors_nb` generated binary vectors, flush, then call count_collection
    expected: the reported count equals the number of vectors added
    '''
    expected_rows = add_vectors_nb
    _, binary_records = gen_binary_vectors(expected_rows, dim)
    connect.add_vectors(collection_name=substructure_collection, records=binary_records)
    connect.flush([substructure_collection])
    _, row_count = connect.count_collection(substructure_collection)
    assert row_count == expected_rows
|
||||
|
||||
def test_collection_rows_count_superstructure(self, connect, superstructure_collection, add_vectors_nb):
    '''
    target: test that count_collection is correct for a SUPERSTRUCTURE collection
    method: add `add_vectors_nb` generated binary vectors, flush, then call count_collection
    expected: the reported count equals the number of vectors added
    '''
    expected_rows = add_vectors_nb
    _, binary_records = gen_binary_vectors(expected_rows, dim)
    connect.add_vectors(collection_name=superstructure_collection, records=binary_records)
    connect.flush([superstructure_collection])
    _, row_count = connect.count_collection(superstructure_collection)
    assert row_count == expected_rows
|
||||
|
||||
def test_collection_rows_count_after_index_created(self, connect, ham_collection, get_hamming_index):
|
||||
'''
|
||||
target: test count_collection, after index have been created
|
||||
|
@ -546,6 +596,38 @@ class TestCollectionCountHAM:
|
|||
status, res = connect.count_collection(ham_collection)
|
||||
assert res == nb
|
||||
|
||||
def test_collection_rows_count_after_index_created_substructure(self, connect, substructure_collection, get_substructure_index):
    '''
    target: test count_collection after an index has been created
    method: add 100 binary vectors, flush, create the index, then call count_collection
    expected: the reported count equals the number of vectors added
    '''
    expected_rows = 100
    chosen_type = get_substructure_index["index_type"]
    chosen_param = get_substructure_index["index_param"]
    _, binary_records = gen_binary_vectors(expected_rows, dim)
    connect.add_vectors(collection_name=substructure_collection, records=binary_records)
    connect.flush([substructure_collection])
    connect.create_index(substructure_collection, chosen_type, chosen_param)
    _, row_count = connect.count_collection(substructure_collection)
    assert row_count == expected_rows
|
||||
|
||||
def test_collection_rows_count_after_index_created_superstructure(self, connect, superstructure_collection, get_superstructure_index):
    '''
    target: test count_collection after an index has been created
    method: add 100 binary vectors, flush, create the index, then call count_collection
    expected: the reported count equals the number of vectors added
    '''
    expected_rows = 100
    chosen_type = get_superstructure_index["index_type"]
    chosen_param = get_superstructure_index["index_param"]
    _, binary_records = gen_binary_vectors(expected_rows, dim)
    connect.add_vectors(collection_name=superstructure_collection, records=binary_records)
    connect.flush([superstructure_collection])
    connect.create_index(superstructure_collection, chosen_type, chosen_param)
    _, row_count = connect.count_collection(superstructure_collection)
    assert row_count == expected_rows
|
||||
|
||||
@pytest.mark.level(2)
|
||||
def test_count_without_connection(self, ham_collection, dis_connect):
|
||||
'''
|
||||
|
|
|
@ -1437,7 +1437,7 @@ class TestIndexJAC:
|
|||
assert result._index_type == IndexType.FLAT
|
||||
|
||||
|
||||
class TestIndexHAM:
|
||||
class TestIndexBinary:
|
||||
tmp, vectors = gen_binary_vectors(nb, dim)
|
||||
|
||||
@pytest.fixture(
|
||||
|
@ -1475,6 +1475,28 @@ class TestIndexHAM:
|
|||
else:
|
||||
pytest.skip("Skip index Temporary")
|
||||
|
||||
@pytest.fixture(scope="function", params=gen_simple_index())
def get_substructure_index(self, request, connect):
    '''
    Parametrized over gen_simple_index(); yields only the FLAT index
    configuration and skips the test for every other index type.
    '''
    candidate = request.param
    logging.getLogger().info(candidate)
    if candidate["index_type"] != IndexType.FLAT:
        pytest.skip("Skip index Temporary")
    return candidate
|
||||
|
||||
@pytest.fixture(scope="function", params=gen_simple_index())
def get_superstructure_index(self, request, connect):
    '''
    Parametrized over gen_simple_index(); yields only the FLAT index
    configuration and skips the test for every other index type.
    '''
    candidate = request.param
    logging.getLogger().info(candidate)
    if candidate["index_type"] != IndexType.FLAT:
        pytest.skip("Skip index Temporary")
    return candidate
|
||||
|
||||
"""
|
||||
******************************************************************
|
||||
The following cases are used to test `create_index` function
|
||||
|
@ -1514,6 +1536,23 @@ class TestIndexHAM:
|
|||
status, res = connect.count_collection(ham_collection)
|
||||
assert res == len(self.vectors)
|
||||
|
||||
@pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_index_partition_structure(self, connect, substructure_collection, get_substructure_index):
    '''
    target: test create_index on a SUBSTRUCTURE collection that has a partition
    method: create a partition, add the class-level vectors into it, create the index,
            then count the collection
    expected: create_index returns OK and the count equals the number of vectors added
    '''
    chosen_param = get_substructure_index["index_param"]
    chosen_type = get_substructure_index["index_type"]
    logging.getLogger().info(get_substructure_index)
    connect.create_partition(substructure_collection, tag)
    connect.add_vectors(substructure_collection, self.vectors, partition_tag=tag)
    build_status = connect.create_index(substructure_collection, chosen_type, chosen_param)
    assert build_status.OK()
    _, row_count = connect.count_collection(substructure_collection)
    assert row_count == len(self.vectors)
|
||||
|
||||
@pytest.mark.level(2)
|
||||
def test_create_index_without_connect(self, dis_connect, ham_collection):
|
||||
'''
|
||||
|
@ -1547,6 +1586,27 @@ class TestIndexHAM:
|
|||
assert status.OK()
|
||||
assert len(result) == len(query_vecs)
|
||||
|
||||
@pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_index_search_with_query_vectors_superstructure(self, connect, superstructure_collection, get_superstructure_index):
    '''
    target: test create_index followed by a search with several query vectors
    method: add the class-level vectors, create the index, then search with the
            first three vectors and top_k = 5
    expected: the search status is OK and one result entry is returned per query vector
    '''
    chosen_param = get_superstructure_index["index_param"]
    chosen_type = get_superstructure_index["index_type"]
    log = logging.getLogger
    log().info(get_superstructure_index)
    connect.add_vectors(superstructure_collection, self.vectors)
    connect.create_index(superstructure_collection, chosen_type, chosen_param)
    log().info(connect.describe_index(superstructure_collection))
    queries = [self.vectors[position] for position in range(3)]
    search_status, hits = connect.search_vectors(
        superstructure_collection, 5, queries, params=get_search_param(chosen_type))
    log().info(hits)
    assert search_status.OK()
    assert len(hits) == len(queries)
|
||||
|
||||
"""
|
||||
******************************************************************
|
||||
The following cases are used to test `describe_index` function
|
||||
|
@ -1588,6 +1648,24 @@ class TestIndexHAM:
|
|||
assert result._collection_name == ham_collection
|
||||
assert result._index_type == index_type
|
||||
|
||||
def test_describe_index_partition_superstructrue(self, connect, superstructure_collection, get_superstructure_index):
    '''
    target: test describe_index on a SUPERSTRUCTURE collection that has a partition
    method: create a partition, add the class-level vectors into it, create the index,
            then call describe_index
    expected: the described params, collection name and index type match what was requested
    '''
    index_param = get_superstructure_index["index_param"]
    index_type = get_superstructure_index["index_type"]
    logging.getLogger().info(get_superstructure_index)
    status = connect.create_partition(superstructure_collection, tag)
    # Use the class-level vectors like the sibling tests do. A bare `vectors`
    # inside a method never resolves to the class attribute.
    status, ids = connect.add_vectors(superstructure_collection, self.vectors, partition_tag=tag)
    status = connect.create_index(superstructure_collection, index_type, index_param)
    status, result = connect.describe_index(superstructure_collection)
    logging.getLogger().info(result)
    assert result._params == index_param
    assert result._collection_name == superstructure_collection
    assert result._index_type == index_type
|
||||
|
||||
"""
|
||||
******************************************************************
|
||||
The following cases are used to test `drop_index` function
|
||||
|
@ -1616,6 +1694,27 @@ class TestIndexHAM:
|
|||
assert result._collection_name == ham_collection
|
||||
assert result._index_type == IndexType.FLAT
|
||||
|
||||
def test_drop_index_substructure(self, connect, substructure_collection, get_substructure_index):
    '''
    target: test drop_index on a SUBSTRUCTURE collection
    method: create an index, drop it, then call describe_index
    expected: every call returns OK and the described index type is FLAT after the drop
    '''
    chosen_param = get_substructure_index["index_param"]
    chosen_type = get_substructure_index["index_type"]
    log = logging.getLogger

    # The "mode" command result itself is unused; only its status is checked.
    mode_status, _ = connect._cmd("mode")
    assert mode_status.OK()

    assert connect.create_index(substructure_collection, chosen_type, chosen_param).OK()
    _, before_drop = connect.describe_index(substructure_collection)
    log().info(before_drop)

    assert connect.drop_index(substructure_collection).OK()
    _, after_drop = connect.describe_index(substructure_collection)
    log().info(after_drop)
    assert after_drop._collection_name == substructure_collection
    assert after_drop._index_type == IndexType.FLAT
|
||||
|
||||
def test_drop_index_partition(self, connect, ham_collection, get_hamming_index):
|
||||
'''
|
||||
target: test drop index interface
|
||||
|
|
|
@ -120,6 +120,17 @@ class TestSearchBase:
|
|||
else:
|
||||
pytest.skip("Skip index Temporary")
|
||||
|
||||
@pytest.fixture(scope="function", params=gen_simple_index())
def get_structure_index(self, request, connect):
    '''
    Parametrized over gen_simple_index(); yields only the FLAT index
    configuration and skips the test for every other index type.
    '''
    candidate = request.param
    logging.getLogger().info(candidate)
    if candidate["index_type"] != IndexType.FLAT:
        pytest.skip("Skip index Temporary")
    return candidate
|
||||
|
||||
"""
|
||||
generate top-k params
|
||||
"""
|
||||
|
@ -640,6 +651,58 @@ class TestSearchBase:
|
|||
logging.getLogger().info(result)
|
||||
assert abs(result[0][0].distance - min(distance_0, distance_1).astype(float)) <= epsilon
|
||||
|
||||
def test_search_distance_substructure_flat_index(self, connect, substructure_collection):
    '''
    target: search a SUBSTRUCTURE collection and check the returned distance
    method: prepare two vectors with init_binary_data (nb=2), create a FLAT index,
            search with one more generated vector (insert=False), and compare the
            top-1 distance with the values computed locally by substructure()
    expected: the returned distance is within epsilon of the smaller computed value
    '''
    top_k = 1
    int_vectors, vectors, ids = self.init_binary_data(connect, substructure_collection, nb=2)
    index_type = IndexType.FLAT
    index_param = {
        "nlist": 16384
    }
    connect.create_index(substructure_collection, index_type, index_param)
    logging.getLogger().info(connect.describe_collection(substructure_collection))
    logging.getLogger().info(connect.describe_index(substructure_collection))
    query_int_vectors, query_vecs, tmp_ids = self.init_binary_data(connect, substructure_collection, nb=1, insert=False)
    # Reference distances from the query to each of the two prepared vectors.
    distance_0 = substructure(query_int_vectors[0], int_vectors[0])
    distance_1 = substructure(query_int_vectors[0], int_vectors[1])
    search_param = get_search_param(index_type)
    status, result = connect.search_vectors(substructure_collection, top_k, query_vecs, params=search_param)
    logging.getLogger().info(status)
    logging.getLogger().info(result)
    assert abs(result[0][0].distance - min(distance_0, distance_1).astype(float)) <= epsilon
|
||||
|
||||
def test_search_distance_superstructure_flat_index(self, connect, superstructure_collection):
    '''
    target: search a SUPERSTRUCTURE collection and check the returned distance
    method: prepare two vectors with init_binary_data (nb=2), create a FLAT index,
            search with one more generated vector (insert=False), and compare the
            top-1 distance with the values computed locally by superstructure()
    expected: the returned distance is within epsilon of the smaller computed value
    '''
    top_k = 1
    int_vectors, vectors, ids = self.init_binary_data(connect, superstructure_collection, nb=2)
    index_type = IndexType.FLAT
    index_param = {
        "nlist": 16384
    }
    connect.create_index(superstructure_collection, index_type, index_param)
    logging.getLogger().info(connect.describe_collection(superstructure_collection))
    logging.getLogger().info(connect.describe_index(superstructure_collection))
    query_int_vectors, query_vecs, tmp_ids = self.init_binary_data(connect, superstructure_collection, nb=1, insert=False)
    # Reference distances from the query to each of the two prepared vectors.
    distance_0 = superstructure(query_int_vectors[0], int_vectors[0])
    distance_1 = superstructure(query_int_vectors[0], int_vectors[1])
    search_param = get_search_param(index_type)
    status, result = connect.search_vectors(superstructure_collection, top_k, query_vecs, params=search_param)
    logging.getLogger().info(status)
    logging.getLogger().info(result)
    assert abs(result[0][0].distance - min(distance_0, distance_1).astype(float)) <= epsilon
|
||||
|
||||
def test_search_distance_tanimoto_flat_index(self, connect, tanimoto_collection):
|
||||
'''
|
||||
target: search ip_collection, and check the result: distance
|
||||
|
|
|
@ -55,6 +55,18 @@ def tanimoto(x, y):
|
|||
return -np.log2(np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum()))
|
||||
|
||||
|
||||
def substructure(x, y):
    """Return 1 minus the share of set bits in `y` that are also set in `x`.

    Both inputs are converted to boolean arrays, so any non-zero element
    counts as a set bit. The result is 0.0 when every set bit of `y` is
    also set in `x`, and 1.0 when they share none.

    No guard is applied for a `y` without set bits; the division by
    `np.count_nonzero(y)` is left as in the original.
    """
    # Builtin bool replaces the `np.bool` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24.
    x = np.asarray(x, dtype=bool)
    y = np.asarray(y, dtype=bool)
    return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(y)
|
||||
|
||||
|
||||
def superstructure(x, y):
    """Return 1 minus the share of set bits in `x` that are also set in `y`.

    Both inputs are converted to boolean arrays, so any non-zero element
    counts as a set bit. The result is 0.0 when every set bit of `x` is
    also set in `y`, and 1.0 when they share none.

    No guard is applied for an `x` without set bits; the division by
    `np.count_nonzero(x)` is left as in the original.
    """
    # Builtin bool replaces the `np.bool` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24.
    x = np.asarray(x, dtype=bool)
    y = np.asarray(y, dtype=bool)
    return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(x)
|
||||
|
||||
|
||||
def gen_single_vector(dim):
    """Return a one-element list holding a list of `dim` values from random.random()."""
    components = []
    for _ in range(dim):
        components.append(random.random())
    return [components]
|
||||
|
||||
|
|
Loading…
Reference in New Issue