Add structure cases (#1675)

* enable nsg case

Signed-off-by: zhenwu <zw@zilliz.com>

* enable sub/superstructure

Signed-off-by: zhenwu <zw@zilliz.com>
pull/1680/head
del-zhenwu 2020-03-16 21:33:25 +08:00 committed by GitHub
parent 59dab6cb84
commit 21c7b8f09c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 394 additions and 3 deletions

View File

@ -203,5 +203,48 @@ def tanimoto_collection(request, connect):
connect.drop_collection(collection_name)
request.addfinalizer(teardown)
return collection_name
@pytest.fixture(scope="function")
def substructure_collection(request, connect):
    """Create a uniquely named collection using the SUBSTRUCTURE metric.

    The name prefix and dimension are read from the requesting test module
    (``collection_id`` / ``dim``), falling back to "test" and "128".
    The whole pytest session is aborted if the collection cannot be created.

    Returns:
        The name of the collection that was created.
    """
    base_name = getattr(request.module, "collection_id", "test")
    collection_name = gen_unique_str(base_name)
    create_param = {
        'collection_name': collection_name,
        'dimension': getattr(request.module, "dim", "128"),
        'index_file_size': index_file_size,
        'metric_type': MetricType.SUBSTRUCTURE,
    }
    if not connect.create_collection(create_param).OK():
        pytest.exit("collection can not be created, exit pytest ...")

    def teardown():
        # Drops every collection reported by the server, not only the one
        # created by this fixture.
        _, existing_names = connect.show_collections()
        for existing_name in existing_names:
            connect.drop_collection(existing_name)

    request.addfinalizer(teardown)
    return collection_name
@pytest.fixture(scope="function")
def superstructure_collection(request, connect):
    """Create a uniquely named collection using the SUPERSTRUCTURE metric.

    The name prefix and dimension are read from the requesting test module
    (``collection_id`` / ``dim``), falling back to "test" and "128".
    The whole pytest session is aborted if the collection cannot be created.

    Returns:
        The name of the collection that was created.
    """
    base_name = getattr(request.module, "collection_id", "test")
    collection_name = gen_unique_str(base_name)
    create_param = {
        'collection_name': collection_name,
        'dimension': getattr(request.module, "dim", "128"),
        'index_file_size': index_file_size,
        'metric_type': MetricType.SUPERSTRUCTURE,
    }
    if not connect.create_collection(create_param).OK():
        pytest.exit("collection can not be created, exit pytest ...")

    def teardown():
        # Drops every collection reported by the server, not only the one
        # created by this fixture.
        _, existing_names = connect.show_collections()
        for existing_name in existing_names:
            connect.drop_collection(existing_name)

    request.addfinalizer(teardown)
    return collection_name

View File

@ -77,6 +77,34 @@ class TestCollection:
status = connect.create_collection(param)
assert status.OK()
def test_create_collection_substructure(self, connect):
    '''
    target: test create collection with the SUBSTRUCTURE metric type
    method: create collection with correct params
    expected: create status return ok
    '''
    create_param = {
        'collection_name': gen_unique_str("test_collection"),
        'dimension': dim,
        'index_file_size': index_file_size,
        'metric_type': MetricType.SUBSTRUCTURE,
    }
    create_status = connect.create_collection(create_param)
    assert create_status.OK()
def test_create_collection_superstructure(self, connect):
    '''
    target: test create collection with the SUPERSTRUCTURE metric type
    method: create collection with correct params
    expected: create status return ok
    '''
    create_param = {
        'collection_name': gen_unique_str("test_collection"),
        'dimension': dim,
        'index_file_size': index_file_size,
        'metric_type': MetricType.SUPERSTRUCTURE,
    }
    create_status = connect.create_collection(create_param)
    assert create_status.OK()
@pytest.mark.level(2)
def test_create_collection_without_connection(self, dis_connect):
'''
@ -253,6 +281,38 @@ class TestCollection:
assert res.collection_name == collection_name
assert res.metric_type == MetricType.HAMMING
def test_collection_describe_collection_name_substructure(self, connect):
    '''
    target: test describe collection created with the SUBSTRUCTURE metric type
    method: create collection, then compare the fields returned by describe_collection
    expected: collection_name and metric_type equal the values used at creation
    '''
    expected_name = gen_unique_str("test_collection")
    create_param = {
        'collection_name': expected_name,
        'dimension': dim,
        'index_file_size': index_file_size,
        'metric_type': MetricType.SUBSTRUCTURE,
    }
    connect.create_collection(create_param)
    # The describe status itself is not checked, only the returned schema.
    _, described = connect.describe_collection(expected_name)
    assert described.collection_name == expected_name
    assert described.metric_type == MetricType.SUBSTRUCTURE
def test_collection_describe_collection_name_superstructure(self, connect):
    '''
    target: test describe collection created with the SUPERSTRUCTURE metric type
    method: create collection, then compare the fields returned by describe_collection
    expected: collection_name and metric_type equal the values used at creation
    '''
    expected_name = gen_unique_str("test_collection")
    create_param = {
        'collection_name': expected_name,
        'dimension': dim,
        'index_file_size': index_file_size,
        'metric_type': MetricType.SUPERSTRUCTURE,
    }
    connect.create_collection(create_param)
    # The describe status itself is not checked, only the returned schema.
    _, described = connect.describe_collection(expected_name)
    assert described.collection_name == expected_name
    assert described.metric_type == MetricType.SUPERSTRUCTURE
# TODO: enable
@pytest.mark.level(2)
def _test_collection_describe_collection_name_multiprocessing(self, connect, args):
@ -658,6 +718,38 @@ class TestCollection:
assert status.OK()
assert collection_name in result
def test_show_collections_substructure(self, connect):
    '''
    target: test show collections lists a newly created SUBSTRUCTURE collection
    method: create collection, then call show_collections
    expected: status ok and collection_name in show collections
    '''
    created_name = gen_unique_str("test_collection")
    create_param = {
        'collection_name': created_name,
        'dimension': dim,
        'index_file_size': index_file_size,
        'metric_type': MetricType.SUBSTRUCTURE,
    }
    connect.create_collection(create_param)
    show_status, listed_names = connect.show_collections()
    assert show_status.OK()
    assert created_name in listed_names
def test_show_collections_superstructure(self, connect):
    '''
    target: test show collections lists a newly created SUPERSTRUCTURE collection
    method: create collection, then call show_collections
    expected: status ok and collection_name in show collections
    '''
    created_name = gen_unique_str("test_collection")
    create_param = {
        'collection_name': created_name,
        'dimension': dim,
        'index_file_size': index_file_size,
        'metric_type': MetricType.SUPERSTRUCTURE,
    }
    connect.create_collection(create_param)
    show_status, listed_names = connect.show_collections()
    assert show_status.OK()
    assert created_name in listed_names
@pytest.mark.level(2)
def test_show_collections_without_connection(self, dis_connect):
'''

View File

@ -485,7 +485,7 @@ class TestCollectionCountJAC:
assert status.OK()
assert res == nq
class TestCollectionCountHAM:
class TestCollectionCountBinary:
"""
params means different nb, the nb value may trigger merge, or not
"""
@ -516,6 +516,28 @@ class TestCollectionCountHAM:
else:
pytest.skip("Skip index Temporary")
@pytest.fixture(scope="function", params=gen_simple_index())
def get_substructure_index(self, request, connect):
    """Yield only the FLAT entries of gen_simple_index(); skip every other index type."""
    index_config = request.param
    logging.getLogger().info(index_config)
    is_flat = index_config["index_type"] == IndexType.FLAT
    if not is_flat:
        pytest.skip("Skip index Temporary")
    return index_config
@pytest.fixture(scope="function", params=gen_simple_index())
def get_superstructure_index(self, request, connect):
    """Yield only the FLAT entries of gen_simple_index(); skip every other index type."""
    index_config = request.param
    logging.getLogger().info(index_config)
    is_flat = index_config["index_type"] == IndexType.FLAT
    if not is_flat:
        pytest.skip("Skip index Temporary")
    return index_config
def test_collection_rows_count(self, connect, ham_collection, add_vectors_nb):
'''
target: test collection rows_count is correct or not
@ -530,6 +552,34 @@ class TestCollectionCountHAM:
status, res = connect.count_collection(ham_collection)
assert res == nb
def test_collection_rows_count_substructure(self, connect, substructure_collection, add_vectors_nb):
    '''
    target: test collection rows_count is correct or not
    method: add binary vectors to the collection, flush,
        then compare count_collection with the number of vectors added
    expected: the count is equal to the length of vectors
    '''
    expected_rows = add_vectors_nb
    _, binary_vectors = gen_binary_vectors(expected_rows, dim)
    connect.add_vectors(collection_name=substructure_collection, records=binary_vectors)
    connect.flush([substructure_collection])
    _, counted_rows = connect.count_collection(substructure_collection)
    assert counted_rows == expected_rows
def test_collection_rows_count_superstructure(self, connect, superstructure_collection, add_vectors_nb):
    '''
    target: test collection rows_count is correct or not
    method: add binary vectors to the collection, flush,
        then compare count_collection with the number of vectors added
    expected: the count is equal to the length of vectors
    '''
    expected_rows = add_vectors_nb
    _, binary_vectors = gen_binary_vectors(expected_rows, dim)
    connect.add_vectors(collection_name=superstructure_collection, records=binary_vectors)
    connect.flush([superstructure_collection])
    _, counted_rows = connect.count_collection(superstructure_collection)
    assert counted_rows == expected_rows
def test_collection_rows_count_after_index_created(self, connect, ham_collection, get_hamming_index):
'''
target: test count_collection, after index have been created
@ -546,6 +596,38 @@ class TestCollectionCountHAM:
status, res = connect.count_collection(ham_collection)
assert res == nb
def test_collection_rows_count_after_index_created_substructure(self, connect, substructure_collection, get_substructure_index):
    '''
    target: test count_collection, after index have been created
    method: add vectors in db, flush, create index, then call count_collection
    expected: the count is equal to the number of vectors added
    '''
    expected_rows = 100
    kind = get_substructure_index["index_type"]
    build_param = get_substructure_index["index_param"]
    _, binary_vectors = gen_binary_vectors(expected_rows, dim)
    connect.add_vectors(collection_name=substructure_collection, records=binary_vectors)
    connect.flush([substructure_collection])
    connect.create_index(substructure_collection, kind, build_param)
    _, counted_rows = connect.count_collection(substructure_collection)
    assert counted_rows == expected_rows
def test_collection_rows_count_after_index_created_superstructure(self, connect, superstructure_collection, get_superstructure_index):
    '''
    target: test count_collection, after index have been created
    method: add vectors in db, flush, create index, then call count_collection
    expected: the count is equal to the number of vectors added
    '''
    expected_rows = 100
    kind = get_superstructure_index["index_type"]
    build_param = get_superstructure_index["index_param"]
    _, binary_vectors = gen_binary_vectors(expected_rows, dim)
    connect.add_vectors(collection_name=superstructure_collection, records=binary_vectors)
    connect.flush([superstructure_collection])
    connect.create_index(superstructure_collection, kind, build_param)
    _, counted_rows = connect.count_collection(superstructure_collection)
    assert counted_rows == expected_rows
@pytest.mark.level(2)
def test_count_without_connection(self, ham_collection, dis_connect):
'''

View File

@ -1437,7 +1437,7 @@ class TestIndexJAC:
assert result._index_type == IndexType.FLAT
class TestIndexHAM:
class TestIndexBinary:
tmp, vectors = gen_binary_vectors(nb, dim)
@pytest.fixture(
@ -1475,6 +1475,28 @@ class TestIndexHAM:
else:
pytest.skip("Skip index Temporary")
@pytest.fixture(scope="function", params=gen_simple_index())
def get_substructure_index(self, request, connect):
    """Yield only the FLAT entries of gen_simple_index(); skip every other index type."""
    index_config = request.param
    logging.getLogger().info(index_config)
    is_flat = index_config["index_type"] == IndexType.FLAT
    if not is_flat:
        pytest.skip("Skip index Temporary")
    return index_config
@pytest.fixture(scope="function", params=gen_simple_index())
def get_superstructure_index(self, request, connect):
    """Yield only the FLAT entries of gen_simple_index(); skip every other index type."""
    index_config = request.param
    logging.getLogger().info(index_config)
    is_flat = index_config["index_type"] == IndexType.FLAT
    if not is_flat:
        pytest.skip("Skip index Temporary")
    return index_config
"""
******************************************************************
The following cases are used to test `create_index` function
@ -1514,6 +1536,23 @@ class TestIndexHAM:
status, res = connect.count_collection(ham_collection)
assert res == len(self.vectors)
@pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_index_partition_structure(self, connect, substructure_collection, get_substructure_index):
    '''
    target: test create index interface on a collection with a partition
    method: create collection, create partition, add vectors into the partition, create index
    expected: create_index status ok, and row count equals the number of vectors added
    '''
    index_config = get_substructure_index
    build_param = index_config["index_param"]
    kind = index_config["index_type"]
    logging.getLogger().info(index_config)
    connect.create_partition(substructure_collection, tag)
    connect.add_vectors(substructure_collection, self.vectors, partition_tag=tag)
    build_status = connect.create_index(substructure_collection, kind, build_param)
    assert build_status.OK()
    _, counted_rows = connect.count_collection(substructure_collection)
    assert counted_rows == len(self.vectors)
@pytest.mark.level(2)
def test_create_index_without_connect(self, dis_connect, ham_collection):
'''
@ -1547,6 +1586,27 @@ class TestIndexHAM:
assert status.OK()
assert len(result) == len(query_vecs)
@pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_index_search_with_query_vectors_superstructure(self, connect, superstructure_collection, get_superstructure_index):
    '''
    target: test create index interface, search with more query vectors
    method: create collection and add vectors in it, create index, search with three vectors
    expected: search status ok, and one result entry per query vector
    '''
    log = logging.getLogger()
    index_config = get_superstructure_index
    build_param = index_config["index_param"]
    kind = index_config["index_type"]
    log.info(index_config)
    connect.add_vectors(superstructure_collection, self.vectors)
    connect.create_index(superstructure_collection, kind, build_param)
    log.info(connect.describe_index(superstructure_collection))
    # Query with the first three inserted vectors.
    queries = [self.vectors[i] for i in range(3)]
    limit = 5
    search_status, hits = connect.search_vectors(
        superstructure_collection, limit, queries, params=get_search_param(kind))
    log.info(hits)
    assert search_status.OK()
    assert len(hits) == len(queries)
"""
******************************************************************
The following cases are used to test `describe_index` function
@ -1588,6 +1648,24 @@ class TestIndexHAM:
assert result._collection_name == ham_collection
assert result._index_type == index_type
def test_describe_index_partition_superstructrue(self, connect, superstructure_collection, get_superstructure_index):
    '''
    target: test describe index interface
    method: create collection, create partition and add vectors in it, create index, call describe index
    expected: describe_index reports the collection name, index type and params used at creation
    '''
    index_param = get_superstructure_index["index_param"]
    index_type = get_superstructure_index["index_type"]
    logging.getLogger().info(get_superstructure_index)
    status = connect.create_partition(superstructure_collection, tag)
    # Use the class-level vectors (self.vectors), as the sibling tests in this
    # class do; a bare `vectors` does not resolve to the class attribute
    # from inside a method.
    status, ids = connect.add_vectors(superstructure_collection, self.vectors, partition_tag=tag)
    status = connect.create_index(superstructure_collection, index_type, index_param)
    status, result = connect.describe_index(superstructure_collection)
    logging.getLogger().info(result)
    assert result._params == index_param
    assert result._collection_name == superstructure_collection
    assert result._index_type == index_type
"""
******************************************************************
The following cases are used to test `drop_index` function
@ -1616,6 +1694,27 @@ class TestIndexHAM:
assert result._collection_name == ham_collection
assert result._index_type == IndexType.FLAT
def test_drop_index_substructure(self, connect, substructure_collection, get_substructure_index):
    '''
    target: test drop index interface
    method: create index on the collection, call drop index, then describe index
    expected: drop status ok, and describe_index reports the FLAT index type afterwards
    '''
    log = logging.getLogger()
    build_param = get_substructure_index["index_param"]
    kind = get_substructure_index["index_type"]

    # The returned mode value is not used; only the command status is checked.
    cmd_status, _ = connect._cmd("mode")
    assert cmd_status.OK()

    build_status = connect.create_index(substructure_collection, kind, build_param)
    assert build_status.OK()
    _, before_drop = connect.describe_index(substructure_collection)
    log.info(before_drop)

    drop_status = connect.drop_index(substructure_collection)
    assert drop_status.OK()
    _, after_drop = connect.describe_index(substructure_collection)
    log.info(after_drop)
    assert after_drop._collection_name == substructure_collection
    assert after_drop._index_type == IndexType.FLAT
def test_drop_index_partition(self, connect, ham_collection, get_hamming_index):
'''
target: test drop index interface

View File

@ -120,6 +120,17 @@ class TestSearchBase:
else:
pytest.skip("Skip index Temporary")
@pytest.fixture(scope="function", params=gen_simple_index())
def get_structure_index(self, request, connect):
    """Yield only the FLAT entries of gen_simple_index(); skip every other index type."""
    index_config = request.param
    logging.getLogger().info(index_config)
    is_flat = index_config["index_type"] == IndexType.FLAT
    if not is_flat:
        pytest.skip("Skip index Temporary")
    return index_config
"""
generate top-k params
"""
@ -640,6 +651,58 @@ class TestSearchBase:
logging.getLogger().info(result)
assert abs(result[0][0].distance - min(distance_0, distance_1).astype(float)) <= epsilon
def test_search_distance_substructure_flat_index(self, connect, substructure_collection):
    '''
    target: search substructure_collection, and check the result: distance
    method: compare the returned distance with the value computed locally by substructure()
    expected: the top-1 distance equals the smaller of the two computed values (within epsilon)
    '''
    top_k = 1
    # Insert two binary vectors; the raw int form is kept for the local computation.
    int_vectors, _, _ = self.init_binary_data(connect, substructure_collection, nb=2)
    index_type = IndexType.FLAT
    index_param = {
        "nlist": 16384
    }
    connect.create_index(substructure_collection, index_type, index_param)
    logging.getLogger().info(connect.describe_collection(substructure_collection))
    logging.getLogger().info(connect.describe_index(substructure_collection))
    # Generate one query vector without inserting it into the collection.
    query_int_vectors, query_vecs, _ = self.init_binary_data(connect, substructure_collection, nb=1, insert=False)
    distance_0 = substructure(query_int_vectors[0], int_vectors[0])
    distance_1 = substructure(query_int_vectors[0], int_vectors[1])
    search_param = get_search_param(index_type)
    status, result = connect.search_vectors(substructure_collection, top_k, query_vecs, params=search_param)
    logging.getLogger().info(status)
    logging.getLogger().info(result)
    assert abs(result[0][0].distance - min(distance_0, distance_1).astype(float)) <= epsilon
def test_search_distance_superstructure_flat_index(self, connect, superstructure_collection):
    '''
    target: search superstructure_collection, and check the result: distance
    method: compare the returned distance with the value computed locally by superstructure()
    expected: the top-1 distance equals the smaller of the two computed values (within epsilon)
    '''
    top_k = 1
    # Insert two binary vectors; the raw int form is kept for the local computation.
    int_vectors, _, _ = self.init_binary_data(connect, superstructure_collection, nb=2)
    index_type = IndexType.FLAT
    index_param = {
        "nlist": 16384
    }
    connect.create_index(superstructure_collection, index_type, index_param)
    logging.getLogger().info(connect.describe_collection(superstructure_collection))
    logging.getLogger().info(connect.describe_index(superstructure_collection))
    # Generate one query vector without inserting it into the collection.
    query_int_vectors, query_vecs, _ = self.init_binary_data(connect, superstructure_collection, nb=1, insert=False)
    distance_0 = superstructure(query_int_vectors[0], int_vectors[0])
    distance_1 = superstructure(query_int_vectors[0], int_vectors[1])
    search_param = get_search_param(index_type)
    status, result = connect.search_vectors(superstructure_collection, top_k, query_vecs, params=search_param)
    logging.getLogger().info(status)
    logging.getLogger().info(result)
    assert abs(result[0][0].distance - min(distance_0, distance_1).astype(float)) <= epsilon
def test_search_distance_tanimoto_flat_index(self, connect, tanimoto_collection):
'''
target: search ip_collection, and check the result: distance

View File

@ -55,6 +55,18 @@ def tanimoto(x, y):
return -np.log2(np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum()))
def substructure(x, y):
    """Return 1 - |x AND y| / |y| for two bit vectors.

    Both inputs are converted to boolean arrays; the numerator counts the
    positions set in both, the denominator counts the positions set in ``y``.
    The result is a NumPy float scalar. No guard is applied when ``y`` has
    no set bits (the division is left to NumPy).
    """
    # Builtin `bool` replaces the `np.bool` alias, which NumPy deprecated in
    # 1.20 and removed in 1.24.
    x = np.asarray(x, dtype=bool)
    y = np.asarray(y, dtype=bool)
    return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(y)
def superstructure(x, y):
    """Return 1 - |x AND y| / |x| for two bit vectors.

    Both inputs are converted to boolean arrays; the numerator counts the
    positions set in both, the denominator counts the positions set in ``x``.
    The result is a NumPy float scalar. No guard is applied when ``x`` has
    no set bits (the division is left to NumPy).
    """
    # Builtin `bool` replaces the `np.bool` alias, which NumPy deprecated in
    # 1.20 and removed in 1.24.
    x = np.asarray(x, dtype=bool)
    y = np.asarray(y, dtype=bool)
    return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(x)
def gen_single_vector(dim):
    """Return a one-element list holding a list of ``dim`` floats drawn from random.random()."""
    components = []
    for _ in range(dim):
        components.append(random.random())
    return [components]