# milvus/tests/milvus_python_test/utils.py
#
# 630 lines
# 16 KiB
# Python

# Standard-library and third-party imports
import random
import string
import struct
import sys
import logging
import time, datetime
import copy
import numpy as np
from milvus import Milvus, IndexType, MetricType
# Port number used by the commented-out URI templates in gen_invalid_uris.
port = 19530
# Absolute tolerance applied per component by assert_equal_vector.
epsilon = 1e-6
def get_milvus(handler=None):
    """Construct a Milvus client.

    :param handler: value forwarded as ``Milvus(handler=...)``; ``None``
        selects ``"GRPC"``.
    :return: the newly created ``Milvus`` instance.
    """
    return Milvus(handler="GRPC" if handler is None else handler)
def gen_inaccuracy(num):
    """Return ``num`` divided by 255.0."""
    scaled = num / 255.0
    return scaled
def gen_vectors(num, dim):
    """Return ``num`` lists, each holding ``dim`` floats from ``random.random()``.

    Values are drawn row by row from the module-level ``random`` generator.
    """
    vectors = []
    for _ in range(num):
        row = []
        for _ in range(dim):
            row.append(random.random())
        vectors.append(row)
    return vectors
def gen_binary_vectors(num, dim):
    """Generate ``num`` random bit vectors together with their packed bytes.

    :param num: number of vectors to generate.
    :param dim: number of bits per vector.
    :return: ``(raw_vectors, binary_vectors)`` where ``raw_vectors`` is a list
        of 0/1 integer lists and ``binary_vectors`` holds the matching
        ``bytes`` objects produced by ``np.packbits`` (eight bits per byte).
    """
    # Only this step consumes randomness, so draw every bit first.
    raw_vectors = [[random.randint(0, 1) for _ in range(dim)] for _ in range(num)]
    binary_vectors = [
        bytes(np.packbits(bits, axis=-1).tolist()) for bits in raw_vectors
    ]
    return raw_vectors, binary_vectors
def jaccard(x, y):
    """Return the Jaccard distance between two binary vectors.

    Both inputs are converted to boolean arrays and the result is
    ``1 - |x AND y| / |x OR y|``.

    :param x: array-like of 0/1 (or truthy/falsy) values.
    :param y: array-like with the same shape as ``x``.
    :return: the distance as a NumPy float; NaN when neither vector has a
        set bit, because the union count is then zero.
    """
    # The builtin ``bool`` is used because the ``np.bool`` alias is missing
    # from NumPy 1.24-1.26 and raises AttributeError there.
    x = np.asarray(x, bool)
    y = np.asarray(y, bool)
    intersection = np.double(np.bitwise_and(x, y).sum())
    union = np.double(np.bitwise_or(x, y).sum())
    return 1 - intersection / union
def hamming(x, y):
    """Return the Hamming distance between two binary vectors.

    Both inputs are converted to boolean arrays; the result is the number of
    positions where they differ.

    :param x: array-like of 0/1 (or truthy/falsy) values.
    :param y: array-like with the same shape as ``x``.
    :return: the count of differing positions as a NumPy integer.
    """
    # The builtin ``bool`` is used because the ``np.bool`` alias is missing
    # from NumPy 1.24-1.26 and raises AttributeError there.
    x = np.asarray(x, bool)
    y = np.asarray(y, bool)
    return np.bitwise_xor(x, y).sum()
def tanimoto(x, y):
    """Return the Tanimoto distance between two binary vectors.

    Both inputs are converted to boolean arrays and the result is
    ``-log2(|x AND y| / |x OR y|)``.

    :param x: array-like of 0/1 (or truthy/falsy) values.
    :param y: array-like with the same shape as ``x``.
    :return: the distance as a NumPy float; ``inf`` when the vectors share no
        set bit, NaN when neither vector has a set bit.
    """
    # The builtin ``bool`` is used because the ``np.bool`` alias is missing
    # from NumPy 1.24-1.26 and raises AttributeError there.
    x = np.asarray(x, bool)
    y = np.asarray(y, bool)
    intersection = np.double(np.bitwise_and(x, y).sum())
    union = np.double(np.bitwise_or(x, y).sum())
    return -np.log2(intersection / union)
def gen_single_vector(dim):
    """Return a one-element list holding a ``dim``-long list of random floats."""
    vector = []
    for _ in range(dim):
        vector.append(random.random())
    return [vector]
def gen_vector(nb, d, seed=np.random.RandomState(1234)):
    """Return ``nb`` float32 vectors of length ``d`` as nested Python lists.

    :param nb: number of vectors (rows).
    :param d: vector dimension (columns).
    :param seed: a ``RandomState``-like object providing ``rand``. The default
        instance is created once, when the function is defined, and is shared
        by every call that omits ``seed`` -- so successive default calls
        continue one pseudo-random stream rather than restarting it.
    :return: list of ``nb`` lists, each with ``d`` floats.
    """
    return seed.rand(nb, d).astype("float32").tolist()
def gen_unique_str(str_value=None):
    """Return a name ending in eight random alphanumeric characters.

    :param str_value: optional prefix; when ``None`` the prefix ``"test"`` is
        used.
    :return: ``"<prefix>_<8 random chars>"``.
    """
    alphabet = string.ascii_letters + string.digits
    suffix = "".join(random.choice(alphabet) for _ in range(8))
    if str_value is None:
        return "test_" + suffix
    return str_value + "_" + suffix
def gen_long_str(num):
    """Return a random string of ``num`` characters drawn from ``'tomorrow'``.

    :param num: desired length; ``0`` yields the empty string.
    :return: the generated string.
    """
    # Return the joined result explicitly; without a return statement the
    # caller would receive None. Joining also avoids shadowing the ``string``
    # module with a local variable.
    return "".join(random.choice('tomorrow') for _ in range(num))
def gen_invalid_ips():
    """Return the host values the tests treat as invalid IP addresses.

    Not included at present: "255.0.0.0", "255.255.0.0", "255.255.255.0",
    "255.255.255.255" and "123.0.0.2".
    """
    return [
        # Truncated dotted address.
        "127.0.0",
        # Non-address strings.
        "12-s",
        " ",
        "12 s",
        "BB。A",
        " siede ",
        "(mn)",
        "\n",
        "\t",
        "中文",
        # 256 letters joined by 255 separators: a 511-character string.
        "a".join(["a"] * 256),
    ]
def gen_invalid_ports():
    """Return the port values the tests treat as invalid."""
    return [
        " ",       # blank
        -1,
        100000,    # too big for a port
        39540,     # not the correct port
        "BB。A",
        " siede ",
        "(mn)",
        "\n",
        "\t",
        "中文",
    ]
def gen_invalid_uris():
    """Return the connection URIs the tests treat as invalid.

    Not included at present: URIs with an invalid protocol or an invalid
    port, and URIs using the hosts 123.0.0.1, 255.0.0.0, 255.255.0.0,
    255.255.255.0 or 255.255.255.255.
    """
    return [
        " ",
        "中文",
        # Invalid host part.
        "tcp:// :19530",
        "tcp://127.0.0:19530",
        "tcp://\n:19530",
    ]
def gen_invalid_table_names():
    """Return the table names the tests treat as invalid.

    The empty string and ``None`` are not included at present.
    """
    return [
        "12-s",
        "12/s",
        " ",
        "12 s",
        "BB。A",
        "c|c",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        # 256 letters joined by 255 separators: a 511-character string.
        "a".join(["a"] * 256),
    ]
def gen_invalid_top_ks():
    """Return the ``top_k`` values the tests treat as invalid."""
    return [
        # Numbers and None.
        0,
        -1,
        None,
        # Containers.
        [1, 2, 3],
        (1, 2),
        {"a": 1},
        # Strings.
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        # 256 letters joined by 255 separators: a 511-character string.
        "a".join(["a"] * 256),
    ]
def gen_invalid_dims():
    """Return the vector-dimension values the tests treat as invalid."""
    return [
        # Numbers, None and a boolean.
        0,
        -1,
        100001,
        1000000000000001,
        None,
        False,
        # Containers.
        [1, 2, 3],
        (1, 2),
        {"a": 1},
        # Strings.
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        # 256 letters joined by 255 separators: a 511-character string.
        "a".join(["a"] * 256),
    ]
def gen_invalid_file_sizes():
    """Return the index-file-size values the tests treat as invalid."""
    return [
        # Numbers, None and a boolean.
        0,
        -1,
        1000000000000001,
        None,
        False,
        # Containers.
        [1, 2, 3],
        (1, 2),
        {"a": 1},
        # Strings.
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        # 256 letters joined by 255 separators: a 511-character string.
        "a".join(["a"] * 256),
    ]
def gen_invalid_index_types():
    """Return the index-type values the tests treat as invalid.

    ``None`` is not included at present.
    """
    return [
        # Numbers and a boolean.
        0,
        -1,
        100,
        1000000000000001,
        False,
        # Containers.
        [1, 2, 3],
        (1, 2),
        {"a": 1},
        # Strings.
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        # 256 letters joined by 255 separators: a 511-character string.
        "a".join(["a"] * 256),
    ]
def gen_invalid_params():
    """Return values the tests treat as invalid for numeric index/search params.

    ``None`` is not included at present.
    """
    return [
        # Numbers.
        9999999999,
        -1,
        # Containers.
        [1, 2, 3],
        (1, 2),
        {"a": 1},
        # Strings.
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
    ]
def gen_invalid_nprobes():
    """Return the ``nprobe`` values the tests treat as invalid."""
    return [
        # Numbers and None.
        0,
        -1,
        1000000000000001,
        None,
        # Containers.
        [1, 2, 3],
        (1, 2),
        {"a": 1},
        # Strings.
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
    ]
def gen_invalid_metric_types():
    """Return the metric-type values the tests treat as invalid.

    ``None`` is not included at present.
    """
    return [
        # Numbers.
        0,
        -1,
        1000000000000001,
        # Containers.
        [1, 2, 3],
        (1, 2),
        {"a": 1},
        # Strings.
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
    ]
def gen_invalid_vectors():
    """Return the values the tests treat as invalid vector payloads."""
    return [
        "1*2",
        # Lists that are empty, too short, or hold non-numeric items.
        [],
        [1],
        [1, 2],
        [" "],
        ['a'],
        [None],
        # Non-list values.
        None,
        (1, 2),
        {"a": 1},
        # Strings.
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        # 256 letters joined by 255 separators: a 511-character string.
        "a".join(["a"] * 256),
    ]
def gen_invalid_vector_ids():
    """Return the values the tests treat as invalid vector ids."""
    return [
        # Floats and None.
        1.0,
        -1.0,
        None,
        # Integer far too large for a 64-bit value.
        10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000,
        # Strings.
        " ",
        "",
        "String",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "=c",
        "\n",
        "中文",
    ]
def gen_invalid_cache_config():
    """Return the values the tests treat as invalid cache configuration."""
    return [
        # Numbers; the last one is 2**63.
        0,
        -1,
        9223372036854775808,
        # Containers.
        [1, 2, 3],
        (1, 2),
        {"a": 1},
        # Strings.
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        "'123'",
        "さようなら",
    ]
def gen_invalid_engine_config():
    """Return the values the tests treat as invalid engine configuration."""
    return [
        -1,
        # Containers.
        [1, 2, 3],
        (1, 2),
        {"a": 1},
        # Strings.
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        "'123'",
    ]
def gen_invaild_search_params():
    """Build invalid search-parameter dicts for each searchable index type.

    For every IVF-family type the ``nprobe`` key, and for HNSW the ``ef`` key,
    is paired with each value from ``gen_invalid_params()``; one extra entry
    per type carries the unknown key ``"invalid_key"``. ``IndexType.FLAT``
    contributes no entries, and RNSG (``search_length``) is not covered.

    :return: list of ``{"index_type": ..., "search_param": {...}}`` dicts,
        grouped by index type in the order IVFLAT, IVF_SQ8, IVF_SQ8H, IVF_PQ,
        HNSW.
    """
    # (index type, name of the search parameter that receives bad values)
    keyed_types = [
        (IndexType.IVFLAT, "nprobe"),
        (IndexType.IVF_SQ8, "nprobe"),
        (IndexType.IVF_SQ8H, "nprobe"),
        (IndexType.IVF_PQ, "nprobe"),
        (IndexType.HNSW, "ef"),
    ]
    search_params = []
    for index_type, key in keyed_types:
        search_params.extend(
            {"index_type": index_type, "search_param": {key: bad_value}}
            for bad_value in gen_invalid_params()
        )
        # One entry per type whose parameter name itself is unknown.
        search_params.append({"index_type": index_type, "search_param": {"invalid_key": 100}})
    return search_params
def gen_invalid_index():
    """Build index definitions that are each invalid in exactly one respect.

    The list is assembled in this order:

    1. every value from ``gen_invalid_index_types()`` as ``index_type`` with a
       valid ``nlist`` of 1024;
    2. ``IndexType.IVFLAT`` with each value from ``gen_invalid_params()`` as
       ``nlist``;
    3. ``IndexType.HNSW`` with each invalid value as ``M``
       (``efConstruction`` fixed at 100);
    4. ``IndexType.HNSW`` with each invalid value as ``efConstruction``
       (``M`` fixed at 16);
    5. one IVFLAT and one HNSW entry using the unknown key ``"invalid_key"``.

    RNSG parameters are not covered.

    :return: list of ``{"index_type": ..., "index_param": {...}}`` dicts.
    """
    index_params = []
    index_params.extend(
        {"index_type": index_type, "index_param": {"nlist": 1024}}
        for index_type in gen_invalid_index_types()
    )
    index_params.extend(
        {"index_type": IndexType.IVFLAT, "index_param": {"nlist": nlist}}
        for nlist in gen_invalid_params()
    )
    index_params.extend(
        {"index_type": IndexType.HNSW, "index_param": {"M": M, "efConstruction": 100}}
        for M in gen_invalid_params()
    )
    index_params.extend(
        {"index_type": IndexType.HNSW, "index_param": {"M": 16, "efConstruction": efConstruction}}
        for efConstruction in gen_invalid_params()
    )
    # Use the IVFLAT spelling here as everywhere else in this module, rather
    # than relying on a differently spelled IVF_FLAT member being available.
    index_params.append({"index_type": IndexType.IVFLAT, "index_param": {"invalid_key": 1024}})
    index_params.append({"index_type": IndexType.HNSW, "index_param": {"invalid_key": 16, "efConstruction": 100}})
    return index_params
def gen_index():
    """Build the full grid of valid index definitions used by the tests.

    Entries appear grouped by index type in the order FLAT, IVFLAT, IVF_SQ8,
    IVF_SQ8H, IVF_PQ, HNSW:

    * FLAT: a single entry with ``nlist`` 1024;
    * IVFLAT / IVF_SQ8 / IVF_SQ8H: one entry per ``nlist``;
    * IVF_PQ: every ``(nlist, m)`` combination, ``nlist`` varying slowest;
    * HNSW: every ``(M, efConstruction)`` combination, ``M`` varying slowest.

    :return: list of ``{"index_type": ..., "index_param": {...}}`` dicts.
    """
    nlists = [1, 1024, 16384]
    pq_ms = [128, 64, 32, 16, 8, 4]
    Ms = [5, 24, 48]
    efConstructions = [100, 300, 500]
    # RNSG is not generated. Its grids would be: search_length [10, 100, 300],
    # out_degree [5, 40, 300], candidate_pool_size [50, 100, 300],
    # knng [5, 100, 300].

    index_params = [{"index_type": IndexType.FLAT, "index_param": {"nlist": 1024}}]

    for ivf_type in (IndexType.IVFLAT, IndexType.IVF_SQ8, IndexType.IVF_SQ8H):
        for nlist in nlists:
            index_params.append({"index_type": ivf_type, "index_param": {"nlist": nlist}})

    for nlist in nlists:
        for m in pq_ms:
            index_params.append(
                {"index_type": IndexType.IVF_PQ, "index_param": {"nlist": nlist, "m": m}}
            )

    for M in Ms:
        for efConstruction in efConstructions:
            index_params.append(
                {"index_type": IndexType.HNSW,
                 "index_param": {"M": M, "efConstruction": efConstruction}}
            )

    return index_params
def gen_simple_index():
    """Return one representative valid index definition per index type.

    RNSG is not included.

    :return: list of ``{"index_type": ..., "index_param": {...}}`` dicts in
        the order FLAT, IVFLAT, IVF_SQ8, IVF_SQ8H, IVF_PQ, HNSW.
    """
    # Each index type sits next to the parameters it is paired with.
    type_and_params = [
        (IndexType.FLAT, {"nlist": 1024}),
        (IndexType.IVFLAT, {"nlist": 1024}),
        (IndexType.IVF_SQ8, {"nlist": 1024}),
        (IndexType.IVF_SQ8H, {"nlist": 1024}),
        (IndexType.IVF_PQ, {"nlist": 1024, "m": 16}),
        (IndexType.HNSW, {"M": 16, "efConstruction": 500}),
    ]
    return [
        {"index_type": index_type, "index_param": param}
        for index_type, param in type_and_params
    ]
def get_search_param(index_type):
    """Return the default search parameters for ``index_type``.

    :param index_type: an ``IndexType`` member.
    :return: ``{"nprobe": 32}`` for FLAT and the IVF family, ``{"ef": 64}``
        for HNSW; for anything else an INFO message is logged on the root
        logger and ``None`` is returned. RNSG is not handled.
    """
    nprobe_types = [
        IndexType.FLAT,
        IndexType.IVFLAT,
        IndexType.IVF_SQ8,
        IndexType.IVF_SQ8H,
        IndexType.IVF_PQ,
    ]
    if index_type in nprobe_types:
        return {"nprobe": 32}
    if index_type == IndexType.HNSW:
        return {"ef": 64}
    logging.getLogger().info("Invalid index_type.")
    return None
def assert_has_table(conn, table_name):
    """Report whether ``table_name`` exists according to ``conn``.

    Despite the name, nothing is asserted here; the caller is expected to
    assert on the returned value.

    :param conn: object whose ``has_table(name)`` returns ``(status, flag)``.
    :param table_name: name passed through to ``conn.has_table``.
    :return: the result of ``status.OK()`` when it is falsy, otherwise the
        flag returned by ``has_table``.
    """
    status, exists = conn.has_table(table_name)
    call_ok = status.OK()
    if not call_ok:
        return call_ok
    return exists
def assert_equal_vector(v1, v2):
    """Assert that two vectors match component-wise within ``epsilon``.

    :param v1: first sequence of numbers.
    :param v2: second sequence of numbers.
    :raises AssertionError: if the lengths differ or any pair of components
        differs by ``epsilon`` or more.
    """
    assert len(v1) == len(v2)
    for left, right in zip(v1, v2):
        assert abs(left - right) < epsilon