# milvus/tests/milvus_python_test/utils.py

import os
import sys
import random
import string
import struct
import logging
import time, datetime
import copy
import numpy as np
from milvus import Milvus, IndexType, MetricType

port = 19530
epsilon = 0.000001
default_flush_interval = 1
big_flush_interval = 1000
all_index_types = [
    IndexType.FLAT,
    IndexType.IVFLAT,
    IndexType.IVF_SQ8,
    IndexType.IVF_SQ8H,
    IndexType.IVF_PQ,
    IndexType.HNSW,
    IndexType.RNSG,
    IndexType.ANNOY
]

def get_milvus(host, port, uri=None, handler=None, **kwargs):
    if handler is None:
        handler = "GRPC"
    try_connect = kwargs.get("try_connect", True)
    if uri is not None:
        milvus = Milvus(uri=uri, handler=handler, try_connect=try_connect)
    else:
        milvus = Milvus(host=host, port=port, handler=handler, try_connect=try_connect)
    return milvus
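
# A minimal usage sketch (assumes a Milvus server reachable at the given host
# and the default gRPC `port` defined above); not called by the tests:
#   client = get_milvus("127.0.0.1", port)
#   status, exists = client.has_collection("some_collection")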

def disable_flush(connect):
    status, reply = connect.set_config("storage", "auto_flush_interval", big_flush_interval)
    assert status.OK()

def enable_flush(connect):
    # reset auto_flush_interval=1
    status, reply = connect.set_config("storage", "auto_flush_interval", default_flush_interval)
    assert status.OK()
    status, config_value = connect.get_config("storage", "auto_flush_interval")
    assert status.OK()
    assert config_value == str(default_flush_interval)
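
# Illustrative pattern only (an assumption about intended usage, not a helper
# the suite defines): wrap bulk inserts so auto-flush is restored even if the
# insert block raises.
def _example_flush_window(connect):
    disable_flush(connect)
    try:
        pass  # bulk inserts would go here
    finally:
        enable_flush(connect)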

def gen_inaccuracy(num):
    return num / 255.0

def gen_vectors(num, dim):
    return [[random.random() for _ in range(dim)] for _ in range(num)]

def gen_binary_vectors(num, dim):
    raw_vectors = []
    binary_vectors = []
    for i in range(num):
        raw_vector = [random.randint(0, 1) for i in range(dim)]
        raw_vectors.append(raw_vector)
        binary_vectors.append(bytes(np.packbits(raw_vector, axis=-1).tolist()))
    return raw_vectors, binary_vectors
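
# Illustrative check (not part of the original helpers): np.packbits packs
# 8 bits per byte, so a dim-bit raw vector becomes a dim // 8 byte string.
def _example_binary_vector_length():
    raw, binary = gen_binary_vectors(2, 16)
    assert len(raw[0]) == 16
    assert len(binary[0]) == 2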

def jaccard(x, y):
    x = np.asarray(x, np.bool)
    y = np.asarray(y, np.bool)
    return 1 - np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum())

def hamming(x, y):
    x = np.asarray(x, np.bool)
    y = np.asarray(y, np.bool)
    return np.bitwise_xor(x, y).sum()

def tanimoto(x, y):
    x = np.asarray(x, np.bool)
    y = np.asarray(y, np.bool)
    return -np.log2(np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum()))

def substructure(x, y):
    x = np.asarray(x, np.bool)
    y = np.asarray(y, np.bool)
    return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(y)

def superstructure(x, y):
    x = np.asarray(x, np.bool)
    y = np.asarray(y, np.bool)
    return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(x)
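
# Illustrative values for the binary metrics above on two 4-bit vectors
# (a sketch added for clarity, not part of the original helpers):
def _example_binary_metrics():
    x = [1, 1, 0, 0]
    y = [1, 0, 1, 0]
    assert hamming(x, y) == 2  # two differing bits
    assert abs(jaccard(x, y) - 2.0 / 3) < epsilon  # 1 - |x AND y| / |x OR y| = 1 - 1/3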

def gen_binary_sub_vectors(vectors, length):
    raw_vectors = []
    binary_vectors = []
    dim = len(vectors[0])
    for i in range(length):
        raw_vector = [0 for i in range(dim)]
        vector = vectors[i]
        for index, j in enumerate(vector):
            if j == 1:
                raw_vector[index] = 1
        raw_vectors.append(raw_vector)
        binary_vectors.append(bytes(np.packbits(raw_vector, axis=-1).tolist()))
    return raw_vectors, binary_vectors

def gen_binary_super_vectors(vectors, length):
    raw_vectors = []
    binary_vectors = []
    dim = len(vectors[0])
    for i in range(length):
        cnt_1 = np.count_nonzero(vectors[i])
        raw_vector = [1 for i in range(dim)]
        raw_vectors.append(raw_vector)
        binary_vectors.append(bytes(np.packbits(raw_vector, axis=-1).tolist()))
    return raw_vectors, binary_vectors

def gen_single_vector(dim):
    return [[random.random() for _ in range(dim)]]

def gen_vector(nb, d, seed=np.random.RandomState(1234)):
    xb = seed.rand(nb, d).astype("float32")
    return xb.tolist()
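
# Determinism sketch (not part of the original helpers): passing an explicit
# RandomState reproduces the same vectors, while the default `seed` object is
# created once at import time and shared across calls.
def _example_gen_vector_determinism():
    a = gen_vector(2, 8, seed=np.random.RandomState(1234))
    b = gen_vector(2, 8, seed=np.random.RandomState(1234))
    assert a == b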

def gen_unique_str(str_value=None):
    prefix = "".join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
    return "test_" + prefix if str_value is None else str_value + "_" + prefix

def gen_long_str(num):
    # build and return a string of `num` characters drawn from "tomorrow"
    # (use a local name that does not shadow the `string` module)
    long_str = ''
    for _ in range(num):
        long_str += random.choice('tomorrow')
    return long_str

def gen_invalid_ips():
    ips = [
        # "255.0.0.0",
        # "255.255.0.0",
        # "255.255.255.0",
        # "255.255.255.255",
        "127.0.0",
        # "123.0.0.2",
        "12-s",
        " ",
        "12 s",
        "BB。A",
        " siede ",
        "(mn)",
        "中文",
        "a".join("a" for _ in range(256))
    ]
    return ips

def gen_invalid_ports():
    ports = [
        # empty
        " ",
        -1,
        # too big port
        100000,
        # not correct port
        39540,
        "BB。A",
        " siede ",
        "(mn)",
        "中文"
    ]
    return ports

def gen_invalid_uris():
    ip = None
    uris = [
        " ",
        "中文",
        # invalid protocol
        # "tc://%s:%s" % (ip, port),
        # "tcp%s:%s" % (ip, port),
        # # invalid port
        # "tcp://%s:100000" % ip,
        # "tcp://%s: " % ip,
        # "tcp://%s:19540" % ip,
        # "tcp://%s:-1" % ip,
        # "tcp://%s:string" % ip,
        # invalid ip
        "tcp:// :19530",
        # "tcp://123.0.0.1:%s" % port,
        "tcp://127.0.0:19530",
        # "tcp://255.0.0.0:%s" % port,
        # "tcp://255.255.0.0:%s" % port,
        # "tcp://255.255.255.0:%s" % port,
        # "tcp://255.255.255.255:%s" % port,
        "tcp://\n:19530",
    ]
    return uris

def gen_invalid_collection_names():
    collection_names = [
        "12-s",
        " ",
        # "",
        # None,
        "12 s",
        "BB。A",
        "c|c",
        " siede ",
        "(mn)",
        "pip+",
        "=c",
        "中文",
        "a".join("a" for i in range(256))
    ]
    return collection_names

def gen_invalid_top_ks():
    top_ks = [
        0,
        -1,
        None,
        [1,2,3],
        (1,2),
        {"a": 1},
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "pip+",
        "=c",
        "中文",
        "a".join("a" for i in range(256))
    ]
    return top_ks

def gen_invalid_dims():
    dims = [
        0,
        -1,
        100001,
        1000000000000001,
        None,
        False,
        [1,2,3],
        (1,2),
        {"a": 1},
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "pip+",
        "=c",
        "中文",
        "a".join("a" for i in range(256))
    ]
    return dims

def gen_invalid_file_sizes():
    file_sizes = [
        0,
        -1,
        1000000000000001,
        None,
        False,
        [1,2,3],
        (1,2),
        {"a": 1},
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "pip+",
        "=c",
        "中文",
        "a".join("a" for i in range(256))
    ]
    return file_sizes

def gen_invalid_index_types():
    invalid_types = [
        0,
        -1,
        100,
        1000000000000001,
        # None,
        False,
        [1,2,3],
        (1,2),
        {"a": 1},
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "pip+",
        "=c",
        "中文",
        "a".join("a" for i in range(256))
    ]
    return invalid_types

def gen_invalid_params():
    params = [
        9999999999,
        -1,
        # None,
        [1,2,3],
        (1,2),
        {"a": 1},
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "pip+",
        "=c",
        "中文"
    ]
    return params

def gen_invalid_nprobes():
    nprobes = [
        0,
        -1,
        1000000000000001,
        None,
        [1,2,3],
        (1,2),
        {"a": 1},
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "pip+",
        "=c",
        "中文"
    ]
    return nprobes

def gen_invalid_metric_types():
    metric_types = [
        0,
        -1,
        1000000000000001,
        # None,
        [1,2,3],
        (1,2),
        {"a": 1},
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "pip+",
        "=c",
        "中文"
    ]
    return metric_types

def gen_invalid_vectors():
    invalid_vectors = [
        "1*2",
        [],
        [1],
        [1,2],
        [" "],
        ['a'],
        [None],
        None,
        (1,2),
        {"a": 1},
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "pip+",
        "=c",
        "中文",
        "a".join("a" for i in range(256))
    ]
    return invalid_vectors

def gen_invalid_vector_ids():
    invalid_vector_ids = [
        1.0,
        -1.0,
        None,
        # int 64
        10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000,
        " ",
        "",
        "String",
        "BB。A",
        " siede ",
        "(mn)",
        "=c",
        "中文",
    ]
    return invalid_vector_ids

def gen_invalid_cache_config():
    invalid_configs = [
        0,
        -1,
        9223372036854775808,
        [1,2,3],
        (1,2),
        {"a": 1},
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "pip+",
        "=c",
        "中文",
        "'123'",
        "さようなら"
    ]
    return invalid_configs

def gen_invalid_gpu_config():
    invalid_configs = [
        -1,
        [1,2,3],
        (1,2),
        {"a": 1},
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "pip+",
        "=c",
        "中文",
        "'123'",
    ]
    return invalid_configs

def gen_invaild_search_params():
    invalid_search_key = 100
    search_params = []
    for index_type in all_index_types:
        if index_type == IndexType.FLAT:
            continue
        search_params.append({"index_type": index_type, "search_param": {"invalid_key": invalid_search_key}})
        if index_type in [IndexType.IVFLAT, IndexType.IVF_SQ8, IndexType.IVF_SQ8H, IndexType.IVF_PQ]:
            for nprobe in gen_invalid_params():
                ivf_search_params = {"index_type": index_type, "search_param": {"nprobe": nprobe}}
                search_params.append(ivf_search_params)
        elif index_type == IndexType.HNSW:
            for ef in gen_invalid_params():
                hnsw_search_param = {"index_type": index_type, "search_param": {"ef": ef}}
                search_params.append(hnsw_search_param)
        elif index_type == IndexType.RNSG:
            for search_length in gen_invalid_params():
                nsg_search_param = {"index_type": index_type, "search_param": {"search_length": search_length}}
                search_params.append(nsg_search_param)
            search_params.append({"index_type": index_type, "search_param": {"invalid_key": 100}})
        elif index_type == IndexType.ANNOY:
            for search_k in gen_invalid_params():
                if isinstance(search_k, int):
                    continue
                annoy_search_param = {"index_type": index_type, "search_param": {"search_k": search_k}}
                search_params.append(annoy_search_param)
    return search_params
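
# Shape check (illustrative only): every generated entry pairs an index type
# with a "search_param" dict holding one invalid value, e.g.
# {"index_type": IndexType.HNSW, "search_param": {"ef": -1}}.
def _example_inspect_invalid_search_params():
    for param in gen_invaild_search_params():
        assert set(param) == {"index_type", "search_param"}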

def gen_invalid_index():
    index_params = []
    for index_type in gen_invalid_index_types():
        index_param = {"index_type": index_type, "index_param": {"nlist": 1024}}
        index_params.append(index_param)
    for nlist in gen_invalid_params():
        index_param = {"index_type": IndexType.IVFLAT, "index_param": {"nlist": nlist}}
        index_params.append(index_param)
    for M in gen_invalid_params():
        index_param = {"index_type": IndexType.HNSW, "index_param": {"M": M, "efConstruction": 100}}
        index_params.append(index_param)
    for efConstruction in gen_invalid_params():
        index_param = {"index_type": IndexType.HNSW, "index_param": {"M": 16, "efConstruction": efConstruction}}
        index_params.append(index_param)
    for search_length in gen_invalid_params():
        index_param = {"index_type": IndexType.RNSG,
                       "index_param": {"search_length": search_length, "out_degree": 40, "candidate_pool_size": 50,
                                       "knng": 100}}
        index_params.append(index_param)
    for out_degree in gen_invalid_params():
        index_param = {"index_type": IndexType.RNSG,
                       "index_param": {"search_length": 100, "out_degree": out_degree, "candidate_pool_size": 50,
                                       "knng": 100}}
        index_params.append(index_param)
    for candidate_pool_size in gen_invalid_params():
        index_param = {"index_type": IndexType.RNSG, "index_param": {"search_length": 100, "out_degree": 40,
                                                                     "candidate_pool_size": candidate_pool_size,
                                                                     "knng": 100}}
        index_params.append(index_param)
    index_params.append({"index_type": IndexType.IVF_FLAT, "index_param": {"invalid_key": 1024}})
    index_params.append({"index_type": IndexType.HNSW, "index_param": {"invalid_key": 16, "efConstruction": 100}})
    index_params.append({"index_type": IndexType.RNSG,
                         "index_param": {"invalid_key": 100, "out_degree": 40, "candidate_pool_size": 300,
                                         "knng": 100}})
    for invalid_n_trees in gen_invalid_params():
        index_params.append({"index_type": IndexType.ANNOY, "index_param": {"n_trees": invalid_n_trees}})
    return index_params

def gen_index():
    nlists = [1, 1024, 16384]
    pq_ms = [128, 64, 32, 16, 8, 4]
    Ms = [5, 24, 48]
    efConstructions = [100, 300, 500]
    search_lengths = [10, 100, 300]
    out_degrees = [5, 40, 300]
    candidate_pool_sizes = [50, 100, 300]
    knngs = [5, 100, 300]
    index_params = []
    for index_type in all_index_types:
        if index_type == IndexType.FLAT:
            index_params.append({"index_type": index_type, "index_param": {"nlist": 1024}})
        elif index_type in [IndexType.IVFLAT, IndexType.IVF_SQ8, IndexType.IVF_SQ8H]:
            ivf_params = [{"index_type": index_type, "index_param": {"nlist": nlist}}
                          for nlist in nlists]
            index_params.extend(ivf_params)
        elif index_type == IndexType.IVF_PQ:
            ivf_pq_params = [{"index_type": index_type, "index_param": {"nlist": nlist, "m": m}}
                             for nlist in nlists
                             for m in pq_ms]
            index_params.extend(ivf_pq_params)
        elif index_type == IndexType.HNSW:
            hnsw_params = [{"index_type": index_type, "index_param": {"M": M, "efConstruction": efConstruction}}
                           for M in Ms
                           for efConstruction in efConstructions]
            index_params.extend(hnsw_params)
        elif index_type == IndexType.RNSG:
            nsg_params = [{"index_type": index_type,
                           "index_param": {"search_length": search_length, "out_degree": out_degree,
                                           "candidate_pool_size": candidate_pool_size, "knng": knng}}
                          for search_length in search_lengths
                          for out_degree in out_degrees
                          for candidate_pool_size in candidate_pool_sizes
                          for knng in knngs]
            index_params.extend(nsg_params)
    return index_params

def gen_simple_index():
    # one parameter set per entry in all_index_types, in the same order
    params = [
        {"nlist": 1024},
        {"nlist": 1024},
        {"nlist": 1024},
        {"nlist": 1024},
        {"nlist": 1024, "m": 16},
        {"M": 48, "efConstruction": 500},
        {"search_length": 50, "out_degree": 40, "candidate_pool_size": 100, "knng": 50},
        {"n_trees": 4}
    ]
    index_params = []
    for i in range(len(all_index_types)):
        index_params.append({"index_type": all_index_types[i], "index_param": params[i]})
    return index_params
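
# A minimal end-to-end sketch, assuming a reachable Milvus server and the
# legacy client API used elsewhere in this suite; the create_index/search
# call signatures here are assumptions, and this function is never called by
# the tests themselves.
def _example_index_then_search(connect, collection_name, query_vectors, top_k=1):
    for entry in gen_simple_index():
        index_type = entry["index_type"]
        status = connect.create_index(collection_name, index_type, entry["index_param"])
        assert status.OK()
        search_param = get_search_param(index_type)
        status, results = connect.search(collection_name, top_k, query_vectors, params=search_param)
        assert status.OK()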

def get_search_param(index_type):
    if index_type in [IndexType.FLAT, IndexType.IVFLAT, IndexType.IVF_SQ8, IndexType.IVF_SQ8H, IndexType.IVF_PQ]:
        return {"nprobe": 32}
    elif index_type == IndexType.HNSW:
        return {"ef": 64}
    elif index_type == IndexType.RNSG:
        return {"search_length": 100}
    elif index_type == IndexType.ANNOY:
        return {"search_k": 100}
    else:
        logging.getLogger().info("Invalid index_type.")

def assert_has_collection(conn, collection_name):
    status, ok = conn.has_collection(collection_name)
    return status.OK() and ok

def assert_equal_vector(v1, v2):
    if len(v1) != len(v2):
        assert False
    for i in range(len(v1)):
        assert abs(v1[i] - v2[i]) < epsilon

def restart_server(helm_release_name):
    res = True
    timeout = 120
    from kubernetes import client, config
    client.rest.logger.setLevel(logging.WARNING)
    namespace = "milvus"
    # service_name = "%s.%s.svc.cluster.local" % (helm_release_name, namespace)
    config.load_kube_config()
    v1 = client.CoreV1Api()
    pod_name = None
    # config_map_names = v1.list_namespaced_config_map(namespace, pretty='true')
    # body = {"replicas": 0}
    pods = v1.list_namespaced_pod(namespace)
    for i in pods.items:
        if i.metadata.name.find(helm_release_name) != -1 and i.metadata.name.find("mysql") == -1:
            pod_name = i.metadata.name
            break
    # v1.patch_namespaced_config_map(config_map_name, namespace, body, pretty='true')
    # status_res = v1.read_namespaced_service_status(helm_release_name, namespace, pretty='true')
    # print(status_res)
    if pod_name is not None:
        try:
            v1.delete_namespaced_pod(pod_name, namespace)
        except Exception as e:
            logging.error(str(e))
            logging.error("Exception when calling CoreV1Api->delete_namespaced_pod")
            res = False
            return res
        time.sleep(5)
        # check if the pod restarted successfully
        pods = v1.list_namespaced_pod(namespace)
        for i in pods.items:
            pod_name_tmp = i.metadata.name
            if pod_name_tmp.find(helm_release_name) != -1:
                logging.debug(pod_name_tmp)
                start_time = time.time()
                # poll the new pod for up to `timeout` seconds until it is Running
                while time.time() - start_time < timeout:
                    status_res = v1.read_namespaced_pod_status(pod_name_tmp, namespace, pretty='true')
                    if status_res.status.phase == "Running":
                        break
                    time.sleep(1)
                if time.time() - start_time > timeout:
                    logging.error("Restart pod: %s timeout" % pod_name_tmp)
                    res = False
        return res
    else:
        logging.error("Pod: %s not found" % helm_release_name)
        res = False
        return res
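
# Usage sketch (assumes a local kube config and a Helm release whose pod names
# contain `helm_release_name`); the release name below is illustrative only:
#   assert restart_server("milvus-ci")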