mirror of https://github.com/milvus-io/milvus.git
551 lines
12 KiB
Python
551 lines
12 KiB
Python
# STL imports
|
|
import random
|
|
import string
|
|
import struct
|
|
import sys
|
|
import time, datetime
|
|
import copy
|
|
import numpy as np
|
|
from utils import *
|
|
from milvus import Milvus, IndexType, MetricType
|
|
|
|
|
|
def gen_inaccuracy(num):
    """Return ``num`` divided by 255.0."""
    scale = 255.0
    return num / scale
|
|
|
|
def gen_vectors(num, dim):
    """Return ``num`` vectors, each a list of ``dim`` floats from ``random.random()``."""
    vectors = []
    for _ in range(num):
        # Rows are filled one after another, so the random stream is consumed row by row.
        row = [random.random() for _ in range(dim)]
        vectors.append(row)
    return vectors
|
|
|
|
|
|
def gen_single_vector(dim):
    """Return a one-element list wrapping a single vector of ``dim`` floats from ``random.random()``."""
    vector = []
    for _ in range(dim):
        vector.append(random.random())
    return [vector]
|
|
|
|
|
|
def gen_vector(nb, d, seed=np.random.RandomState(1234)):
    """Return ``nb`` vectors of dimension ``d`` as nested lists of float32-rounded values.

    ``seed`` must provide ``rand(nb, d)`` (e.g. a ``numpy.random.RandomState``).
    The default generator is created once, when the function is defined, so
    successive calls that rely on the default keep drawing from the same
    random stream rather than restarting it.
    """
    return seed.rand(nb, d).astype("float32").tolist()
|
|
|
|
|
|
def gen_unique_str(str_value=None):
    """Return a name ending in ``_`` plus 8 random ASCII letters/digits.

    The part before the underscore is ``str_value`` when given, otherwise ``test``.
    """
    alphabet = string.ascii_letters + string.digits
    suffix = "".join(random.choice(alphabet) for _ in range(8))
    if str_value is None:
        return "test_" + suffix
    return str_value + "_" + suffix
|
|
|
|
|
|
def get_current_day():
    """Return the current local date formatted as ``YYYY-MM-DD``."""
    now = time.localtime()
    return time.strftime('%Y-%m-%d', now)
|
|
|
|
|
|
def get_last_day(day):
    """Return the local date ``day`` days before now, formatted as ``YYYY-MM-DD``."""
    offset = datetime.timedelta(days=day)
    return (datetime.datetime.now() - offset).strftime('%Y-%m-%d')
|
|
|
|
|
|
def get_next_day(day):
    """Return the local date ``day`` days after now, formatted as ``YYYY-MM-DD``."""
    offset = datetime.timedelta(days=day)
    return (datetime.datetime.now() + offset).strftime('%Y-%m-%d')
|
|
|
|
|
|
def gen_long_str(num):
    """Return a random string of ``num`` characters drawn from the letters of 'tomorrow'.

    Previously the string was assembled but never returned, so every caller
    received ``None``. The local accumulator also shadowed the imported
    ``string`` module; building the result with ``str.join`` avoids both.

    :param num: number of characters to generate; 0 (or less) yields ``''``.
    :return: the generated string.
    """
    # One random.choice call per character, same as the original loop.
    return "".join(random.choice('tomorrow') for _ in range(num))
|
|
|
|
|
|
def gen_invalid_ips():
    """Return a list of strings intended as invalid IP/host values."""
    # 256 "a" items joined by the "a" separator -> a 511-character string.
    overlong = "a".join("a" for _ in range(256))
    # Candidates that are currently disabled:
    #   "255.0.0.0", "255.255.0.0", "255.255.255.0", "255.255.255.255", "123.0.0.2"
    return [
        "127.0.0",
        "12-s",
        " ",
        "12 s",
        "BB。A",
        " siede ",
        "(mn)",
        "\n",
        "\t",
        "中文",
        overlong,
    ]
|
|
|
|
|
|
def gen_invalid_ports():
    """Return a list of values intended as invalid port arguments."""
    numeric = [
        -1,
        # too big port
        100000,
        # not correct port
        39540,
    ]
    textual = [
        "BB。A",
        " siede ",
        "(mn)",
        "\n",
        "\t",
        "中文",
    ]
    # The blank (single-space) entry comes first, then the numbers, then the strings.
    return [" "] + numeric + textual
|
|
|
|
|
|
def gen_invalid_uris():
    """Return a list of strings intended as invalid connection URIs.

    Only non-URI text and URIs with a malformed host are active; the
    invalid-protocol and invalid-port candidates are currently disabled.
    """
    port = 19530

    not_a_uri = [
        " ",
        "中文",
    ]

    # Disabled candidates (invalid protocol / invalid port), with <ip> as the host:
    #   "tc://<ip>:<port>", "tcp<ip>:<port>",
    #   "tcp://<ip>:100000", "tcp://<ip>: ", "tcp://<ip>:19540",
    #   "tcp://<ip>:-1", "tcp://<ip>:string"

    # invalid ip
    bad_host = [
        "tcp:// :%s" % port,
        "tcp://127.0.0:%s" % port,
        "tcp://\n:%s" % port,
    ]
    # Disabled invalid-ip candidates:
    #   "tcp://123.0.0.1:<port>", "tcp://255.0.0.0:<port>", "tcp://255.255.0.0:<port>",
    #   "tcp://255.255.255.0:<port>", "tcp://255.255.255.255:<port>"

    return not_a_uri + bad_host
|
|
|
|
|
|
def gen_invalid_table_names():
    """Return a list of strings intended as invalid table names."""
    # 256 "a" items joined by the "a" separator -> a 511-character name.
    overlong = "a".join("a" for i in range(256))
    # "" and None are currently disabled candidates.
    return [
        "12-s",
        "12/s",
        " ",
        "12 s",
        "BB。A",
        "c|c",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        overlong,
    ]
|
|
|
|
|
|
def gen_invalid_top_ks():
    """Return a list of values intended as invalid ``top_k`` arguments."""
    scalars = [0, -1, None]
    containers = [[1, 2, 3], (1, 2), {"a": 1}]
    strings = [
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        # 511 characters: 256 "a" items joined by "a".
        "a".join("a" for i in range(256)),
    ]
    return scalars + containers + strings
|
|
|
|
|
|
def gen_invalid_dims():
    """Return a list of values intended as invalid vector dimensions."""
    scalars = [0, -1, 100001, 1000000000000001, None, False]
    containers = [[1, 2, 3], (1, 2), {"a": 1}]
    strings = [
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        # 511 characters: 256 "a" items joined by "a".
        "a".join("a" for i in range(256)),
    ]
    return scalars + containers + strings
|
|
|
|
|
|
def gen_invalid_file_sizes():
    """Return a list of values intended as invalid index file sizes."""
    scalars = [0, -1, 1000000000000001, None, False]
    containers = [[1, 2, 3], (1, 2), {"a": 1}]
    strings = [
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        # 511 characters: 256 "a" items joined by "a".
        "a".join("a" for i in range(256)),
    ]
    return scalars + containers + strings
|
|
|
|
|
|
def gen_invalid_index_types():
    """Return a list of values intended as invalid index types."""
    # None is a currently disabled candidate.
    scalars = [0, -1, 100, 1000000000000001, False]
    containers = [[1, 2, 3], (1, 2), {"a": 1}]
    strings = [
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        # 511 characters: 256 "a" items joined by "a".
        "a".join("a" for i in range(256)),
    ]
    return scalars + containers + strings
|
|
|
|
|
|
def gen_invalid_nlists():
    """Return a list of values intended as invalid ``nlist`` index parameters."""
    # None is a currently disabled candidate.
    non_strings = [-1, [1, 2, 3], (1, 2), {"a": 1}]
    strings = [
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
    ]
    return non_strings + strings
|
|
|
|
|
|
def gen_invalid_nprobes():
    """Return a list of values intended as invalid ``nprobe`` search parameters."""
    scalars = [0, -1, 1000000000000001, None]
    containers = [[1, 2, 3], (1, 2), {"a": 1}]
    strings = [
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
    ]
    return scalars + containers + strings
|
|
|
|
|
|
def gen_invalid_metric_types():
    """Return a list of values intended as invalid metric types."""
    # None is a currently disabled candidate.
    scalars = [0, -1, 1000000000000001]
    containers = [[1, 2, 3], (1, 2), {"a": 1}]
    strings = [
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
    ]
    return scalars + containers + strings
|
|
|
|
|
|
def gen_invalid_vectors():
    """Return a list of values intended as invalid vector arguments."""
    malformed = [
        "1*2",
        [],
        [1],
        [1, 2],
        [" "],
        ['a'],
        [None],
        None,
        (1, 2),
        {"a": 1},
    ]
    strings = [
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        # 511 characters: 256 "a" items joined by "a".
        "a".join("a" for i in range(256)),
    ]
    return malformed + strings
|
|
|
|
|
|
def gen_invalid_vector_ids():
    """Return a list of values intended as invalid vector ids."""
    # An integer far outside the signed 64-bit range.
    beyond_int64 = 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
    non_strings = [1.0, -1.0, None, beyond_int64]
    strings = [
        " ",
        "",
        "String",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "=c",
        "\n",
        "中文",
    ]
    return non_strings + strings
|
|
|
|
|
|
|
|
def gen_invalid_query_ranges():
    """Return a list of query-range arguments intended to be invalid.

    Each entry is a one-element list holding a ``(start, end)`` tuple. The
    first six pair real dates with an empty or earlier end date; the rest pair
    a non-date start value with tomorrow's date.

    The three reference dates are computed once, so every entry uses the same
    values even when the call straddles midnight. Previously each entry
    re-evaluated the date helpers, about thirty times per call.
    """
    yesterday = get_last_day(1)
    today = get_current_day()
    tomorrow = get_next_day(1)

    # Real dates, but the end is empty or precedes the start.
    bad_ends = [
        [(yesterday, "")],
        [(today, "")],
        [(tomorrow, "")],
        [(today, yesterday)],
        [(tomorrow, yesterday)],
        [(tomorrow, today)],
    ]

    # Start values that are not date strings; each is paired with `tomorrow`.
    bad_starts = [
        0,
        -1,
        1,
        100001,
        1000000000000001,
        None,
        [1, 2, 3],
        (1, 2),
        {"a": 1},
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "#12s",
        "pip+",
        "=c",
        "\n",
        "\t",
        "中文",
        # 511 characters: 256 "a" items joined by "a".
        "a".join("a" for i in range(256)),
    ]

    return bad_ends + [[(start, tomorrow)] for start in bad_starts]
|
|
|
|
|
|
def gen_invalid_index_params():
    """Return index-parameter dicts that each carry one invalid field.

    First come dicts with an invalid ``index_type`` and ``nlist`` 16384, then
    dicts with ``IndexType.IVFLAT`` and an invalid ``nlist``.
    """
    bad_type_params = [
        {"index_type": bad_type, "nlist": 16384}
        for bad_type in gen_invalid_index_types()
    ]
    bad_nlist_params = [
        {"index_type": IndexType.IVFLAT, "nlist": bad_nlist}
        for bad_nlist in gen_invalid_nlists()
    ]
    return bad_type_params + bad_nlist_params
|
|
|
|
|
|
def gen_index_params():
    """Return one ``{"index_type", "nlist"}`` dict per (index type, nlist) combination.

    Index types vary slowest: all nlist values are emitted for the first index
    type before moving on to the next.
    """
    index_types = [
        IndexType.FLAT,
        IndexType.IVFLAT,
        IndexType.IVF_SQ8,
        IndexType.IVF_SQ8H,
        IndexType.IVF_PQ,
    ]
    nlists = [1, 16384, 50000]

    combos = []
    for index_type in index_types:
        for nlist in nlists:
            combos.append({"index_type": index_type, "nlist": nlist})
    return combos
|
|
|
|
|
|
def gen_simple_index_params():
    """Return one ``{"index_type", "nlist"}`` dict per index type, all with ``nlist`` 1024."""
    index_types = [
        IndexType.FLAT,
        IndexType.IVFLAT,
        IndexType.IVF_SQ8,
        IndexType.IVF_SQ8H,
        IndexType.IVF_PQ,
    ]
    nlists = [1024]

    combos = []
    for index_type in index_types:
        for nlist in nlists:
            combos.append({"index_type": index_type, "nlist": nlist})
    return combos
|
|
|
|
|
|
def assert_has_table(conn, table_name):
    """Return a truthy value only when ``conn.has_table`` reports success and the table exists.

    Despite the name, nothing is asserted here; the caller checks the result.
    """
    status, exists = conn.has_table(table_name)
    status_ok = status.OK()
    # Same short-circuit as `status.OK() and ok`: a falsy status wins.
    return status_ok and exists
|
|
|
|
|
|
if __name__ == "__main__":
    # Ad-hoc manual run: loads vectors from query.npy, recreates table "test" on
    # a server at 127.0.0.1, inserts the vectors, builds an IVFLAT index, runs
    # a search and then exits.
    # NOTE(review): this file's indentation was lost; the loop-body extents
    # below are reconstructed -- confirm against the upstream file.

    # Only referenced by the commented-out numpy.inner line further down;
    # np.load uses the module-level `np` import.
    import numpy

    dim = 128
    nq = 10000
    table = "test"


    # Read the query vectors from the current working directory and keep the
    # first nq rows as plain Python lists.
    file_name = 'query.npy'
    data = np.load(file_name)
    vectors = data[0:nq].tolist()
    # print(vectors)

    connect = Milvus()
    # connect.connect(host="192.168.1.27")
    # print(connect.show_tables())
    # print(connect.get_table_row_count(table))
    # sys.exit()
    connect.connect(host="127.0.0.1")
    # Delete the table named by `table` before creating it again below.
    connect.delete_table(table)
    # sys.exit()
    # time.sleep(2)
    print(connect.get_table_row_count(table))
    param = {'table_name': table,
             'dimension': dim,
             'metric_type': MetricType.L2,
             'index_file_size': 10}
    status = connect.create_table(param)
    print(status)
    print(connect.get_table_row_count(table))
    # add vectors
    # The same `vectors` batch is inserted on each of the 10 iterations.
    for i in range(10):
        status, ids = connect.add_vectors(table, vectors)
        print(status)
        print(ids[0])
    # print(ids[0])
    index_params = {"index_type": IndexType.IVFLAT, "nlist": 16384}
    status = connect.create_index(table, index_params)
    print(status)
    # sys.exit()
    # Search with the first inserted vector as the only query.
    query_vec = [vectors[0]]
    # print(numpy.inner(numpy.array(query_vec[0]), numpy.array(query_vec[0])))
    top_k = 12
    nprobe = 1
    for i in range(2):
        result = connect.search_vectors(table, top_k, nprobe, query_vec)
        print(result)
    sys.exit()

    # NOTE(review): everything below follows sys.exit() and so appears to be
    # unreachable. It also uses `args`, `index_file_size` and `Process`, none
    # of which is defined or imported by name in this file (they could only
    # come from `from utils import *`) -- confirm before reviving it.

    table = gen_unique_str("test_add_vector_with_multiprocessing")
    uri = "tcp://%s:%s" % (args["ip"], args["port"])
    param = {'table_name': table,
             'dimension': dim,
             'index_file_size': index_file_size}
    # create table
    milvus = Milvus()
    milvus.connect(uri=uri)
    milvus.create_table(param)
    vector = gen_single_vector(dim)

    process_num = 4
    loop_num = 10
    processes = []
    # with dependent connection
    # Worker: insert the single `vector` loop_num times through the given connection.
    def add(milvus):
        i = 0
        while i < loop_num:
            status, ids = milvus.add_vectors(table, vector)
            i = i + 1
    # Start process_num workers, each given its own freshly connected client.
    for i in range(process_num):
        milvus = Milvus()
        milvus.connect(uri=uri)
        p = Process(target=add, args=(milvus,))
        processes.append(p)
        p.start()
        time.sleep(0.2)
    for p in processes:
        p.join()
    time.sleep(3)
    # Every worker adds one vector per iteration, hence process_num * loop_num rows.
    status, count = milvus.get_table_row_count(table)
    assert count == process_num * loop_num
|