mirror of https://github.com/milvus-io/milvus.git
Bench scripts for 2.0 (#6263)
* [skip ci] update benchmark scripts for 2.0
  Signed-off-by: del-zhenwu <zhenxiang.li@zilliz.com>
* [skip ci] Update README.md
  Signed-off-by: del-zhenwu <zhenxiang.li@zilliz.com>
* [skip ci] Update mergify.yml for bench scripts
  Signed-off-by: zhenwu <zhenwu@milvus.io>

Co-authored-by: zhenwu <zhenwu@milvus.io>
parent 07b0989628
commit 2b481563a8
@ -13,7 +13,7 @@ pull_request_rules:
   - name: Test passed for tests changed
     conditions:
       - base=master
-      - -files~=^(?!tests).+
+      - -files~=^(?!tests\/python_test).+
       - "status-success=continuous-integration/jenkins/pr-merge"
     actions:
       label:
@ -1,39 +0,0 @@
# Quick start

### Description:

This project is used to test the performance, reliability and stability of the Milvus server.

- Test cases can be organized with `yaml`
- Tests can run in local mode or helm mode

### Usage:

`pip install -r requirements.txt`

If using local mode, the following libs are optional:

`pymongo==3.10.0`

`kubernetes==10.0.1`

### Demos:

1. Local test:

`python3 main.py --local --host=*.* --port=19530 --suite=suites/gpu_search_performance_random50m.yaml`
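
2. Helm-mode test (a sketch drawn from the CI scripts removed in this same commit; the bracketed values are placeholders, not verified defaults):

`python3 main.py --image-version=<image_version> --schedule-conf=scheduler/<config_file> --deploy-mode=<deploy_mode>`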

### Definitions of test suites:

Testers need to write a test suite config when adding a customized test to the current test framework; a minimal example suite is sketched below.

1. search_performance: the test type; other types are `build_performance`, `insert_performance`, `accuracy`, `stability`, `search_stability`
2. tables: list of test cases
3. The following fields are in the `table` field:
   - server: run host
   - milvus: milvus config
   - collection_name: currently supports one collection
   - run_count: search count
   - search_params: query params
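
A minimal suite sketch tying these fields together (the values below are illustrative placeholders, not a verified config):

```yaml
search_performance:                  # test type
  tables:                            # list of test cases
    - server: 192.168.1.1            # run host (placeholder)
      milvus:                        # milvus config (placeholder keys)
        cpu_cache_capacity: 16
      collection_name: random_50m_1024_512_ip   # one collection per case
      run_count: 2                   # search count
      search_params:                 # query params
        - nprobe: 8
```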

## Test result:

Test results will be uploaded when tests run in helm mode, and will be used to judge whether the test run passed or failed.
Binary file not shown (before: 50 KiB)
Binary file not shown (before: 65 KiB)
Binary file not shown (before: 44 KiB)
@ -1,13 +0,0 @@
try {
    def result = sh script: "helm status -n milvus ${env.HELM_SHARDS_RELEASE_NAME}", returnStatus: true
    if (!result) {
        sh "helm uninstall -n milvus ${env.HELM_SHARDS_RELEASE_NAME}"
    }
} catch (exc) {
    def result = sh script: "helm status -n milvus ${env.HELM_SHARDS_RELEASE_NAME}", returnStatus: true
    if (!result) {
        sh "helm uninstall -n milvus ${env.HELM_SHARDS_RELEASE_NAME}"
    }
    throw exc
}
@ -1,21 +0,0 @@
timeout(time: 12, unit: 'HOURS') {
    try {
        dir ("milvus-helm") {
            // sh 'helm init --client-only --skip-refresh --stable-repo-url https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts'
            // sh 'helm repo update'
            checkout([$class: 'GitSCM', branches: [[name: "${HELM_BRANCH}"]], userRemoteConfigs: [[url: "${HELM_URL}", name: 'origin', refspec: "+refs/heads/${HELM_BRANCH}:refs/remotes/origin/${HELM_BRANCH}"]]])
        }
        dir ("milvus_benchmark") {
            print "Git clone url: ${TEST_URL}:${TEST_BRANCH}"
            checkout([$class: 'GitSCM', branches: [[name: "${TEST_BRANCH}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "${TEST_URL}", name: 'origin', refspec: "+refs/heads/${TEST_BRANCH}:refs/remotes/origin/${TEST_BRANCH}"]]])
            print "Install requirements"
            // sh "python3 -m pip install -r requirements.txt -i http://pypi.douban.com/simple --trusted-host pypi.douban.com"
            sh "python3 -m pip install -r requirements.txt"
            sh "python3 -m pip install git+${TEST_LIB_URL}"
            sh "python3 main.py --image-version=${params.IMAGE_VERSION} --schedule-conf=scheduler/${params.SHARDS_CONFIG_FILE} --deploy-mode=${params.DEPLOY_MODE}"
        }
    } catch (exc) {
        echo 'Deploy SHARDS Test Failed !'
        throw exc
    }
}
@ -1,19 +0,0 @@
try {
    dir ("milvus-helm") {
        // sh 'helm init --client-only --skip-refresh --stable-repo-url https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts'
        // sh 'helm repo update'
        checkout([$class: 'GitSCM', branches: [[name: "${HELM_BRANCH}"]], userRemoteConfigs: [[url: "${HELM_URL}", name: 'origin', refspec: "+refs/heads/${HELM_BRANCH}:refs/remotes/origin/${HELM_BRANCH}"]]])
    }
    dir ("milvus_benchmark") {
        print "Git clone url: ${TEST_URL}:${TEST_BRANCH}"
        checkout([$class: 'GitSCM', branches: [[name: "${TEST_BRANCH}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "${TEST_URL}", name: 'origin', refspec: "+refs/heads/${TEST_BRANCH}:refs/remotes/origin/${TEST_BRANCH}"]]])
        print "Install requirements"
        sh "python3 -m pip install -r requirements.txt -i http://pypi.douban.com/simple --trusted-host pypi.douban.com"
        // sh "python3 -m pip install -r requirements.txt"
        sh "python3 -m pip install git+${TEST_LIB_URL}"
        sh "python3 main.py --image-version=${params.IMAGE_VERSION} --schedule-conf=scheduler/${params.CONFIG_FILE} --deploy-mode=${params.DEPLOY_MODE}"
    }
} catch (exc) {
    echo 'Deploy Test Failed !'
    throw exc
}
@ -1,46 +0,0 @@
timeout(time: 30, unit: 'MINUTES') {
    def imageName = "milvus/engine:${DOCKER_VERSION}"
    def remoteImageName = "milvusdb/daily-build:${REMOTE_DOCKER_VERSION}"
    def localDockerRegistryImage = "${params.LOCAL_DOKCER_REGISTRY_URL}/${imageName}"
    def remoteDockerRegistryImage = "${params.REMOTE_DOKCER_REGISTRY_URL}/${remoteImageName}"
    try {
        deleteImages("${localDockerRegistryImage}", true)

        def pullSourceImageStatus = sh(returnStatus: true, script: "docker pull ${localDockerRegistryImage}")

        if (pullSourceImageStatus == 0) {
            def renameImageStatus = sh(returnStatus: true, script: "docker tag ${localDockerRegistryImage} ${remoteImageName} && docker rmi ${localDockerRegistryImage}")
            def sourceImage = docker.image("${remoteImageName}")
            docker.withRegistry("https://${params.REMOTE_DOKCER_REGISTRY_URL}", "${params.REMOTE_DOCKER_CREDENTIALS_ID}") {
                sourceImage.push()
                sourceImage.push("${REMOTE_DOCKER_LATEST_VERSION}")
            }
        } else {
            echo "\"${localDockerRegistryImage}\" image does not exist !"
        }
    } catch (exc) {
        throw exc
    } finally {
        deleteImages("${localDockerRegistryImage}", true)
        deleteImages("${remoteDockerRegistryImage}", true)
    }
}

boolean deleteImages(String imageName, boolean force) {
    def imageNameStr = imageName.trim()
    def isExistImage = sh(returnStatus: true, script: "docker inspect --type=image ${imageNameStr} 2>&1 > /dev/null")
    if (isExistImage == 0) {
        def deleteImageStatus = 0
        if (force) {
            def imageID = sh(returnStdout: true, script: "docker inspect --type=image --format \"{{.ID}}\" ${imageNameStr}")
            deleteImageStatus = sh(returnStatus: true, script: "docker rmi -f ${imageID}")
        } else {
            deleteImageStatus = sh(returnStatus: true, script: "docker rmi ${imageNameStr}")
        }

        if (deleteImageStatus != 0) {
            return false
        }
    }
    return true
}
@ -1,3 +0,0 @@

class BaseExecutor(object):
    pass
@ -1,4 +0,0 @@
from . import BaseExecutor


class ShellExecutor(BaseExecutor):
    pass
@ -1,370 +0,0 @@
import os
import pdb
import time
import logging
import hashlib
from yaml import full_load, dump
import utils

logger = logging.getLogger("milvus_benchmark.utils")
REGISTRY_URL = "registry.zilliz.com/milvus/engine"
IDC_NAS_URL = "//172.16.70.249/test"
NAS_URL = "//192.168.1.126/test"


def get_host_cpus(hostname):
    from kubernetes import client, config
    config.load_kube_config()
    client.rest.logger.setLevel(logging.WARNING)
    v1 = client.CoreV1Api()
    cpus = v1.read_node(hostname).status.allocatable.get("cpu")
    return cpus


# update values.yaml
def update_values(file_path, deploy_mode, hostname, milvus_config, server_config=None):
    if not os.path.isfile(file_path):
        raise Exception('File: %s not found' % file_path)
    # bak values.yaml
    file_name = os.path.basename(file_path)
    bak_file_name = file_name + ".bak"
    file_parent_path = os.path.dirname(file_path)
    bak_file_path = file_parent_path + '/' + bak_file_name
    if os.path.exists(bak_file_path):
        os.system("cp %s %s" % (bak_file_path, file_path))
    else:
        os.system("cp %s %s" % (file_path, bak_file_path))
    with open(file_path) as f:
        values_dict = full_load(f)
    cluster = False
    if "cluster" in milvus_config and milvus_config["cluster"]:
        cluster = True
    for k, v in milvus_config.items():
        if k.find("primary_path") != -1:
            suffix_path = milvus_config["suffix_path"] if "suffix_path" in milvus_config else None
            path_value = v
            if suffix_path:
                path_value = v + "_" + str(int(time.time()))
            values_dict["primaryPath"] = path_value
            values_dict['wal']['path'] = path_value + "/wal"
            values_dict['logs']['path'] = path_value + "/logs"
        # elif k.find("use_blas_threshold") != -1:
        #     values_dict['useBLASThreshold'] = int(v)
        elif k.find("gpu_search_threshold") != -1:
            values_dict['gpu']['gpuSearchThreshold'] = int(v)
            if cluster:
                values_dict['readonly']['gpu']['gpuSearchThreshold'] = int(v)
        elif k.find("cpu_cache_capacity") != -1:
            values_dict['cache']['cacheSize'] = v
            if cluster:
                values_dict['readonly']['cache']['cacheSize'] = v
        # elif k.find("cache_insert_data") != -1:
        #     values_dict['cache']['cacheInsertData'] = v
        elif k.find("insert_buffer_size") != -1:
            values_dict['cache']['insertBufferSize'] = v
            if cluster:
                values_dict['readonly']['cache']['insertBufferSize'] = v
        elif k.find("gpu_resource_config.enable") != -1:
            values_dict['gpu']['enabled'] = v
            if cluster:
                values_dict['readonly']['gpu']['enabled'] = v
        elif k.find("gpu_resource_config.cache_capacity") != -1:
            values_dict['gpu']['cacheSize'] = v
            if cluster:
                values_dict['readonly']['gpu']['cacheSize'] = v
        elif k.find("build_index_resources") != -1:
            values_dict['gpu']['buildIndexDevices'] = v
            if cluster:
                values_dict['readonly']['gpu']['buildIndexDevices'] = v
        elif k.find("search_resources") != -1:
            values_dict['gpu']['searchDevices'] = v
            if cluster:
                values_dict['readonly']['gpu']['searchDevices'] = v
        # wal
        elif k.find("auto_flush_interval") != -1:
            values_dict['storage']['autoFlushInterval'] = v
            if cluster:
                values_dict['readonly']['storage']['autoFlushInterval'] = v
        elif k.find("wal_enable") != -1:
            values_dict['wal']['enabled'] = v

    # if values_dict['nodeSelector']:
    #     logger.warning("nodeSelector has been set: %s" % str(values_dict['engine']['nodeSelector']))
    #     return
    values_dict["wal"]["recoveryErrorIgnore"] = True
    # enable monitor
    values_dict["metrics"]["enabled"] = True
    values_dict["metrics"]["address"] = "192.168.1.237"
    values_dict["metrics"]["port"] = 9091
    # only test avx2
    values_dict["extraConfiguration"].update({"engine": {"simd_type": "avx2"}})
    # stat_optimizer_enable
    values_dict["extraConfiguration"]["engine"].update({"stat_optimizer_enable": False})

    # enable read-write mode
    if cluster:
        values_dict["cluster"]["enabled"] = True
        # update readonly log path
        values_dict["readonly"]['logs']['path'] = values_dict['logs']['path'] + "/readonly"
        if "readonly" in milvus_config:
            if "replicas" in milvus_config["readonly"]:
                values_dict["readonly"]["replicas"] = milvus_config["readonly"]["replicas"]

    use_external_mysql = False
    if "external_mysql" in milvus_config and milvus_config["external_mysql"]:
        use_external_mysql = True
    # meta mysql
    if use_external_mysql:
        values_dict["mysql"]["enabled"] = False
        # values_dict["mysql"]["persistence"]["enabled"] = True
        # values_dict["mysql"]["persistence"]["existingClaim"] = hashlib.md5(path_value.encode(encoding='UTF-8')).hexdigest()
        values_dict['externalMysql']['enabled'] = True
        if deploy_mode == "local":
            values_dict['externalMysql']["ip"] = "192.168.1.238"
        else:
            values_dict['externalMysql']["ip"] = "milvus-mysql.test"
        values_dict['externalMysql']["port"] = 3306
        values_dict['externalMysql']["user"] = "root"
        values_dict['externalMysql']["password"] = "milvus"
        values_dict['externalMysql']["database"] = "db"
    else:
        values_dict["mysql"]["enabled"] = False
    # update values.yaml with the given host
    nas_url = NAS_URL
    if hostname:
        nas_url = IDC_NAS_URL
        values_dict['nodeSelector'] = {'kubernetes.io/hostname': hostname}
        cpus = server_config["cpus"]

        # set limit/request cpus in resources
        values_dict["image"]['resources'] = {
            "limits": {
                "cpu": str(int(cpus)) + ".0"
            },
            "requests": {
                # "cpu": str(int(cpus) // 2) + ".0"
                "cpu": "4.0"
            }
        }
        # update readonly resources limits/requests
        values_dict["readonly"]['resources'] = {
            "limits": {
                "cpu": str(int(cpus)) + ".0"
            },
            "requests": {
                # "cpu": str(int(cpus) // 2) + ".0"
                "cpu": "4.0"
            }
        }
        values_dict['tolerations'] = [{
            "key": "worker",
            "operator": "Equal",
            "value": "performance",
            "effect": "NoSchedule"
        }]
    # add extra volumes
    values_dict['extraVolumes'] = [{
        'name': 'test',
        'flexVolume': {
            'driver': "fstab/cifs",
            'fsType': "cifs",
            'secretRef': {
                'name': "cifs-test-secret"
            },
            'options': {
                'networkPath': nas_url,
                'mountOptions': "vers=1.0"
            }
        }
    }]
    values_dict['extraVolumeMounts'] = [{
        'name': 'test',
        'mountPath': '/test'
    }]

    # add extra volumes for mysql
    # values_dict['mysql']['persistence']['enabled'] = True
    # values_dict['mysql']['configurationFilesPath'] = "/etc/mysql/mysql.conf.d/"
    # values_dict['mysql']['imageTag'] = '5.6'
    # values_dict['mysql']['securityContext'] = {
    #     'enabled': True}
    # mysql_db_path = "/test"
    if deploy_mode == "cluster" and use_external_mysql:
        # mount_path = values_dict["primaryPath"]+'/data'
        # long_str = '- name: test-mysql\n  flexVolume:\n    driver: fstab/cifs\n    fsType: cifs\n    secretRef:\n      name: cifs-test-secret\n    options:\n      networkPath: //192.168.1.126/test\n      mountOptions: vers=1.0'
        # values_dict['mysql']['extraVolumes'] = literal_str(long_str)
        # long_str_2 = "- name: test-mysql\n  mountPath: %s" % mysql_db_path
        # values_dict['mysql']['extraVolumeMounts'] = literal_str(long_str_2)
        # mysql_cnf_str = '[mysqld]\npid-file=%s/mysql.pid\ndatadir=%s' % (mount_path, mount_path)
        # values_dict['mysql']['configurationFiles'] = {}
        # values_dict['mysql']['configurationFiles']['mysqld.cnf'] = literal_str(mysql_cnf_str)

        values_dict['mysql']['enabled'] = False
        values_dict['externalMysql']['enabled'] = True
        values_dict['externalMysql']["ip"] = "192.168.1.197"
        values_dict['externalMysql']["port"] = 3306
        values_dict['externalMysql']["user"] = "root"
        values_dict['externalMysql']["password"] = "Fantast1c"
        values_dict['externalMysql']["database"] = "db"

    # logger.debug(values_dict)
    # print(dump(values_dict))
    with open(file_path, 'w') as f:
        dump(values_dict, f, default_flow_style=False)
    # DEBUG
    with open(file_path) as f:
        for line in f.readlines():
            line = line.strip("\n")


# deploy server
def helm_install_server(helm_path, deploy_mode, image_tag, image_type, name, namespace):
    timeout = 300
    logger.debug("Server deploy mode: %s" % deploy_mode)
    host = "%s.%s.svc.cluster.local" % (name, namespace)
    if deploy_mode == "single":
        install_cmd = "helm install \
                --set image.repository=%s \
                --set image.tag=%s \
                --set image.pullPolicy=Always \
                --set service.type=ClusterIP \
                -f ci/filebeat/values.yaml \
                --namespace %s \
                %s ." % (REGISTRY_URL, image_tag, namespace, name)
    elif deploy_mode == "cluster":
        install_cmd = "helm install \
                --set cluster.enabled=true \
                --set persistence.enabled=true \
                --set mishards.image.tag=test \
                --set mishards.image.pullPolicy=Always \
                --set image.repository=%s \
                --set image.tag=%s \
                --set image.pullPolicy=Always \
                --set service.type=ClusterIP \
                -f ci/filebeat/values.yaml \
                --namespace %s \
                %s ." % (REGISTRY_URL, image_tag, namespace, name)
    logger.debug(install_cmd)
    logger.debug(host)
    if os.system("cd %s && %s" % (helm_path, install_cmd)):
        logger.error("Helm install failed: %s" % name)
        return None
    time.sleep(30)
    # config.load_kube_config()
    # v1 = client.CoreV1Api()
    # pod_name = None
    # pod_id = None
    # pods = v1.list_namespaced_pod(namespace)
    # for i in pods.items:
    #     if i.metadata.name.find(name) != -1:
    #         pod_name = i.metadata.name
    #         pod_ip = i.status.pod_ip
    # logger.debug(pod_name)
    # logger.debug(pod_ip)
    # return pod_name, pod_ip
    return host


# delete server
@utils.retry(3)
def helm_del_server(name, namespace):
    # logger.debug("Sleep 600s before uninstall server")
    # time.sleep(600)
    del_cmd = "helm uninstall -n milvus %s" % name
    logger.info(del_cmd)
    if os.system(del_cmd):
        logger.error("Helm delete name:%s failed" % name)
        return False
    return True


def restart_server(helm_release_name, namespace):
    # import lazily, as in get_host_cpus above
    from kubernetes import client, config
    res = True
    timeout = 120000
    # service_name = "%s.%s.svc.cluster.local" % (helm_release_name, namespace)
    config.load_kube_config()
    v1 = client.CoreV1Api()
    pod_name = None
    # config_map_names = v1.list_namespaced_config_map(namespace, pretty='true')
    # body = {"replicas": 0}
    pods = v1.list_namespaced_pod(namespace)
    for i in pods.items:
        if i.metadata.name.find(helm_release_name) != -1 and i.metadata.name.find("mysql") == -1:
            pod_name = i.metadata.name
            break
            # v1.patch_namespaced_config_map(config_map_name, namespace, body, pretty='true')
    # status_res = v1.read_namespaced_service_status(helm_release_name, namespace, pretty='true')
    logger.debug("Pod name: %s" % pod_name)
    if pod_name is not None:
        try:
            v1.delete_namespaced_pod(pod_name, namespace)
        except Exception as e:
            logger.error(str(e))
            logger.error("Exception when calling CoreV1Api->delete_namespaced_pod")
            res = False
            return res
        logger.error("Sleep 10s after pod deleted")
        time.sleep(10)
        # check if restart successfully
        pods = v1.list_namespaced_pod(namespace)
        for i in pods.items:
            pod_name_tmp = i.metadata.name
            logger.error(pod_name_tmp)
            if pod_name_tmp == pod_name:
                continue
            elif pod_name_tmp.find(helm_release_name) == -1 or pod_name_tmp.find("mysql") != -1:
                continue
            else:
                status_res = v1.read_namespaced_pod_status(pod_name_tmp, namespace, pretty='true')
                logger.error(status_res.status.phase)
                start_time = time.time()
                ready_break = False
                while time.time() - start_time <= timeout:
                    logger.error(time.time())
                    status_res = v1.read_namespaced_pod_status(pod_name_tmp, namespace, pretty='true')
                    if status_res.status.phase == "Running":
                        logger.error("Already running")
                        ready_break = True
                        break
                    else:
                        time.sleep(5)
                if time.time() - start_time > timeout:
                    logger.error("Restart pod: %s timeout" % pod_name_tmp)
                    res = False
                    return res
                if ready_break:
                    break
    else:
        raise Exception("Pod: %s not found" % pod_name)
    follow = True
    pretty = True
    previous = True  # bool | Return previous terminated container logs. Defaults to false. (optional)
    since_seconds = 56  # int | A relative time in seconds before the current time from which to show logs. If this value precedes the time a pod was started, only logs since the pod start will be returned. If this value is in the future, no logs will be returned. Only one of sinceSeconds or sinceTime may be specified. (optional)
    timestamps = True  # bool | If true, add an RFC3339 or RFC3339Nano timestamp at the beginning of every line of log output. Defaults to false. (optional)
    container = "milvus"
    # start_time = time.time()
    # while time.time() - start_time <= timeout:
    #     try:
    #         api_response = v1.read_namespaced_pod_log(pod_name_tmp, namespace, container=container, follow=follow,
    #                                                   pretty=pretty, previous=previous, since_seconds=since_seconds,
    #                                                   timestamps=timestamps)
    #         logging.error(api_response)
    #         return res
    #     except Exception as e:
    #         logging.error("Exception when calling CoreV1Api->read_namespaced_pod_log: %s\n" % e)
    #         # waiting for server start
    #         time.sleep(2)
    #         # res = False
    #         # return res
    #     if time.time() - start_time > timeout:
    #         logging.error("Restart pod: %s timeout" % pod_name_tmp)
    #         res = False
    return res


if __name__ == '__main__':
    print(type(get_host_cpus("idc-sh002")))
@ -1,927 +0,0 @@
import os
import logging
import pdb
import time
import re
import random
import traceback
import json
import csv
import threading
from multiprocessing import Process
import numpy as np
from milvus import DataType
from yaml import full_load, dump
import concurrent.futures

import locust_user
from client import MilvusClient
import parser
from runner import Runner
from milvus_metrics.api import report
from milvus_metrics.models import Env, Hardware, Server, Metric
import helm_utils
import utils

logger = logging.getLogger("milvus_benchmark.k8s_runner")
namespace = "milvus"
default_port = 19530
DELETE_INTERVAL_TIME = 5
# INSERT_INTERVAL = 100000
INSERT_INTERVAL = 50000
BIG_FLUSH_INTERVAL = 3600
DEFAULT_FLUSH_INTERVAL = 1
timestamp = int(time.time())
default_path = "/var/lib/milvus"


class K8sRunner(Runner):
    """run helm mode"""

    def __init__(self):
        super(K8sRunner, self).__init__()
        self.service_name = utils.get_unique_name()
        self.host = None
        self.port = default_port
        self.hostname = None
        self.env_value = None
        self.hardware = None
        self.deploy_mode = None

    def init_env(self, milvus_config, server_config, server_host, deploy_mode, image_type, image_tag):
        logger.debug("Tests run on server host:")
        logger.debug(server_host)
        self.hostname = server_host
        self.deploy_mode = deploy_mode
        if self.hostname:
            try:
                cpus = helm_utils.get_host_cpus(self.hostname)
            except Exception as e:
                logger.error(str(e))
                cpus = 64
            logger.debug(type(cpus))
            if server_config:
                if "cpus" in server_config.keys():
                    cpus = min(server_config["cpus"], int(cpus))
                else:
                    server_config.update({"cpus": cpus})
            else:
                server_config = {"cpus": cpus}
            self.hardware = Hardware(name=self.hostname, cpus=cpus)
        # update values
        helm_path = os.path.join(os.getcwd(), "../milvus-helm/charts/milvus")
        values_file_path = helm_path + "/values.yaml"
        if not os.path.exists(values_file_path):
            raise Exception("File %s does not exist" % values_file_path)
        if milvus_config:
            helm_utils.update_values(values_file_path, deploy_mode, server_host, milvus_config, server_config)
        try:
            logger.debug("Start install server")
            self.host = helm_utils.helm_install_server(helm_path, deploy_mode, image_tag, image_type, self.service_name,
                                                       namespace)
        except Exception as e:
            logger.error("Helm install server failed: %s" % (str(e)))
            logger.error(traceback.format_exc())
            logger.error(self.hostname)
            self.clean_up()
            return False
        logger.debug(server_config)
        # for debugging
        if not self.host:
            logger.error("Helm install server failed")
            self.clean_up()
            return False
        return True

    def clean_up(self):
        logger.debug("Start clean up: %s" % self.service_name)
        helm_utils.helm_del_server(self.service_name, namespace)

    def report_wrapper(self, milvus_instance, env_value, hostname, collection_info, index_info, search_params,
                       run_params=None, server_config=None):
        metric = Metric()
        metric.set_run_id(timestamp)
        metric.env = Env(env_value)
        metric.env.OMP_NUM_THREADS = 0
        metric.hardware = self.hardware
        # TODO: removed
        # server_version = milvus_instance.get_server_version()
        # server_mode = milvus_instance.get_server_mode()
        # commit = milvus_instance.get_server_commit()
        server_version = "0.12.0"
        server_mode = self.deploy_mode
        metric.server = Server(version=server_version, mode=server_mode, build_commit=None)
        metric.collection = collection_info
        metric.index = index_info
        metric.search = search_params
        metric.run_params = run_params
        return metric

    def run(self, run_type, collection):
        logger.debug(run_type)
        logger.debug(collection)
        collection_name = collection["collection_name"] if "collection_name" in collection else None
        milvus_instance = MilvusClient(collection_name=collection_name, host=self.host)

        # TODO: removed
        # self.env_value = milvus_instance.get_server_config()
        # ugly implementation
        # self.env_value = utils.convert_nested(self.env_value)
        # self.env_value.pop("logs")
        # self.env_value.pop("network")
        self.env_value = collection

        if run_type == "insert_performance":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(
                collection_name)
            ni_per = collection["ni_per"]
            build_index = collection["build_index"]
            if milvus_instance.exists_collection():
                milvus_instance.drop()
                time.sleep(10)
            index_info = {}
            search_params = {}
            vector_type = self.get_vector_type(data_type)
            other_fields = collection["other_fields"] if "other_fields" in collection else None
            milvus_instance.create_collection(dimension, data_type=vector_type,
                                              other_fields=other_fields)
            if build_index is True:
                index_type = collection["index_type"]
                index_param = collection["index_param"]
                index_info = {
                    "index_type": index_type,
                    "index_param": index_param
                }
                index_field_name = utils.get_default_field_name(vector_type)
                milvus_instance.create_index(index_field_name, index_type, metric_type, index_param=index_param)
                logger.debug(milvus_instance.describe_index())
            res = self.do_insert(milvus_instance, collection_name, data_type, dimension, collection_size, ni_per)
            flush_time = 0.0
            if "flush" in collection and collection["flush"] == "no":
                logger.debug("No manual flush")
            else:
                start_time = time.time()
                milvus_instance.flush()
                flush_time = time.time() - start_time
                logger.debug(milvus_instance.count())
            collection_info = {
                "dimension": dimension,
                "metric_type": metric_type,
                "dataset_name": collection_name,
                "other_fields": other_fields,
                "ni_per": ni_per
            }
            metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname, collection_info, index_info,
                                         search_params)
            total_time = res["total_time"]
            build_time = 0
            if build_index is True:
                logger.debug("Start build index for last file")
                start_time = time.time()
                milvus_instance.create_index(index_field_name, index_type, metric_type, index_param=index_param)
                build_time = time.time() - start_time
                total_time = total_time + build_time
            metric.metrics = {
                "type": run_type,
                "value": {
                    "total_time": total_time,
                    "qps": res["qps"],
                    "ni_time": res["ni_time"],
                    "flush_time": flush_time,
                    "build_time": build_time
                }
            }
            report(metric)

        elif run_type == "build_performance":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(
                collection_name)
            index_type = collection["index_type"]
            index_param = collection["index_param"]
            collection_info = {
                "dimension": dimension,
                "metric_type": metric_type,
                "dataset_name": collection_name
            }
            index_info = {
                "index_type": index_type,
                "index_param": index_param
            }
            if not milvus_instance.exists_collection():
                logger.error("Table name: %s does not exist" % collection_name)
                return
            search_params = {}
            vector_type = self.get_vector_type(data_type)
            index_field_name = utils.get_default_field_name(vector_type)
            start_time = time.time()
            # drop index
            logger.debug("Drop index")
            milvus_instance.drop_index(index_field_name)
            # start_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            # TODO: need to check
            milvus_instance.create_index(index_field_name, index_type, metric_type, index_param=index_param)
            logger.debug(milvus_instance.describe_index())
            logger.debug(milvus_instance.count())
            end_time = time.time()
            # end_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname, collection_info, index_info,
                                         search_params)
            metric.metrics = {
                "type": "build_performance",
                "value": {
                    "build_time": round(end_time - start_time, 1),
                }
            }
            report(metric)

        elif run_type == "delete_performance":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(
                collection_name)
            ni_per = collection["ni_per"]
            auto_flush = collection["auto_flush"] if "auto_flush" in collection else True
            search_params = {}
            collection_info = {
                "dimension": dimension,
                "metric_type": metric_type,
                "dataset_name": collection_name
            }
            if not milvus_instance.exists_collection():
                logger.error(milvus_instance.show_collections())
                logger.error("Table name: %s does not exist" % collection_name)
                return
            length = milvus_instance.count()
            logger.info(length)
            index_info = milvus_instance.describe_index()
            logger.info(index_info)
            ids = [i for i in range(length)]
            loops = int(length / ni_per)
            milvus_instance.load_collection()
            # TODO: remove
            # start_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            start_time = time.time()
            # if auto_flush is False:
            #     milvus_instance.set_config("storage", "auto_flush_interval", BIG_FLUSH_INTERVAL)
            for i in range(loops):
                delete_ids = ids[i * ni_per: i * ni_per + ni_per]
                logger.debug("Delete %d - %d" % (delete_ids[0], delete_ids[-1]))
                milvus_instance.delete(delete_ids)
                logger.debug("Table row counts: %d" % milvus_instance.count())
            logger.debug("Table row counts: %d" % milvus_instance.count())
            start_flush_time = time.time()
            milvus_instance.flush()
            end_flush_time = time.time()
            end_time = time.time()
            # end_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            logger.debug("Table row counts: %d" % milvus_instance.count())
            # milvus_instance.set_config("storage", "auto_flush_interval", DEFAULT_FLUSH_INTERVAL)
            metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname, collection_info, index_info,
                                         search_params)
            delete_time = round(end_time - start_time, 1)
            metric.metrics = {
                "type": "delete_performance",
                "value": {
                    "delete_time": delete_time,
                    "qps": round(collection_size / delete_time, 1)
                }
            }
            if auto_flush is False:
                flush_time = round(end_flush_time - start_flush_time, 1)
                metric.metrics["value"].update({"flush_time": flush_time})
            report(metric)

        elif run_type == "get_ids_performance":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(
                collection_name)
            ids_length_per_segment = collection["ids_length_per_segment"]
            if not milvus_instance.exists_collection():
                logger.error("Table name: %s does not exist" % collection_name)
                return
            collection_info = {
                "dimension": dimension,
                "metric_type": metric_type,
                "dataset_name": collection_name
            }
            search_params = {}
            logger.info(milvus_instance.count())
            index_info = milvus_instance.describe_index()
            logger.info(index_info)
            for ids_num in ids_length_per_segment:
                segment_num, get_ids = milvus_instance.get_rand_ids_each_segment(ids_num)
                start_time = time.time()
                get_res = milvus_instance.get_entities(get_ids)
                total_time = time.time() - start_time
                avg_time = total_time / segment_num
                run_params = {"ids_num": ids_num}
                logger.info(
                    "Segment num: %d, ids num per segment: %d, run_time: %f" % (segment_num, ids_num, total_time))
                metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname, collection_info,
                                             index_info, search_params, run_params=run_params)
                metric.metrics = {
                    "type": run_type,
                    "value": {
                        "total_time": round(total_time, 1),
                        "avg_time": round(avg_time, 1)
                    }
                }
                report(metric)

        elif run_type == "search_performance":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(
                collection_name)
            run_count = collection["run_count"]
            top_ks = collection["top_ks"]
            nqs = collection["nqs"]
            # filter_query = collection["filter"] if "filter" in collection else None
            filters = collection["filters"] if "filters" in collection else []
            filter_query = []
            search_params = collection["search_params"]
            fields = self.get_fields(milvus_instance, collection_name)
            collection_info = {
                "dimension": dimension,
                "metric_type": metric_type,
                "dataset_name": collection_name,
                "fields": fields
            }
            if not milvus_instance.exists_collection():
                logger.error("Table name: %s does not exist" % collection_name)
                return

            vector_type = self.get_vector_type(data_type)
            vec_field_name = utils.get_default_field_name(vector_type)
            logger.info(milvus_instance.count())
            index_info = milvus_instance.describe_index()
            logger.info(index_info)
            milvus_instance.load_collection()
            logger.info("Start warm up query")
            res = self.do_query(milvus_instance, collection_name, vec_field_name, [1], [1], 2,
                                search_param=search_params[0], filter_query=filter_query)
            logger.info("End warm up query")
            for search_param in search_params:
                logger.info("Search param: %s" % json.dumps(search_param))
                if not filters:
                    filters.append(None)
                for filter in filters:
                    filter_param = []
                    if isinstance(filter, dict) and "range" in filter:
                        filter_query.append(eval(filter["range"]))
                        filter_param.append(filter["range"])
                    if isinstance(filter, dict) and "term" in filter:
                        filter_query.append(eval(filter["term"]))
                        filter_param.append(filter["term"])
                    logger.info("filter param: %s" % json.dumps(filter_param))
                    res = self.do_query(milvus_instance, collection_name, vec_field_name, top_ks, nqs, run_count,
                                        search_param, filter_query=filter_query)
                    headers = ["Nq/Top-k"]
                    headers.extend([str(top_k) for top_k in top_ks])
                    logger.info("Search param: %s" % json.dumps(search_param))
                    utils.print_table(headers, nqs, res)
                    for index_nq, nq in enumerate(nqs):
                        for index_top_k, top_k in enumerate(top_ks):
                            search_param_group = {
                                "nq": nq,
                                "topk": top_k,
                                "search_param": search_param,
                                "filter": filter_param
                            }
                            search_time = res[index_nq][index_top_k]
                            metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname,
                                                         collection_info, index_info, search_param_group)
                            metric.metrics = {
                                "type": "search_performance",
                                "value": {
                                    "search_time": search_time
                                }
                            }
                            report(metric)

        elif run_type == "locust_insert_stress":
            pass

        elif run_type in ["locust_search_performance", "locust_insert_performance", "locust_mix_performance"]:
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(
                collection_name)
            ni_per = collection["ni_per"]
            build_index = collection["build_index"]
            if milvus_instance.exists_collection():
                milvus_instance.drop()
                time.sleep(10)
            index_info = {}
            search_params = {}
            vector_type = self.get_vector_type(data_type)
            index_field_name = utils.get_default_field_name(vector_type)
            milvus_instance.create_collection(dimension, data_type=vector_type, other_fields=None)
            vector_type = self.get_vector_type(data_type)
            vec_field_name = utils.get_default_field_name(vector_type)
            if build_index is True:
                index_type = collection["index_type"]
                index_param = collection["index_param"]
                index_info = {
                    "index_type": index_type,
                    "index_param": index_param
                }
                milvus_instance.create_index(index_field_name, index_type, metric_type, index_param=index_param)
                logger.debug(milvus_instance.describe_index())
            if run_type in ["locust_search_performance", "locust_mix_performance"]:
                res = self.do_insert(milvus_instance, collection_name, data_type, dimension, collection_size, ni_per)
                if "flush" in collection and collection["flush"] == "no":
                    logger.debug("No manual flush")
                else:
                    milvus_instance.flush()
                if build_index is True:
                    logger.debug("Start build index for last file")
                    milvus_instance.create_index(index_field_name, index_type, metric_type, _async=True,
                                                 index_param=index_param)
                    logger.debug(milvus_instance.describe_index())
                logger.debug("Table row counts: %d" % milvus_instance.count())
                milvus_instance.load_collection()
                logger.info("Start warm up query")
                for i in range(2):
                    res = self.do_query(milvus_instance, collection_name, vec_field_name, [1], [1], 2,
                                        search_param={"nprobe": 16})
                logger.info("End warm up query")
            real_metric_type = utils.metric_type_trans(metric_type)
            # spawn locust requests
            task = collection["task"]
            connection_type = "single"
            connection_num = task["connection_num"]
            if connection_num > 1:
                connection_type = "multi"
            clients_num = task["clients_num"]
            hatch_rate = task["hatch_rate"]
            during_time = utils.timestr_to_int(task["during_time"])
            task_types = task["types"]
            run_params = {"tasks": {}, "clients_num": clients_num, "spawn_rate": hatch_rate, "during_time": during_time}
            for task_type in task_types:
                run_params["tasks"].update({task_type["type"]: task_type["weight"] if "weight" in task_type else 1})

            # collect stats
            locust_stats = locust_user.locust_executor(self.host, self.port, collection_name,
                                                       connection_type=connection_type, run_params=run_params)
            logger.info(locust_stats)
            collection_info = {
                "dimension": dimension,
                "metric_type": metric_type,
                "dataset_name": collection_name
            }
            metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname, collection_info, index_info,
                                         search_params)
            metric.metrics = {
                "type": run_type,
                "value": locust_stats}
            report(metric)

        elif run_type == "search_ids_stability":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(
                collection_name)
            search_params = collection["search_params"]
            during_time = collection["during_time"]
            ids_length = collection["ids_length"]
            ids = collection["ids"]
            collection_info = {
                "dimension": dimension,
                "metric_type": metric_type,
                "dataset_name": collection_name
            }
            if not milvus_instance.exists_collection():
                logger.error("Table name: %s does not exist" % collection_name)
                return
            logger.info(milvus_instance.count())
            index_info = milvus_instance.describe_index()
            logger.info(index_info)
            g_top_k = int(collection["top_ks"].split("-")[1])
            l_top_k = int(collection["top_ks"].split("-")[0])
            g_id = int(ids.split("-")[1])
            l_id = int(ids.split("-")[0])
            g_id_length = int(ids_length.split("-")[1])
            l_id_length = int(ids_length.split("-")[0])

            milvus_instance.load_collection()
            # start_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            # logger.debug(start_mem_usage)
            start_time = time.time()
            while time.time() < start_time + during_time * 60:
                search_param = {}
                top_k = random.randint(l_top_k, g_top_k)
                ids_num = random.randint(l_id_length, g_id_length)
                # sample candidate ids from the configured id range
                ids_param = [random.randint(l_id, g_id) for _ in range(ids_num)]
                for k, v in search_params.items():
                    search_param[k] = random.randint(int(v.split("-")[0]), int(v.split("-")[1]))
                logger.debug("Query top-k: %d, ids_num: %d, param: %s" % (top_k, ids_num, json.dumps(search_param)))
                result = milvus_instance.query_ids(top_k, ids_param, search_param=search_param)
            # end_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname, collection_info, index_info,
                                         {})
            metric.metrics = {
                "type": "search_ids_stability",
                "value": {
                    "during_time": during_time,
                }
            }
            report(metric)

        # for sift/deep datasets
        # TODO: enable
        elif run_type == "accuracy":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(
                collection_name)
            search_params = collection["search_params"]
            # mapping to search param list
            search_params = self.generate_combinations(search_params)

            top_ks = collection["top_ks"]
            nqs = collection["nqs"]
            collection_info = {
                "dimension": dimension,
                "metric_type": metric_type,
                "dataset_name": collection_name
            }
            if not milvus_instance.exists_collection():
                logger.error("Table name: %s does not exist" % collection_name)
                return
            logger.info(milvus_instance.count())
            index_info = milvus_instance.describe_index()
            logger.info(index_info)
            milvus_instance.load_collection()
            true_ids_all = self.get_groundtruth_ids(collection_size)
            vector_type = self.get_vector_type(data_type)
            vec_field_name = utils.get_default_field_name(vector_type)
            for search_param in search_params:
                headers = ["Nq/Top-k"]
                res = []
                for nq in nqs:
                    for top_k in top_ks:
                        tmp_res = []
                        search_param_group = {
                            "nq": nq,
                            "topk": top_k,
                            "search_param": search_param,
                            "metric_type": metric_type
                        }
                        logger.info("Query params: %s" % json.dumps(search_param_group))
                        result_ids = self.do_query_ids(milvus_instance, collection_name, vec_field_name, top_k, nq,
                                                       search_param=search_param)
                        # mem_used = milvus_instance.get_mem_info()["memory_used"]
                        acc_value = self.get_recall_value(true_ids_all[:nq, :top_k].tolist(), result_ids)
                        logger.info("Query accuracy: %s" % acc_value)
                        tmp_res.append(acc_value)
                        # logger.info("Memory usage: %s" % mem_used)
                        metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname, collection_info,
                                                     index_info, search_param_group)
                        metric.metrics = {
                            "type": "accuracy",
                            "value": {
                                "acc": acc_value
                            }
                        }
                        report(metric)
                        # logger.info("Memory usage: %s" % mem_used)
                        res.append(tmp_res)
                headers.extend([str(top_k) for top_k in top_ks])
                logger.info("Search param: %s" % json.dumps(search_param))
                utils.print_table(headers, nqs, res)

        elif run_type == "ann_accuracy":
            hdf5_source_file = collection["source_file"]
            collection_name = collection["collection_name"]
            index_types = collection["index_types"]
            index_params = collection["index_params"]
            top_ks = collection["top_ks"]
            nqs = collection["nqs"]
            search_params = collection["search_params"]
            # mapping to search param list
            search_params = self.generate_combinations(search_params)
            # mapping to index param list
            index_params = self.generate_combinations(index_params)

            data_type, dimension, metric_type = parser.parse_ann_collection_name(collection_name)
            collection_info = {
                "dimension": dimension,
                "metric_type": metric_type,
                "dataset_name": collection_name
            }
            dataset = utils.get_dataset(hdf5_source_file)
            if milvus_instance.exists_collection(collection_name):
                logger.info("Re-create collection: %s" % collection_name)
                milvus_instance.drop()
                time.sleep(DELETE_INTERVAL_TIME)
            true_ids = np.array(dataset["neighbors"])
            vector_type = self.get_vector_type_from_metric(metric_type)
            vec_field_name = utils.get_default_field_name(vector_type)
            real_metric_type = utils.metric_type_trans(metric_type)

            # re-create collection
            if milvus_instance.exists_collection(collection_name):
                milvus_instance.drop()
                time.sleep(DELETE_INTERVAL_TIME)
            milvus_instance.create_collection(dimension, data_type=vector_type)
            insert_vectors = self.normalize(metric_type, np.array(dataset["train"]))
            if len(insert_vectors) != dataset["train"].shape[0]:
                raise Exception("Row count of insert vectors: %d is not equal to dataset size: %d" % (
                    len(insert_vectors), dataset["train"].shape[0]))
            logger.debug("The row count of entities to be inserted: %d" % len(insert_vectors))
            # Insert batch once
            # milvus_instance.insert(insert_vectors)
            loops = len(insert_vectors) // INSERT_INTERVAL + 1
            for i in range(loops):
                start = i * INSERT_INTERVAL
                end = min((i + 1) * INSERT_INTERVAL, len(insert_vectors))
                if start < end:
                    tmp_vectors = insert_vectors[start:end]
                    ids = [i for i in range(start, end)]
                    if not isinstance(tmp_vectors, list):
                        entities = milvus_instance.generate_entities(tmp_vectors.tolist(), ids)
                        res_ids = milvus_instance.insert(entities, ids=ids)
                    else:
                        entities = milvus_instance.generate_entities(tmp_vectors, ids)
                        res_ids = milvus_instance.insert(entities, ids=ids)
                    assert res_ids == ids
            milvus_instance.flush()
            res_count = milvus_instance.count()
            logger.info("Table: %s, row count: %d" % (collection_name, res_count))
            if res_count != len(insert_vectors):
                raise Exception("Table row count is not equal to insert vectors")
            for index_type in index_types:
                for index_param in index_params:
                    logger.debug("Building index with param: %s" % json.dumps(index_param))
                    if milvus_instance.get_config("cluster.enable") == "true":
                        milvus_instance.create_index(vec_field_name, index_type, metric_type, _async=True,
                                                     index_param=index_param)
                    else:
                        milvus_instance.create_index(vec_field_name, index_type, metric_type,
                                                     index_param=index_param)
                    logger.info(milvus_instance.describe_index())
                    logger.info("Start load collection: %s" % collection_name)
                    milvus_instance.load_collection()
                    logger.info("End load collection: %s" % collection_name)
                    index_info = {
                        "index_type": index_type,
                        "index_param": index_param
                    }
                    logger.debug(index_info)
                    warm_up = True
                    for search_param in search_params:
                        for nq in nqs:
                            query_vectors = self.normalize(metric_type, np.array(dataset["test"][:nq]))
                            if not isinstance(query_vectors, list):
                                query_vectors = query_vectors.tolist()
                            for top_k in top_ks:
                                search_param_group = {
                                    "nq": len(query_vectors),
                                    "topk": top_k,
                                    "search_param": search_param,
                                    "metric_type": metric_type
                                }
                                logger.debug(search_param_group)
                                vector_query = {"vector": {vec_field_name: {
                                    "topk": top_k,
                                    "query": query_vectors,
                                    "metric_type": real_metric_type,
                                    "params": search_param}
                                }}
                                if warm_up:
                                    for i in range(2):
                                        result = milvus_instance.query(vector_query)
                                    warm_up = False
                                    logger.info("End warm up")
                                result = milvus_instance.query(vector_query)
                                result_ids = milvus_instance.get_ids(result)
                                acc_value = self.get_recall_value(true_ids[:nq, :top_k].tolist(), result_ids)
                                logger.info("Query ann_accuracy: %s" % acc_value)
                                metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname,
                                                             collection_info, index_info, search_param_group)
                                metric.metrics = {
                                    "type": "ann_accuracy",
                                    "value": {
                                        "acc": acc_value
                                    }
                                }
                                report(metric)

        elif run_type == "search_stability":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(
                collection_name)
            search_params = collection["search_params"]
            during_time = collection["during_time"]
            collection_info = {
                "dimension": dimension,
                "metric_type": metric_type,
                "dataset_name": collection_name
            }
            if not milvus_instance.exists_collection():
                logger.error("Table name: %s does not exist" % collection_name)
                return
            logger.info(milvus_instance.count())
            index_info = milvus_instance.describe_index()
            logger.info(index_info)
            g_top_k = int(collection["top_ks"].split("-")[1])
            g_nq = int(collection["nqs"].split("-")[1])
            l_top_k = int(collection["top_ks"].split("-")[0])
            l_nq = int(collection["nqs"].split("-")[0])
            milvus_instance.load_collection()
            # start_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            # logger.debug(start_mem_usage)
            start_row_count = milvus_instance.count()
            logger.debug(milvus_instance.describe_index())
            logger.info(start_row_count)
            vector_type = self.get_vector_type(data_type)
            vec_field_name = utils.get_default_field_name(vector_type)
            real_metric_type = utils.metric_type_trans(metric_type)
            start_time = time.time()
            while time.time() < start_time + during_time * 60:
                search_param = {}
                top_k = random.randint(l_top_k, g_top_k)
                nq = random.randint(l_nq, g_nq)
                for k, v in search_params.items():
                    search_param[k] = random.randint(int(v.split("-")[0]), int(v.split("-")[1]))
                query_vectors = [[random.random() for _ in range(dimension)] for _ in range(nq)]
                logger.debug("Query nq: %d, top-k: %d, param: %s" % (nq, top_k, json.dumps(search_param)))
                vector_query = {"vector": {vec_field_name: {
                    "topk": top_k,
                    "query": query_vectors[:nq],
                    "metric_type": real_metric_type,
                    "params": search_param}
                }}
                milvus_instance.query(vector_query)
            # end_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname, collection_info, index_info,
                                         {})
            metric.metrics = {
                "type": "search_stability",
                "value": {
                    "during_time": during_time,
                }
            }
            report(metric)

        elif run_type == "loop_stability":
            # init data
            milvus_instance.clean_db()
            pull_interval = collection["pull_interval"]
            collection_num = collection["collection_num"]
            concurrent = collection["concurrent"] if "concurrent" in collection else False
            concurrent_num = collection_num
            dimension = collection["dimension"] if "dimension" in collection else 128
            insert_xb = collection["insert_xb"] if "insert_xb" in collection else 100000
            index_types = collection["index_types"] if "index_types" in collection else ['ivf_sq8']
            index_param = {"nlist": 256}
            collection_names = []
            milvus_instances_map = {}
            insert_vectors = [[random.random() for _ in range(dimension)] for _ in range(insert_xb)]
            ids = [i for i in range(insert_xb)]
            # initialize and prepare
            for i in range(collection_num):
                name = utils.get_unique_name(prefix="collection_%d_" % i)
                collection_names.append(name)
                metric_type = random.choice(["l2", "ip"])
                # default float_vector
                milvus_instance = MilvusClient(collection_name=name, host=self.host)
                milvus_instance.create_collection(dimension, other_fields=None)
                index_type = random.choice(index_types)
                field_name = utils.get_default_field_name()
                milvus_instance.create_index(field_name, index_type, metric_type, index_param=index_param)
                logger.info(milvus_instance.describe_index())
                insert_vectors = utils.normalize(metric_type, insert_vectors)
                entities = milvus_instance.generate_entities(insert_vectors, ids)
                res_ids = milvus_instance.insert(entities, ids=ids)
                milvus_instance.flush()
                milvus_instances_map.update({name: milvus_instance})
                logger.info(milvus_instance.describe_index())

            # loop time unit: min -> s
            pull_interval_seconds = pull_interval * 60
            tasks = ["insert_rand", "query_rand", "flush"]
            i = 1
            while True:
                logger.info("Loop time: %d" % i)
                start_time = time.time()
                while time.time() - start_time < pull_interval_seconds:
                    if concurrent:
                        threads = []
                        for name in collection_names:
                            task_name = random.choice(tasks)
                            task_run = getattr(milvus_instances_map[name], task_name)
                            t = threading.Thread(target=task_run, args=())
                            threads.append(t)
                            t.start()
                        for t in threads:
                            t.join()
                        # with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_num) as executor:
                        #     future_results = {executor.submit(getattr(milvus_instances_map[mp[j][0]], mp[j][1])): j for j in range(concurrent_num)}
                        #     for future in concurrent.futures.as_completed(future_results):
                        #         future.result()
                    else:
                        tmp_collection_name = random.choice(collection_names)
                        task_name = random.choice(tasks)
                        logger.info(tmp_collection_name)
                        logger.info(task_name)
                        task_run = getattr(milvus_instances_map[tmp_collection_name], task_name)
                        task_run()

                logger.debug("Restart server")
                helm_utils.restart_server(self.service_name, namespace)
                # new connection
                # for name in collection_names:
                #     milvus_instance = MilvusClient(collection_name=name, host=self.host)
                #     milvus_instances_map.update({name: milvus_instance})
                time.sleep(30)
                i = i + 1

        elif run_type == "stability":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(
                collection_name)
            during_time = collection["during_time"]
            operations = collection["operations"]
            collection_info = {
                "dimension": dimension,
                "metric_type": metric_type,
                "dataset_name": collection_name
            }
            if not milvus_instance.exists_collection():
                logger.error(milvus_instance.show_collections())
                raise Exception("Table name: %s does not exist" % collection_name)
            logger.info(milvus_instance.count())
            index_info = milvus_instance.describe_index()
            logger.info(index_info)
            # start_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            start_row_count = milvus_instance.count()
            logger.info(start_row_count)
            vector_type = self.get_vector_type(data_type)
            vec_field_name = utils.get_default_field_name(vector_type)
            real_metric_type = utils.metric_type_trans(metric_type)
            query_vectors = [[random.random() for _ in range(dimension)] for _ in range(10000)]
            if "insert" in operations:
                insert_xb = operations["insert"]["xb"]
            if "delete" in operations:
                delete_xb = operations["delete"]["xb"]
            if "query" in operations:
                g_top_k = int(operations["query"]["top_ks"].split("-")[1])
                l_top_k = int(operations["query"]["top_ks"].split("-")[0])
                g_nq = int(operations["query"]["nqs"].split("-")[1])
                l_nq = int(operations["query"]["nqs"].split("-")[0])
                search_params = operations["query"]["search_params"]
            i = 0
            start_time = time.time()
            while time.time() < start_time + during_time * 60:
                i = i + 1
                q = self.gen_executors(operations)
                for name in q:
                    try:
                        if name == "insert":
                            insert_ids = random.sample(list(range(collection_size)), insert_xb)
                            insert_vectors = [[random.random() for _ in range(dimension)] for _ in range(insert_xb)]
                            entities = milvus_instance.generate_entities(insert_vectors, insert_ids)
                            milvus_instance.insert(entities, ids=insert_ids)
                        elif name == "delete":
                            delete_ids = random.sample(list(range(collection_size)), delete_xb)
                            milvus_instance.delete(delete_ids)
                        elif name == "query":
                            top_k = random.randint(l_top_k, g_top_k)
                            nq = random.randint(l_nq, g_nq)
                            search_param = {}
                            for k, v in search_params.items():
                                search_param[k] = random.randint(int(v.split("-")[0]), int(v.split("-")[1]))
                            logger.debug("Query nq: %d, top-k: %d, param: %s" % (nq, top_k, json.dumps(search_param)))
                            vector_query = {"vector": {vec_field_name: {
                                "topk": top_k,
                                "query": query_vectors[:nq],
                                "metric_type": real_metric_type,
                                "params": search_param}
                            }}
                            result = milvus_instance.query(vector_query)
|
||||
elif name in ["flush", "compact"]:
|
||||
func = getattr(milvus_instance, name)
|
||||
func()
|
||||
logger.debug(milvus_instance.count())
|
||||
except Exception as e:
|
||||
logger.error(name)
|
||||
logger.error(str(e))
|
||||
raise
|
||||
logger.debug("Loop time: %d" % i)
|
||||
# end_mem_usage = milvus_instance.get_mem_info()["memory_used"]
|
||||
end_row_count = milvus_instance.count()
|
||||
metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname, collection_info, index_info,
|
||||
{})
|
||||
metric.metrics = {
|
||||
"type": "stability",
|
||||
"value": {
|
||||
"during_time": during_time,
|
||||
"row_count_increments": end_row_count - start_row_count
|
||||
}
|
||||
}
|
||||
report(metric)
|
||||
|
||||
elif run_type == "debug":
|
||||
time.sleep(7200)
|
||||
default_insert_vectors = [[random.random() for _ in range(128)] for _ in range(500000)]
|
||||
interval = 50000
|
||||
for loop in range(1, 7):
|
||||
insert_xb = loop * interval
|
||||
insert_vectors = default_insert_vectors[:insert_xb]
|
||||
insert_ids = [i for i in range(insert_xb)]
|
||||
entities = milvus_instance.generate_entities(insert_vectors, insert_ids)
|
||||
for j in range(5):
|
||||
milvus_instance.insert(entities, ids=insert_ids)
|
||||
time.sleep(10)
|
||||
|
||||
else:
|
||||
raise Exception("Run type not defined")
|
||||
logger.debug("All test finished")
|
|
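For reference, a `stability` suite entry consumed by the branch above could look like the sketch below. The keys mirror what the code reads (`during_time` in minutes, `xb` batch sizes, and `"min-max"` range strings for top-k, nq and search params); the values are illustrative only.

    stability:
      collections:
        -
          collection_name: sift_1m_2000000_128_l2
          during_time: 120
          operations:
            insert:
              weight: 1
              xb: 1000
            delete:
              weight: 1
              xb: 1000
            query:
              weight: 2
              top_ks: 1-100
              nqs: 1-100
              search_params:
                nprobe: 1-64
            flush:
              weight: 1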
@ -1,732 +0,0 @@
import os
import logging
import pdb
import string
import time
import random
import json
import csv
import threading  # used by the loop_stability branch below
from multiprocessing import Process
import numpy as np
import concurrent.futures
from queue import Queue

import locust_user
from milvus import DataType
from client import MilvusClient
from runner import Runner
import utils
import parser


DELETE_INTERVAL_TIME = 5
INSERT_INTERVAL = 50000
logger = logging.getLogger("milvus_benchmark.local_runner")


class LocalRunner(Runner):
    """run local mode"""
    def __init__(self, host, port):
        super(LocalRunner, self).__init__()
        self.host = host
        self.port = port

    def run(self, run_type, collection):
        logger.debug(run_type)
        logger.debug(collection)
        collection_name = collection["collection_name"] if "collection_name" in collection else None
        milvus_instance = MilvusClient(collection_name=collection_name, host=self.host, port=self.port)
        logger.info(milvus_instance.show_collections())
        # TODO:
        # self.env_value = milvus_instance.get_server_config()
        # ugly implementation
        # self.env_value = utils.convert_nested(self.env_value)
        # self.env_value.pop("logs")
        # self.env_value.pop("network")
        # logger.info(self.env_value)

        if run_type in ["insert_performance", "insert_flush_performance"]:
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
            ni_per = collection["ni_per"]
            build_index = collection["build_index"]
            if milvus_instance.exists_collection():
                milvus_instance.drop()
                time.sleep(10)
            vector_type = self.get_vector_type(data_type)
            other_fields = collection["other_fields"] if "other_fields" in collection else None
            milvus_instance.create_collection(dimension, data_type=vector_type, other_fields=other_fields)
            if build_index is True:
                index_type = collection["index_type"]
                index_param = collection["index_param"]
                index_field_name = utils.get_default_field_name(vector_type)
                milvus_instance.create_index(index_field_name, index_type, metric_type, index_param=index_param)
            res = self.do_insert(milvus_instance, collection_name, data_type, dimension, collection_size, ni_per)
            milvus_instance.flush()
            logger.debug("Table row counts: %d" % milvus_instance.count())
            if build_index is True:
                logger.debug("Start build index for last file")
                milvus_instance.create_index(index_field_name, index_type, metric_type, index_param=index_param)

        elif run_type == "delete_performance":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
            ni_per = collection["ni_per"]
            auto_flush = collection["auto_flush"] if "auto_flush" in collection else True
            if not milvus_instance.exists_collection():
                logger.error(milvus_instance.show_collections())
                logger.error("Table: %s not found" % collection_name)
                return
            length = milvus_instance.count()
            ids = [i for i in range(length)]
            loops = int(length / ni_per)
            if auto_flush is False:
                # BIG_FLUSH_INTERVAL is expected to be defined elsewhere (not shown in this diff)
                milvus_instance.set_config("storage", "auto_flush_interval", BIG_FLUSH_INTERVAL)
            for i in range(loops):
                delete_ids = ids[i*ni_per: i*ni_per+ni_per]
                logger.debug("Delete %d - %d" % (delete_ids[0], delete_ids[-1]))
                milvus_instance.delete(delete_ids)
                logger.debug("Table row counts: %d" % milvus_instance.count())
            logger.debug("Table row counts: %d" % milvus_instance.count())
            milvus_instance.flush()
            logger.debug("Table row counts: %d" % milvus_instance.count())

        elif run_type == "build_performance":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
            index_type = collection["index_type"]
            index_param = collection["index_param"]
            if not milvus_instance.exists_collection():
                logger.error("Table name: %s not existed" % collection_name)
                return
            vector_type = self.get_vector_type(data_type)
            index_field_name = utils.get_default_field_name(vector_type)
            # drop index
            logger.debug("Drop index")
            milvus_instance.drop_index(index_field_name)
            start_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            start_time = time.time()
            milvus_instance.create_index(index_field_name, index_type, metric_type, index_param=index_param)
            end_time = time.time()
            logger.debug("Table row counts: %d" % milvus_instance.count())
            end_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            logger.debug("Diff memory: %s, current memory usage: %s, build time: %s" % ((end_mem_usage - start_mem_usage), end_mem_usage, round(end_time - start_time, 1)))

        elif run_type == "search_performance":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
            run_count = collection["run_count"]
            top_ks = collection["top_ks"]
            nqs = collection["nqs"]
            search_params = collection["search_params"]
            filter_query = []
            filters = collection["filters"] if "filters" in collection else []
            # pdb.set_trace()
            # ranges = collection["range"] if "range" in collection else None
            # terms = collection["term"] if "term" in collection else None
            # if ranges:
            #     filter_query.append(eval(ranges))
            # if terms:
            #     filter_query.append(eval(terms))
            vector_type = self.get_vector_type(data_type)
            vec_field_name = utils.get_default_field_name(vector_type)
            # for debugging
            # time.sleep(3600)
            if not milvus_instance.exists_collection():
                logger.error("Table name: %s not existed" % collection_name)
                return
            logger.info(milvus_instance.count())
            result = milvus_instance.describe_index()
            logger.info(result)
            milvus_instance.preload_collection()
            mem_usage = milvus_instance.get_mem_info()["memory_used"]
            logger.info(mem_usage)
            for search_param in search_params:
                logger.info("Search param: %s" % json.dumps(search_param))
                filter_param = []
                if not filters:
                    filters.append(None)
                for filter in filters:
                    if isinstance(filter, dict) and "range" in filter:
                        filter_query.append(eval(filter["range"]))
                        filter_param.append(filter["range"])
                    if isinstance(filter, dict) and "term" in filter:
                        filter_query.append(eval(filter["term"]))
                        filter_param.append(filter["term"])
                logger.info("filter param: %s" % json.dumps(filter_param))
                res = self.do_query(milvus_instance, collection_name, vec_field_name, top_ks, nqs, run_count, search_param, filter_query)
                headers = ["Nq/Top-k"]
                headers.extend([str(top_k) for top_k in top_ks])
                logger.info("Search param: %s" % json.dumps(search_param))
                utils.print_table(headers, nqs, res)
            mem_usage = milvus_instance.get_mem_info()["memory_used"]
            logger.info(mem_usage)

        elif run_type == "locust_search_performance":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
            ni_per = collection["ni_per"]
            build_index = collection["build_index"]
            vector_type = self.get_vector_type(data_type)
            index_field_name = utils.get_default_field_name(vector_type)
            # if build_index is True:
            #     index_type = collection["index_type"]
            #     index_param = collection["index_param"]
            #     # TODO: debug
            #     if milvus_instance.exists_collection():
            #         milvus_instance.drop()
            #         time.sleep(10)
            #     other_fields = collection["other_fields"] if "other_fields" in collection else None
            #     milvus_instance.create_collection(dimension, data_type=vector_type, other_fields=other_fields)
            #     milvus_instance.create_index(index_field_name, index_type, metric_type, index_param=index_param)
            #     res = self.do_insert(milvus_instance, collection_name, data_type, dimension, collection_size, ni_per)
            #     milvus_instance.flush()
            #     logger.debug("Table row counts: %d" % milvus_instance.count())
            #     if build_index is True:
            #         logger.debug("Start build index for last file")
            #         milvus_instance.create_index(index_field_name, index_type, metric_type, index_param=index_param)
            real_metric_type = utils.metric_type_trans(metric_type)
            ### spawn locust requests
            task = collection["task"]
            connection_type = "single"
            connection_num = task["connection_num"]
            if connection_num > 1:
                connection_type = "multi"
            clients_num = task["clients_num"]
            hatch_rate = task["hatch_rate"]
            during_time = utils.timestr_to_int(task["during_time"])
            task_types = task["types"]
            # """
            # task:
            #    connection_num: 1
            #    clients_num: 100
            #    hatch_rate: 2
            #    during_time: 5m
            #    types:
            #    -
            #      type: query
            #      weight: 1
            #      params:
            #        top_k: 10
            #        nq: 1
            #        # filters:
            #        #   -
            #        #     range:
            #        #       int64:
            #        #         LT: 0
            #        #         GT: 1000000
            #        search_param:
            #          nprobe: 16
            # """
            run_params = {"tasks": {}, "clients_num": clients_num, "spawn_rate": hatch_rate, "during_time": during_time}
            for task_type in task_types:
                run_params["tasks"].update({task_type["type"]: task_type["weight"] if "weight" in task_type else 1})

            # collect stats
            locust_stats = locust_user.locust_executor(self.host, self.port, collection_name, connection_type=connection_type, run_params=run_params)
            logger.info(locust_stats)

        elif run_type == "search_ids_stability":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
            search_params = collection["search_params"]
            during_time = collection["during_time"]
            ids_length = collection["ids_length"]
            ids = collection["ids"]
            logger.info(milvus_instance.count())
            index_info = milvus_instance.describe_index()
            logger.info(index_info)
            g_top_k = int(collection["top_ks"].split("-")[1])
            l_top_k = int(collection["top_ks"].split("-")[0])
            g_id = int(ids.split("-")[1])
            l_id = int(ids.split("-")[0])
            g_id_length = int(ids_length.split("-")[1])
            l_id_length = int(ids_length.split("-")[0])

            milvus_instance.preload_collection()
            start_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            logger.debug(start_mem_usage)
            start_time = time.time()
            while time.time() < start_time + during_time * 60:
                search_param = {}
                top_k = random.randint(l_top_k, g_top_k)
                ids_num = random.randint(l_id_length, g_id_length)
                l_ids = random.randint(l_id, g_id - ids_num)
                # ids_param = [random.randint(l_id_length, g_id_length) for _ in range(ids_num)]
                ids_param = [id for id in range(l_ids, l_ids + ids_num)]
                for k, v in search_params.items():
                    search_param[k] = random.randint(int(v.split("-")[0]), int(v.split("-")[1]))
                logger.debug("Query top-k: %d, ids_num: %d, param: %s" % (top_k, ids_num, json.dumps(search_param)))
                result = milvus_instance.query_ids(top_k, ids_param, search_param=search_param)
            end_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            metrics = {
                "during_time": during_time,
                "start_mem_usage": start_mem_usage,
                "end_mem_usage": end_mem_usage,
                "diff_mem": end_mem_usage - start_mem_usage,
            }
            logger.info(metrics)

        elif run_type == "search_performance_concurrents":
            data_type, dimension, metric_type = parser.parse_ann_collection_name(collection_name)
            hdf5_source_file = collection["source_file"]
            use_single_connection = collection["use_single_connection"]
            concurrents = collection["concurrents"]
            top_ks = collection["top_ks"]
            nqs = collection["nqs"]
            search_params = self.generate_combinations(collection["search_params"])
            if not milvus_instance.exists_collection():
                logger.error("Table name: %s not existed" % collection_name)
                return
            logger.info(milvus_instance.count())
            result = milvus_instance.describe_index()
            logger.info(result)
            milvus_instance.preload_collection()
            dataset = utils.get_dataset(hdf5_source_file)
            for concurrent_num in concurrents:
                top_k = top_ks[0]
                for nq in nqs:
                    mem_usage = milvus_instance.get_mem_info()["memory_used"]
                    logger.info(mem_usage)
                    query_vectors = self.normalize(metric_type, np.array(dataset["test"][:nq]))
                    logger.debug(search_params)
                    for search_param in search_params:
                        logger.info("Search param: %s" % json.dumps(search_param))
                        total_time = 0.0
                        if use_single_connection is True:
                            connections = [MilvusClient(collection_name=collection_name, host=self.host, port=self.port)]
                            with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_num) as executor:
                                future_results = {executor.submit(
                                    self.do_query_qps, connections[0], query_vectors, top_k, search_param=search_param): index for index in range(concurrent_num)}
                        else:
                            connections = [MilvusClient(collection_name=collection_name, host=self.host, port=self.port) for i in range(concurrent_num)]
                            with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_num) as executor:
                                future_results = {executor.submit(
                                    self.do_query_qps, connections[index], query_vectors, top_k, search_param=search_param): index for index in range(concurrent_num)}
                        for future in concurrent.futures.as_completed(future_results):
                            total_time = total_time + future.result()
                        qps_value = total_time / concurrent_num
                        logger.debug("QPS value: %f, total_time: %f, request_nums: %f" % (qps_value, total_time, concurrent_num))
                    mem_usage = milvus_instance.get_mem_info()["memory_used"]
                    logger.info(mem_usage)

        elif run_type == "ann_accuracy":
            hdf5_source_file = collection["source_file"]
            collection_name = collection["collection_name"]
            index_types = collection["index_types"]
            index_params = collection["index_params"]
            top_ks = collection["top_ks"]
            nqs = collection["nqs"]
            search_params = collection["search_params"]
            # mapping to search param list
            search_params = self.generate_combinations(search_params)
            # mapping to index param list
            index_params = self.generate_combinations(index_params)
            data_type, dimension, metric_type = parser.parse_ann_collection_name(collection_name)
            dataset = utils.get_dataset(hdf5_source_file)
            true_ids = np.array(dataset["neighbors"])
            vector_type = self.get_vector_type_from_metric(metric_type)
            vec_field_name = utils.get_default_field_name(vector_type)
            real_metric_type = utils.metric_type_trans(metric_type)

            # re-create collection
            if milvus_instance.exists_collection(collection_name):
                milvus_instance.drop()
                time.sleep(DELETE_INTERVAL_TIME)
            milvus_instance.create_collection(dimension, data_type=vector_type)
            insert_vectors = self.normalize(metric_type, np.array(dataset["train"]))
            if len(insert_vectors) != dataset["train"].shape[0]:
                raise Exception("Row count of insert vectors: %d is not equal to dataset size: %d" % (len(insert_vectors), dataset["train"].shape[0]))
            logger.debug("The row count of entities to be inserted: %d" % len(insert_vectors))
            # insert batch once
            # milvus_instance.insert(insert_vectors)
            loops = len(insert_vectors) // INSERT_INTERVAL + 1
            for i in range(loops):
                start = i * INSERT_INTERVAL
                end = min((i + 1) * INSERT_INTERVAL, len(insert_vectors))
                if start < end:
                    tmp_vectors = insert_vectors[start:end]
                    ids = [i for i in range(start, end)]
                    if not isinstance(tmp_vectors, list):
                        entities = milvus_instance.generate_entities(tmp_vectors.tolist(), ids)
                        res_ids = milvus_instance.insert(entities, ids=ids)
                    else:
                        entities = milvus_instance.generate_entities(tmp_vectors, ids)
                        res_ids = milvus_instance.insert(entities, ids=ids)
                    assert res_ids == ids
            milvus_instance.flush()
            res_count = milvus_instance.count()
            logger.info("Table: %s, row count: %d" % (collection_name, res_count))
            if res_count != len(insert_vectors):
                raise Exception("Table row count is not equal to insert vectors")
            for index_type in index_types:
                for index_param in index_params:
                    logger.debug("Building index with param: %s, metric_type: %s" % (json.dumps(index_param), metric_type))
                    milvus_instance.create_index(vec_field_name, index_type, metric_type, index_param=index_param)
                    logger.info("Start preload collection: %s" % collection_name)
                    milvus_instance.preload_collection()
                    for search_param in search_params:
                        for nq in nqs:
                            query_vectors = self.normalize(metric_type, np.array(dataset["test"][:nq]))
                            if not isinstance(query_vectors, list):
                                query_vectors = query_vectors.tolist()
                            for top_k in top_ks:
                                logger.debug("Search nq: %d, top-k: %d, search_param: %s, metric_type: %s" % (nq, top_k, json.dumps(search_param), metric_type))
                                vector_query = {"vector": {vec_field_name: {
                                    "topk": top_k,
                                    "query": query_vectors,
                                    "metric_type": real_metric_type,
                                    "params": search_param}
                                }}
                                result = milvus_instance.query(vector_query)
                                result_ids = milvus_instance.get_ids(result)
                                # pdb.set_trace()
                                acc_value = self.get_recall_value(true_ids[:nq, :top_k].tolist(), result_ids)
                                logger.info("Query ann_accuracy: %s" % acc_value)

        elif run_type == "accuracy":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
            search_params = collection["search_params"]
            # mapping to search param list
            search_params = self.generate_combinations(search_params)

            top_ks = collection["top_ks"]
            nqs = collection["nqs"]
            collection_info = {
                "dimension": dimension,
                "metric_type": metric_type,
                "dataset_name": collection_name
            }
            if not milvus_instance.exists_collection():
                logger.error("Table name: %s not existed" % collection_name)
                return
            logger.info(milvus_instance.count())
            index_info = milvus_instance.describe_index()
            logger.info(index_info)
            milvus_instance.preload_collection()
            true_ids_all = self.get_groundtruth_ids(collection_size)
            vector_type = self.get_vector_type(data_type)
            vec_field_name = utils.get_default_field_name(vector_type)
            for search_param in search_params:
                headers = ["Nq/Top-k"]
                res = []
                for nq in nqs:
                    tmp_res = []
                    for top_k in top_ks:
                        search_param_group = {
                            "nq": nq,
                            "topk": top_k,
                            "search_param": search_param,
                            "metric_type": metric_type
                        }
                        logger.info("Query params: %s" % json.dumps(search_param_group))
                        result_ids = self.do_query_ids(milvus_instance, collection_name, vec_field_name, top_k, nq, search_param=search_param)
                        mem_used = milvus_instance.get_mem_info()["memory_used"]
                        acc_value = self.get_recall_value(true_ids_all[:nq, :top_k].tolist(), result_ids)
                        logger.info("Query accuracy: %s" % acc_value)
                        tmp_res.append(acc_value)
                        logger.info("Memory usage: %s" % mem_used)
                    res.append(tmp_res)
                headers.extend([str(top_k) for top_k in top_ks])
                logger.info("Search param: %s" % json.dumps(search_param))
                utils.print_table(headers, nqs, res)

        elif run_type == "stability":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
            during_time = collection["during_time"]
            operations = collection["operations"]
            if not milvus_instance.exists_collection():
                logger.error(milvus_instance.show_collections())
                raise Exception("Table name: %s not existed" % collection_name)
            milvus_instance.preload_collection()
            start_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            start_row_count = milvus_instance.count()
            logger.info(start_row_count)
            vector_type = self.get_vector_type(data_type)
            vec_field_name = utils.get_default_field_name(vector_type)
            real_metric_type = utils.metric_type_trans(metric_type)
            query_vectors = [[random.random() for _ in range(dimension)] for _ in range(10000)]
            if "insert" in operations:
                insert_xb = operations["insert"]["xb"]
            if "delete" in operations:
                delete_xb = operations["delete"]["xb"]
            if "query" in operations:
                g_top_k = int(operations["query"]["top_ks"].split("-")[1])
                l_top_k = int(operations["query"]["top_ks"].split("-")[0])
                g_nq = int(operations["query"]["nqs"].split("-")[1])
                l_nq = int(operations["query"]["nqs"].split("-")[0])
                search_params = operations["query"]["search_params"]
            i = 0
            start_time = time.time()
            while time.time() < start_time + during_time * 60:
                i = i + 1
                q = self.gen_executors(operations)
                for name in q:
                    try:
                        if name == "insert":
                            insert_ids = random.sample(list(range(collection_size)), insert_xb)
                            insert_vectors = [[random.random() for _ in range(dimension)] for _ in range(insert_xb)]
                            entities = milvus_instance.generate_entities(insert_vectors, insert_ids)
                            milvus_instance.insert(entities, ids=insert_ids)
                        elif name == "delete":
                            delete_ids = random.sample(list(range(collection_size)), delete_xb)
                            milvus_instance.delete(delete_ids)
                        elif name == "query":
                            top_k = random.randint(l_top_k, g_top_k)
                            nq = random.randint(l_nq, g_nq)
                            search_param = {}
                            for k, v in search_params.items():
                                search_param[k] = random.randint(int(v.split("-")[0]), int(v.split("-")[1]))
                            logger.debug("Query nq: %d, top-k: %d, param: %s" % (nq, top_k, json.dumps(search_param)))
                            vector_query = {"vector": {vec_field_name: {
                                "topk": top_k,
                                "query": query_vectors[:nq],
                                "metric_type": real_metric_type,
                                "params": search_param}
                            }}
                            result = milvus_instance.query(vector_query)
                        elif name in ["flush", "compact"]:
                            func = getattr(milvus_instance, name)
                            func()
                        logger.debug(milvus_instance.count())
                    except Exception as e:
                        logger.error(name)
                        logger.error(str(e))
                        raise
                logger.debug("Loop time: %d" % i)
            end_mem_usage = milvus_instance.get_mem_info()["memory_used"]
            end_row_count = milvus_instance.count()
            metrics = {
                "during_time": during_time,
                "start_mem_usage": start_mem_usage,
                "end_mem_usage": end_mem_usage,
                "diff_mem": end_mem_usage - start_mem_usage,
                "row_count_increments": end_row_count - start_row_count
            }
            logger.info(metrics)

        elif run_type == "loop_stability":
            # init data
            milvus_instance.clean_db()
            pull_interval = collection["pull_interval"]
            collection_num = collection["collection_num"]
            concurrent = collection["concurrent"] if "concurrent" in collection else False
            concurrent_num = collection_num
            dimension = collection["dimension"] if "dimension" in collection else 128
            insert_xb = collection["insert_xb"] if "insert_xb" in collection else 100000
            index_types = collection["index_types"] if "index_types" in collection else ['ivf_sq8']
            index_param = {"nlist": 256}
            collection_names = []
            milvus_instances_map = {}
            insert_vectors = [[random.random() for _ in range(dimension)] for _ in range(insert_xb)]
            ids = [i for i in range(insert_xb)]
            # initialize and prepare
            for i in range(collection_num):
                name = utils.get_unique_name(prefix="collection_%d_" % i)
                collection_names.append(name)
                metric_type = random.choice(["l2", "ip"])
                # default float_vector
                milvus_instance = MilvusClient(collection_name=name, host=self.host)
                milvus_instance.create_collection(dimension, other_fields=None)
                index_type = random.choice(index_types)
                field_name = utils.get_default_field_name()
                milvus_instance.create_index(field_name, index_type, metric_type, index_param=index_param)
                logger.info(milvus_instance.describe_index())
                insert_vectors = utils.normalize(metric_type, insert_vectors)
                entities = milvus_instance.generate_entities(insert_vectors, ids)
                res_ids = milvus_instance.insert(entities, ids=ids)
                milvus_instance.flush()
                milvus_instances_map.update({name: milvus_instance})
                logger.info(milvus_instance.describe_index())

            # loop time unit: min -> s
            pull_interval_seconds = pull_interval * 60
            tasks = ["insert_rand", "delete_rand", "query_rand", "flush", "compact"]
            i = 1
            while True:
                logger.info("Loop time: %d" % i)
                start_time = time.time()
                while time.time() - start_time < pull_interval_seconds:
                    if concurrent:
                        threads = []
                        for name in collection_names:
                            task_name = random.choice(tasks)
                            task_run = getattr(milvus_instances_map[name], task_name)
                            t = threading.Thread(target=task_run, args=())
                            threads.append(t)
                            t.start()
                        for t in threads:
                            t.join()
                        # with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_num) as executor:
                        #     future_results = {executor.submit(getattr(milvus_instances_map[mp[j][0]], mp[j][1])): j for j in range(concurrent_num)}
                        #     for future in concurrent.futures.as_completed(future_results):
                        #         future.result()
                    else:
                        tmp_collection_name = random.choice(collection_names)
                        task_name = random.choice(tasks)
                        logger.info(tmp_collection_name)
                        logger.info(task_name)
                        task_run = getattr(milvus_instances_map[tmp_collection_name], task_name)
                        task_run()
                # new connection
                # for name in collection_names:
                #     milvus_instance = MilvusClient(collection_name=name, host=self.host)
                #     milvus_instances_map.update({name: milvus_instance})
                i = i + 1

        elif run_type == "locust_mix_performance":
            (data_type, collection_size, dimension, metric_type) = parser.collection_parser(
                collection_name)
            ni_per = collection["ni_per"]
            build_index = collection["build_index"]
            vector_type = self.get_vector_type(data_type)
            index_field_name = utils.get_default_field_name(vector_type)
            # drop exists collection
            if milvus_instance.exists_collection():
                milvus_instance.drop()
                time.sleep(10)
            # create collection
            other_fields = collection["other_fields"] if "other_fields" in collection else None
            milvus_instance.create_collection(dimension, data_type=DataType.FLOAT_VECTOR, collection_name=collection_name, other_fields=other_fields)
            logger.info(milvus_instance.get_info())
            # insert entities
            insert_vectors = [[random.random() for _ in range(dimension)] for _ in range(ni_per)]
            insert_ids = random.sample(list(range(collection_size)), ni_per)
            insert_vectors = utils.normalize(metric_type, insert_vectors)
            entities = milvus_instance.generate_entities(insert_vectors, insert_ids, collection_name)
            milvus_instance.insert(entities, ids=insert_ids)
            # flush
            milvus_instance.flush()
            logger.info(milvus_instance.get_stats())
            logger.debug("Table row counts: %d" % milvus_instance.count())
            # create index
            if build_index is True:
                index_type = collection["index_type"]
                index_param = collection["index_param"]
                logger.debug("Start build index for last file")
                milvus_instance.create_index(index_field_name, index_type, metric_type, index_param)
                logger.debug(milvus_instance.describe_index())
            # locust
            task = collection["tasks"]
            task_file = utils.get_unique_name()
            task_file_script = task_file + '.py'
            task_file_csv = task_file + '_stats.csv'
            task_types = task["types"]
            connection_type = "single"
            connection_num = task["connection_num"]
            if connection_num > 1:
                connection_type = "multi"
            clients_num = task["clients_num"]
            hatch_rate = task["hatch_rate"]
            during_time = task["during_time"]
            def_strs = ""
            # generate one "def" code block per task type
            for task_type in task_types:
                type = task_type["type"]
                weight = task_type["weight"]
                if type == "flush":
                    def_str = """
    @task(%d)
    def flush(self):
        client = get_client(collection_name)
        client.flush(collection_name=collection_name)
""" % weight
                if type == "compact":
                    def_str = """
    @task(%d)
    def compact(self):
        client = get_client(collection_name)
        client.compact(collection_name)
""" % weight
                if type == "query":
                    def_str = """
    @task(%d)
    def query(self):
        client = get_client(collection_name)
        params = %s
        X = [[random.random() for i in range(dim)] for i in range(params["nq"])]
        vector_query = {"vector": {"%s": {
            "topk": params["top_k"],
            "query": X,
            "metric_type": "%s",
            "params": params["search_param"]}}}
        client.query(vector_query, filter_query=params["filters"], collection_name=collection_name)
""" % (weight, task_type["params"], index_field_name, utils.metric_type_trans(metric_type))
                if type == "insert":
                    def_str = """
    @task(%d)
    def insert(self):
        client = get_client(collection_name)
        params = %s
        insert_ids = random.sample(list(range(100000)), params["nb"])
        insert_vectors = [[random.random() for _ in range(dim)] for _ in range(params["nb"])]
        insert_vectors = utils.normalize("l2", insert_vectors)
        entities = generate_entities(insert_vectors, insert_ids)
        client.insert(entities, ids=insert_ids, collection_name=collection_name)
""" % (weight, task_type["params"])
                if type == "delete":
                    def_str = """
    @task(%d)
    def delete(self):
        client = get_client(collection_name)
        ids = [random.randint(1, 1000000) for i in range(1)]
        client.delete(ids, collection_name)
""" % weight
                def_strs += def_str
            print(def_strs)
            # define the locust file body
            code_str = """
import random
import json
from locust import User, task, between
from locust_task import MilvusTask
from client import MilvusClient
import utils

host = '%s'
port = %s
collection_name = '%s'
dim = %s
connection_type = '%s'
m = MilvusClient(host=host, port=port)


def get_client(collection_name):
    if connection_type == 'single':
        return MilvusTask(m=m)
    elif connection_type == 'multi':
        return MilvusTask(connection_type='multi', host=host, port=port, collection_name=collection_name)


def generate_entities(vectors, ids):
    return m.generate_entities(vectors, ids, collection_name)


class MixTask(User):
    wait_time = between(0.001, 0.002)
%s
""" % (self.host, self.port, collection_name, dimension, connection_type, def_strs)
            with open(task_file_script, "w+") as fd:
                fd.write(code_str)
            locust_cmd = "locust -f %s --headless --csv=%s -u %d -r %d -t %s" % (
                task_file_script,
                task_file,
                clients_num,
                hatch_rate,
                during_time)
            logger.info(locust_cmd)
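            # Note on the flags: this is the locust (>=1.0) headless CLI, where
            # -u is the total user count, -r the spawn rate per second and -t the
            # run time; --csv=<prefix> makes locust write "<prefix>_stats.csv",
            # the task_file_csv that is read back below for the aggregated row.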
            try:
                res = os.system(locust_cmd)
            except Exception as e:
                logger.error(str(e))
                return

            # retrieve and collect test statistics
            metric = None
            with open(task_file_csv, newline='') as fd:
                dr = csv.DictReader(fd)
                for row in dr:
                    if row["Name"] != "Aggregated":
                        continue
                    metric = row
            logger.info(metric)

        else:
            raise Exception("Run type not defined")
        logger.debug("All test finished")
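As a reference for the local-mode branches above, a minimal `search_performance` suite entry might look like the following sketch; the keys mirror what the code reads and the values are illustrative only.

    search_performance:
      collections:
        -
          collection_name: sift_1m_2000000_128_l2
          run_count: 2
          top_ks: [1, 10, 100]
          nqs: [1, 10, 100]
          search_params:
            -
              nprobe: 8
            -
              nprobe: 32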
@ -1,33 +0,0 @@
import random
from locust import User, task, between
from locust_task import MilvusTask
from client import MilvusClient
from milvus import DataType

connection_type = "single"
host = "192.168.1.6"
port = 19530
collection_name = "create_collection_CZkkwJgo"
dim = 128
nb = 50000
m = MilvusClient(host=host, port=port, collection_name=collection_name)
m.clean_db()
m.create_collection(dim, data_type=DataType.FLOAT_VECTOR, auto_id=True, other_fields=None)
vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
entities = m.generate_entities(vectors)


class FlushTask(User):
    wait_time = between(0.001, 0.002)
    if connection_type == "single":
        client = MilvusTask(m=m)
    else:
        client = MilvusTask(host=host, port=port, collection_name=collection_name)

    # def insert(self):
    #     self.client.insert(entities)

    @task(1)
    def flush(self):
        self.client.insert(entities)
        self.client.flush(collection_name)
@ -1,36 +0,0 @@
import logging
import random
from locust import User, task, between
from locust_task import MilvusTask
from client import MilvusClient
from milvus import DataType

connection_type = "single"
host = "192.168.1.6"
port = 19530
collection_name = "sift_10m_100000_128_l2"
dim = 128
m = MilvusClient(host=host, port=port, collection_name=collection_name)
# m.clean_db()
# m.create_collection(dim, data_type=DataType.FLOAT_VECTOR, auto_id=True, other_fields=None)
nb = 6000
# vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
# entities = m.generate_entities(vectors)
ids = [i for i in range(nb)]


class GetEntityTask(User):
    wait_time = between(0.001, 0.002)
    if connection_type == "single":
        client = MilvusTask(m=m)
    else:
        client = MilvusTask(host=host, port=port, collection_name=collection_name)

    # def insert(self):
    #     self.client.insert(entities)

    @task(1)
    def get_entity_by_id(self):
        # num = random.randint(100, 200)
        # get_ids = random.sample(ids, num)
        self.client.get_entities([0])
        # logging.getLogger().info(len(get_res))
@ -1,33 +0,0 @@
import random
from locust import User, task, between
from locust_task import MilvusTask
from client import MilvusClient
from milvus import DataType

connection_type = "single"
host = "192.168.1.6"
port = 19530
collection_name = "create_collection_hello"
dim = 128
nb = 50000
m = MilvusClient(host=host, port=port, collection_name=collection_name)
# m.clean_db()
m.create_collection(dim, data_type=DataType.FLOAT_VECTOR, auto_id=True, other_fields=None)
vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
entities = m.generate_entities(vectors)


class FlushTask(User):
    wait_time = between(0.001, 0.002)
    if connection_type == "single":
        client = MilvusTask(m=m)
    else:
        client = MilvusTask(host=host, port=port, collection_name=collection_name)

    @task(1)
    def insert(self):
        self.client.insert(entities)

    # @task(1)
    # def create_partition(self):
    #     tag = 'tag_'.join(random.choice(string.ascii_letters) for _ in range(8))
    #     self.client.create_partition(tag, collection_name)
@ -1,46 +0,0 @@
import random
from client import MilvusClient
from locust_task import MilvusTask
from locust import User, task, between

connection_type = "single"
host = "172.16.50.9"
port = 19530
collection_name = "sift_1m_2000000_128_l2_2"
m = MilvusClient(host=host, port=port, collection_name=collection_name)
dim = 128
top_k = 5
nq = 1
X = [[random.random() for i in range(dim)] for i in range(nq)]
search_params = {"nprobe": 16}
vector_query = {"vector": {'float_vector': {
    "topk": top_k,
    "query": X,
    "params": search_params,
    'metric_type': 'L2'}}}
# m.clean_db()


class QueryTask(User):
    wait_time = between(0.001, 0.002)

    def preload(self):
        self.client.preload_collection()

    @task(10)
    def query(self):
        if connection_type == "single":
            client = MilvusTask(m=m, connection_type=connection_type)
        elif connection_type == "multi":
            client = MilvusTask(host, port, collection_name, connection_type=connection_type)
        top_k = 10
        search_param = {"nprobe": 16}
        X = [[random.random() for i in range(dim)]]
        vector_query = {"vector": {"float_vector": {
            "topk": top_k,
            "query": X,
            "metric_type": "L2",
            "params": search_param}
        }}
        filter_query = None
        client.query(vector_query, filter_query=filter_query, collection_name=collection_name)
@ -1,45 +0,0 @@
import random
import time
import logging
from locust import TaskSet, task

dim = 128
X = [[random.random() for _ in range(dim)] for _ in range(1)]


class Tasks(TaskSet):

    @task
    def query(self):
        top_k = 10
        search_param = {"nprobe": 16}
        X = [[random.random() for i in range(dim)]]
        vector_query = {"vector": {"float_vector": {
            "topk": top_k,
            "query": X,
            "metric_type": "L2",
            "params": search_param}
        }}
        filter_query = None
        self.client.query(vector_query, filter_query=filter_query, log=False)

    @task
    def flush(self):
        self.client.flush(log=False)

    @task
    def get(self):
        self.client.get()

    @task
    def delete(self):
        self.client.delete([random.randint(1, 1000000)], log=False)

    def insert(self):
        ids = [random.randint(1, 10000000)]
        entities = self.client.generate_entities(X, ids)
        self.client.insert(entities, ids, log=False)

    @task
    def insert_rand(self):
        self.client.insert_rand(log=False)
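The Tasks set above calls `self.client`, which locust resolves to the owning user's `client` attribute; that user is defined in locust_user.py, which is not part of this diff. A minimal sketch of such a wiring, reusing the MilvusTask wrapper from the task files above (the host, port and weight here are illustrative):

    from locust import User, between
    from locust_task import MilvusTask
    from client import MilvusClient

    class MilvusUser(User):
        tasks = {Tasks: 1}  # run the Tasks set above
        wait_time = between(0.001, 0.002)
        # shared single-connection client, as in the other task files
        client = MilvusTask(m=MilvusClient(host="127.0.0.1", port=19530))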
@ -1,18 +0,0 @@
from locust_user import locust_executor
from client import MilvusClient
from milvus import DataType


if __name__ == "__main__":
    connection_type = "single"
    host = "192.168.1.239"
    # host = "172.16.50.15"
    port = 19530
    collection_name = "sift_1m_2000000_128_l2_2"
    run_params = {"tasks": {"insert_rand": 5, "query": 10, "flush": 2}, "clients_num": 10, "spawn_rate": 2, "during_time": 3600}
    dim = 128
    m = MilvusClient(host=host, port=port, collection_name=collection_name)
    m.clean_db()
    m.create_collection(dim, data_type=DataType.FLOAT_VECTOR, auto_id=False, other_fields=None)

    locust_executor(host, port, collection_name, run_params=run_params)
@ -1,199 +0,0 @@
import os
import sys
import time
from datetime import datetime
import pdb
import argparse
import logging
import traceback
from multiprocessing import Process
from queue import Queue
from logging import handlers
from yaml import full_load, dump
from local_runner import LocalRunner
from docker_runner import DockerRunner
import parser

DEFAULT_IMAGE = "milvusdb/milvus:latest"
LOG_FOLDER = "logs"
NAMESPACE = "milvus"
LOG_PATH = "/test/milvus/benchmark/logs/"
BRANCH = "0.11.1"

logger = logging.getLogger('milvus_benchmark')
logger.setLevel(logging.INFO)
# create file handler which logs even debug messages
fh = logging.FileHandler(LOG_PATH + 'benchmark-{}-{:%Y-%m-%d}.log'.format(BRANCH, datetime.now()))
fh.setLevel(logging.DEBUG)
# create console handler with a higher log level
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
# create formatter and add it to the handlers
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)
ch.setFormatter(formatter)
# add the handlers to the logger
logger.addHandler(fh)
logger.addHandler(ch)


def positive_int(s):
    i = None
    try:
        i = int(s)
    except ValueError:
        pass
    if not i or i < 1:
        raise argparse.ArgumentTypeError("%r is not a positive integer" % s)
    return i


def get_image_tag(image_version, image_type):
    return "%s-%s-centos7-release" % (image_version, image_type)
    # return "%s-%s-centos7-release" % ("0.7.1", image_type)
    # return "%s-%s-centos7-release" % ("PR-2780", image_type)


def queue_worker(queue):
    from k8s_runner import K8sRunner
    while not queue.empty():
        q = queue.get()
        suite = q["suite"]
        server_host = q["server_host"]
        deploy_mode = q["deploy_mode"]
        image_type = q["image_type"]
        image_tag = q["image_tag"]

        with open(suite) as f:
            suite_dict = full_load(f)
            f.close()
        logger.debug(suite_dict)

        run_type, run_params = parser.operations_parser(suite_dict)
        collections = run_params["collections"]
        for collection in collections:
            # run tests
            milvus_config = collection["milvus"] if "milvus" in collection else None
            server_config = collection["server"] if "server" in collection else None
            logger.debug(milvus_config)
            logger.debug(server_config)
            runner = K8sRunner()
            if runner.init_env(milvus_config, server_config, server_host, deploy_mode, image_type, image_tag):
                logger.debug("Start run tests")
                try:
                    runner.run(run_type, collection)
                except Exception as e:
                    logger.error(str(e))
                    logger.error(traceback.format_exc())
                finally:
                    time.sleep(60)
                    runner.clean_up()
            else:
                logger.error("Runner init failed")
        if server_host:
            logger.debug("All task finished in queue: %s" % server_host)


def main():
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # helm mode with scheduler
    arg_parser.add_argument(
        "--image-version",
        default="",
        help="image version")
    arg_parser.add_argument(
        "--schedule-conf",
        metavar='FILE',
        default='',
        help="load test schedule from FILE")
    arg_parser.add_argument(
        "--deploy-mode",
        default='',
        help="single node or multi nodes")

    # local mode
    arg_parser.add_argument(
        '--local',
        action='store_true',
        help='use local milvus server')
    arg_parser.add_argument(
        '--host',
        help='server host ip param for local mode',
        default='127.0.0.1')
    arg_parser.add_argument(
        '--port',
        help='server port param for local mode',
        default='19530')
    arg_parser.add_argument(
        '--suite',
        metavar='FILE',
        help='load test suite from FILE',
        default='')

    args = arg_parser.parse_args()

    if args.schedule_conf:
        if args.local:
            raise Exception("Helm mode with scheduler and other mode are incompatible")
        if not args.image_version:
            raise Exception("Image version not given")
        image_version = args.image_version
        deploy_mode = args.deploy_mode
        with open(args.schedule_conf) as f:
            schedule_config = full_load(f)
            f.close()
        queues = []
        # server_names = set()
        server_names = []
        for item in schedule_config:
            server_host = item["server"] if "server" in item else ""
            suite_params = item["suite_params"]
            server_names.append(server_host)
            q = Queue()
            for suite_param in suite_params:
                suite = "suites/" + suite_param["suite"]
                image_type = suite_param["image_type"]
                image_tag = get_image_tag(image_version, image_type)
                q.put({
                    "suite": suite,
                    "server_host": server_host,
                    "deploy_mode": deploy_mode,
                    "image_tag": image_tag,
                    "image_type": image_type
                })
            queues.append(q)
        logger.debug(queues)
        thread_num = len(server_names)
        processes = []

        for i in range(thread_num):
            x = Process(target=queue_worker, args=(queues[i], ))
            processes.append(x)
            x.start()
            time.sleep(10)
        for x in processes:
            x.join()

        # queue_worker(queues[0])

    elif args.local:
        # for local mode
        host = args.host
        port = args.port
        suite = args.suite
        with open(suite) as f:
            suite_dict = full_load(f)
            f.close()
        logger.debug(suite_dict)
        run_type, run_params = parser.operations_parser(suite_dict)
        collections = run_params["collections"]
        if len(collections) > 1:
            raise Exception("Multi collections not supported in Local Mode")
        collection = collections[0]
        runner = LocalRunner(host, port)
        logger.info("Start run local mode test, test type: %s" % run_type)
        runner.run(run_type, collection)


if __name__ == "__main__":
    main()
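For the scheduler branch above, a schedule conf could look like the following sketch. Each item maps to one queue and one worker process; the keys mirror what main() reads, while the host and suite file names are illustrative only (suite names get prefixed with "suites/").

    -
      server: benchmark-server-1
      suite_params:
        -
          suite: insert_performance_sift1m.yaml
          image_type: cpu
        -
          suite: search_performance_sift1m.yaml
          image_type: gpu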
@ -1,42 +0,0 @@
import random
from locust import User, task, between
from locust_task import MilvusTask
from client import MilvusClient

connection_type = "single"
host = "192.168.1.29"
port = 19530
collection_name = "sift_128_euclidean"
dim = 128
m = MilvusClient(host=host, port=port, collection_name=collection_name)


class MixTask(User):
    wait_time = between(0.001, 0.002)
    print("in query task")
    if connection_type == "single":
        client = MilvusTask(m=m)
    else:
        client = MilvusTask(host=host, port=port, collection_name=collection_name)

    @task(30)
    def query(self):
        top_k = 10
        X = [[random.random() for i in range(dim)] for i in range(1)]
        search_param = {"nprobe": 16}
        self.client.query(X, top_k, search_param)

    @task(10)
    def insert(self):
        id = random.randint(10000000, 10000000000)
        X = [[random.random() for i in range(dim)] for i in range(1)]
        self.client.insert(X, ids=[id])

    @task(1)
    def flush(self):
        self.client.flush()

    # @task(5)
    # def delete(self):
    #     self.client.delete([random.randint(1, 1000000)])
@ -1,10 +0,0 @@
from __future__ import absolute_import
import pdb
import time


class Base(object):
    pass


class Insert(Base):
    pass
@ -1,12 +0,0 @@
pymilvus-test>=0.5.0,<0.6.0
scipy>=1.3.1
scikit-learn>=0.19.1
h5py>=2.7.1
# influxdb==5.2.2
pyyaml>=5.1
tableprint==0.8.0
ansicolors==1.1.8
kubernetes==10.0.1
# rq==1.2.0
locust>=1.3.2
pymongo==3.10.0
@ -1,11 +0,0 @@
class Reporter(object):
    def __init__(self):
        pass

    def report(self, result):
        pass


class BaseResult(object):
    pass
@ -1,369 +0,0 @@
import os
import threading
import logging
import pdb
import time
import random
import grpc
from multiprocessing import Process
from itertools import product
import numpy as np
import sklearn.preprocessing
from milvus import DataType
from client import MilvusClient
import utils
import parser

logger = logging.getLogger("milvus_benchmark.runner")

VECTORS_PER_FILE = 1000000
SIFT_VECTORS_PER_FILE = 100000
BINARY_VECTORS_PER_FILE = 2000000

MAX_NQ = 10001
FILE_PREFIX = "binary_"

# FOLDER_NAME = 'ann_1000m/source_data'
SRC_BINARY_DATA_DIR = '/test/milvus/raw_data/random/'
SIFT_SRC_DATA_DIR = '/test/milvus/raw_data/sift1b/'
DEEP_SRC_DATA_DIR = '/test/milvus/raw_data/deep1b/'
BINARY_SRC_DATA_DIR = '/test/milvus/raw_data/binary/'
SIFT_SRC_GROUNDTRUTH_DATA_DIR = SIFT_SRC_DATA_DIR + 'gnd'

WARM_TOP_K = 1
WARM_NQ = 1
DEFAULT_DIM = 512


GROUNDTRUTH_MAP = {
    "1000000": "idx_1M.ivecs",
    "2000000": "idx_2M.ivecs",
    "5000000": "idx_5M.ivecs",
    "10000000": "idx_10M.ivecs",
    "20000000": "idx_20M.ivecs",
    "50000000": "idx_50M.ivecs",
    "100000000": "idx_100M.ivecs",
    "200000000": "idx_200M.ivecs",
    "500000000": "idx_500M.ivecs",
    "1000000000": "idx_1000M.ivecs",
}


def gen_file_name(idx, dimension, data_type):
    s = "%05d" % idx
    fname = FILE_PREFIX + str(dimension) + "d_" + s + ".npy"
    if data_type == "random":
        fname = SRC_BINARY_DATA_DIR + fname
    elif data_type == "sift":
        fname = SIFT_SRC_DATA_DIR + fname
    elif data_type == "deep":
        fname = DEEP_SRC_DATA_DIR + fname
    elif data_type == "binary":
        fname = BINARY_SRC_DATA_DIR + fname
    return fname


def get_vectors_from_binary(nq, dimension, data_type):
    # use the first file, nq should be less than VECTORS_PER_FILE
    if nq > MAX_NQ:
        raise Exception("Over size nq")
    if data_type == "random":
        file_name = SRC_BINARY_DATA_DIR + 'query_%d.npy' % dimension
    elif data_type == "sift":
        file_name = SIFT_SRC_DATA_DIR + 'query.npy'
    elif data_type == "deep":
        file_name = DEEP_SRC_DATA_DIR + 'query.npy'
    elif data_type == "binary":
        file_name = BINARY_SRC_DATA_DIR + 'query.npy'
    data = np.load(file_name)
    vectors = data[0:nq].tolist()
    return vectors


class Runner(object):
    def __init__(self):
        pass

    def gen_executors(self, operations):
        l = []
        for name, operation in operations.items():
            weight = operation["weight"] if "weight" in operation else 1
            l.extend([name] * weight)
        random.shuffle(l)
        return l
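    # Example (illustrative): operations like {"insert": {"weight": 1}, "query": {"weight": 2}}
    # yield a shuffled weighted list such as ["query", "insert", "query"], so each
    # stability loop executes the operations in random order, proportionally to weight.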
|
||||
def get_vector_type(self, data_type):
|
||||
vector_type = ''
|
||||
if data_type in ["random", "sift", "deep", "glove"]:
|
||||
vector_type = DataType.FLOAT_VECTOR
|
||||
elif data_type in ["binary"]:
|
||||
vector_type = DataType.BINARY_VECTOR
|
||||
else:
|
||||
raise Exception("Data type: %s not defined" % data_type)
|
||||
return vector_type
|
||||
|
||||
def get_vector_type_from_metric(self, metric_type):
|
||||
vector_type = ''
|
||||
if metric_type in ["hamming", "jaccard"]:
|
||||
vector_type = DataType.BINARY_VECTOR
|
||||
else:
|
||||
vector_type = DataType.FLOAT_VECTOR
|
||||
return vector_type
|
||||
|
||||
def normalize(self, metric_type, X):
|
||||
if metric_type == "ip":
|
||||
logger.info("Set normalize for metric_type: %s" % metric_type)
|
||||
X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
|
||||
X = X.astype(np.float32)
|
||||
elif metric_type == "l2":
|
||||
X = X.astype(np.float32)
|
||||
elif metric_type in ["jaccard", "hamming", "sub", "super"]:
|
||||
tmp = []
|
||||
for item in X:
|
||||
new_vector = bytes(np.packbits(item, axis=-1).tolist())
|
||||
tmp.append(new_vector)
|
||||
X = tmp
|
||||
return X
|
||||
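# For example, with metric_type "ip" each row is scaled to unit L2 norm:
# normalize("ip", np.array([[3.0, 4.0]])) returns array([[0.6, 0.8]], dtype=float32),
# while the binary metrics pack each 0/1 vector into bytes for the binary index types.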
|
||||
def generate_combinations(self, args):
|
||||
if isinstance(args, list):
|
||||
args = [el if isinstance(el, list) else [el] for el in args]
|
||||
return [list(x) for x in product(*args)]
|
||||
elif isinstance(args, dict):
|
||||
flat = []
|
||||
for k, v in args.items():
|
||||
if isinstance(v, list):
|
||||
flat.append([(k, el) for el in v])
|
||||
else:
|
||||
flat.append([(k, v)])
|
||||
return [dict(x) for x in product(*flat)]
|
||||
else:
|
||||
raise TypeError("No args handling exists for %s" % type(args).__name__)
|
||||
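# For example, generate_combinations({"nlist": [1024, 2048], "nprobe": 16})
# returns [{"nlist": 1024, "nprobe": 16}, {"nlist": 2048, "nprobe": 16}],
# i.e. the cartesian product over every list-valued parameter.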
|
||||
def do_insert(self, milvus, collection_name, data_type, dimension, size, ni):
|
||||
'''
|
||||
@params:
|
||||
milvus: server connection instance
|
||||
dimension: collection dimension
|
||||
# index_file_size: size trigger file merge
|
||||
size: row count of vectors to be inserted
|
||||
ni: row count of vectors to be inserted in each batch
|
||||
# store_id: if store the ids returned by call add_vectors or not
|
||||
@return:
|
||||
total_time: total time for all insert operations
|
||||
qps: vectors added per second
|
||||
ni_time: average insert operation time
|
||||
'''
|
||||
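# Example return value (hypothetical numbers, for size=1000000 and ni=50000):
# {"total_time": 120.5, "qps": 8298.76, "ni_time": 6.03}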
bi_res = {}
|
||||
total_time = 0.0
|
||||
qps = 0.0
|
||||
ni_time = 0.0
|
||||
if data_type == "random":
|
||||
if dimension == 512:
|
||||
vectors_per_file = VECTORS_PER_FILE
|
||||
elif dimension == 4096:
|
||||
vectors_per_file = 100000
|
||||
elif dimension == 16384:
|
||||
vectors_per_file = 10000
|
||||
elif data_type == "sift":
|
||||
vectors_per_file = SIFT_VECTORS_PER_FILE
|
||||
elif data_type in ["binary"]:
|
||||
vectors_per_file = BINARY_VECTORS_PER_FILE
|
||||
else:
|
||||
raise Exception("data_type: %s not supported" % data_type)
|
||||
if size % vectors_per_file or size % ni:
|
||||
raise Exception("Not invalid collection size or ni")
|
||||
i = 0
|
||||
while i < (size // vectors_per_file):
|
||||
vectors = []
|
||||
if vectors_per_file >= ni:
|
||||
file_name = gen_file_name(i, dimension, data_type)
|
||||
# logger.info("Load npy file: %s start" % file_name)
|
||||
data = np.load(file_name)
|
||||
# logger.info("Load npy file: %s end" % file_name)
|
||||
for j in range(vectors_per_file // ni):
|
||||
vectors = data[j*ni:(j+1)*ni].tolist()
|
||||
if vectors:
|
||||
# start insert vectors
|
||||
start_id = i * vectors_per_file + j * ni
|
||||
end_id = start_id + len(vectors)
|
||||
logger.debug("Start id: %s, end id: %s" % (start_id, end_id))
|
||||
ids = [k for k in range(start_id, end_id)]
|
||||
entities = milvus.generate_entities(vectors, ids)
|
||||
ni_start_time = time.time()
|
||||
try:
|
||||
res_ids = milvus.insert(entities, ids=ids)
|
||||
except grpc.RpcError as e:
|
||||
if e.code() == grpc.StatusCode.UNAVAILABLE:
|
||||
logger.debug("Retry insert")
|
||||
res_ids = milvus.insert(entities, ids=ids)
|
||||
logger.debug("Retry successfully")
|
||||
else:
|
||||
raise e
|
||||
assert ids == res_ids
|
||||
# milvus.flush()
|
||||
logger.debug(milvus.count())
|
||||
ni_end_time = time.time()
|
||||
total_time = total_time + ni_end_time - ni_start_time
|
||||
i += 1
|
||||
else:
|
||||
vectors.clear()
|
||||
loops = ni // vectors_per_file
|
||||
for j in range(loops):
|
||||
file_name = gen_file_name(loops*i+j, dimension, data_type)
|
||||
data = np.load(file_name)
|
||||
vectors.extend(data.tolist())
|
||||
if vectors:
|
||||
start_id = i * vectors_per_file
|
||||
end_id = start_id + len(vectors)
|
||||
logger.info("Start id: %s, end id: %s" % (start_id, end_id))
|
||||
ids = [k for k in range(start_id, end_id)]
|
||||
entities = milvus.generate_entities(vectors, ids)
|
||||
ni_start_time = time.time()
|
||||
try:
|
||||
res_ids = milvus.insert(entities, ids=ids)
|
||||
except grpc.RpcError as e:
|
||||
if e.code() == grpc.StatusCode.UNAVAILABLE:
|
||||
logger.debug("Retry insert")
|
||||
res_ids = milvus.insert(entities, ids=ids)
|
||||
logger.debug("Retry successfully")
|
||||
else:
|
||||
raise e
|
||||
|
||||
assert ids == res_ids
|
||||
# milvus.flush()
|
||||
logger.debug(milvus.count())
|
||||
ni_end_time = time.time()
|
||||
total_time = total_time + ni_end_time - ni_start_time
|
||||
i += loops
|
||||
qps = round(size / total_time, 2)
|
||||
ni_time = round(total_time / (size / ni), 2)
|
||||
bi_res["total_time"] = round(total_time, 2)
|
||||
bi_res["qps"] = qps
|
||||
bi_res["ni_time"] = ni_time
|
||||
return bi_res
|
||||
|
||||
def do_query(self, milvus, collection_name, vec_field_name, top_ks, nqs, run_count=1, search_param=None, filter_query=None):
|
||||
bi_res = []
|
||||
(data_type, collection_size, index_file_size, dimension, metric_type) = parser.collection_parser(collection_name)
|
||||
base_query_vectors = get_vectors_from_binary(MAX_NQ, dimension, data_type)
|
||||
for nq in nqs:
|
||||
tmp_res = []
|
||||
query_vectors = base_query_vectors[0:nq]
|
||||
for top_k in top_ks:
|
||||
avg_query_time = 0.0
|
||||
min_query_time = 0.0
|
||||
logger.info("Start query, query params: top-k: {}, nq: {}, actually length of vectors: {}".format(top_k, nq, len(query_vectors)))
|
||||
for i in range(run_count):
|
||||
logger.debug("Start run query, run %d of %s" % (i+1, run_count))
|
||||
start_time = time.time()
|
||||
vector_query = {"vector": {vec_field_name: {
|
||||
"topk": top_k,
|
||||
"query": query_vectors,
|
||||
"metric_type": utils.metric_type_trans(metric_type),
|
||||
"params": search_param}
|
||||
}}
|
||||
query_res = milvus.query(vector_query, filter_query=filter_query)
|
||||
interval_time = time.time() - start_time
|
||||
if (i == 0) or (min_query_time > interval_time):
|
||||
min_query_time = interval_time
|
||||
logger.info("Min query time: %.2f" % min_query_time)
|
||||
tmp_res.append(round(min_query_time, 2))
|
||||
bi_res.append(tmp_res)
|
||||
return bi_res
|
||||
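# bi_res is a latency matrix: one row per nq, one column per top_k, holding the
# minimum query time over run_count runs, e.g. [[0.01, 0.02], [0.05, 0.08]]
# for two nq values and two top_k values (hypothetical numbers).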
|
||||
def do_query_qps(self, milvus, query_vectors, top_k, search_param):
|
||||
start_time = time.time()
|
||||
result = milvus.query(query_vectors, top_k, search_param)
|
||||
end_time = time.time()
|
||||
return end_time - start_time
|
||||
|
||||
def do_query_ids(self, milvus, collection_name, vec_field_name, top_k, nq, search_param=None, filter_query=None):
|
||||
(data_type, collection_size, index_file_size, dimension, metric_type) = parser.collection_parser(collection_name)
|
||||
base_query_vectors = get_vectors_from_binary(MAX_NQ, dimension, data_type)
|
||||
query_vectors = base_query_vectors[0:nq]
|
||||
logger.info("Start query, query params: top-k: {}, nq: {}, actually length of vectors: {}".format(top_k, nq, len(query_vectors)))
|
||||
vector_query = {"vector": {vec_field_name: {
|
||||
"topk": top_k,
|
||||
"query": query_vectors,
|
||||
"metric_type": utils.metric_type_trans(metric_type),
|
||||
"params": search_param}
|
||||
}}
|
||||
query_res = milvus.query(vector_query, filter_query=filter_query)
|
||||
result_ids = milvus.get_ids(query_res)
|
||||
return result_ids
|
||||
|
||||
def do_query_acc(self, milvus, collection_name, top_k, nq, id_store_name, search_param=None):
|
||||
(data_type, collection_size, index_file_size, dimension, metric_type) = parser.collection_parser(collection_name)
|
||||
base_query_vectors = get_vectors_from_binary(MAX_NQ, dimension, data_type)
|
||||
vectors = base_query_vectors[0:nq]
|
||||
logger.info("Start query, query params: top-k: {}, nq: {}, actually length of vectors: {}".format(top_k, nq, len(vectors)))
|
||||
query_res = milvus.query(vectors, top_k, search_param=search_param)
|
||||
# if the file already exists, overwrite it
|
||||
if os.path.isfile(id_store_name):
|
||||
os.remove(id_store_name)
|
||||
with open(id_store_name, 'a+') as fd:
|
||||
for nq_item in query_res:
|
||||
for item in nq_item:
|
||||
fd.write(str(item.id)+'\t')
|
||||
fd.write('\n')
|
||||
|
||||
# compute and print accuracy
|
||||
def compute_accuracy(self, flat_file_name, index_file_name):
|
||||
flat_id_list, index_id_list = [], []
|
||||
logger.info("Loading flat id file: %s" % flat_file_name)
|
||||
with open(flat_file_name, 'r') as flat_id_fd:
|
||||
for line in flat_id_fd:
|
||||
tmp_list = line.strip("\n").strip().split("\t")
|
||||
flat_id_list.append(tmp_list)
|
||||
logger.info("Loading index id file: %s" % index_file_name)
|
||||
with open(index_file_name) as index_id_fd:
|
||||
for line in index_id_fd:
|
||||
tmp_list = line.strip("\n").strip().split("\t")
|
||||
index_id_list.append(tmp_list)
|
||||
if len(flat_id_list) != len(index_id_list):
|
||||
raise Exception("Flat index result length: <flat: %s, index: %s> not match, Acc compute exiting ..." % (len(flat_id_list), len(index_id_list)))
|
||||
# get the accuracy
|
||||
return self.get_recall_value(flat_id_list, index_id_list)
|
||||
|
||||
def get_recall_value(self, true_ids, result_ids):
|
||||
"""
|
||||
Use the intersection length
|
||||
"""
|
||||
sum_ratio = 0.0
|
||||
for index, item in enumerate(result_ids):
|
||||
# tmp = set(item).intersection(set(flat_id_list[index]))
|
||||
tmp = set(true_ids[index]).intersection(set(item))
|
||||
sum_ratio += len(tmp) / len(item)
|
||||
# logger.debug(sum_ratio)
|
||||
return round(sum_ratio / len(result_ids), 3)
|
||||
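# For example, with true_ids [[1, 2, 3]] and result_ids [[1, 2, 4]] the
# intersection size is 2, so the recall is round(2 / 3, 3) == 0.667.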
|
||||
"""
|
||||
Implementation based on:
|
||||
https://github.com/facebookresearch/faiss/blob/master/benchs/datasets.py
|
||||
"""
|
||||
def get_groundtruth_ids(self, collection_size):
|
||||
fname = GROUNDTRUTH_MAP[str(collection_size)]
|
||||
fname = SIFT_SRC_GROUNDTRUTH_DATA_DIR + "/" + fname
|
||||
a = np.fromfile(fname, dtype='int32')
|
||||
d = a[0]
|
||||
true_ids = a.reshape(-1, d + 1)[:, 1:].copy()
|
||||
return true_ids
|
||||
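# The .ivecs layout stores, for each query, an int32 count d followed by d
# int32 neighbor ids; reshaping to (-1, d + 1) and dropping column 0 leaves
# one row of ground-truth ids per query.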
|
||||
def get_fields(self, milvus, collection_name):
|
||||
fields = []
|
||||
info = milvus.get_info(collection_name)
|
||||
for item in info["fields"]:
|
||||
fields.append(item["name"])
|
||||
return fields
|
||||
|
||||
# def get_filter_query(self, filter_query):
|
||||
# for filter in filter_query:
|
|
@ -1,11 +0,0 @@
|
|||
|
||||
|
||||
class BaseRunner(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def set_up(self):
|
||||
pass
|
||||
|
||||
def tear_down(self):
|
||||
pass
|
|
@ -1,75 +0,0 @@
|
|||
import time
|
||||
import random
|
||||
from locust import Locust, TaskSet, events, task, between
|
||||
from client import MilvusClient
|
||||
from . import BaseRunner
|
||||
|
||||
|
||||
dim = 128
|
||||
top_k = 10
|
||||
X = [[random.random() for i in range(dim)] for i in range(1)]
|
||||
search_param = {"nprobe": 16}
|
||||
|
||||
|
||||
class MilvusTask(object):
|
||||
def __init__(self, type="single", args):
|
||||
self.type = type
|
||||
self.m = None
|
||||
if type == "single":
|
||||
self.m = MilvusClient(host=args["host"], port=args["port"], collection_name=args["collection_name"])
|
||||
elif type == "multi":
|
||||
self.m = MilvusClient(host=args["m"])
|
||||
|
||||
def query(self, *args, **kwargs):
|
||||
name = "milvus_search"
|
||||
request_type = "grpc"
|
||||
start_time = time.time()
|
||||
try:
|
||||
# result = self.m.getattr(*args, **kwargs)
|
||||
status, result = self.m.query(*args, **kwargs)
|
||||
except Exception as e:
|
||||
total_time = int((time.time() - start_time) * 1000)
|
||||
events.request_failure.fire(request_type=request_type, name=name, response_time=total_time, exception=e, response_length=0)
|
||||
else:
|
||||
if not status.OK():
|
||||
total_time = int((time.time() - start_time) * 1000)
|
||||
events.request_failure.fire(request_type=request_type, name=name, response_time=total_time, exception=Exception(status.message), response_length=0)
|
||||
else:
|
||||
total_time = int((time.time() - start_time) * 1000)
|
||||
events.request_success.fire(request_type=request_type, name=name, response_time=total_time, response_length=0)
|
||||
# In this example, I've hardcoded response_length=0. If we would want the response length to be
|
||||
# reported correctly in the statistics, we would probably need to hook in at a lower level
|
||||
|
||||
|
||||
class MilvusLocust(Locust):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(MilvusLocust, self).__init__(*args, **kwargs)
|
||||
self.client = MilvusTask(args={"host": self.host, "port": self.port, "collection_name": self.collection_name})
|
||||
|
||||
|
||||
class Query(MilvusLocust):
|
||||
host = "192.168.1.183"
|
||||
port = 19530
|
||||
collection_name = "sift_128_euclidean"
|
||||
# m = MilvusClient(host=host, port=port, collection_name=collection_name)
|
||||
wait_time = between(0.001, 0.002)
|
||||
|
||||
class task_set(TaskSet):
|
||||
@task
|
||||
def query(self):
|
||||
self.client.query(X, top_k, search_param)
|
||||
|
||||
|
||||
class LocustRunner(BaseRunner):
|
||||
"""Only one client, not support M/S mode"""
|
||||
def __init__(self, args):
|
||||
# Start client with params including client number && last time && hatch rate ...
|
||||
pass
|
||||
|
||||
def set_up(self):
|
||||
# helm install locust client
|
||||
pass
|
||||
|
||||
def tear_down(self):
|
||||
# helm uninstall
|
||||
pass
|
|
@ -1,11 +0,0 @@
|
|||
[
|
||||
{
|
||||
"server": "athena",
|
||||
"suite_params": [
|
||||
{
|
||||
"suite": "011_gpu_search_debug.yaml",
|
||||
"image_type": "gpu"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
|
@ -1,65 +0,0 @@
|
|||
[
|
||||
{
|
||||
"server": "athena",
|
||||
"suite_params": [
|
||||
{
|
||||
"suite": "080_gpu_accuracy.yaml",
|
||||
"image_type": "gpu"
|
||||
},
|
||||
{
|
||||
"suite": "080_search_stability.yaml",
|
||||
"image_type": "gpu"
|
||||
},
|
||||
{
|
||||
"suite": "gpu_accuracy_ann.yaml",
|
||||
"image_type": "gpu"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"server": "poseidon",
|
||||
"suite_params": [
|
||||
{
|
||||
"suite": "080_gpu_search.yaml",
|
||||
"image_type": "gpu"
|
||||
},
|
||||
{
|
||||
"suite": "080_cpu_search.yaml",
|
||||
"image_type": "cpu"
|
||||
},
|
||||
{
|
||||
"suite": "080_gpu_build.yaml",
|
||||
"image_type": "gpu"
|
||||
},
|
||||
{
|
||||
"suite": "080_cpu_accuracy.yaml",
|
||||
"image_type": "cpu"
|
||||
},
|
||||
{
|
||||
"suite": "locust_search.yaml",
|
||||
"image_type": "cpu"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"server": "apollo",
|
||||
"suite_params": [
|
||||
{
|
||||
"suite": "cpu_accuracy_ann.yaml",
|
||||
"image_type": "cpu"
|
||||
},
|
||||
{
|
||||
"suite": "080_cpu_build.yaml",
|
||||
"image_type": "cpu"
|
||||
},
|
||||
{
|
||||
"suite": "080_insert_performance.yaml",
|
||||
"image_type": "cpu"
|
||||
},
|
||||
{
|
||||
"suite": "add_flush_performance.yaml",
|
||||
"image_type": "cpu"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
|
@ -1,15 +0,0 @@
|
|||
[
|
||||
{
|
||||
"server": "poseidon",
|
||||
"suite_params": [
|
||||
{
|
||||
"suite": "crud_add.yaml",
|
||||
"image_type": "gpu"
|
||||
},
|
||||
{
|
||||
"suite": "gpu_accuracy_sift1m.yaml",
|
||||
"image_type": "gpu"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
|
@ -1,11 +0,0 @@
|
|||
[
|
||||
{
|
||||
"server": "eros",
|
||||
"suite_params": [
|
||||
{
|
||||
"suite": "011_gpu_build_sift1b.yaml",
|
||||
"image_type": "gpu"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
|
@ -1,53 +0,0 @@
|
|||
[
|
||||
{
|
||||
"server": "apollo",
|
||||
"suite_params": [
|
||||
{
|
||||
"suite": "cpu_accuracy_ann.yaml",
|
||||
"image_type": "cpu"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"server": "poseidon",
|
||||
"suite_params": [
|
||||
{
|
||||
"suite": "gpu_search_performance.yaml",
|
||||
"image_type": "gpu"
|
||||
},
|
||||
{
|
||||
"suite": "cpu_search_performance.yaml",
|
||||
"image_type": "cpu"
|
||||
},
|
||||
{
|
||||
"suite": "insert_performance.yaml",
|
||||
"image_type": "gpu"
|
||||
},
|
||||
{
|
||||
"suite": "gpu_accuracy.yaml",
|
||||
"image_type": "gpu"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"server": "eros",
|
||||
"suite_params": [
|
||||
{
|
||||
"suite": "gpu_accuracy_ann.yaml",
|
||||
"image_type": "gpu"
|
||||
},
|
||||
{
|
||||
"suite": "gpu_search_stability.yaml",
|
||||
"image_type": "gpu"
|
||||
},
|
||||
{
|
||||
"suite": "gpu_build_performance.yaml",
|
||||
"image_type": "gpu"
|
||||
},
|
||||
{
|
||||
"suite": "cpu_build_performance.yaml",
|
||||
"image_type": "cpu"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
|
@ -1,18 +0,0 @@
|
|||
[
|
||||
{
|
||||
"suite_params": [
|
||||
{
|
||||
"suite": "shards_insert_performance.yaml",
|
||||
"image_type": "cpu"
|
||||
},
|
||||
{
|
||||
"suite": "shards_ann_debug.yaml",
|
||||
"image_type": "cpu"
|
||||
},
|
||||
{
|
||||
"suite": "shards_loop_stability.yaml",
|
||||
"image_type": "cpu"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
|
@ -1,50 +0,0 @@
|
|||
import random
|
||||
import logging
|
||||
from locust import User, task, between
|
||||
from locust_task import MilvusTask
|
||||
from client import MilvusClient
|
||||
from milvus import DataType
|
||||
import utils
|
||||
|
||||
connection_type = "single"
|
||||
host = "172.16.50.9"
|
||||
port = 19530
|
||||
collection_name = "sift_5m_2000000_128_l2_2"
|
||||
dim = 128
|
||||
m = MilvusClient(host=host, port=port, collection_name=collection_name)
|
||||
# m.clean_db()
|
||||
# m.create_collection(dim, data_type=DataType.FLOAT_VECTOR, auto_id=True, other_fields=None)
|
||||
vectors = [[random.random() for _ in range(dim)] for _ in range(1000)]
|
||||
entities = m.generate_entities(vectors)
|
||||
ids = [i for i in range(10000000)]
|
||||
|
||||
|
||||
class QueryTask(User):
|
||||
wait_time = between(0.001, 0.002)
|
||||
# if connection_type == "single":
|
||||
# client = MilvusTask(m=m)
|
||||
# else:
|
||||
# client = MilvusTask(host=host, port=port, collection_name=collection_name)
|
||||
client = MilvusTask(host, port, collection_name, connection_type=connection_type)
|
||||
|
||||
# @task
|
||||
# def query(self):
|
||||
# top_k = 5
|
||||
# X = [[random.random() for i in range(dim)] for i in range(1)]
|
||||
# search_param = {"nprobe": 16}
|
||||
# self.client.query(X, top_k, search_param)
|
||||
|
||||
@task(1)
|
||||
def insert(self):
|
||||
self.client.insert(entities)
|
||||
|
||||
# @task(1)
|
||||
# def create(self):
|
||||
# collection_name = utils.get_unique_name(prefix="locust")
|
||||
# self.client.create_collection(dim, data_type=DataType.FLOAT_VECTOR, auto_id=True, collection_name=collection_name, other_fields=None)
|
||||
|
||||
# @task(1)
|
||||
# def delete(self):
|
||||
# delete_ids = random.sample(ids, 100)
|
||||
# logging.error(delete_ids)
|
||||
# self.client.delete(delete_ids)
|
|
@ -1,40 +0,0 @@
|
|||
build_performance:
|
||||
collections:
|
||||
-
|
||||
server:
|
||||
db_config.primary_path: /test/milvus/db_data_011/random_1m_1024_512_l2_ivf
|
||||
cache_config.cpu_cache_capacity: 32
|
||||
engine_config.use_blas_threshold: 1100
|
||||
engine_config.gpu_search_threshold: 1
|
||||
gpu_resource_config.enable: true
|
||||
gpu_resource_config.cache_capacity: 6
|
||||
gpu_resource_config.search_resources:
|
||||
- gpu0
|
||||
- gpu1
|
||||
gpu_resource_config.build_index_resources:
|
||||
- gpu0
|
||||
- gpu1
|
||||
collection_name: random_1m_1024_512_l2
|
||||
index_type: ivf_flat
|
||||
index_param:
|
||||
nlist: 16384
|
||||
|
||||
-
|
||||
server:
|
||||
db_config.primary_path: /test/milvus/db_data_011/sift_1m_128_128_l2_pq
|
||||
cache_config.cpu_cache_capacity: 32
|
||||
engine_config.use_blas_threshold: 1100
|
||||
engine_config.gpu_search_threshold: 1
|
||||
gpu_resource_config.enable: true
|
||||
gpu_resource_config.cache_capacity: 6
|
||||
gpu_resource_config.search_resources:
|
||||
- gpu0
|
||||
- gpu1
|
||||
gpu_resource_config.build_index_resources:
|
||||
- gpu0
|
||||
- gpu1
|
||||
collection_name: sift_1m_128_128_l2
|
||||
index_type: ivf_pq
|
||||
index_param:
|
||||
nlist: 8092
|
||||
m: 32
|
|
@ -1,57 +0,0 @@
|
|||
insert_performance:
|
||||
collections:
|
||||
-
|
||||
milvus:
|
||||
db_config.primary_path: /test/milvus/db_data_011/cluster/sift_10m_128_l2
|
||||
cache_config.cpu_cache_capacity: 4GB
|
||||
engine_config.use_blas_threshold: 1100
|
||||
engine_config.gpu_search_threshold: 1
|
||||
gpu_resource_config.enable: true
|
||||
gpu_resource_config.cache_capacity: 4GB
|
||||
gpu_resource_config.search_resources:
|
||||
- gpu0
|
||||
- gpu1
|
||||
gpu_resource_config.build_index_resources:
|
||||
- gpu0
|
||||
- gpu1
|
||||
wal_enable: true
|
||||
# cluster: true
|
||||
# external_mysql: true
|
||||
collection_name: sift_10m_128_l2_011
|
||||
# other_fields: int,float
|
||||
ni_per: 50000
|
||||
build_index: false
|
||||
index_type: ivf_sq8
|
||||
index_param:
|
||||
nlist: 1024
|
||||
# -
|
||||
# server:
|
||||
# db_config.primary_path: /test/milvus/db_data_011/sift_50m_100000_128_l2
|
||||
# cache_config.cpu_cache_capacity: 4GB
|
||||
# engine_config.use_blas_threshold: 1100
|
||||
# engine_config.gpu_search_threshold: 1
|
||||
# gpu_resource_config.enable: true
|
||||
# gpu_resource_config.cache_capacity: 4GB
|
||||
# gpu_resource_config.search_resources:
|
||||
# - gpu0
|
||||
# - gpu1
|
||||
# gpu_resource_config.build_index_resources:
|
||||
# - gpu0
|
||||
# - gpu1
|
||||
# wal_enable: true
|
||||
# collection_name: sift_50m_100000_128_l2
|
||||
# ni_per: 50000
|
||||
# build_index: false
|
||||
# index_type: ivf_sq8
|
||||
# index_param:
|
||||
# nlist: 1024
|
||||
# -
|
||||
# server:
|
||||
# db_config.primary_path: /test/milvus/db_data_011/sift_1b_524288_128_l2_debug
|
||||
# collection_name: sift_1b_524288_128_l2
|
||||
# ni_per: 100000
|
||||
# # flush: no
|
||||
# build_index: false
|
||||
# index_type: ivf_sq8
|
||||
# index_param:
|
||||
# nlist: 4096
|
|
@ -1,52 +0,0 @@
|
|||
import time
|
||||
import random
|
||||
import logging
|
||||
from client import MilvusClient
|
||||
import utils
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
milvus_instance = MilvusClient()
|
||||
milvus_instance.clean_db()
|
||||
p_num = 1
|
||||
collection_num = 10  # assumed value; left undefined in the original script
|
||||
pull_interval_seconds = 3600  # assumed value; left undefined in the original script
|
||||
dimension = 128
|
||||
insert_xb = 100000
|
||||
index_types = ['flat']
|
||||
index_param = {"nlist": 2048}
|
||||
collection_names = []
|
||||
milvus_instances_map = {}
|
||||
insert_vectors = [[random.random() for _ in range(dimension)] for _ in range(insert_xb)]
|
||||
|
||||
for i in range(collection_num):
|
||||
name = utils.get_unique_name(prefix="collection_")
|
||||
print(name)
|
||||
collection_names.append(name)
|
||||
metric_type = "ip"
|
||||
# metric_type = random.choice(["l2", "ip"])
|
||||
index_file_size = random.randint(10, 100)
|
||||
milvus_instance.create_collection(name, dimension, index_file_size, metric_type)
|
||||
milvus_instance = MilvusClient(collection_name=name)
|
||||
index_type = random.choice(index_types)
|
||||
milvus_instance.create_index(index_type, index_param=index_param)
|
||||
insert_vectors = utils.normalize(metric_type, insert_vectors)
|
||||
milvus_instance.insert(insert_vectors)
|
||||
milvus_instance.flush()
|
||||
milvus_instances_map.update({name: milvus_instance})
|
||||
print(milvus_instance.describe_index(), milvus_instance.describe(), milvus_instance.count())
|
||||
|
||||
# tasks = ["insert_rand", "delete_rand", "query_rand", "flush", "compact"]
|
||||
tasks = ["insert_rand", "query_rand", "flush"]
|
||||
i = 1
|
||||
while True:
|
||||
print("Loop time: %d" % i)
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < pull_interval_seconds:
|
||||
# choose collection
|
||||
tmp_collection_name = random.choice(collection_names)
|
||||
# choose task from task
|
||||
task_name = random.choice(tasks)
|
||||
# print(tmp_collection_name, task_name)
|
||||
func = getattr(milvus_instances_map[tmp_collection_name], task_name)
|
||||
func()
|
||||
print("Restart")
|
||||
i = i + 1
|
|
@ -0,0 +1,13 @@
|
|||
random_data
|
||||
benchmark_logs/
|
||||
db/
|
||||
*idmap*.txt
|
||||
__pycache__/
|
||||
venv
|
||||
.idea
|
||||
nohup.out
|
||||
|
||||
*.swp
|
||||
*.swo
|
||||
.DS_Store
|
||||
.vscode
|
|
@ -0,0 +1,50 @@
|
|||
# Quick start
|
||||
|
||||
### Description:
|
||||
|
||||
- Test suites can be organized with `yaml`
|
||||
- Tests can run in local mode or in argo/jenkins mode, where the server env is managed in argo/jenkins steps or stages
|
||||
|
||||
### Demos:
|
||||
|
||||
1. Using argo pipeline:
|
||||
Run test suites (1.x and 2.x versions) in argo workflows; internal argo url: argo-test.zilliz.cc
|
||||
|
||||
2. Local test:
|
||||
Run tests against a local server
|
||||
1. Set the python path
|
||||
|
||||
`export PYTHONPATH=/yourmilvusprojectpath/tests/milvus_benchmark`
|
||||
2. (optional, for `sift`/`glove` open dataset) mount NAS or update `*_DATA_DIR` in `runner.py`
|
||||
|
||||
`sudo mount -t cifs -o username=test,vers=1.0 //172.16.70.249/test /test`
|
||||
3. Run the test
|
||||
|
||||
`cd milvus-benchmark/`
|
||||
|
||||
`python3 main.py --local --host=*.* --port=19530 --suite=suites/2_insert_data.yaml`
|
||||
|
||||
### Definitions of test suites:
|
||||
|
||||
Testers need to write a test suite config when adding a customized test to the current test framework
|
||||
|
||||
The following describes the search performance test suite:
|
||||
|
||||
1. insert_search_performance: the test type; other supported types are:
|
||||
|
||||
`search_performance`,`build_performance`,`insert_performance`,`accuracy`,`stability`,`search_stability`
|
||||
2. collections: list of test cases
|
||||
3. The following fields are in the `collection` field:
|
||||
- milvus: milvus config
|
||||
- collection_name: currently supports one collection
|
||||
- ni_per: the number of entities per insert batch
|
||||
- index_type: index type
|
||||
- index_param: param of index
|
||||
- run_count: search count
|
||||
- search_params: params of search_vectors
|
||||
- top_ks: top_k of search
|
||||
- nqs: nq of search
|
||||
|
||||
## Test result:
|
||||
|
||||
Test results will be uploaded and used to determine whether the test run passed or failed
|
|
@ -0,0 +1,24 @@
|
|||
try {
|
||||
dir ("milvus-helm-charts") {
|
||||
// sh 'helm init --client-only --skip-refresh --stable-repo-url https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts'
|
||||
// sh 'helm repo update'
|
||||
checkout([$class: 'GitSCM', branches: [[name: "${HELM_BRANCH}"]], userRemoteConfigs: [[url: "${HELM_URL}", name: 'origin', refspec: "+refs/heads/${HELM_BRANCH}:refs/remotes/origin/${HELM_BRANCH}"]]])
|
||||
}
|
||||
// dir ("milvus_benchmark") {
|
||||
// print "Git clone url: ${TEST_URL}:${TEST_BRANCH}"
|
||||
// checkout([$class: 'GitSCM', branches: [[name: "${TEST_BRANCH}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "${TEST_URL}", name: 'origin', refspec: "+refs/heads/${TEST_BRANCH}:refs/remotes/origin/${TEST_BRANCH}"]]])
|
||||
print "Install requirements"
|
||||
|
||||
// sh "python3 -m pip install pymilvus-distributed==0.0.57"
|
||||
// sh "python3 -m pip install -r requirements.txt -i http://pypi.douban.com/simple --trusted-host pypi.douban.com"
|
||||
sh "python3 -m pip install -r requirements.txt"
|
||||
if ("${params.CLUSTER_NAME}" == "idc-kubernetes") {
|
||||
sh "export KUBECONFIG=/root/kube/.kube/config && cd milvus_benchmark && export PYTHONPATH=${env.WORKSPACE}/ && python3 main.py --image-version=${params.IMAGE_VERSION} --schedule-conf=scheduler/${params.CONFIG_FILE}"
|
||||
} else {
|
||||
sh "cd milvus_benchmark && export PYTHONPATH=${env.WORKSPACE}/ && python3 main.py --image-version=${params.IMAGE_VERSION} --schedule-conf=scheduler/${params.CONFIG_FILE}"
|
||||
}
|
||||
// }
|
||||
} catch (exc) {
|
||||
echo 'Deploy Test Failed !'
|
||||
throw exc
|
||||
}
|
|
@ -6,27 +6,25 @@ pipeline {
|
|||
}
|
||||
|
||||
parameters{
|
||||
string defaultValue: '0.11.1', description: 'server image version', name: 'IMAGE_VERSION', trim: true
|
||||
choice choices: ['single', 'shards'], description: 'server deploy mode', name: 'DEPLOY_MODE'
|
||||
string defaultValue: '011_data.json', description: 'test suite config yaml', name: 'CONFIG_FILE', trim: true
|
||||
string defaultValue: 'shards.json', description: 'shards test suite config yaml', name: 'SHARDS_CONFIG_FILE', trim: true
|
||||
string defaultValue: '09509e53-9125-4f5d-9ce8-42855987ad67', description: 'git credentials', name: 'GIT_USER', trim: true
|
||||
string defaultValue: 'master', description: 'server image version', name: 'IMAGE_VERSION', trim: true
|
||||
choice choices: ['kubernetes', 'idc-kubernetes'], description: 'cluster name', name: 'CLUSTER_NAME'
|
||||
string defaultValue: '2_data.json', description: 'test suite config yaml', name: 'CONFIG_FILE', trim: true
|
||||
string defaultValue: 'd0928627-efb6-4cfd-8030-9bf635988d85', description: 'git credentials', name: 'GIT_USER', trim: true
|
||||
}
|
||||
|
||||
environment {
|
||||
HELM_URL = "https://github.com/milvus-io/milvus-helm.git"
|
||||
HELM_BRANCH = "0.11.1"
|
||||
TEST_URL = "git@192.168.1.105:Test/milvus_benchmark.git"
|
||||
TEST_BRANCH = "0.11.1"
|
||||
TEST_LIB_URL = "http://192.168.1.105:6060/Test/milvus_metrics.git"
|
||||
HELM_RELEASE_NAME = "milvus-benchmark-test-${env.BUILD_NUMBER}"
|
||||
HELM_SHARDS_RELEASE_NAME = "milvus-shards-benchmark-test-${env.BUILD_NUMBER}"
|
||||
HELM_URL = "https://github.com/zilliztech/milvus-helm-charts.git"
|
||||
HELM_BRANCH = "main"
|
||||
TEST_URL = "https://github.com/zilliztech/milvus_benchmark.git"
|
||||
TEST_BRANCH = "distributed"
|
||||
HELM_RELEASE_NAME = "distributed-benchmark-test-${env.BUILD_NUMBER}"
|
||||
}
|
||||
|
||||
stages {
|
||||
stage("Setup env") {
|
||||
agent {
|
||||
kubernetes {
|
||||
cloud "${params.CLUSTER_NAME}"
|
||||
label "test-benchmark-${env.JOB_NAME}-${env.BUILD_NUMBER}"
|
||||
defaultContainer 'jnlp'
|
||||
yaml """
|
||||
|
@ -47,6 +45,9 @@ pipeline {
|
|||
- name: kubeconf
|
||||
mountPath: /root/.kube/
|
||||
readOnly: true
|
||||
- name: kubeconf2
|
||||
mountPath: /root/kube/.kube/
|
||||
readOnly: true
|
||||
- name: db-data-path
|
||||
mountPath: /test
|
||||
readOnly: false
|
||||
|
@ -61,6 +62,9 @@ pipeline {
|
|||
- name: kubeconf
|
||||
secret:
|
||||
secretName: test-cluster-config
|
||||
- name: kubeconf2
|
||||
secret:
|
||||
secretName: idc-cluster-config
|
||||
- name: db-data-path
|
||||
flexVolume:
|
||||
driver: "fstab/cifs"
|
||||
|
@ -81,6 +85,7 @@ pipeline {
|
|||
script {
|
||||
boolean isNightlyTest = isTimeTriggeredBuild()
|
||||
if (isNightlyTest) {
|
||||
// build job: 'milvus-publish-daily-docker', parameters: [[$class: 'StringParameterValue', name: 'BRANCH', value: "${params.IMAGE_VERSION}"]], wait: false
|
||||
build job: 'milvus-publish-daily-docker', parameters: [string(name: 'LOCAL_DOKCER_REGISTRY_URL', value: 'registry.zilliz.com'), string(name: 'REMOTE_DOKCER_REGISTRY_URL', value: 'registry-1.docker.io'), string(name: 'REMOTE_DOCKER_CREDENTIALS_ID', value: 'milvus-docker-access-token'), string(name: 'BRANCH', value: String.valueOf(IMAGE_VERSION))], wait: false
|
||||
} else {
|
||||
echo "Skip publish daily docker images ..."
|
||||
|
@ -95,11 +100,13 @@ pipeline {
|
|||
container('milvus-test-env') {
|
||||
script {
|
||||
print "In Deploy Test Stage"
|
||||
if ("${params.DEPLOY_MODE}" == "single") {
|
||||
load "${env.WORKSPACE}/ci/jenkinsfile/deploy_test.groovy"
|
||||
} else {
|
||||
load "${env.WORKSPACE}/ci/jenkinsfile/deploy_shards_test.groovy"
|
||||
// use the idc context
|
||||
// sh 'kubectl config use-context idc001'
|
||||
if ("${params.CLUSTER_NAME}" == "idc-kubernetes") {
|
||||
print "Use cluster name idc001"
|
||||
sh 'export KUBECONFIG=/root/kube/.kube/config'
|
||||
}
|
||||
load "${env.WORKSPACE}/ci/jenkinsfile/deploy_test.groovy"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -109,11 +116,7 @@ pipeline {
|
|||
steps {
|
||||
container('milvus-test-env') {
|
||||
script {
|
||||
if ("${params.DEPLOY_MODE}" == "single") {
|
||||
load "${env.WORKSPACE}/ci/jenkinsfile/cleanup.groovy"
|
||||
} else {
|
||||
load "${env.WORKSPACE}/ci/jenkinsfile/cleanupShards.groovy"
|
||||
}
|
||||
load "${env.WORKSPACE}/ci/jenkinsfile/cleanup.groovy"
|
||||
}
|
||||
}
|
||||
}
|
|
@ -25,7 +25,7 @@ pipeline {
|
|||
axes {
|
||||
axis {
|
||||
name 'OS_NAME'
|
||||
values 'centos7'
|
||||
values 'ubuntu18.04', 'centos7'
|
||||
}
|
||||
|
||||
axis {
|
|
@ -0,0 +1,2 @@
|
|||
from locust import User, events
|
||||
import gevent
|
|
@ -0,0 +1,71 @@
|
|||
import logging
|
||||
import os
|
||||
from yaml import full_load
|
||||
from milvus_benchmark.chaos import utils
|
||||
|
||||
logger = logging.getLogger("milvus_benchmark.chaos.base")
|
||||
|
||||
|
||||
class BaseChaos(object):
|
||||
cur_path = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
def __init__(self, api_version, kind, metadata, spec):
|
||||
self.api_version = api_version
|
||||
self.kind = kind
|
||||
self.metadata = metadata
|
||||
self.spec = spec
|
||||
|
||||
def gen_experiment_config(self):
|
||||
"""
|
||||
1. load the dict from the default yaml
|
||||
2. merge that dict with the values set on self
|
||||
"""
|
||||
pass
|
||||
|
||||
def check_config(self):
|
||||
if not self.kind:
|
||||
raise Exception("kind is must be specified")
|
||||
if not self.spec:
|
||||
raise Exception("spec is must be specified")
|
||||
if "action" not in self.spec:
|
||||
raise Exception("action is must be specified in spec")
|
||||
if "selector" not in self.spec:
|
||||
raise Exception("selector is must be specified in spec")
|
||||
return True
|
||||
|
||||
def replace_label_selector(self):
|
||||
self.check_config()
|
||||
label_selectors_dict = self.spec["selector"]["labelSelectors"]
|
||||
label_selector = next(iter(label_selectors_dict.items()))
|
||||
label_selector_value = label_selector[1]
|
||||
# pods = utils.list_pod_for_namespace(label_selector[0] + "=" + label_selector_value)
|
||||
pods = utils.list_pod_for_namespace()
|
||||
real_label_selector_value = [pod for pod in pods if label_selector_value in pod][0]
|
||||
self.spec["selector"]["labelSelectors"].update({label_selector[0]: real_label_selector_value})
|
||||
|
||||
|
||||
class PodChaos(BaseChaos):
|
||||
default_yaml = BaseChaos.cur_path + '/template/PodChaos.yaml'
|
||||
|
||||
def __init__(self, api_version, kind, metadata, spec):
|
||||
super(PodChaos, self).__init__(api_version, kind, metadata, spec)
|
||||
|
||||
def gen_experiment_config(self):
|
||||
with open(self.default_yaml) as f:
|
||||
default_config = full_load(f)
|
||||
|
||||
self.replace_label_selector()
|
||||
experiment_config = default_config
|
||||
experiment_config.update({"apiVersion": self.api_version})
|
||||
experiment_config.update({"kind": self.kind})
|
||||
experiment_config["metadata"].update(self.metadata)
|
||||
experiment_config["spec"].update(self.spec)
|
||||
return experiment_config
|
||||
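# A minimal usage sketch (values assumed from the pod.yaml config in this change):
#   spec = {"action": "pod-kill", "selector": {"namespaces": ["milvus"],
#           "labelSelectors": {"app.kubernetes.io/name": "etcd"}}, "scheduler": {"cron": "@every 20s"}}
#   chaos = PodChaos("chaos-mesh.org/v1alpha1", "PodChaos", {"name": "milvus-podchaos"}, spec)
#   experiment = chaos.gen_experiment_config()  # the template merged with the overrides above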
|
||||
|
||||
class NetworkChaos(BaseChaos):
|
||||
def __init__(self, api_version, kind, metadata, spec):
|
||||
super(NetworkChaos, self).__init__(api_version, kind, metadata, spec)
|
||||
|
||||
def gen_experiment_config(self):
|
||||
pass
|
|
@ -0,0 +1,62 @@
|
|||
from __future__ import print_function
|
||||
from utils import *
|
||||
import logging
|
||||
from pprint import pprint
|
||||
from kubernetes import client, config
|
||||
from kubernetes.client.rest import ApiException
|
||||
from milvus_benchmark import config as cf
|
||||
|
||||
config.load_kube_config()
|
||||
api_instance = client.CustomObjectsApi()
|
||||
logger = logging.getLogger("milvus_benchmark.chaos.chaosOpt")
|
||||
|
||||
|
||||
class ChaosOpt(object):
|
||||
def __init__(self, kind, group=cf.DEFAULT_GROUP, version=cf.DEFAULT_VERSION, namespace=cf.CHAOS_NAMESPACE):
|
||||
self.group = group
|
||||
self.version = version
|
||||
self.namespace = namespace
|
||||
self.plural = kind.lower()
|
||||
|
||||
# def get_metadata_name(self):
|
||||
# return self.metadata_name
|
||||
|
||||
def create_chaos_object(self, body):
|
||||
# body = create_chaos_config(self.plural, self.metadata_name, spec_params)
|
||||
# logger.info(body)
|
||||
pretty = 'true'
|
||||
try:
|
||||
api_response = api_instance.create_namespaced_custom_object(self.group, self.version, self.namespace,
|
||||
plural=self.plural, body=body, pretty=pretty)
|
||||
logger.info(api_response)
|
||||
except ApiException as e:
|
||||
logger.error("Exception when calling CustomObjectsApi->create_namespaced_custom_object: %s\n" % e)
|
||||
raise Exception(str(e))
|
||||
|
||||
def delete_chaos_object(self, metadata_name):
|
||||
print(metadata_name)
|
||||
try:
|
||||
data = api_instance.delete_namespaced_custom_object(self.group, self.version, self.namespace, self.plural,
|
||||
metadata_name)
|
||||
logger.info(data)
|
||||
except ApiException as e:
|
||||
logger.error("Exception when calling CustomObjectsApi->delete_namespaced_custom_object: %s\n" % e)
|
||||
raise Exception(str(e))
|
||||
|
||||
def list_chaos_object(self):
|
||||
try:
|
||||
data = api_instance.list_namespaced_custom_object(self.group, self.version, self.namespace,
|
||||
plural=self.plural)
|
||||
# pprint(data)
|
||||
except ApiException as e:
|
||||
logger.error("Exception when calling CustomObjectsApi->list_namespaced_custom_object: %s\n" % e)
|
||||
raise Exception(str(e))
|
||||
return data
|
||||
|
||||
def delete_all_chaos_object(self):
|
||||
chaos_objects = self.list_chaos_object()
|
||||
if len(chaos_objects["items"]) > 0:
|
||||
for item in chaos_objects["items"]:
|
||||
metadata_name = item["metadata"]["name"]
|
||||
self.delete_chaos_object(metadata_name)
|
|
@ -0,0 +1,17 @@
|
|||
apiVersion: chaos-mesh.org/v1alpha1
|
||||
kind: PodChaos
|
||||
metadata:
|
||||
name: milvus-podchaos
|
||||
namespace: chaos-testing
|
||||
spec:
|
||||
action: pod-kill
|
||||
duration: 30s
|
||||
mode: one
|
||||
scheduler:
|
||||
cron: '@every 20s'
|
||||
selector:
|
||||
labelSelectors:
|
||||
app.kubernetes.io/name: zong-single-etcd-0
|
||||
namespaces:
|
||||
- milvus
|
||||
value: ''
|
|
@ -0,0 +1,11 @@
|
|||
chaos:
|
||||
kind: PodChaos
|
||||
spec:
|
||||
action: pod-kill
|
||||
selector:
|
||||
namespaces:
|
||||
- milvus
|
||||
labelSelectors:
|
||||
"app.kubernetes.io/name": etcd
|
||||
scheduler:
|
||||
cron: "@every 20s"
|
|
@ -0,0 +1,13 @@
|
|||
apiVersion: chaos-mesh.org/v1alpha1
|
||||
kind: PodChaos
|
||||
metadata:
|
||||
name: pod-failure-example
|
||||
namespace: chaos-testing
|
||||
spec:
|
||||
action: pod-failure
|
||||
mode: one
|
||||
selector:
|
||||
labelSelectors:
|
||||
'app.kubernetes.io/component': 'tikv'
|
||||
scheduler:
|
||||
cron: '@every 2m'
|
|
@ -0,0 +1,36 @@
|
|||
from gevent import monkey
|
||||
monkey.patch_all()
|
||||
from yaml import full_load, dump
|
||||
from chaos.chaos_opt import ChaosOpt
|
||||
from milvus_benchmark.chaos.chaos_mesh import PodChaos, NetworkChaos
|
||||
from milvus_benchmark import config
|
||||
|
||||
kind_chaos_mapping = {
|
||||
"PodChaos": PodChaos,
|
||||
"NetworkChaos": NetworkChaos
|
||||
}
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
with open('./pod.yaml') as f:
|
||||
conf = full_load(f)
|
||||
|
||||
chaos_config = conf["chaos"]
|
||||
kind = chaos_config["kind"]
|
||||
spec = chaos_config["spec"]
|
||||
metadata_name = config.NAMESPACE + "-" + kind.lower()
|
||||
metadata = {"name": metadata_name}
|
||||
chaos_mesh = kind_chaos_mapping[kind](config.DEFAULT_API_VERSION, kind, metadata, spec)
|
||||
experiment_params = chaos_mesh.gen_experiment_config()
|
||||
# print(experiment_params)
|
||||
# with open('./pod-new-chaos.yaml', "w") as f:
|
||||
# dump(experiment_params, f)
|
||||
# f.close()
|
||||
chaos_opt = ChaosOpt(chaos_mesh.kind)
|
||||
res = chaos_opt.list_chaos_object()
|
||||
print(res)
|
||||
if len(res["items"]) != 0:
|
||||
# chaos_opt.delete_chaos_object("milvus-pod-chaos")
|
||||
print(res["items"][0]["metadata"]["name"])
|
||||
chaos_opt.delete_all_chaos_object()
|
||||
print(chaos_opt.list_chaos_object())
|
|
@ -0,0 +1,39 @@
|
|||
import logging
|
||||
from operator import methodcaller
|
||||
|
||||
from kubernetes import client, config
|
||||
from milvus_benchmark import config as cf
|
||||
|
||||
|
||||
logger = logging.getLogger("milvus_benchmark.chaos.utils")
|
||||
|
||||
|
||||
def list_pod_for_namespace(label_selector="app.kubernetes.io/instance=zong-standalone"):
|
||||
config.load_kube_config()
|
||||
v1 = client.CoreV1Api()
|
||||
ret = v1.list_namespaced_pod(namespace=cf.NAMESPACE, label_selector=label_selector)
|
||||
pods = []
|
||||
# label_selector = 'release=zong-single'
|
||||
for i in ret.items:
|
||||
pods.append(i.metadata.name)
|
||||
# print("%s\t%s\t%s" % (i.status.pod_ip, i.metadata.namespace, i.metadata.name))
|
||||
return pods
|
||||
|
||||
|
||||
def assert_fail(func, milvus_client, **params):
|
||||
try:
|
||||
methodcaller(func, **params)(milvus_client)
|
||||
except Exception as e:
|
||||
logger.debug("11111111111111111111111111")
|
||||
logger.info(str(e))
|
||||
pass
|
||||
else:
|
||||
raise Exception("fail-assert failed")
|
||||
|
||||
|
||||
def assert_pass(func, milvus_client, **params):
|
||||
try:
|
||||
methodcaller(func, **params)(milvus_client)
|
||||
logger.debug("&&&&&&&&&&&&&&&&&&&&")
|
||||
except Exception as e:
|
||||
raise
|
|
@ -6,14 +6,14 @@ import json
|
|||
import time, datetime
|
||||
import traceback
|
||||
from multiprocessing import Process
|
||||
from milvus import Milvus, DataType
|
||||
from pymilvus import Milvus, DataType
|
||||
import numpy as np
|
||||
import utils
|
||||
import config
|
||||
from milvus_benchmark.runners import utils
|
||||
|
||||
logger = logging.getLogger("milvus_benchmark.client")
|
||||
|
||||
SERVER_HOST_DEFAULT = "127.0.0.1"
|
||||
SERVER_PORT_DEFAULT = 19530
|
||||
INDEX_MAP = {
|
||||
"flat": "FLAT",
|
||||
"ivf_flat": "IVF_FLAT",
|
||||
|
@ -29,6 +29,8 @@ INDEX_MAP = {
|
|||
"rhnsw_sq": "RHNSW_SQ"
|
||||
}
|
||||
epsilon = 0.1
|
||||
DEFAULT_WARM_QUERY_TOPK = 1
|
||||
DEFAULT_WARM_QUERY_NQ = 1
|
||||
|
||||
|
||||
def time_wrapper(func):
|
||||
|
@ -51,15 +53,14 @@ def time_wrapper(func):
|
|||
|
||||
|
||||
class MilvusClient(object):
|
||||
def __init__(self, collection_name=None, host=None, port=None, timeout=180):
|
||||
def __init__(self, collection_name=None, host=None, port=None, timeout=300):
|
||||
self._collection_name = collection_name
|
||||
self._collection_info = None
|
||||
start_time = time.time()
|
||||
if not host:
|
||||
host = SERVER_HOST_DEFAULT
|
||||
host = config.SERVER_HOST_DEFAULT
|
||||
if not port:
|
||||
port = SERVER_PORT_DEFAULT
|
||||
logger.debug(host)
|
||||
logger.debug(port)
|
||||
port = config.SERVER_PORT_DEFAULT
|
||||
# retry connect remote server
|
||||
i = 0
|
||||
while time.time() < start_time + timeout:
|
||||
|
@ -74,7 +75,7 @@ class MilvusClient(object):
|
|||
logger.error(str(e))
|
||||
logger.error("Milvus connect failed: %d times" % i)
|
||||
i = i + 1
|
||||
time.sleep(i)
|
||||
time.sleep(30)
|
||||
|
||||
if time.time() > start_time + timeout:
|
||||
raise Exception("Server connect timeout")
|
||||
|
@ -83,12 +84,16 @@ class MilvusClient(object):
|
|||
def __str__(self):
|
||||
return 'Milvus collection %s' % self._collection_name
|
||||
|
||||
def check_status(self, status):
|
||||
if not status.OK():
|
||||
logger.error(status.message)
|
||||
logger.error(self._milvus.server_status())
|
||||
logger.error(self.count())
|
||||
raise Exception("Status not ok")
|
||||
def set_collection(self, collection_name):
|
||||
self._collection_name = collection_name
|
||||
|
||||
# TODO: server not support
|
||||
# def check_status(self, status):
|
||||
# if not status.OK():
|
||||
# logger.error(status.message)
|
||||
# logger.error(self._milvus.server_status())
|
||||
# logger.error(self.count())
|
||||
# raise Exception("Status not ok")
|
||||
|
||||
def check_result_ids(self, result):
|
||||
for index, item in enumerate(result):
|
||||
|
@ -97,6 +102,10 @@ class MilvusClient(object):
|
|||
logger.error(item[0].distance)
|
||||
raise Exception("Distance wrong")
|
||||
|
||||
@property
|
||||
def collection_name(self):
|
||||
return self._collection_name
|
||||
|
||||
# only support the given field name
|
||||
def create_collection(self, dimension, data_type=DataType.FLOAT_VECTOR, auto_id=False,
|
||||
collection_name=None, other_fields=None):
|
||||
|
@ -104,13 +113,22 @@ class MilvusClient(object):
|
|||
if not collection_name:
|
||||
collection_name = self._collection_name
|
||||
vec_field_name = utils.get_default_field_name(data_type)
|
||||
fields = [{"name": vec_field_name, "type": data_type, "params": {"dim": dimension}}]
|
||||
fields = [
|
||||
{"name": vec_field_name, "type": data_type, "params": {"dim": dimension}},
|
||||
{"name": "id", "type": DataType.INT64, "is_primary": True}
|
||||
]
|
||||
if other_fields:
|
||||
other_fields = other_fields.split(",")
|
||||
if "int" in other_fields:
|
||||
fields.append({"name": utils.DEFAULT_INT_FIELD_NAME, "type": DataType.INT64})
|
||||
if "float" in other_fields:
|
||||
fields.append({"name": utils.DEFAULT_FLOAT_FIELD_NAME, "type": DataType.FLOAT})
|
||||
for other_field_name in other_fields:
|
||||
if other_field_name.startswith("int"):
|
||||
field_type = DataType.INT64
|
||||
elif other_field_name.startswith("float"):
|
||||
field_type = DataType.FLOAT
|
||||
elif other_field_name.startswith("double"):
|
||||
field_type = DataType.DOUBLE
|
||||
else:
|
||||
raise Exception("Field name not supported")
|
||||
fields.append({"name": other_field_name, "type": field_type})
|
||||
create_param = {
|
||||
"fields": fields,
|
||||
"auto_id": auto_id}
|
||||
|
@ -126,33 +144,12 @@ class MilvusClient(object):
|
|||
collection_name = self._collection_name
|
||||
self._milvus.create_partition(collection_name, tag)
|
||||
|
||||
def generate_values(self, data_type, vectors, ids):
|
||||
values = None
|
||||
if data_type in [DataType.INT32, DataType.INT64]:
|
||||
values = ids
|
||||
elif data_type in [DataType.FLOAT, DataType.DOUBLE]:
|
||||
values = [(i + 0.0) for i in ids]
|
||||
elif data_type in [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR]:
|
||||
values = vectors
|
||||
return values
|
||||
|
||||
def generate_entities(self, vectors, ids=None, collection_name=None):
|
||||
entities = []
|
||||
if collection_name is None:
|
||||
collection_name = self._collection_name
|
||||
info = self.get_info(collection_name)
|
||||
for field in info["fields"]:
|
||||
field_type = field["type"]
|
||||
entities.append(
|
||||
{"name": field["name"], "type": field_type, "values": self.generate_values(field_type, vectors, ids)})
|
||||
return entities
|
||||
|
||||
@time_wrapper
|
||||
def insert(self, entities, ids=None, collection_name=None):
|
||||
def insert(self, entities, collection_name=None):
|
||||
tmp_collection_name = self._collection_name if collection_name is None else collection_name
|
||||
try:
|
||||
insert_ids = self._milvus.insert(tmp_collection_name, entities, ids=ids)
|
||||
return insert_ids
|
||||
insert_res = self._milvus.insert(tmp_collection_name, entities)
|
||||
return insert_res.primary_keys
|
||||
except Exception as e:
|
||||
logger.error(str(e))
|
||||
|
||||
|
@ -200,9 +197,6 @@ class MilvusClient(object):
|
|||
# self.check_status(status)
|
||||
# return ids, get_res
|
||||
|
||||
def get(self):
|
||||
get_ids = random.randint(1, 1000000)
|
||||
self._milvus.get_entity_by_id(self._collection_name, [get_ids])
|
||||
|
||||
@time_wrapper
|
||||
def get_entities(self, get_ids):
|
||||
|
@ -240,6 +234,15 @@ class MilvusClient(object):
|
|||
status = self._milvus.compact(tmp_collection_name)
|
||||
self.check_status(status)
|
||||
|
||||
# only support "in" in expr
|
||||
@time_wrapper
|
||||
def get(self, ids, collection_name=None):
|
||||
tmp_collection_name = self._collection_name if collection_name is None else collection_name
|
||||
# res = self._milvus.get(tmp_collection_name, ids, output_fields=None, partition_names=None)
|
||||
ids_expr = "id in %s" % (str(ids))
|
||||
res = self._milvus.query(tmp_collection_name, ids_expr, output_fields=None, partition_names=None)
|
||||
return res
|
||||
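# For example, client.get([1, 2, 3]) queries with the expression "id in [1, 2, 3]"
# against the default collection, since only "in" expressions are supported here.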
|
||||
@time_wrapper
|
||||
def create_index(self, field_name, index_type, metric_type, _async=False, index_param=None):
|
||||
index_type = INDEX_MAP[index_type]
|
||||
|
@ -256,20 +259,18 @@ class MilvusClient(object):
|
|||
self._milvus.create_index(self._collection_name, field_name, index_params, _async=_async)
|
||||
|
||||
# TODO: need to check
|
||||
def describe_index(self, field_name):
|
||||
def describe_index(self, field_name, collection_name=None):
|
||||
# stats = self.get_stats()
|
||||
info = self._milvus.describe_index(self._collection_name, field_name)
|
||||
index_info = {"index_type": "flat", "index_param": None}
|
||||
for field in info["fields"]:
|
||||
for index in field['indexes']:
|
||||
if not index or "index_type" not in index:
|
||||
continue
|
||||
else:
|
||||
for k, v in INDEX_MAP.items():
|
||||
if index['index_type'] == v:
|
||||
index_info['index_type'] = k
|
||||
index_info['index_param'] = index['params']
|
||||
return index_info
|
||||
tmp_collection_name = self._collection_name if collection_name is None else collection_name
|
||||
info = self._milvus.describe_index(tmp_collection_name, field_name)
|
||||
logger.info(info)
|
||||
index_info = {"index_type": "flat", "metric_type": None, "index_param": None}
|
||||
if info:
|
||||
index_info = {"index_type": info["index_type"], "metric_type": info["metric_type"], "index_param": info["params"]}
|
||||
# transfer index type name
|
||||
for k, v in INDEX_MAP.items():
|
||||
if index_info['index_type'] == v:
|
||||
index_info['index_type'] = k
|
||||
return index_info
|
||||
|
||||
def drop_index(self, field_name):
|
||||
|
@ -277,7 +278,7 @@ class MilvusClient(object):
|
|||
return self._milvus.drop_index(self._collection_name, field_name)
|
||||
|
||||
@time_wrapper
|
||||
def query(self, vector_query, filter_query=None, collection_name=None):
|
||||
def query(self, vector_query, filter_query=None, collection_name=None, timeout=300):
|
||||
tmp_collection_name = self._collection_name if collection_name is None else collection_name
|
||||
must_params = [vector_query]
|
||||
if filter_query:
|
||||
|
@ -285,11 +286,30 @@ class MilvusClient(object):
|
|||
query = {
|
||||
"bool": {"must": must_params}
|
||||
}
|
||||
result = self._milvus.search(tmp_collection_name, query)
|
||||
result = self._milvus.search(tmp_collection_name, query, timeout=timeout)
|
||||
return result
|
||||
|
||||
@time_wrapper
|
||||
def load_and_query(self, vector_query, filter_query=None, collection_name=None):
|
||||
def warm_query(self, index_field_name, search_param, metric_type, times=2):
|
||||
query_vectors = [[random.random() for _ in range(self._dimension)] for _ in range(DEFAULT_WARM_QUERY_NQ)]
|
||||
# index_info = self.describe_index(index_field_name)
|
||||
vector_query = {"vector": {index_field_name: {
|
||||
"topk": DEFAULT_WARM_QUERY_TOPK,
|
||||
"query": query_vectors,
|
||||
"metric_type": metric_type,
|
||||
"params": search_param}
|
||||
}}
|
||||
must_params = [vector_query]
|
||||
query = {
|
||||
"bool": {"must": must_params}
|
||||
}
|
||||
logger.debug("Start warm up query")
|
||||
for i in range(times):
|
||||
self._milvus.search(self._collection_name, query)
|
||||
logger.debug("End warm up query")
|
||||
|
||||
@time_wrapper
|
||||
def load_and_query(self, vector_query, filter_query=None, collection_name=None, timeout=120):
|
||||
tmp_collection_name = self._collection_name if collection_name is None else collection_name
|
||||
must_params = [vector_query]
|
||||
if filter_query:
|
||||
|
@ -298,17 +318,19 @@ class MilvusClient(object):
|
|||
"bool": {"must": must_params}
|
||||
}
|
||||
self.load_collection(tmp_collection_name)
|
||||
result = self._milvus.search(tmp_collection_name, query)
|
||||
result = self._milvus.search(tmp_collection_name, query, timeout=timeout)
|
||||
return result
|
||||
|
||||
def get_ids(self, result):
|
||||
idss = result._entities.ids
|
||||
# idss = result._entities.ids
|
||||
ids = []
|
||||
len_idss = len(idss)
|
||||
len_r = len(result)
|
||||
top_k = len_idss // len_r
|
||||
for offset in range(0, len_idss, top_k):
|
||||
ids.append(idss[offset: min(offset + top_k, len_idss)])
|
||||
# len_idss = len(idss)
|
||||
# len_r = len(result)
|
||||
# top_k = len_idss // len_r
|
||||
# for offset in range(0, len_idss, top_k):
|
||||
# ids.append(idss[offset: min(offset + top_k, len_idss)])
|
||||
for res in result:
|
||||
ids.append(res.ids)
|
||||
return ids
|
||||
|
||||
def query_rand(self, nq_max=100):
|
||||
|
@ -374,7 +396,7 @@ class MilvusClient(object):
|
|||
else:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.debug(str(e))
|
||||
logger.warning("Collection count failed: {}".format(str(e)))
|
||||
break
|
||||
if i >= timeout:
|
||||
logger.error("Delete collection timeout")
|
||||
|
@ -383,10 +405,9 @@ class MilvusClient(object):
|
|||
return self._milvus.get_collection_stats(self._collection_name)
|
||||
|
||||
def get_info(self, collection_name=None):
|
||||
# pdb.set_trace()
|
||||
if collection_name is None:
|
||||
collection_name = self._collection_name
|
||||
return self._milvus.get_collection_info(collection_name)
|
||||
return self._milvus.describe_collection(collection_name)
|
||||
|
||||
def show_collections(self):
|
||||
return self._milvus.list_collections()
|
|
@ -0,0 +1,30 @@
|
|||
MONGO_SERVER = 'mongodb://192.168.1.234:27017/'
|
||||
# MONGO_SERVER = 'mongodb://mongodb.test:27017/'
|
||||
|
||||
SCHEDULER_DB = "scheduler"
|
||||
JOB_COLLECTION = "jobs"
|
||||
|
||||
REGISTRY_URL = "registry.zilliz.com/milvus/milvus"
|
||||
IDC_NAS_URL = "//172.16.70.249/test"
|
||||
|
||||
SERVER_HOST_DEFAULT = "127.0.0.1"
|
||||
SERVER_PORT_DEFAULT = 19530
|
||||
SERVER_VERSION = "2.0"
|
||||
|
||||
HELM_NAMESPACE = "milvus"
|
||||
BRANCH = "master"
|
||||
|
||||
DEFAULT_CPUS = 48
|
||||
|
||||
RAW_DATA_DIR = "/test/milvus/raw_data/"
|
||||
|
||||
# nas log
|
||||
LOG_PATH = "/test/milvus/benchmark/logs/{}/".format(BRANCH)
|
||||
|
||||
DEFAULT_DEPLOY_MODE = "single"
|
||||
|
||||
NAMESPACE = "milvus"
|
||||
CHAOS_NAMESPACE = "chaos-testing"
|
||||
DEFAULT_API_VERSION = 'chaos-mesh.org/v1alpha1'
|
||||
DEFAULT_GROUP = 'chaos-mesh.org'
|
||||
DEFAULT_VERSION = 'v1alpha1'
|
|
@ -0,0 +1,14 @@
|
|||
import logging
|
||||
from .helm import HelmEnv
|
||||
from .docker import DockerEnv
|
||||
from .local import LocalEnv
|
||||
|
||||
logger = logging.getLogger("milvus_benchmark.env")
|
||||
|
||||
|
||||
def get_env(env_mode, deploy_mode=None):
|
||||
return {
|
||||
"helm": HelmEnv(deploy_mode),
|
||||
"docker": DockerEnv(None),
|
||||
"local": LocalEnv(None),
|
||||
}.get(env_mode)
|
|
@ -0,0 +1,46 @@
|
|||
import logging
|
||||
from milvus_benchmark import utils
|
||||
from milvus_benchmark import config
|
||||
|
||||
logger = logging.getLogger("milvus_benchmark.env.env")
|
||||
|
||||
|
||||
class BaseEnv(object):
|
||||
"""docstring for Env"""
|
||||
def __init__(self, deploy_mode="single"):
|
||||
self.deploy_mode = deploy_mode
|
||||
self._name = utils.get_unique_name()
|
||||
self._hostname = None
|
||||
self._port = config.SERVER_PORT_DEFAULT
|
||||
|
||||
def start_up(self):
|
||||
logger.debug("IN ENV CLASS")
|
||||
pass
|
||||
|
||||
def tear_down(self):
|
||||
pass
|
||||
|
||||
def restart(self):
|
||||
pass
|
||||
|
||||
def set_hostname(self, hostname):
|
||||
self._hostname = hostname
|
||||
|
||||
def set_port(self, port):
|
||||
self._port = port
|
||||
|
||||
def resources(self):
|
||||
pass
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self._name
|
||||
|
||||
@property
|
||||
def hostname(self):
|
||||
return self._hostname
|
||||
|
||||
@property
|
||||
def port(self):
|
||||
return self._port
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
import logging
|
||||
from milvus_benchmark.env.base import BaseEnv
|
||||
|
||||
logger = logging.getLogger("milvus_benchmark.env.docker")
|
||||
|
||||
|
||||
class DockerEnv(BaseEnv):
|
||||
"""docker env class wrapper"""
|
||||
env_mode = "docker"
|
||||
|
||||
def __init__(self, deploy_mode=None):
|
||||
super(DockerEnv, self).__init__(deploy_mode)
|
|
@ -0,0 +1,72 @@
import os
import time
import pdb
import logging
import traceback

from milvus_benchmark.env import helm_utils
from milvus_benchmark.env.base import BaseEnv
from milvus_benchmark import config

logger = logging.getLogger("milvus_benchmark.env.helm")
TIMEOUT = 5


class HelmEnv(BaseEnv):
    """helm env class wrapper"""
    env_mode = "helm"

    def __init__(self, deploy_mode="single"):
        super(HelmEnv, self).__init__(deploy_mode)
        self._name_space = config.HELM_NAMESPACE

    def start_up(self, helm_path, helm_install_params):
        if "namespace" in helm_install_params:
            self._name_space = helm_install_params["namespace"]
        server_name = helm_install_params["server_name"]
        server_tag = helm_install_params["server_tag"] if "server_tag" in helm_install_params else None
        server_config = helm_install_params["server_config"] if "server_config" in helm_install_params else None
        milvus_config = helm_install_params["milvus_config"]
        image_tag = helm_install_params["image_tag"]
        image_type = helm_install_params["image_type"]

        logger.debug(self.deploy_mode)
        server_config = helm_utils.update_server_config(server_name, server_tag, server_config)
        # update values.yaml, guarded by a simple lock file so that concurrent
        # runs do not rewrite the file at the same time
        values_file_path = helm_path + "/values.yaml"
        if not os.path.exists(values_file_path):
            raise Exception("File {} does not exist".format(values_file_path))
        lock_file_path = helm_path + "/values.yaml.lock"
        start_time = time.time()
        while os.path.exists(lock_file_path) and time.time() < start_time + TIMEOUT:
            logger.debug("Waiting for the lock file to be released")
            time.sleep(1)
        # if the lock is still held after TIMEOUT, fall through and return None
        if not os.path.exists(lock_file_path):
            # generate lock file
            open(lock_file_path, 'a').close()
            try:
                if milvus_config:
                    helm_utils.update_values(values_file_path, self.deploy_mode, server_name, server_tag, milvus_config, server_config)
                    logger.debug("Config file has been updated, remove the lock file")
                    os.system("rm -rf %s" % lock_file_path)
                logger.debug("Start to install server")
                hostname = helm_utils.helm_install_server(helm_path, self.deploy_mode, image_tag, image_type, self.name,
                                                          self._name_space)
                # leftover debug helper with a hardcoded release name:
                # status_cmd = 'kubectl get pods -n milvus -l release=zong-standalone -o=jsonpath=\'{range .items[*]}{.metadata.name}{"\t"}{.status.phase}{"\n"}{end}\''
                if not hostname:
                    logger.error("Helm install server failed")
                    return False
                else:
                    self.set_hostname(hostname)
                    # poll until all pods of the release report Running
                    while not helm_utils.running_status(self.name, self._name_space):
                        time.sleep(5)
                    else:
                        return hostname
            except Exception as e:
                os.system("rm -rf %s" % lock_file_path)
                logger.error("Helm install server failed: %s" % (str(e)))
                logger.error(traceback.format_exc())
                return False

    def tear_down(self):
        logger.debug("Start clean up: {}.{}".format(self.name, self._name_space))
        helm_utils.helm_del_server(self.name, self._name_space)
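For reference, a sketch of the dict start_up expects; only the keys are dictated by the code above, the values are illustrative:

    helm_params = {
        "server_name": "node-01",      # pin the deployment to this k8s node, or ""
        "server_tag": None,            # e.g. an "8c16m"-style instance tag
        "server_config": None,         # optional dict; a "cpus" entry is honored
        "milvus_config": {},           # merged into values.yaml when truthy
        "image_tag": "master-latest",
        "image_type": "cpu",
    }
    env = HelmEnv(deploy_mode="single")
    hostname = env.start_up("/path/to/milvus-helm", helm_params)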
@ -0,0 +1,473 @@
import os
import pdb
import time
import logging
import hashlib
import traceback
from yaml import full_load, dump
from milvus_benchmark import utils
from milvus_benchmark import config

logger = logging.getLogger("milvus_benchmark.env.helm_utils")
BOOKKEEPER_PULSAR_MEM = '\"-Xms512m -Xmx1024m -XX:MaxDirectMemorySize=1024m -Dio.netty.leakDetectionLevel=disabled -Dio.netty.recycler.linkCapacity=1024 -XX:+UseG1GC -XX:MaxGCPauseMillis=10 -XX:+ParallelRefProcEnabled -XX:+UnlockExperimentalVMOptions -XX:+AggressiveOpts -XX:+DoEscapeAnalysis -XX:ParallelGCThreads=32 -XX:ConcGCThreads=32 -XX:G1NewSizePercent=50 -XX:+DisableExplicitGC -XX:-ResizePLAB -XX:+ExitOnOutOfMemoryError -XX:+PerfDisableSharedMem -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCApplicationStoppedTime -XX:+PrintHeapAtGC -verbosegc -XX:G1LogLevel=finest\"'
BROKER_PULSAR_MEM = '\"-Xms512m -Xmx1024m -XX:MaxDirectMemorySize=1024m -Dio.netty.leakDetectionLevel=disabled -Dio.netty.recycler.linkCapacity=1024 -XX:+ParallelRefProcEnabled -XX:+UnlockExperimentalVMOptions -XX:+AggressiveOpts -XX:+DoEscapeAnalysis -XX:ParallelGCThreads=32 -XX:ConcGCThreads=32 -XX:G1NewSizePercent=50 -XX:+DisableExplicitGC -XX:-ResizePLAB -XX:+ExitOnOutOfMemoryError -XX:+PerfDisableSharedMem\"'

def get_host_cpus(hostname):
    from kubernetes import client, config
    config.load_kube_config()
    client.rest.logger.setLevel(logging.WARNING)
    try:
        v1 = client.CoreV1Api()
        cpus = v1.read_node(hostname).status.allocatable.get("cpu")
    except Exception as e:
        logger.error(traceback.format_exc())
        logger.error(str(e))
        cpus = 0
    # avoid the return-in-finally anti-pattern; the behavior is unchanged
    return cpus

def update_server_config(server_name, server_tag, server_config):
    cpus = config.DEFAULT_CPUS
    if server_name:
        try:
            cpus = get_host_cpus(server_name)
            if not cpus:
                cpus = config.DEFAULT_CPUS
        except Exception as e:
            logger.error("Get cpus on host: {} failed".format(server_name))
            logger.error(str(e))
    if server_config:
        if "cpus" in server_config.keys():
            cpus = server_config["cpus"]
    # self.hardware = Hardware(name=self.hostname, cpus=cpus)
    if server_tag:
        cpus = int(server_tag.split("c")[0])
    kv = {"cpus": cpus}
    logger.debug(kv)
    if server_config:
        server_config.update(kv)
    else:
        server_config = kv
    return server_config

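The precedence that falls out of update_server_config is: server_tag overrides an explicit "cpus" in server_config, which overrides the node's allocatable cpu count, which falls back to DEFAULT_CPUS. An illustration (all values made up):

    # node reports 64 allocatable cpus, config pins 32, tag says 8 cores
    conf = update_server_config("node-01", "8c16m", {"cpus": 32})
    assert conf["cpus"] == 8  # the tag wins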
"""
|
||||
description: update values.yaml
|
||||
return: no return
|
||||
"""
|
||||
|
||||
|
||||
def update_values(file_path, deploy_mode, hostname, server_tag, milvus_config, server_config=None):
    # back up values.yaml so that repeated runs always start from the original
    file_name = os.path.basename(file_path)
    bak_file_name = file_name + ".bak"
    file_parent_path = os.path.dirname(file_path)
    bak_file_path = file_parent_path + '/' + bak_file_name
    if os.path.exists(bak_file_path):
        os.system("cp %s %s" % (bak_file_path, file_path))
    else:
        os.system("cp %s %s" % (file_path, bak_file_path))
    with open(file_path) as f:
        values_dict = full_load(f)
    cluster = False
    if deploy_mode == "cluster":
        cluster = True

    # TODO: disable change config
    # cluster = False
    # if "cluster" in milvus_config and milvus_config["cluster"]:
    #     cluster = True
    # for k, v in milvus_config.items():
    #     if k.find("primary_path") != -1:
    #         suffix_path = milvus_config["suffix_path"] if "suffix_path" in milvus_config else None
    #         path_value = v
    #         if suffix_path:
    #             path_value = v + "_" + str(int(time.time()))
    #         values_dict["primaryPath"] = path_value
    #         values_dict['wal']['path'] = path_value + "/wal"
    #         values_dict['logs']['path'] = path_value + "/logs"
    #     # elif k.find("use_blas_threshold") != -1:
    #     #     values_dict['useBLASThreshold'] = int(v)
    #     elif k.find("gpu_search_threshold") != -1:
    #         values_dict['gpu']['gpuSearchThreshold'] = int(v)
    #         if cluster:
    #             values_dict['readonly']['gpu']['gpuSearchThreshold'] = int(v)
    #     elif k.find("cpu_cache_capacity") != -1:
    #         values_dict['cache']['cacheSize'] = v
    #         if cluster:
    #             values_dict['readonly']['cache']['cacheSize'] = v
    #     # elif k.find("cache_insert_data") != -1:
    #     #     values_dict['cache']['cacheInsertData'] = v
    #     elif k.find("insert_buffer_size") != -1:
    #         values_dict['cache']['insertBufferSize'] = v
    #         if cluster:
    #             values_dict['readonly']['cache']['insertBufferSize'] = v
    #     elif k.find("gpu_resource_config.enable") != -1:
    #         values_dict['gpu']['enabled'] = v
    #         if cluster:
    #             values_dict['readonly']['gpu']['enabled'] = v
    #     elif k.find("gpu_resource_config.cache_capacity") != -1:
    #         values_dict['gpu']['cacheSize'] = v
    #         if cluster:
    #             values_dict['readonly']['gpu']['cacheSize'] = v
    #     elif k.find("build_index_resources") != -1:
    #         values_dict['gpu']['buildIndexDevices'] = v
    #         if cluster:
    #             values_dict['readonly']['gpu']['buildIndexDevices'] = v
    #     elif k.find("search_resources") != -1:
    #         values_dict['gpu']['searchDevices'] = v
    #         if cluster:
    #             values_dict['readonly']['gpu']['searchDevices'] = v
    #     # wal
    #     elif k.find("auto_flush_interval") != -1:
    #         values_dict['storage']['autoFlushInterval'] = v
    #         if cluster:
    #             values_dict['readonly']['storage']['autoFlushInterval'] = v
    #     elif k.find("wal_enable") != -1:
    #         values_dict['wal']['enabled'] = v

    # # if values_dict['nodeSelector']:
    # #     logger.warning("nodeSelector has been set: %s" % str(values_dict['engine']['nodeSelector']))
    # #     return
    # values_dict["wal"]["recoveryErrorIgnore"] = True
    # # enable monitor
    # values_dict["metrics"]["enabled"] = True
    # values_dict["metrics"]["address"] = "192.168.1.237"
    # values_dict["metrics"]["port"] = 9091
    # # only test avx2
    # values_dict["extraConfiguration"].update({"engine": {"simd_type": "avx2"}})
    # # stat_optimizer_enable
    # values_dict["extraConfiguration"]["engine"].update({"stat_optimizer_enable": False})

    # # enable read-write mode
    # if cluster:
    #     values_dict["cluster"]["enabled"] = True
    #     # update readonly log path
    #     values_dict["readonly"]['logs']['path'] = values_dict['logs']['path'] + "/readonly"
    #     if "readonly" in milvus_config:
    #         if "replicas" in milvus_config["readonly"]:
    #             values_dict["readonly"]["replicas"] = milvus_config["readonly"]["replicas"]

    # use_external_mysql = False
    # if "external_mysql" in milvus_config and milvus_config["external_mysql"]:
    #     use_external_mysql = True
    # # meta mysql
    # if use_external_mysql:
    #     values_dict["mysql"]["enabled"] = False
    #     # values_dict["mysql"]["persistence"]["enabled"] = True
    #     # values_dict["mysql"]["persistence"]["existingClaim"] = hashlib.md5(path_value.encode(encoding='UTF-8')).hexdigest()
    #     values_dict['externalMysql']['enabled'] = True
    #     if deploy_mode == "local":
    #         values_dict['externalMysql']["ip"] = "192.168.1.238"
    #     else:
    #         values_dict['externalMysql']["ip"] = "milvus-mysql.test"
    #     values_dict['externalMysql']["port"] = 3306
    #     values_dict['externalMysql']["user"] = "root"
    #     values_dict['externalMysql']["password"] = "milvus"
    #     values_dict['externalMysql']["database"] = "db"
    # else:
    #     values_dict["mysql"]["enabled"] = False
    # # update values.yaml with the given host
    node_config = None
    perf_tolerations = [{
        "key": "worker",
        "operator": "Equal",
        "value": "performance",
        "effect": "NoSchedule"
    }]
    if hostname:
        node_config = {'kubernetes.io/hostname': hostname}
    elif server_tag:
        # server tag
        node_config = {'instance-type': server_tag}
    cpus = server_config["cpus"]
    logger.debug(hostname)
    if cluster is False:
        if node_config:
            values_dict['standalone']['nodeSelector'] = node_config
            values_dict['minio']['nodeSelector'] = node_config
            values_dict['etcd']['nodeSelector'] = node_config
            # TODO: disable
            # set limit/request cpus in resources
            values_dict['standalone']['resources'] = {
                "limits": {
                    "cpu": str(int(cpus)) + ".0"
                },
                "requests": {
                    "cpu": str(int(cpus) // 2 + 1) + ".0"
                    # "cpu": "4.0"
                }
            }
            logger.debug("Add tolerations into standalone server")
            values_dict['standalone']['tolerations'] = perf_tolerations
            values_dict['minio']['tolerations'] = perf_tolerations
            values_dict['etcd']['tolerations'] = perf_tolerations
    else:
        # values_dict['pulsar']["broker"]["configData"].update({"maxMessageSize": "52428800", "PULSAR_MEM": BOOKKEEPER_PULSAR_MEM})
        # values_dict['pulsar']["bookkeeper"]["configData"].update({"nettyMaxFrameSizeBytes": "52428800", "PULSAR_MEM": BROKER_PULSAR_MEM})
        values_dict['proxynode']['nodeSelector'] = node_config
        values_dict['querynode']['nodeSelector'] = node_config
        values_dict['indexnode']['nodeSelector'] = node_config
        values_dict['datanode']['nodeSelector'] = node_config
        values_dict['minio']['nodeSelector'] = node_config

        # values_dict['pulsar']["enabled"] = True
        # values_dict['pulsar']['autoRecovery']['nodeSelector'] = node_config
        # values_dict['pulsar']['proxy']['nodeSelector'] = node_config
        # values_dict['pulsar']['broker']['nodeSelector'] = node_config
        # values_dict['pulsar']['bookkeeper']['nodeSelector'] = node_config
        # values_dict['pulsar']['zookeeper']['nodeSelector'] = node_config
        values_dict['pulsarStandalone']['nodeSelector'] = node_config
        if hostname:
            logger.debug("Add tolerations into cluster server")
            values_dict['proxynode']['tolerations'] = perf_tolerations
            values_dict['querynode']['tolerations'] = perf_tolerations
            values_dict['indexnode']['tolerations'] = perf_tolerations
            values_dict['datanode']['tolerations'] = perf_tolerations
            values_dict['etcd']['tolerations'] = perf_tolerations
            values_dict['minio']['tolerations'] = perf_tolerations
            values_dict['pulsarStandalone']['tolerations'] = perf_tolerations
            # values_dict['pulsar']['autoRecovery']['tolerations'] = perf_tolerations
            # values_dict['pulsar']['proxy']['tolerations'] = perf_tolerations
            # values_dict['pulsar']['broker']['tolerations'] = perf_tolerations
            # values_dict['pulsar']['bookkeeper']['tolerations'] = perf_tolerations
            # values_dict['pulsar']['zookeeper']['tolerations'] = perf_tolerations

    # add extra volumes
    values_dict['extraVolumes'] = [{
        'name': 'test',
        'flexVolume': {
            'driver': "fstab/cifs",
            'fsType': "cifs",
            'secretRef': {
                'name': "cifs-test-secret"
            },
            'options': {
                'networkPath': config.IDC_NAS_URL,
                'mountOptions': "vers=1.0"
            }
        }
    }]
    values_dict['extraVolumeMounts'] = [{
        'name': 'test',
        'mountPath': '/test'
    }]

    with open(file_path, 'w') as f:
        dump(values_dict, f, default_flow_style=False)
    # DEBUG: echo the rewritten values.yaml into the log
    with open(file_path) as f:
        for line in f.readlines():
            line = line.strip("\n")
            logger.debug(line)


# deploy server
def helm_install_server(helm_path, deploy_mode, image_tag, image_type, name, namespace):
    logger.debug("Server deploy mode: %s" % deploy_mode)
    host = "%s-milvus-ha.%s.svc.cluster.local" % (name, namespace)
    # TODO: update etcd config
    etcd_config_map_cmd = "kubectl create configmap -n %s %s --from-literal=ETCD_QUOTA_BACKEND_BYTES=8589934592 --from-literal=ETCD_SNAPSHOT_COUNT=5000 --from-literal=ETCD_AUTO_COMPACTION_MODE=revision --from-literal=ETCD_AUTO_COMPACTION_RETENTION=1" % (
        namespace, name)
    if os.system(etcd_config_map_cmd):
        raise Exception("Create configmap: {} failed".format(name))
    logger.debug("Create configmap: {} successfully".format(name))
    log_path = config.LOG_PATH + "install.log"
    install_cmd = "helm install \
            --set standalone.service.type=ClusterIP \
            --set image.all.repository=%s \
            --set image.all.tag=%s \
            --set minio.persistence.enabled=false \
            --set etcd.persistence.enabled=false \
            --set etcd.envVarsConfigMap=%s \
            --namespace %s \
            %s . >>%s 2>&1" % (config.REGISTRY_URL, image_tag, name, namespace, name, log_path)
    # --set image.all.pullPolicy=Always \
    if deploy_mode == "cluster":
        install_cmd = "helm install \
                --set standalone.enabled=false \
                --set image.all.repository=%s \
                --set image.all.tag=%s \
                --set minio.persistence.enabled=false \
                --set etcd.persistence.enabled=false \
                --set etcd.envVarsConfigMap=%s \
                --namespace %s \
                %s . >>%s 2>&1" % (config.REGISTRY_URL, image_tag, name, namespace, name, log_path)
        # --set image.all.pullPolicy=Always \
    elif deploy_mode != "single":
        raise Exception("Deploy mode: {} not supported".format(deploy_mode))
    logger.debug(install_cmd)
    logger.debug(host)
    if os.system("cd %s && %s" % (helm_path, install_cmd)):
        logger.error("Helm install failed: %s" % name)
        return None
    logger.debug("Wait for 60s ..")
    time.sleep(60)
    # config.load_kube_config()
    # v1 = client.CoreV1Api()
    # pod_name = None
    # pod_id = None
    # pods = v1.list_namespaced_pod(namespace)
    # for i in pods.items:
    #     if i.metadata.name.find(name) != -1:
    #         pod_name = i.metadata.name
    #         pod_ip = i.status.pod_ip
    # logger.debug(pod_name)
    # logger.debug(pod_ip)
    # return pod_name, pod_ip
    return host


# delete server
@utils.retry(3)
def helm_del_server(name, namespace):
    # logger.debug("Sleep 600s before uninstall server")
    # time.sleep(600)
    delete_etcd_config_map_cmd = "kubectl delete configmap -n %s %s" % (namespace, name)
    logger.info(delete_etcd_config_map_cmd)
    if os.system(delete_etcd_config_map_cmd):
        logger.error("Delete configmap %s:%s failed" % (namespace, name))
    # use the namespace argument rather than a hardcoded "milvus"
    del_cmd = "helm uninstall -n %s %s" % (namespace, name)
    logger.info(del_cmd)
    if os.system(del_cmd):
        logger.error("Helm delete name:%s failed" % name)
        return False
    return True

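helm_del_server is wrapped in utils.retry(3). The helper itself is not part of this diff; a decorator of the general shape the usage implies would look like this (a sketch, not the project's implementation):

    import functools
    import time

    def retry(times, interval=3):
        def wrapper(func):
            @functools.wraps(func)
            def inner(*args, **kwargs):
                for _ in range(times):
                    try:
                        return func(*args, **kwargs)
                    except Exception:
                        time.sleep(interval)
                raise RuntimeError("%s failed after %d retries" % (func.__name__, times))
            return inner
        return wrapper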
def restart_server(helm_release_name, namespace):
    # restart_server touches the k8s API directly, so import the client here
    from kubernetes import client, config
    res = True
    timeout = 120000
    # service_name = "%s.%s.svc.cluster.local" % (helm_release_name, namespace)
    config.load_kube_config()
    v1 = client.CoreV1Api()
    pod_name = None
    # config_map_names = v1.list_namespaced_config_map(namespace, pretty='true')
    # body = {"replicas": 0}
    pods = v1.list_namespaced_pod(namespace)
    for i in pods.items:
        if i.metadata.name.find(helm_release_name) != -1 and i.metadata.name.find("mysql") == -1:
            pod_name = i.metadata.name
            break
    # v1.patch_namespaced_config_map(config_map_name, namespace, body, pretty='true')
    # status_res = v1.read_namespaced_service_status(helm_release_name, namespace, pretty='true')
    logger.debug("Pod name: %s" % pod_name)
    if pod_name is not None:
        try:
            v1.delete_namespaced_pod(pod_name, namespace)
        except Exception as e:
            logger.error(str(e))
            logger.error("Exception when calling CoreV1Api->delete_namespaced_pod")
            res = False
            return res
        logger.debug("Sleep 10s after pod deleted")
        time.sleep(10)
        # check whether the pod restarted successfully
        pods = v1.list_namespaced_pod(namespace)
        for i in pods.items:
            pod_name_tmp = i.metadata.name
            logger.debug(pod_name_tmp)
            if pod_name_tmp == pod_name:
                continue
            elif pod_name_tmp.find(helm_release_name) == -1 or pod_name_tmp.find("mysql") != -1:
                continue
            else:
                status_res = v1.read_namespaced_pod_status(pod_name_tmp, namespace, pretty='true')
                logger.debug(status_res.status.phase)
                start_time = time.time()
                ready_break = False
                while time.time() - start_time <= timeout:
                    logger.debug(time.time())
                    status_res = v1.read_namespaced_pod_status(pod_name_tmp, namespace, pretty='true')
                    if status_res.status.phase == "Running":
                        logger.debug("Already running")
                        ready_break = True
                        break
                    else:
                        time.sleep(5)
                if time.time() - start_time > timeout:
                    logger.error("Restart pod: %s timeout" % pod_name_tmp)
                    res = False
                    return res
                if ready_break:
                    break
    else:
        raise Exception("Pod: %s not found" % pod_name)
    follow = True
    pretty = True
    previous = True  # bool | Return previous terminated container logs. Defaults to false. (optional)
    since_seconds = 56  # int | A relative time in seconds before the current time from which to show logs. If this value precedes the time a pod was started, only logs since the pod start will be returned. If this value is in the future, no logs will be returned. Only one of sinceSeconds or sinceTime may be specified. (optional)
    timestamps = True  # bool | If true, add an RFC3339 or RFC3339Nano timestamp at the beginning of every line of log output. Defaults to false. (optional)
    container = "milvus"
    # start_time = time.time()
    # while time.time() - start_time <= timeout:
    #     try:
    #         api_response = v1.read_namespaced_pod_log(pod_name_tmp, namespace, container=container, follow=follow,
    #                                                   pretty=pretty, previous=previous, since_seconds=since_seconds,
    #                                                   timestamps=timestamps)
    #         logging.error(api_response)
    #         return res
    #     except Exception as e:
    #         logging.error("Exception when calling CoreV1Api->read_namespaced_pod_log: %s\n" % e)
    #         # waiting for server start
    #         time.sleep(2)
    #         # res = False
    #         # return res
    #     if time.time() - start_time > timeout:
    #         logging.error("Restart pod: %s timeout" % pod_name_tmp)
    #         res = False
    return res

def get_pod_status(helm_release_name, namespace):
    from kubernetes import client, config
    config.load_kube_config()
    v1 = client.CoreV1Api()
    pod_status = []
    label_selector = 'app.kubernetes.io/instance={}'.format(helm_release_name)
    # pods = v1.list_namespaced_pod(namespace, label_selector=label_selector)
    pods = v1.list_namespaced_pod(namespace)
    for i in pods.items:
        if i.metadata.name.find(helm_release_name) != -1:
            pod_name = i.metadata.name
            result = v1.read_namespaced_pod_status(pod_name, namespace)
            pod_status.append({"pod": pod_name, "status": result.status.phase})
    # print(pod_status)
    return pod_status


def running_status(helm_release_name, namespace):
    pod_status = get_pod_status(helm_release_name, namespace)
    for pod in pod_status:
        if pod["status"] != "Running":
            return False
    return True

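A typical wait-until-ready loop built on running_status (the release name and the timeout are illustrative choices):

    deadline = time.time() + 600
    while not running_status("zong-standalone", "milvus"):
        if time.time() > deadline:
            raise Exception("Pods not Running within 10 minutes")
        time.sleep(5)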
if __name__ == '__main__':
    def ff():
        namespace = 'milvus'
        helm_release_name = 'zong-standalone'
        # st = get_pod_status(helm_release_name, namespace)
        status = get_pod_status(helm_release_name, namespace)
        print(status)
        for s in status:
            if s["status"] != "Running":
                return False
        return True


    def fff():
        print(time.time())


    while not ff():
        print("retry")
    else:
        print("gogog")
    print("hhhh")
@ -0,0 +1,21 @@
import logging
from milvus_benchmark.env.base import BaseEnv

logger = logging.getLogger("milvus_benchmark.env.local")


class LocalEnv(BaseEnv):
    """local env class wrapper"""
    env_mode = "local"

    def __init__(self, deploy_mode=None):
        super(LocalEnv, self).__init__(deploy_mode)

    def start_up(self, hostname, port):
        res = True
        try:
            self.set_hostname(hostname)
            # record the port as well; the original only stored the hostname
            self.set_port(port)
        except Exception as e:
            logger.error(str(e))
            res = False
        return res
@ -0,0 +1,24 @@
import logging.config
from datetime import datetime
import os
import yaml
import config

cur_path = os.path.abspath(os.path.dirname(__file__))
LOG_CONFIG_PATH = cur_path + "/logging.yaml"
FILE_NAME = config.LOG_PATH + 'benchmark-{:%Y-%m-%d}.log'.format(datetime.now())


def setup_logging(config_path=LOG_CONFIG_PATH, default_level=logging.INFO):
    """
    Setup logging configuration
    """
    print(FILE_NAME)
    try:
        with open(config_path, 'rt') as f:
            log_config = yaml.safe_load(f.read())
        log_config["handlers"]["info_file_handler"].update({"filename": FILE_NAME})
        logging.config.dictConfig(log_config)
    except Exception:
        # log the failure before re-raising; the original order made this line unreachable
        logging.error('Failed to open file', exc_info=True)
        raise
@ -0,0 +1,37 @@
version: 1
disable_existing_loggers: False
formatters:
  simple:
    format: "%(asctime)s - %(name)s:%(lineno)s - %(levelname)s - %(message)s"

handlers:
  console:
    class: logging.StreamHandler
    level: DEBUG
    formatter: simple
    stream: ext://sys.stdout

  info_file_handler:
    class: logging.FileHandler
    formatter: simple
    level: DEBUG
    filename: info.log

#  error_file_handler:
#    class: logging.handlers.RotatingFileHandler
#    level: ERROR
#    formatter: simple
#    filename: errors.log
#    maxBytes: 10485760 # 10MB
#    backupCount: 20
#    encoding: utf8

loggers:
  milvus_benchmark:
    level: DEBUG
    handlers: [console, info_file_handler]
    propagate: no

root:
  level: DEBUG
  handlers: [console, info_file_handler]
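How the two pieces above fit together at runtime, mirroring what main.py does:

    from logs import log
    import logging

    log.setup_logging()   # loads logging.yaml; points info_file_handler at the dated file under LOG_PATH
    logger = logging.getLogger("milvus_benchmark.main")
    logger.debug("goes to stdout and to the benchmark log file")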
@ -0,0 +1,260 @@
import os
import sys
import time
from datetime import datetime
import pdb
import argparse
import logging
import traceback
from multiprocessing import Process
from queue import Queue
from logging import handlers
from yaml import full_load, dump
from milvus_benchmark.metrics.models.server import Server
from milvus_benchmark.metrics.models.hardware import Hardware
from milvus_benchmark.metrics.models.env import Env

from milvus_benchmark.env import get_env
from milvus_benchmark.runners import get_runner
from milvus_benchmark.metrics import api
from milvus_benchmark import config
from milvus_benchmark import parser
# from scheduler import back_scheduler
from logs import log

log.setup_logging()
logger = logging.getLogger("milvus_benchmark.main")

DEFAULT_IMAGE = "milvusdb/milvus:latest"
LOG_FOLDER = "logs"
NAMESPACE = "milvus"
SERVER_VERSION = "2.0"
q = Queue()


def positive_int(s):
    i = None
    try:
        i = int(s)
    except ValueError:
        pass
    if not i or i < 1:
        raise argparse.ArgumentTypeError("%r is not a positive integer" % s)
    return i


def get_image_tag(image_version):
    return "%s-latest" % (image_version)


# def shutdown(event):
#     logger.info("Check if there is scheduled jobs in scheduler")
#     if not back_scheduler.get_jobs():
#         logger.info("No job in scheduler, will shutdown the scheduler")
#         back_scheduler.shutdown(wait=False)


def run_suite(run_type, suite, env_mode, env_params):
    try:
        start_status = False
        metric = api.Metric()
        deploy_mode = env_params["deploy_mode"] if "deploy_mode" in env_params else config.DEFAULT_DEPLOY_MODE
        env = get_env(env_mode, deploy_mode)
        metric.set_run_id()
        metric.set_mode(env_mode)
        metric.env = Env()
        metric.server = Server(version=config.SERVER_VERSION, mode=deploy_mode)
        logger.info(env_params)
        if env_mode == "local":
            metric.hardware = Hardware("")
            start_status = env.start_up(env_params["host"], env_params["port"])
        elif env_mode == "helm":
            helm_params = env_params["helm_params"]
            helm_path = env_params["helm_path"]
            server_name = helm_params["server_name"] if "server_name" in helm_params else None
            server_tag = helm_params["server_tag"] if "server_tag" in helm_params else None
            if not server_name and not server_tag:
                metric.hardware = Hardware("")
            else:
                metric.hardware = Hardware(server_name) if server_name else Hardware(server_tag)
            start_status = env.start_up(helm_path, helm_params)
        if start_status:
            metric.update_status(status="DEPLOY_SUCC")
            logger.debug("Get runner")
            runner = get_runner(run_type, env, metric)
            cases, case_metrics = runner.extract_cases(suite)
            # TODO: only run when the as_group is equal to True
            logger.info("Prepare to run cases")
            runner.prepare(**cases[0])
            logger.info("Start to run cases")
            suite_status = True
            for index, case in enumerate(cases):
                case_metric = case_metrics[index]
                result = None
                err_message = ""
                try:
                    result = runner.run_case(case_metric, **case)
                except Exception as e:
                    err_message = str(e) + "\n" + traceback.format_exc()
                    logger.error(traceback.format_exc())
                logger.info(result)
                if result:
                    case_metric.update_status(status="RUN_SUCC")
                    case_metric.update_result(result)
                else:
                    case_metric.update_status(status="RUN_FAILED")
                    case_metric.update_message(err_message)
                    suite_status = False
                logger.debug(case_metric.metrics)
                # if env_mode == "helm":
                api.save(case_metric)
            if suite_status:
                metric.update_status(status="RUN_SUCC")
            else:
                metric.update_status(status="RUN_FAILED")
        else:
            logger.error("Deploy server failed")
            metric.update_status(status="DEPLOY_FAILED")
    except Exception as e:
        logger.error(str(e))
        logger.error(traceback.format_exc())
        metric.update_status(status="RUN_FAILED")
    finally:
        api.save(metric)
        # time.sleep(10)
        env.tear_down()
        if metric.status != "RUN_SUCC":
            return False
        else:
            return True


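A minimal local invocation, roughly what main() assembles for the --local path (the suite fields are illustrative; real ones come from a suites/*.yaml file):

    env_params = {"host": "127.0.0.1", "port": 19530, "deploy_mode": None}
    suite = {
        "collection_name": "sift_1m_128_l2",   # hypothetical collection entry
        "ni_per": 50000,
    }
    ok = run_suite("insert_performance", suite, "local", env_params)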
def main():
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # helm mode with scheduler
    arg_parser.add_argument(
        "--image-version",
        default="",
        help="image version")
    arg_parser.add_argument(
        "--schedule-conf",
        metavar='FILE',
        default='',
        help="load test schedule from FILE")

    # local mode
    arg_parser.add_argument(
        '--local',
        action='store_true',
        help='use local milvus server')
    arg_parser.add_argument(
        '--host',
        help='server host ip param for local mode',
        default='127.0.0.1')
    arg_parser.add_argument(
        '--port',
        help='server port param for local mode',
        default='19530')
    arg_parser.add_argument(
        '--suite',
        metavar='FILE',
        help='load test suite from FILE',
        default='')

    args = arg_parser.parse_args()

    if args.schedule_conf:
        if args.local:
            raise Exception("Helm mode with scheduler is incompatible with local mode")
        if not args.image_version:
            raise Exception("Image version not given")
        env_mode = "helm"
        image_version = args.image_version
        with open(args.schedule_conf) as f:
            schedule_config = full_load(f)
        helm_path = os.path.join(os.getcwd(), "..//milvus-helm-charts/charts/milvus-ha")
        for item in schedule_config:
            server_host = item["server"] if "server" in item else ""
            server_tag = item["server_tag"] if "server_tag" in item else ""
            deploy_mode = item["deploy_mode"] if "deploy_mode" in item else config.DEFAULT_DEPLOY_MODE
            suite_params = item["suite_params"]
            for suite_param in suite_params:
                suite_file = "suites/" + suite_param["suite"]
                with open(suite_file) as f:
                    suite_dict = full_load(f)
                logger.debug(suite_dict)
                run_type, run_params = parser.operations_parser(suite_dict)
                collections = run_params["collections"]
                image_type = suite_param["image_type"]
                image_tag = get_image_tag(image_version)
                for suite in collections:
                    # run test cases
                    milvus_config = suite["milvus"] if "milvus" in suite else None
                    server_config = suite["server"] if "server" in suite else None
                    logger.debug(milvus_config)
                    logger.debug(server_config)
                    helm_params = {
                        "server_name": server_host,
                        "server_tag": server_tag,
                        "server_config": server_config,
                        "milvus_config": milvus_config,
                        "image_tag": image_tag,
                        "image_type": image_type
                    }
                    env_params = {
                        "deploy_mode": deploy_mode,
                        "helm_path": helm_path,
                        "helm_params": helm_params
                    }
                    # job = back_scheduler.add_job(run_suite, args=[run_type, suite, env_mode, env_params],
                    #                              misfire_grace_time=36000)
                    # logger.info(job)
                    # logger.info(job.id)

    elif args.local:
        # for local mode
        env_params = {
            "host": args.host,
            "port": args.port,
            "deploy_mode": None
        }
        suite_file = args.suite
        with open(suite_file) as f:
            suite_dict = full_load(f)
        logger.debug(suite_dict)
        run_type, run_params = parser.operations_parser(suite_dict)
        collections = run_params["collections"]
        if len(collections) > 1:
            raise Exception("Multiple collections are not supported in local mode")
        # ensure there is only one case in the suite
        # suite = {"run_type": run_type, "run_params": collections[0]}
        suite = collections[0]
        env_mode = "local"
        return run_suite(run_type, suite, env_mode, env_params)
        # job = back_scheduler.add_job(run_suite, args=[run_type, suite, env_mode, env_params], misfire_grace_time=36000)
        # logger.info(job)
        # logger.info(job.id)


if __name__ == "__main__":
    try:
        if not main():
            sys.exit(-1)
        # from apscheduler.events import EVENT_JOB_MISSED
        # back_scheduler.add_listener(listen_miss, EVENT_JOB_MISSED)
        # back_scheduler.start()
    # except (KeyboardInterrupt, SystemExit):
    #     logger.error("Received interruption")
    #     # back_scheduler.shutdown(wait=False)
    #     sys.exit(0)
    except Exception as e:
        logger.error(traceback.format_exc())
        # back_scheduler.shutdown(wait=False)
        sys.exit(-2)
    # block_scheduler.shutdown(wait=False)
    logger.info("All tests run finished")
    sys.exit(0)
@ -0,0 +1,55 @@
import pdb
import logging
from pymongo import MongoClient

from .models.env import Env
from .models.hardware import Hardware
from .models.metric import Metric
from .models.server import Server
from .config import DB, UNIQUE_ID_COLLECTION, DOC_COLLECTION
from milvus_benchmark import config

_client = MongoClient(config.MONGO_SERVER)
logger = logging.getLogger("milvus_benchmark.metric.api")


def insert_or_get(md5):
    collection = _client[DB][UNIQUE_ID_COLLECTION]
    found = collection.find_one({'md5': md5})
    if not found:
        return collection.insert_one({'md5': md5}).inserted_id
    return found['_id']


def save(obj):
    if not isinstance(obj, Metric):
        logger.error("obj is not an instance of Metric")
        return False

    logger.debug(vars(obj))
    if not isinstance(obj.server, Server):
        logger.error("obj.server is not an instance of Server")
        return False

    if not isinstance(obj.hardware, Hardware):
        logger.error("obj.hardware is not an instance of Hardware")
        return False

    if not isinstance(obj.env, Env):
        logger.error("obj.env is not an instance of Env")
        return False

    # de-duplicate the embedded documents by md5, then store references
    md5 = obj.server.json_md5()
    server_doc_id = insert_or_get(md5)
    obj.server = {"id": server_doc_id, "value": vars(obj.server)}

    md5 = obj.hardware.json_md5()
    hardware_doc_id = insert_or_get(md5)
    obj.hardware = {"id": hardware_doc_id, "value": vars(obj.hardware)}

    md5 = obj.env.json_md5()
    env_doc_id = insert_or_get(md5)
    obj.env = {"id": env_doc_id, "value": vars(obj.env)}

    collection = _client[DB][DOC_COLLECTION]
    collection.insert_one(vars(obj))
    return True
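The md5-keyed insert_or_get above means identical Server/Hardware/Env documents are stored once and then referenced by id. A quick sanity check, no MongoDB needed:

    from milvus_benchmark.metrics.models.server import Server

    a = Server(version="2.0", mode="single")
    b = Server(version="2.0", mode="single")
    assert a.json_md5() == b.json_md5()  # same content, same key, one stored doc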
@ -0,0 +1,3 @@
DB = 'test'
UNIQUE_ID_COLLECTION = 'unique_id'
DOC_COLLECTION = 'doc'
@ -0,0 +1,4 @@
from .env import Env
from .hardware import Hardware
from .metric import Metric
from .server import Server
@ -0,0 +1,23 @@
import json
import hashlib


class Env:
    """
    {
        "_version": "0.1",
        "_type": "env",
        "server_config": dict,
        "OMP_NUM_THREADS": string,
    }
    """

    def __init__(self, server_config=None, omp_num_threads=None):
        self._version = '0.1'
        self._type = 'env'
        self.server_config = server_config
        self.OMP_NUM_THREADS = omp_num_threads

    def json_md5(self):
        json_str = json.dumps(vars(self), sort_keys=True)
        return hashlib.md5(json_str.encode('utf-8')).hexdigest()
@ -0,0 +1,24 @@
import json
import hashlib


class Hardware:
    """
    {
        "_version": "0.1",
        "_type": "hardware",
        "name": string,
        "cpus": float
    }
    """

    def __init__(self, name=None, cpus=0.0):
        self._version = '0.1'
        self._type = 'hardware'
        self.name = name
        self.cpus = cpus

    def json_md5(self):
        json_str = json.dumps(vars(self), sort_keys=True)
        return hashlib.md5(json_str.encode('utf-8')).hexdigest()
@ -0,0 +1,48 @@
import time
import datetime
import json
import hashlib
from .env import Env
from .server import Server
from .hardware import Hardware


class Metric(object):
    def __init__(self):
        self._version = '0.1'
        self._type = 'metric'
        self.run_id = None
        self.mode = None
        self.server = Server()
        self.hardware = Hardware()
        self.env = Env()
        self.status = "INIT"
        self.err_message = ""
        self.collection = {}
        self.index = {}
        self.search = {}
        self.run_params = {}
        self.metrics = {
            "type": "",
            "value": None,
        }
        self.datetime = str(datetime.datetime.now())

    def set_run_id(self):
        self.run_id = int(time.time())

    def set_mode(self, mode):
        self.mode = mode

    def json_md5(self):
        json_str = json.dumps(vars(self), sort_keys=True)
        return hashlib.md5(json_str.encode('utf-8')).hexdigest()

    def update_status(self, status):
        self.status = status

    def update_result(self, result):
        # note: runners are expected to have replaced metrics["value"] with a
        # dict (see BaseRunner.init_metric) before this is called
        self.metrics["value"].update(result)

    def update_message(self, err_message):
        self.err_message = err_message
@ -0,0 +1,26 @@
import json
import hashlib


class Server:
    """
    {
        "_version": "0.1",
        "_type": "server",
        "version": string,
        "build_commit": string,
        # "md5": string,
    }
    """

    def __init__(self, version=None, mode=None, build_commit=None):
        self._version = '0.1'
        self._type = 'server'
        self.version = version
        self.mode = mode
        self.build_commit = build_commit
        # self.md5 = md5

    def json_md5(self):
        json_str = json.dumps(vars(self), sort_keys=True)
        return hashlib.md5(json_str.encode('utf-8')).hexdigest()
@ -19,7 +19,9 @@ def collection_parser(collection_name):
    data_type = tmp[0]
    collection_size_unit = tmp[1][-1]
    collection_size = tmp[1][0:-1]
    if collection_size_unit == "w":
        collection_size = int(collection_size) * 10000
    elif collection_size_unit == "m":
        collection_size = int(collection_size) * 1000000
    elif collection_size_unit == "b":
        collection_size = int(collection_size) * 1000000000
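With the new "w" branch, a size token counted in ten-thousands also parses. Assuming the sift_50m_128_l2-style names used by the suite files (the concrete names are illustrative):

    from milvus_benchmark import parser

    # "50m" -> 50 * 1_000_000, "500w" -> 500 * 10_000
    assert parser.collection_parser("sift_50m_128_l2")[1] == 50000000
    assert parser.collection_parser("sift_500w_128_l2")[1] == 5000000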
@ -0,0 +1,25 @@
from .insert import InsertRunner
from .locust import LocustInsertRunner, LocustSearchRunner, LocustRandomRunner
from .search import SearchRunner, InsertSearchRunner
from .build import BuildRunner, InsertBuildRunner
from .get import InsertGetRunner
from .accuracy import AccuracyRunner
from .accuracy import AccAccuracyRunner
from .chaos import SimpleChaosRunner


def get_runner(name, env, metric):
    # map to classes and instantiate lazily: BaseRunner.__init__ opens a
    # MilvusClient connection, so only the requested runner should be built
    runners = {
        "insert_performance": InsertRunner,
        "search_performance": SearchRunner,
        "insert_search_performance": InsertSearchRunner,
        "locust_insert_performance": LocustInsertRunner,
        "locust_search_performance": LocustSearchRunner,
        "locust_random_performance": LocustRandomRunner,
        "insert_build_performance": InsertBuildRunner,
        "insert_get_performance": InsertGetRunner,
        "build_performance": BuildRunner,
        "accuracy": AccuracyRunner,
        "ann_accuracy": AccAccuracyRunner,
        "simple_chaos": SimpleChaosRunner,
    }
    runner_cls = runners.get(name)
    return runner_cls(env, metric) if runner_cls else None
@ -0,0 +1,260 @@
import json
import time
import copy
import logging
import numpy as np

from milvus_benchmark import parser
from milvus_benchmark.runners import utils
from milvus_benchmark.runners.base import BaseRunner

logger = logging.getLogger("milvus_benchmark.runners.accuracy")
INSERT_INTERVAL = 50000


class AccuracyRunner(BaseRunner):
    """run accuracy"""
    name = "accuracy"

    def __init__(self, env, metric):
        super(AccuracyRunner, self).__init__(env, metric)

    def extract_cases(self, collection):
        collection_name = collection["collection_name"] if "collection_name" in collection else None
        (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
        vector_type = utils.get_vector_type(data_type)
        index_field_name = utils.get_default_field_name(vector_type)
        base_query_vectors = utils.get_vectors_from_binary(utils.MAX_NQ, dimension, data_type)
        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name,
            "collection_size": collection_size
        }
        index_info = self.milvus.describe_index(index_field_name, collection_name)
        filters = collection["filters"] if "filters" in collection else []
        filter_query = []
        top_ks = collection["top_ks"]
        nqs = collection["nqs"]
        search_params = collection["search_params"]
        search_params = utils.generate_combinations(search_params)
        cases = list()
        case_metrics = list()
        self.init_metric(self.name, collection_info, index_info, search_info=None)
        for search_param in search_params:
            if not filters:
                filters.append(None)
            for filter in filters:
                filter_param = []
                if isinstance(filter, dict) and "range" in filter:
                    filter_query.append(eval(filter["range"]))
                    filter_param.append(filter["range"])
                if isinstance(filter, dict) and "term" in filter:
                    filter_query.append(eval(filter["term"]))
                    filter_param.append(filter["term"])
                for nq in nqs:
                    query_vectors = base_query_vectors[0:nq]
                    for top_k in top_ks:
                        search_info = {
                            "topk": top_k,
                            "query": query_vectors,
                            "metric_type": utils.metric_type_trans(metric_type),
                            "params": search_param}
                        # TODO: only update search_info
                        case_metric = copy.deepcopy(self.metric)
                        case_metric.search = {
                            "nq": nq,
                            "topk": top_k,
                            "search_param": search_param,
                            "filter": filter_param
                        }
                        vector_query = {"vector": {index_field_name: search_info}}
                        case = {
                            "collection_name": collection_name,
                            "index_field_name": index_field_name,
                            "dimension": dimension,
                            "data_type": data_type,
                            "metric_type": metric_type,
                            "vector_type": vector_type,
                            "collection_size": collection_size,
                            "filter_query": filter_query,
                            "vector_query": vector_query
                        }
                        cases.append(case)
                        case_metrics.append(case_metric)
        return cases, case_metrics

    def prepare(self, **case_param):
        collection_name = case_param["collection_name"]
        self.milvus.set_collection(collection_name)
        if not self.milvus.exists_collection():
            logger.info("Collection %s does not exist" % collection_name)
        self.milvus.load_collection()

    def run_case(self, case_metric, **case_param):
        collection_size = case_param["collection_size"]
        nq = case_metric.search["nq"]
        top_k = case_metric.search["topk"]
        query_res = self.milvus.query(case_param["vector_query"], filter_query=case_param["filter_query"])
        true_ids = utils.get_ground_truth_ids(collection_size)
        # log both dimensions of the ground truth; the original logged the
        # column count twice
        logger.debug({"true_ids": [len(true_ids), len(true_ids[0])]})
        result_ids = self.milvus.get_ids(query_res)
        logger.debug({"result_ids": len(result_ids[0])})
        acc_value = utils.get_recall_value(true_ids[:nq, :top_k].tolist(), result_ids)
        tmp_result = {"acc": acc_value}
        return tmp_result

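The accuracy number is recall against precomputed ground truth. get_recall_value lives in runners/utils.py and is not part of this diff; the quantity it is understood to compute is sketched below:

    def recall(true_ids, result_ids):
        # fraction of returned ids that appear in the ground-truth top-k,
        # averaged over all queries
        hits = sum(len(set(t) & set(r)) for t, r in zip(true_ids, result_ids))
        total = sum(len(t) for t in true_ids)
        return hits / total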
class AccAccuracyRunner(AccuracyRunner):
    """run ann accuracy

    1. entities from hdf5
    2. one collection tests different indexes
    """
    name = "ann_accuracy"

    def __init__(self, env, metric):
        super(AccAccuracyRunner, self).__init__(env, metric)

    def extract_cases(self, collection):
        collection_name = collection["collection_name"] if "collection_name" in collection else None
        (data_type, dimension, metric_type) = parser.parse_ann_collection_name(collection_name)
        hdf5_source_file = collection["source_file"]
        index_types = collection["index_types"]
        index_params = collection["index_params"]
        top_ks = collection["top_ks"]
        nqs = collection["nqs"]
        search_params = collection["search_params"]
        vector_type = utils.get_vector_type(data_type)
        index_field_name = utils.get_default_field_name(vector_type)
        dataset = utils.get_dataset(hdf5_source_file)
        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name
        }
        filters = collection["filters"] if "filters" in collection else []
        filter_query = []
        search_params = utils.generate_combinations(search_params)
        index_params = utils.generate_combinations(index_params)
        cases = list()
        case_metrics = list()
        self.init_metric(self.name, collection_info, {}, search_info=None)
        true_ids = np.array(dataset["neighbors"])
        for index_type in index_types:
            for index_param in index_params:
                index_info = {
                    "index_type": index_type,
                    "index_param": index_param
                }
                for search_param in search_params:
                    if not filters:
                        filters.append(None)
                    for filter in filters:
                        filter_param = []
                        if isinstance(filter, dict) and "range" in filter:
                            filter_query.append(eval(filter["range"]))
                            filter_param.append(filter["range"])
                        if isinstance(filter, dict) and "term" in filter:
                            filter_query.append(eval(filter["term"]))
                            filter_param.append(filter["term"])
                        for nq in nqs:
                            query_vectors = utils.normalize(metric_type, np.array(dataset["test"][:nq]))
                            for top_k in top_ks:
                                search_info = {
                                    "topk": top_k,
                                    "query": query_vectors,
                                    "metric_type": utils.metric_type_trans(metric_type),
                                    "params": search_param}
                                # TODO: only update search_info
                                case_metric = copy.deepcopy(self.metric)
                                case_metric.index = index_info
                                case_metric.search = {
                                    "nq": nq,
                                    "topk": top_k,
                                    "search_param": search_param,
                                    "filter": filter_param
                                }
                                vector_query = {"vector": {index_field_name: search_info}}
                                case = {
                                    "collection_name": collection_name,
                                    "dataset": dataset,
                                    "index_field_name": index_field_name,
                                    "dimension": dimension,
                                    "data_type": data_type,
                                    "metric_type": metric_type,
                                    "vector_type": vector_type,
                                    "index_type": index_type,
                                    "index_param": index_param,
                                    "filter_query": filter_query,
                                    "vector_query": vector_query,
                                    "true_ids": true_ids
                                }
                                cases.append(case)
                                case_metrics.append(case_metric)
        return cases, case_metrics

    def prepare(self, **case_param):
        collection_name = case_param["collection_name"]
        metric_type = case_param["metric_type"]
        dimension = case_param["dimension"]
        vector_type = case_param["vector_type"]
        index_type = case_param["index_type"]
        index_param = case_param["index_param"]
        index_field_name = case_param["index_field_name"]

        self.milvus.set_collection(collection_name)
        if self.milvus.exists_collection(collection_name):
            logger.info("Re-create collection: %s" % collection_name)
            self.milvus.drop()
        dataset = case_param["dataset"]
        self.milvus.create_collection(dimension, data_type=vector_type)
        insert_vectors = utils.normalize(metric_type, np.array(dataset["train"]))
        if len(insert_vectors) != dataset["train"].shape[0]:
            raise Exception("Row count of insert vectors: %d is not equal to dataset size: %d" % (
                len(insert_vectors), dataset["train"].shape[0]))
        logger.debug("The row count of entities to be inserted: %d" % len(insert_vectors))
        # insert in INSERT_INTERVAL-sized batches instead of one shot
        # milvus_instance.insert(insert_vectors)
        info = self.milvus.get_info(collection_name)
        loops = len(insert_vectors) // INSERT_INTERVAL + 1
        for i in range(loops):
            start = i * INSERT_INTERVAL
            end = min((i + 1) * INSERT_INTERVAL, len(insert_vectors))
            if start < end:
                tmp_vectors = insert_vectors[start:end]
                ids = [i for i in range(start, end)]
                if not isinstance(tmp_vectors, list):
                    entities = utils.generate_entities(info, tmp_vectors.tolist(), ids)
                    res_ids = self.milvus.insert(entities)
                else:
                    # pass the collection info here as well; it was missing in
                    # the original list branch
                    entities = utils.generate_entities(info, tmp_vectors, ids)
                    res_ids = self.milvus.insert(entities)
                assert res_ids == ids
        logger.debug("End insert, start flush")
        self.milvus.flush()
        logger.debug("End flush")
        res_count = self.milvus.count()
        logger.info("Table: %s, row count: %d" % (collection_name, res_count))
        if res_count != len(insert_vectors):
            raise Exception("Table row count is not equal to insert vectors")
        if self.milvus.describe_index(index_field_name):
            self.milvus.drop_index(index_field_name)
            logger.info("Re-create index: %s" % collection_name)
        self.milvus.create_index(index_field_name, index_type, metric_type, index_param=index_param)
        logger.info(self.milvus.describe_index(index_field_name))
        logger.info("Start to load collection: %s" % collection_name)
        # self.milvus.release_collection()
        self.milvus.load_collection()
        logger.info("End load collection: %s" % collection_name)

    def run_case(self, case_metric, **case_param):
        true_ids = case_param["true_ids"]
        nq = case_metric.search["nq"]
        top_k = case_metric.search["topk"]
        query_res = self.milvus.query(case_param["vector_query"], filter_query=case_param["filter_query"])
        result_ids = self.milvus.get_ids(query_res)
        acc_value = utils.get_recall_value(true_ids[:nq, :top_k].tolist(), result_ids)
        tmp_result = {"acc": acc_value}
        return tmp_result

@ -0,0 +1,150 @@
|
|||
import time
|
||||
import pdb
|
||||
import logging
|
||||
import threading
|
||||
import traceback
|
||||
import grpc
|
||||
import numpy as np
|
||||
|
||||
from milvus_benchmark.env import get_env
|
||||
from milvus_benchmark import config
|
||||
from milvus_benchmark.client import MilvusClient
|
||||
from . import utils
|
||||
|
||||
logger = logging.getLogger("milvus_benchmark.runners.base")
|
||||
|
||||
|
||||
class BaseRunner(object):
|
||||
"""runner is actually the executors"""
|
||||
|
||||
def __init__(self, env, metric):
|
||||
self._metric = metric
|
||||
self._env = env
|
||||
self._run_as_group = False
|
||||
self._result = dict()
|
||||
self._milvus = MilvusClient(host=self._env.hostname)
|
||||
|
||||
def run(self, run_params):
|
||||
pass
|
||||
|
||||
def stop(self):
|
||||
logger.debug("Start clean up env: {} in runner".format(self.env.name))
|
||||
|
||||
@property
|
||||
def hostname(self):
|
||||
return self._env.hostname
|
||||
|
||||
@property
|
||||
def port(self):
|
||||
return self._env.port
|
||||
|
||||
@property
|
||||
def milvus(self):
|
||||
return self._milvus
|
||||
|
||||
@property
|
||||
def metric(self):
|
||||
return self._metric
|
||||
|
||||
@property
|
||||
def result(self):
|
||||
return self._result
|
||||
|
||||
@property
|
||||
def run_as_group(self):
|
||||
return self._run_as_group
|
||||
|
||||
def init_metric(self, name, collection_info=None, index_info=None, search_info=None, run_params=None):
|
||||
self._metric.collection = collection_info
|
||||
self._metric.index = index_info
|
||||
self._metric.search = search_info
|
||||
self._metric.run_params = run_params
|
||||
self._metric.metrics = {
|
||||
"type": name,
|
||||
"value": self._result
|
||||
}
|
||||
|
||||
# TODO: need an easy method to change value in metric
|
||||
def update_metric(self, key, value):
|
||||
pass
|
||||
|
||||
def insert_core(self, milvus, info, start_id, vectors):
|
||||
# start insert vectors
|
||||
end_id = start_id + len(vectors)
|
||||
logger.debug("Start id: %s, end id: %s" % (start_id, end_id))
|
||||
ids = [k for k in range(start_id, end_id)]
|
||||
entities = utils.generate_entities(info, vectors, ids)
|
||||
ni_start_time = time.time()
|
||||
try:
|
||||
_res_ids = milvus.insert(entities)
|
||||
except Exception as e:
|
||||
logger.error("Insert failed")
|
||||
logger.error(traceback.format_exc())
|
||||
raise e
|
||||
# assert ids == res_ids
|
||||
# milvus.flush()
|
||||
ni_end_time = time.time()
|
||||
logger.debug(milvus.count())
|
||||
return ni_end_time-ni_start_time
|
||||
|
||||
# TODO: need to improve
|
||||
def insert(self, milvus, collection_name, data_type, dimension, size, ni):
|
||||
total_time = 0.0
|
||||
rps = 0.0
|
||||
ni_time = 0.0
|
||||
vectors_per_file = utils.get_len_vectors_per_file(data_type, dimension)
|
||||
if size % vectors_per_file or size % ni:
|
||||
logger.error("Not invalid collection size or ni")
|
||||
return False
|
||||
        i = 0
        info = milvus.get_info(collection_name)
        if data_type == "local" or not data_type:
            # insert local
            info = milvus.get_info(collection_name)
            while i < (size // vectors_per_file):
                vectors = []
                for j in range(vectors_per_file // ni):
                    # vectors = src_vectors[j * ni:(j + 1) * ni]
                    vectors = utils.generate_vectors(ni, dimension)
                    if vectors:
                        start_id = i * vectors_per_file + j * ni
                        ni_time = self.insert_core(milvus, info, start_id, vectors)
                        total_time = total_time + ni_time
                i += 1
        else:
            # insert from file
            while i < (size // vectors_per_file):
                vectors = []
                if vectors_per_file >= ni:
                    file_name = utils.gen_file_name(i, dimension, data_type)
                    # logger.info("Load npy file: %s start" % file_name)
                    data = np.load(file_name)
                    # logger.info("Load npy file: %s end" % file_name)
                    for j in range(vectors_per_file // ni):
                        vectors = data[j * ni:(j + 1) * ni].tolist()
                        if vectors:
                            start_id = i * vectors_per_file + j * ni
                            ni_time = self.insert_core(milvus, info, start_id, vectors)
                            total_time = total_time + ni_time
                    i += 1
                else:
                    vectors.clear()
                    loops = ni // vectors_per_file
                    for j in range(loops):
                        file_name = utils.gen_file_name(loops * i + j, dimension, data_type)
                        data = np.load(file_name)
                        vectors.extend(data.tolist())
                    if vectors:
                        start_id = i * vectors_per_file
                        ni_time = self.insert_core(milvus, info, start_id, vectors)
                        total_time = total_time + ni_time
                    i += loops
        rps = round(size / total_time, 2)
        ni_time = round(total_time / (size / ni), 2)
        result = {
            "total_time": round(total_time, 2),
            "rps": rps,
            "ni_time": ni_time
        }
        logger.info(result)
        return result
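For readers skimming the diff, the runner contract is easiest to see end to end. The sketch below is illustrative only: it wires a hypothetical subclass through the extract_cases / prepare / run_case sequence that the concrete runners in this patch implement; DummyRunner and the drive() helper are stand-ins, not framework classes.

# Illustrative driver for a BaseRunner subclass; names marked "dummy" are
# assumptions for the sketch, not part of the benchmark framework.
class DummyRunner(BaseRunner):
    name = "dummy_performance"

    def extract_cases(self, collection):
        self.init_metric(self.name, collection_info={}, index_info=None, search_info=None)
        return [{"collection_name": collection["collection_name"]}], [self.metric]

    def prepare(self, **case_param):
        self.milvus.set_collection(case_param["collection_name"])

    def run_case(self, case_metric, **case_param):
        return {"noop_time": 0.0}


def drive(runner, suite_collection):
    # the framework's scheduler performs roughly these steps per suite entry
    case_params, case_metrics = runner.extract_cases(suite_collection)
    results = []
    for case_param, case_metric in zip(case_params, case_metrics):
        runner.prepare(**case_param)
        results.append(runner.run_case(case_metric, **case_param))
    return results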
@ -0,0 +1,105 @@
import time
import copy
import logging
from milvus_benchmark import parser
from milvus_benchmark.runners import utils
from milvus_benchmark.runners.base import BaseRunner

logger = logging.getLogger("milvus_benchmark.runners.build")


class BuildRunner(BaseRunner):
    """run build"""
    name = "build_performance"

    def __init__(self, env, metric):
        super(BuildRunner, self).__init__(env, metric)

    def extract_cases(self, collection):
        collection_name = collection["collection_name"] if "collection_name" in collection else None
        (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
        ni_per = collection["ni_per"]
        vector_type = utils.get_vector_type(data_type)
        other_fields = collection["other_fields"] if "other_fields" in collection else None
        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name,
            "collection_size": collection_size,
            "other_fields": other_fields,
            "ni_per": ni_per
        }
        index_field_name = utils.get_default_field_name(vector_type)
        index_type = collection["index_type"]
        index_param = collection["index_param"]
        index_info = {
            "index_type": index_type,
            "index_param": index_param
        }
        flush = True
        if "flush" in collection and collection["flush"] == "no":
            flush = False
        self.init_metric(self.name, collection_info, index_info, search_info=None)
        case_metric = copy.deepcopy(self.metric)
        case_metrics = list()
        case_params = list()
        case_metrics.append(case_metric)
        case_param = {
            "collection_name": collection_name,
            "data_type": data_type,
            "dimension": dimension,
            "collection_size": collection_size,
            "ni_per": ni_per,
            "metric_type": metric_type,
            "vector_type": vector_type,
            "other_fields": other_fields,
            "flush_after_insert": flush,
            "index_field_name": index_field_name,
            "index_type": index_type,
            "index_param": index_param,
        }
        case_params.append(case_param)
        return case_params, case_metrics

    def prepare(self, **case_param):
        collection_name = case_param["collection_name"]
        self.milvus.set_collection(collection_name)
        if not self.milvus.exists_collection():
            logger.info("collection does not exist")
        logger.debug({"collection count": self.milvus.count()})

    def run_case(self, case_metric, **case_param):
        index_field_name = case_param["index_field_name"]
        start_time = time.time()
        self.milvus.create_index(index_field_name, case_param["index_type"], case_param["metric_type"],
                                 index_param=case_param["index_param"])
        build_time = round(time.time() - start_time, 2)
        tmp_result = {"build_time": build_time}
        return tmp_result


class InsertBuildRunner(BuildRunner):
    """run insert and build"""
    name = "insert_build_performance"

    def __init__(self, env, metric):
        super(InsertBuildRunner, self).__init__(env, metric)

    def prepare(self, **case_param):
        collection_name = case_param["collection_name"]
        dimension = case_param["dimension"]
        vector_type = case_param["vector_type"]
        other_fields = case_param["other_fields"]
        self.milvus.set_collection(collection_name)
        if self.milvus.exists_collection():
            logger.debug("Start drop collection")
            self.milvus.drop()
            time.sleep(utils.DELETE_INTERVAL_TIME)
        self.milvus.create_collection(dimension, data_type=vector_type, other_fields=other_fields)
        self.insert(self.milvus, collection_name, case_param["data_type"], dimension,
                    case_param["collection_size"], case_param["ni_per"])
        start_time = time.time()
        self.milvus.flush()
        flush_time = round(time.time() - start_time, 2)
        logger.debug({"collection count": self.milvus.count()})
        logger.debug({"flush_time": flush_time})
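The `collection` entry that BuildRunner.extract_cases consumes is a plain dict parsed from the suite yaml. A minimal sketch of its expected shape, with key names taken from the lookups in the code and all values invented for illustration (the collection name follows the data_type/size/dimension/metric convention that parser.collection_parser decodes):

# Hypothetical suite entry for a build_performance case.
collection = {
    "collection_name": "sift_1m_2000000_128_l2_2",  # encodes data type, size, dim, metric
    "ni_per": 50000,                  # insert batch size
    "other_fields": None,             # optional extra scalar fields
    "index_type": "ivf_sq8",          # assumed index name; depends on client support
    "index_param": {"nlist": 1024},   # assumed index-specific parameters
    "flush": "no",                    # only the literal "no" disables flush_after_insert
}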
@ -0,0 +1,126 @@
import copy
import logging
import pdb
import time
from operator import methodcaller
from yaml import full_load, dump
import threading
from milvus_benchmark import utils
from milvus_benchmark.runners import utils as runner_utils
from milvus_benchmark.chaos import utils as chaos_utils
from milvus_benchmark.runners.base import BaseRunner
from chaos.chaos_opt import ChaosOpt
from milvus_benchmark import config
from milvus_benchmark.chaos.chaos_mesh import PodChaos, NetworkChaos

logger = logging.getLogger("milvus_benchmark.runners.chaos")

kind_chaos_mapping = {
    "PodChaos": PodChaos,
    "NetworkChaos": NetworkChaos
}

assert_func_mapping = {
    "fail": chaos_utils.assert_fail,
    "pass": chaos_utils.assert_pass
}


class SimpleChaosRunner(BaseRunner):
    """run chaos"""
    name = "simple_chaos"

    def __init__(self, env, metric):
        super(SimpleChaosRunner, self).__init__(env, metric)

    async def async_call(self, func, **kwargs):
        future = methodcaller(func, **kwargs)(self.milvus)

    def run_step(self, interface_name, interface_params):
        if interface_name == "create_collection":
            collection_name = utils.get_unique_name("chaos")
            self.data_type = interface_params["data_type"]
            self.dimension = interface_params["dimension"]
            self.milvus.set_collection(collection_name)
            vector_type = runner_utils.get_vector_type(self.data_type)
            self.milvus.create_collection(self.dimension, data_type=vector_type)
        elif interface_name == "insert":
            batch_size = interface_params["batch_size"]
            collection_size = interface_params["collection_size"]
            self.insert(self.milvus, self.milvus.collection_name, self.data_type, self.dimension, collection_size,
                        batch_size)
        elif interface_name == "create_index":
            metric_type = interface_params["metric_type"]
            index_type = interface_params["index_type"]
            index_param = interface_params["index_param"]
            vector_type = runner_utils.get_vector_type(self.data_type)
            field_name = runner_utils.get_default_field_name(vector_type)
            self.milvus.create_index(field_name, index_type, metric_type, index_param=index_param)
        elif interface_name == "flush":
            self.milvus.flush()

    def extract_cases(self, collection):
        before_steps = collection["before"]
        after = collection["after"] if "after" in collection else None
        processing = collection["processing"]
        case_metrics = []
        case_params = [{
            "before_steps": before_steps,
            "after": after,
            "processing": processing
        }]
        self.init_metric(self.name, {}, {}, None)
        case_metric = copy.deepcopy(self.metric)
        case_metrics.append(case_metric)
        return case_params, case_metrics

    def prepare(self, **case_param):
        steps = case_param["before_steps"]
        for step in steps:
            interface_name = step["interface_name"]
            params = step["params"]
            self.run_step(interface_name, params)

    def run_case(self, case_metric, **case_param):
        processing = case_param["processing"]
        after = case_param["after"]
        user_chaos = processing["chaos"]
        kind = user_chaos["kind"]
        spec = user_chaos["spec"]
        metadata_name = config.NAMESPACE + "-" + kind.lower()
        metadata = {"name": metadata_name}
        process_assertion = processing["assertion"]
        after_assertion = after["assertion"]
        # load yaml from the default template to generate a standard chaos dict
        chaos_mesh = kind_chaos_mapping[kind](config.DEFAULT_API_VERSION, kind, metadata, spec)
        experiment_config = chaos_mesh.gen_experiment_config()
        process_func = processing["interface_name"]
        process_params = processing["params"] if "params" in processing else {}
        after_func = after["interface_name"]
        after_params = after["params"] if "params" in after else {}
        logger.debug(chaos_mesh.kind)
        chaos_opt = ChaosOpt(chaos_mesh.kind)
        chaos_objects = chaos_opt.list_chaos_object()
        if len(chaos_objects["items"]) != 0:
            logger.debug(chaos_objects["items"])
            chaos_opt.delete_chaos_object(chaos_mesh.metadata["name"])
        # with open('./pod-newq.yaml', "w") as f:
        #     dump(experiment_config, f)
        #     f.close()
        # concurrently inject the chaos and run the func
        # logger.debug(experiment_config)
        t_milvus = threading.Thread(target=assert_func_mapping[process_assertion], args=(process_func, self.milvus,), kwargs=process_params)
        try:
            t_milvus.start()
            chaos_opt.create_chaos_object(experiment_config)
        # processing assert exception
        except Exception as e:
            logger.info("exception {}".format(str(e)))
        else:
            chaos_opt.delete_chaos_object(chaos_mesh.metadata["name"])
            # TODO: retry connecting to milvus
            time.sleep(15)
            assert_func_mapping[after_assertion](after_func, self.milvus, **after_params)
        finally:
            chaos_opt.delete_all_chaos_object()
            logger.info(chaos_opt.list_chaos_object())
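A chaos case wires three phases together: setup steps, a call asserted while chaos is injected, and a recovery check after the chaos object is deleted. A hypothetical case dict mirroring what extract_cases, prepare, and run_case read (all concrete values are illustrative):

collection = {
    "before": [
        {"interface_name": "create_collection",
         "params": {"data_type": "sift", "dimension": 128}},
        {"interface_name": "insert",
         "params": {"batch_size": 50000, "collection_size": 1000000}},
    ],
    "processing": {
        "interface_name": "flush",   # run concurrently with the chaos injection
        "assertion": "fail",         # expect the call to fail while chaos is active
        "chaos": {
            "kind": "PodChaos",      # must be a key of kind_chaos_mapping
            "spec": {},              # passed through to the chaos-mesh template
        },
    },
    "after": {
        "interface_name": "flush",
        "assertion": "pass",         # expect recovery once the chaos is removed
    },
}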
@ -0,0 +1,118 @@
import time
import copy
import logging
from milvus_benchmark import parser
from milvus_benchmark.runners import utils
from milvus_benchmark.runners.base import BaseRunner

logger = logging.getLogger("milvus_benchmark.runners.get")


def get_ids(length, size):
    ids_list = []
    step = size // length
    for i in range(length):
        ids_list.append(step * i)
    return ids_list


class GetRunner(BaseRunner):
    """run get"""
    name = "get_performance"

    def __init__(self, env, metric):
        super(GetRunner, self).__init__(env, metric)

    def extract_cases(self, collection):
        collection_name = collection["collection_name"] if "collection_name" in collection else None
        (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
        ni_per = collection["ni_per"]
        vector_type = utils.get_vector_type(data_type)
        other_fields = collection["other_fields"] if "other_fields" in collection else None
        ids_length_list = collection["ids_length_list"]
        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name,
            "collection_size": collection_size,
            "other_fields": other_fields,
            "ni_per": ni_per
        }
        index_field_name = utils.get_default_field_name(vector_type)
        index_type = collection["index_type"]
        index_param = collection["index_param"]
        index_info = {
            "index_type": index_type,
            "index_param": index_param
        }
        flush = True
        if "flush" in collection and collection["flush"] == "no":
            flush = False
        self.init_metric(self.name, collection_info, index_info, search_info=None)
        case_metrics = list()
        case_params = list()  # accumulate one case per ids_length; must not be reset inside the loop
        for ids_length in ids_length_list:
            ids = get_ids(ids_length, collection_size)
            case_metric = copy.deepcopy(self.metric)
            case_metric.run_params = {"ids_length": ids_length}
            case_metrics.append(case_metric)
            case_param = {
                "collection_name": collection_name,
                "data_type": data_type,
                "dimension": dimension,
                "collection_size": collection_size,
                "ni_per": ni_per,
                "metric_type": metric_type,
                "vector_type": vector_type,
                "other_fields": other_fields,
                "flush_after_insert": flush,
                "index_field_name": index_field_name,
                "index_type": index_type,
                "index_param": index_param,
                "ids": ids
            }
            case_params.append(case_param)
        return case_params, case_metrics

    def prepare(self, **case_param):
        collection_name = case_param["collection_name"]
        self.milvus.set_collection(collection_name)
        if not self.milvus.exists_collection():
            logger.info("collection does not exist")
        logger.debug({"collection count": self.milvus.count()})

    def run_case(self, case_metric, **case_param):
        ids = case_param["ids"]
        start_time = time.time()
        self.milvus.get(ids)
        get_time = round(time.time() - start_time, 2)
        tmp_result = {"get_time": get_time}
        return tmp_result


class InsertGetRunner(GetRunner):
    """run insert and get"""
    name = "insert_get_performance"

    def __init__(self, env, metric):
        super(InsertGetRunner, self).__init__(env, metric)

    def prepare(self, **case_param):
        collection_name = case_param["collection_name"]
        dimension = case_param["dimension"]
        vector_type = case_param["vector_type"]
        other_fields = case_param["other_fields"]
        self.milvus.set_collection(collection_name)
        if self.milvus.exists_collection():
            logger.debug("Start drop collection")
            self.milvus.drop()
            time.sleep(utils.DELETE_INTERVAL_TIME)
        self.milvus.create_collection(dimension, data_type=vector_type, other_fields=other_fields)
        self.insert(self.milvus, collection_name, case_param["data_type"], dimension,
                    case_param["collection_size"], case_param["ni_per"])
        start_time = time.time()
        self.milvus.flush()
        flush_time = round(time.time() - start_time, 2)
        logger.debug({"collection count": self.milvus.count()})
        logger.debug({"flush_time": flush_time})
        self.milvus.load_collection()
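get_ids samples evenly spaced primary keys, which lines up with stored entities only because insert_core assigns ids sequentially from 0. A quick check of the spacing:

# get_ids(5, 1000000) -> step of 200000, starting at 0
ids = get_ids(5, 1000000)
assert ids == [0, 200000, 400000, 600000, 800000]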
@ -0,0 +1,124 @@
import time
import pdb
import copy
import logging
from milvus_benchmark import parser
from milvus_benchmark.runners import utils
from milvus_benchmark.runners.base import BaseRunner

logger = logging.getLogger("milvus_benchmark.runners.insert")


class InsertRunner(BaseRunner):
    """run insert"""
    name = "insert_performance"

    def __init__(self, env, metric):
        super(InsertRunner, self).__init__(env, metric)

    def extract_cases(self, collection):
        collection_name = collection["collection_name"] if "collection_name" in collection else None
        (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
        ni_per = collection["ni_per"]
        build_index = collection["build_index"] if "build_index" in collection else False
        index_info = None
        vector_type = utils.get_vector_type(data_type)
        other_fields = collection["other_fields"] if "other_fields" in collection else None
        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name,
            "collection_size": collection_size,
            "other_fields": other_fields,
            "ni_per": ni_per
        }
        index_field_name = None
        index_type = None
        index_param = None
        if build_index is True:
            index_type = collection["index_type"]
            index_param = collection["index_param"]
            index_info = {
                "index_type": index_type,
                "index_param": index_param
            }
            index_field_name = utils.get_default_field_name(vector_type)
        flush = True
        if "flush" in collection and collection["flush"] == "no":
            flush = False
        self.init_metric(self.name, collection_info, index_info, None)
        case_metric = copy.deepcopy(self.metric)
        case_metrics = list()
        case_params = list()
        case_metrics.append(case_metric)
        case_param = {
            "collection_name": collection_name,
            "data_type": data_type,
            "dimension": dimension,
            "collection_size": collection_size,
            "ni_per": ni_per,
            "metric_type": metric_type,
            "vector_type": vector_type,
            "other_fields": other_fields,
            "build_index": build_index,
            "flush_after_insert": flush,
            "index_field_name": index_field_name,
            "index_type": index_type,
            "index_param": index_param,
        }
        case_params.append(case_param)
        return case_params, case_metrics

    def prepare(self, **case_param):
        collection_name = case_param["collection_name"]
        dimension = case_param["dimension"]
        vector_type = case_param["vector_type"]
        other_fields = case_param["other_fields"]
        index_field_name = case_param["index_field_name"]
        build_index = case_param["build_index"]

        self.milvus.set_collection(collection_name)
        if self.milvus.exists_collection():
            logger.debug("Start drop collection")
            self.milvus.drop()
            time.sleep(utils.DELETE_INTERVAL_TIME)
        self.milvus.create_collection(dimension, data_type=vector_type,
                                      other_fields=other_fields)
        # TODO: update fields in collection_info
        # fields = self.get_fields(self.milvus, collection_name)
        # collection_info = {
        #     "dimension": dimension,
        #     "metric_type": metric_type,
        #     "dataset_name": collection_name,
        #     "fields": fields
        # }
        if build_index is True:
            if case_param["index_type"]:
                self.milvus.create_index(index_field_name, case_param["index_type"], case_param["metric_type"], index_param=case_param["index_param"])
                logger.debug(self.milvus.describe_index(index_field_name))
            else:
                build_index = False
                logger.warning("Please specify the index_type")

    # TODO: error handler
    def run_case(self, case_metric, **case_param):
        collection_name = case_param["collection_name"]
        dimension = case_param["dimension"]
        index_field_name = case_param["index_field_name"]
        build_index = case_param["build_index"]

        tmp_result = self.insert(self.milvus, collection_name, case_param["data_type"], dimension, case_param["collection_size"], case_param["ni_per"])
        flush_time = 0.0
        build_time = 0.0
        if case_param["flush_after_insert"] is True:
            start_time = time.time()
            self.milvus.flush()
            flush_time = round(time.time() - start_time, 2)
            logger.debug(self.milvus.count())
        if build_index is True:
            logger.debug("Start build index for last file")
            start_time = time.time()
            self.milvus.create_index(index_field_name, case_param["index_type"], case_param["metric_type"], index_param=case_param["index_param"])
            build_time = round(time.time() - start_time, 2)
        tmp_result.update({"flush_time": flush_time, "build_time": build_time})
        return tmp_result
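The dict run_case returns merges the metrics from BaseRunner.insert with the flush and build timings. As a worked example of the arithmetic (numbers invented):

# With size=1,000,000 vectors inserted in ni=50,000 batches (20 insert calls)
# and a measured total_time of 40.0 s:
size, ni, total_time = 1_000_000, 50_000, 40.0
rps = round(size / total_time, 2)             # 25000.0 vectors per second
ni_time = round(total_time / (size / ni), 2)  # 2.0 s average per batch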
@ -0,0 +1,396 @@
import pdb
import time
import copy
import logging
from . import locust_user
from .base import BaseRunner
from milvus_benchmark import parser
from milvus_benchmark import utils
from milvus_benchmark.runners import utils as runner_utils

logger = logging.getLogger("milvus_benchmark.runners.locust")


class LocustRunner(BaseRunner):
    def __init__(self, env, metric):
        super(LocustRunner, self).__init__(env, metric)

    def run_case(self, case_metric, **case_param):
        collection_name = case_param["collection_name"]
        task = case_param["task"]
        connection_type = case_param["connection_type"]

        # spawn locust requests
        clients_num = task["clients_num"]
        hatch_rate = task["hatch_rate"]
        during_time = utils.timestr_to_int(task["during_time"])
        task_types = task["types"]
        run_params = {"tasks": {}, "clients_num": clients_num, "spawn_rate": hatch_rate, "during_time": during_time}
        info_in_params = {
            "index_field_name": case_param["index_field_name"],
            "vector_field_name": case_param["vector_field_name"],
            "dimension": case_param["dimension"],
            "collection_info": self.milvus.get_info(collection_name)}
        logger.info(info_in_params)
        run_params.update({"op_info": info_in_params})
        for task_type in task_types:
            run_params["tasks"].update({
                task_type["type"]: {
                    "weight": task_type["weight"] if "weight" in task_type else 1,
                    "params": task_type["params"] if "params" in task_type else None,
                }
            })
        # collect stats
        # pdb.set_trace()
        logger.info(run_params)
        locust_stats = locust_user.locust_executor(self.hostname, self.port, collection_name,
                                                   connection_type=connection_type, run_params=run_params)
        return locust_stats
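After the loop above, the run_params dict handed to locust_user.locust_executor ends up shaped like the sketch below; values are illustrative, and the op_info entries come from the case parameters (assuming timestr_to_int accepts a duration string such as "2m"):

run_params = {
    "clients_num": 20,
    "spawn_rate": 2,           # renamed from the suite's hatch_rate
    "during_time": 120,        # seconds
    "tasks": {
        "query": {"weight": 20, "params": {"nq": 10, "top_k": 10, "search_param": {"nprobe": 16}}},
        "flush": {"weight": 1, "params": None},
    },
    "op_info": {
        "index_field_name": "float_vector",
        "vector_field_name": "float_vector",
        "dimension": 128,
        "collection_info": {},  # filled from milvus.get_info(collection_name)
    },
}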


class LocustInsertRunner(LocustRunner):
    """run insert"""
    name = "locust_insert_performance"

    def __init__(self, env, metric):
        super(LocustInsertRunner, self).__init__(env, metric)

    def extract_cases(self, collection):
        collection_name = collection["collection_name"] if "collection_name" in collection else None

        (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
        ni_per = collection["ni_per"]
        build_index = collection["build_index"] if "build_index" in collection else False
        vector_type = runner_utils.get_vector_type(data_type)
        other_fields = collection["other_fields"] if "other_fields" in collection else None
        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name,
            "collection_size": collection_size,
            "other_fields": other_fields,
            "ni_per": ni_per
        }
        index_field_name = None
        index_type = None
        index_param = None
        index_info = None
        vector_field_name = runner_utils.get_default_field_name(vector_type)
        if build_index is True:
            index_type = collection["index_type"]
            index_param = collection["index_param"]
            index_info = {
                "index_type": index_type,
                "index_param": index_param
            }
            index_field_name = runner_utils.get_default_field_name(vector_type)
        task = collection["task"]
        connection_type = "single"
        connection_num = task["connection_num"]
        if connection_num > 1:
            connection_type = "multi"
        run_params = {
            "task": collection["task"],
            "connection_type": connection_type,
        }
        self.init_metric(self.name, collection_info, index_info, None, run_params)
        case_metric = copy.deepcopy(self.metric)
        case_metrics = list()
        case_params = list()
        case_metrics.append(case_metric)
        case_param = {
            "collection_name": collection_name,
            "data_type": data_type,
            "dimension": dimension,
            "collection_size": collection_size,
            "ni_per": ni_per,
            "metric_type": metric_type,
            "vector_type": vector_type,
            "other_fields": other_fields,
            "build_index": build_index,
            "index_field_name": index_field_name,
            "vector_field_name": vector_field_name,
            "index_type": index_type,
            "index_param": index_param,
            "task": collection["task"],
            "connection_type": connection_type,
        }
        case_params.append(case_param)
        return case_params, case_metrics

    def prepare(self, **case_param):
        collection_name = case_param["collection_name"]
        dimension = case_param["dimension"]
        vector_type = case_param["vector_type"]
        other_fields = case_param["other_fields"]
        index_field_name = case_param["index_field_name"]
        build_index = case_param["build_index"]

        self.milvus.set_collection(collection_name)
        if self.milvus.exists_collection():
            logger.debug("Start drop collection")
            self.milvus.drop()
            time.sleep(runner_utils.DELETE_INTERVAL_TIME)
        self.milvus.create_collection(dimension, data_type=vector_type,
                                      other_fields=other_fields)
        # TODO: update fields in collection_info
        # fields = self.get_fields(self.milvus, collection_name)
        # collection_info = {
        #     "dimension": dimension,
        #     "metric_type": metric_type,
        #     "dataset_name": collection_name,
        #     "fields": fields
        # }
        if build_index is True:
            if case_param["index_type"]:
                self.milvus.create_index(index_field_name, case_param["index_type"], case_param["metric_type"], index_param=case_param["index_param"])
                logger.debug(self.milvus.describe_index(index_field_name))
            else:
                build_index = False
                logger.warning("Please specify the index_type")


class LocustSearchRunner(LocustRunner):
    """run search"""
    name = "locust_search_performance"

    def __init__(self, env, metric):
        super(LocustSearchRunner, self).__init__(env, metric)

    def extract_cases(self, collection):
        collection_name = collection["collection_name"] if "collection_name" in collection else None
        (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
        ni_per = collection["ni_per"]
        build_index = collection["build_index"] if "build_index" in collection else False
        vector_type = runner_utils.get_vector_type(data_type)
        other_fields = collection["other_fields"] if "other_fields" in collection else None

        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name,
            "collection_size": collection_size,
            "other_fields": other_fields,
            "ni_per": ni_per
        }
        index_field_name = None
        index_type = None
        index_param = None
        index_info = None
        if build_index is True:
            index_type = collection["index_type"]
            index_param = collection["index_param"]
            index_info = {
                "index_type": index_type,
                "index_param": index_param
            }
            index_field_name = runner_utils.get_default_field_name(vector_type)
        vector_field_name = runner_utils.get_default_field_name(vector_type)
        task = collection["task"]
        connection_type = "single"
        connection_num = task["connection_num"]
        if connection_num > 1:
            connection_type = "multi"
        run_params = {
            "task": collection["task"],
            "connection_type": connection_type,
        }
        self.init_metric(self.name, collection_info, index_info, None, run_params)
        case_metric = copy.deepcopy(self.metric)
        case_metrics = list()
        case_params = list()
        case_metrics.append(case_metric)
        case_param = {
            "collection_name": collection_name,
            "data_type": data_type,
            "dimension": dimension,
            "collection_size": collection_size,
            "ni_per": ni_per,
            "metric_type": metric_type,
            "vector_type": vector_type,
            "other_fields": other_fields,
            "build_index": build_index,
            "index_field_name": index_field_name,
            "vector_field_name": vector_field_name,
            "index_type": index_type,
            "index_param": index_param,
            "task": collection["task"],
            "connection_type": connection_type,
        }
        case_params.append(case_param)
        return case_params, case_metrics

    def prepare(self, **case_param):
        collection_name = case_param["collection_name"]
        dimension = case_param["dimension"]
        vector_type = case_param["vector_type"]
        other_fields = case_param["other_fields"]
        index_field_name = case_param["index_field_name"]
        metric_type = case_param["metric_type"]
        build_index = case_param["build_index"]

        self.milvus.set_collection(collection_name)
        if self.milvus.exists_collection():
            logger.debug("Start drop collection")
            self.milvus.drop()
            time.sleep(runner_utils.DELETE_INTERVAL_TIME)
        self.milvus.create_collection(dimension, data_type=vector_type,
                                      other_fields=other_fields)
        # TODO: update fields in collection_info
        # fields = self.get_fields(self.milvus, collection_name)
        # collection_info = {
        #     "dimension": dimension,
        #     "metric_type": metric_type,
        #     "dataset_name": collection_name,
        #     "fields": fields
        # }
        if build_index is True:
            if case_param["index_type"]:
                self.milvus.create_index(index_field_name, case_param["index_type"], case_param["metric_type"], index_param=case_param["index_param"])
                logger.debug(self.milvus.describe_index(index_field_name))
            else:
                build_index = False
                logger.warning("Please specify the index_type")
        self.insert(self.milvus, collection_name, case_param["data_type"], dimension, case_param["collection_size"], case_param["ni_per"])
        build_time = 0.0
        start_time = time.time()
        self.milvus.flush()
        flush_time = round(time.time() - start_time, 2)
        logger.debug(self.milvus.count())
        if build_index is True:
            logger.debug("Start build index for last file")
            start_time = time.time()
            self.milvus.create_index(index_field_name, case_param["index_type"], case_param["metric_type"], index_param=case_param["index_param"])
            build_time = round(time.time() - start_time, 2)
        logger.debug({"flush_time": flush_time, "build_time": build_time})
        logger.info(self.milvus.count())
        logger.info("Start load collection")
        load_start_time = time.time()
        self.milvus.load_collection()
        logger.debug({"load_time": round(time.time() - load_start_time, 2)})
        search_param = None
        for op in case_param["task"]["types"]:
            if op["type"] == "query":
                search_param = op["params"]["search_param"]
                break
        logger.info("index_field_name: {}".format(index_field_name))
        self.milvus.warm_query(index_field_name, search_param, metric_type, times=2)


class LocustRandomRunner(LocustRunner):
    """run random interface"""
    name = "locust_random_performance"

    def __init__(self, env, metric):
        super(LocustRandomRunner, self).__init__(env, metric)

    def extract_cases(self, collection):
        collection_name = collection["collection_name"] if "collection_name" in collection else None
        (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
        ni_per = collection["ni_per"]
        build_index = collection["build_index"] if "build_index" in collection else False
        vector_type = runner_utils.get_vector_type(data_type)
        other_fields = collection["other_fields"] if "other_fields" in collection else None

        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name,
            "collection_size": collection_size,
            "other_fields": other_fields,
            "ni_per": ni_per
        }
        index_field_name = None
        index_type = None
        index_param = None
        index_info = None
        vector_field_name = runner_utils.get_default_field_name(vector_type)
        if build_index is True:
            index_type = collection["index_type"]
            index_param = collection["index_param"]
            index_info = {
                "index_type": index_type,
                "index_param": index_param
            }
            index_field_name = runner_utils.get_default_field_name(vector_type)
        task = collection["task"]
        connection_type = "single"
        connection_num = task["connection_num"]
        if connection_num > 1:
            connection_type = "multi"
        run_params = {
            "task": collection["task"],
            "connection_type": connection_type,
        }
        self.init_metric(self.name, collection_info, index_info, None, run_params)
        case_metric = copy.deepcopy(self.metric)
        case_metrics = list()
        case_params = list()
        case_metrics.append(case_metric)
        case_param = {
            "collection_name": collection_name,
            "data_type": data_type,
            "dimension": dimension,
            "collection_size": collection_size,
            "ni_per": ni_per,
            "metric_type": metric_type,
            "vector_type": vector_type,
            "other_fields": other_fields,
            "build_index": build_index,
            "index_field_name": index_field_name,
            "vector_field_name": vector_field_name,
            "index_type": index_type,
            "index_param": index_param,
            "task": collection["task"],
            "connection_type": connection_type,
        }
        case_params.append(case_param)
        return case_params, case_metrics

    def prepare(self, **case_param):
        collection_name = case_param["collection_name"]
        dimension = case_param["dimension"]
        vector_type = case_param["vector_type"]
        other_fields = case_param["other_fields"]
        index_field_name = case_param["index_field_name"]
        build_index = case_param["build_index"]

        self.milvus.set_collection(collection_name)
        # if self.milvus.exists_collection():
        #     logger.debug("Start drop collection")
        #     self.milvus.drop()
        #     time.sleep(runner_utils.DELETE_INTERVAL_TIME)
        # self.milvus.create_collection(dimension, data_type=vector_type,
        #                               other_fields=other_fields)
        # # TODO: update fields in collection_info
        # # fields = self.get_fields(self.milvus, collection_name)
        # # collection_info = {
        # #     "dimension": dimension,
        # #     "metric_type": metric_type,
        # #     "dataset_name": collection_name,
        # #     "fields": fields
        # # }
        # if build_index is True:
        #     if case_param["index_type"]:
        #         self.milvus.create_index(index_field_name, case_param["index_type"], case_param["metric_type"], index_param=case_param["index_param"])
        #         logger.debug(self.milvus.describe_index(index_field_name))
        #     else:
        #         build_index = False
        #         logger.warning("Please specify the index_type")
        # self.insert(self.milvus, collection_name, case_param["data_type"], dimension, case_param["collection_size"], case_param["ni_per"])
        # build_time = 0.0
        # start_time = time.time()
        # self.milvus.flush()
        # flush_time = round(time.time()-start_time, 2)
        # logger.debug(self.milvus.count())
        # if build_index is True:
        #     logger.debug("Start build index for last file")
        #     start_time = time.time()
        #     self.milvus.create_index(index_field_name, case_param["index_type"], case_param["metric_type"], index_param=case_param["index_param"])
        #     build_time = round(time.time()-start_time, 2)
        # logger.debug({"flush_time": flush_time, "build_time": build_time})
        # logger.info(self.milvus.count())
        logger.info("Start load collection")
        load_start_time = time.time()
        self.milvus.load_collection()
        logger.debug({"load_time": round(time.time() - load_start_time, 2)})
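On the suite side, the "task" block read by the Locust*Runner.extract_cases methods looks roughly like the sketch below; keys mirror the lookups in the code, values are invented for illustration:

# Hypothetical "task" block of a locust suite entry.
collection_task = {
    "connection_num": 1,       # >1 switches connection_type to "multi"
    "clients_num": 20,
    "hatch_rate": 2,
    "during_time": "2m",       # parsed by utils.timestr_to_int
    "types": [
        {"type": "query", "weight": 20,
         "params": {"nq": 10, "top_k": 10, "search_param": {"nprobe": 16}}},
        {"type": "flush"},     # weight defaults to 1, params to None
    ],
}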
@ -3,7 +3,9 @@ import pdb
 import random
 import logging
 from locust import User, events
-from client import MilvusClient
+from milvus_benchmark.client import MilvusClient
 
+logger = logging.getLogger("milvus_benchmark.runners.locust_task")
+
 
 class MilvusTask(object):
@ -17,7 +19,6 @@ class MilvusTask(object):
         port = kwargs.get("port")
         collection_name = kwargs.get("collection_name")
         self.m = MilvusClient(host=host, port=port, collection_name=collection_name)
-        # logging.getLogger().error(id(self.m))
 
     def __getattr__(self, name):
         func = getattr(self.m, name)
@ -0,0 +1,73 @@
import pdb
import random
import time
import logging
import json
from locust import TaskSet, task
from . import utils

dim = 128

logger = logging.getLogger("milvus_benchmark.runners.locust_tasks")


class Tasks(TaskSet):
    @task(100)
    def query(self):
        op = "query"
        X = utils.generate_vectors(self.params[op]["nq"], self.op_info["dimension"])
        vector_query = {"vector": {self.op_info["vector_field_name"]: {
            "topk": self.params[op]["top_k"],
            "query": X,
            "metric_type": self.params[op]["metric_type"] if "metric_type" in self.params[op] else utils.DEFAULT_METRIC_TYPE,
            "params": self.params[op]["search_param"]}
        }}
        filter_query = []
        if "filters" in self.params[op]:
            # filter expressions are stored as strings in the suite and eval()ed here
            for filter in self.params[op]["filters"]:
                if isinstance(filter, dict) and "range" in filter:
                    filter_query.append(eval(filter["range"]))
                if isinstance(filter, dict) and "term" in filter:
                    filter_query.append(eval(filter["term"]))
        logger.debug(filter_query)
        self.client.query(vector_query, filter_query=filter_query, log=False)

    @task
    def flush(self):
        logger.debug("Flush")
        self.client.flush(log=False)

    @task
    def load(self):
        self.client.load_collection()

    @task
    def release(self):
        self.client.release_collection()
        self.client.load_collection()

    # @task
    # def release_index(self):
    #     self.client.release_index()

    # @task
    # def create_index(self):
    #     self.client.release_index()

    @task
    def insert(self):
        op = "insert"
        ids = [random.randint(1, 10000000) for _ in range(self.params[op]["ni_per"])]
        # NOTE: vectors use the module-level `dim`, not self.op_info["dimension"]
        X = [[random.random() for _ in range(dim)] for _ in range(self.params[op]["ni_per"])]
        entities = utils.generate_entities(self.op_info["collection_info"], X, ids)
        self.client.insert(entities, log=False)

    @task
    def insert_rand(self):
        self.client.insert_rand(log=False)

    @task
    def get(self):
        op = "get"
        ids = [random.randint(1, 10000000) for _ in range(self.params[op]["ids_length"])]
        self.client.get(ids)
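Each Tasks method looks up its arguments in self.params[op], which locust_executor populates per operation. A sketch of the dict the class expects (values illustrative, key names taken from the lookups above):

params = {
    "query": {
        "nq": 10,
        "top_k": 10,
        "search_param": {"nprobe": 16},  # assumed index-dependent search params
        # optional: "metric_type", and "filters" given as eval-able strings
    },
    "insert": {"ni_per": 1000},   # vectors are generated with the module-level dim (128)
    "get": {"ids_length": 100},
}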
@ -2,22 +2,20 @@ import logging
 import random
 import pdb
+import gevent
+import gevent.monkey
+gevent.monkey.patch_all()
+
+from locust import User, between, events, stats
-# import gevent.monkey
-# gevent.monkey.patch_all()
-from locust import Locust, User, TaskSet, task, between, events, stats
 from locust.env import Environment
 import locust.stats
 from locust.stats import stats_printer, print_stats
+from locust.log import setup_logging, greenlet_exception_logger
+from milvus_benchmark.client import MilvusClient
+from .locust_task import MilvusTask
+from .locust_tasks import Tasks
+
 locust.stats.CONSOLE_STATS_INTERVAL_SEC = 30
-from locust.log import setup_logging, greenlet_exception_logger
+logger = logging.getLogger("milvus_benchmark.runners.locust_user")
-
-from locust_tasks import Tasks
-from client import MilvusClient
-from locust_task import MilvusTask
-
-logger = logging.getLogger("__locust__")
 
 class MyUser(User):
     # task_set = None
@ -27,19 +25,25 @@ class MyUser(User):
 def locust_executor(host, port, collection_name, connection_type="single", run_params=None):
     m = MilvusClient(host=host, port=port, collection_name=collection_name)
     MyUser.tasks = {}
+    MyUser.op_info = run_params["op_info"]
+    MyUser.params = {}
     tasks = run_params["tasks"]
-    for op, weight in tasks.items():
-        task = {eval("Tasks."+op): weight}
+    for op, value in tasks.items():
+        task = {eval("Tasks." + op): value["weight"]}
         MyUser.tasks.update(task)
-    logger.error(MyUser.tasks)
-    # MyUser.tasks = {Tasks.query: 1, Tasks.flush: 1}
-    MyUser.client = MilvusTask(host=host, port=port, collection_name=collection_name, connection_type=connection_type, m=m)
+        MyUser.params[op] = value["params"] if "params" in value else None
+    logger.info(MyUser.tasks)
+
+    MyUser.tasks = {Tasks.load: 1, Tasks.flush: 1}
+    MyUser.client = MilvusTask(host=host, port=port, collection_name=collection_name, connection_type=connection_type,
+                               m=m)
     # MyUser.info = m.get_info(collection_name)
     env = Environment(events=events, user_classes=[MyUser])
 
     runner = env.create_local_runner()
     # setup logging
-    # setup_logging("WARNING", "/dev/null")
+    setup_logging("WARNING", "/dev/null")
-    greenlet_exception_logger(logger=logger)
+    # greenlet_exception_logger(logger=logger)
     gevent.spawn(stats_printer(env.stats))
     # env.create_web_ui("127.0.0.1", 8089)
     # gevent.spawn(stats_printer(env.stats), env, "test", full_history=True)
@ -55,16 +59,7 @@ def locust_executor(host, port, collection_name, connection_type="single", run_p
         "rps": round(env.stats.total.current_rps, 1),
         "fail_ratio": env.stats.total.fail_ratio,
         "max_response_time": round(env.stats.total.max_response_time, 1),
-        "min_response_time": round(env.stats.total.avg_response_time, 1)
+        "avg_response_time": round(env.stats.total.avg_response_time, 1)
     }
     runner.stop()
     return result
-
-
-if __name__ == '__main__':
-    connection_type = "single"
-    host = "192.168.1.112"
-    port = 19530
-    collection_name = "sift_1m_2000000_128_l2_2"
-    run_params = {"tasks": {"query": 1, "flush": 1}, "clients_num": 1, "spawn_rate": 1, "during_time": 3}
-    locust_executor(host, port, collection_name, run_params=run_params)
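After this change, each task entry in run_params carries its weight and params as a dict (the removed __main__ block above still used the old bare-weight form). A hedged usage sketch with the new shape; host, port, and all values are illustrative:

run_params = {
    "tasks": {
        "query": {"weight": 1, "params": {"nq": 1, "top_k": 10, "search_param": {"nprobe": 16}}},
        "flush": {"weight": 1, "params": None},
    },
    "clients_num": 1,
    "spawn_rate": 1,
    "during_time": 30,
    "op_info": {"index_field_name": "float_vector",
                "vector_field_name": "float_vector",
                "dimension": 128,
                "collection_info": {}},
}
stats = locust_executor("127.0.0.1", 19530, "sift_1m_2000000_128_l2_2", run_params=run_params)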
@ -0,0 +1,286 @@
import time
import pdb
import copy
import json
import logging
from milvus_benchmark import parser
from milvus_benchmark.runners import utils
from milvus_benchmark.runners.base import BaseRunner

logger = logging.getLogger("milvus_benchmark.runners.search")


class SearchRunner(BaseRunner):
    """run search"""
    name = "search_performance"

    def __init__(self, env, metric):
        super(SearchRunner, self).__init__(env, metric)

    def extract_cases(self, collection):
        collection_name = collection["collection_name"] if "collection_name" in collection else None
        (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
        run_count = collection["run_count"]
        top_ks = collection["top_ks"]
        nqs = collection["nqs"]
        filters = collection["filters"] if "filters" in collection else []

        search_params = collection["search_params"]
        # TODO: get fields by describe_index
        # fields = self.get_fields(self.milvus, collection_name)
        fields = None
        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name,
            "collection_size": collection_size,
            "fields": fields
        }
        # TODO: need to get index_info
        index_info = None
        vector_type = utils.get_vector_type(data_type)
        index_field_name = utils.get_default_field_name(vector_type)
        base_query_vectors = utils.get_vectors_from_binary(utils.MAX_NQ, dimension, data_type)
        cases = list()
        case_metrics = list()
        self.init_metric(self.name, collection_info, index_info, None)
        for search_param in search_params:
            logger.info("Search param: %s" % json.dumps(search_param))
            for filter in filters:
                filter_query = []
                filter_param = []
                if filter and isinstance(filter, dict):
                    if "range" in filter:
                        filter_query.append(eval(filter["range"]))
                        filter_param.append(filter["range"])
                    elif "term" in filter:
                        filter_query.append(eval(filter["term"]))
                        filter_param.append(filter["term"])
                    else:
                        raise Exception("%s not supported" % filter)
                logger.info("filter param: %s" % json.dumps(filter_param))
                for nq in nqs:
                    query_vectors = base_query_vectors[0:nq]
                    for top_k in top_ks:
                        search_info = {
                            "topk": top_k,
                            "query": query_vectors,
                            "metric_type": utils.metric_type_trans(metric_type),
                            "params": search_param}
                        # TODO: only update search_info
                        case_metric = copy.deepcopy(self.metric)
                        case_metric.search = {
                            "nq": nq,
                            "topk": top_k,
                            "search_param": search_param,
                            "filter": filter_param
                        }
                        vector_query = {"vector": {index_field_name: search_info}}
                        case = {
                            "collection_name": collection_name,
                            "index_field_name": index_field_name,
                            "run_count": run_count,
                            "filter_query": filter_query,
                            "vector_query": vector_query,
                        }
                        cases.append(case)
                        case_metrics.append(case_metric)
        return cases, case_metrics

    def prepare(self, **case_param):
        collection_name = case_param["collection_name"]
        self.milvus.set_collection(collection_name)
        if not self.milvus.exists_collection():
            logger.error("collection name: {} does not exist".format(collection_name))
            return False
        logger.debug(self.milvus.count())
        logger.info("Start load collection")
        self.milvus.load_collection()
        # TODO: enable warm query
        # self.milvus.warm_query(index_field_name, search_params[0], times=2)

    def run_case(self, case_metric, **case_param):
        # index_field_name = case_param["index_field_name"]
        run_count = case_param["run_count"]
        avg_query_time = 0.0
        min_query_time = 0.0
        total_query_time = 0.0
        for i in range(run_count):
            logger.debug("Start run query, run %d of %s" % (i + 1, run_count))
            start_time = time.time()
            _query_res = self.milvus.query(case_param["vector_query"], filter_query=case_param["filter_query"])
            interval_time = time.time() - start_time
            total_query_time += interval_time
            if (i == 0) or (min_query_time > interval_time):
                min_query_time = round(interval_time, 2)
        avg_query_time = round(total_query_time / run_count, 2)
        tmp_result = {"search_time": min_query_time, "avg_search_time": avg_query_time}
        return tmp_result
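Because extract_cases eval()s the "range"/"term" filters, the suite stores them as strings containing Python dict literals. A hypothetical collection entry for a search_performance case; the filter expression shape is an assumption for illustration, not a documented format:

collection = {
    "collection_name": "sift_1m_2000000_128_l2_2",
    "run_count": 2,
    "top_ks": [10, 100],
    "nqs": [1, 10],
    "search_params": [{"nprobe": 8}, {"nprobe": 32}],
    "filters": [
        {"range": "{'range': {'float': {'GT': -1.0, 'LT': 1.0}}}"},  # assumed shape
    ],
}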


class InsertSearchRunner(BaseRunner):
    """run insert and search"""
    name = "insert_search_performance"

    def __init__(self, env, metric):
        super(InsertSearchRunner, self).__init__(env, metric)
        self.build_time = None
        self.insert_result = None

    def extract_cases(self, collection):
        collection_name = collection["collection_name"] if "collection_name" in collection else None
        (data_type, collection_size, dimension, metric_type) = parser.collection_parser(collection_name)
        build_index = collection["build_index"] if "build_index" in collection else False
        index_type = collection["index_type"] if "index_type" in collection else None
        index_param = collection["index_param"] if "index_param" in collection else None
        run_count = collection["run_count"]
        top_ks = collection["top_ks"]
        nqs = collection["nqs"]
        other_fields = collection["other_fields"] if "other_fields" in collection else None
        filters = collection["filters"] if "filters" in collection else []
        search_params = collection["search_params"]
        ni_per = collection["ni_per"]

        # TODO: get fields by describe_index
        # fields = self.get_fields(self.milvus, collection_name)
        fields = None
        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name,
            "fields": fields
        }
        index_info = {
            "index_type": index_type,
            "index_param": index_param
        }
        vector_type = utils.get_vector_type(data_type)
        index_field_name = utils.get_default_field_name(vector_type)
        base_query_vectors = utils.get_vectors_from_binary(utils.MAX_NQ, dimension, data_type)
        cases = list()
        case_metrics = list()
        self.init_metric(self.name, collection_info, index_info, None)
        for search_param in search_params:
            if not filters:
                filters.append(None)
            for filter in filters:
                # reset per filter so filter expressions do not accumulate across cases
                filter_query = []
                filter_param = []
                if isinstance(filter, dict) and "range" in filter:
                    filter_query.append(eval(filter["range"]))
                    filter_param.append(filter["range"])
                if isinstance(filter, dict) and "term" in filter:
                    filter_query.append(eval(filter["term"]))
                    filter_param.append(filter["term"])
                logger.info("filter param: %s" % json.dumps(filter_param))
                for nq in nqs:
                    query_vectors = base_query_vectors[0:nq]
                    for top_k in top_ks:
                        search_info = {
                            "topk": top_k,
                            "query": query_vectors,
                            "metric_type": utils.metric_type_trans(metric_type),
                            "params": search_param}
                        # TODO: only update search_info
                        case_metric = copy.deepcopy(self.metric)
                        case_metric.search = {
                            "nq": nq,
                            "topk": top_k,
                            "search_param": search_param,
                            "filter": filter_param
                        }
                        vector_query = {"vector": {index_field_name: search_info}}
                        case = {
                            "collection_name": collection_name,
                            "index_field_name": index_field_name,
                            "other_fields": other_fields,
                            "dimension": dimension,
                            "data_type": data_type,
                            "vector_type": vector_type,
                            "collection_size": collection_size,
                            "ni_per": ni_per,
                            "build_index": build_index,
                            "index_type": index_type,
                            "index_param": index_param,
                            "metric_type": metric_type,
                            "run_count": run_count,
                            "filter_query": filter_query,
                            "vector_query": vector_query,
                        }
                        cases.append(case)
                        case_metrics.append(case_metric)
        return cases, case_metrics

    def prepare(self, **case_param):
        collection_name = case_param["collection_name"]
        dimension = case_param["dimension"]
        vector_type = case_param["vector_type"]
        other_fields = case_param["other_fields"]
        index_field_name = case_param["index_field_name"]
        build_index = case_param["build_index"]

        self.milvus.set_collection(collection_name)
        if self.milvus.exists_collection():
            logger.debug("Start drop collection")
            self.milvus.drop()
            time.sleep(utils.DELETE_INTERVAL_TIME)
        self.milvus.create_collection(dimension, data_type=vector_type,
                                      other_fields=other_fields)
        # TODO: update fields in collection_info
        # fields = self.get_fields(self.milvus, collection_name)
        # collection_info = {
        #     "dimension": dimension,
        #     "metric_type": metric_type,
        #     "dataset_name": collection_name,
        #     "fields": fields
        # }
        if build_index is True:
            if case_param["index_type"]:
                self.milvus.create_index(index_field_name, case_param["index_type"], case_param["metric_type"], index_param=case_param["index_param"])
                logger.debug(self.milvus.describe_index(index_field_name))
            else:
                build_index = False
                logger.warning("Please specify the index_type")
        insert_result = self.insert(self.milvus, collection_name, case_param["data_type"], dimension, case_param["collection_size"], case_param["ni_per"])
        self.insert_result = insert_result
        build_time = 0.0
        start_time = time.time()
        self.milvus.flush()
        flush_time = round(time.time() - start_time, 2)
        logger.debug(self.milvus.count())
        if build_index is True:
            logger.debug("Start build index for last file")
            start_time = time.time()
            self.milvus.create_index(index_field_name, case_param["index_type"], case_param["metric_type"], index_param=case_param["index_param"])
            build_time = round(time.time() - start_time, 2)
        logger.debug({"flush_time": flush_time, "build_time": build_time})
        self.build_time = build_time
        logger.info(self.milvus.count())
        logger.info("Start load collection")
        load_start_time = time.time()
        self.milvus.load_collection()
        logger.debug({"load_time": round(time.time() - load_start_time, 2)})

    def run_case(self, case_metric, **case_param):
        run_count = case_param["run_count"]
        avg_query_time = 0.0
        min_query_time = 0.0
        total_query_time = 0.0
        for i in range(run_count):
            logger.debug("Start run query, run %d of %s" % (i + 1, run_count))
            logger.info(case_metric.search)
            start_time = time.time()
            _query_res = self.milvus.query(case_param["vector_query"], filter_query=case_param["filter_query"])
            interval_time = time.time() - start_time
            total_query_time += interval_time
            if (i == 0) or (min_query_time > interval_time):
                min_query_time = round(interval_time, 2)
        avg_query_time = round(total_query_time / run_count, 2)
        logger.info("Min query time: %.2f, avg query time: %.2f" % (min_query_time, avg_query_time))
        tmp_result = {"insert": self.insert_result, "build_time": self.build_time, "search_time": min_query_time, "avg_search_time": avg_query_time}
        #
        # logger.info("Start load collection")
        # self.milvus.load_collection()
        # logger.info("Release load collection")
        # self.milvus.release_collection()
        return tmp_result
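run_case reports only the fastest run as search_time, alongside the insert and build numbers captured during prepare. A sketch of the combined result shape, with illustrative values:

# Shape of the dict InsertSearchRunner.run_case returns.
tmp_result = {
    "insert": {"total_time": 40.0, "rps": 25000.0, "ni_time": 2.0},
    "build_time": 12.3,        # recorded in prepare()
    "search_time": 0.05,       # fastest of run_count runs
    "avg_search_time": 0.07,   # mean over run_count runs
}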
@ -0,0 +1,265 @@
import os
import pdb
import logging
import numpy as np
import sklearn.preprocessing
import h5py
import random
from itertools import product

from pymilvus import DataType
from milvus_benchmark import config

logger = logging.getLogger("milvus_benchmark.runners.utils")

DELETE_INTERVAL_TIME = 2

VECTORS_PER_FILE = 1000000
SIFT_VECTORS_PER_FILE = 100000
BINARY_VECTORS_PER_FILE = 2000000

MAX_NQ = 10001
FILE_PREFIX = "binary_"

WARM_TOP_K = 1
WARM_NQ = 1
DEFAULT_DIM = 512
DEFAULT_METRIC_TYPE = "L2"

RANDOM_SRC_DATA_DIR = config.RAW_DATA_DIR + 'random/'
SIFT_SRC_DATA_DIR = config.RAW_DATA_DIR + 'sift1b/'
DEEP_SRC_DATA_DIR = config.RAW_DATA_DIR + 'deep1b/'
JACCARD_SRC_DATA_DIR = config.RAW_DATA_DIR + 'jaccard/'
HAMMING_SRC_DATA_DIR = config.RAW_DATA_DIR + 'hamming/'
STRUCTURE_SRC_DATA_DIR = config.RAW_DATA_DIR + 'structure/'
BINARY_SRC_DATA_DIR = config.RAW_DATA_DIR + 'binary/'
SIFT_SRC_GROUNDTRUTH_DATA_DIR = SIFT_SRC_DATA_DIR + 'gnd'

DEFAULT_F_FIELD_NAME = 'float_vector'
DEFAULT_B_FIELD_NAME = 'binary_vector'
DEFAULT_INT_FIELD_NAME = 'int64'
DEFAULT_FLOAT_FIELD_NAME = 'float'
DEFAULT_DOUBLE_FIELD_NAME = "double"

GROUNDTRUTH_MAP = {
    "1000000": "idx_1M.ivecs",
    "2000000": "idx_2M.ivecs",
    "5000000": "idx_5M.ivecs",
    "10000000": "idx_10M.ivecs",
    "20000000": "idx_20M.ivecs",
    "50000000": "idx_50M.ivecs",
    "100000000": "idx_100M.ivecs",
    "200000000": "idx_200M.ivecs",
    "500000000": "idx_500M.ivecs",
    "1000000000": "idx_1000M.ivecs",
}

METRIC_MAP = {
    "l2": "L2",
    "ip": "IP",
    "jaccard": "JACCARD",
    "hamming": "HAMMING",
    "sub": "SUBSTRUCTURE",
    "super": "SUPERSTRUCTURE"
}


def get_len_vectors_per_file(data_type, dimension):
    if data_type == "random":
        if dimension == 512:
            vectors_per_file = VECTORS_PER_FILE
        elif dimension == 4096:
            vectors_per_file = 100000
        elif dimension == 16384:
            vectors_per_file = 10000
        else:
            # guard against an unbound vectors_per_file for unexpected dims
            raise Exception("dimension: %d not supported for random data" % dimension)
    elif data_type == "sift":
        vectors_per_file = SIFT_VECTORS_PER_FILE
    elif data_type in ["binary"]:
        vectors_per_file = BINARY_VECTORS_PER_FILE
    elif data_type == "local":
        vectors_per_file = SIFT_VECTORS_PER_FILE
    else:
        raise Exception("data_type: %s not supported" % data_type)
    return vectors_per_file


def get_vectors_from_binary(nq, dimension, data_type):
    # use the first file; nq must not exceed MAX_NQ
    if nq > MAX_NQ:
        raise Exception("nq: %d is larger than MAX_NQ: %d" % (nq, MAX_NQ))
    if data_type == "local":
        return generate_vectors(nq, dimension)
    elif data_type == "random":
        file_name = RANDOM_SRC_DATA_DIR + 'query_%d.npy' % dimension
    elif data_type == "sift":
        file_name = SIFT_SRC_DATA_DIR + 'query.npy'
    elif data_type == "deep":
        file_name = DEEP_SRC_DATA_DIR + 'query.npy'
    elif data_type == "binary":
        file_name = BINARY_SRC_DATA_DIR + 'query.npy'
    else:
        # avoid an unbound file_name below
        raise Exception("data_type: %s not supported" % data_type)
    data = np.load(file_name)
    vectors = data[0:nq].tolist()
    return vectors
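
# Illustrative call: get_vectors_from_binary(nq=10, dimension=128, data_type="sift")
# loads sift1b/query.npy and returns its first 10 rows as Python lists.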


def generate_vectors(nb, dim):
    return [[random.random() for _ in range(dim)] for _ in range(nb)]


def generate_values(data_type, vectors, ids):
    values = None
    if data_type in [DataType.INT32, DataType.INT64]:
        values = ids
    elif data_type in [DataType.FLOAT, DataType.DOUBLE]:
        values = [(i + 0.0) for i in ids]
    elif data_type in [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR]:
        values = vectors
    return values
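
# Worked example: scalar fields take their values from the generated ids,
# vector fields from the vectors themselves, e.g.
#   generate_values(DataType.FLOAT, vectors=None, ids=[1, 2])  # -> [1.0, 2.0]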


def generate_entities(info, vectors, ids=None):
    entities = []
    for field in info["fields"]:
        # if field["name"] == "_id":
        #     continue
        field_type = field["type"]
        entities.append(
            {"name": field["name"], "type": field_type, "values": generate_values(field_type, vectors, ids)})
    return entities
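
# Sketch of the structure this returns (the one-field schema is illustrative):
#   info = {"fields": [{"name": DEFAULT_F_FIELD_NAME, "type": DataType.FLOAT_VECTOR}]}
#   generate_entities(info, vectors=[[0.1, 0.2]])
#   # -> [{"name": "float_vector", "type": DataType.FLOAT_VECTOR, "values": [[0.1, 0.2]]}]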


def metric_type_trans(metric_type):
    if metric_type in METRIC_MAP:
        return METRIC_MAP[metric_type]
    else:
        raise Exception("metric_type: %s not in METRIC_MAP" % metric_type)


def get_dataset(hdf5_file_path):
    if not os.path.exists(hdf5_file_path):
        raise Exception("%s does not exist" % hdf5_file_path)
    # open read-only explicitly; the h5py default mode has changed across versions
    dataset = h5py.File(hdf5_file_path, "r")
    return dataset


def get_default_field_name(data_type=DataType.FLOAT_VECTOR):
    if data_type == DataType.FLOAT_VECTOR:
        field_name = DEFAULT_F_FIELD_NAME
    elif data_type == DataType.BINARY_VECTOR:
        field_name = DEFAULT_B_FIELD_NAME
    elif data_type == DataType.INT64:
        field_name = DEFAULT_INT_FIELD_NAME
    elif data_type == DataType.FLOAT:
        field_name = DEFAULT_FLOAT_FIELD_NAME
    else:
        logger.error(data_type)
        raise Exception("Not supported data type")
    return field_name


def get_vector_type(data_type):
    if data_type in ["random", "sift", "deep", "glove", "local"]:
        vector_type = DataType.FLOAT_VECTOR
    elif data_type in ["binary"]:
        vector_type = DataType.BINARY_VECTOR
    else:
        raise Exception("Data type: %s not defined" % data_type)
    return vector_type


def get_vector_type_from_metric(metric_type):
    if metric_type in ["hamming", "jaccard"]:
        vector_type = DataType.BINARY_VECTOR
    else:
        vector_type = DataType.FLOAT_VECTOR
    return vector_type


def normalize(metric_type, X):
    if metric_type == "ip":
        # inner-product search expects unit-length float32 vectors
        logger.info("Set normalize for metric_type: %s" % metric_type)
        X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
        X = X.astype(np.float32)
    elif metric_type == "l2":
        X = X.astype(np.float32)
    elif metric_type in ["jaccard", "hamming", "sub", "super"]:
        # binary metrics: pack the 0/1 components into raw bytes
        tmp = []
        for item in X:
            new_vector = bytes(np.packbits(item, axis=-1).tolist())
            tmp.append(new_vector)
        X = tmp
    return X
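
# Worked example: for "ip" the rows come back unit-length, e.g.
#   normalize("ip", np.array([[3.0, 4.0]]))  # -> array([[0.6, 0.8]], dtype=float32)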


def generate_combinations(args):
    if isinstance(args, list):
        args = [el if isinstance(el, list) else [el] for el in args]
        return [list(x) for x in product(*args)]
    elif isinstance(args, dict):
        flat = []
        for k, v in args.items():
            if isinstance(v, list):
                flat.append([(k, el) for el in v])
            else:
                flat.append([(k, v)])
        return [dict(x) for x in product(*flat)]
    else:
        raise TypeError("No args handling exists for %s" % type(args).__name__)
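
# Worked example: every list-valued field is expanded into a cross product:
#   generate_combinations({"nprobe": [8, 16], "topk": 10})
#   # -> [{'nprobe': 8, 'topk': 10}, {'nprobe': 16, 'topk': 10}]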


def gen_file_name(idx, dimension, data_type):
    s = "%05d" % idx
    fname = FILE_PREFIX + str(dimension) + "d_" + s + ".npy"
    if data_type == "random":
        fname = RANDOM_SRC_DATA_DIR + fname
    elif data_type == "sift":
        fname = SIFT_SRC_DATA_DIR + fname
    elif data_type == "deep":
        fname = DEEP_SRC_DATA_DIR + fname
    elif data_type == "jaccard":
        fname = JACCARD_SRC_DATA_DIR + fname
    elif data_type == "hamming":
        fname = HAMMING_SRC_DATA_DIR + fname
    elif data_type == "sub" or data_type == "super":
        fname = STRUCTURE_SRC_DATA_DIR + fname
    return fname
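
# Worked example: gen_file_name(3, 128, "sift")
# -> config.RAW_DATA_DIR + 'sift1b/' + 'binary_128d_00003.npy'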


def get_recall_value(true_ids, result_ids):
    """
    Use the intersection length
    """
    sum_ratio = 0.0
    for index, item in enumerate(result_ids):
        # tmp = set(item).intersection(set(flat_id_list[index]))
        tmp = set(true_ids[index]).intersection(set(item))
        sum_ratio = sum_ratio + len(tmp) / len(item)
    # logger.debug(sum_ratio)
    return round(sum_ratio / len(result_ids), 3)
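
# Worked example: with true_ids=[[1, 2, 3, 4, 5]] and result_ids=[[1, 2, 3, 4, 99]],
# 4 of the 5 returned ids hit the ground truth, so the recall is 0.8.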


def get_ground_truth_ids(collection_size):
    fname = GROUNDTRUTH_MAP[str(collection_size)]
    fname = SIFT_SRC_GROUNDTRUTH_DATA_DIR + "/" + fname
    # .ivecs layout: each row is an int32 count d followed by d neighbor ids
    a = np.fromfile(fname, dtype='int32')
    d = a[0]
    true_ids = a.reshape(-1, d + 1)[:, 1:].copy()
    return true_ids
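
# Worked example of the decoding above on a synthetic buffer with two queries
# and two neighbors each:
#   a = np.array([2, 7, 9, 2, 4, 8], dtype='int32')
#   a.reshape(-1, 2 + 1)[:, 1:]  # -> array([[7, 9], [4, 8]], dtype=int32)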

@@ -0,0 +1,27 @@
# import logging
# from apscheduler.schedulers.background import BackgroundScheduler
# from apscheduler.schedulers.blocking import BlockingScheduler

# from apscheduler.jobstores.mongodb import MongoDBJobStore
# from apscheduler.executors.pool import ProcessPoolExecutor, ThreadPoolExecutor
# from apscheduler.executors.debug import DebugExecutor
# import config
# from pymongo import MongoClient

# logger = logging.getLogger(__name__)  # basicConfig() returns None, so it cannot serve as a logger

# mongo_client = MongoClient(config.MONGO_SERVER)
# jobstores = {
#     'default': MongoDBJobStore(database=config.SCHEDULER_DB, collection=config.JOB_COLLECTION, client=mongo_client)
# }

# executors = {
#     'default': ThreadPoolExecutor(max_workers=100)
# }

# job_defaults = {
#     'coalesce': True,
#     'max_instances': 32
# }
# # TODO:
# back_scheduler = BackgroundScheduler(executors=executors, job_defaults=job_defaults, logger=logger)
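
# A minimal sketch of how this scheduler could be brought up if the block
# above were re-enabled (the job function `run_suite` is hypothetical):
#
#   back_scheduler = BackgroundScheduler(jobstores=jobstores,
#                                        executors=executors,
#                                        job_defaults=job_defaults)
#   back_scheduler.add_job(run_suite, 'interval', hours=1)
#   back_scheduler.start()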