mirror of https://github.com/milvus-io/milvus.git
Update vector engine
parent
6c4cae438a
commit
3b629dc6b4
|
@ -42,3 +42,19 @@ class MetaManager(object):
|
||||||
# print("record.group_name: ", record.group_name)
|
# print("record.group_name: ", record.group_name)
|
||||||
db.session.delete(record)
|
db.session.delete(record)
|
||||||
|
|
||||||
|
@staticmethod
def UpdateGroup(group_name, data):
    """Bulk-update the ``GroupTable`` rows whose name is ``group_name``.

    ``data`` is passed unchanged to the query's ``update()``; the caller
    visible in this change passes a dict of column name -> new value.
    Nothing is committed here and nothing is returned.
    """
    matching_groups = GroupTable.query.filter(
        GroupTable.group_name == group_name
    )
    matching_groups.update(data)
|
||||||
|
|
||||||
|
|
||||||
|
@staticmethod
def GetAllRawFiles(group_name):
    """Return the 'raw' ``FileTable`` rows that belong to ``group_name``.

    Returns:
        A list of ``FileTable`` records (empty when none match).
    """
    # The two conditions are applied as chained filter() calls, which the
    # ORM combines with SQL AND.  Python's ``and`` must not be used between
    # column comparisons: it evaluates the truthiness of the first
    # expression object instead of building a SQL conjunction, so one of
    # the conditions is silently discarded.
    raw_files = (
        FileTable.query
        .filter(FileTable.group_name == group_name)
        .filter(FileTable.type == 'raw')
    )
    # The result has to be returned; callers iterate over it.
    return raw_files.all()
|
||||||
|
|
||||||
|
@staticmethod
def CreateRawFile(group_name, filename):
    """Add a new 'raw' ``FileTable`` record to the database session.

    The record is created for ``group_name`` / ``filename`` with a row
    number of 0.  The session is not committed here.
    """
    raw_file_record = FileTable(group_name, filename, 'raw', 0)
    db.session.add(raw_file_record)
|
||||||
|
|
||||||
|
@staticmethod
def UpdateFile(filename, data):
    """Bulk-update the ``FileTable`` rows whose filename is ``filename``.

    ``data`` is passed unchanged to the query's ``update()``.  Nothing is
    committed here and nothing is returned.
    """
    matching_files = FileTable.query.filter(
        FileTable.filename == filename
    )
    matching_files.update(data)
|
||||||
|
|
|
@ -49,6 +49,7 @@ class TestVectorEngine:
|
||||||
# Add vector for exist group
|
# Add vector for exist group
|
||||||
code, vector_id = VectorEngine.AddVector('test_group', self.__vectors)
|
code, vector_id = VectorEngine.AddVector('test_group', self.__vectors)
|
||||||
assert code == ErrorCode.SUCCESS_CODE
|
assert code == ErrorCode.SUCCESS_CODE
|
||||||
|
print(vector_id)
|
||||||
assert vector_id == ['test_group.0', 'test_group.1', 'test_group.2', 'test_group.3', 'test_group.4', 'test_group.5', 'test_group.6', 'test_group.7', 'test_group.8', 'test_group.9']
|
assert vector_id == ['test_group.0', 'test_group.1', 'test_group.2', 'test_group.3', 'test_group.4', 'test_group.5', 'test_group.6', 'test_group.7', 'test_group.8', 'test_group.9']
|
||||||
|
|
||||||
# Check search vector interface
|
# Check search vector interface
|
||||||
|
|
|
@ -5,13 +5,13 @@ from engine.controller.group_handler import GroupHandler
|
||||||
from engine.controller.index_file_handler import IndexFileHandler
|
from engine.controller.index_file_handler import IndexFileHandler
|
||||||
from engine.settings import ROW_LIMIT
|
from engine.settings import ROW_LIMIT
|
||||||
from flask import jsonify
|
from flask import jsonify
|
||||||
from engine import db
|
|
||||||
from engine.ingestion import build_index
|
from engine.ingestion import build_index
|
||||||
from engine.controller.scheduler import Scheduler
|
from engine.controller.scheduler import Scheduler
|
||||||
from engine.ingestion import serialize
|
from engine.ingestion import serialize
|
||||||
from engine.controller.meta_manager import MetaManager
|
from engine.controller.meta_manager import MetaManager
|
||||||
from engine.controller.error_code import ErrorCode
|
from engine.controller.error_code import ErrorCode
|
||||||
from engine.controller.storage_manager import StorageManager
|
from engine.controller.storage_manager import StorageManager
|
||||||
|
from datetime import date
|
||||||
import sys, os
|
import sys, os
|
||||||
|
|
||||||
class VectorEngine(object):
|
class VectorEngine(object):
|
||||||
|
@ -63,28 +63,58 @@ class VectorEngine(object):
|
||||||
|
|
||||||
return VectorEngine.SUCCESS_CODE, group_list
|
return VectorEngine.SUCCESS_CODE, group_list
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def AddVectorToNewFile(group_name):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def AddVectorToExistFile(group_name):
|
||||||
|
pass
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def AddVector(group_name, vectors):
|
def AddVector(group_name, vectors):
|
||||||
print(group_name, vectors)
|
print(group_name, vectors)
|
||||||
error, _ = MetaManager.GetGroup(group_name)
|
error, group = MetaManager.GetGroup(group_name)
|
||||||
if error == VectorEngine.FAULT_CODE:
|
if error == VectorEngine.FAULT_CODE:
|
||||||
return VectorEngine.GROUP_NOT_EXIST, 'invalid'
|
return VectorEngine.GROUP_NOT_EXIST, 'invalid'
|
||||||
|
|
||||||
|
# first raw file
|
||||||
|
raw_filename = str(group.file_number)
|
||||||
|
files = MetaManager.GetAllRawFiles(group_name)
|
||||||
|
|
||||||
|
current_raw_row_number = 0
|
||||||
|
current_raw_file = None
|
||||||
|
if files != None:
|
||||||
|
for file in files:
|
||||||
|
if file.filename == raw_filename:
|
||||||
|
current_raw_file = file
|
||||||
|
current_raw_row_number = file.row_number
|
||||||
|
print(raw_filename)
|
||||||
|
else:
|
||||||
|
print("---- To Build Index")
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
|
||||||
vector_str_list = []
|
vector_str_list = []
|
||||||
for vector in vectors:
|
|
||||||
file = FileTable.query.filter(FileTable.group_name == group_name).filter(FileTable.type == 'raw').first()
|
|
||||||
group = GroupTable.query.filter(GroupTable.group_name == group_name).first()
|
|
||||||
|
|
||||||
if file:
|
# Verify if the row number + incoming row > limit
|
||||||
print('insert into exist file')
|
incoming_row_number = len(vectors)
|
||||||
# create vector id
|
|
||||||
vector_id = file.seq_no + 1
|
|
||||||
# insert into raw file
|
|
||||||
VectorEngine.InsertVectorIntoRawFile(group_name, file.filename, vector, vector_id)
|
|
||||||
|
|
||||||
# check if the file can be indexed
|
start_row_index = 0
|
||||||
if file.row_number + 1 >= ROW_LIMIT:
|
total_row_number = group.row_number
|
||||||
|
table_row_number = current_raw_row_number
|
||||||
|
if current_raw_row_number + incoming_row_number > ROW_LIMIT:
|
||||||
|
# Insert into exist raw file
|
||||||
|
start_row_index = ROW_LIMIT - current_raw_row_number
|
||||||
|
|
||||||
|
for i in range(0, start_row_index, 1):
|
||||||
|
total_row_number += 1
|
||||||
|
vector_id = total_row_number
|
||||||
|
VectorEngine.InsertVectorIntoRawFile(group_name, raw_filename, vectors[i], vector_id)
|
||||||
|
++ table_row_number
|
||||||
|
vector_str_list.append(group_name + '.' + str(vector_id))
|
||||||
|
|
||||||
|
# Build index
|
||||||
raw_vector_array, raw_vector_id_array = VectorEngine.GetVectorListFromRawFile(group_name)
|
raw_vector_array, raw_vector_id_array = VectorEngine.GetVectorListFromRawFile(group_name)
|
||||||
d = group.dimension
|
d = group.dimension
|
||||||
|
|
||||||
|
@ -96,33 +126,38 @@ class VectorEngine(object):
|
||||||
index_filename = file.filename + '_index'
|
index_filename = file.filename + '_index'
|
||||||
serialize.write_index(file_name=index_filename, index=index)
|
serialize.write_index(file_name=index_filename, index=index)
|
||||||
|
|
||||||
FileTable.query.filter(FileTable.group_name == group_name).filter(FileTable.type == 'raw').update({'row_number':file.row_number + 1,
|
UpdateFile(file.filename, {'row_number': ROW_LIMIT, 'type': 'index', 'filename': index_filename})
|
||||||
'type': 'index',
|
|
||||||
'filename': index_filename,
|
|
||||||
'seq_no': file.seq_no + 1})
|
|
||||||
db.session.commit()
|
|
||||||
VectorEngine.group_dict = None
|
|
||||||
else:
|
|
||||||
# we still can insert into exist raw file, update database
|
|
||||||
FileTable.query.filter(FileTable.group_name == group_name).filter(FileTable.type == 'raw').update({'row_number':file.row_number + 1,
|
|
||||||
'seq_no': file.seq_no + 1})
|
|
||||||
db.session.commit()
|
|
||||||
print('Update db for raw file insertion')
|
|
||||||
|
|
||||||
else:
|
# create new raw file name
|
||||||
print('add a new raw file')
|
raw_filename = str(group.file_number + 1)
|
||||||
# first raw file
|
table_row_number = 0
|
||||||
raw_filename = group_name + '.raw'
|
|
||||||
# create vector id
|
|
||||||
vector_id = 0
|
|
||||||
# create and insert vector into raw file
|
|
||||||
VectorEngine.InsertVectorIntoRawFile(group_name, raw_filename, vector, vector_id)
|
|
||||||
# insert a record into database
|
|
||||||
db.session.add(FileTable(group_name, raw_filename, 'raw', 1))
|
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
|
# update file table
|
||||||
|
MetaManager.CreateRawFile(group_name, raw_filename)
|
||||||
|
|
||||||
|
# Append vectors to raw file
|
||||||
|
if current_raw_file == None:
|
||||||
|
# update file table
|
||||||
|
MetaManager.CreateRawFile(group_name, raw_filename)
|
||||||
|
|
||||||
|
# 1. update db
|
||||||
|
new_group_file_number = group.file_number + 1
|
||||||
|
new_group_row_number = int(group.row_number) + incoming_row_number
|
||||||
|
MetaManager.UpdateGroup(group_name, {'file_number': new_group_file_number, 'row_number': new_group_row_number})
|
||||||
|
|
||||||
|
# 2. store vector into raw files
|
||||||
|
for i in range (start_row_index, incoming_row_number, 1):
|
||||||
|
vector_id = total_row_number
|
||||||
|
total_row_number += 1
|
||||||
|
VectorEngine.InsertVectorIntoRawFile(group_name, raw_filename, vectors[i], vector_id)
|
||||||
|
++ table_row_number
|
||||||
vector_str_list.append(group_name + '.' + str(vector_id))
|
vector_str_list.append(group_name + '.' + str(vector_id))
|
||||||
|
|
||||||
|
MetaManager.UpdateFile(raw_filename, {'row_number': table_row_number})
|
||||||
|
|
||||||
|
MetaManager.UpdateGroup(group_name, {'row_number': total_row_number})
|
||||||
|
# 3. sync
|
||||||
|
MetaManager.Sync()
|
||||||
return VectorEngine.SUCCESS_CODE, vector_str_list
|
return VectorEngine.SUCCESS_CODE, vector_str_list
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ class FileTable(db.Model):
|
||||||
filename = db.Column(db.String(100))
|
filename = db.Column(db.String(100))
|
||||||
type = db.Column(db.String(100))
|
type = db.Column(db.String(100))
|
||||||
row_number = db.Column(db.Integer)
|
row_number = db.Column(db.Integer)
|
||||||
seq_no = db.Column(db.Integer)
|
date = db.Column(db.Date)
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, group_name, filename, type, row_number):
|
def __init__(self, group_name, filename, type, row_number):
|
||||||
|
@ -16,7 +16,6 @@ class FileTable(db.Model):
|
||||||
self.type = type
|
self.type = type
|
||||||
self.row_number = row_number
|
self.row_number = row_number
|
||||||
self.type = type
|
self.type = type
|
||||||
self.seq_no = 0
|
|
||||||
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
|
|
@ -5,6 +5,7 @@ class GroupTable(db.Model):
|
||||||
id = db.Column(db.Integer, primary_key=True)
|
id = db.Column(db.Integer, primary_key=True)
|
||||||
group_name = db.Column(db.String(100))
|
group_name = db.Column(db.String(100))
|
||||||
file_number = db.Column(db.Integer)
|
file_number = db.Column(db.Integer)
|
||||||
|
row_number = db.Column(db.BigInteger)
|
||||||
dimension = db.Column(db.Integer)
|
dimension = db.Column(db.Integer)
|
||||||
|
|
||||||
|
|
||||||
|
@ -12,6 +13,7 @@ class GroupTable(db.Model):
|
||||||
self.group_name = group_name
|
self.group_name = group_name
|
||||||
self.dimension = dimension
|
self.dimension = dimension
|
||||||
self.file_number = 0
|
self.file_number = 0
|
||||||
|
self.row_number = 0
|
||||||
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
|
Loading…
Reference in New Issue