milvus/internal/storage/vector_chunk_manager.go

163 lines
4.6 KiB
Go

// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
package storage
import (
"bytes"
"encoding/binary"
"errors"
"io"
"github.com/milvus-io/milvus/internal/common"
"github.com/milvus-io/milvus/internal/proto/etcdpb"
)
// VectorChunkManager reads and writes vector data. It pairs a remote chunk
// manager, which holds the serialized vector files, with a local chunk
// manager that can serve as a cache of the decoded vectors.
type VectorChunkManager struct {
	// localChunkManager stores decoded vector data on the local side;
	// remoteChunkManager is where the serialized files are read from.
	localChunkManager, remoteChunkManager ChunkManager

	// schema is passed to the insert codec when a downloaded file is decoded.
	schema *etcdpb.CollectionMeta

	// localCacheEnable controls whether decoded vectors are written to and
	// served from localChunkManager.
	localCacheEnable bool
}
// NewVectorChunkManager creates a VectorChunkManager that uses the given local
// and remote chunk managers, decodes downloaded files with schema, and caches
// locally only when localCacheEnable is true.
func NewVectorChunkManager(localChunkManager ChunkManager, remoteChunkManager ChunkManager, schema *etcdpb.CollectionMeta, localCacheEnable bool) *VectorChunkManager {
	vcm := new(VectorChunkManager)
	vcm.localChunkManager = localChunkManager
	vcm.remoteChunkManager = remoteChunkManager
	vcm.schema = schema
	vcm.localCacheEnable = localCacheEnable
	return vcm
}
// downloadVectorFile returns the pure vector bytes stored under key.
//
// If the local chunk manager already holds key, its content is returned
// unchanged. Otherwise the file is read from the remote chunk manager and,
// because it is stored in binlog style, deserialized with an insert codec
// built from the collection schema. Binary vector data is returned as is;
// float vector data is encoded with common.Endian. If the decoded data holds
// more than one vector field, the last one visited wins; if it holds none,
// the result is nil.
//
// This function does not write to the local cache itself; callers do that.
func (vcm *VectorChunkManager) downloadVectorFile(key string) ([]byte, error) {
	if vcm.localChunkManager.Exist(key) {
		return vcm.localChunkManager.Read(key)
	}

	content, err := vcm.remoteChunkManager.Read(key)
	if err != nil {
		return nil, err
	}

	insertCodec := NewInsertCodec(vcm.schema)
	// Register Close before Deserialize so the codec is released on the
	// error path as well, not only after a successful decode.
	defer insertCodec.Close()

	_, _, data, err := insertCodec.Deserialize([]*Blob{{Key: key, Value: content}})
	if err != nil {
		return nil, err
	}

	var results []byte
	for _, singleData := range data.Data {
		switch field := singleData.(type) {
		case *BinaryVectorFieldData:
			results = field.Data
		case *FloatVectorFieldData:
			buf := new(bytes.Buffer)
			if err := binary.Write(buf, common.Endian, field.Data); err != nil {
				return nil, err
			}
			results = buf.Bytes()
		}
	}
	return results, nil
}
// GetPath returns the path of the vector data stored under key. The local
// path is returned only when the key exists locally and the local cache is
// enabled; in every other case the remote path is returned.
func (vcm *VectorChunkManager) GetPath(key string) (string, error) {
	useLocal := vcm.localChunkManager.Exist(key) && vcm.localCacheEnable
	if !useLocal {
		return vcm.remoteChunkManager.GetPath(key)
	}
	return vcm.localChunkManager.GetPath(key)
}
// Write stores content under key in the local chunk manager. It returns an
// error without writing anything when the local cache is disabled.
func (vcm *VectorChunkManager) Write(key string, content []byte) error {
	if vcm.localCacheEnable {
		return vcm.localChunkManager.Write(key, content)
	}
	return errors.New("Cannot write local file for local cache is not allowed")
}
// Exist reports whether the vector data for key is present in the local
// chunk manager. The remote chunk manager is not consulted.
func (vcm *VectorChunkManager) Exist(key string) bool {
	cached := vcm.localChunkManager.Exist(key)
	return cached
}
// Read returns the pure vector data stored under key.
//
// With the local cache disabled the data is downloaded on every call. With
// the cache enabled, a missing key is first downloaded and written to the
// local chunk manager, and the result is then always read back from there.
func (vcm *VectorChunkManager) Read(key string) ([]byte, error) {
	if !vcm.localCacheEnable {
		return vcm.downloadVectorFile(key)
	}

	if !vcm.localChunkManager.Exist(key) {
		// Cache miss: populate the local copy before serving from it.
		vector, err := vcm.downloadVectorFile(key)
		if err != nil {
			return nil, err
		}
		if err := vcm.localChunkManager.Write(key, vector); err != nil {
			return nil, err
		}
	}
	return vcm.localChunkManager.Read(key)
}
// ReadAt copies vector data for key, starting at byte offset off, into p and
// returns the number of bytes copied.
//
// With the local cache enabled, a missing key is downloaded and written to
// the local chunk manager first, and the read itself is delegated to the
// local chunk manager. With the cache disabled, the vector is downloaded and
// sliced in memory: a nil download or an offset outside [0, len(data)] yields
// an error with a count of 0, and io.EOF is returned when fewer than len(p)
// bytes were available. Download and cache-write failures return a count
// of -1.
func (vcm *VectorChunkManager) ReadAt(key string, p []byte, off int64) (int, error) {
	if !vcm.localCacheEnable {
		vector, err := vcm.downloadVectorFile(key)
		if err != nil {
			return -1, err
		}
		switch {
		case vector == nil:
			return 0, errors.New("vectorChunkManager: data downloaded is nil")
		case off < 0 || off > int64(len(vector)):
			return 0, errors.New("vectorChunkManager: invalid offset")
		}

		copied := copy(p, vector[off:])
		if copied < len(p) {
			// Short read: the data ended before p was filled.
			return copied, io.EOF
		}
		return copied, nil
	}

	if !vcm.localChunkManager.Exist(key) {
		// Cache miss: populate the local copy before reading from it.
		vector, err := vcm.downloadVectorFile(key)
		if err != nil {
			return -1, err
		}
		if err := vcm.localChunkManager.Write(key, vector); err != nil {
			return -1, err
		}
	}
	return vcm.localChunkManager.ReadAt(key, p, off)
}