mirror of https://github.com/milvus-io/milvus.git
parent
84baa93cb1
commit
8736372fd2
|
@ -57,12 +57,8 @@ VecIndexCreator::parse_impl(const std::string& serialized_params_str, knowhere::
|
|||
conf[key] = value;
|
||||
}
|
||||
|
||||
auto stoi_closure = [](const std::string& s) -> auto {
|
||||
return std::stoi(s);
|
||||
};
|
||||
auto stof_closure = [](const std::string& s) -> auto {
|
||||
return std::stof(s);
|
||||
};
|
||||
auto stoi_closure = [](const std::string& s) -> int { return std::stoi(s); };
|
||||
auto stof_closure = [](const std::string& s) -> float { return std::stof(s); };
|
||||
|
||||
/***************************** meta *******************************/
|
||||
check_parameter<int>(conf, knowhere::meta::DIM, stoi_closure, std::nullopt);
|
||||
|
|
|
@ -110,9 +110,21 @@ func getFileNameAndExt(filePath string) (string, string) {
|
|||
}
|
||||
|
||||
func (p *ImportWrapper) fileValidation(filePaths []string, rowBased bool) error {
|
||||
// use this map to check duplicate files
|
||||
files := make(map[string]struct{})
|
||||
|
||||
for i := 0; i < len(filePaths); i++ {
|
||||
filePath := filePaths[i]
|
||||
_, fileType := getFileNameAndExt(filePath)
|
||||
_, ok := files[filePath]
|
||||
if ok {
|
||||
// only check duplicate numpy file
|
||||
if fileType == NumpyFileExt {
|
||||
return errors.New("duplicate file: " + filePath)
|
||||
}
|
||||
} else {
|
||||
files[filePath] = struct{}{}
|
||||
}
|
||||
|
||||
// check file type
|
||||
if rowBased {
|
||||
|
@ -127,6 +139,9 @@ func (p *ImportWrapper) fileValidation(filePaths []string, rowBased bool) error
|
|||
|
||||
// check file size
|
||||
size, _ := p.chunkManager.Size(filePath)
|
||||
if size == 0 {
|
||||
return errors.New("the file " + filePath + " is empty")
|
||||
}
|
||||
if size > MaxFileSize {
|
||||
return errors.New("the file " + filePath + " size exceeds the maximum file size: " + strconv.FormatInt(MaxFileSize, 10) + " bytes")
|
||||
}
|
||||
|
@ -141,6 +156,7 @@ func (p *ImportWrapper) fileValidation(filePaths []string, rowBased bool) error
|
|||
func (p *ImportWrapper) Import(filePaths []string, rowBased bool, onlyValidate bool) error {
|
||||
err := p.fileValidation(filePaths, rowBased)
|
||||
if err != nil {
|
||||
log.Error("import error: " + err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -480,7 +496,7 @@ func (p *ImportWrapper) splitFieldsData(fieldsData map[storage.FieldID]storage.F
|
|||
|
||||
for name, count := range rowCounter {
|
||||
if count != rowCount {
|
||||
return errors.New("import error: field " + name + " row count " + strconv.Itoa(count) + " is not equal to other fields " + strconv.Itoa(rowCount))
|
||||
return errors.New("import error: field " + name + " row count " + strconv.Itoa(count) + " is not equal to other fields row count " + strconv.Itoa(rowCount))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"go.uber.org/zap"
|
||||
"golang.org/x/exp/mmap"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/common"
|
||||
"github.com/milvus-io/milvus/internal/log"
|
||||
|
@ -25,6 +26,70 @@ const (
|
|||
TempFilesPath = "/tmp/milvus_test/import/"
|
||||
)
|
||||
|
||||
type MockChunkManager struct {
|
||||
size int64
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) Path(filePath string) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) Reader(filePath string) (storage.FileReader, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) Write(filePath string, content []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) MultiWrite(contents map[string][]byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) Exist(filePath string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) Read(filePath string) ([]byte, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) MultiRead(filePaths []string) ([][]byte, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) ListWithPrefix(prefix string) ([]string, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) ReadWithPrefix(prefix string) ([]string, [][]byte, error) {
|
||||
return nil, nil, nil
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) ReadAt(filePath string, off int64, length int64) ([]byte, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) Mmap(filePath string) (*mmap.ReaderAt, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) Size(filePath string) (int64, error) {
|
||||
return mc.size, nil
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) Remove(filePath string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) MultiRemove(filePaths []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (mc *MockChunkManager) RemoveWithPrefix(prefix string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func Test_NewImportWrapper(t *testing.T) {
|
||||
f := dependency.NewDefaultFactory(true)
|
||||
ctx := context.Background()
|
||||
|
@ -591,3 +656,66 @@ func Test_ImportColumnBased_perf(t *testing.T) {
|
|||
|
||||
tr.Record("parse large json files: " + filePath1 + "," + filePath2)
|
||||
}
|
||||
|
||||
func Test_FileValidation(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
cm := &MockChunkManager{
|
||||
size: 1,
|
||||
}
|
||||
|
||||
idAllocator := newIDAllocator(ctx, t)
|
||||
schema := perfSchema(128)
|
||||
shardNum := 2
|
||||
segmentSize := 512 // unit: MB
|
||||
|
||||
wrapper := NewImportWrapper(ctx, schema, int32(shardNum), int64(segmentSize), idAllocator, cm, nil, nil, nil)
|
||||
|
||||
// duplicate files
|
||||
var files = [2]string{"1.npy", "1.npy"}
|
||||
err := wrapper.fileValidation(files[:], false)
|
||||
assert.NotNil(t, err)
|
||||
err = wrapper.fileValidation(files[:], true)
|
||||
assert.NotNil(t, err)
|
||||
|
||||
// unsupported file type
|
||||
files[0] = "1"
|
||||
files[1] = "1"
|
||||
err = wrapper.fileValidation(files[:], true)
|
||||
assert.NotNil(t, err)
|
||||
|
||||
err = wrapper.fileValidation(files[:], false)
|
||||
assert.NotNil(t, err)
|
||||
|
||||
// valid cases
|
||||
files[0] = "1.json"
|
||||
files[1] = "2.json"
|
||||
err = wrapper.fileValidation(files[:], true)
|
||||
assert.Nil(t, err)
|
||||
|
||||
files[1] = "2.npy"
|
||||
err = wrapper.fileValidation(files[:], false)
|
||||
assert.Nil(t, err)
|
||||
|
||||
// empty file
|
||||
cm = &MockChunkManager{
|
||||
size: 0,
|
||||
}
|
||||
wrapper = NewImportWrapper(ctx, schema, int32(shardNum), int64(segmentSize), idAllocator, cm, nil, nil, nil)
|
||||
err = wrapper.fileValidation(files[:], true)
|
||||
assert.NotNil(t, err)
|
||||
|
||||
err = wrapper.fileValidation(files[:], false)
|
||||
assert.NotNil(t, err)
|
||||
|
||||
// file size exceed limit
|
||||
cm = &MockChunkManager{
|
||||
size: MaxFileSize + 1,
|
||||
}
|
||||
wrapper = NewImportWrapper(ctx, schema, int32(shardNum), int64(segmentSize), idAllocator, cm, nil, nil, nil)
|
||||
err = wrapper.fileValidation(files[:], true)
|
||||
assert.NotNil(t, err)
|
||||
|
||||
err = wrapper.fileValidation(files[:], false)
|
||||
assert.NotNil(t, err)
|
||||
}
|
||||
|
|
|
@ -350,7 +350,7 @@ func (v *JSONColumnValidator) Handle(columns map[storage.FieldID][]interface{})
|
|||
if rowCount == -1 {
|
||||
rowCount = counter
|
||||
} else if rowCount != counter {
|
||||
return errors.New("JSON column validator: the field " + k + " row count " + strconv.Itoa(int(counter)) + " is not equal to other fields " + strconv.Itoa(int(rowCount)))
|
||||
return errors.New("JSON column validator: the field " + k + " row count " + strconv.Itoa(int(counter)) + " is not equal to other fields row count" + strconv.Itoa(int(rowCount)))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue