// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package importutil

import (
	"bytes"
	"encoding/binary"
	"io"
	"os"
	"strconv"
	"strings"
	"testing"

	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/sbinet/npyio/npy"
	"github.com/stretchr/testify/assert"
)

// MockReader is a stub reader: its Read method never fills the buffer and
// always reports io.EOF. It is not referenced in the visible part of this file.
type MockReader struct {
}

// Read ignores p and returns 0 bytes together with io.EOF.
func (r *MockReader) Read(p []byte) (n int, err error) {
	return 0, io.EOF
}

// Test_CreateNumpyFile checks that CreateNumpyFile returns an error both for a
// target path under a non-existent directory and for a map[string]int payload.
func Test_CreateNumpyFile(t *testing.T) {
	// directory doesn't exist
	data1 := []float32{1, 2, 3, 4, 5}
	err := CreateNumpyFile("/dummy_not_exist/dummy.npy", data1)
	assert.Error(t, err)

	// invalid data type
	data2 := make(map[string]int)
	err = CreateNumpyFile("/tmp/dummy.npy", data2)
	assert.Error(t, err)
}

// Test_CreateNumpyData checks that CreateNumpyData yields a non-nil buffer and
// no error for a []float32, and a nil buffer plus an error for a map[string]int.
func Test_CreateNumpyData(t *testing.T) {
	// valid data: a float32 slice is expected to be accepted
	data1 := []float32{1, 2, 3, 4, 5}
	buf, err := CreateNumpyData(data1)
	assert.NotNil(t, buf)
	assert.NoError(t, err)

	// invalid data type
	data2 := make(map[string]int)
	buf, err = CreateNumpyData(data2)
	assert.Error(t, err)
	assert.Nil(t, buf)
}

// Test_ConvertNumpyType checks that convertNumpyType maps every listed type
// string to the expected schemapb.DataType without error, and that an unknown
// string produces an error together with schemapb.DataType_None.
//
// NOTE(review): several input lists repeat the same string (e.g. "i2", "i2").
// These may be leftovers of distinct variants lost to the same text damage
// noted in Test_NumpyAdapterSetByteOrder - confirm against version control.
func Test_ConvertNumpyType(t *testing.T) {
	// checkFunc asserts that each input converts to output with no error.
	checkFunc := func(inputs []string, output schemapb.DataType) {
		for i := 0; i < len(inputs); i++ {
			dt, err := convertNumpyType(inputs[i])
			assert.NoError(t, err)
			assert.Equal(t, output, dt)
		}
	}

	checkFunc([]string{"b1", "i1", "int8"}, schemapb.DataType_Int8)
	checkFunc([]string{"i2", "i2", "int16"}, schemapb.DataType_Int16)
	checkFunc([]string{"i4", "i4", "int32"}, schemapb.DataType_Int32)
	checkFunc([]string{"i8", "i8", "int64"}, schemapb.DataType_Int64)
	checkFunc([]string{"f4", "f4", "float32"}, schemapb.DataType_Float)
	checkFunc([]string{"f8", "f8", "float64"}, schemapb.DataType_Double)

	// unknown type string
	dt, err := convertNumpyType("dummy")
	assert.Error(t, err)
	assert.Equal(t, schemapb.DataType_None, dt)
}

// Test_StringLen checks the three results of stringLen (length, UTF flag,
// error) for "S"- and "U"-style type strings and for an unparsable input.
//
// NOTE(review): the local variable named len shadows the builtin len for the
// whole function body; consider renaming it in a follow-up code change.
func Test_StringLen(t *testing.T) {
	len, utf, err := stringLen("S1")
	assert.Equal(t, 1, len)
	assert.False(t, utf)
	assert.NoError(t, err)

	len, utf, err = stringLen("2S")
	assert.Equal(t, 2, len)
	assert.False(t, utf)
	assert.NoError(t, err)

	len, utf, err = stringLen("4U")
	assert.Equal(t, 4, len)
	assert.True(t, utf)
	assert.NoError(t, err)

	// unparsable input: expect an error with zero length and a false UTF flag
	len, utf, err = stringLen("dummy")
	assert.Error(t, err)
	assert.Equal(t, 0, len)
	assert.False(t, utf)
}

// Test_NumpyAdapterSetByteOrder builds a NumpyAdapter with a nil reader and an
// empty npy.Reader, then checks the Reader and NpyReader accessors.
//
// NOTE(review): the text of this file is damaged starting at the string
// literal in the last assignment below. The remainder of this test and the
// opening of a later test (apparently its func header, its first t.Run call,
// and the declarations of npyReader, data, values and maxLen) are not present.
// The surviving tokens are reproduced verbatim and do not compile as they
// stand; restore the missing span from version control instead of guessing.
func Test_NumpyAdapterSetByteOrder(t *testing.T) {
	adapter := &NumpyAdapter{
		reader:    nil,
		npyReader: &npy.Reader{},
	}
	assert.Nil(t, adapter.Reader())
	assert.NotNil(t, adapter.NpyReader())

	adapter.npyReader.Header.Descr.Type = " maxLen {
				maxLen = len(str)
			}
		}
		// Pack every value into a fixed-width record of maxLen bytes,
		// padding values shorter than maxLen with zero bytes.
		for _, str := range values {
			for i := 0; i < maxLen; i++ {
				if i < len(str) {
					data = append(data, str[i])
				} else {
					data = append(data, 0)
				}
			}
		}
		// Append the number of values to the header shape.
		npyReader.Header.Descr.Shape = append(npyReader.Header.Descr.Shape, len(values))

		// Build the adapter directly over the in-memory bytes.
		adapter := &NumpyAdapter{
			reader:       strings.NewReader(string(data)),
			npyReader:    npyReader,
			readPosition: 0,
			dataType:     schemapb.DataType_VarChar,
		}

		// count must be greater than 0
		res, err := adapter.ReadString(0)
		assert.Error(t, err)
		assert.Nil(t, res)

		// maxLen is zero
		npyReader.Header.Descr.Type = "S0"
		res, err = adapter.ReadString(1)
		assert.Error(t, err)
		assert.Nil(t, res)

		// Set the type string to "S" followed by the real record width.
		npyReader.Header.Descr.Type = "S" + strconv.FormatInt(int64(maxLen), 10)

		// Request one more item than exists; exactly len(values) items are
		// expected back, with no error.
		res, err = adapter.ReadString(len(values) + 1)
		assert.NoError(t, err)
		assert.Equal(t, len(values), len(res))
		for i := 0; i < len(res); i++ {
			assert.Equal(t, values[i], res[i])
		}
	})

	// Round trip through a file: write strings with CreateNumpyFile, reopen the
	// file, and read them back in several ReadString calls. Per the subtest
	// name the file is expected to hold UTF-32 text - not verifiable from here.
	t.Run("test read ascii characters with utf32", func(t *testing.T) {
		filePath := TempFilesPath + "varchar1.npy"
		data := []string{"a ", "bbb", " c", "dd", "eeee", "fff"}
		err := CreateNumpyFile(filePath, data)
		assert.NoError(t, err)

		file, err := os.Open(filePath)
		assert.NoError(t, err)
		defer file.Close()

		adapter, err := NewNumpyAdapter(file)
		assert.NoError(t, err)

		// partial read: all but the last item
		res, err := adapter.ReadString(len(data) - 1)
		assert.NoError(t, err)
		assert.Equal(t, len(data)-1, len(res))

		for i := 0; i < len(res); i++ {
			assert.Equal(t, data[i], res[i])
		}

		// read the remaining data: only one item is left even though
		// len(data) items are requested
		res, err = adapter.ReadString(len(data))
		assert.NoError(t, err)
		assert.Equal(t, 1, len(res))
		assert.Equal(t, data[len(data)-1], res[0])

		// nothing left to read: expect a nil result and no error
		res, err = adapter.ReadString(len(data))
		assert.NoError(t, err)
		assert.Nil(t, res)
	})

	// Same file round trip with strings containing non-ASCII characters, read
	// back in a single ReadString call.
	t.Run("test read non-ascii characters with utf32", func(t *testing.T) {
		filePath := TempFilesPath + "varchar2.npy"
		data := []string{"で と ど ", " 马克bbb", "$(한)삼각*"}
		err := CreateNumpyFile(filePath, data)
		assert.NoError(t, err)

		file, err := os.Open(filePath)
		assert.NoError(t, err)
		defer file.Close()

		adapter, err := NewNumpyAdapter(file)
		assert.NoError(t, err)
		res, err := adapter.ReadString(len(data))
		assert.NoError(t, err)
		assert.Equal(t, len(data), len(res))

		for i := 0; i < len(res); i++ {
			assert.Equal(t, data[i], res[i])
		}
	})
}

// Test_DecodeUtf32 checks that decodeUtf32 rejects a two-byte input and that
// it decodes the same text from little-endian and big-endian byte sequences.
func Test_DecodeUtf32(t *testing.T) {
	// wrong input: only two bytes are supplied (presumably rejected because
	// the length is not a multiple of four - confirm in decodeUtf32)
	res, err := decodeUtf32([]byte{1, 2}, binary.LittleEndian)
	assert.Error(t, err)
	assert.Empty(t, res)

	// this string contains ascii characters and unicode characters
	str := "ad◤三百🎵ゐ↙"

	// utf32 littleEndian of str
	src := []byte{97, 0, 0, 0, 100, 0, 0, 0, 228, 37, 0, 0, 9, 78, 0, 0, 126, 118, 0, 0, 181, 243, 1, 0, 144, 48, 0, 0, 153, 33, 0, 0}
	res, err = decodeUtf32(src, binary.LittleEndian)
	assert.NoError(t, err)
	assert.Equal(t, str, res)

	// utf32 bigEndian of str
	src = []byte{0, 0, 0, 97, 0, 0, 0, 100, 0, 0, 37, 228, 0, 0, 78, 9, 0, 0, 118, 126, 0, 1, 243, 181, 0, 0, 48, 144, 0, 0, 33, 153}
	res, err = decodeUtf32(src, binary.BigEndian)
	assert.NoError(t, err)
	assert.Equal(t, str, res)
}