Alter varChar type param's name to max_length (#17409)

Signed-off-by: xige-16 <xi.ge@zilliz.com>
pull/17413/head
xige-16 2022-06-07 15:58:06 +08:00 committed by GitHub
parent a9ec1f8864
commit b5c11a216d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 32 additions and 32 deletions

View File

@ -24,4 +24,4 @@ const int64_t INVALID_SEG_OFFSET = -1;
const milvus::PkType INVALID_PK; // of std::monostate if not set.
// TODO: default field start id, could get from config.yaml
const int64_t START_USER_FIELDID = 100;
const char MAX_LENGTH_PER_ROW[] = "max_length_per_row";
const char MAX_LENGTH[] = "max_length";

View File

@ -141,8 +141,8 @@ class FieldMeta {
Assert(!is_vector());
}
FieldMeta(const FieldName& name, FieldId id, DataType type, int64_t max_length_per_row)
: name_(name), id_(id), type_(type), string_info_(StringInfo{max_length_per_row}) {
FieldMeta(const FieldName& name, FieldId id, DataType type, int64_t max_length)
: name_(name), id_(id), type_(type), string_info_(StringInfo{max_length}) {
Assert(is_string());
}
@ -174,7 +174,7 @@ class FieldMeta {
get_max_len() const {
Assert(is_string());
Assert(string_info_.has_value());
return string_info_->max_length_per_row;
return string_info_->max_length;
}
std::optional<MetricType>
@ -204,7 +204,7 @@ class FieldMeta {
if (is_vector()) {
return datatype_sizeof(type_, get_dim());
} else if (is_string()) {
return string_info_->max_length_per_row;
return string_info_->max_length;
} else {
return datatype_sizeof(type_);
}
@ -216,7 +216,7 @@ class FieldMeta {
std::optional<MetricType> metric_type_;
};
struct StringInfo {
int64_t max_length_per_row;
int64_t max_length;
};
FieldName name_;
FieldId id_;

View File

@ -70,8 +70,8 @@ Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) {
}
} else if (datatype_is_string(data_type)) {
auto type_map = RepeatedKeyValToMap(child.type_params());
AssertInfo(type_map.count(MAX_LENGTH_PER_ROW), "max_length_per_row not found");
auto max_len = boost::lexical_cast<int64_t>(type_map.at(MAX_LENGTH_PER_ROW));
AssertInfo(type_map.count(MAX_LENGTH), "max_length not found");
auto max_len = boost::lexical_cast<int64_t>(type_map.at(MAX_LENGTH));
schema->AddField(name, field_id, data_type, max_len);
} else {
schema->AddField(name, field_id, data_type);

View File

@ -60,8 +60,8 @@ class Schema {
// string type
void
AddField(const FieldName& name, const FieldId id, DataType data_type, int64_t max_length_per_row) {
auto field_meta = FieldMeta(name, id, data_type, max_length_per_row);
AddField(const FieldName& name, const FieldId id, DataType data_type, int64_t max_length) {
auto field_meta = FieldMeta(name, id, data_type, max_length);
this->AddField(std::move(field_meta));
}

View File

@ -100,7 +100,7 @@ func newTestSchema() *schemapb.CollectionSchema {
Description: "schema for test used",
AutoID: false,
Fields: []*schemapb.FieldSchema{
{FieldID: 1, Name: "field1", IsPrimaryKey: false, Description: "field no.1", DataType: schemapb.DataType_VarChar, TypeParams: []*commonpb.KeyValuePair{{Key: "max_length_per_row", Value: "100"}}},
{FieldID: 1, Name: "field1", IsPrimaryKey: false, Description: "field no.1", DataType: schemapb.DataType_VarChar, TypeParams: []*commonpb.KeyValuePair{{Key: "max_length", Value: "100"}}},
{FieldID: 2, Name: "field2", IsPrimaryKey: false, Description: "field no.2", DataType: schemapb.DataType_FloatVector},
},
}

View File

@ -375,7 +375,7 @@ func (mf *MetaFactory) GetFieldSchema() []*schemapb.FieldSchema {
DataType: schemapb.DataType_VarChar,
TypeParams: []*commonpb.KeyValuePair{
{
Key: "max_length_per_row",
Key: "max_length",
Value: "100",
},
},

View File

@ -652,7 +652,7 @@ func (cct *createCollectionTask) PreExecute(ctx context.Context) error {
}
}
// valid max length per row parameters
// if max_length_per_row not specified, return error
// if max_length not specified, return error
if field.DataType == schemapb.DataType_VarChar {
err = validateMaxLengthPerRow(cct.schema.Name, field)
if err != nil {

View File

@ -123,7 +123,7 @@ func constructCollectionSchemaByDataType(collectionName string, fieldName2DataTy
if dataType == schemapb.DataType_VarChar {
fieldSchema.TypeParams = []*commonpb.KeyValuePair{
{
Key: "max_length_per_row",
Key: "max_length",
Value: strconv.Itoa(testMaxVarCharLength),
},
}

View File

@ -36,7 +36,7 @@ const boundedTS = 2
const enableMultipleVectorFields = false
// maximum length of variable-length strings
const maxVarCharLengthKey = "max_length_per_row"
const maxVarCharLengthKey = "max_length"
const defaultMaxVarCharLength = 65535
// isAlpha check if c is alpha.
@ -199,7 +199,7 @@ func validateMaxLengthPerRow(collectionName string, field *schemapb.FieldSchema)
exist := false
for _, param := range field.TypeParams {
if param.Key != maxVarCharLengthKey {
return fmt.Errorf("type param key(max_length_per_row) should be specified for varChar field, not %s", param.Key)
return fmt.Errorf("type param key(max_length) should be specified for varChar field, not %s", param.Key)
}
maxLengthPerRow, err := strconv.ParseInt(param.Value, 10, 64)
@ -211,9 +211,9 @@ func validateMaxLengthPerRow(collectionName string, field *schemapb.FieldSchema)
}
exist = true
}
// if not exist type params max_length_per_row, return error
// if not exist type params max_length, return error
if !exist {
return fmt.Errorf("type param(max_length_per_row) should be specified for varChar field of collection %s", collectionName)
return fmt.Errorf("type param(max_length) should be specified for varChar field of collection %s", collectionName)
}
return nil

View File

@ -217,7 +217,7 @@ func TestValidatePrimaryKey(t *testing.T) {
DataType: schemapb.DataType_VarChar,
TypeParams: []*commonpb.KeyValuePair{
{
Key: "max_length_per_row",
Key: "max_length",
Value: "100",
},
},

View File

@ -112,7 +112,7 @@ func strKeySchema() *schemapb.CollectionSchema {
Description: "uid",
DataType: schemapb.DataType_VarChar,
TypeParams: []*commonpb.KeyValuePair{
{Key: "max_length_per_row", Value: "1024"},
{Key: "max_length", Value: "1024"},
},
},
{

View File

@ -35,13 +35,13 @@ func GetMaxLengthOfVarLengthField(fieldSchema *schemapb.FieldSchema) (int, error
paramsMap[p.Key] = p.Value
}
maxLengthPerRowKey := "max_length_per_row"
maxLengthPerRowKey := "max_length"
switch fieldSchema.DataType {
case schemapb.DataType_VarChar:
maxLengthPerRowValue, ok := paramsMap[maxLengthPerRowKey]
if !ok {
return 0, fmt.Errorf("the max_length_per_row was not specified, field type is %s", fieldSchema.DataType.String())
return 0, fmt.Errorf("the max_length was not specified, field type is %s", fieldSchema.DataType.String())
}
maxLength, err = strconv.Atoi(maxLengthPerRowValue)
if err != nil {

View File

@ -85,7 +85,7 @@ func TestSchema(t *testing.T) {
DataType: 21,
TypeParams: []*commonpb.KeyValuePair{
{
Key: "max_length_per_row",
Key: "max_length",
Value: "125",
},
},

View File

@ -35,7 +35,7 @@ def hello_milvus(host="127.0.0.1"):
default_fields = [
FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="float", dtype=DataType.FLOAT),
FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length_per_row=65535),
FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
]
default_schema = CollectionSchema(fields=default_fields, description="test collection")

View File

@ -29,7 +29,7 @@ def hello_milvus(collection_name):
default_fields = [
FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="float", dtype=DataType.FLOAT),
FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length_per_row=65535),
FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
]
default_schema = CollectionSchema(fields=default_fields, description="test collection")

View File

@ -48,8 +48,8 @@ def gen_bool_field(name=ct.default_bool_field_name, description=ct.default_desc,
is_primary=is_primary, **kwargs)
return bool_field
def gen_string_field(name=ct.default_string_field_name, description=ct.default_desc, is_primary=False, max_length_per_row=ct.default_length, **kwargs):
string_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.VARCHAR, description=description, max_length_per_row=max_length_per_row,
def gen_string_field(name=ct.default_string_field_name, description=ct.default_desc, is_primary=False, max_length=ct.default_length, **kwargs):
string_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.VARCHAR, description=description, max_length=max_length,
is_primary=is_primary, **kwargs)
return string_field

View File

@ -9,7 +9,7 @@ allure-pytest==2.7.0
pytest-print==0.2.1
pytest-level==0.1.1
pytest-xdist==2.2.1
pymilvus==2.1.0.dev66
pymilvus==2.1.0.dev69
pytest-rerunfailures==9.1.1
git+https://github.com/Projectplace/pytest-tags
ndg-httpsclient

View File

@ -2811,7 +2811,7 @@ class TestCollectionString(TestcaseBase):
"""
target: test create collection with string field
method: 1. create collection with string field
2. String field max_length_per_row exceeds maximum
2. String field max_length exceeds maximum
expected: Raise exception
"""
self._connect()
@ -2819,9 +2819,9 @@ class TestCollectionString(TestcaseBase):
int_field = cf.gen_int64_field(is_primary=True)
vec_field = cf.gen_float_vec_field()
max_length = 100000
string_field = cf.gen_string_field(max_length_per_row=max_length)
string_field = cf.gen_string_field(max_length=max_length)
schema = cf.gen_collection_schema([int_field, string_field, vec_field])
error = {ct.err_code: 0, ct.err_msg: "invalid max_length_per_row: %s" % max_length}
error = {ct.err_code: 0, ct.err_msg: "invalid max_length: %s" % max_length}
self.collection_wrap.init_collection(name=c_name, schema=schema,
check_task=CheckTasks.err_res, check_items=error)

View File

@ -1179,7 +1179,7 @@ class TestInsertString(TestcaseBase):
nums = 70000
field_one = cf.gen_int64_field()
field_two = cf.gen_float_field()
field_three = cf.gen_string_field(max_length_per_row=nums)
field_three = cf.gen_string_field(max_length=nums)
vec_field = cf.gen_float_vec_field()
df = [field_one, field_two, field_three, vec_field]
error = {ct.err_code: 0, ct.err_msg: 'Data type is not support.'}