influxdb/pkg/csv2lp/data_conversion.go

305 lines
8.8 KiB
Go

package csv2lp
import (
"encoding/base64"
"errors"
"fmt"
"io"
"math"
"strconv"
"strings"
"time"
"golang.org/x/text/encoding/ianaindex"
)
// Names of the column data types understood by toTypedValue,
// see https://v2.docs.influxdata.com/v2.0/reference/syntax/annotated-csv/#valid-data-types
const (
	stringDatatype       = "string"       // value is used as it is
	doubleDatatype       = "double"       // parsed with strconv.ParseFloat
	boolDatatype         = "boolean"      // decided by the first character of the value
	longDatatype         = "long"         // parsed with strconv.ParseInt, fraction part is dropped
	uLongDatatype        = "unsignedLong" // parsed with strconv.ParseUint, fraction part is dropped
	durationDatatype     = "duration"     // parsed with time.ParseDuration
	base64BinaryDataType = "base64Binary" // decoded with standard base64 encoding
	dateTimeDatatype     = "dateTime"     // parsed according to the column's data format
)
// Predefined data formats of dateTime columns.
const (
	// RFC3339 makes toTypedValue parse the value with the time.RFC3339 layout.
	RFC3339 = "RFC3339"
	// RFC3339Nano makes toTypedValue parse the value with the time.RFC3339Nano layout.
	RFC3339Nano = "RFC3339Nano"
	// dataFormatNumber is the same as long, but serialized without the i suffix;
	// it is used for timestamps.
	dataFormatNumber = "number"
)
// supportedDataTypes is the set of data type names that IsTypeSupported accepts.
// The empty name is a member of the set as well.
var supportedDataTypes = map[string]struct{}{
	"":                   {},
	stringDatatype:       {},
	doubleDatatype:       {},
	boolDatatype:         {},
	longDatatype:         {},
	uLongDatatype:        {},
	durationDatatype:     {},
	base64BinaryDataType: {},
	dateTimeDatatype:     {},
}
// IsTypeSupported reports whether dataType is a member of supportedDataTypes.
func IsTypeSupported(dataType string) bool {
	_, ok := supportedDataTypes[dataType]
	return ok
}
// Replacers used by escapeMeasurement, escapeTag and escapeString; each one
// prefixes the listed characters with a backslash.
var (
	// replaceMeasurement escapes commas and spaces.
	replaceMeasurement = strings.NewReplacer(",", `\,`, " ", `\ `)
	// replaceTag escapes commas, spaces and equal signs.
	replaceTag = strings.NewReplacer(",", `\,`, " ", `\ `, "=", `\=`)
	// replaceQuoted escapes double quotes and backslashes.
	replaceQuoted = strings.NewReplacer(`"`, `\"`, `\`, `\\`)
)
// escapeMeasurement returns val with every comma and space escaped by
// replaceMeasurement. A value that contains neither character is returned
// unchanged without running the replacer.
func escapeMeasurement(val string) string {
	if !strings.ContainsAny(val, ", ") {
		return val
	}
	return replaceMeasurement.Replace(val)
}
// escapeTag returns val with every comma, space and equal sign escaped by
// replaceTag. A value that contains none of these characters is returned
// unchanged without running the replacer.
func escapeTag(val string) string {
	if !strings.ContainsAny(val, ", =") {
		return val
	}
	return replaceTag.Replace(val)
}
// escapeString returns val with every double quote and backslash escaped by
// replaceQuoted. A value that contains neither character is returned
// unchanged without running the replacer.
func escapeString(val string) string {
	if !strings.ContainsAny(val, "\"\\") {
		return val
	}
	return replaceQuoted.Replace(val)
}
// normalizeNumberString rewrites a locale-specific number string into a form
// that strconv can parse, driven by the supplied format.
//
// The first character of format is the fraction delimiter; it is written to the
// result as '.'. All following characters of format are dropped from the value,
// they typically separate groups of digits in large numbers. An empty format
// stands for ". \n\t\r_". When removeFraction is true, the result ends right
// before the first fraction delimiter, so it holds no fraction part.
//
// A value that contains none of the format characters is returned unchanged.
//
// For example, a Spanish value '3.494.826.157,123' becomes a strconv-parseable
// float with format ",." .
func normalizeNumberString(value string, format string, removeFraction bool) string {
	if format == "" {
		format = ". \n\t\r_"
	}
	if !strings.ContainsAny(value, format) {
		return value
	}

	runes := []rune(format)
	fraction, separators := runes[0], runes[1:]

	var out strings.Builder
	out.Grow(len(value))
	for _, r := range value {
		// separators are tested first, so they win over the fraction delimiter
		dropped := false
		for _, sep := range separators {
			if r == sep {
				dropped = true
				break
			}
		}
		switch {
		case dropped:
			// grouping character, nothing to write
		case r != fraction:
			out.WriteRune(r)
		case removeFraction:
			// the rest of the value is the fraction part
			return out.String()
		default:
			out.WriteByte('.')
		}
	}
	return out.String()
}
// toTypedValue converts the string val to a typed value according to the column.
//
// When column.ParseF is set, it does the whole conversion. Otherwise
// column.DataType selects the conversion and column.DataFormat refines it:
//   - string: val itself
//   - double: float64, val is first normalized with the data format
//   - long / unsignedLong: int64 / uint64, val is first normalized with the
//     data format and its fraction part is removed
//   - boolean: decided by the first character of val
//   - duration: time.Duration parsed by time.ParseDuration
//   - base64Binary: bytes decoded with standard base64 encoding
//   - dateTime: time.Time; an empty format accepts a number of nanoseconds or
//     an RFC3339 value, "number" accepts only nanoseconds, "RFC3339" and
//     "RFC3339Nano" use the respective layouts, any other format is used as
//     a time layout (in column.TimeZone when it is set)
//
// An error is returned when val cannot be converted or the data type is unknown.
func toTypedValue(val string, column *CsvTableColumn) (interface{}, error) {
	if column.ParseF != nil {
		return column.ParseF(val)
	}
	format := column.DataFormat

	switch column.DataType {
	case stringDatatype:
		return val, nil
	case doubleDatatype:
		return strconv.ParseFloat(normalizeNumberString(val, format, false), 64)
	case longDatatype:
		return strconv.ParseInt(normalizeNumberString(val, format, true), 10, 64)
	case uLongDatatype:
		return strconv.ParseUint(normalizeNumberString(val, format, true), 10, 64)
	case boolDatatype:
		if val != "" {
			switch val[0] {
			case 't', 'T', 'y', 'Y', '1':
				return true, nil
			case 'f', 'F', 'n', 'N', '0':
				return false, nil
			}
		}
		return nil, errors.New("Unsupported boolean value '" + val + "' , first character is expected to be 't','f','0','1','y','n'")
	case durationDatatype:
		return time.ParseDuration(val)
	case base64BinaryDataType:
		return base64.StdEncoding.DecodeString(val)
	case dateTimeDatatype:
		switch format {
		case "": // number or time.RFC3339
			if nanos, err := strconv.ParseInt(val, 10, 64); err == nil {
				return time.Unix(0, nanos).UTC(), nil
			}
			return time.Parse(time.RFC3339, val)
		case dataFormatNumber:
			nanos, err := strconv.ParseInt(val, 10, 64)
			if err != nil {
				return nil, err
			}
			return time.Unix(0, nanos).UTC(), nil
		case RFC3339:
			return time.Parse(time.RFC3339, val)
		case RFC3339Nano:
			return time.Parse(time.RFC3339Nano, val)
		}
		// custom layout
		if column.TimeZone == nil {
			return time.Parse(format, val)
		}
		return time.ParseInLocation(format, val, column.TimeZone)
	}
	return nil, fmt.Errorf("unsupported data type '%s'", column.DataType)
}
// appendProtocolValue appends the serialized form of value to buffer and returns
// the extended buffer.
//
// Serialization by type:
//   - uint64: decimal digits followed by 'u'
//   - int64, int, time.Duration (in nanoseconds): decimal digits followed by 'i'
//   - float64, float32: shortest 'f'-formatted float64; NaN and infinite values
//     are rejected
//   - string: wrapped in double quotes after escapeString
//   - []byte: standard base64 text
//   - bool: true or false
//   - time.Time: Unix nanoseconds as decimal digits without a suffix
//
// On error (rejected float or unsupported type) the buffer is returned as it was
// received.
func appendProtocolValue(buffer []byte, value interface{}) ([]byte, error) {
	// shared by both float widths; float32 is widened by the caller
	appendFloat := func(f float64) ([]byte, error) {
		switch {
		case math.IsNaN(f):
			return buffer, errors.New("value is NaN")
		case math.IsInf(f, 0):
			return buffer, errors.New("value is Infinite")
		}
		return strconv.AppendFloat(buffer, f, 'f', -1, 64), nil
	}

	switch v := value.(type) {
	case string:
		buffer = append(buffer, '"')
		buffer = append(buffer, escapeString(v)...)
		return append(buffer, '"'), nil
	case bool:
		return strconv.AppendBool(buffer, v), nil
	case float64:
		return appendFloat(v)
	case float32:
		return appendFloat(float64(v))
	case int64:
		buffer = strconv.AppendInt(buffer, v, 10)
		return append(buffer, 'i'), nil
	case int:
		buffer = strconv.AppendInt(buffer, int64(v), 10)
		return append(buffer, 'i'), nil
	case uint64:
		buffer = strconv.AppendUint(buffer, v, 10)
		return append(buffer, 'u'), nil
	case time.Duration:
		buffer = strconv.AppendInt(buffer, v.Nanoseconds(), 10)
		return append(buffer, 'i'), nil
	case time.Time:
		return strconv.AppendInt(buffer, v.UnixNano(), 10), nil
	case []byte:
		return append(buffer, base64.StdEncoding.EncodeToString(v)...), nil
	}
	return buffer, fmt.Errorf("unsupported value type: %T", value)
}
// appendConverted appends val to buffer in the form required by the column.
// A column without a data type gets val appended verbatim; otherwise val is
// converted by toTypedValue and serialized by appendProtocolValue. When the
// conversion fails, the buffer is returned as received together with the error.
func appendConverted(buffer []byte, val string, column *CsvTableColumn) ([]byte, error) {
	if column.DataType == "" {
		// no type declared, keep the value as it is
		return append(buffer, val...), nil
	}
	typed, err := toTypedValue(val, column)
	if err != nil {
		return buffer, err
	}
	return appendProtocolValue(buffer, typed)
}
// decodeNop is the decoder that does no decoding: it hands back the very
// reader it was given. CreateDecoder returns it when no conversion is needed.
func decodeNop(reader io.Reader) io.Reader {
	return reader
}
// CreateDecoder creates a decoding reader from the supplied encoding to UTF-8, or returns an error
func CreateDecoder(encoding string) (func(io.Reader) io.Reader, error) {
if len(encoding) > 0 && encoding != "UTF-8" {
enc, err := ianaindex.IANA.Encoding(encoding)
if err != nil {
return nil, fmt.Errorf("%v, see https://www.iana.org/assignments/character-sets/character-sets.xhtml", err)
}
if enc == nil {
return nil, fmt.Errorf("unsupported encoding: %s", encoding)
}
return enc.NewDecoder().Reader, nil
}
return decodeNop, nil
}
// createBoolParseFn returns a function that converts a string value to boolean
// according to a format such as "true,yes,1:false,no,0": comma-separated truthy
// values, a colon, then comma-separated falsy values.
//
// The returned function yields false for a listed falsy value and true for
// a listed truthy value (falsy values are checked first). For any other value
// it yields false when the format has no falsy values, true when the format has
// no truthy values, and an error otherwise. When the format contains no ':',
// the returned function fails for every value with an error that describes the
// expected format.
func createBoolParseFn(format string) func(string) (interface{}, error) {
	colon := strings.Index(format, ":")
	if colon < 0 {
		err := fmt.Errorf("unsupported boolean format: %s should be in 'true,yes,1:false,no,0' format, but no ':' is present", format)
		return func(string) (interface{}, error) {
			return nil, err
		}
	}

	// an empty side of the format stays an empty list, it must not become [""]
	var truthy, falsy []string
	if t := format[:colon]; t != "" {
		truthy = strings.Split(t, ",")
	}
	if f := format[colon+1:]; f != "" {
		falsy = strings.Split(f, ",")
	}

	return func(val string) (interface{}, error) {
		for _, s := range falsy {
			if s == val {
				return false, nil
			}
		}
		for _, s := range truthy {
			if s == val {
				return true, nil
			}
		}
		switch {
		case len(falsy) == 0:
			// no falsy values defined: everything that is not truthy is false
			return false, nil
		case len(truthy) == 0:
			// no truthy values defined: everything that is not falsy is true
			return true, nil
		}
		return nil, fmt.Errorf("unsupported boolean value: %s must be one of %v or one of %v", val, truthy, falsy)
	}
}