305 lines
8.8 KiB
Go
305 lines
8.8 KiB
Go
package csv2lp
|
|
|
|
import (
|
|
"encoding/base64"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"math"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"golang.org/x/text/encoding/ianaindex"
|
|
)
|
|
|
|
// see https://v2.docs.influxdata.com/v2.0/reference/syntax/annotated-csv/#valid-data-types
|
|
const (
|
|
stringDatatype = "string"
|
|
doubleDatatype = "double"
|
|
boolDatatype = "boolean"
|
|
longDatatype = "long"
|
|
uLongDatatype = "unsignedLong"
|
|
durationDatatype = "duration"
|
|
base64BinaryDataType = "base64Binary"
|
|
dateTimeDatatype = "dateTime"
|
|
)
|
|
|
|
// predefined dateTime formats
|
|
const (
|
|
RFC3339 = "RFC3339"
|
|
RFC3339Nano = "RFC3339Nano"
|
|
dataFormatNumber = "number" //the same as long, but serialized without i suffix, used for timestamps
|
|
)
|
|
|
|
var supportedDataTypes map[string]struct{}
|
|
|
|
func init() {
|
|
supportedDataTypes = make(map[string]struct{}, 9)
|
|
supportedDataTypes[stringDatatype] = struct{}{}
|
|
supportedDataTypes[doubleDatatype] = struct{}{}
|
|
supportedDataTypes[boolDatatype] = struct{}{}
|
|
supportedDataTypes[longDatatype] = struct{}{}
|
|
supportedDataTypes[uLongDatatype] = struct{}{}
|
|
supportedDataTypes[durationDatatype] = struct{}{}
|
|
supportedDataTypes[base64BinaryDataType] = struct{}{}
|
|
supportedDataTypes[dateTimeDatatype] = struct{}{}
|
|
supportedDataTypes[""] = struct{}{}
|
|
}
|
|
|
|
// IsTypeSupported returns true if the data type is supported
|
|
func IsTypeSupported(dataType string) bool {
|
|
_, supported := supportedDataTypes[dataType]
|
|
return supported
|
|
}
|
|
|
|
var replaceMeasurement *strings.Replacer = strings.NewReplacer(",", "\\,", " ", "\\ ")
|
|
var replaceTag *strings.Replacer = strings.NewReplacer(",", "\\,", " ", "\\ ", "=", "\\=")
|
|
var replaceQuoted *strings.Replacer = strings.NewReplacer("\"", "\\\"", "\\", "\\\\")
|
|
|
|
func escapeMeasurement(val string) string {
|
|
for i := 0; i < len(val); i++ {
|
|
if val[i] == ',' || val[i] == ' ' {
|
|
return replaceMeasurement.Replace(val)
|
|
}
|
|
}
|
|
return val
|
|
}
|
|
func escapeTag(val string) string {
|
|
for i := 0; i < len(val); i++ {
|
|
if val[i] == ',' || val[i] == ' ' || val[i] == '=' {
|
|
return replaceTag.Replace(val)
|
|
}
|
|
}
|
|
return val
|
|
}
|
|
func escapeString(val string) string {
|
|
for i := 0; i < len(val); i++ {
|
|
if val[i] == '"' || val[i] == '\\' {
|
|
return replaceQuoted.Replace(val)
|
|
}
|
|
}
|
|
return val
|
|
}
|
|
|
|
// normalizeNumberString normalizes the supplied value with the help of the format supplied.
|
|
// This normalization is intended to convert number strings of different locales to a strconv-parseable value.
|
|
//
|
|
// The format's first character is a fraction delimiter character. Next characters in the format
|
|
// are simply removed, they are typically used to visually separate groups in large numbers.
|
|
// The removeFaction parameter controls whether the returned value can contain also the fraction part.
|
|
//
|
|
// For example, to get a strconv-parseable float from a Spanish value '3.494.826.157,123', use format ",." .
|
|
func normalizeNumberString(value string, format string, removeFraction bool) string {
|
|
if len(format) == 0 {
|
|
format = ". \n\t\r_"
|
|
}
|
|
if strings.ContainsAny(value, format) {
|
|
formatRunes := []rune(format)
|
|
fractionRune := formatRunes[0]
|
|
ignored := formatRunes[1:]
|
|
retVal := strings.Builder{}
|
|
retVal.Grow(len(value))
|
|
ForAllCharacters:
|
|
for _, c := range value {
|
|
// skip ignored characters
|
|
for i := 0; i < len(ignored); i++ {
|
|
if c == ignored[i] {
|
|
continue ForAllCharacters
|
|
}
|
|
}
|
|
if c == fractionRune {
|
|
if removeFraction {
|
|
break ForAllCharacters
|
|
}
|
|
retVal.WriteByte('.')
|
|
} else {
|
|
retVal.WriteRune(c)
|
|
}
|
|
}
|
|
|
|
return retVal.String()
|
|
}
|
|
return value
|
|
}
|
|
|
|
func toTypedValue(val string, column *CsvTableColumn) (interface{}, error) {
|
|
dataType := column.DataType
|
|
dataFormat := column.DataFormat
|
|
if column.ParseF != nil {
|
|
return column.ParseF(val)
|
|
}
|
|
switch dataType {
|
|
case stringDatatype:
|
|
return val, nil
|
|
case dateTimeDatatype:
|
|
switch dataFormat {
|
|
case "": // number or time.RFC3339
|
|
t, err := strconv.ParseInt(val, 10, 64)
|
|
if err != nil {
|
|
return time.Parse(time.RFC3339, val)
|
|
}
|
|
return time.Unix(0, t).UTC(), nil
|
|
case RFC3339:
|
|
return time.Parse(time.RFC3339, val)
|
|
case RFC3339Nano:
|
|
return time.Parse(time.RFC3339Nano, val)
|
|
case dataFormatNumber:
|
|
t, err := strconv.ParseInt(val, 10, 64)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return time.Unix(0, t).UTC(), nil
|
|
default:
|
|
if column.TimeZone != nil {
|
|
return time.ParseInLocation(dataFormat, val, column.TimeZone)
|
|
}
|
|
return time.Parse(dataFormat, val)
|
|
}
|
|
case durationDatatype:
|
|
return time.ParseDuration(val)
|
|
case doubleDatatype:
|
|
return strconv.ParseFloat(normalizeNumberString(val, dataFormat, false), 64)
|
|
case boolDatatype:
|
|
switch {
|
|
case len(val) == 0:
|
|
return nil, errors.New("Unsupported boolean value '" + val + "' , first character is expected to be 't','f','0','1','y','n'")
|
|
case val[0] == 't' || val[0] == 'T' || val[0] == 'y' || val[0] == 'Y' || val[0] == '1':
|
|
return true, nil
|
|
case val[0] == 'f' || val[0] == 'F' || val[0] == 'n' || val[0] == 'N' || val[0] == '0':
|
|
return false, nil
|
|
default:
|
|
return nil, errors.New("Unsupported boolean value '" + val + "' , first character is expected to be 't','f','0','1','y','n'")
|
|
}
|
|
case longDatatype:
|
|
return strconv.ParseInt(normalizeNumberString(val, dataFormat, true), 10, 64)
|
|
case uLongDatatype:
|
|
return strconv.ParseUint(normalizeNumberString(val, dataFormat, true), 10, 64)
|
|
case base64BinaryDataType:
|
|
return base64.StdEncoding.DecodeString(val)
|
|
default:
|
|
return nil, fmt.Errorf("unsupported data type '%s'", dataType)
|
|
}
|
|
}
|
|
|
|
func appendProtocolValue(buffer []byte, value interface{}) ([]byte, error) {
|
|
switch v := value.(type) {
|
|
case uint64:
|
|
return append(strconv.AppendUint(buffer, v, 10), 'u'), nil
|
|
case int64:
|
|
return append(strconv.AppendInt(buffer, v, 10), 'i'), nil
|
|
case int:
|
|
return append(strconv.AppendInt(buffer, int64(v), 10), 'i'), nil
|
|
case float64:
|
|
if math.IsNaN(v) {
|
|
return buffer, errors.New("value is NaN")
|
|
}
|
|
if math.IsInf(v, 0) {
|
|
return buffer, errors.New("value is Infinite")
|
|
}
|
|
return strconv.AppendFloat(buffer, v, 'f', -1, 64), nil
|
|
case float32:
|
|
v32 := float64(v)
|
|
if math.IsNaN(v32) {
|
|
return buffer, errors.New("value is NaN")
|
|
}
|
|
if math.IsInf(v32, 0) {
|
|
return buffer, errors.New("value is Infinite")
|
|
}
|
|
return strconv.AppendFloat(buffer, v32, 'f', -1, 64), nil
|
|
case string:
|
|
buffer = append(buffer, '"')
|
|
buffer = append(buffer, escapeString(v)...)
|
|
buffer = append(buffer, '"')
|
|
return buffer, nil
|
|
case []byte:
|
|
buf := make([]byte, base64.StdEncoding.EncodedLen(len(v)))
|
|
base64.StdEncoding.Encode(buf, v)
|
|
return append(buffer, buf...), nil
|
|
case bool:
|
|
if v {
|
|
return append(buffer, "true"...), nil
|
|
}
|
|
return append(buffer, "false"...), nil
|
|
case time.Time:
|
|
return strconv.AppendInt(buffer, v.UnixNano(), 10), nil
|
|
case time.Duration:
|
|
return append(strconv.AppendInt(buffer, v.Nanoseconds(), 10), 'i'), nil
|
|
default:
|
|
return buffer, fmt.Errorf("unsupported value type: %T", v)
|
|
}
|
|
}
|
|
|
|
func appendConverted(buffer []byte, val string, column *CsvTableColumn) ([]byte, error) {
|
|
if len(column.DataType) == 0 { // keep the value as it is
|
|
return append(buffer, val...), nil
|
|
}
|
|
typedVal, err := toTypedValue(val, column)
|
|
if err != nil {
|
|
return buffer, err
|
|
}
|
|
return appendProtocolValue(buffer, typedVal)
|
|
}
|
|
|
|
func decodeNop(reader io.Reader) io.Reader {
|
|
return reader
|
|
}
|
|
|
|
// CreateDecoder creates a decoding reader from the supplied encoding to UTF-8, or returns an error
|
|
func CreateDecoder(encoding string) (func(io.Reader) io.Reader, error) {
|
|
if len(encoding) > 0 && encoding != "UTF-8" {
|
|
enc, err := ianaindex.IANA.Encoding(encoding)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%v, see https://www.iana.org/assignments/character-sets/character-sets.xhtml", err)
|
|
}
|
|
if enc == nil {
|
|
return nil, fmt.Errorf("unsupported encoding: %s", encoding)
|
|
}
|
|
return enc.NewDecoder().Reader, nil
|
|
}
|
|
return decodeNop, nil
|
|
}
|
|
|
|
// createBoolParseFn returns a function that converts a string value to boolean according to format "true,yes,1:false,no,0"
|
|
func createBoolParseFn(format string) func(string) (interface{}, error) {
|
|
var err error = nil
|
|
truthy := []string{}
|
|
falsy := []string{}
|
|
if !strings.Contains(format, ":") {
|
|
err = fmt.Errorf("unsupported boolean format: %s should be in 'true,yes,1:false,no,0' format, but no ':' is present", format)
|
|
} else {
|
|
colon := strings.Index(format, ":")
|
|
t := format[:colon]
|
|
f := format[colon+1:]
|
|
if t != "" {
|
|
truthy = strings.Split(t, ",")
|
|
}
|
|
if f != "" {
|
|
falsy = strings.Split(f, ",")
|
|
}
|
|
}
|
|
return func(val string) (interface{}, error) {
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, s := range falsy {
|
|
if s == val {
|
|
return false, nil
|
|
}
|
|
}
|
|
for _, s := range truthy {
|
|
if s == val {
|
|
return true, nil
|
|
}
|
|
}
|
|
if len(falsy) == 0 {
|
|
return false, nil
|
|
}
|
|
if len(truthy) == 0 {
|
|
return true, nil
|
|
}
|
|
|
|
return nil, fmt.Errorf("unsupported boolean value: %s must one of %v or one of %v", val, truthy, falsy)
|
|
}
|
|
}
|