feat: Error when parsing lines with duplicate tags

pull/24376/head
Jake Goulding 2020-03-11 22:02:13 -04:00
parent 167750669b
commit 4fd0c6f210
3 changed files with 60 additions and 26 deletions

23
Cargo.lock generated
View File

@ -408,6 +408,7 @@ dependencies = [
"csv 1.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"dotenv 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)",
"either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
"futures 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
"http 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -424,6 +425,7 @@ dependencies = [
"serde 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.48 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_urlencoded 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
"snafu 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
"tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"tonic 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"tonic-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1555,6 +1557,25 @@ name = "smallvec"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "snafu"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"snafu-derive 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "snafu-derive"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 1.0.9 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 1.0.16 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "socket2"
version = "0.3.11"
@ -2386,6 +2407,8 @@ dependencies = [
"checksum signal-hook-registry 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "94f478ede9f64724c5d173d7bb56099ec3e2d9fc2774aac65d34b8b890405f41"
"checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8"
"checksum smallvec 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5c2fb2ec9bcd216a5b0d0ccf31ab17b5ed1d627960edff65bbe95d3ce221cefc"
"checksum snafu 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "546db9181bce2aa22ed883c33d65603b76335b4c2533a98289f54265043de7a1"
"checksum snafu-derive 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bdc75da2e0323f297402fd9c8fdba709bb04e4c627cbe31d19a2c91fc8d9f0e2"
"checksum socket2 0.3.11 (registry+https://github.com/rust-lang/crates.io-index)" = "e8b74de517221a2cb01a53349cf54182acdc31a074727d3079068448c0676d85"
"checksum sourcefile 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4bf77cb82ba8453b42b6ae1d692e4cdc92f9a47beaf89a847c8be83f4e328ad3"
"checksum spin 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"

View File

@ -38,6 +38,8 @@ croaring = "0.4.2"
http = "0.2.0"
serde_urlencoded = "0.6.1"
nom = "5.1.1"
snafu = "0.6.2"
either = "1.5.3"
[dev-dependencies]
criterion = "0.3"

View File

@ -1,3 +1,4 @@
use either::Either;
use nom::{
branch::alt,
bytes::complete::{tag, take_while1},
@ -7,27 +8,16 @@ use nom::{
sequence::{separated_pair, terminated, tuple},
IResult,
};
use std::{error, fmt};
use snafu::Snafu;
use std::collections::BTreeMap;
#[derive(Debug, Clone)]
pub struct ParseError {
description: String,
/// Errors reported by this module; it is the default error type of the
/// module's `Result` alias.
#[derive(Debug, Snafu)]
pub enum Error {
/// The same tag key was inserted twice while building a line's tag set.
/// `tag_key` holds the repeated key and is interpolated into the message.
#[snafu(display(r#"Must not contain duplicate tags, but "{}" was repeated"#, tag_key))]
DuplicateTag { tag_key: String },
}
/// User-facing rendering of a `ParseError`: the stored description, verbatim.
impl fmt::Display for ParseError {
    /// Writes the description string straight into the formatter's output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.description)
    }
}
// Marks `ParseError` as a standard error type so it can be used wherever
// `std::error::Error` is expected.
impl error::Error for ParseError {
// Always returns `None`: this error never exposes a wrapped cause.
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
// Generic error, underlying cause isn't tracked.
None
}
}
pub type Result<T, E = ParseError> = std::result::Result<T, E>;
pub type Result<T, E = Error> = std::result::Result<T, E>;
#[derive(Debug, PartialEq, Clone)]
pub struct Point<T> {
@ -192,13 +182,16 @@ pub fn parse(input: &str) -> Result<Vec<PointType>> {
input
.lines()
.flat_map(|line| match parse_line(line) {
Ok((_remaining, parsed_line)) => line_to_points(parsed_line).map(Ok),
Ok((_remaining, parsed_line)) => match line_to_points(parsed_line) {
Ok(i) => Either::Left(i.map(Ok)),
Err(e) => Either::Right(std::iter::once(Err(e))),
},
Err(e) => panic!("TODO: Failed to parse: {}", e),
})
.collect()
}
fn line_to_points(parsed_line: ParsedLine<'_>) -> impl Iterator<Item = PointType> + '_ {
fn line_to_points(parsed_line: ParsedLine<'_>) -> Result<impl Iterator<Item = PointType> + '_> {
let ParsedLine {
measurement,
tag_set,
@ -206,10 +199,13 @@ fn line_to_points(parsed_line: ParsedLine<'_>) -> impl Iterator<Item = PointType
timestamp,
} = parsed_line;
let mut tag_set = tag_set.unwrap_or_default();
// TODO: handle duplicates?
tag_set.sort_by(|a, b| a.0.cmp(&b.0));
let tag_set = tag_set;
let mut unique_sorted_tag_set = BTreeMap::new();
for (tag_key, tag_value) in tag_set.unwrap_or_default() {
if unique_sorted_tag_set.insert(tag_key, tag_value).is_some() {
return DuplicateTag { tag_key }.fail();
}
}
let tag_set = unique_sorted_tag_set;
let timestamp = timestamp.expect("TODO: default timestamp not supported");
@ -220,14 +216,14 @@ fn line_to_points(parsed_line: ParsedLine<'_>) -> impl Iterator<Item = PointType
}
let series_base = series_base;
field_set.into_iter().map(move |(field_key, field_value)| {
Ok(field_set.into_iter().map(move |(field_key, field_value)| {
let series = format!("{}\t{}", series_base, field_key);
match field_value {
FieldValue::I64(value) => PointType::new_i64(series, value, timestamp),
FieldValue::F64(value) => PointType::new_f64(series, value, timestamp),
}
})
}))
}
fn parse_line(i: &str) -> IResult<&str, ParsedLine<'_>> {
@ -399,6 +395,19 @@ mod test {
Ok(())
}
/// A line that repeats a tag key must be rejected by `parse`, and the error
/// message must name the repeated key.
#[test]
fn parse_tag_set_duplicate_tags() -> Result {
    let outcome = parse("foo,tag=1,tag=2 value=1 123");

    let message = outcome
        .expect_err("Parsing duplicate tags should fail")
        .to_string();
    let expected = r#"Must not contain duplicate tags, but "tag" was repeated"#;

    assert_eq!(message, expected);
    Ok(())
}
#[test]
fn index_pairs() {
let p = Point {