diff --git a/influxdb_influxql_parser/src/expression/arithmetic.rs b/influxdb_influxql_parser/src/expression/arithmetic.rs index da4cbb8869..55f7b5b239 100644 --- a/influxdb_influxql_parser/src/expression/arithmetic.rs +++ b/influxdb_influxql_parser/src/expression/arithmetic.rs @@ -12,7 +12,7 @@ use nom::bytes::complete::tag; use nom::character::complete::{char, multispace0}; use nom::combinator::{cut, map, opt, value}; use nom::multi::{many0, separated_list0}; -use nom::sequence::{delimited, pair, preceded, separated_pair, tuple}; +use nom::sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}; use std::fmt::{Display, Formatter, Write}; /// An InfluxQL arithmetic expression. @@ -316,11 +316,54 @@ where )(i) } -/// Parse a variable reference, which is an identifier followed by an optional cast expression. +/// Parse a segmented identifier +/// +/// ```text +/// segmented_identifier ::= identifier | +/// ( identifier "." identifier ) | +/// ( identifier "." identifier? "." identifier ) +/// ``` +fn segmented_identifier(i: &str) -> ParseResult<&str, Identifier> { + let (remaining, (opt_prefix, name)) = pair( + opt(alt(( + // ident2 "." ident1 "." + map( + pair( + terminated(identifier, tag(".")), + terminated(identifier, tag(".")), + ), + |(ident2, ident1)| (Some(ident2), Some(ident1)), + ), + // identifier ".." + map(terminated(identifier, tag("..")), |ident2| { + (Some(ident2), None) + }), + // identifier "." + map(terminated(identifier, tag(".")), |ident1| { + (None, Some(ident1)) + }), + ))), + identifier, + )(i)?; + + Ok(( + remaining, + match opt_prefix { + Some((None, Some(ident1))) => format!("{}.{}", ident1.0, name.0).into(), + Some((Some(ident2), None)) => format!("{}..{}", ident2.0, name.0).into(), + Some((Some(ident2), Some(ident1))) => { + format!("{}.{}.{}", ident2.0, ident1.0, name.0).into() + } + _ => name, + }, + )) +} + +/// Parse a variable reference, which is a segmented identifier followed by an optional cast expression. pub(crate) fn var_ref(i: &str) -> ParseResult<&str, Expr> { map( pair( - identifier, + segmented_identifier, opt(preceded( tag("::"), expect( @@ -515,6 +558,19 @@ mod test { let (_, got) = var_ref("foo").unwrap(); assert_eq!(got, var_ref!("foo")); + // Whilst this is parsed as a 3-part name, it is treated as a quoted string 🙄 + // VarRefs are parsed as segmented identifiers + // + // * https://github.com/influxdata/influxql/blob/7e7d61973256ffeef4b99edd0a89f18a9e52fa2d/parser.go#L2515-L2516 + // + // and then the segments are joined as a single string + // + // * https://github.com/influxdata/influxql/blob/7e7d61973256ffeef4b99edd0a89f18a9e52fa2d/parser.go#L2551 + let (rem, got) = var_ref("db.rp.foo").unwrap(); + assert_eq!(got, var_ref!("db.rp.foo")); + assert_eq!(format!("{}", got), r#""db.rp.foo""#); + assert_eq!(rem, ""); + // with cast operator let (_, got) = var_ref("foo::tag").unwrap(); assert_eq!(got, var_ref!("foo", Tag)); @@ -539,6 +595,62 @@ mod test { assert!(got.is_empty()) } + #[test] + fn test_segmented_identifier() { + // Unquoted + let (rem, id) = segmented_identifier("part0").unwrap(); + assert_eq!(rem, ""); + assert_eq!(format!("{}", id), "part0"); + + // id.id + let (rem, id) = segmented_identifier("part1.part0").unwrap(); + assert_eq!(rem, ""); + assert_eq!(format!("{}", id), "\"part1.part0\""); + + // id..id + let (rem, id) = segmented_identifier("part2..part0").unwrap(); + assert_eq!(rem, ""); + assert_eq!(format!("{}", id), "\"part2..part0\""); + + // id.id.id + let (rem, id) = segmented_identifier("part2.part1.part0").unwrap(); + assert_eq!(rem, ""); + assert_eq!(format!("{}", id), "\"part2.part1.part0\""); + + // "id"."id".id + let (rem, id) = segmented_identifier(r#""part 2"."part 1".part0"#).unwrap(); + assert_eq!(rem, ""); + assert_eq!(format!("{}", id), "\"part 2.part 1.part0\""); + + // Only parses 3 segments + let (rem, id) = segmented_identifier("part2.part1.part0.foo").unwrap(); + assert_eq!(rem, ".foo"); + assert_eq!(format!("{}", id), "\"part2.part1.part0\""); + + // Quoted + let (rem, id) = segmented_identifier("\"part0\"").unwrap(); + assert_eq!(rem, ""); + assert_eq!(format!("{}", id), "part0"); + + // Additional test cases, with compatibility proven via https://go.dev/play/p/k2150CJocVl + + let (rem, id) = segmented_identifier(r#""part" 2"."part 1".part0"#).unwrap(); + assert_eq!(rem, r#" 2"."part 1".part0"#); + assert_eq!(format!("{}", id), "part"); + + let (rem, id) = segmented_identifier(r#""part" 2."part 1".part0"#).unwrap(); + assert_eq!(rem, r#" 2."part 1".part0"#); + assert_eq!(format!("{}", id), "part"); + + let (rem, id) = segmented_identifier(r#""part "2"."part 1".part0"#).unwrap(); + assert_eq!(rem, r#"2"."part 1".part0"#); + assert_eq!(format!("{}", id), r#""part ""#); + + let (rem, id) = segmented_identifier(r#""part ""2"."part 1".part0"#).unwrap(); + assert_eq!(rem, r#""2"."part 1".part0"#); + assert_eq!(format!("{}", id), r#""part ""#); + } + #[test] fn test_display_expr() { let (_, e) = arithmetic_expression("5 + 51").unwrap(); diff --git a/influxdb_influxql_parser/src/literal.rs b/influxdb_influxql_parser/src/literal.rs index 580d6d37a4..2fcc533366 100644 --- a/influxdb_influxql_parser/src/literal.rs +++ b/influxdb_influxql_parser/src/literal.rs @@ -249,15 +249,15 @@ fn single_duration(i: &str) -> ParseResult<&str, i64> { pair( integer, alt(( - value(Nanosecond, tag("ns")), // nanoseconds - value(Microsecond, tag("µs")), // microseconds - value(Microsecond, tag("us")), // microseconds - value(Millisecond, tag("ms")), // milliseconds - value(Second, tag("s")), // seconds - value(Minute, tag("m")), // minutes - value(Hour, tag("h")), // hours - value(Day, tag("d")), // days - value(Week, tag("w")), // weeks + value(Nanosecond, tag("ns")), // nanoseconds + value(Microsecond, tag("µ")), // microseconds + value(Microsecond, tag("u")), // microseconds + value(Millisecond, tag("ms")), // milliseconds + value(Second, tag("s")), // seconds + value(Minute, tag("m")), // minutes + value(Hour, tag("h")), // hours + value(Day, tag("d")), // days + value(Week, tag("w")), // weeks )), ), |(v, unit)| match unit { @@ -410,10 +410,14 @@ mod test { let (_, got) = single_duration("38ns").unwrap(); assert_eq!(got, 38); - let (_, got) = single_duration("22us").unwrap(); + let (_, got) = single_duration("22u").unwrap(); assert_eq!(got, 22 * NANOS_PER_MICRO); - let (_, got) = single_duration("7µs").unwrap(); + let (rem, got) = single_duration("22us").unwrap(); + assert_eq!(got, 22 * NANOS_PER_MICRO); + assert_eq!(rem, "s"); // prove that we ignore the trailing s + + let (_, got) = single_duration("7µ").unwrap(); assert_eq!(got, 7 * NANOS_PER_MICRO); let (_, got) = single_duration("15ms").unwrap(); diff --git a/influxdb_influxql_parser/src/select.rs b/influxdb_influxql_parser/src/select.rs index 7ac2e4e28a..3cfa3a60b2 100644 --- a/influxdb_influxql_parser/src/select.rs +++ b/influxdb_influxql_parser/src/select.rs @@ -774,6 +774,12 @@ mod test { select_statement("SELECT value FROM cpu WHERE time <= now()TZ('Australia/Hobart')") .unwrap(); assert_eq!(rem, ""); + + // segmented var ref identifiers + let (rem, _) = + select_statement(r#"SELECT LAST("n.usage_user") FROM cpu WHERE n.usage_user > 0"#) + .unwrap(); + assert_eq!(rem, ""); } #[test] @@ -848,6 +854,16 @@ mod test { } ); + // Parse expression with an alias and no unnecessary whitespace + let (_, got) = Field::parse("LAST(\"n.asks\")").unwrap(); + assert_eq!( + got, + Field { + expr: call!("LAST", var_ref!("n.asks")), + alias: None + } + ); + // Parse a call with a VarRef let (_, got) = Field::parse("DISTINCT foo AS bar").unwrap(); assert_eq!( diff --git a/influxdb_influxql_parser/src/string.rs b/influxdb_influxql_parser/src/string.rs index a81825e2ca..7b586ece62 100644 --- a/influxdb_influxql_parser/src/string.rs +++ b/influxdb_influxql_parser/src/string.rs @@ -7,8 +7,8 @@ use crate::impl_tuple_clause; use crate::internal::{expect, ParseError, ParseResult}; use nom::branch::alt; -use nom::bytes::complete::{is_not, tag}; -use nom::character::complete::char; +use nom::bytes::complete::{is_not, tag, take_till}; +use nom::character::complete::{anychar, char}; use nom::combinator::{map, value, verify}; use nom::error::Error; use nom::multi::fold_many0; @@ -137,13 +137,24 @@ fn regex_literal(i: &str) -> ParseResult<&str, &str> { loop { // match everything except `\`, `/` or `\n` - let (_, match_i) = is_not("\\/\n")(remaining)?; + let (_, match_i) = take_till(|c| c == '\\' || c == '/' || c == '\n')(remaining)?; consumed = &i[..(consumed.len() + match_i.len())]; remaining = &i[consumed.len()..]; + // If we didn't consume anything, check whether it is a newline or regex delimiter, + // which signals we should leave this parser for outer processing. + if consumed.is_empty() { + is_not("/\n")(remaining)?; + } + // Try and consume '\' followed by a '/' if let Ok((remaining_i, _)) = char::<_, Error<&str>>('\\')(remaining) { if char::<_, Error<&str>>('/')(remaining_i).is_ok() { + // If we didn't consume anything, but we found "\/" sequence, + // we need to return an error so the outer fold_many0 parser does not trigger + // an infinite recursion error. + anychar(consumed)?; + // We're escaping a '/' (a regex delimiter), so finish and let // the outer parser match and unescape return Ok((remaining, consumed)); @@ -201,6 +212,10 @@ mod test { let (_, got) = double_quoted_string(r#""quick draw""#).unwrap(); assert_eq!(got, "quick draw"); + // ascii + let (_, got) = double_quoted_string(r#""n.asks""#).unwrap(); + assert_eq!(got, "n.asks"); + // unicode let (_, got) = double_quoted_string("\"quick draw\u{1f47d}\"").unwrap(); assert_eq!( @@ -265,6 +280,9 @@ mod test { let (_, got) = single_quoted_string(r#"'\n\''"#).unwrap(); assert_eq!(got, "\n'"); + let (_, got) = single_quoted_string(r#"'\'hello\''"#).unwrap(); + assert_eq!(got, "'hello'"); + // literal tab let (_, got) = single_quoted_string("'quick\tdraw'").unwrap(); assert_eq!(got, "quick\tdraw"); @@ -300,13 +318,17 @@ mod test { assert_eq!(got, "hello".into()); // handle escaped delimiters "\/" - let (_, got) = regex(r#"/this\/is\/a\/path/"#).unwrap(); - assert_eq!(got, "this/is/a/path".into()); + let (_, got) = regex(r#"/\/this\/is\/a\/path/"#).unwrap(); + assert_eq!(got, "/this/is/a/path".into()); // ignores any other possible escape sequence let (_, got) = regex(r#"/hello\n/"#).unwrap(); assert_eq!(got, "hello\\n".into()); + // can parse possible escape sequence at beginning of regex + let (_, got) = regex(r#"/\w.*/"#).unwrap(); + assert_eq!(got, "\\w.*".into()); + // Empty regex let (i, got) = regex("//").unwrap(); assert_eq!(i, "");