fix: InfluxQL parser incompatibilities (#6034)
* fix: Parse regular expressions starting with possible escape sequence. This was failing because the previous combinator, `is_not`, would return an error if it consumed no input when identifying one of the characters in its set. This case would then prevent the remainder of the `regex_literal` parser from identifying and ignoring sequences like "\w".
* fix: Parse microsecond duration literals with correct unit suffix.
* fix: Parse a var ref as a 3-part, segmented identifier. Closes #6033.
* chore: Address lint warnings.
* chore: Additional test cases per feedback.
pull/24376/head
parent
4fb2843d05
commit
f54124102e
|
@ -12,7 +12,7 @@ use nom::bytes::complete::tag;
|
|||
use nom::character::complete::{char, multispace0};
|
||||
use nom::combinator::{cut, map, opt, value};
|
||||
use nom::multi::{many0, separated_list0};
|
||||
use nom::sequence::{delimited, pair, preceded, separated_pair, tuple};
|
||||
use nom::sequence::{delimited, pair, preceded, separated_pair, terminated, tuple};
|
||||
use std::fmt::{Display, Formatter, Write};
|
||||
|
||||
/// An InfluxQL arithmetic expression.
|
||||
|
@ -316,11 +316,54 @@ where
|
|||
)(i)
|
||||
}
|
||||
|
||||
/// Parse a variable reference, which is an identifier followed by an optional cast expression.
|
||||
/// Parse a segmented identifier
|
||||
///
|
||||
/// ```text
|
||||
/// segmented_identifier ::= identifier |
|
||||
/// ( identifier "." identifier ) |
|
||||
/// ( identifier "." identifier? "." identifier )
|
||||
/// ```
|
||||
fn segmented_identifier(i: &str) -> ParseResult<&str, Identifier> {
|
||||
let (remaining, (opt_prefix, name)) = pair(
|
||||
opt(alt((
|
||||
// ident2 "." ident1 "."
|
||||
map(
|
||||
pair(
|
||||
terminated(identifier, tag(".")),
|
||||
terminated(identifier, tag(".")),
|
||||
),
|
||||
|(ident2, ident1)| (Some(ident2), Some(ident1)),
|
||||
),
|
||||
// identifier ".."
|
||||
map(terminated(identifier, tag("..")), |ident2| {
|
||||
(Some(ident2), None)
|
||||
}),
|
||||
// identifier "."
|
||||
map(terminated(identifier, tag(".")), |ident1| {
|
||||
(None, Some(ident1))
|
||||
}),
|
||||
))),
|
||||
identifier,
|
||||
)(i)?;
|
||||
|
||||
Ok((
|
||||
remaining,
|
||||
match opt_prefix {
|
||||
Some((None, Some(ident1))) => format!("{}.{}", ident1.0, name.0).into(),
|
||||
Some((Some(ident2), None)) => format!("{}..{}", ident2.0, name.0).into(),
|
||||
Some((Some(ident2), Some(ident1))) => {
|
||||
format!("{}.{}.{}", ident2.0, ident1.0, name.0).into()
|
||||
}
|
||||
_ => name,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
/// Parse a variable reference, which is a segmented identifier followed by an optional cast expression.
|
||||
pub(crate) fn var_ref(i: &str) -> ParseResult<&str, Expr> {
|
||||
map(
|
||||
pair(
|
||||
identifier,
|
||||
segmented_identifier,
|
||||
opt(preceded(
|
||||
tag("::"),
|
||||
expect(
|
||||
|
@ -515,6 +558,19 @@ mod test {
|
|||
let (_, got) = var_ref("foo").unwrap();
|
||||
assert_eq!(got, var_ref!("foo"));
|
||||
|
||||
// Whilst this is parsed as a 3-part name, it is treated as a quoted string 🙄
|
||||
// VarRefs are parsed as segmented identifiers
|
||||
//
|
||||
// * https://github.com/influxdata/influxql/blob/7e7d61973256ffeef4b99edd0a89f18a9e52fa2d/parser.go#L2515-L2516
|
||||
//
|
||||
// and then the segments are joined as a single string
|
||||
//
|
||||
// * https://github.com/influxdata/influxql/blob/7e7d61973256ffeef4b99edd0a89f18a9e52fa2d/parser.go#L2551
|
||||
let (rem, got) = var_ref("db.rp.foo").unwrap();
|
||||
assert_eq!(got, var_ref!("db.rp.foo"));
|
||||
assert_eq!(format!("{}", got), r#""db.rp.foo""#);
|
||||
assert_eq!(rem, "");
|
||||
|
||||
// with cast operator
|
||||
let (_, got) = var_ref("foo::tag").unwrap();
|
||||
assert_eq!(got, var_ref!("foo", Tag));
|
||||
|
@ -539,6 +595,62 @@ mod test {
|
|||
assert!(got.is_empty())
|
||||
}
|
||||
|
||||
#[test]
fn test_segmented_identifier() {
    // Each case is (input, expected remaining input, expected `Display` output).
    let cases: [(&str, &str, &str); 11] = [
        // Unquoted
        ("part0", "", "part0"),
        // id.id
        ("part1.part0", "", "\"part1.part0\""),
        // id..id
        ("part2..part0", "", "\"part2..part0\""),
        // id.id.id
        ("part2.part1.part0", "", "\"part2.part1.part0\""),
        // "id"."id".id
        (
            r#""part 2"."part 1".part0"#,
            "",
            "\"part 2.part 1.part0\"",
        ),
        // Only parses 3 segments
        ("part2.part1.part0.foo", ".foo", "\"part2.part1.part0\""),
        // Quoted
        ("\"part0\"", "", "part0"),
        // Additional test cases, with compatibility proven via https://go.dev/play/p/k2150CJocVl
        (
            r#""part" 2"."part 1".part0"#,
            r#" 2"."part 1".part0"#,
            "part",
        ),
        (
            r#""part" 2."part 1".part0"#,
            r#" 2."part 1".part0"#,
            "part",
        ),
        (
            r#""part "2"."part 1".part0"#,
            r#"2"."part 1".part0"#,
            r#""part ""#,
        ),
        (
            r#""part ""2"."part 1".part0"#,
            r#""2"."part 1".part0"#,
            r#""part ""#,
        ),
    ];

    for &(input, want_rem, want_display) in cases.iter() {
        let (rem, id) = segmented_identifier(input).unwrap();
        assert_eq!(rem, want_rem, "unexpected remaining input for {:?}", input);
        assert_eq!(
            id.to_string(),
            want_display,
            "unexpected identifier for {:?}",
            input
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_display_expr() {
|
||||
let (_, e) = arithmetic_expression("5 + 51").unwrap();
|
||||
|
|
|
@ -249,15 +249,15 @@ fn single_duration(i: &str) -> ParseResult<&str, i64> {
|
|||
pair(
|
||||
integer,
|
||||
alt((
|
||||
value(Nanosecond, tag("ns")), // nanoseconds
|
||||
value(Microsecond, tag("µs")), // microseconds
|
||||
value(Microsecond, tag("us")), // microseconds
|
||||
value(Millisecond, tag("ms")), // milliseconds
|
||||
value(Second, tag("s")), // seconds
|
||||
value(Minute, tag("m")), // minutes
|
||||
value(Hour, tag("h")), // hours
|
||||
value(Day, tag("d")), // days
|
||||
value(Week, tag("w")), // weeks
|
||||
value(Nanosecond, tag("ns")), // nanoseconds
|
||||
value(Microsecond, tag("µ")), // microseconds
|
||||
value(Microsecond, tag("u")), // microseconds
|
||||
value(Millisecond, tag("ms")), // milliseconds
|
||||
value(Second, tag("s")), // seconds
|
||||
value(Minute, tag("m")), // minutes
|
||||
value(Hour, tag("h")), // hours
|
||||
value(Day, tag("d")), // days
|
||||
value(Week, tag("w")), // weeks
|
||||
)),
|
||||
),
|
||||
|(v, unit)| match unit {
|
||||
|
@ -410,10 +410,14 @@ mod test {
|
|||
let (_, got) = single_duration("38ns").unwrap();
|
||||
assert_eq!(got, 38);
|
||||
|
||||
let (_, got) = single_duration("22us").unwrap();
|
||||
let (_, got) = single_duration("22u").unwrap();
|
||||
assert_eq!(got, 22 * NANOS_PER_MICRO);
|
||||
|
||||
let (_, got) = single_duration("7µs").unwrap();
|
||||
let (rem, got) = single_duration("22us").unwrap();
|
||||
assert_eq!(got, 22 * NANOS_PER_MICRO);
|
||||
assert_eq!(rem, "s"); // prove that we ignore the trailing s
|
||||
|
||||
let (_, got) = single_duration("7µ").unwrap();
|
||||
assert_eq!(got, 7 * NANOS_PER_MICRO);
|
||||
|
||||
let (_, got) = single_duration("15ms").unwrap();
|
||||
|
|
|
@ -774,6 +774,12 @@ mod test {
|
|||
select_statement("SELECT value FROM cpu WHERE time <= now()TZ('Australia/Hobart')")
|
||||
.unwrap();
|
||||
assert_eq!(rem, "");
|
||||
|
||||
// segmented var ref identifiers
|
||||
let (rem, _) =
|
||||
select_statement(r#"SELECT LAST("n.usage_user") FROM cpu WHERE n.usage_user > 0"#)
|
||||
.unwrap();
|
||||
assert_eq!(rem, "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -848,6 +854,16 @@ mod test {
|
|||
}
|
||||
);
|
||||
|
||||
// Parse a call whose argument is a quoted identifier containing a period, with no alias
|
||||
let (_, got) = Field::parse("LAST(\"n.asks\")").unwrap();
|
||||
assert_eq!(
|
||||
got,
|
||||
Field {
|
||||
expr: call!("LAST", var_ref!("n.asks")),
|
||||
alias: None
|
||||
}
|
||||
);
|
||||
|
||||
// Parse a call with a VarRef
|
||||
let (_, got) = Field::parse("DISTINCT foo AS bar").unwrap();
|
||||
assert_eq!(
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
use crate::impl_tuple_clause;
|
||||
use crate::internal::{expect, ParseError, ParseResult};
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::{is_not, tag};
|
||||
use nom::character::complete::char;
|
||||
use nom::bytes::complete::{is_not, tag, take_till};
|
||||
use nom::character::complete::{anychar, char};
|
||||
use nom::combinator::{map, value, verify};
|
||||
use nom::error::Error;
|
||||
use nom::multi::fold_many0;
|
||||
|
@ -137,13 +137,24 @@ fn regex_literal(i: &str) -> ParseResult<&str, &str> {
|
|||
|
||||
loop {
|
||||
// match everything except `\`, `/` or `\n`
|
||||
let (_, match_i) = is_not("\\/\n")(remaining)?;
|
||||
let (_, match_i) = take_till(|c| c == '\\' || c == '/' || c == '\n')(remaining)?;
|
||||
consumed = &i[..(consumed.len() + match_i.len())];
|
||||
remaining = &i[consumed.len()..];
|
||||
|
||||
// If we didn't consume anything, check whether it is a newline or regex delimiter,
|
||||
// which signals we should leave this parser for outer processing.
|
||||
if consumed.is_empty() {
|
||||
is_not("/\n")(remaining)?;
|
||||
}
|
||||
|
||||
// Try and consume '\' followed by a '/'
|
||||
if let Ok((remaining_i, _)) = char::<_, Error<&str>>('\\')(remaining) {
|
||||
if char::<_, Error<&str>>('/')(remaining_i).is_ok() {
|
||||
// If we didn't consume anything, but we found "\/" sequence,
|
||||
// we need to return an error so the outer fold_many0 parser does not trigger
|
||||
// an infinite recursion error.
|
||||
anychar(consumed)?;
|
||||
|
||||
// We're escaping a '/' (a regex delimiter), so finish and let
|
||||
// the outer parser match and unescape
|
||||
return Ok((remaining, consumed));
|
||||
|
@ -201,6 +212,10 @@ mod test {
|
|||
let (_, got) = double_quoted_string(r#""quick draw""#).unwrap();
|
||||
assert_eq!(got, "quick draw");
|
||||
|
||||
// ascii
|
||||
let (_, got) = double_quoted_string(r#""n.asks""#).unwrap();
|
||||
assert_eq!(got, "n.asks");
|
||||
|
||||
// unicode
|
||||
let (_, got) = double_quoted_string("\"quick draw\u{1f47d}\"").unwrap();
|
||||
assert_eq!(
|
||||
|
@ -265,6 +280,9 @@ mod test {
|
|||
let (_, got) = single_quoted_string(r#"'\n\''"#).unwrap();
|
||||
assert_eq!(got, "\n'");
|
||||
|
||||
let (_, got) = single_quoted_string(r#"'\'hello\''"#).unwrap();
|
||||
assert_eq!(got, "'hello'");
|
||||
|
||||
// literal tab
|
||||
let (_, got) = single_quoted_string("'quick\tdraw'").unwrap();
|
||||
assert_eq!(got, "quick\tdraw");
|
||||
|
@ -300,13 +318,17 @@ mod test {
|
|||
assert_eq!(got, "hello".into());
|
||||
|
||||
// handle escaped delimiters "\/"
|
||||
let (_, got) = regex(r#"/this\/is\/a\/path/"#).unwrap();
|
||||
assert_eq!(got, "this/is/a/path".into());
|
||||
let (_, got) = regex(r#"/\/this\/is\/a\/path/"#).unwrap();
|
||||
assert_eq!(got, "/this/is/a/path".into());
|
||||
|
||||
// ignores any other possible escape sequence
|
||||
let (_, got) = regex(r#"/hello\n/"#).unwrap();
|
||||
assert_eq!(got, "hello\\n".into());
|
||||
|
||||
// can parse possible escape sequence at beginning of regex
|
||||
let (_, got) = regex(r#"/\w.*/"#).unwrap();
|
||||
assert_eq!(got, "\\w.*".into());
|
||||
|
||||
// Empty regex
|
||||
let (i, got) = regex("//").unwrap();
|
||||
assert_eq!(i, "");
|
||||
|
|
Loading…
Reference in New Issue