From 969319dfd37e99b9e1fc977924b2d929e539b884 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Tue, 14 Feb 2023 09:21:11 +1100 Subject: [PATCH] fix: Allow all valid characters following a keyword (#6959) * fix: Allow all valid characters following a keyword Closes #6382 * chore: Identified additional test cases --- influxdb_influxql_parser/src/keywords.rs | 63 ++++++++++++++--------- iox_query/src/plan/influxql/rewriter.rs | 14 +++++ iox_query/src/plan/influxql/test_utils.rs | 23 +++++++++ 3 files changed, 76 insertions(+), 24 deletions(-) diff --git a/influxdb_influxql_parser/src/keywords.rs b/influxdb_influxql_parser/src/keywords.rs index 75fafabe49..6820dd923d 100644 --- a/influxdb_influxql_parser/src/keywords.rs +++ b/influxdb_influxql_parser/src/keywords.rs @@ -3,34 +3,25 @@ //! [keywords]: https://docs.influxdata.com/influxdb/v1.8/query_language/spec/#keywords use crate::internal::ParseResult; -use nom::branch::alt; -use nom::bytes::complete::{tag, tag_no_case}; +use nom::bytes::complete::tag_no_case; use nom::character::complete::alpha1; -use nom::combinator::{eof, fail, peek, verify}; +use nom::combinator::{fail, verify}; use nom::sequence::terminated; +use nom::FindToken; use once_cell::sync::Lazy; use std::collections::HashSet; use std::hash::{Hash, Hasher}; -/// Peeks at the input for acceptable characters separating a keyword. +/// Verifies the next character of `i` is valid following a keyword. /// -/// Will return a failure if one of the expected characters is not found. -fn keyword_follow_char(i: &str) -> ParseResult<&str, &str> { - peek(alt(( - tag(" "), - tag("\n"), - tag(";"), - tag("("), - tag(")"), - tag("\t"), - tag(","), - tag("="), - tag("!"), // possible != - tag("/"), // possible comment - tag("-"), // possible comment - eof, - fail, // Return a failure if we reach the end of this alternation - )))(i) +/// Keywords may be followed by whitespace, statement terminator (;), parens, +/// or conditional and arithmetic operators or EOF +fn keyword_follow_char(i: &str) -> ParseResult<&str, ()> { + if i.is_empty() || b" \n\t;(),=!><+-/*|&^%".find_token(i.bytes().next().unwrap()) { + Ok((i, ())) + } else { + fail(i) + } } /// Token represents a string with case-insensitive ordering and equality. @@ -162,6 +153,7 @@ pub fn keyword<'a>(keyword: &'static str) -> impl FnMut(&'a str) -> ParseResult< #[cfg(test)] mod test { use super::*; + use crate::assert_error; use assert_matches::assert_matches; #[test] @@ -278,13 +270,36 @@ mod test { // Will fail because keyword `OR` in `ORDER` is not recognized, as is not terminated by a valid character let err = or_keyword("ORDER").unwrap_err(); assert_matches!(err, nom::Err::Error(crate::internal::Error::Nom(_, kind)) if kind == nom::error::ErrorKind::Fail); + } - // test valid follow-on characters + #[test] + fn test_keyword_followed_by_valid_char() { let mut tag_keyword = keyword("TAG"); - let (rem, got) = tag_keyword("tag!").unwrap(); - assert_eq!(rem, "!"); + // followed by EOF + let (rem, got) = tag_keyword("tag").unwrap(); + assert_eq!(rem, ""); assert_eq!(got, "tag"); + + // + // Test some of the expected characters + // + + let (rem, got) = tag_keyword("tag!=foo").unwrap(); + assert_eq!(rem, "!=foo"); + assert_eq!(got, "tag"); + + let (rem, got) = tag_keyword("tag>foo").unwrap(); + assert_eq!(rem, ">foo"); + assert_eq!(got, "tag"); + + let (rem, got) = tag_keyword("tag&1 = foo").unwrap(); + assert_eq!(rem, "&1 = foo"); + assert_eq!(got, "tag"); + + // Fallible + + assert_error!(tag_keyword("tag$"), Fail); } #[test] diff --git a/iox_query/src/plan/influxql/rewriter.rs b/iox_query/src/plan/influxql/rewriter.rs index f3c12be092..ef0e146fa2 100644 --- a/iox_query/src/plan/influxql/rewriter.rs +++ b/iox_query/src/plan/influxql/rewriter.rs @@ -746,6 +746,20 @@ mod test { "SELECT SUM(field_f64::float) AS SUM_field_f64, SUM(field_i64::integer) AS SUM_field_i64, SUM(shared_field0::float) AS SUM_shared_field0 FROM temp_01" ); + let stmt = parse_select("SELECT * FROM merge_00, merge_01"); + let stmt = rewrite_statement(&namespace, &stmt).unwrap(); + assert_eq!( + stmt.to_string(), + "SELECT col0::float AS col0, col0::tag AS col0_1, col1::float AS col1, col1::tag AS col1_1, col2::string AS col2, col3::string AS col3 FROM merge_00, merge_01" + ); + + let stmt = parse_select("SELECT /col0/ FROM merge_00, merge_01"); + let stmt = rewrite_statement(&namespace, &stmt).unwrap(); + assert_eq!( + stmt.to_string(), + "SELECT col0::float AS col0, col0::tag AS col0_1 FROM merge_00, merge_01" + ); + // Fallible cases let stmt = parse_select("SELECT *::field + *::tag FROM cpu"); diff --git a/iox_query/src/plan/influxql/test_utils.rs b/iox_query/src/plan/influxql/test_utils.rs index cde772d06b..6919f713a6 100644 --- a/iox_query/src/plan/influxql/test_utils.rs +++ b/iox_query/src/plan/influxql/test_utils.rs @@ -114,6 +114,29 @@ pub(crate) mod database { .with_string_field_column_with_stats("shared_field0", None, None) .with_one_row_of_data(), ), + // Schemas for testing clashing column names when merging across measurements + Arc::new( + TestChunk::new("merge_00") + .with_id(next_chunk_id()) + .with_quiet() + .with_time_column() + .with_tag_column("col0") + .with_f64_field_column("col1") + .with_bool_field_column("col2") + .with_string_field_column_with_stats("col3", None, None) + .with_one_row_of_data(), + ), + Arc::new( + TestChunk::new("merge_01") + .with_id(next_chunk_id()) + .with_quiet() + .with_time_column() + .with_tag_column("col1") + .with_f64_field_column("col0") + .with_bool_field_column("col3") + .with_string_field_column_with_stats("col2", None, None) + .with_one_row_of_data(), + ), ] } }