feat: Add influxdb_influxql_parser crate (#5415)

* feat: Add crate; parse quoted identifiers

* chore: Run cargo hakari tasks

* chore: satisfy linter

* chore: Use `test_helpers::Result`

* feat: Add all InfluxQL keywords

* chore: Update influxdb_influxql_parser/src/lib.rs

Co-authored-by: Marco Neumann <marco@crepererum.net>

* chore: PR feedback

* chore: PR Feedback, remove Result<()>

* chore: Update Cargo.lock

Co-authored-by: CircleCI[bot] <circleci@influxdata.com>
Co-authored-by: Marco Neumann <marco@crepererum.net>
pull/24376/head
Stuart Carnie 2022-08-19 09:09:45 +10:00 committed by GitHub
parent d75df2b610
commit b4e5895d7a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 399 additions and 0 deletions

9
Cargo.lock generated
View File

@ -2129,6 +2129,15 @@ dependencies = [
"uuid 1.1.2",
]
[[package]]
name = "influxdb_influxql_parser"
version = "0.1.0"
dependencies = [
"nom",
"test_helpers",
"workspace-hack",
]
[[package]]
name = "influxdb_iox"
version = "0.1.0"

View File

@ -14,6 +14,7 @@ members = [
"executor",
"generated_types",
"import",
"influxdb_influxql_parser",
"influxdb_iox",
"influxdb_iox_client",
"influxdb_line_protocol",

View File

@ -0,0 +1,11 @@
[package]
name = "influxdb_influxql_parser"
version = "0.1.0"
edition = "2021"
[dependencies] # In alphabetical order
nom = { version = "7", default-features = false, features = ["std"] }
workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
test_helpers = { path = "../test_helpers" }

View File

@ -0,0 +1,128 @@
//! # Parse an InfluxQL [identifier]
//!
//! Identifiers are parsed using the following rules:
//!
//! * double quoted identifiers can contain any unicode character other than a new line
//! * double quoted identifiers can contain escaped characters, namely `\"`, `\n`, `\t`, `\\` and `\'`
//! * double quoted identifiers can contain [InfluxQL keywords][keywords]
//! * unquoted identifiers must start with an upper or lowercase ASCII character or `_`
//! * unquoted identifiers may contain only ASCII letters, decimal digits, and `_`
//!
//! [identifier]: https://docs.influxdata.com/influxdb/v1.8/query_language/spec/#identifiers
//! [keywords]: https://docs.influxdata.com/influxdb/v1.8/query_language/spec/#keywords
#![allow(dead_code)]
use nom::branch::alt;
use nom::bytes::streaming::is_not;
use nom::character::streaming::char;
use nom::combinator::{map, value, verify};
use nom::multi::fold_many0;
use nom::sequence::{delimited, preceded};
use nom::IResult;
// Taken liberally from https://github.com/Geal/nom/blob/main/examples/string.rs and
// adjusted for InfluxQL
/// Parse one backslash escape sequence and return the character it denotes.
///
/// The recognised sequences are `\n`, `\t`, `\\`, `\"` and `\'`. A backslash
/// followed by any other character yields an error.
fn parse_escaped_char(input: &str) -> IResult<&str, char> {
    // Maps the character after the backslash to the character it stands for.
    let escape_code = alt((
        value('\n', char('n')),
        value('\t', char('t')),
        value('\\', char('\\')),
        value('"', char('"')),
        value('\'', char('\'')),
    ));
    // The leading backslash is matched and discarded; only the mapped value is kept.
    let mut escape_sequence = preceded(char('\\'), escape_code);
    escape_sequence(input)
}
/// Parse a non-empty run of text that contains no `"`, no `\` and no newline.
///
/// The returned slice borrows from `input` and stops just before the first
/// of those three characters.
fn parse_literal(input: &str) -> IResult<&str, &str> {
    // `is_not` takes every character up to the first `"`, `\` or `\n`;
    // `verify` then rejects an empty match.
    verify(is_not("\"\\\n"), |text: &str| !text.is_empty())(input)
}
/// A string fragment contains a fragment of a string being parsed: either
/// a non-empty Literal (a series of non-escaped characters) or a single
/// parsed escaped character.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StringFragment<'a> {
    /// A run of unescaped characters, borrowed from the input being parsed.
    Literal(&'a str),
    /// The character denoted by a single escape sequence.
    EscapedChar(char),
}
/// Parse the next piece of a string body as a `StringFragment`.
///
/// A literal run (`parse_literal`) is tried first; if that does not match,
/// a single escape sequence (`parse_escaped_char`) is tried instead.
fn parse_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
    let literal = map(parse_literal, StringFragment::Literal);
    let escaped = map(parse_escaped_char, StringFragment::EscapedChar);
    alt((literal, escaped))(input)
}
/// Parse a string. Use a loop of parse_fragment and push all of the fragments
/// into an output string.
pub fn parse_string(input: &str) -> IResult<&str, String> {
// fold_many0 is the equivalent of iterator::fold. It runs a parser in a loop,
// and for each output value, calls a folding function on each output value.
let build_string = fold_many0(parse_fragment, String::new, |mut string, fragment| {
match fragment {
StringFragment::Literal(s) => string.push_str(s),
StringFragment::EscapedChar(c) => string.push(c),
}
string
});
delimited(char('"'), build_string, char('"'))(input)
}
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_parse_string() {
        // Inputs that must parse, paired with the expected decoded contents.
        let accepted = [
            // ascii
            (r#""quick draw""#, "quick draw"),
            // unicode
            (
                "\"quick draw\u{1f47d}\"",
                "quick draw\u{1f47d}", // 👽
            ),
            // escaped characters
            (r#""\n\t\'\"""#, "\n\t'\""),
            // literal tab
            ("\"quick\tdraw\"", "quick\tdraw"),
            // literal carriage return
            ("\"quick\rdraw\"", "quick\rdraw"),
        ];
        for (input, want) in accepted {
            let (_, got) = parse_string(input).unwrap();
            assert_eq!(got, want);
        }

        // ┌─────────────────────────────┐
        // │       Fallible tests        │
        // └─────────────────────────────┘

        // Not terminated
        assert!(parse_string(r#""quick draw"#).is_err());

        // Literal newline
        assert!(parse_string("\"quick\ndraw\"").is_err());

        // Invalid escape
        assert!(parse_string(r#""quick\idraw""#).is_err());
    }
}

View File

@ -0,0 +1,235 @@
//! # Parse InfluxQL [keywords]
//!
//! [keywords]: https://docs.influxdata.com/influxdb/v1.8/query_language/spec/#keywords
#![allow(dead_code)]
use nom::branch::alt;
use nom::bytes::complete::{tag, tag_no_case};
use nom::combinator::{eof, peek};
use nom::sequence::terminated;
use nom::IResult;
/// Checks, without consuming any input, that a keyword is properly terminated.
///
/// Succeeds when the input starts with a space, newline, tab, `;`, `(`, `)`,
/// `,` or `=`, or when the input is empty.
fn keyword_follow_char(i: &str) -> IResult<&str, &str> {
    // Everything that may legally come directly after a keyword.
    let delimiter = alt((
        tag(" "),
        tag("\n"),
        tag(";"),
        tag("("),
        tag(")"),
        tag("\t"),
        tag(","),
        tag("="),
        eof,
    ));
    // `peek` reports the match but leaves the input position untouched.
    let mut lookahead = peek(delimiter);
    lookahead(i)
}
/// Parses the input for matching InfluxQL keywords from ALL to DROP.
fn keyword_all_to_drop(i: &str) -> IResult<&str, &str> {
alt((
terminated(tag_no_case("ALL"), keyword_follow_char),
terminated(tag_no_case("ALTER"), keyword_follow_char),
terminated(tag_no_case("ANALYZE"), keyword_follow_char),
terminated(tag_no_case("ANY"), keyword_follow_char),
terminated(tag_no_case("AS"), keyword_follow_char),
terminated(tag_no_case("ASC"), keyword_follow_char),
terminated(tag_no_case("BEGIN"), keyword_follow_char),
terminated(tag_no_case("BY"), keyword_follow_char),
terminated(tag_no_case("CARDINALITY"), keyword_follow_char),
terminated(tag_no_case("CREATE"), keyword_follow_char),
terminated(tag_no_case("CONTINUOUS"), keyword_follow_char),
terminated(tag_no_case("DATABASE"), keyword_follow_char),
terminated(tag_no_case("DATABASES"), keyword_follow_char),
terminated(tag_no_case("DEFAULT"), keyword_follow_char),
terminated(tag_no_case("DELETE"), keyword_follow_char),
terminated(tag_no_case("DESC"), keyword_follow_char),
terminated(tag_no_case("DESTINATIONS"), keyword_follow_char),
terminated(tag_no_case("DIAGNOSTICS"), keyword_follow_char),
terminated(tag_no_case("DISTINCT"), keyword_follow_char),
terminated(tag_no_case("DROP"), keyword_follow_char),
))(i)
}
/// Parses the input for matching InfluxQL keywords from DURATION to LIMIT.
fn keyword_duration_to_limit(i: &str) -> IResult<&str, &str> {
alt((
terminated(tag_no_case("DURATION"), keyword_follow_char),
terminated(tag_no_case("END"), keyword_follow_char),
terminated(tag_no_case("EVERY"), keyword_follow_char),
terminated(tag_no_case("EXACT"), keyword_follow_char),
terminated(tag_no_case("EXPLAIN"), keyword_follow_char),
terminated(tag_no_case("FIELD"), keyword_follow_char),
terminated(tag_no_case("FOR"), keyword_follow_char),
terminated(tag_no_case("FROM"), keyword_follow_char),
terminated(tag_no_case("GRANT"), keyword_follow_char),
terminated(tag_no_case("GRANTS"), keyword_follow_char),
terminated(tag_no_case("GROUP"), keyword_follow_char),
terminated(tag_no_case("GROUPS"), keyword_follow_char),
terminated(tag_no_case("IN"), keyword_follow_char),
terminated(tag_no_case("INF"), keyword_follow_char),
terminated(tag_no_case("INSERT"), keyword_follow_char),
terminated(tag_no_case("INTO"), keyword_follow_char),
terminated(tag_no_case("KEY"), keyword_follow_char),
terminated(tag_no_case("KEYS"), keyword_follow_char),
terminated(tag_no_case("KILL"), keyword_follow_char),
terminated(tag_no_case("LIMIT"), keyword_follow_char),
))(i)
}
/// Parses the input for matching InfluxQL keywords from MEASUREMENT to SET.
fn keyword_measurement_to_set(i: &str) -> IResult<&str, &str> {
alt((
terminated(tag_no_case("MEASUREMENT"), keyword_follow_char),
terminated(tag_no_case("MEASUREMENTS"), keyword_follow_char),
terminated(tag_no_case("NAME"), keyword_follow_char),
terminated(tag_no_case("OFFSET"), keyword_follow_char),
terminated(tag_no_case("ON"), keyword_follow_char),
terminated(tag_no_case("ORDER"), keyword_follow_char),
terminated(tag_no_case("PASSWORD"), keyword_follow_char),
terminated(tag_no_case("POLICY"), keyword_follow_char),
terminated(tag_no_case("POLICIES"), keyword_follow_char),
terminated(tag_no_case("PRIVILEGES"), keyword_follow_char),
terminated(tag_no_case("QUERIES"), keyword_follow_char),
terminated(tag_no_case("QUERY"), keyword_follow_char),
terminated(tag_no_case("READ"), keyword_follow_char),
terminated(tag_no_case("REPLICATION"), keyword_follow_char),
terminated(tag_no_case("RESAMPLE"), keyword_follow_char),
terminated(tag_no_case("RETENTION"), keyword_follow_char),
terminated(tag_no_case("REVOKE"), keyword_follow_char),
terminated(tag_no_case("SELECT"), keyword_follow_char),
terminated(tag_no_case("SERIES"), keyword_follow_char),
terminated(tag_no_case("SET"), keyword_follow_char),
))(i)
}
/// Parses the input for matching InfluxQL keywords from SHOW to WRITE.
fn keyword_show_to_write(i: &str) -> IResult<&str, &str> {
alt((
terminated(tag_no_case("SHOW"), keyword_follow_char),
terminated(tag_no_case("SHARD"), keyword_follow_char),
terminated(tag_no_case("SHARDS"), keyword_follow_char),
terminated(tag_no_case("SLIMIT"), keyword_follow_char),
terminated(tag_no_case("SOFFSET"), keyword_follow_char),
terminated(tag_no_case("STATS"), keyword_follow_char),
terminated(tag_no_case("SUBSCRIPTION"), keyword_follow_char),
terminated(tag_no_case("SUBSCRIPTIONS"), keyword_follow_char),
terminated(tag_no_case("TAG"), keyword_follow_char),
terminated(tag_no_case("TO"), keyword_follow_char),
terminated(tag_no_case("USER"), keyword_follow_char),
terminated(tag_no_case("USERS"), keyword_follow_char),
terminated(tag_no_case("VALUES"), keyword_follow_char),
terminated(tag_no_case("WHERE"), keyword_follow_char),
terminated(tag_no_case("WITH"), keyword_follow_char),
terminated(tag_no_case("WRITE"), keyword_follow_char),
))(i)
}
/// Matches any InfluxQL reserved keyword.
///
/// The keyword groups are tried in alphabetical order and the first group
/// that matches supplies the result.
pub fn sql_keyword(i: &str) -> IResult<&str, &str> {
    // NOTE that the alt function takes a tuple with a maximum arity of 21, hence
    // the reason the keywords are split across several group parsers.
    let mut any_keyword = alt((
        keyword_all_to_drop,
        keyword_duration_to_limit,
        keyword_measurement_to_set,
        keyword_show_to_write,
    ));
    any_keyword(i)
}
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_keywords() {
        // all keywords
        let keywords = [
            "ALL",
            "ALTER",
            "ANALYZE",
            "ANY",
            "AS",
            "ASC",
            "BEGIN",
            "BY",
            "CARDINALITY",
            "CREATE",
            "CONTINUOUS",
            "DATABASE",
            "DATABASES",
            "DEFAULT",
            "DELETE",
            "DESC",
            "DESTINATIONS",
            "DIAGNOSTICS",
            "DISTINCT",
            "DROP",
            "DURATION",
            "END",
            "EVERY",
            "EXACT",
            "EXPLAIN",
            "FIELD",
            "FOR",
            "FROM",
            "GRANT",
            "GRANTS",
            "GROUP",
            "GROUPS",
            "IN",
            "INF",
            "INSERT",
            "INTO",
            "KEY",
            "KEYS",
            "KILL",
            "LIMIT",
            "MEASUREMENT",
            "MEASUREMENTS",
            "NAME",
            "OFFSET",
            "ON",
            "ORDER",
            "PASSWORD",
            "POLICY",
            "POLICIES",
            "PRIVILEGES",
            "QUERIES",
            "QUERY",
            "READ",
            "REPLICATION",
            "RESAMPLE",
            "RETENTION",
            "REVOKE",
            "SELECT",
            "SERIES",
            "SET",
            "SHOW",
            "SHARD",
            "SHARDS",
            "SLIMIT",
            "SOFFSET",
            "STATS",
            "SUBSCRIPTION",
            "SUBSCRIPTIONS",
            "TAG",
            "TO",
            "USER",
            "USERS",
            "VALUES",
            "WHERE",
            "WITH",
            "WRITE",
        ];
        // Each keyword on its own (followed by end of input) must be accepted.
        for keyword in keywords {
            sql_keyword(keyword).unwrap();
        }

        // case insensitivity
        sql_keyword("all").unwrap();

        // ┌─────────────────────────────┐
        // │       Fallible tests        │
        // └─────────────────────────────┘

        assert!(sql_keyword("NOT_A_KEYWORD").is_err());
    }
}

View File

@ -0,0 +1,15 @@
//! # Parse a subset of [InfluxQL]
//!
//! [InfluxQL]: https://docs.influxdata.com/influxdb/v1.8/query_language
#![deny(rustdoc::broken_intra_doc_links, rust_2018_idioms)]
#![warn(
missing_copy_implementations,
missing_docs,
clippy::explicit_iter_loop,
clippy::future_not_send,
clippy::use_self,
clippy::clone_on_ref_ptr
)]
mod identifier;
mod keywords;