feat: Add influxdb_influxql_parser crate (#5415)
* feat: Add crate; parse quoted identifiers * chore: Run cargo hakari tasks * chore: satisfy linter * chore: Use `test_helpers::Result` * feat: Add all InfluxQL keywords * chore: Update influxdb_influxql_parser/src/lib.rs Co-authored-by: Marco Neumann <marco@crepererum.net> * chore: PR feedback * chore: PR Feedback, remove Result<()> * chore: Update Cargo.lock Co-authored-by: CircleCI[bot] <circleci@influxdata.com> Co-authored-by: Marco Neumann <marco@crepererum.net>pull/24376/head
parent
d75df2b610
commit
b4e5895d7a
|
@ -2129,6 +2129,15 @@ dependencies = [
|
|||
"uuid 1.1.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "influxdb_influxql_parser"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"nom",
|
||||
"test_helpers",
|
||||
"workspace-hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "influxdb_iox"
|
||||
version = "0.1.0"
|
||||
|
|
|
@ -14,6 +14,7 @@ members = [
|
|||
"executor",
|
||||
"generated_types",
|
||||
"import",
|
||||
"influxdb_influxql_parser",
|
||||
"influxdb_iox",
|
||||
"influxdb_iox_client",
|
||||
"influxdb_line_protocol",
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
[package]
|
||||
name = "influxdb_influxql_parser"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies] # In alphabetical order
|
||||
nom = { version = "7", default-features = false, features = ["std"] }
|
||||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
test_helpers = { path = "../test_helpers" }
|
|
@ -0,0 +1,128 @@
|
|||
//! # Parse an InfluxQL [identifier]
|
||||
//!
|
||||
//! Identifiers are parsed using the following rules:
|
||||
//!
|
||||
//! * double quoted identifiers can contain any unicode character other than a new line
|
||||
//! * double quoted identifiers can contain escaped characters, namely `\"`, `\n`, `\t`, `\\` and `\'`
|
||||
//! * double quoted identifiers can contain [InfluxQL keywords][keywords]
|
||||
//! * unquoted identifiers must start with an upper or lowercase ASCII letter or `_`
|
||||
//! * unquoted identifiers may contain only ASCII letters, decimal digits, and `_`
|
||||
//!
|
||||
//! [identifier]: https://docs.influxdata.com/influxdb/v1.8/query_language/spec/#identifiers
|
||||
//! [keywords]: https://docs.influxdata.com/influxdb/v1.8/query_language/spec/#keywords
|
||||
|
||||
#![allow(dead_code)]
|
||||
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::streaming::is_not;
|
||||
use nom::character::streaming::char;
|
||||
use nom::combinator::{map, value, verify};
|
||||
use nom::multi::fold_many0;
|
||||
use nom::sequence::{delimited, preceded};
|
||||
use nom::IResult;
|
||||
|
||||
// Taken liberally from https://github.com/Geal/nom/blob/main/examples/string.rs and
|
||||
// adjusted for InfluxQL
|
||||
|
||||
/// Parse an escaped character: `\n`, `\t`, `\"`, `\\` and `\'`.
|
||||
fn parse_escaped_char(input: &str) -> IResult<&str, char> {
|
||||
preceded(
|
||||
char('\\'),
|
||||
alt((
|
||||
value('\n', char('n')),
|
||||
value('\t', char('t')),
|
||||
value('\\', char('\\')),
|
||||
value('"', char('"')),
|
||||
value('\'', char('\'')),
|
||||
)),
|
||||
)(input)
|
||||
}
|
||||
|
||||
/// Parse a non-empty block of text that doesn't include \ or "
|
||||
fn parse_literal(input: &str) -> IResult<&str, &str> {
|
||||
// Skip newlines, " and \.
|
||||
let not_quote_slash_newline = is_not("\"\\\n");
|
||||
|
||||
verify(not_quote_slash_newline, |s: &str| !s.is_empty())(input)
|
||||
}
|
||||
|
||||
/// One piece of a quoted string while it is being parsed: either a run of
/// unescaped characters borrowed from the input, or a single character
/// obtained by decoding an escape sequence.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum StringFragment<'src> {
    /// A non-empty run of characters that needed no unescaping.
    Literal(&'src str),
    /// The character denoted by an escape sequence such as `\n`.
    EscapedChar(char),
}
|
||||
|
||||
/// Combine parse_literal, parse_escaped_whitespace, and parse_escaped_char
|
||||
/// into a StringFragment.
|
||||
fn parse_fragment(input: &str) -> IResult<&str, StringFragment<'_>> {
|
||||
alt((
|
||||
map(parse_literal, StringFragment::Literal),
|
||||
map(parse_escaped_char, StringFragment::EscapedChar),
|
||||
))(input)
|
||||
}
|
||||
|
||||
/// Parse a string. Use a loop of parse_fragment and push all of the fragments
|
||||
/// into an output string.
|
||||
pub fn parse_string(input: &str) -> IResult<&str, String> {
|
||||
// fold_many0 is the equivalent of iterator::fold. It runs a parser in a loop,
|
||||
// and for each output value, calls a folding function on each output value.
|
||||
let build_string = fold_many0(parse_fragment, String::new, |mut string, fragment| {
|
||||
match fragment {
|
||||
StringFragment::Literal(s) => string.push_str(s),
|
||||
StringFragment::EscapedChar(c) => string.push(c),
|
||||
}
|
||||
string
|
||||
});
|
||||
|
||||
delimited(char('"'), build_string, char('"'))(input)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_parse_string() {
        // Inputs that must parse, paired with the expected decoded contents.
        let accepted = [
            // ascii
            (r#""quick draw""#, "quick draw"),
            // unicode (the trailing code point is 👽)
            ("\"quick draw\u{1f47d}\"", "quick draw\u{1f47d}"),
            // escaped characters
            (r#""\n\t\'\"""#, "\n\t'\""),
            // literal tab
            ("\"quick\tdraw\"", "quick\tdraw"),
            // literal carriage return
            ("\"quick\rdraw\"", "quick\rdraw"),
        ];
        for (input, want) in accepted {
            let (_, got) = parse_string(input).unwrap();
            assert_eq!(got, want);
        }

        // Inputs that must be rejected.
        let rejected = [
            // not terminated
            r#""quick draw"#,
            // literal newline
            "\"quick\ndraw\"",
            // invalid escape
            r#""quick\idraw""#,
        ];
        for input in rejected {
            assert!(parse_string(input).is_err());
        }
    }
}
|
|
@ -0,0 +1,235 @@
|
|||
//! # Parse InfluxQL [keywords]
|
||||
//!
|
||||
//! [keywords]: https://docs.influxdata.com/influxdb/v1.8/query_language/spec/#keywords
|
||||
|
||||
#![allow(dead_code)]
|
||||
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::{tag, tag_no_case};
|
||||
use nom::combinator::{eof, peek};
|
||||
use nom::sequence::terminated;
|
||||
use nom::IResult;
|
||||
|
||||
/// Peeks at the input for acceptable characters following a keyword.
|
||||
fn keyword_follow_char(i: &str) -> IResult<&str, &str> {
|
||||
peek(alt((
|
||||
tag(" "),
|
||||
tag("\n"),
|
||||
tag(";"),
|
||||
tag("("),
|
||||
tag(")"),
|
||||
tag("\t"),
|
||||
tag(","),
|
||||
tag("="),
|
||||
eof,
|
||||
)))(i)
|
||||
}
|
||||
|
||||
/// Parses the input for matching InfluxQL keywords from ALL to DROP.
|
||||
fn keyword_all_to_drop(i: &str) -> IResult<&str, &str> {
|
||||
alt((
|
||||
terminated(tag_no_case("ALL"), keyword_follow_char),
|
||||
terminated(tag_no_case("ALTER"), keyword_follow_char),
|
||||
terminated(tag_no_case("ANALYZE"), keyword_follow_char),
|
||||
terminated(tag_no_case("ANY"), keyword_follow_char),
|
||||
terminated(tag_no_case("AS"), keyword_follow_char),
|
||||
terminated(tag_no_case("ASC"), keyword_follow_char),
|
||||
terminated(tag_no_case("BEGIN"), keyword_follow_char),
|
||||
terminated(tag_no_case("BY"), keyword_follow_char),
|
||||
terminated(tag_no_case("CARDINALITY"), keyword_follow_char),
|
||||
terminated(tag_no_case("CREATE"), keyword_follow_char),
|
||||
terminated(tag_no_case("CONTINUOUS"), keyword_follow_char),
|
||||
terminated(tag_no_case("DATABASE"), keyword_follow_char),
|
||||
terminated(tag_no_case("DATABASES"), keyword_follow_char),
|
||||
terminated(tag_no_case("DEFAULT"), keyword_follow_char),
|
||||
terminated(tag_no_case("DELETE"), keyword_follow_char),
|
||||
terminated(tag_no_case("DESC"), keyword_follow_char),
|
||||
terminated(tag_no_case("DESTINATIONS"), keyword_follow_char),
|
||||
terminated(tag_no_case("DIAGNOSTICS"), keyword_follow_char),
|
||||
terminated(tag_no_case("DISTINCT"), keyword_follow_char),
|
||||
terminated(tag_no_case("DROP"), keyword_follow_char),
|
||||
))(i)
|
||||
}
|
||||
|
||||
/// Parses the input for matching InfluxQL keywords from DURATION to LIMIT.
|
||||
fn keyword_duration_to_limit(i: &str) -> IResult<&str, &str> {
|
||||
alt((
|
||||
terminated(tag_no_case("DURATION"), keyword_follow_char),
|
||||
terminated(tag_no_case("END"), keyword_follow_char),
|
||||
terminated(tag_no_case("EVERY"), keyword_follow_char),
|
||||
terminated(tag_no_case("EXACT"), keyword_follow_char),
|
||||
terminated(tag_no_case("EXPLAIN"), keyword_follow_char),
|
||||
terminated(tag_no_case("FIELD"), keyword_follow_char),
|
||||
terminated(tag_no_case("FOR"), keyword_follow_char),
|
||||
terminated(tag_no_case("FROM"), keyword_follow_char),
|
||||
terminated(tag_no_case("GRANT"), keyword_follow_char),
|
||||
terminated(tag_no_case("GRANTS"), keyword_follow_char),
|
||||
terminated(tag_no_case("GROUP"), keyword_follow_char),
|
||||
terminated(tag_no_case("GROUPS"), keyword_follow_char),
|
||||
terminated(tag_no_case("IN"), keyword_follow_char),
|
||||
terminated(tag_no_case("INF"), keyword_follow_char),
|
||||
terminated(tag_no_case("INSERT"), keyword_follow_char),
|
||||
terminated(tag_no_case("INTO"), keyword_follow_char),
|
||||
terminated(tag_no_case("KEY"), keyword_follow_char),
|
||||
terminated(tag_no_case("KEYS"), keyword_follow_char),
|
||||
terminated(tag_no_case("KILL"), keyword_follow_char),
|
||||
terminated(tag_no_case("LIMIT"), keyword_follow_char),
|
||||
))(i)
|
||||
}
|
||||
|
||||
/// Parses the input for matching InfluxQL keywords from MEASUREMENT to SET.
|
||||
fn keyword_measurement_to_set(i: &str) -> IResult<&str, &str> {
|
||||
alt((
|
||||
terminated(tag_no_case("MEASUREMENT"), keyword_follow_char),
|
||||
terminated(tag_no_case("MEASUREMENTS"), keyword_follow_char),
|
||||
terminated(tag_no_case("NAME"), keyword_follow_char),
|
||||
terminated(tag_no_case("OFFSET"), keyword_follow_char),
|
||||
terminated(tag_no_case("ON"), keyword_follow_char),
|
||||
terminated(tag_no_case("ORDER"), keyword_follow_char),
|
||||
terminated(tag_no_case("PASSWORD"), keyword_follow_char),
|
||||
terminated(tag_no_case("POLICY"), keyword_follow_char),
|
||||
terminated(tag_no_case("POLICIES"), keyword_follow_char),
|
||||
terminated(tag_no_case("PRIVILEGES"), keyword_follow_char),
|
||||
terminated(tag_no_case("QUERIES"), keyword_follow_char),
|
||||
terminated(tag_no_case("QUERY"), keyword_follow_char),
|
||||
terminated(tag_no_case("READ"), keyword_follow_char),
|
||||
terminated(tag_no_case("REPLICATION"), keyword_follow_char),
|
||||
terminated(tag_no_case("RESAMPLE"), keyword_follow_char),
|
||||
terminated(tag_no_case("RETENTION"), keyword_follow_char),
|
||||
terminated(tag_no_case("REVOKE"), keyword_follow_char),
|
||||
terminated(tag_no_case("SELECT"), keyword_follow_char),
|
||||
terminated(tag_no_case("SERIES"), keyword_follow_char),
|
||||
terminated(tag_no_case("SET"), keyword_follow_char),
|
||||
))(i)
|
||||
}
|
||||
|
||||
/// Parses the input for matching InfluxQL keywords from SHOW to WRITE.
|
||||
fn keyword_show_to_write(i: &str) -> IResult<&str, &str> {
|
||||
alt((
|
||||
terminated(tag_no_case("SHOW"), keyword_follow_char),
|
||||
terminated(tag_no_case("SHARD"), keyword_follow_char),
|
||||
terminated(tag_no_case("SHARDS"), keyword_follow_char),
|
||||
terminated(tag_no_case("SLIMIT"), keyword_follow_char),
|
||||
terminated(tag_no_case("SOFFSET"), keyword_follow_char),
|
||||
terminated(tag_no_case("STATS"), keyword_follow_char),
|
||||
terminated(tag_no_case("SUBSCRIPTION"), keyword_follow_char),
|
||||
terminated(tag_no_case("SUBSCRIPTIONS"), keyword_follow_char),
|
||||
terminated(tag_no_case("TAG"), keyword_follow_char),
|
||||
terminated(tag_no_case("TO"), keyword_follow_char),
|
||||
terminated(tag_no_case("USER"), keyword_follow_char),
|
||||
terminated(tag_no_case("USERS"), keyword_follow_char),
|
||||
terminated(tag_no_case("VALUES"), keyword_follow_char),
|
||||
terminated(tag_no_case("WHERE"), keyword_follow_char),
|
||||
terminated(tag_no_case("WITH"), keyword_follow_char),
|
||||
terminated(tag_no_case("WRITE"), keyword_follow_char),
|
||||
))(i)
|
||||
}
|
||||
|
||||
// Matches any InfluxQL reserved keyword.
|
||||
pub fn sql_keyword(i: &str) -> IResult<&str, &str> {
|
||||
// NOTE that the alt function takes a tuple with a maximum arity of 21, hence
|
||||
// the reason these are broken into groups
|
||||
alt((
|
||||
keyword_all_to_drop,
|
||||
keyword_duration_to_limit,
|
||||
keyword_measurement_to_set,
|
||||
keyword_show_to_write,
|
||||
))(i)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// Every keyword the parser is expected to recognise.
    const ALL_KEYWORDS: &[&str] = &[
        "ALL",
        "ALTER",
        "ANALYZE",
        "ANY",
        "AS",
        "ASC",
        "BEGIN",
        "BY",
        "CARDINALITY",
        "CREATE",
        "CONTINUOUS",
        "DATABASE",
        "DATABASES",
        "DEFAULT",
        "DELETE",
        "DESC",
        "DESTINATIONS",
        "DIAGNOSTICS",
        "DISTINCT",
        "DROP",
        "DURATION",
        "END",
        "EVERY",
        "EXACT",
        "EXPLAIN",
        "FIELD",
        "FOR",
        "FROM",
        "GRANT",
        "GRANTS",
        "GROUP",
        "GROUPS",
        "IN",
        "INF",
        "INSERT",
        "INTO",
        "KEY",
        "KEYS",
        "KILL",
        "LIMIT",
        "MEASUREMENT",
        "MEASUREMENTS",
        "NAME",
        "OFFSET",
        "ON",
        "ORDER",
        "PASSWORD",
        "POLICY",
        "POLICIES",
        "PRIVILEGES",
        "QUERIES",
        "QUERY",
        "READ",
        "REPLICATION",
        "RESAMPLE",
        "RETENTION",
        "REVOKE",
        "SELECT",
        "SERIES",
        "SET",
        "SHOW",
        "SHARD",
        "SHARDS",
        "SLIMIT",
        "SOFFSET",
        "STATS",
        "SUBSCRIPTION",
        "SUBSCRIPTIONS",
        "TAG",
        "TO",
        "USER",
        "USERS",
        "VALUES",
        "WHERE",
        "WITH",
        "WRITE",
    ];

    #[test]
    fn test_keywords() {
        // all keywords
        for &kw in ALL_KEYWORDS {
            sql_keyword(kw).unwrap();
        }

        // case insensitivity
        sql_keyword("all").unwrap();

        // input that is not a keyword must be rejected
        assert!(sql_keyword("NOT_A_KEYWORD").is_err());
    }
}
|
|
@ -0,0 +1,15 @@
|
|||
//! # Parse a subset of [InfluxQL]
|
||||
//!
|
||||
//! [InfluxQL]: https://docs.influxdata.com/influxdb/v1.8/query_language
|
||||
|
||||
#![deny(rustdoc::broken_intra_doc_links, rust_2018_idioms)]
|
||||
#![warn(
|
||||
missing_copy_implementations,
|
||||
missing_docs,
|
||||
clippy::explicit_iter_loop,
|
||||
clippy::future_not_send,
|
||||
clippy::use_self,
|
||||
clippy::clone_on_ref_ptr
|
||||
)]
|
||||
mod identifier;
|
||||
mod keywords;
|
Loading…
Reference in New Issue