use arrow_flight::sql::SqlInfo;
use arrow_flight::Ticket;
use arrow_util::assert_batches_sorted_eq;
use influxdb3_client::Precision;
use test_helpers::assert_contains;

use crate::collect_stream;
use crate::TestServer;
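
// Exercises the FlightSQL surface of a spawned `TestServer` end to end: ad-hoc
// queries, query errors, prepared statements, and the GetSqlInfo, GetTables, and
// GetCatalogs metadata calls.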
#[test_log::test(tokio::test)]
async fn flight() -> Result<(), influxdb3_client::Error> {
    let server = TestServer::spawn().await;
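
    // Seed the "foo" database with three rows of line protocol for the `cpu`
    // table, written at raw timestamps 1-3 with nanosecond precision.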
    server
        .write_lp_to_db(
            "foo",
            "cpu,host=s1,region=us-east usage=0.9 1\n\
            cpu,host=s1,region=us-east usage=0.89 2\n\
            cpu,host=s1,region=us-east usage=0.85 3",
            Precision::Nanosecond,
        )
        .await?;

    let mut client = server.flight_sql_client("foo").await;
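
    // The `collect_stream` helper from the test harness is used throughout to
    // gather each streamed Flight response into record batches for comparison.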

    // Ad-hoc Query:
    {
        let response = client
            .query("SELECT host, region, time, usage FROM cpu")
            .await
            .unwrap();

        let batches = collect_stream(response).await;
        assert_batches_sorted_eq!(
            [
                "+------+---------+--------------------------------+-------+",
                "| host | region  | time                           | usage |",
                "+------+---------+--------------------------------+-------+",
                "| s1   | us-east | 1970-01-01T00:00:00.000000001Z | 0.9   |",
                "| s1   | us-east | 1970-01-01T00:00:00.000000002Z | 0.89  |",
                "| s1   | us-east | 1970-01-01T00:00:00.000000003Z | 0.85  |",
                "+------+---------+--------------------------------+-------+",
            ],
            &batches
        );
    }

    // Ad-hoc Query error:
    {
        let error = client
            .query("SELECT * FROM invalid_table")
            .await
            .unwrap_err();

        assert!(error
            .to_string()
            .contains("table 'public.iox.invalid_table' not found"));
    }

    // Prepared query:
    {
        let handle = client
            .prepare("SELECT host, region, time, usage FROM cpu".into(), None)
            .await
            .unwrap();

        let stream = client.execute(handle).await.unwrap();

        let batches = collect_stream(stream).await;
        assert_batches_sorted_eq!(
            [
                "+------+---------+--------------------------------+-------+",
                "| host | region  | time                           | usage |",
                "+------+---------+--------------------------------+-------+",
                "| s1   | us-east | 1970-01-01T00:00:00.000000001Z | 0.9   |",
                "| s1   | us-east | 1970-01-01T00:00:00.000000002Z | 0.89  |",
                "| s1   | us-east | 1970-01-01T00:00:00.000000003Z | 0.85  |",
                "+------+---------+--------------------------------+-------+",
            ],
            &batches
        );
    }

    // Get SQL Infos:
    {
        let infos = vec![SqlInfo::FlightSqlServerName as u32];
        let stream = client.get_sql_info(infos).await.unwrap();
        let batches = collect_stream(stream).await;
        assert_batches_sorted_eq!(
            [
                "+-----------+-----------------------------+",
                "| info_name | value                       |",
                "+-----------+-----------------------------+",
                "| 0         | {string_value=InfluxDB IOx} |",
                "+-----------+-----------------------------+",
            ],
            &batches
        );
    }

    // Get Tables
    {
        type OptStr = std::option::Option<&'static str>;
        let stream = client
            .get_tables(OptStr::None, OptStr::None, OptStr::None, vec![], false)
            .await
            .unwrap();
        let batches = collect_stream(stream).await;

        assert_batches_sorted_eq!(
            [
                "+--------------+--------------------+----------------------------+------------+",
                "| catalog_name | db_schema_name     | table_name                 | table_type |",
                "+--------------+--------------------+----------------------------+------------+",
                "| public       | information_schema | columns                    | VIEW       |",
                "| public       | information_schema | df_settings                | VIEW       |",
                "| public       | information_schema | schemata                   | VIEW       |",
                "| public       | information_schema | tables                     | VIEW       |",
                "| public       | information_schema | views                      | VIEW       |",
                "| public       | iox                | cpu                        | BASE TABLE |",
                "| public       | system             | distinct_caches            | BASE TABLE |",
                "| public       | system             | last_caches                | BASE TABLE |",
                "| public       | system             | parquet_files              | BASE TABLE |",
                "| public       | system             | processing_engine_plugins  | BASE TABLE |",
                "| public       | system             | processing_engine_triggers | BASE TABLE |",
                "| public       | system             | queries                    | BASE TABLE |",
                "+--------------+--------------------+----------------------------+------------+",
            ],
            &batches
        );
    }

    // Get Catalogs
    {
        let stream = client.get_catalogs().await.unwrap();
        let batches = collect_stream(stream).await;
        assert_batches_sorted_eq!(
            [
                "+--------------+",
                "| catalog_name |",
                "+--------------+",
                "| public       |",
                "+--------------+",
            ],
            &batches
        );
    }

    Ok(())
}
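
// Exercises InfluxQL queries over the Flight interface, including statements that
// are rejected (qualified measurement names) or not yet implemented (SHOW DATABASES).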
#[tokio::test]
async fn flight_influxql() {
    let server = TestServer::spawn().await;

    server
        .write_lp_to_db(
            "foo",
            "cpu,host=s1,region=us-east usage=0.9 1\n\
            cpu,host=s1,region=us-east usage=0.89 2\n\
            cpu,host=s1,region=us-east usage=0.85 3",
            Precision::Nanosecond,
        )
        .await
        .unwrap();
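
    // Unlike `flight` above, this test talks to the server through the plain
    // Flight client rather than the FlightSQL client.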
    let mut client = server.flight_client().await;
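
    // Each query below is issued as a raw Flight ticket: a JSON payload carrying
    // the target database, the query text, and a "query_type" of "influxql".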

    // Ad-hoc query, using qualified measurement name
    // This is no longer supported in 3.0, see
    // https://github.com/influxdata/influxdb_iox/pull/11254
    {
        let ticket = Ticket::new(
            r#"{
                "database": "foo",
                "sql_query": "SELECT time, host, region, usage FROM foo.autogen.cpu",
                "query_type": "influxql"
            }"#,
        );
        let response = client.do_get(ticket).await.unwrap_err().to_string();

        assert_contains!(response, "database prefix in qualified measurement syntax");
    }

    // InfluxQL-specific query to show measurements:
    {
        let ticket = Ticket::new(
            r#"{
                "database": "foo",
                "sql_query": "SHOW MEASUREMENTS",
                "query_type": "influxql"
            }"#,
        );
        let response = client.do_get(ticket).await.unwrap();

        let batches = collect_stream(response).await;
        assert_batches_sorted_eq!(
            [
                "+------------------+------+",
                "| iox::measurement | name |",
                "+------------------+------+",
                "| measurements     | cpu  |",
                "+------------------+------+",
            ],
            &batches
        );
    }

    // An InfluxQL query that is not supported over Flight:
    {
        let ticket = Ticket::new(
            r#"{
                "database": "foo",
                "sql_query": "SHOW DATABASES",
                "query_type": "influxql"
            }"#,
        );
        let response = client.do_get(ticket).await.unwrap_err();

        assert_contains!(
            response.to_string(),
            "This feature is not implemented: SHOW DATABASES"
        );
    }
}