Merge remote-tracking branch 'origin/master' into pd-partiton-store

2020-04-02 11:15:26 -04:00 · 2020-04-02 11:15:26 -04:00 · df67b9715a
parent a7d33b803c 71756da2f4
commit df67b9715a
6 changed files with 195 additions and 95 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -382,6 +382,7 @@ dependencies = [
 "csv 1.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
 "dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
 "dotenv 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
 "env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "futures 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
 "http 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -397,6 +398,8 @@ dependencies = [
 "serde 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)",
 "serde_json 1.0.48 (registry+https://github.com/rust-lang/crates.io-index)",
 "serde_urlencoded 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "snafu 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
 "tonic 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "tonic-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1500,6 +1503,25 @@ name = "smallvec"
 version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"

+[[package]]
+name = "snafu"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "snafu-derive 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "snafu-derive"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "proc-macro2 1.0.9 (registry+https://github.com/rust-lang/crates.io-index)",
+ "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "syn 1.0.16 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "socket2"
 version = "0.3.11"
@ -2327,6 +2349,8 @@ dependencies = [
 "checksum signal-hook-registry 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "94f478ede9f64724c5d173d7bb56099ec3e2d9fc2774aac65d34b8b890405f41"
 "checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8"
 "checksum smallvec 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5c2fb2ec9bcd216a5b0d0ccf31ab17b5ed1d627960edff65bbe95d3ce221cefc"
+"checksum snafu 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "546db9181bce2aa22ed883c33d65603b76335b4c2533a98289f54265043de7a1"
+"checksum snafu-derive 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bdc75da2e0323f297402fd9c8fdba709bb04e4c627cbe31d19a2c91fc8d9f0e2"
 "checksum socket2 0.3.11 (registry+https://github.com/rust-lang/crates.io-index)" = "e8b74de517221a2cb01a53349cf54182acdc31a074727d3079068448c0676d85"
 "checksum sourcefile 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4bf77cb82ba8453b42b6ae1d692e4cdc92f9a47beaf89a847c8be83f4e328ad3"
 "checksum spin 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -37,12 +37,15 @@ croaring = "0.4.2"
 http = "0.2.0"
 serde_urlencoded = "0.6.1"
 nom = "5.1.1"
+snafu = "0.6.2"
+either = "1.5.3"

 [dev-dependencies]
 criterion = "0.3"
 reqwest = { version = "0.10.1", features = ["blocking"] }
 assert_cmd = "0.12.0"
 rand = "0.7.2"
+tempfile = "3.1.0"

 [[bench]]
 name = "encoders"
--- a/src/line_parser.rs
+++ b/src/line_parser.rs
@ -1,3 +1,4 @@
+use either::Either;
 use nom::{
    branch::alt,
    bytes::complete::{tag, take_while1},
@ -7,7 +8,16 @@ use nom::{
    sequence::{separated_pair, terminated, tuple},
    IResult,
 };
-use std::{error, fmt};
+use snafu::Snafu;
+use std::collections::BTreeMap;
+
+#[derive(Debug, Snafu)]
+pub enum Error {
+    #[snafu(display(r#"Must not contain duplicate tags, but "{}" was repeated"#, tag_key))]
+    DuplicateTag { tag_key: String },
+}
+
+pub type Result<T, E = Error> = std::result::Result<T, E>;

 #[derive(Debug, PartialEq, Clone)]
 pub struct Point<T> {
@ -18,7 +28,7 @@ pub struct Point<T> {
 }

 impl<T> Point<T> {
-    pub fn index_pairs(&self) -> Result<Vec<Pair>, ParseError> {
+    pub fn index_pairs(&self) -> Result<Vec<Pair>> {
        index_pairs(&self.series)
    }
 }
@ -97,7 +107,7 @@ impl PointType {
        }
    }

-    pub fn index_pairs(&self) -> Result<Vec<Pair>, ParseError> {
+    pub fn index_pairs(&self) -> Result<Vec<Pair>> {
        match self {
            PointType::I64(p) => p.index_pairs(),
            PointType::F64(p) => p.index_pairs(),
@ -109,7 +119,7 @@ impl PointType {
 /// index_pairs parses the series key into key value pairs for insertion into the index. In
 /// cases where this series is already in the database, this parse step can be skipped entirely.
 /// The measurement is represented as a _m key and field as _f.
-pub fn index_pairs(key: &str) -> Result<Vec<Pair>, ParseError> {
+pub fn index_pairs(key: &str) -> Result<Vec<Pair>> {
    let chars = key.chars();
    let mut pairs = vec![];
    let mut key = "_m".to_string();
@ -153,24 +163,6 @@ pub struct Pair {
    pub value: String,
 }

-#[derive(Debug, Clone)]
-pub struct ParseError {
-    description: String,
-}
-
-impl fmt::Display for ParseError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "{}", self.description)
-    }
-}
-
-impl error::Error for ParseError {
-    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
-        // Generic error, underlying cause isn't tracked.
-        None
-    }
-}
-
 #[derive(Debug)]
 struct ParsedLine<'a> {
    measurement: &'a str,
@ -186,37 +178,54 @@ enum FieldValue {
 }

 // TODO: Return an error for invalid inputs
-pub fn parse(input: &str) -> Vec<PointType> {
+pub fn parse(input: &str) -> Result<Vec<PointType>> {
    input
        .lines()
        .flat_map(|line| match parse_line(line) {
-            Ok((_remaining, parsed_line)) => {
-                let ParsedLine {
-                    measurement,
-                    tag_set,
-                    field_set,
-                    timestamp,
-                } = parsed_line;
-
-                assert!(tag_set.is_none(), "TODO: tag set not supported");
-                let timestamp = timestamp.expect("TODO: default timestamp not supported");
-
-                field_set.into_iter().map(move |(field_key, field_value)| {
-                    let series = format!("{}\t{}", measurement, field_key);
-
-                    match field_value {
-                        FieldValue::I64(value) => PointType::new_i64(series, value, timestamp),
-                        FieldValue::F64(value) => PointType::new_f64(series, value, timestamp),
-                    }
-                })
-            }
-            Err(e) => {
-                panic!("TODO: Failed to parse: {}", e);
-            }
+            Ok((_remaining, parsed_line)) => match line_to_points(parsed_line) {
+                Ok(i) => Either::Left(i.map(Ok)),
+                Err(e) => Either::Right(std::iter::once(Err(e))),
+            },
+            Err(e) => panic!("TODO: Failed to parse: {}", e),
        })
        .collect()
 }

+fn line_to_points(parsed_line: ParsedLine<'_>) -> Result<impl Iterator<Item = PointType> + '_> {
+    let ParsedLine {
+        measurement,
+        tag_set,
+        field_set,
+        timestamp,
+    } = parsed_line;
+
+    let mut unique_sorted_tag_set = BTreeMap::new();
+    for (tag_key, tag_value) in tag_set.unwrap_or_default() {
+        if unique_sorted_tag_set.insert(tag_key, tag_value).is_some() {
+            return DuplicateTag { tag_key }.fail();
+        }
+    }
+    let tag_set = unique_sorted_tag_set;
+
+    let timestamp = timestamp.expect("TODO: default timestamp not supported");
+
+    let mut series_base = String::from(measurement);
+    for (tag_key, tag_value) in tag_set {
+        use std::fmt::Write;
+        write!(&mut series_base, ",{}={}", tag_key, tag_value).expect("Could not append string");
+    }
+    let series_base = series_base;
+
+    Ok(field_set.into_iter().map(move |(field_key, field_value)| {
+        let series = format!("{}\t{}", series_base, field_key);
+
+        match field_value {
+            FieldValue::I64(value) => PointType::new_i64(series, value, timestamp),
+            FieldValue::F64(value) => PointType::new_f64(series, value, timestamp),
+        }
+    }))
+}
+
 fn parse_line(i: &str) -> IResult<&str, ParsedLine<'_>> {
    let tag_set = map(tuple((tag(","), tag_set)), |(_, ts)| ts);
    let field_set = map(tuple((tag(" "), field_set)), |(_, fs)| fs);
@ -235,14 +244,12 @@ fn parse_line(i: &str) -> IResult<&str, ParsedLine<'_>> {
 }

 fn measurement(i: &str) -> IResult<&str, &str> {
-    // TODO: This needs to account for `,` to separate tag sets
-    take_while1(|c| c != ' ')(i)
+    take_while1(|c| c != ' ' && c != ',')(i)
 }

-// TODO: ensure that the tags are sorted
 fn tag_set(i: &str) -> IResult<&str, Vec<(&str, &str)>> {
    let tag_key = take_while1(|c| c != '=');
-    let tag_value = take_while1(|c| c != ' ');
+    let tag_value = take_while1(|c| c != ',' && c != ' ');
    let one_tag = separated_pair(tag_key, tag("="), tag_value);
    separated_list(tag(","), one_tag)(i)
 }
@ -281,40 +288,49 @@ mod test {
    use super::*;
    use crate::tests::approximately_equal;

+    type Error = Box<dyn std::error::Error>;
+    type Result<T = (), E = Error> = std::result::Result<T, E>;
+
    #[test]
-    fn parse_single_field_integer() {
+    fn parse_single_field_integer() -> Result {
        let input = "foo asdf=23i 1234";
-        let vals = parse(input);
+        let vals = parse(input)?;

        assert_eq!(vals[0].series(), "foo\tasdf");
        assert_eq!(vals[0].time(), 1234);
        assert_eq!(vals[0].i64_value().unwrap(), 23);
+
+        Ok(())
    }

    #[test]
-    fn parse_single_field_float_no_decimal() {
+    fn parse_single_field_float_no_decimal() -> Result {
        let input = "foo asdf=44 546";
-        let vals = parse(input);
+        let vals = parse(input)?;

        assert_eq!(vals[0].series(), "foo\tasdf");
        assert_eq!(vals[0].time(), 546);
        assert!(approximately_equal(vals[0].f64_value().unwrap(), 44.0));
+
+        Ok(())
    }

    #[test]
-    fn parse_single_field_float_with_decimal() {
+    fn parse_single_field_float_with_decimal() -> Result {
        let input = "foo asdf=3.74 123";
-        let vals = parse(input);
+        let vals = parse(input)?;

        assert_eq!(vals[0].series(), "foo\tasdf");
        assert_eq!(vals[0].time(), 123);
        assert!(approximately_equal(vals[0].f64_value().unwrap(), 3.74));
+
+        Ok(())
    }

    #[test]
-    fn parse_two_fields_integer() {
+    fn parse_two_fields_integer() -> Result {
        let input = "foo asdf=23i,bar=5i 1234";
-        let vals = parse(input);
+        let vals = parse(input)?;

        assert_eq!(vals[0].series(), "foo\tasdf");
        assert_eq!(vals[0].time(), 1234);
@ -323,12 +339,14 @@ mod test {
        assert_eq!(vals[1].series(), "foo\tbar");
        assert_eq!(vals[1].time(), 1234);
        assert_eq!(vals[1].i64_value().unwrap(), 5);
+
+        Ok(())
    }

    #[test]
-    fn parse_two_fields_float() {
+    fn parse_two_fields_float() -> Result {
        let input = "foo asdf=23.1,bar=5 1234";
-        let vals = parse(input);
+        let vals = parse(input)?;

        assert_eq!(vals[0].series(), "foo\tasdf");
        assert_eq!(vals[0].time(), 1234);
@ -337,12 +355,14 @@ mod test {
        assert_eq!(vals[1].series(), "foo\tbar");
        assert_eq!(vals[1].time(), 1234);
        assert!(approximately_equal(vals[1].f64_value().unwrap(), 5.0));
+
+        Ok(())
    }

    #[test]
-    fn parse_mixed_float_and_integer() {
+    fn parse_mixed_float_and_integer() -> Result {
        let input = "foo asdf=23.1,bar=5i 1234";
-        let vals = parse(input);
+        let vals = parse(input)?;

        assert_eq!(vals[0].series(), "foo\tasdf");
        assert_eq!(vals[0].time(), 1234);
@ -351,14 +371,41 @@ mod test {
        assert_eq!(vals[1].series(), "foo\tbar");
        assert_eq!(vals[1].time(), 1234);
        assert_eq!(vals[1].i64_value().unwrap(), 5);
+
+        Ok(())
    }

    #[test]
-    fn parse_tag_set_included_in_series() {
+    fn parse_tag_set_included_in_series() -> Result {
        let input = "foo,tag1=1,tag2=2 value=1 123";
-        let vals = parse(input);
+        let vals = parse(input)?;

        assert_eq!(vals[0].series(), "foo,tag1=1,tag2=2\tvalue");
+
+        Ok(())
+    }
+
+    #[test]
+    fn parse_tag_set_unsorted() -> Result {
+        let input = "foo,tag2=2,tag1=1 value=1 123";
+        let vals = parse(input)?;
+
+        assert_eq!(vals[0].series(), "foo,tag1=1,tag2=2\tvalue");
+
+        Ok(())
+    }
+
+    #[test]
+    fn parse_tag_set_duplicate_tags() -> Result {
+        let input = "foo,tag=1,tag=2 value=1 123";
+        let err = parse(input).expect_err("Parsing duplicate tags should fail");
+
+        assert_eq!(
+            err.to_string(),
+            r#"Must not contain duplicate tags, but "tag" was repeated"#
+        );
+
+        Ok(())
    }

    #[test]
--- a/src/main.rs
+++ b/src/main.rs
@ -97,7 +97,7 @@ async fn write(req: hyper::Request<Body>, app: Arc<App>) -> Result<Body, Applica
    let body = body.freeze();
    let body = str::from_utf8(&body).unwrap();

-    let mut points = line_parser::parse(body);
+    let mut points = line_parser::parse(body).expect("TODO: Unable to parse lines");

    app.db
        .write_points(write_info.org_id, bucket_id, &mut points)
--- a/src/storage/memdb.rs
+++ b/src/storage/memdb.rs
@ -1,7 +1,7 @@
 #![allow(dead_code)]

 use crate::delorean::{Node, Predicate, TimestampRange};
-use crate::line_parser::{ParseError, Point, PointType};
+use crate::line_parser::{self, Point, PointType};
 use crate::storage::partitioned_store::{ReadBatch, ReadValues};
 use crate::storage::predicate::{Evaluate, EvaluateVisitor};
 use crate::storage::series_store::ReadPoint;
@ -122,7 +122,7 @@ impl SeriesMap {
    /// of the memory size.
    const SERIES_ID_BYTES: usize = 24;

-    fn insert_series(&mut self, point: &mut PointType) -> Result<(), ParseError> {
+    fn insert_series(&mut self, point: &mut PointType) -> line_parser::Result<()> {
        if let Some(id) = self.series_key_to_id.get(point.series()) {
            point.set_series_id(*id);
            return Ok(());
--- a/tests/end-to-end.rs
+++ b/tests/end-to-end.rs
@ -20,11 +20,11 @@ use futures::prelude::*;
 use prost::Message;
 use std::convert::TryInto;
 use std::env;
-use std::process::{Command, Stdio};
+use std::process::{Child, Command, Stdio};
 use std::str;
-use std::thread::sleep;
 use std::time::{Duration, SystemTime};
 use std::u32;
+use tempfile::TempDir;

 const URL_BASE: &str = "http://localhost:8080/api/v2";
 const GRPC_URL_BASE: &str = "http://localhost:8081/";
@ -43,6 +43,9 @@ use grpc::{
    Node, Predicate, ReadFilterRequest, Tag, TagKeysRequest, TagValuesRequest, TimestampRange,
 };

+type Error = Box<dyn std::error::Error>;
+type Result<T, E = Error> = std::result::Result<T, E>;
+
 macro_rules! assert_unwrap {
    ($e:expr, $p:path) => {
        match $e {
@ -68,7 +71,7 @@ async fn read_data(
    bucket_name: &str,
    predicate: &str,
    seconds_ago: u64,
-) -> Result<String, Box<dyn std::error::Error>> {
+) -> Result<String> {
    let url = format!("{}{}", URL_BASE, path);
    Ok(client
        .get(&url)
@ -91,7 +94,7 @@ async fn write_data(
    org_id: u32,
    bucket_name: &str,
    body: String,
-) -> Result<(), Box<dyn std::error::Error>> {
+) -> Result<()> {
    let url = format!("{}{}", URL_BASE, path);
    client
        .post(&url)
@ -106,27 +109,10 @@ async fn write_data(
    Ok(())
 }

-// TODO: if TEST_DELOREAN_DB_DIR is set, create a temporary directory in that directory or
-// otherwise isolate the database used in this test with the database used in other tests, rather
-// than always ignoring TEST_DELOREAN_DB_DIR
-fn get_test_storage_path() -> String {
-    let mut path = env::temp_dir();
-    path.push("delorean/");
-    std::fs::remove_dir_all(&path).unwrap();
-    path.into_os_string()
-        .into_string()
-        .expect("Should have been able to turn temp dir into String")
-}
-
 #[tokio::test]
-async fn read_and_write_data() -> Result<(), Box<dyn std::error::Error>> {
-    let mut server_thread = Command::cargo_bin("delorean")?
-        .stdout(Stdio::null())
-        .env("DELOREAN_DB_DIR", get_test_storage_path())
-        .spawn()?;
-
-    // TODO: poll the server to see if it's ready instead of sleeping
-    sleep(Duration::from_secs(3));
+async fn read_and_write_data() -> Result<()> {
+    let server = TestServer::new()?;
+    server.wait_until_ready().await;

    let org_id = 7878;
    let bucket_name = "all";
@ -334,10 +320,6 @@ cpu_load_short,server01,us-east,value,{},1234567.891011

    assert_eq!(values, vec!["server01", "server02"]);

-    server_thread
-        .kill()
-        .expect("Should have been able to kill the test server");
-
    Ok(())
 }

@ -351,3 +333,47 @@ fn tags_as_strings(tags: &[Tag]) -> Vec<(&str, &str)> {
        })
        .collect()
 }
+
+struct TestServer {
+    server_process: Child,
+
+    // The temporary directory **must** be last so that it is
+    // dropped after the database closes.
+    #[allow(dead_code)]
+    dir: TempDir,
+}
+
+impl TestServer {
+    fn new() -> Result<Self> {
+        let _ = dotenv::dotenv(); // load .env file if present
+
+        let root = env::var_os("TEST_DELOREAN_DB_DIR").unwrap_or_else(|| env::temp_dir().into());
+
+        let dir = tempfile::Builder::new()
+            .prefix("delorean")
+            .tempdir_in(root)?;
+
+        let server_process = Command::cargo_bin("delorean")?
+            .stdout(Stdio::null())
+            .env("DELOREAN_DB_DIR", dir.path())
+            .spawn()?;
+
+        Ok(Self {
+            dir,
+            server_process,
+        })
+    }
+
+    async fn wait_until_ready(&self) {
+        // TODO: poll the server to see if it's ready instead of sleeping
+        tokio::time::delay_for(Duration::from_secs(3)).await;
+    }
+}
+
+impl Drop for TestServer {
+    fn drop(&mut self) {
+        self.server_process
+            .kill()
+            .expect("Should have been able to kill the test server");
+    }
+}