From 0606203b4064dfe1b539516c7043eb85119175b7 Mon Sep 17 00:00:00 2001
From: Paul Dix <paul@pauldix.net>
Date: Thu, 11 Mar 2021 15:18:00 -0500
Subject: [PATCH] feat: add configuration for routing rules

This is a strawman for what routing rules might look like in DatabaseRules. Once there's a chance for discussion, I'd move next to looking at how the Server would split up an incoming write into separate FB blobs to be sent to remote IOx servers. That might change what the API/configuration looks like as that's how it would be used (at least for writes).

After that it would make sense to move to adding the proto definitions with conversions and gRPC and CLI CRUD to configure routing rules.
---
 Cargo.lock                       | 12 +++++
 data_types/Cargo.toml            |  2 +
 data_types/src/database_rules.rs | 78 ++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+)
diff --git a/Cargo.lock b/Cargo.lock
index 50de29dc12..12113e896b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -803,7 +803,9 @@ dependencies = [
  "generated_types",
  "influxdb_line_protocol",
  "percent-encoding",
+ "regex",
  "serde",
+ "serde_regex",
  "snafu",
  "test_helpers",
  "tracing",
@@ -3157,6 +3159,16 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "serde_regex"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8136f1a4ea815d7eac4101cfd0b16dc0cb5e1fe1b8609dfd728058656b7badf"
+dependencies = [
+ "regex",
+ "serde",
+]
+
 [[package]]
 name = "serde_urlencoded"
 version = "0.6.1"
diff --git a/data_types/Cargo.toml b/data_types/Cargo.toml
index 80cfdc44bc..d18b0fc2bd 100644
--- a/data_types/Cargo.toml
+++ b/data_types/Cargo.toml
@@ -15,6 +15,8 @@ percent-encoding = "2.1.0"
 serde = "1.0"
 snafu = "0.6"
 tracing = "0.1"
+regex = "1.4"
+serde_regex = "1.1"
 
 [dev-dependencies] # In alphabetical order
 criterion = "0.3"
diff --git a/data_types/src/database_rules.rs b/data_types/src/database_rules.rs
index 5313564c9d..cf692ce4f1 100644
--- a/data_types/src/database_rules.rs
+++ b/data_types/src/database_rules.rs
@@ -1,6 +1,7 @@
 use std::convert::{TryFrom, TryInto};
 
 use chrono::{DateTime, TimeZone, Utc};
+use regex::Regex;
 use serde::{Deserialize, Serialize};
 use snafu::Snafu;
 
@@ -51,6 +52,12 @@ pub struct DatabaseRules {
     /// in object storage.
     #[serde(default = "MutableBufferConfig::default_option")]
     pub mutable_buffer_config: Option<MutableBufferConfig>,
+
+    /// An optional config to route writes or queries to other IOx servers.
+    /// This is useful for sharding a database or copying all or part of
+    /// requests over to other servers (like shadow production, etc).
+    #[serde(default)]
+    pub routing_config: Option<RoutingConfig>,
 }
 
 impl DatabaseRules {
@@ -118,6 +125,7 @@ impl TryFrom<management::DatabaseRules> for DatabaseRules {
             partition_template,
             wal_buffer_config,
             mutable_buffer_config,
+            routing_config: None,
         })
     }
 }
@@ -701,6 +709,76 @@ impl TryFrom<management::partition_template::Part> for TemplatePart {
     }
 }
 
+/// RoutingConfig defines rules for routing write or query requests to other IOx
+/// servers. In the case of writes, routing rules can be used to create copies
+/// of the data to get sent out to other servers in a best effort manner. Each
+/// route will be checked with a copy of the write or query sent out to each
+/// match.
+#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone)]
+pub struct RoutingConfig {
+    routes: Vec<Route>,
+}
+
+/// A specific named route to match against. Routes can be done based on
+/// specific matches to send to a collection of servers or based on hashing to
+/// send to a single server in a collection.
+#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone)]
+pub struct Route {
+    /// The name of the route.
+    pub name: String,
+    /// An optional matcher. If there is a match, the route will be evaluated to
+    /// the given targets, otherwise the hash ring will be evaluated.
+    pub specific_targets: Option<MatcherToTargets>,
+    /// An optional default hasher which will route to one in a collection of
+    /// nodes.
+    pub hash_ring: Option<HashRing>,
+    /// If set to true the router will ignore any errors sent by the remote
+    /// targets in this route. That is, the write request will succeed
+    /// regardless of this route's success.
+    pub ignore_errors: bool,
+}
+
+/// Maps a matcher with specific targets. If it is a match the row/line or query
+/// should be sent to all targets.
+#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone)]
+pub struct MatcherToTargets {
+    pub matcher: Matcher,
+    pub targets: Vec<WriterId>,
+}
+
+/// HashRing is a rule for creating a hash key for a row and mapping that to
+/// an individual node on a ring.
+#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone)]
+pub struct HashRing {
+    /// include the table name in the hash key
+    pub table_name: Option<String>,
+    /// include the values of these columns in the hash key
+    pub columns: Vec<String>,
+    /// ring of these nodes
+    pub nodes: Vec<WriterId>,
+}
+
+/// A matcher is used to match routing rules or subscriptions on a row-by-row
+/// (or line) basis.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct Matcher {
+    /// if provided, match if the table name matches against the regex
+    #[serde(with = "serde_regex")]
+    pub table_name_regex: Option<Regex>,
+    // paul: what should we use for predicate matching here against a single row/line?
+    pub predicate: Option<String>,
+}
+
+impl PartialEq for Matcher {
+    fn eq(&self, other: &Self) -> bool {
+        // this is kind of janky, but it's only used during tests and should get the job
+        // done
+        format!("{:?}{:?}", self.table_name_regex, self.predicate)
+            == format!("{:?}{:?}", other.table_name_regex, other.predicate)
+    }
+}
+impl Eq for Matcher {}
+
 /// `PartitionId` is the object storage identifier for a specific partition. It
 /// should be a path that can be used against an object store to locate all the
 /// files and subdirectories for a partition. It takes the form of `/<writer