// delorean: gRPC service definitions for bucket management, plus the
// InfluxDB storage-RPC predicate types and S3 layout design notes.
syntax = "proto3";
|
|
package delorean;
|
|
|
|
// TODO: how should requests handle authentication & authorization?
|
|
|
|
// Request to create a new bucket under an organization.
message CreateBucketRequest {
  // ID of the organization the bucket belongs to.
  uint32 org_id = 1;

  // Full definition of the bucket to create.
  Bucket bucket = 2;
}
|
|
|
|
// Response to CreateBucket. Currently empty; kept as a dedicated message so
// fields can be added later without breaking the RPC signature.
message CreateBucketResponse {
}
|
|
|
|
// Response to DeleteBucket. Currently empty; kept as a dedicated message so
// fields can be added later without breaking the RPC signature.
message DeleteBucketResponse {
}
|
|
|
|
// Request to delete a bucket by its ID.
message DeleteBucketRequest {
  // ID of the bucket to delete.
  uint32 id = 1;
}
|
|
|
|
// Response to GetBuckets.
message GetBucketsResponse {
  // Buckets returned for the requested organization.
  repeated Bucket buckets = 1;
}
|
|
|
|
// An organization that owns buckets. Also used directly as the request
// message for the GetBuckets RPC below.
message Organization {
  // Unique identifier of the organization.
  uint32 id = 1;

  // Human-readable organization name.
  string name = 2;

  // Buckets belonging to this organization.
  repeated Bucket buckets = 3;
}
|
|
|
|
// A bucket: the unit of data storage, retention, and indexing within an
// organization.
message Bucket {
  // ID of the organization that owns this bucket.
  uint32 org_id = 1;

  // Unique identifier of the bucket.
  uint32 id = 2;

  // Human-readable bucket name.
  string name = 3;

  // Retention policy for data in this bucket.
  // NOTE(review): the string format (e.g. duration syntax) is not specified
  // here — confirm against the code that parses it.
  string retention = 4;

  // posting_list_rollover specifies how many series can be in a single posting list entry
  // before the index creates a new entry for it. For example if you have _measurement=cpu and
  // a rollover of 10, once 10 series are in that list, a new key/value pair is created in the
  // index for the next list. So you'd have _measurement=cpu<big endian 0> and _measurement=cpu<big endian 10>
  uint32 posting_list_rollover = 5;

  // Index granularity levels for this bucket; see IndexLevel.
  repeated IndexLevel index_levels = 6;
}
|
|
|
|
// IndexLevel describes one granularity (snapshot level) of a bucket's index.
message IndexLevel {
  // duration is the amount of time this level represents. A duration of 0 is a level for an index of anything ever seen
  uint32 duration_seconds = 1;

  // timezone is the timezone under which this level should rollover. Only applicable for durations of 1 day or greater
  // NOTE(review): the expected identifier format (e.g. IANA tz name) is not
  // specified here — confirm with the consuming code.
  string timezone = 2;
}
|
|
|
|
// S3PartitionRule will describe how data is partitioned into S3 objects.
// TODO: fields not yet defined — see the "S3 Organization scheme" notes in
// the block comment at the bottom of this file.
message S3PartitionRule {
}
|
|
|
|
// Delorean is the bucket-management API exposed by this server.
service Delorean {
  // CreateBucket creates the bucket described in the request under the
  // organization given by CreateBucketRequest.org_id.
  rpc CreateBucket(CreateBucketRequest) returns (CreateBucketResponse) {}

  // DeleteBucket deletes the bucket identified by DeleteBucketRequest.id.
  rpc DeleteBucket(DeleteBucketRequest) returns (DeleteBucketResponse) {}

  // GetBuckets lists the buckets for an organization.
  // NOTE(review): this reuses the Organization message as the request; a
  // dedicated GetBucketsRequest would allow adding parameters (e.g. paging)
  // later without a breaking signature change. Changing it now would break
  // existing callers, so flagged only.
  rpc GetBuckets(Organization) returns (GetBucketsResponse) {}
}
|
|
|
|
// The following section is taken from InfluxDB so this server can implement the storage RPC. From here:
|
|
// https://github.com/influxdata/influxdb/blob/master/storage/reads/datatypes/predicate.proto
|
|
// Node is one vertex of a predicate expression tree, mirroring the InfluxDB
// storage-RPC predicate definition linked above.
message Node {
  // Kind of expression this node represents.
  enum Type {
    LOGICAL_EXPRESSION = 0;
    COMPARISON_EXPRESSION = 1;
    PAREN_EXPRESSION = 2;
    TAG_REF = 3;
    LITERAL = 4;
    FIELD_REF = 5;
  }

  // Comparison operators used by COMPARISON_EXPRESSION nodes.
  enum Comparison {
    EQUAL = 0;
    NOT_EQUAL = 1;
    STARTS_WITH = 2;
    REGEX = 3;
    NOT_REGEX = 4;
    LT = 5;
    LTE = 6;
    GT = 7;
    GTE = 8;
  }

  // Logical operators apply to boolean values and combine to produce a single boolean result.
  enum Logical {
    AND = 0;
    OR = 1;
  }

  // The kind of this node. Restored as field number 1 to match the upstream
  // InfluxDB predicate.proto ("Type node_type = 1;"), which this file states
  // it must stay wire-compatible with; the field was missing here while
  // children/value already used numbers 2 and 3+.
  Type node_type = 1;

  // Child expressions (e.g. the operands of a logical or comparison node).
  repeated Node children = 2;

  // The node's payload; which member is set depends on the node's type.
  oneof value {
    string string_value = 3;
    bool bool_value = 4;
    int64 int_value = 5;
    uint64 uint_value = 6;
    double float_value = 7;
    string regex_value = 8;
    string tag_ref_value = 9;
    string field_ref_value = 10;
    Logical logical = 11;
    Comparison comparison = 12;
  }
}
|
|
|
|
// Predicate wraps the root of a Node expression tree used to filter reads
// via the storage RPC.
message Predicate {
  // Root node of the expression tree.
  Node root = 1;
}
|
|
|
|
/*
|
|
S3 Organization scheme
|
|
|
|
Considerations:
|
|
* Buckets are tied to a region, so storage servers should use buckets from a specific region
|
|
|
|
one bucket per region. don't do per org as there is a default 100 bucket limit per account
|
|
|
|
prefix:
|
|
indexes have their own spot. They can be built based on snapshot levels, which are like granularities. So
|
|
<server_id>/<org_id>/<bucket_id>/index/<snapshot level>/
|
|
<server_id>/<org_id>/<bucket_id>/data/<start time><end time><id start><id end>.tsm
|
|
|
|
Some things we'd want to query:
|
|
measurement = cpu and host = serverA
|
|
tag keys where host = serverA
|
|
tag keys where (app = foo and app = bar)
|
|
hosts where measurement = redis
|
|
fields where measurement = redis
|
|
hosts where (app = foo AND app = bar)
|
|
hosts
|
|
|
|
group data by measurement_range, time, size limit. Roll over to next measurement range when we hit either time or size
|
|
|
|
so we can say we want to snapshot to S3 at least every 1h or 100MB. Should also be able to force S3 snapshot.
|
|
|
|
We can have overlapping S3 data, which is fine. Need ability to request that the server looks at overlaps and compacts them
|
|
|
|
given a set of key/value pairs, determine which ranges we need to lookup.
|
|
|
|
are the set of ranges fixed or dynamic? If dynamic they have to be able to overlap. Which means we'll need to be able to compact them?
|
|
|
|
be able to tell the server how many requests to send to s3 in parallel to load
|
|
|
|
configure when to roll to deeper prefix (size/file count based)
|
|
|
|
on boot it should load up its S3 file index and keep in memory
|
|
*/