// delorean: gRPC service definitions for bucket management, plus the
// InfluxDB storage-RPC predicate types and S3 layout design notes.
syntax = "proto3";
|
|
package delorean;
|
|
|
|
// TODO: how should requests handle authentication & authorization?
|
|
|
|
// Request to create a new bucket under an organization.
message CreateBucketRequest {
  // ID of the organization the bucket belongs to.
  uint32 org_id = 1;

  // Full definition of the bucket to create.
  Bucket bucket = 2;
}
|
|
|
|
// Response to CreateBucket. Currently empty; kept as a dedicated message so
// fields can be added later without breaking the RPC signature.
message CreateBucketResponse {
}
|
|
|
|
// Response to DeleteBucket. Currently empty; kept as a dedicated message so
// fields can be added later without breaking the RPC signature.
message DeleteBucketResponse {
}
|
|
|
|
// Request to delete a bucket by its ID.
message DeleteBucketRequest {
  // ID of the bucket to delete.
  uint32 id = 1;
}
|
|
|
|
// Response to GetBuckets.
message GetBucketsResponse {
  // Buckets returned for the requested organization.
  repeated Bucket buckets = 1;
}
|
|
|
|
// An organization that owns buckets. Also used directly as the request
// message for the GetBuckets RPC below.
message Organization {
  // Unique identifier of the organization.
  uint32 id = 1;

  // Human-readable organization name.
  string name = 2;

  // Buckets belonging to this organization.
  repeated Bucket buckets = 3;
}
|
|
|
|
// A bucket: the unit of data storage, retention, and indexing within an
// organization.
message Bucket {
  // ID of the organization that owns this bucket.
  uint32 org_id = 1;

  // Unique identifier of the bucket.
  uint32 id = 2;

  // Human-readable bucket name.
  string name = 3;

  // Retention policy for data in this bucket.
  // NOTE(review): the string format (e.g. duration syntax) is not specified
  // here — confirm against the code that parses it.
  string retention = 4;

  // posting_list_rollover specifies how many series can be in a single posting list entry
  // before the index creates a new entry for it. For example if you have _measurement=cpu and
  // a rollover of 10, once 10 series are in that list, a new key/value pair is created in the
  // index for the next list. So you'd have _measurement=cpu<big endian 0> and _measurement=cpu<big endian 10>
  uint32 posting_list_rollover = 5;

  // Index granularity levels for this bucket; see IndexLevel.
  repeated IndexLevel index_levels = 6;
}
|
|
|
|
// IndexLevel describes one granularity (snapshot level) of a bucket's index.
message IndexLevel {
  // duration is the amount of time this level represents. A duration of 0 is a level for an index of anything ever seen
  uint32 duration_seconds = 1;

  // timezone is the timezone under which this level should rollover. Only applicable for durations of 1 day or greater
  // NOTE(review): the expected identifier format (e.g. IANA tz name) is not
  // specified here — confirm with the consuming code.
  string timezone = 2;
}
|
|
|
|
// S3PartitionRule will describe how data is partitioned into S3 objects.
// TODO: fields not yet defined — see the "S3 Organization scheme" notes in
// the block comment at the bottom of this file.
message S3PartitionRule {
}
|
|
|
|
// Delorean is the bucket-management API exposed by this server.
service Delorean {
  // CreateBucket creates the bucket described in the request under the
  // organization given by CreateBucketRequest.org_id.
  rpc CreateBucket(CreateBucketRequest) returns (CreateBucketResponse) {}

  // DeleteBucket deletes the bucket identified by DeleteBucketRequest.id.
  rpc DeleteBucket(DeleteBucketRequest) returns (DeleteBucketResponse) {}

  // GetBuckets lists the buckets for an organization.
  // NOTE(review): this reuses the Organization message as the request; a
  // dedicated GetBucketsRequest would allow adding parameters (e.g. paging)
  // later without a breaking signature change. Changing it now would break
  // existing callers, so flagged only.
  rpc GetBuckets(Organization) returns (GetBucketsResponse) {}
}
|
|
|
|
// The following section is taken from InfluxDB so this server can implement the storage RPC. From here:
|
|
// https://github.com/influxdata/influxdb/blob/master/storage/reads/datatypes/predicate.proto
|
|
// Node is one vertex of a predicate expression tree, mirroring the InfluxDB
// storage-RPC predicate definition linked above.
message Node {
  // Kind of expression this node represents.
  enum Type {
    LOGICAL_EXPRESSION = 0;
    COMPARISON_EXPRESSION = 1;
    PAREN_EXPRESSION = 2;
    TAG_REF = 3;
    LITERAL = 4;
    FIELD_REF = 5;
  }

  // Comparison operators used by COMPARISON_EXPRESSION nodes.
  enum Comparison {
    EQUAL = 0;
    NOT_EQUAL = 1;
    STARTS_WITH = 2;
    REGEX = 3;
    NOT_REGEX = 4;
    LT = 5;
    LTE = 6;
    GT = 7;
    GTE = 8;
  }

  // Logical operators apply to boolean values and combine to produce a single boolean result.
  enum Logical {
    AND = 0;
    OR = 1;
  }

  // The kind of this node. Restored as field number 1 to match the upstream
  // InfluxDB predicate.proto ("Type node_type = 1;"), which this file states
  // it must stay wire-compatible with; the field was missing here while
  // children/value already used numbers 2 and 3+.
  Type node_type = 1;

  // Child expressions (e.g. the operands of a logical or comparison node).
  repeated Node children = 2;

  // The node's payload; which member is set depends on the node's type.
  oneof value {
    string string_value = 3;
    bool bool_value = 4;
    int64 int_value = 5;
    uint64 uint_value = 6;
    double float_value = 7;
    string regex_value = 8;
    string tag_ref_value = 9;
    string field_ref_value = 10;
    Logical logical = 11;
    Comparison comparison = 12;
  }
}
|
|
|
|
// Predicate wraps the root of a Node expression tree used to filter reads
// via the storage RPC.
message Predicate {
  // Root node of the expression tree.
  Node root = 1;
}
|
|
|
|
/*
|
|
S3 Organization scheme
|
|
|
|
Considerations:
|
|
* Buckets are tied to a region, so storage servers should use buckets from a specific region
|
|
|
|
one bucket per region. don't do per org as there is a default 100 bucket limit per account
|
|
|
|
prefix:
|
|
indexes have their own spot. They can be built based on snapshot levels, which are like granularities. So
|
|
<server_id>/<org_id>/<bucket_id>/index/<snapshot level>/
|
|
<server_id>/<org_id>/<bucket_id>/data/<start time><end time><id start><id end>.tsm
|
|
|
|
Some things we'd want to query:
|
|
measurement = cpu and host = serverA
|
|
tag keys where host = serverA
|
|
tag keys where (app = foo and app = bar)
|
|
hosts where measurement = redis
|
|
fields where measurement = redis
|
|
hosts where (app = foo AND app = bar)
|
|
hosts
|
|
|
|
group data by measurement_range, time, size limit. Roll over to next measurement range when we hit either time or size
|
|
|
|
so we can say we want to snapshot to S3 at least every 1h or 100MB. Should also be able to force S3 snapshot.
|
|
|
|
We can have overlapping S3 data, which is fine. Need ability to request that the server looks at overlaps and compacts them
|
|
|
|
given a set of key/value pairs, determine which ranges we need to lookup.
|
|
|
|
are the set of ranges fixed or dynamic? If dynamic they have to be able to overlap. Which means we'll need to be able to compact them?
|
|
|
|
be able to tell the server how many requests to send to s3 in parallel to load
|
|
|
|
configure when to roll to deeper prefix (size/file count based)
|
|
|
|
on boot it should load up its S3 file index and keep in memory
|
|
*/