From f561d599d2dd19c22d5b2df5eea8ca974c516d21 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Wed, 22 Feb 2023 16:02:08 -0500 Subject: [PATCH 1/3] fix: Remove uses of KAFKA_CONNECT, redpanda, and some talk of Kafka --- .circleci/config.yml | 3 -- docker/Dockerfile.ci.dockerignore | 1 - docs/testing.md | 9 +--- docs/underground_guide.md | 80 +++++++++---------------------- integration-docker-compose.yml | 7 --- 5 files changed, 25 insertions(+), 75 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index cb4dc4a35e..f548cf6601 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -229,8 +229,6 @@ jobs: # setup multiple docker images (see https://circleci.com/docs/2.0/configuration-reference/#docker) docker: - image: quay.io/influxdb/rust:ci - - image: vectorized/redpanda:v22.1.5 - command: redpanda start --overprovisioned --smp 1 --memory 1G --reserve-memory 0M - image: postgres environment: POSTGRES_HOST_AUTH_METHOD: trust @@ -247,7 +245,6 @@ jobs: # Run integration tests TEST_INTEGRATION: 1 INFLUXDB_IOX_INTEGRATION_LOCAL: 1 - KAFKA_CONNECT: "localhost:9092" POSTGRES_USER: postgres TEST_INFLUXDB_IOX_CATALOG_DSN: "postgres://postgres@localhost/iox_shared" # When removing this, also remove the ignore on the test in trogging/src/cli.rs diff --git a/docker/Dockerfile.ci.dockerignore b/docker/Dockerfile.ci.dockerignore index 0d2f04b604..484eb504cc 100644 --- a/docker/Dockerfile.ci.dockerignore +++ b/docker/Dockerfile.ci.dockerignore @@ -1,3 +1,2 @@ # Ignore everything ** -!docker/redpanda.gpg \ No newline at end of file diff --git a/docs/testing.md b/docs/testing.md index 34fb7f2a92..1e82902b41 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -24,11 +24,7 @@ The end to end tests are run using the `cargo test --test end_to_end` command, a `TEST_INTEGRATION` and `TEST_INFLUXDB_IOX_CATALOG_DSN` environment variables. NOTE if you don't set these variables the tests will "pass" locally (really they will be skipped). -By default, the integration tests for the Kafka-based write buffer are not run. To run these -you need to set the `KAFKA_CONNECT` environment variable and `TEST_INTEGRATION=1`. 
-
-For example, you can run this docker compose to get redpanda (a kafka-compatible message queue)
-and postgres running:
+For example, you can run this docker compose to get postgres running:
 
 ```shell
 docker-compose -f integration-docker-compose.yml up
@@ -38,12 +34,11 @@ In another terminal window, you can run:
 
 ```shell
 export TEST_INTEGRATION=1
-export KAFKA_CONNECT=localhost:9092
 export TEST_INFLUXDB_IOX_CATALOG_DSN=postgresql://postgres@localhost:5432/postgres
 cargo test --workspace
 ```
 
-Or for just the end-to-end tests (and not general tests or kafka):
+Or for just the end-to-end tests (and not general tests):
 
 ```shell
 TEST_INTEGRATION=1 TEST_INFLUXDB_IOX_CATALOG_DSN=postgresql://postgres@localhost:5432/postgres cargo test --test end_to_end
diff --git a/docs/underground_guide.md b/docs/underground_guide.md
index c087bcce88..0406902df6 100644
--- a/docs/underground_guide.md
+++ b/docs/underground_guide.md
@@ -19,22 +19,18 @@ cd influxdb_iox
 cargo build --release --features=pprof
 ```
 
-You can also install the `influxdb_iox` command locally via 
+You can also install the `influxdb_iox` command locally via
 
 ```shell
 cd influxdb_iox
 cargo install --path influxdb_iox
 ```
 
-## Step 2: Start kafka and postgres
+## Step 2: Start postgres
 
-Now, start up kafka and postgres locally in docker containers:
+Now, start up postgres locally in a docker container:
 
 ```shell
-# get rskafka from https://github.com/influxdata/rskafka
-cd rskafka
-# Run kafka on localhost:9010
-docker-compose -f docker-compose-kafka.yml up &
-# now run postgres
+# Run postgres
 docker run -p 5432:5432 -e POSTGRES_HOST_AUTH_METHOD=trust postgres &
 ```
 
@@ -47,19 +43,13 @@ you have postgres running locally on port 5432).
 
 ```shell
 # initialize the catalog
-INFLUXDB_IOX_WRITE_BUFFER_TYPE=kafka \
-INFLUXDB_IOX_WRITE_BUFFER_ADDR=localhost:9010 \
-INFLUXDB_IOX_WRITE_BUFFER_AUTO_CREATE_TOPICS=10 \
 INFLUXDB_IOX_CATALOG_DSN=postgres://postgres@localhost:5432/postgres \
 OBJECT_STORE=file \
 DATABASE_DIRECTORY=~/data_dir \
 LOG_FILTER=debug \
 ./target/release/influxdb_iox catalog setup
 
-# initialize the kafka topic
-INFLUXDB_IOX_WRITE_BUFFER_TYPE=kafka \
-INFLUXDB_IOX_WRITE_BUFFER_ADDR=localhost:9010 \
-INFLUXDB_IOX_WRITE_BUFFER_AUTO_CREATE_TOPICS=10 \
+# initialize the topic
 INFLUXDB_IOX_CATALOG_DSN=postgres://postgres@localhost:5432/postgres \
 OBJECT_STORE=file \
 DATABASE_DIRECTORY=~/data_dir \
 LOG_FILTER=debug \
 ./target/release/influxdb_iox catalog topic update iox-shared
 ```
 
-## Inspecting Catalog and Kafka / Redpanda state
+## Inspecting Catalog state
 
 Depending on what you are trying to do, you may want to inspect the
-catalog and/or the contents of Kafka / Redpanda.
You can run psql like this to inspect the catalog:
 
 ```shell
@@ -111,41 +101,13 @@ postgres=# \d
 postgres=# 
 ```
 
-You can mess with redpanda using `docker exec redpanda-0 rpk` like this:
-
-```shell
-$ docker exec redpanda-0 rpk topic list
-NAME        PARTITIONS  REPLICAS
-iox-shared  1           1
-```
-
-
 # Step 4: Run the services
 
-## Run Router on port 8080/8081 (http/grpc)
-```shell
-INFLUXDB_IOX_BIND_ADDR=localhost:8080 \
-INFLUXDB_IOX_GRPC_BIND_ADDR=localhost:8081 \
-INFLUXDB_IOX_WRITE_BUFFER_TYPE=kafka \
-INFLUXDB_IOX_WRITE_BUFFER_ADDR=localhost:9010 \
-INFLUXDB_IOX_WRITE_BUFFER_AUTO_CREATE_TOPICS=10 \
-INFLUXDB_IOX_CATALOG_DSN=postgres://postgres@localhost:5432/postgres \
-OBJECT_STORE=file \
-DATABASE_DIRECTORY=~/data_dir \
-LOG_FILTER=info \
-./target/release/influxdb_iox run router
-```
-
-
 ## Run Ingester on port 8083/8084 (http/grpc)
+
 ```shell
 INFLUXDB_IOX_BIND_ADDR=localhost:8083 \
 INFLUXDB_IOX_GRPC_BIND_ADDR=localhost:8084 \
-INFLUXDB_IOX_WRITE_BUFFER_TYPE=kafka \
-INFLUXDB_IOX_WRITE_BUFFER_ADDR=localhost:9010 \
-INFLUXDB_IOX_WRITE_BUFFER_AUTO_CREATE_TOPICS=10 \
-INFLUXDB_IOX_SHARD_INDEX_RANGE_START=0 \
-INFLUXDB_IOX_SHARD_INDEX_RANGE_END=0 \
 INFLUXDB_IOX_PAUSE_INGEST_SIZE_BYTES=5000000000 \
 INFLUXDB_IOX_PERSIST_MEMORY_THRESHOLD_BYTES=4000000000 \
 INFLUXDB_IOX_CATALOG_DSN=postgres://postgres@localhost:5432/postgres \
@@ -153,15 +115,26 @@ INFLUXDB_IOX_MAX_HTTP_REQUEST_SIZE=100000000 \
 OBJECT_STORE=file \
 DATABASE_DIRECTORY=~/data_dir \
 LOG_FILTER=info \
-./target/release/influxdb_iox run ingester
+./target/release/influxdb_iox run ingester2
 ```
 
+## Run Router on port 8080/8081 (http/grpc)
+
+```shell
+INFLUXDB_IOX_BIND_ADDR=localhost:8080 \
+INFLUXDB_IOX_GRPC_BIND_ADDR=localhost:8081 \
+INFLUXDB_IOX_CATALOG_DSN=postgres://postgres@localhost:5432/postgres \
+OBJECT_STORE=file \
+DATABASE_DIRECTORY=~/data_dir \
+LOG_FILTER=info \
+./target/release/influxdb_iox run router2
+```
 
 # Step 5: Ingest data
 
 You can load data using the influxdb_iox client:
 
 ```shell
-influxdb_iox --host=http://localhost:8080 -v write test_db test_fixtures/lineproto/*.lp 
+influxdb_iox --host=http://localhost:8080 -v write test_db test_fixtures/lineproto/*.lp
 ```
 
 Now you can post data to `http://localhost:8080` with your favorite load generating tool
@@ -180,21 +153,14 @@ data.
 The default settings at the time of this writing would result in posting
 fairly large requests (necessitating the `INFLUXDB_IOX_MAX_HTTP_REQUEST_SIZE`
 setting above)
 
-
 # Step 6: Profile
 
 See [`profiling.md`](./profiling.md).
- # Step 7: Clean up local state -If you find yourself needing to clean up postgres / kafka state use these commands: +If you find yourself needing to clean up postgres state, use this command: + ```shell docker ps -a -q | xargs docker stop -docker rm rskafka_proxy_1 -docker rm rskafka_kafka-0_1 -docker rm rskafka_kafka-1_1 -docker rm rskafka_kafka-2_1 -docker rm rskafka_zookeeper_1 -docker volume rm rskafka_kafka_0_data rskafka_kafka_1_data rskafka_kafka_2_data rskafka_zookeeper_data ``` diff --git a/integration-docker-compose.yml b/integration-docker-compose.yml index 8dbe82fd9c..10c55fcb07 100644 --- a/integration-docker-compose.yml +++ b/integration-docker-compose.yml @@ -1,12 +1,5 @@ version: "3.9" services: - redpanda: - pull_policy: always - image: docker.vectorized.io/vectorized/redpanda:latest - ports: - - 9092:9092 - - 9644:9644 - command: start --overprovisioned --smp 1 --memory 1G --reserve-memory 0M --node-id 0 --check=false postgres: pull_policy: always image: postgres:latest From 2e656766f881653bc6d4c2ba9a7fbb6df08d3457 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Wed, 22 Feb 2023 16:09:00 -0500 Subject: [PATCH 2/3] fix: Remove docs about testing object_store crate This crate has its own repo now. --- docs/testing.md | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/docs/testing.md b/docs/testing.md index 1e82902b41..724df96186 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -67,31 +67,6 @@ You can also see more logging using the `LOG_FILTER` variable. For example: LOG_FILTER=debug,sqlx=warn,h2=warn ``` -## Object storage - -### To run the tests or not run the tests - -If you are testing integration with some or all of the object storage options, you'll have more -setup to do. - -By default, `cargo test -p object_store` does not run any tests that actually contact -any cloud services: tests that do contact the services will silently pass. - -To run integration tests, use `TEST_INTEGRATION=1 cargo test -p object_store`, which will run the -tests that contact the cloud services and fail them if the required environment variables aren't -set. - -### Configuration differences when running the tests - -When running `influxdb_iox run`, you can pick one object store to use. When running the tests, you -can run them against all the possible object stores. There's still only one `INFLUXDB_IOX_BUCKET` -variable, though, so that will set the bucket name for all configured object stores. Use the same -bucket name when setting up the different services. - -Other than possibly configuring multiple object stores, configuring the tests to use the object -store services is the same as configuring the server to use an object store service. See the output -of `influxdb_iox run --help` for instructions. 
-
 ## InfluxDB 2 Client
 
 The `influxdb2_client` crate may be used by people using InfluxDB 2.0 OSS, and should be compatible
From 8d3e2852513ae71313e793edbc6058aaaae5d88a Mon Sep 17 00:00:00 2001
From: "Carol (Nichols || Goulding)"
Date: Thu, 2 Mar 2023 16:29:18 -0500
Subject: [PATCH 3/3] fix: Remove outdated documentation that discusses Kafka

---
 docs/metrics.md | 20 --------
 iox_data_generator/README.md | 88 ------------------------------------
 2 files changed, 108 deletions(-)

diff --git a/docs/metrics.md b/docs/metrics.md
index 5bb080dad2..b3893a51bd 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -42,26 +42,6 @@ Here are useful metrics
 | query_access_pruned_chunks_total | pruned_chunks | Number of chunks of a table pruned while running queries |
 | query_access_pruned_rows_total | pruned_rows | Number of rows of a table pruned while running queries |
-
-### Read buffer RUB
-| Metric name | Code Name | Description |
-| --- | --- | --- |
-| read_buffer_column_total | columns_total | Total number of columns in read buffer |
-| read_buffer_column_values | column_values_total | Total number of values stored in read buffer column encodings, further segmented by nullness |
-| read_buffer_column_raw_bytes | column_raw_bytes_total | Estimated uncompressed data size for read buffer columns, further segmented by nullness |
-
-
-### Ingest Request (from Kafka to Query Server)
-| Metric name | Code Name | Description |
-| --- | --- | --- |
-| write_buffer_ingest_requests_total | red | Total number of write requests |
-| write_buffer_read_bytes_total | bytes_read | Total number of write requested bytes |
-| write_buffer_last_sequence_number | last_sequence_number | sequence number of last write request |
-| write_buffer_sequence_number_lag | sequence_number_lag | The difference between the last sequence number available (e.g. Kafka offset) and (= minus) last consumed sequence number |
-| write_buffer_last_min_ts | last_min_ts | Minimum timestamp of last write as unix timestamp in nanoseconds |
-| write_buffer_last_max_ts | last_max_ts | Maximum timestamp of last write as unix timestamp in nanoseconds |
-| write_buffer_last_ingest_ts | last_ingest_ts | Last seen ingest timestamp as unix timestamp in nanoseconds |
-
 ### jemalloc
 | Metric name | Code Name | Description |
 | --- | --- | --- |
diff --git a/iox_data_generator/README.md b/iox_data_generator/README.md
index c78ce57b34..3bf275f21c 100644
--- a/iox_data_generator/README.md
+++ b/iox_data_generator/README.md
@@ -17,91 +17,3 @@ And the built binary has command line help:
 For examples of specifications see the [schemas folder](schemas). The
 [full_example](schemas/full_example.toml) is the most comprehensive with comments and example
 output.
-
-## Use with two IOx servers and Kafka
-
-The data generator tool can be used to simulate data being written to IOx in various shapes. This
-is how to set up a local experiment for profiling or debugging purposes using a database in two IOx
-instances: one writing to Kafka and one reading from Kafka.
-
-If you're profiling IOx, be sure you've compiled and are running a release build using either:
-
-```
-cargo build --release
-./target/release/influxdb_iox run database --server-id 1
-```
-
-or:
-
-```
-cargo run --release -- run database --server-id 1
-```
-
-Server ID is the only required attribute for running IOx; see `influxdb_iox run database --help` for all the
-other configuration options for the server you may want to set for your experiment.
-Note that the
-default HTTP API address is `127.0.0.1:8080` unless you set something different with `--api-bind`
-and the default gRPC address is `127.0.0.1:8082` unless you set something different using
-`--grpc-bind`.
-
-For the Kafka setup, you'll need to start two IOx servers, so you'll need to set the bind addresses
-for at least one of them. Here's an example of the two commands to run:
-
-```
-cargo run --release -- run router --server-id 1
-cargo run --release -- run database --server-id 2 --api-bind 127.0.0.1:8084 --grpc-bind 127.0.0.1:8086
-```
-
-You'll also need to run a Kafka instance. There's a Docker compose script in the influxdb_iox
-repo you can run with:
-
-```
-docker-compose -f docker/ci-kafka-docker-compose.yml up kafka
-```
-
-The Kafka instance will be accessible from `127.0.0.1:9093` if you run it with this script.
-
-Once you have the two IOx servers and one Kafka instance running, create a database with a name in
-the format `[orgname]_[bucketname]`. For example, create a database in IOx named `mlb_pirates`, and
-the org you'll use in the data generator will be `mlb` and the bucket will be `pirates`. The
-`DatabaseRules` defined in `src/bin/create_database.rs` will set up a database in the "writer" IOx
-instance to write to Kafka and the database in the "reader" IOx instance to read from Kafka if
-you run it with:
-
-```
-cargo run --release -p iox_data_generator --bin create_database -- --writer 127.0.0.1:8082 --reader 127.0.0.1:8086 mlb_pirates
-```
-
-This script adds 3 rows to a `writer_test` table because [this issue with the Kafka Consumer
-needing data before it can find partitions](https://github.com/influxdata/influxdb_iox/issues/2189).
-
-Once the database is created, decide what kind of data you would like to send it. You can use an
-existing data generation schema in the `schemas` directory or create a new one, perhaps starting
-from an existing schema as a guide. In this example, we're going to use
-`iox_data_generator/schemas/cap-write.toml`.
-
-Next, run the data generation tool as follows:
-
-```
-cargo run --release -p iox_data_generator -- --spec iox_data_generator/schemas/cap-write.toml --continue --host 127.0.0.1:8080 --token arbitrary --org mlb --bucket pirates
-```
-
-- `--spec iox_data_generator/schemas/cap-write.toml` sets the schema you want to use to generate the data
-- `--continue` means the data generation tool should generate data every `sampling_interval` (which
-  is set in the schema) until we stop it
-- `--host 127.0.0.1:8080` means to write to the writer IOx server running at the default HTTP API address
-  of `127.0.0.1:8080` (note this is NOT the gRPC address used by the `create_database` command)
-- `--token arbitrary` - the data generator requires a token value but IOx doesn't use it, so this
-  can be any value.
-- `--org mlb` is the part of the database name you created before the `_`
-- `--bucket pirates` is the part of the database name you created after the `_`
-
-You should be able to use `influxdb_iox sql -h http://127.0.0.1:8086` to connect to the gRPC of the reader
-then `use database mlb_pirates;` and query the tables to see that the data is being inserted. That
-is,
-
-```
-# in your influxdb_iox checkout
-cargo run --release -- sql -h http://127.0.0.1:8086
-```
-
-Connecting to the writer instance won't show any data.
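
To smoke-test the setup this series leaves behind, the commands already present in the updated `docs/testing.md` can be run back to back (a sketch, assuming Docker is available and the repo root is the working directory):

```shell
# Bring up the trimmed integration environment (postgres only, no redpanda)
docker-compose -f integration-docker-compose.yml up

# In another terminal: run the end-to-end tests with no Kafka-related variables
export TEST_INTEGRATION=1
export TEST_INFLUXDB_IOX_CATALOG_DSN=postgresql://postgres@localhost:5432/postgres
cargo test --test end_to_end
```

If the tests "pass" suspiciously fast, check that both environment variables are exported: as the updated `docs/testing.md` notes, the end to end tests are silently skipped when they are unset.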