From ccd5d22aabdadc0244df71d8733e0315c0bbe602 Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Thu, 13 Feb 2025 15:16:05 -0700 Subject: [PATCH] feat: build and use python-build-standalone with official builds (#25969) * feat(ci): fetch and configure for python-build-standalone binaries * fix: make the process engine usable on windows * feat(ci): build with python-build-standalone (and drop musl) * fix(ci): set rpath on Linux and libpath on OSX in ci * fix: set PYTHONHOME everywhere and PYTHONPATH on Windows * chore(ci): update to use more recent ci-packager-next * fix(ci): adjust validate to allow certain dynamically linked libraries * chore: remove install_influxdb.sh (using install_influxdb3.sh instead) * chore(install_influxdb3.sh): update for processing engine and release builds * fix: temporarily use rpm --nodeps until compile with old GLIBC * feat(ci): build docker with python-build-standalone * chore: add README_processing_engine.md * chore: add a few more details to README_processing_engine.md * fix(ci): use patchelf --set-rpath Not all patchelf versions support --add-rpath for appending to the RPATH, but --set-path can be used with a colon-separated list. Use --set-rpath first for maximum compatibility. 
* chore: update README_processing_engine.md for standalone local builds * fix(Dockerfile): also use patchelf --set-rpath * chore: update code comment for accuracy * chore: typos, grammar and formatting change in README_processing_engine.md * chore: update README_processing_engine.md for Docker arm64 (thanks Jackson) --- .circleci/config.yml | 117 ++++- .circleci/packages/config.yaml | 9 +- .../influxdb3/fs/usr/lib/influxdb3/.keepdir | 0 .circleci/scripts/docker_build_release.bash | 10 +- .../scripts/fetch-python-standalone.bash | 170 +++++++ .circleci/scripts/package-validation/validate | 23 +- Dockerfile | 29 +- Dockerfile.dockerignore | 1 + README_processing_engine.md | 452 ++++++++++++++++++ influxdb3/src/main.rs | 67 +++ influxdb3_processing_engine/src/virtualenv.rs | 39 +- install_influxdb.sh | 394 --------------- install_influxdb3.sh | 31 +- 13 files changed, 899 insertions(+), 443 deletions(-) create mode 100644 .circleci/packages/influxdb3/fs/usr/lib/influxdb3/.keepdir create mode 100755 .circleci/scripts/fetch-python-standalone.bash create mode 100644 README_processing_engine.md delete mode 100644 install_influxdb.sh diff --git a/.circleci/config.yml b/.circleci/config.yml index 0e8681d227..20c6dbfa29 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -213,6 +213,35 @@ jobs: name: cargo nextest command: TEST_LOG= RUST_LOG=info RUST_LOG_SPAN_EVENTS=full RUST_BACKTRACE=1 cargo nextest run --workspace --failure-output immediate-final --no-fail-fast + # Fetch python-build-standalone for official builds + fetch-python: + machine: + image: ubuntu-2204:current + resource_class: medium + # environment variables for python-build-standalone. Should correspond to + # https://github.com/astral-sh/python-build-standalone/releases. See + # scripts/fetch-python-standalone.bash for details. This should match the + # 'build-docker' job, below. 
+ environment: + PBS_DATE: "20250106" + PBS_VERSION: "3.11.11" + steps: + - checkout + - run: + name: pull Python Build Standalone + command: | + echo "PBS_DATE=$PBS_DATE" + .circleci/scripts/fetch-python-standalone.bash \ + "python-artifacts" \ + "$PBS_DATE" \ + "$PBS_VERSION" + - store_artifacts: + path: python-artifacts + - persist_to_workspace: + root: . + paths: + - python-artifacts + # Build a dev binary. # # Compiles a binary with the default ("dev") cargo profile from the influxdb3 source @@ -220,7 +249,7 @@ jobs: # Build a dev binary. build-dev: docker: - - image: us-east1-docker.pkg.dev/influxdata-team-edge/ci-support/ci-cross-influxdb3:latest + - image: us-east1-docker.pkg.dev/influxdata-team-edge/ci-support/ci-cross-influxdb3@sha256:63726f571865bfb13232006bbca7aac42d2178f4c19a3526a7e5ee02ada836f8 auth: username: _json_key password: $CISUPPORT_GCS_AUTHORIZATION @@ -242,12 +271,23 @@ jobs: type: string steps: - checkout + - attach_workspace: + at: /tmp/workspace + - run: + name: Extract python for this target + command: | + tar -C /tmp/workspace/python-artifacts -zxvf /tmp/workspace/python-artifacts/all.tar.gz ./<< parameters.target >> + - run: + name: Show PYO3_CONFIG_FILE + command: cat /tmp/workspace/python-artifacts/<< parameters.target >>/pyo3_config_file.txt - run: name: Install Target command: rustup target add << parameters.target >> - run: name: Cargo build - command: target-env cargo build --target=<< parameters.target >> --workspace + command: | + export PYO3_CONFIG_FILE=/tmp/workspace/python-artifacts/<< parameters.target >>/pyo3_config_file.txt + target-env cargo build --target=<< parameters.target >> --features="system-py" --workspace - when: condition: not: @@ -270,7 +310,7 @@ jobs: # Compile cargo "release" profile binaries for influxdb3 edge releases build-release: docker: - - image: us-east1-docker.pkg.dev/influxdata-team-edge/ci-support/ci-cross-influxdb3:latest + - image: 
us-east1-docker.pkg.dev/influxdata-team-edge/ci-support/ci-cross-influxdb3@sha256:63726f571865bfb13232006bbca7aac42d2178f4c19a3526a7e5ee02ada836f8 auth: username: _json_key password: $CISUPPORT_GCS_AUTHORIZATION @@ -295,19 +335,59 @@ jobs: default: release steps: - checkout + - attach_workspace: + at: /tmp/workspace + - run: + name: Extract python for this target + command: | + tar -C /tmp/workspace/python-artifacts -zxvf /tmp/workspace/python-artifacts/all.tar.gz ./<< parameters.target >> + - run: + name: Show PYO3_CONFIG_FILE + command: cat /tmp/workspace/python-artifacts/<< parameters.target >>/pyo3_config_file.txt - run: name: Install Target command: rustup target add << parameters.target >> - run: name: Cargo release build - command: target-env cargo build --target=<< parameters.target >> --profile=<< parameters.profile >> --workspace + command: | + export PYO3_CONFIG_FILE=/tmp/workspace/python-artifacts/<< parameters.target >>/pyo3_config_file.txt + target-env cargo build --target=<< parameters.target >> --features="system-py" --profile=<< parameters.profile >> --workspace # linking might take a while and doesn't produce CLI output no_output_timeout: 30m + - when: + condition: + or: + - equal: [ << parameters.target >>, aarch64-unknown-linux-gnu ] + - equal: [ << parameters.target >>, x86_64-unknown-linux-gnu ] + steps: + - run: + # XXX: better to use 'cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/python/lib' + name: adjust RPATH for linux + command: | + # tarballs need $ORIGIN/python/lib, deb/rpm need $ORIGIN/../lib/influxdb3/python/lib + echo "Running: patchelf --set-rpath '$ORIGIN/python/lib:$ORIGIN/../lib/influxdb3/python/lib' '${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3'" + patchelf --set-rpath '$ORIGIN/python/lib:$ORIGIN/../lib/influxdb3/python/lib' "${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3" + - when: + condition: + equal: [ << parameters.target >>, aarch64-apple-darwin ] + steps: + - run: + # XXX: 
better to use 'cargo:rustc-link-arg=-Wl,-rpath,@executable_path/python/lib' + name: adjust LC_LOAD_DYLIB path for darwin + command: | + export PBS_LIBPYTHON=$(grep '^lib_name=' /tmp/workspace/python-artifacts/<< parameters.target >>/pyo3_config_file.txt | cut -d = -f 2) + echo "Running: /osxcross/bin/aarch64-apple-darwin22.2-install_name_tool -change '/install/lib/lib${PBS_LIBPYTHON}.dylib' '@executable_path/python/lib/lib${PBS_LIBPYTHON}.dylib' '${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3'" + /osxcross/bin/aarch64-apple-darwin22.2-install_name_tool -change "/install/lib/lib${PBS_LIBPYTHON}.dylib" "@executable_path/python/lib/lib${PBS_LIBPYTHON}.dylib" "${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3" + # re-sign after install_name_tool since osxcross won't do it + echo "Running: /usr/local/bin/rcodesign sign '${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3'" + /usr/local/bin/rcodesign sign "${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3" - run: name: tar and gzip build artifacts command: | mkdir -p artifacts - tar --ignore-failed-read -czvf "${PWD}/artifacts/influxdb3-core_<< parameters.target >>.tar.gz" -C "${PWD}/target/<< parameters.target >>/<< parameters.profile >>" influxdb3{,.exe} + tar --ignore-failed-read -cvf "${PWD}/artifacts/influxdb3-core_<< parameters.target >>.tar" -C "${PWD}/target/<< parameters.target >>/<< parameters.profile >>" influxdb3{,.exe} + tar --ignore-failed-read -rvf "${PWD}/artifacts/influxdb3-core_<< parameters.target >>.tar" -C "/tmp/workspace/python-artifacts/<< parameters.target >>" python + gzip "${PWD}/artifacts/influxdb3-core_<< parameters.target >>.tar" - store_artifacts: path: artifacts - persist_to_workspace: @@ -316,7 +396,7 @@ jobs: - artifacts build-packages: docker: - - image: us-east1-docker.pkg.dev/influxdata-team-edge/ci-support/ci-packager-next:latest + - image: 
us-east1-docker.pkg.dev/influxdata-team-edge/ci-support/ci-packager-next@sha256:db0cd91a5445c4287154cea1d4d5566735cb0d3b7b9e2a95724a83f9d979d497 auth: username: _json_key password: $CISUPPORT_GCS_AUTHORIZATION @@ -457,6 +537,13 @@ jobs: image_name: type: string default: influxdb3-core + # environment variables for python-build-standalone. Should correspond to + # https://github.com/astral-sh/python-build-standalone/releases. See + # scripts/fetch-python-standalone.bash for details. This should match + # the 'fetch-python' job, above. + environment: + PBS_DATE: "20250106" + PBS_VERSION: "3.11.11" machine: image: default resource_class: << parameters.resource_class >> @@ -477,6 +564,8 @@ jobs: "influxdb3" \ "aws,gcp,azure,jemalloc_replacing_malloc,tokio_console,system-py" \ "<< parameters.image_name >>:latest-<< parameters.platform >>" \ + "$PBS_DATE" \ + "$PBS_VERSION" \ "<< parameters.platform >>" \ "$DOCKER_PROFILE" @@ -519,6 +608,8 @@ workflows: version: 2 snapshot: jobs: + - fetch-python: + <<: *main_filter - build-release: <<: *main_filter name: build-snapshot-<< matrix.target >> @@ -529,10 +620,10 @@ workflows: target: - aarch64-apple-darwin - aarch64-unknown-linux-gnu - - aarch64-unknown-linux-musl - x86_64-pc-windows-gnu - x86_64-unknown-linux-gnu - - x86_64-unknown-linux-musl + requires: + - fetch-python - build-packages: <<: *main_filter requires: @@ -559,6 +650,8 @@ workflows: <<: *any_filter - cargo-audit: <<: *any_filter + - fetch-python: + <<: *any_filter - test: <<: *any_filter - build-dev: @@ -573,10 +666,10 @@ workflows: target: - aarch64-apple-darwin - aarch64-unknown-linux-gnu - - aarch64-unknown-linux-musl - x86_64-pc-windows-gnu - x86_64-unknown-linux-gnu - - x86_64-unknown-linux-musl + requires: + - fetch-python - doc: <<: *any_filter - build-release: @@ -587,10 +680,10 @@ workflows: target: - aarch64-apple-darwin - aarch64-unknown-linux-gnu - - aarch64-unknown-linux-musl - x86_64-pc-windows-gnu - x86_64-unknown-linux-gnu - - 
x86_64-unknown-linux-musl + requires: + - fetch-python - build-packages: <<: *release_filter requires: diff --git a/.circleci/packages/config.yaml b/.circleci/packages/config.yaml index 5162612bf5..71c34f5404 100644 --- a/.circleci/packages/config.yaml +++ b/.circleci/packages/config.yaml @@ -5,19 +5,19 @@ version: value: '3.0.0+snapshot-{{env.CIRCLE_SHA1[:8]}}' sources: - - binary: /tmp/workspace/artifacts/influxdb3-core_x86_64-unknown-linux-musl.tar.gz + - binary: /tmp/workspace/artifacts/influxdb3-core_x86_64-unknown-linux-gnu.tar.gz target: artifacts/ arch: amd64 plat: linux - - binary: /tmp/workspace/artifacts/influxdb3-core_aarch64-unknown-linux-musl.tar.gz + - binary: /tmp/workspace/artifacts/influxdb3-core_aarch64-unknown-linux-gnu.tar.gz target: artifacts/ arch: arm64 plat: linux - binary: /tmp/workspace/artifacts/influxdb3-core_aarch64-apple-darwin.tar.gz target: artifacts/ - arch: amd64 + arch: arm64 plat: darwin - binary: /tmp/workspace/artifacts/influxdb3-core_x86_64-pc-windows-gnu.tar.gz @@ -37,6 +37,9 @@ packages: binaries: - influxdb3 - influxdb3.exe + python-runtimes: + - source: python + target: usr/lib/influxdb3 extras: - source: LICENSE-APACHE target: usr/share/influxdb3/LICENSE-APACHE diff --git a/.circleci/packages/influxdb3/fs/usr/lib/influxdb3/.keepdir b/.circleci/packages/influxdb3/fs/usr/lib/influxdb3/.keepdir new file mode 100644 index 0000000000..e69de29bb2 diff --git a/.circleci/scripts/docker_build_release.bash b/.circleci/scripts/docker_build_release.bash index abe7c1eb69..045a8eb027 100755 --- a/.circleci/scripts/docker_build_release.bash +++ b/.circleci/scripts/docker_build_release.bash @@ -5,8 +5,10 @@ set -euo pipefail readonly PACKAGE="$1" readonly FEATURES="$2" readonly TAG="$3" -readonly ARCH="${4:-amd64}" # Default to amd64 if not specified -readonly PROFILE="${5:-release}" # Default to release if not specified +readonly PBS_DATE="$4" +readonly PBS_VERSION="$5" +readonly ARCH="${6:-amd64}" # Default to amd64 if not specified 
+readonly PROFILE="${7:-release}" # Default to release if not specified RUST_VERSION="$(sed -E -ne 's/channel = "(.*)"/\1/p' rust-toolchain.toml)" COMMIT_SHA="$(git rev-parse HEAD)" @@ -24,6 +26,8 @@ exec docker buildx build \ --build-arg RUST_VERSION="$RUST_VERSION" \ --build-arg PACKAGE="$PACKAGE" \ --build-arg PROFILE="$PROFILE" \ + --build-arg PBS_DATE="$PBS_DATE" \ + --build-arg PBS_VERSION="$PBS_VERSION" \ --platform "$PLATFORM" \ --label org.opencontainers.image.created="$NOW" \ --label org.opencontainers.image.url="$REPO_URL" \ @@ -35,4 +39,4 @@ exec docker buildx build \ --label com.influxdata.image.package="$PACKAGE" \ --progress plain \ --tag "$TAG" \ - . \ No newline at end of file + . diff --git a/.circleci/scripts/fetch-python-standalone.bash b/.circleci/scripts/fetch-python-standalone.bash new file mode 100755 index 0000000000..0910f8eba1 --- /dev/null +++ b/.circleci/scripts/fetch-python-standalone.bash @@ -0,0 +1,170 @@ +#!/bin/bash +set -euo pipefail + +# See https://github.com/astral-sh/python-build-standalone/releases +# USAGE: +# fetch-python-standalone.bash +# +# Eg: +# $ fetch-python-standalone.bash ./python-artifacts 20250106 3.11.11 +# +# This script is meant to be called by CircleCI such that the specified +# is persisted to a workspace that is later attached at /tmp/workspace/. +# In this manner, build script can do something like: +# PYO3_CONFIG_FILE=/tmp/workspace//pyo3_config_file.txt cargo build... + +readonly DOWNLOAD_DIR="$1" + +# URLs are constructed from this. Eg: +# https://github.com/astral-sh/...//cpython-+-... 
+readonly PBS_DATE="$2" +readonly PBS_VERSION="$3" +readonly PBS_MAJ_MIN=${PBS_VERSION%.*} +readonly PBS_TOP_DIR="/tmp/workspace" + +# Official influxdb3 builds use python-build-standalone since it: +# - is built to run well as an embedded interpreter +# - has a good upstream maintenance story (https://github.com/astral-sh) with +# lots of users and a corporate sponsor +# - should deliver a consistent experience across OSes and architectures +# +# python-build-standalone provides many different builds. Official influxdb3 +# build targets: +# - aarch64-apple-darwin +# - aarch64-unknown-linux-gnu +# - x86_64-unknown-linux-gnu +# - x86_64-pc-windows-msvc-shared +# +# Note: musl builds of python-build-standalone currently (as of 2025-02-04) +# have limitations: +# - don't support importing pre-built python wheels (must compile and link 3rd +# party extensions into the binary (influxdb3)) +# - historical performance issues with python and musl +# - availability limited to x86_64 (no aarch64) +# +# References +# - https://github.com/astral-sh/python-build-standalone/blob/main/docs/distributions.rst +# - https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst +# - https://edu.chainguard.dev/chainguard/chainguard-images/about/images-compiled-programs/glibc-vs-musl/#python-builds +# - https://pythonspeed.com/articles/alpine-docker-python/ +readonly TARGETS="aarch64-apple-darwin aarch64-unknown-linux-gnu x86_64-unknown-linux-gnu x86_64-pc-windows-msvc-shared" + +fetch() { + target="$1" + suffix="${2}" + if [ "${suffix}" = "full.tar.zst" ]; then + if [ "${target}" = "x86_64-pc-windows-msvc-shared" ]; then + suffix="pgo-${2}" + else + suffix="debug-${2}" + fi + fi + binary="cpython-${PBS_VERSION}+${PBS_DATE}-${target}-${suffix}" + url="https://github.com/astral-sh/python-build-standalone/releases/download/${PBS_DATE}/${binary}" + + echo "Downloading ${binary}" + curl --proto '=https' --tlsv1.2 -sS -L "$url" -o "${DOWNLOAD_DIR}/${binary}" + + echo 
"Downloading ${binary}.sha256" + curl --proto '=https' --tlsv1.2 -sS -L "${url}.sha256" -o "${DOWNLOAD_DIR}/${binary}.sha256" + dl_sha=$(cut -d ' ' -f 1 "${DOWNLOAD_DIR}/${binary}.sha256") + if [ -z "$dl_sha" ]; then + echo "Could not find properly formatted SHA256 in '${DOWNLOAD_DIR}/${binary}.sha256'" + exit 1 + fi + + printf "Verifying %s: " "${binary}" + ch_sha=$(sha256sum "${DOWNLOAD_DIR}/${binary}" | cut -d ' ' -f 1) + if [ "$ch_sha" = "$dl_sha" ]; then + echo "OK" + else + echo "ERROR (${ch_sha} != ${dl_sha})" + exit 1 + fi + + echo "Unpacking ${binary} to '${DOWNLOAD_DIR}'" + UNPACK_DIR="${DOWNLOAD_DIR}/${target}" + if [ "${target}" = "x86_64-pc-windows-msvc-shared" ]; then + UNPACK_DIR="${DOWNLOAD_DIR}/x86_64-pc-windows-gnu" + fi + mkdir "${UNPACK_DIR}" 2>/dev/null || true + if [[ "${suffix}" = *full.tar.zst ]]; then + # we only need the licensing from the full distribution + tar -C "${UNPACK_DIR}" --zstd -xf "${DOWNLOAD_DIR}/${binary}" python/PYTHON.json python/licenses + mv "${UNPACK_DIR}/python/PYTHON.json" "${UNPACK_DIR}/python/licenses" + else + tar -C "${UNPACK_DIR}" -zxf "${DOWNLOAD_DIR}/${binary}" + fi + + echo "Removing ${binary}" + rm -f "${DOWNLOAD_DIR}/${binary}" "${DOWNLOAD_DIR}/${binary}.sha256" + + if [[ "${suffix}" = *install_only_stripped.tar.gz ]]; then + echo "Creating ${UNPACK_DIR}/pyo3_config_file.txt" + PYO3_CONFIG_FILE="${UNPACK_DIR}/pyo3_config_file.txt" + PBS_DIR="${PBS_TOP_DIR}"/$(basename "${DOWNLOAD_DIR}")/$(basename "${UNPACK_DIR}") + if [ "${target}" = "x86_64-pc-windows-msvc-shared" ]; then + cat > "${PYO3_CONFIG_FILE}" < "${PYO3_CONFIG_FILE}" < "${UNPACK_DIR}/python/LICENSE.md" <> "${UNPACK_DIR}/python/LICENSE.md" <_linux_amd64.tar.gz + +# without processing engine +$ /here/influxdb3 serve ... +$ /here/influxdb3 query ... + +# with the processing engine without an activated venv +$ mkdir /path/to/plugins +$ /here/influxdb3 serve --plugin-dir /path/to/plugins ... 
# server +$ /here/influxdb3 create database foo # client +$ /here/influxdb3 test schedule_plugin -d foo testme.py # client + +# create a venv +$ /here/python/bin/python3 -m venv /path/to/venv +$ source /path/to/venv/bin/activate +(venv)$ pip install requests +... +(venv)$ deactivate + +# start server in the venv +$ source /path/to/venv/bin/activate # server +(venv)$ /here/influxdb3 serve --plugin-dir /path/to/plugins ... # server +... ... + +$ /here/influxdb3 test schedule_plugin -d foo test-requests.py # client +``` + +### Local development with python-build-standalone + +Local development with python-build-standalone currently consists of: + +1. download python-build-standalone and unpack it somewhere + * get from https://github.com/astral-sh/python-build-standalone/releases + * based on your host OS, choose one of `aarch64-apple-darwin-install_only_stripped.tar.gz`, `aarch64-unknown-linux-gnu-install_only_stripped.tar.gz`, `x86_64-pc-windows-msvc-shared-install_only_stripped.tar.gz`, `x86_64-unknown-linux-gnu-install_only_stripped.tar.gz` +2. create `pyo3_config_file.txt` to match the unpacked dir and downloaded python version. Eg, if downloaded and unpacked a 3.11.x version to `/tmp/python`: + + ``` + $ cat ./pyo3_config_file.txt + implementation=CPython + version=3.11 + shared=true + abi3=false + lib_name=python3.11 + lib_dir=/tmp/python/lib + executable=/tmp/python/bin/python3.11 + pointer_width=64 + build_flags= + suppress_build_script_link_lines=false + ``` + +3. build with: + + ``` + # note: PYO3_CONFIG_FILE must be an absolute path + $ PYO3_CONFIG_FILE=${PWD}/pyo3_config_file.txt cargo build --features "aws,gcp,azure,jemalloc_replacing_malloc,system-py" + ``` + +4. 
Linux/OSX: patch up the binary to find libpython: + + ``` + # linux + $ patchelf --set-rpath '$ORIGIN/python/lib' ./target//influxdb3 + + # osx (be sure to match the libpython version with what you downloaded) + $ install_name_tool -change '/install/lib/libpython3.11.dylib' '@executable_path/python/lib/libpython3.11.dylib' ./target//influxdb3 + ``` + +5. Linux/OSX: put the python runtime in the expected location (XXX: may be + possible at run time to see where the libpython we are using is and adjust + the code to base the location of the runtime on that). Eg, if unpacked to + `/tmp/python`: + + ``` + $ test -e ./target//python || ln -s /tmp/python ./target//python + ``` + +6. run with: + + ``` + $ mkdir -p /path/to/plugin/dir + + # linux and osx (if can't find libpython or the runtime, check previous steps) + $ ./target//influxdb3 ... --plugin-dir /path/to/plugin/dir + + # windows requires moving the binary into the python-build-standalone unpack directory + $ cp ./target//influxdb3 \path\to\python-standalone\python + # run influxdb with + $ \path\to\python-standalone\python\influxdb3.exe ... --plugin-dir \path\to\plugin\dir + ``` + + +## Discussion + +### Why python-build-standalone? + +`python-build-standalone` is designed to be +[portable](https://astral.sh/blog/python-build-standalone#whats-a-standalone-python-distribution), +[maintained](https://astral.sh/blog/python-build-standalone#the-future-of-standalone-python-distributions) +and [permissively licensed](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst#licensing). +It is purpose-built for embedding and being redistributable and has a good +upstream maintenance story (https://github.com/astral-sh) with lots of users +and a corporate sponsor. + +An alternative to using a standalone python distribution is to use the system +python. 
While this can be a reasonable choice on systems where the python +version and installation locations can be relied upon, it is not a good choice +for official builds since users would have to ensure they had a python +installation that met InfluxDB's requirements and because the myriad of +operating systems, architectures and installed python versions would be a +problem to support. + +By choosing `python-build-standalone`, InfluxDB should deliver a consistent +experience across OSes and architectures for all users as well as providing a +reasonable maintenance story. + + +### Which builds of python-build-standalone are used? + +`python-build-standalone` provides [many different builds](https://github.com/astral-sh/python-build-standalone/blob/main/docs/distributions.rst). +Official InfluxDB builds use the following `python-build-standalone` +[recommended](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) +builds: + + * `aarch64-apple-darwin-install_only_stripped.tar.gz` + * `aarch64-unknown-linux-gnu-install_only_stripped.tar.gz` + * `x86_64-unknown-linux-gnu-install_only_stripped.tar.gz` + * `x86_64-pc-windows-msvc-shared-install_only_stripped.tar.gz` + + +### How will InfluxData maintain the embedded interpreter? + +The https://github.com/astral-sh project performs timely builds of CPython +micro-releases for `python-build-standalone` based on the release cadence of +upstream Python. InfluxData need only update the build to pull in the new +micro-release for security and maintenance releases. This is done by updating +the `PBS_DATE` and `PBS_VERSION` environment variables in +`.circleci/config.yml`. See that file and +`.circleci/scripts/fetch-python-standalone.bash` for details. + +astral-sh creates new builds for CPython minor releases as they become +available from upstream Python. 
Updating the official builds to pull in a new +minor release is straightforward, but processes for verifying builds of +InfluxDB with the new `python-build-standalone` minor release are TBD. + + +### How is python-build-standalone licensed? + +Release builds of `python-build-standalone` are +[permissively licensed](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst#licensing) +and contain no copyleft code. + +The licensing information from release builds of `python-build-standalone` is +obtained by extracting the `python/PYTHON.json` and `python/licenses/*` files +from the `-debug-full.tar.zst` (Linux/Darwin) and +`-pgo-full.tar.zst` (Windows) release tarballs, placing them in the +`python/licenses` directory of the InfluxDB build and generating a +`python/LICENSE.md` file with provenance information. + +Linux builds are dynamically linked against [`glibc`](https://www.gnu.org/software/libc/) +(which is permitted by the LGPL without copyleft attachment). InfluxDB does not +statically link against `glibc` nor does it redistribute `libc` (et al) in +official builds. + + +### Why not just statically link with, eg, MUSL? + +In an ideal world, InfluxDB would build against a version of +`python-build-standalone` and statically link against it and not have to worry +about dynamic library compatibility. 
Unfortunately, this is not possible for +many reasons: + + * static `python-build-standalone` builds for Darwin are [not available](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) and doing so may have [license implications](https://github.com/astral-sh/python-build-standalone/blob/main/docs/quirks.rst#linking-static-library-on-macos) + * static `python-build-standalone` builds for Windows are [not stable](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) and considered [brittle](https://github.com/astral-sh/python-build-standalone/blob/main/docs/quirks.rst#windows-static-distributions-are-extremely-brittle) + * static `python-build-standalone` builds for Linux/arm64 (aarch64) are [not available](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) + * static `python-build-standalone` builds for Linux/amd64 (x86_64) are + available using MUSL libc, but: + * because they are static, they [cannot load compiled Python extensions](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) + (aka, 'wheels' that have compiled C, Rust, etc code instead of pure python) + outside of the Python standard library, greatly diminishing the utility of + the processing engine. This is a limitation of [ELF](https://github.com/astral-sh/python-build-standalone/blob/main/docs/quirks.rst#static-linking-of-musl-libc-prevents-extension-module-library-loading) + * there are historical [performance issues](https://edu.chainguard.dev/chainguard/chainguard-images/about/images-compiled-programs/glibc-vs-musl/#python-builds) with python and MUSL + +It is theoretically possible to statically link `glibc`, but in practice this +is technically very problematic and statically linking `glibc` has copyleft +attachment. + + +### What about alpine? 
+ +Because MUSL can't be used with `python-build-standalone` without crippling the +InfluxDB processing engine, MUSL builds that are compatible with Alpine are not +available at this time. Alpine users can choose one of: + + * build InfluxDB locally on Alpine against Alpine's system python + * run official InfluxDB within a chroot that contains `glibc` + * run official InfluxDB with [gcompat](https://git.adelielinux.org/adelie/gcompat) (untested) + +See https://wiki.alpinelinux.org/wiki/Running_glibc_programs for details. + +InfluxData may provide Alpine builds at a future date. + + +### GLIBC portability is a problem. How will you address that? + +`glibc` is designed with portability and uses 'compat symbols' to achieve +[backward compatibility](https://developers.redhat.com/blog/2019/08/01/how-the-gnu-c-library-handles-backward-compatibility). +Most 3rd party applications for Linux use the system's `glibc` in some fashion +and this is possible because of 'compat symbols' and this has worked very well +for many, many years. + +In essence, 'compat symbols' let `glibc` and the linker choose a particular +implementation of the function. All symbols in `glibc` are versioned and when a +library function changes in an incompatible way, `glibc` keeps the old +implementation in place (with the old symbol version) while adding the new +implementation with a new symbol version. In this manner, if an application is +compiled and linked against `glibc` 2.27, it will only ever lookup symbols that +are 2.27 or earlier. When 2.28 comes out, it updates any symbols it needs to +2.28, leaving the rest as they are. When the application linked against 2.27 +runs on a system with 2.28, everything is ok since 2.28 will resolve all the +2.27 symbols in the expected way the application needs. + +Where portability becomes a problem is when the application is linked against a +newer version of `glibc` than is on the system. 
If the aforementioned +application compiled and linked against 2.27 was run on a system with 2.19, it +would fail to run because the symbol versions it is looking up (ie, anything +from 2.20 and later) are not available. + +Unfortunately for developers seeking portability, compiling and linking against +the system's `glibc` means the application will reference the latest available +symbols in that `glibc`. There is no facility for telling the linker to only +use symbols from a particular `glibc` version and earlier. It's also difficult +to tell the linker to use an alternate `glibc` separate from the system's. As a +result, `glibc`-using software seeking wide Linux portability typically needs +to be compiled on an older system with a `glibc` with the desired version. + +`python-build-standalone` and `rust` both support systems with `glibc` 2.17+, +which covers distributions going back to 2014 (CentOS/RHEL 7 (EOL), Debian 8 +(Jessie; EOL), Ubuntu 14.04 LTS (EOL), Fedora 21, etc.). + +Certain InfluxDB alpha releases are compiled against a too new `glibc` (2.36). +This will be addressed before release. + + +### How does InfluxDB find the correct libpython and the python runtime? + +For the best user experience, users should not have to perform any extra setup +to use the InfluxDB processing engine. This is achieved by: + + * Using an appropriate `PYO3_CONFIG_FILE` file during the build (see 'Official + builds', above) + * Build artifacts putting the runtime in an expected location (see 'Official + builds', above) + * At runtime, ensuring that Linux and Darwin binaries look for the runtime in + the expected location. 
Ideally this would be done with linker arguments at + build time, but current (alpha) builds adjust the library search paths like + so: + + ```sh + # linux + $ patchelf --set-rpath '$ORIGIN/python/lib:$ORIGIN/../lib/influxdb3/python/lib' target/.../influxdb3 + + # osx + $ install_name_tool -change '/install/lib/libpython3.NN.dylib' \ + '@executable_path/python/lib/libpython3.NN.dylib' target/.../influxdb3 + $ rcodesign sign target/.../influxdb3 # only with osxcross' install_name_tool + ``` + + This is required, in part, due to how `python-build-standalone` is + [built](https://github.com/astral-sh/python-build-standalone/blob/main/docs/quirks.rst#references-to-build-time-paths). + When using `osxcross`'s version of `install_name_tool`, you must also use + `rcodesign` from [apple-codesign](https://crates.io/crates/apple-codesign) + to re-sign the binaries (Apple's `install_name_tool` does this + automatically). Rust may gain [support](https://github.com/rust-lang/cargo/issues/5077) + for setting arbitrary rpaths at some point. + + * The Windows `zip` file for the current (alpha) builds has copies of the + top-level DLL files from the 'python/' directory alongside `influxdb3`. + Windows requires that the dynamically linked DLLs needed by the application + are either in the same directory as the binary or found somewhere in `PATH` + (and open source tooling doesn't seem to support modifying this). For user + convenience, the `*.dll` files are shipped alongside the binary on Windows + to avoid having to set up the `PATH`. Rust believes this shouldn't be handled + by [rustc](https://github.com/rust-lang/cargo/issues/1500). This may be + addressed in a future release. + + +### There is no `pip.exe` on Windows. Why? 
+ +From [upstream](https://github.com/astral-sh/python-build-standalone/blob/main/docs/quirks.rst#no-pipexe-on-windows): +"The Windows distributions have pip installed however no `Scripts/pip.exe`, +`Scripts/pip3.exe`, and `Scripts/pipX.Y.exe` files are provided because the way +these executables are built isn't portable. (It might be possible to change how +these are built to make them portable.) + +To use pip, run `python.exe -m pip`. (It is generally a best practice to invoke +pip via `python -m pip` on all platforms so you can be explicit about the +python executable that pip uses.)" + + +### What limitations are there? + +See https://github.com/influxdata/influxdb/issues?q=is%3Aissue%20state%3Aopen%20label%3Av3 diff --git a/influxdb3/src/main.rs b/influxdb3/src/main.rs index 3c291b297a..cb6b420ffb 100644 --- a/influxdb3/src/main.rs +++ b/influxdb3/src/main.rs @@ -14,6 +14,8 @@ use dotenvy::dotenv; use influxdb3_clap_blocks::tokio::TokioIoConfig; use influxdb3_process::VERSION_STRING; use observability_deps::tracing::warn; +use std::env; +use std::path::{Path, PathBuf}; use trogging::{ cli::LoggingConfigBuilderExt, tracing_subscriber::{prelude::*, Registry}, @@ -120,6 +122,12 @@ fn main() -> Result<(), std::io::Error> { #[cfg(unix)] install_crash_handler(); // attempt to render a useful stacktrace to stderr + #[cfg(feature = "system-py")] + set_pythonhome(); + + #[cfg(all(target_os = "windows", feature = "system-py"))] + set_pythonpath(); + // load all environment variables from .env before doing anything load_dotenv(); @@ -301,3 +309,62 @@ fn init_logs_and_tracing( let subscriber = Registry::default().with(layers); trogging::install_global(subscriber) } + +// XXX: this should be somewhere more appropriate +#[cfg(feature = "system-py")] +fn set_pythonhome() { + // This would ideally be detected by pyo3, but it isn't + match env::var("PYTHONHOME") { + Ok(_) => {} + Err(env::VarError::NotPresent) => { + let exe_path = env::current_exe().unwrap(); + let exe_dir = 
exe_path.parent().unwrap(); + + let pythonhome: PathBuf = if cfg!(target_os = "linux") + && (exe_dir == Path::new("/usr/bin") || exe_dir == Path::new("/usr/local/bin")) + { + // Official Linux builds may be in /usr or /usr/local + // XXX: handle this for local build and install (eg DESTDIR) + let parent_dir = exe_dir.parent().unwrap(); + parent_dir.join("lib/influxdb3/python") + } else { + exe_dir.join("python") + }; + + if pythonhome.is_dir() { + unsafe { env::set_var("PYTHONHOME", pythonhome.to_str().unwrap()) }; + //println!("Set PYTHONHOME to '{}'", env::var("PYTHONHOME").unwrap()); + } else { + // TODO: use logger + eprintln!("Could not find python installation. May need to set PYTHONHOME"); + } + } + Err(e) => { + eprintln!("Failed to retrieve PYTHONHOME: {e}"); + } + }; +} + +// XXX: this should be somewhere more appropriate +#[cfg(target_os = "windows")] +fn set_pythonpath() { + let exe_path = env::current_exe().unwrap(); + let exe_dir = exe_path.parent().unwrap(); + let pythonpath = exe_dir.join("python/Lib"); + + // This shouldn't be needed, but it is on Windows + match env::var("PYTHONPATH") { + Ok(v) => { + let new_path = format!("{};{}", pythonpath.display(), v); + unsafe { env::set_var("PYTHONPATH", &new_path) }; + //println!("Updated PYTHONPATH to: {}", env::var("PYTHONPATH").unwrap()); + } + Err(env::VarError::NotPresent) => { + unsafe { env::set_var("PYTHONPATH", &pythonpath) }; + //println!("Updated PYTHONPATH to: {}", env::var("PYTHONPATH").unwrap()); + } + Err(e) => { + eprintln!("Failed to retrieve PYTHONPATH: {e}"); + } + } +} diff --git a/influxdb3_processing_engine/src/virtualenv.rs b/influxdb3_processing_engine/src/virtualenv.rs index f16df2719d..fb233aa83c 100644 --- a/influxdb3_processing_engine/src/virtualenv.rs +++ b/influxdb3_processing_engine/src/virtualenv.rs @@ -17,7 +17,14 @@ pub enum VenvError { } fn get_python_version() -> Result<(u8, u8), std::io::Error> { - let output = Command::new("python3") + // linux/osx have python3, but 
windows only has python + let python_exe = if cfg!(target_os = "windows") { + "python" + } else { + "python3" + }; + + let output = Command::new(python_exe) .args([ "-c", "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')", @@ -63,11 +70,17 @@ pub fn init_pyo3() { }); } -#[cfg(unix)] +// FIXME: this still doesn't work right on windows (sys.path isn't adding the +// venv's site-packages). Perhaps look at /path/to/venv/pyvenv.cfg? pub(crate) fn initialize_venv(venv_path: &Path) -> Result<(), VenvError> { use std::process::Command; - let activate_script = venv_path.join("bin").join("activate"); + let activate_script = if cfg!(target_os = "windows") { + venv_path.join("Scripts").join("activate") + } else { + venv_path.join("bin").join("activate") + }; + if !activate_script.exists() { return Err(VenvError::InitError(format!( "Activation script not found at {:?}", @@ -75,13 +88,19 @@ pub(crate) fn initialize_venv(venv_path: &Path) -> Result<(), VenvError> { ))); } - let output = Command::new("bash") - .arg("-c") - .arg(format!( - "source {} && env", - activate_script.to_str().unwrap() - )) - .output()?; + let output = if cfg!(target_os = "windows") { + Command::new("cmd") + .args(["/C", activate_script.to_str().unwrap()]) + .output()? + } else { + Command::new("bash") + .arg("-c") + .arg(format!( + "source {} && env", + activate_script.to_str().unwrap() + )) + .output()? 
+ }; if !output.status.success() { return Err(VenvError::InitError( diff --git a/install_influxdb.sh b/install_influxdb.sh deleted file mode 100644 index 6acbd09f04..0000000000 --- a/install_influxdb.sh +++ /dev/null @@ -1,394 +0,0 @@ -#!/bin/sh -e - -readonly GREEN='\033[0;32m' -readonly BOLD='\033[1m' -readonly BOLDGREEN='\033[1;32m' -readonly DIM='\033[2m' -readonly NC='\033[0m' # No Color - -ARCHITECTURE=$(uname -m) -ARTIFACT="" -IS_MUSL="" -OS="" -INSTALL_LOC=~/.influxdb -BINARY_NAME="influxdb3" -PORT=8181 - -EDITION="Core" -EDITION_TAG="core" -if [ "$1" = "enterprise" ]; then - EDITION="Enterprise" - EDITION_TAG="enterprise" - shift 1 -fi - -### OS AND ARCHITECTURE DETECTION ### -case "$(uname -s)" in - Linux*) OS="Linux";; - Darwin*) OS="Darwin";; - *) OS="UNKNOWN";; -esac - -if [ "${OS}" = "Linux" ]; then - # ldd is a shell script but on some systems (eg Ubuntu) security hardening - # prevents it from running when invoked directly. Since we only want to - # use '--verbose', find the path to ldd, then invoke under sh to bypass ldd - # hardening. - # XXX: use 'uname -o | grep GNU' instead? 
- ldd_exec=$(command -v ldd) - if [ "${ARCHITECTURE}" = "x86_64" ] || [ "${ARCHITECTURE}" = "amd64" ]; then - # Check if we're on a GNU/Linux system, otherwise default to musl - if [ -n "$ldd_exec" ] && sh -c "$ldd_exec --version" 2>&1 | grep -Eq "(GNU|GLIBC)"; then - ARTIFACT="x86_64-unknown-linux-gnu" - else - ARTIFACT="x86_64-unknown-linux-musl" - IS_MUSL="yes" - fi - elif [ "${ARCHITECTURE}" = "aarch64" ] || [ "${ARCHITECTURE}" = "arm64" ]; then - if [ -n "$ldd_exec" ] && sh -c "$ldd_exec --version" 2>&1 | grep -Eq "(GNU|GLIBC)"; then - ARTIFACT="aarch64-unknown-linux-gnu" - else - ARTIFACT="aarch64-unknown-linux-musl" - IS_MUSL="yes" - fi - fi -elif [ "${OS}" = "Darwin" ]; then - if [ "${ARCHITECTURE}" = "x86_64" ]; then - printf "Intel Mac support is coming soon!\n" - printf "Visit our public Discord at \033[4;94mhttps://discord.gg/az4jPm8x${NC} for additional guidance.\n" - printf "View alternative binaries on our Getting Started guide at \033[4;94mhttps://docs.influxdata.com/influxdb3/${EDITION_TAG}/${NC}.\n" - exit 1 - else - ARTIFACT="aarch64-apple-darwin" - fi -fi - -# Exit if unsupported system -[ -n "${ARTIFACT}" ] || { - printf "Unfortunately this script doesn't support your '${OS}' | '${ARCHITECTURE}' setup, or was unable to identify it correctly.\n" - printf "Visit our public Discord at \033[4;94mhttps://discord.gg/az4jPm8x${NC} for additional guidance.\n" - printf "View alternative binaries on our Getting Started guide at \033[4;94mhttps://docs.influxdata.com/influxdb3/${EDITION_TAG}/${NC}.\n" - exit 1 -} - -URL="https://dl.influxdata.com/influxdb/snapshots/influxdb3-${EDITION_TAG}_${ARTIFACT}.tar.gz" - -START_TIME=$(date +%s) - -# Attempt to clear screen and show welcome message -clear 2>/dev/null || true # clear isn't available everywhere -printf "┌───────────────────────────────────────────────────┐\n" -printf "│ ${BOLD}Welcome to InfluxDB!${NC} We'll make this quick. 
│\n" -printf "└───────────────────────────────────────────────────┘\n" - -echo -printf "${BOLD}Select Installation Type${NC}\n" -echo -printf "1) ${GREEN}Docker Image${NC} ${DIM}(More Powerful, More Complex)${NC}\n" -printf " ├─ Requires knowledge of Docker and Docker management\n" -printf " └─ Includes the Processing Engine for real-time data transformation,\n" -printf " enrichment, and general custom Python code execution.\n\n" -printf "2) ${GREEN}Simple Download${NC} ${DIM}(Automated Install, Quick Setup)${NC}\n" -printf " ├─ No external dependencies required\n" -printf " └─ The Processing Engine will be available soon for binary installations,\n" -printf " bringing the same powerful processing capabilities to local deployments.\n" -echo -printf "Enter your choice (1-2): " -read -r INSTALL_TYPE - -case "$INSTALL_TYPE" in - 1) - printf "\n\n${BOLD}Download and Tag Docker Image${NC}\n" - printf "├─ ${DIM}docker pull quay.io/influxdb/influxdb3-${EDITION_TAG}:latest${NC}\n" - printf "└─ ${DIM}docker tag quay.io/influxdb/influxdb3-${EDITION_TAG}:latest influxdb3-${EDITION_TAG}${NC}\n\n" - if ! docker pull "quay.io/influxdb/influxdb3-${EDITION_TAG}:latest"; then - printf "└─ Error: Failed to download Docker image.\n" - exit 1 - fi - docker tag quay.io/influxdb/influxdb3-${EDITION_TAG}:latest influxdb3-${EDITION_TAG} - # Exit script after Docker installation - echo - printf "${BOLD}NEXT STEPS${NC}\n" - printf "1) Run the Docker image:\n" - printf " ├─ ${BOLD}mkdir plugins${NC} ${DIM}(To store and access plugins)${NC}\n" - printf " └─ ${BOLD}docker run -it -p ${PORT}:${PORT} -v ./plugins:/plugins influxdb3-${EDITION_TAG} serve --object-store memory --node-id node0 --plugin-dir /plugins${NC} ${DIM}(To start)${NC}\n" - printf "2) View documentation at \033[4;94mhttps://docs.influxdata.com/influxdb3/${EDITION_TAG}/${NC}\n\n" - - END_TIME=$(date +%s) - DURATION=$((END_TIME - START_TIME)) - - out=" Time is everything. This process took $DURATION seconds. 
" - mid="" - for _ in $(seq 1 ${#out}); do - mid="${mid}─" - done - printf "┌%s┐\n" "$mid" - printf "│%s│\n" "$out" - printf "└%s┘\n" "$mid" - exit 0 - ;; - 2) - printf "\n\n" - ;; - *) - printf "Invalid choice. Defaulting to binary installation.\n\n" - ;; -esac - -# attempt to find the user's shell config -shellrc= -if [ -n "$SHELL" ]; then - tmp=~/.$(basename "$SHELL")rc - if [ -e "$tmp" ]; then - shellrc="$tmp" - fi -fi - -printf "${BOLD}Downloading InfluxDB 3 %s to %s${NC}\n" "$EDITION" "$INSTALL_LOC" -printf "├─${DIM} mkdir -p '%s'${NC}\n" "$INSTALL_LOC" -mkdir -p "$INSTALL_LOC" -printf "└─${DIM} curl -sS '%s' -o '%s/influxdb3.tar.gz'${NC}\n" "${URL}" "$INSTALL_LOC" -curl -sS "${URL}" -o "$INSTALL_LOC/influxdb3.tar.gz" - -echo -printf "${BOLD}Verifying '%s/influxdb3.tar.gz'${NC}\n" "$INSTALL_LOC" -printf "└─${DIM} curl -sS '%s.sha256' -o '%s/influxdb3.tar.gz.sha256'${NC}\n" "${URL}" "$INSTALL_LOC" -curl -sS "${URL}.sha256" -o "$INSTALL_LOC/influxdb3.tar.gz.sha256" -dl_sha=$(cut -d ' ' -f 1 "$INSTALL_LOC/influxdb3.tar.gz.sha256" | grep -E '^[0-9a-f]{64}$') -if [ -z "$dl_sha" ]; then - printf "Could not find properly formatted SHA256 in '%s/influxdb3.tar.gz.sha256'. Aborting.\n" "$INSTALL_LOC" - exit 1 -fi -printf "└─${DIM} sha256sum '%s/influxdb3.tar.gz'" "$INSTALL_LOC" -ch_sha=$(sha256sum "$INSTALL_LOC/influxdb3.tar.gz" | cut -d ' ' -f 1) -if [ "$ch_sha" = "$dl_sha" ]; then - printf " (OK: %s = %s)${NC}\n" "$ch_sha" "$dl_sha" -else - printf " (ERROR: %s != %s). Aborting.${NC}\n" "$ch_sha" "$dl_sha" - exit 1 -fi -printf "└─${DIM} rm '%s/influxdb3.tar.gz.sha256'${NC}\n" "$INSTALL_LOC" -rm "$INSTALL_LOC/influxdb3.tar.gz.sha256" - -echo -printf "${BOLD}Extracting and Processing${NC}\n" -printf "├─${DIM} tar -xf '%s/influxdb3.tar.gz' -C '%s'${NC}\n" "$INSTALL_LOC" "$INSTALL_LOC" -tar -xf "$INSTALL_LOC/influxdb3.tar.gz" -C "$INSTALL_LOC" -printf "└─${DIM} rm '%s/influxdb3.tar.gz'${NC}\n" "$INSTALL_LOC" -rm "$INSTALL_LOC/influxdb3.tar.gz" - -if [ -n "$shellrc" ] && ! 
grep -q "export PATH=.*$INSTALL_LOC" "$shellrc"; then - echo - printf "${BOLD}Adding InfluxDB to '%s'${NC}\n" "$shellrc" - printf "└─${DIM} export PATH=\"\$PATH:%s/\" >> '%s'${NC}\n" "$INSTALL_LOC" "$shellrc" - echo "export PATH=\"\$PATH:$INSTALL_LOC/\"" >> "$shellrc" -fi - -if [ "${EDITION}" = "Core" ]; then - # Prompt user to start the service - echo - printf "${BOLD}Configuration Options${NC}\n" - - - printf "└─ Start InfluxDB Now? (y/n): " - read -r START_SERVICE - if echo "$START_SERVICE" | grep -q "^[Yy]$" ; then - # Prompt for Node ID - echo - printf "${BOLD}Enter Your Node ID${NC}\n" - printf "├─ A Node ID is a unique, uneditable identifier for a service.\n" - printf "└─ Enter a Node ID (default: node0): " - read -r NODE_ID - NODE_ID=${NODE_ID:-node0} - - # Prompt for storage solution - echo - printf "${BOLD}Select Your Storage Solution${NC}\n" - printf "├─ 1) In-memory storage (Fastest, data cleared on restart)\n" - printf "├─ 2) File storage (Persistent local storage)\n" - printf "├─ 3) Object storage (Cloud-compatible storage)\n" - printf "└─ Enter your choice (1-3): " - read -r STORAGE_CHOICE - - case "$STORAGE_CHOICE" in - 1) - STORAGE_TYPE="memory" - STORAGE_FLAGS="--object-store=memory" - STORAGE_FLAGS_ECHO="$STORAGE_FLAGS" - ;; - 2) - STORAGE_TYPE="File Storage" - echo - printf "Enter storage path (default: %s/data): " "${INSTALL_LOC}" - read -r STORAGE_PATH - STORAGE_PATH=${STORAGE_PATH:-"${INSTALL_LOC}/data"} - STORAGE_FLAGS="--object-store=file --data-dir ${STORAGE_PATH}" - STORAGE_FLAGS_ECHO="$STORAGE_FLAGS" - ;; - 3) - STORAGE_TYPE="Object Storage" - echo - printf "${BOLD}Select Cloud Provider${NC}\n" - printf "├─ 1) Amazon S3\n" - printf "├─ 2) Azure Storage\n" - printf "├─ 3) Google Cloud Storage\n" - printf "└─ Enter your choice (1-3): " - read -r CLOUD_CHOICE - - case $CLOUD_CHOICE in - 1) # AWS S3 - echo - printf "${BOLD}AWS S3 Configuration${NC}\n" - printf "├─ Enter AWS Access Key ID: " - read -r AWS_KEY - - printf "├─ Enter AWS Secret 
Access Key: " - stty -echo - read -r AWS_SECRET - stty echo - - echo - printf "├─ Enter S3 Bucket: " - read -r AWS_BUCKET - - printf "└─ Enter AWS Region (default: us-east-1): " - read -r AWS_REGION - AWS_REGION=${AWS_REGION:-"us-east-1"} - - STORAGE_FLAGS="--object-store=s3 --bucket=${AWS_BUCKET}" - if [ -n "$AWS_REGION" ]; then - STORAGE_FLAGS="$STORAGE_FLAGS --aws-default-region=${AWS_REGION}" - fi - STORAGE_FLAGS="$STORAGE_FLAGS --aws-access-key-id=${AWS_KEY}" - STORAGE_FLAGS_ECHO="$STORAGE_FLAGS --aws-secret-access-key=..." - STORAGE_FLAGS="$STORAGE_FLAGS --aws-secret-access-key=${AWS_SECRET}" - ;; - - 2) # Azure Storage - echo - printf "${BOLD}Azure Storage Configuration${NC}\n" - printf "├─ Enter Storage Account Name: " - read -r AZURE_ACCOUNT - - printf "└─ Enter Storage Access Key: " - stty -echo - read -r AZURE_KEY - stty echo - - echo - STORAGE_FLAGS="--object-store=azure --azure-storage-account=${AZURE_ACCOUNT}" - STORAGE_FLAGS_ECHO="$STORAGE_FLAGS --azure-storage-access-key=..." - STORAGE_FLAGS="$STORAGE_FLAGS --azure-storage-access-key=${AZURE_KEY}" - ;; - - 3) # Google Cloud Storage - echo - printf "${BOLD}Google Cloud Storage Configuration${NC}\n" - printf "└─ Enter path to service account JSON file: " - read -r GOOGLE_SA - STORAGE_FLAGS="--object-store=google --google-service-account=${GOOGLE_SA}" - STORAGE_FLAGS_ECHO="$STORAGE_FLAGS" - ;; - - *) - printf "Invalid cloud provider choice. Defaulting to file storage.\n" - STORAGE_TYPE="File Storage" - STORAGE_FLAGS="--object-store=file --data-dir ${INSTALL_LOC}/data" - STORAGE_FLAGS_ECHO="$STORAGE_FLAGS" - ;; - esac - ;; - - *) - printf "Invalid choice. Defaulting to in-memory.\n" - STORAGE_TYPE="Memory" - STORAGE_FLAGS="--object-store=memory" - STORAGE_FLAGS_ECHO="$STORAGE_FLAGS" - ;; - esac - - # Ensure port is available; if not, find a new one. 
If IS_MUSL is set, - # assume we are on a busybox-like system whose lsof doesn't support the - # args we need - lsof_exec=$(command -v lsof) && { - while [ -n "$lsof_exec" ] && [ "$IS_MUSL" != "yes" ] && lsof -i:"$PORT" -t >/dev/null 2>&1; do - printf "├─${DIM} Port %s is in use. Finding new port.${NC}\n" "$PORT" - PORT=$((PORT + 1)) - if [ "$PORT" -gt 32767 ]; then - printf "└─${DIM} Could not find an available port. Aborting.${NC}\n" - exit 1 - fi - if ! "$lsof_exec" -i:"$PORT" -t >/dev/null 2>&1; then - printf "└─${DIM} Found an available port: %s${NC}\n" "$PORT" - break - fi - done - } - - # Start and give up to 30 seconds to respond - echo - printf "${BOLD}Starting InfluxDB${NC}\n" - printf "├─${DIM} Node ID: %s${NC}\n" "$NODE_ID" - printf "├─${DIM} Storage: %s${NC}\n" "$STORAGE_TYPE" - printf "├─${DIM} '%s' serve --node-id='%s' --http-bind='0.0.0.0:%s' %s${NC}\n" "$INSTALL_LOC/$BINARY_NAME" "$NODE_ID" "$PORT" "$STORAGE_FLAGS_ECHO" - "$INSTALL_LOC/$BINARY_NAME" serve --node-id="$NODE_ID" --http-bind="0.0.0.0:$PORT" $STORAGE_FLAGS > /dev/null & - PID="$!" - - SUCCESS=0 - for _ in $(seq 1 30); do - # on systems without a usable lsof, sleep a second to see if the pid is - # still there to give influxdb a chance to error out in case an already - # running influxdb is running on this port - if [ -z "$lsof_exec" ] || [ "$IS_MUSL" = "yes" ]; then - sleep 1 - fi - - if ! kill -0 "$PID" 2>/dev/null ; then - break - fi - - if curl --max-time 3 -s "http://localhost:$PORT/health" >/dev/null 2>&1; then - printf "└─${BOLDGREEN} ✓ InfluxDB 3 ${EDITION} is now installed and running on port %s. Nice!${NC}\n" "$PORT" - SUCCESS=1 - break - fi - sleep 1 - done - - if [ $SUCCESS -eq 0 ]; then - printf "└─${BOLD} ERROR: InfluxDB failed to start; check permissions or other potential issues.${NC}\n" "$PORT" - exit 1 - fi - - else - echo - printf "${BOLDGREEN}✓ InfluxDB 3 ${EDITION} is now installed. 
Nice!${NC}\n" - fi -else - echo - printf "${BOLDGREEN}✓ InfluxDB 3 ${EDITION} is now installed. Nice!${NC}\n" -fi - -### SUCCESS INFORMATION ### -echo -printf "${BOLD}Further Info${NC}\n" -if [ -n "$shellrc" ]; then - printf "├─ Run ${BOLD}source '%s'${NC}, then access InfluxDB with ${BOLD}influxdb3${NC} command.\n" "$shellrc" -else - printf "├─ Access InfluxDB with the ${BOLD}%s${NC} command.\n" "$INSTALL_LOC/$BINARY_NAME" -fi -printf "├─ View the Getting Started guide at \033[4;94mhttps://docs.influxdata.com/influxdb3/${EDITION_TAG}/${NC}.\n" -printf "└─ Visit our public Discord at \033[4;94mhttps://discord.gg/az4jPm8x${NC} for additional guidance.\n" -echo - -END_TIME=$(date +%s) -DURATION=$((END_TIME - START_TIME)) - -out=" Time is everything. This process took $DURATION seconds. " -mid="" -for _ in $(seq 1 ${#out}); do - mid="${mid}─" -done -printf "┌%s┐\n" "$mid" -printf "│%s│\n" "$out" -printf "└%s┘\n" "$mid" diff --git a/install_influxdb3.sh b/install_influxdb3.sh index 6acbd09f04..72b983a08e 100644 --- a/install_influxdb3.sh +++ b/install_influxdb3.sh @@ -85,13 +85,9 @@ echo printf "${BOLD}Select Installation Type${NC}\n" echo printf "1) ${GREEN}Docker Image${NC} ${DIM}(More Powerful, More Complex)${NC}\n" -printf " ├─ Requires knowledge of Docker and Docker management\n" -printf " └─ Includes the Processing Engine for real-time data transformation,\n" -printf " enrichment, and general custom Python code execution.\n\n" +printf " └─ Requires knowledge of Docker and Docker management\n" printf "2) ${GREEN}Simple Download${NC} ${DIM}(Automated Install, Quick Setup)${NC}\n" -printf " ├─ No external dependencies required\n" -printf " └─ The Processing Engine will be available soon for binary installations,\n" -printf " bringing the same powerful processing capabilities to local deployments.\n" +printf " └─ No external dependencies required\n" echo printf "Enter your choice (1-2): " read -r INSTALL_TYPE @@ -147,13 +143,13 @@ fi printf "${BOLD}Downloading 
InfluxDB 3 %s to %s${NC}\n" "$EDITION" "$INSTALL_LOC" printf "├─${DIM} mkdir -p '%s'${NC}\n" "$INSTALL_LOC" mkdir -p "$INSTALL_LOC" -printf "└─${DIM} curl -sS '%s' -o '%s/influxdb3.tar.gz'${NC}\n" "${URL}" "$INSTALL_LOC" -curl -sS "${URL}" -o "$INSTALL_LOC/influxdb3.tar.gz" +printf "└─${DIM} curl -sSL '%s' -o '%s/influxdb3.tar.gz'${NC}\n" "${URL}" "$INSTALL_LOC" +curl -sSL "${URL}" -o "$INSTALL_LOC/influxdb3.tar.gz" echo printf "${BOLD}Verifying '%s/influxdb3.tar.gz'${NC}\n" "$INSTALL_LOC" -printf "└─${DIM} curl -sS '%s.sha256' -o '%s/influxdb3.tar.gz.sha256'${NC}\n" "${URL}" "$INSTALL_LOC" -curl -sS "${URL}.sha256" -o "$INSTALL_LOC/influxdb3.tar.gz.sha256" +printf "└─${DIM} curl -sSL '%s.sha256' -o '%s/influxdb3.tar.gz.sha256'${NC}\n" "${URL}" "$INSTALL_LOC" +curl -sSL "${URL}.sha256" -o "$INSTALL_LOC/influxdb3.tar.gz.sha256" dl_sha=$(cut -d ' ' -f 1 "$INSTALL_LOC/influxdb3.tar.gz.sha256" | grep -E '^[0-9a-f]{64}$') if [ -z "$dl_sha" ]; then printf "Could not find properly formatted SHA256 in '%s/influxdb3.tar.gz.sha256'. 
Aborting.\n" "$INSTALL_LOC" @@ -172,8 +168,15 @@ rm "$INSTALL_LOC/influxdb3.tar.gz.sha256" echo printf "${BOLD}Extracting and Processing${NC}\n" -printf "├─${DIM} tar -xf '%s/influxdb3.tar.gz' -C '%s'${NC}\n" "$INSTALL_LOC" "$INSTALL_LOC" -tar -xf "$INSTALL_LOC/influxdb3.tar.gz" -C "$INSTALL_LOC" + +# some tarballs have a leading component, check for that +TAR_LEVEL=0 +if tar -tf "$INSTALL_LOC/influxdb3.tar.gz" | grep -q '[a-zA-Z0-9]/influxdb3$' ; then + TAR_LEVEL=1 +fi +printf "├─${DIM} tar -xf '%s/influxdb3.tar.gz' --strip-components=${TAR_LEVEL} -C '%s'${NC}\n" "$INSTALL_LOC" "$INSTALL_LOC" +tar -xf "$INSTALL_LOC/influxdb3.tar.gz" --strip-components="${TAR_LEVEL}" -C "$INSTALL_LOC" + printf "└─${DIM} rm '%s/influxdb3.tar.gz'${NC}\n" "$INSTALL_LOC" rm "$INSTALL_LOC/influxdb3.tar.gz" @@ -378,7 +381,9 @@ else printf "├─ Access InfluxDB with the ${BOLD}%s${NC} command.\n" "$INSTALL_LOC/$BINARY_NAME" fi printf "├─ View the Getting Started guide at \033[4;94mhttps://docs.influxdata.com/influxdb3/${EDITION_TAG}/${NC}.\n" -printf "└─ Visit our public Discord at \033[4;94mhttps://discord.gg/az4jPm8x${NC} for additional guidance.\n" +printf "├─ Visit our public Discord at \033[4;94mhttps://discord.gg/az4jPm8x${NC} for additional guidance.\n" +printf "└─ The Processing Engine is now included for real-time data transformation,\n" +printf " enrichment, and general custom Python code execution.\n\n" echo END_TIME=$(date +%s)