Delete .github/workflows/benchmark_ci.yml
parent
5fcc6578c5
commit
01e59672bd
|
@ -1,301 +0,0 @@
|
|||
name: CI
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./benchmark/
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
branches: [master]
|
||||
inputs:
|
||||
agents:
|
||||
description: 'Agents to run (comma-separated)'
|
||||
required: false
|
||||
default: 'gpt-engineer,smol-developer,Auto-GPT,mini-agi,beebot,BabyAGI,PolyGPT,Turbo' # Default agents if none are specified
|
||||
schedule:
|
||||
- cron: '0 8 * * *'
|
||||
push:
|
||||
branches: [master, ci-test*]
|
||||
paths-ignore:
|
||||
- 'reports/**'
|
||||
pull_request:
|
||||
branches: [stable, master, release-*, develop]
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
min-python-version: '3.10'
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: ${{ github.event.pull_request.head.ref }}
|
||||
repository: ${{ github.event.pull_request.head.repo.full_name }}
|
||||
submodules: true
|
||||
|
||||
- name: Set up Python ${{ env.min-python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ env.min-python-version }}
|
||||
|
||||
- id: get_date
|
||||
name: Get date
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Install Poetry
|
||||
run: |
|
||||
curl -sSL https://install.python-poetry.org | python -
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
export POETRY_VIRTUALENVS_IN_PROJECT=true
|
||||
poetry install -vvv
|
||||
|
||||
- name: Lint with flake8
|
||||
run: poetry run flake8
|
||||
|
||||
- name: Check black formatting
|
||||
run: poetry run black . --exclude test.py --check
|
||||
if: success() || failure()
|
||||
|
||||
- name: Check isort formatting
|
||||
run: poetry run isort . --check
|
||||
if: success() || failure()
|
||||
|
||||
- name: Check for unused imports and pass statements
|
||||
run: |
|
||||
cmd="poetry run autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring agbenchmark"
|
||||
$cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1)
|
||||
if: success() || failure()
|
||||
matrix-setup:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
env-name: ${{ steps.set-matrix.outputs.env-name }}
|
||||
steps:
|
||||
- id: set-matrix
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "schedule" ]; then
|
||||
echo "::set-output name=env-name::production"
|
||||
echo "::set-output name=matrix::[ 'gpt-engineer', 'smol-developer', 'Auto-GPT', 'mini-agi', 'beebot', 'BabyAGI', 'PolyGPT', 'Turbo' ]"
|
||||
elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
|
||||
IFS=',' read -ra matrix_array <<< "${{ github.event.inputs.agents }}"
|
||||
matrix_string="[ \"$(echo "${matrix_array[@]}" | sed 's/ /", "/g')\" ]"
|
||||
echo "::set-output name=env-name::production"
|
||||
echo "::set-output name=matrix::$matrix_string"
|
||||
else
|
||||
echo "::set-output name=env-name::testing"
|
||||
echo "::set-output name=matrix::[ 'mini-agi' ]"
|
||||
fi
|
||||
|
||||
tests:
|
||||
environment:
|
||||
name: '${{ needs.matrix-setup.outputs.env-name }}'
|
||||
needs: matrix-setup
|
||||
env:
|
||||
GH_TOKEN: ${{ github.event_name == 'pull_request' && github.token || secrets.PAT }}
|
||||
min-python-version: '3.10'
|
||||
name: '${{ matrix.agent-name }}'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 50
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
agent-name: ${{fromJson(needs.matrix-setup.outputs.matrix)}}
|
||||
steps:
|
||||
- name: Print Environment Name
|
||||
run: |
|
||||
echo "Matrix Setup Environment Name: ${{ needs.matrix-setup.outputs.env-name }}"
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: ${{ github.event.pull_request.head.ref }}
|
||||
repository: ${{ github.event.pull_request.head.repo.full_name }}
|
||||
submodules: true
|
||||
token: ${{ env.GH_TOKEN }}
|
||||
|
||||
- name: Setup Chrome and ChromeDriver
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y wget
|
||||
wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
|
||||
sudo dpkg -i google-chrome-stable_current_amd64.deb
|
||||
sudo apt-get install -f
|
||||
|
||||
- name: Set up Python ${{ env.min-python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ env.min-python-version }}
|
||||
|
||||
- id: get_date
|
||||
name: Get date
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Install Poetry
|
||||
run: |
|
||||
curl -sSL https://install.python-poetry.org | python -
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
poetry install -vvv
|
||||
poetry build
|
||||
|
||||
- name: Run regression tests
|
||||
run: |
|
||||
cd agent/$AGENT_NAME
|
||||
prefix=""
|
||||
if [ "$AGENT_NAME" == "gpt-engineer" ]; then
|
||||
make install
|
||||
source venv/bin/activate
|
||||
elif [ "$AGENT_NAME" == "Auto-GPT" ]; then
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
pip uninstall agbenchmark -y
|
||||
elif [ "$AGENT_NAME" == "mini-agi" ]; then
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
cp .env_example .env
|
||||
elif [ "$AGENT_NAME" == "smol-developer" ]; then
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
elif [ "$AGENT_NAME" == "BabyAGI" ]; then
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
elif [ "$AGENT_NAME" == "SuperAGI" ]; then
|
||||
cp config_template.yaml config.yaml
|
||||
sed -i 's/OPENAI_API_KEY:.*/OPENAI_API_KEY: "'"${{ secrets.OPENAI_API_KEY }}"'"/' config.yaml
|
||||
docker-compose up -d --build
|
||||
elif [ "$AGENT_NAME" == "beebot" ]; then
|
||||
poetry install
|
||||
poetry run playwright install
|
||||
poetry run uvicorn beebot.initiator.api:create_app --factory --timeout-graceful-shutdown=1 &
|
||||
prefix="poetry run "
|
||||
elif [ "$AGENT_NAME" == "PolyGPT" ]; then
|
||||
cp .env.template .env
|
||||
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash
|
||||
export NVM_DIR=$HOME/.nvm
|
||||
source $NVM_DIR/nvm.sh
|
||||
nvm install && nvm use
|
||||
yarn install
|
||||
export NODE_TLS_REJECT_UNAUTHORIZED=0
|
||||
elif [ "$AGENT_NAME" == "Turbo" ]; then
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
cp .env.template .env
|
||||
sed -i 's/your-openai-api-key/${{ secrets.OPENAI_API_KEY }}/g' .env
|
||||
else
|
||||
echo "Unknown agent name: $AGENT_NAME"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
pip install ../../dist/*.whl
|
||||
|
||||
bash -c "$(curl -fsSL https://raw.githubusercontent.com/merwanehamadi/helicone/b7ab4bc53e51d8ab29fff19ce5986ab7720970c6/mitmproxy.sh)" -s start
|
||||
|
||||
if [ "${GITHUB_EVENT_NAME}" == "pull_request" ] || [ "${{ github.event_name }}" == "push" ]; then
|
||||
set +e # Ignore non-zero exit codes and continue execution
|
||||
echo "Running the following command: ${prefix}agbenchmark start --maintain --mock"
|
||||
${prefix}agbenchmark start --maintain --mock
|
||||
EXIT_CODE=$?
|
||||
set -e # Stop ignoring non-zero exit codes
|
||||
# Check if the exit code was 5, and if so, exit with 0 instead
|
||||
if [ $EXIT_CODE -eq 5 ]; then
|
||||
echo "regression_tests.json is empty."
|
||||
fi
|
||||
echo "Running the following command: ${prefix}agbenchmark start --mock"
|
||||
${prefix}agbenchmark start --mock
|
||||
|
||||
echo "Running the following command: ${prefix}agbenchmark start --mock --category=retrieval"
|
||||
${prefix}agbenchmark start --mock --category=retrieval
|
||||
|
||||
echo "Running the following command: ${prefix}agbenchmark start --mock --category=interface"
|
||||
${prefix}agbenchmark start --mock --category=interface
|
||||
|
||||
echo "Running the following command: ${prefix}agbenchmark start --mock --category=code"
|
||||
${prefix}agbenchmark start --mock --category=code
|
||||
|
||||
echo "Running the following command: ${prefix}agbenchmark start --mock --category=memory"
|
||||
${prefix}agbenchmark start --mock --category=memory
|
||||
|
||||
echo "Running the following command: ${prefix}agbenchmark start --mock --suite TestRevenueRetrieval"
|
||||
${prefix}agbenchmark start --mock --suite TestRevenueRetrieval
|
||||
|
||||
echo "Running the following command: ${prefix}agbenchmark start --test=TestWriteFile"
|
||||
${prefix}agbenchmark start --test=TestWriteFile
|
||||
|
||||
cd ../..
|
||||
poetry install
|
||||
poetry run uvicorn server:app --reload &
|
||||
sleep 5
|
||||
export AGENT_NAME=mini-agi
|
||||
echo "poetry run agbenchmark start --mock --api_mode --host=http://localhost:8000"
|
||||
poetry run agbenchmark start --mock --api_mode --host=http://localhost:8000
|
||||
else
|
||||
echo "${prefix}agbenchmark start"
|
||||
${prefix}agbenchmark start || echo "This command will always return a non zero exit code unless all the challenges are solved."
|
||||
fi
|
||||
|
||||
cd ../..
|
||||
|
||||
env:
|
||||
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
AGENT_NAME: ${{ matrix.agent-name }}
|
||||
PROMPT_USER: false # For mini-agi. TODO: Remove this and put it in benchmarks.py
|
||||
HELICONE_API_KEY: ${{ secrets.HELICONE_API_KEY }}
|
||||
BASERUN_API_KEY: ${{ secrets.BASERUN_API_KEY }}
|
||||
REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
|
||||
HELICONE_CACHE_ENABLED: false
|
||||
HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}
|
||||
REPORT_LOCATION: ${{ format('../../reports/{0}', matrix.agent-name) }}
|
||||
WOLFRAM_ALPHA_APPID: ${{ secrets.WOLFRAM_ALPHA_APPID }}
|
||||
SERPER_API_KEY: ${{ secrets.SERPER_API_KEY }}
|
||||
BING_SUBSCRIPTION_KEY: ${{ secrets.BING_SUBSCRIPTION_KEY }}
|
||||
|
||||
- name: Upload reports
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.agent-name }}
|
||||
path: reports/${{ matrix.agent-name }}
|
||||
|
||||
- name: Authenticate and Push to Branch
|
||||
if: (success() || failure()) && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
||||
run: |
|
||||
git config --global user.email "github-bot@agpt.co"
|
||||
git config --global user.name "Auto-GPT-Bot"
|
||||
|
||||
git add reports/* || echo "nothing to commit"
|
||||
commit_message="${{ matrix.agent-name }}-$(date +'%Y%m%d%H%M%S')"
|
||||
git commit -m "${commit_message}"
|
||||
git stash
|
||||
current_branch=${{ github.ref_name }}
|
||||
attempts=0
|
||||
max_attempts=3
|
||||
|
||||
while [ $attempts -lt $max_attempts ]; do
|
||||
git fetch origin $current_branch
|
||||
git rebase origin/$current_branch
|
||||
if git push origin HEAD; then
|
||||
echo "Success!"
|
||||
poetry run python reports/send_to_googledrive.py || echo "Failed to upload to Google Drive"
|
||||
exit 0
|
||||
else
|
||||
echo "Attempt $(($attempts + 1)) failed. Retrying..."
|
||||
attempts=$(($attempts + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
echo "Failed after $max_attempts attempts."
|
||||
env:
|
||||
GDRIVE_BASE64: ${{ secrets.GDRIVE_BASE64 }}
|
||||
GITHUB_REF_NAME: ${{ github.ref_name }}
|
Loading…
Reference in New Issue