diff --git a/.github/workflows/benchmark-ci.yml b/.github/workflows/benchmark-ci.yml
new file mode 100644
index 000000000..1f5bf867b
--- /dev/null
+++ b/.github/workflows/benchmark-ci.yml
@@ -0,0 +1,301 @@
+name: Benchmark CI  # Lints benchmark/ and runs agbenchmark against the selected agents
+
+on:  # Triggers: manual dispatch, nightly schedule, pushes touching benchmark/, and pull requests
+  workflow_dispatch:
+    branches: [master]  # NOTE(review): workflow_dispatch documents only `inputs`; confirm this key has any effect
+    inputs:
+      agents:
+        description: 'Agents to run (comma-separated)'
+        required: false
+        default: 'gpt-engineer,smol-developer,Auto-GPT,mini-agi,beebot,BabyAGI,PolyGPT,Turbo' # Default agents if none are specified
+  schedule:
+    - cron: '0 8 * * *'  # once a day at 08:00 (GitHub evaluates cron in UTC)
+  push:
+    branches: [master, ci-test*]  # master plus any branch whose name starts with ci-test
+    paths:
+      - 'benchmark/**'
+      - '!benchmark/reports/**'  # changes under benchmark/reports/ are excluded
+  pull_request:
+    branches: [stable, master, release-*]  # pull requests targeting these base branches
+
+jobs:
+  lint:  # Static checks for the benchmark package: flake8, black, isort, autoflake
+    runs-on: ubuntu-latest
+    env:
+      min-python-version: '3.10'  # quoted so YAML keeps the string '3.10' instead of the float 3.1
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0  # full history rather than a shallow clone
+          ref: ${{ github.event.pull_request.head.ref }}  # only populated on pull_request events; presumably the action's default applies otherwise
+          repository: ${{ github.event.pull_request.head.repo.full_name }}  # the PR's source repository (may be a fork)
+
+      - name: Set up Python ${{ env.min-python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ env.min-python-version }}
+
+      - id: get_date  # exposes today's date (YYYY-MM-DD) as the step output `date`
+        name: Get date
+        working-directory: ./benchmark/
+        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
+
+      - name: Install Poetry  # official installer script, run with the Python set up above
+        working-directory: ./benchmark/
+        run: |
+          curl -sSL https://install.python-poetry.org | python -
+
+      - name: Install dependencies  # asks Poetry to create the virtualenv inside the project directory
+        working-directory: ./benchmark/
+        run: |
+          export POETRY_VIRTUALENVS_IN_PROJECT=true
+          poetry install -vvv
+
+      - name: Lint with flake8
+        working-directory: ./benchmark/
+        run: poetry run flake8
+
+      - name: Check black formatting
+        working-directory: ./benchmark/
+        run: poetry run  black . --exclude test.py --check
+        if: success() || failure()  # still runs when an earlier check failed, so all lint results are reported
+
+      - name: Check isort formatting
+        working-directory: ./benchmark/
+        run: poetry run  isort . --check
+        if: success() || failure()  # still runs when an earlier check failed
+
+      - name: Check for unused imports and pass statements  # on failure, prints the autoflake command that fixes it
+        working-directory: ./benchmark/
+        run: |
+          cmd="poetry run autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring agbenchmark"
+          $cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1)
+        if: success() || failure()  # still runs when an earlier check failed
+  matrix-setup:  # Chooses the agent list and deployment environment from the triggering event
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}  # JSON array of agent names, consumed via fromJson
+      env-name: ${{ steps.set-matrix.outputs.env-name }}  # 'production' or 'develop'
+    steps:
+      - id: set-matrix
+        env:  # the dispatch input reaches the script as a variable, never as text pasted into it
+          AGENTS: ${{ github.event.inputs.agents }}
+        run: |
+          env_name=production  # scheduled and manual runs; overridden below for push / pull_request
+          if [ "$GITHUB_EVENT_NAME" == "schedule" ]; then
+            matrix='["gpt-engineer", "smol-developer", "Auto-GPT", "mini-agi", "beebot", "BabyAGI", "PolyGPT", "Turbo"]'
+          elif [ "$GITHUB_EVENT_NAME" == "workflow_dispatch" ]; then
+            matrix="[\"$(printf '%s' "$AGENTS" | tr -d '[:space:]' | sed 's/,/", "/g')\"]"  # a, b -> ["a", "b"]
+          else
+            env_name=develop
+            matrix='["mini-agi"]'  # push / pull_request: a single agent keeps the run short
+          fi
+          { echo "env-name=$env_name"; echo "matrix=$matrix"; } >> "$GITHUB_OUTPUT"  # replaces the deprecated ::set-output command
+
+  tests:  # Runs agbenchmark once per agent in the matrix produced by matrix-setup
+    environment:
+      name: '${{ needs.matrix-setup.outputs.env-name }}'  # 'production' or 'develop', as chosen by matrix-setup
+    needs: matrix-setup
+    env:
+      min-python-version: '3.10'  # quoted so YAML keeps the string '3.10' instead of the float 3.1
+    name: '${{ matrix.agent-name }}'
+    runs-on: ubuntu-latest
+    timeout-minutes: 50
+    strategy:
+      fail-fast: false  # one agent failing does not cancel the remaining matrix jobs
+      matrix:
+        agent-name: ${{fromJson(needs.matrix-setup.outputs.matrix)}}
+    steps:
+      - name: Print Environment Name
+        run: |
+          echo "Matrix Setup Environment Name: ${{ needs.matrix-setup.outputs.env-name }}"
+
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0  # full history rather than a shallow clone
+          ref: ${{ github.event.pull_request.head.ref }}  # only populated on pull_request events
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
+          token: ${{ secrets.PAT_REVIEW }}  # NOTE(review): presumably a PAT so the later push step can write to the repo; confirm
+
+      - name: Set up Python ${{ env.min-python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ env.min-python-version }}
+
+      - id: get_date  # exposes today's date (YYYY-MM-DD) as the step output `date`
+        name: Get date
+        working-directory: ./benchmark/
+        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
+
+      - name: Install Poetry  # official installer script, run with the Python set up above
+        run: |
+          curl -sSL https://install.python-poetry.org | python -
+
+      - name: Install dependencies  # also builds the package; the regression step installs a wheel from dist/
+        working-directory: ./benchmark/
+        run: |
+          poetry install -vvv
+          poetry build
+
+      - name: Run regression tests  # Sets up the matrix agent, then runs agbenchmark (mostly mocked on push/PR, live otherwise)
+        working-directory: ./benchmark/
+        run: |
+          mkdir agent
+          cd agent
+          git clone https://github.com/SilenNaihin/mini-agi -b benchmark-integration  # NOTE(review): only mini-agi is cloned, so any other AGENT_NAME fails at the cd below
+          cd "$AGENT_NAME"
+          prefix=""  # prepended to every agbenchmark call; only beebot sets it (to "poetry run ")
+          if [ "$AGENT_NAME" == "gpt-engineer" ]; then
+            make install
+            source venv/bin/activate
+          elif [ "$AGENT_NAME" == "Auto-GPT" ]; then
+            python -m venv venv
+            source venv/bin/activate
+            pip install -r requirements.txt
+            pip uninstall agbenchmark -y
+          elif [ "$AGENT_NAME" == "mini-agi" ]; then
+            python -m venv venv
+            source venv/bin/activate
+            pip install -r requirements.txt
+            cp .env_example .env
+          elif [ "$AGENT_NAME" == "smol-developer" ]; then
+            python -m venv venv
+            source venv/bin/activate
+            pip install -r requirements.txt
+          elif [ "$AGENT_NAME" == "BabyAGI" ]; then
+            python -m venv venv
+            source venv/bin/activate
+            pip install -r requirements.txt
+          elif [ "$AGENT_NAME" == "SuperAGI" ]; then
+            cp config_template.yaml config.yaml
+            sed -i 's/OPENAI_API_KEY:.*/OPENAI_API_KEY: "'"$OPENAI_API_KEY"'"/' config.yaml  # key comes from env, not from the script text
+            docker-compose up -d --build
+          elif [ "$AGENT_NAME" == "beebot" ]; then
+            poetry install
+            poetry run playwright install
+            poetry run uvicorn beebot.initiator.api:create_app --factory  --timeout-graceful-shutdown=1 &
+            prefix="poetry run "
+          elif [ "$AGENT_NAME" == "PolyGPT" ]; then
+            cp .env.template .env
+            curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash
+            export NVM_DIR=$HOME/.nvm
+            source $NVM_DIR/nvm.sh
+            nvm install && nvm use
+            yarn install
+            export NODE_TLS_REJECT_UNAUTHORIZED=0
+          elif [ "$AGENT_NAME" == "Turbo" ]; then
+            python -m venv venv
+            source venv/bin/activate
+            pip install -r requirements.txt
+            cp .env.template .env
+            sed -i "s/your-openai-api-key/$OPENAI_API_KEY/g" .env  # key comes from env, not from the script text
+          else
+            echo "Unknown agent name: $AGENT_NAME"
+            exit 1
+          fi
+
+          pip install ../../dist/*.whl
+          # Helicone mitmproxy helper script, pinned to a specific commit
+          bash -c "$(curl -fsSL https://raw.githubusercontent.com/merwanehamadi/helicone/b7ab4bc53e51d8ab29fff19ce5986ab7720970c6/mitmproxy.sh)" -s start
+
+          cd ../..
+          if [ "${GITHUB_EVENT_NAME}" == "pull_request" ] || [ "${GITHUB_EVENT_NAME}" == "push" ]; then
+            set +e # Ignore non-zero exit codes and continue execution
+            echo "Running the following command: ${prefix}agbenchmark start --maintain --mock"
+            ${prefix}agbenchmark start --maintain --mock
+            EXIT_CODE=$?
+            set -e  # Stop ignoring non-zero exit codes
+            # Exit code 5 is only reported here; the job continues whatever this --maintain run returned
+            if [ $EXIT_CODE -eq 5 ]; then
+              echo "regression_tests.json is empty."
+            fi
+
+            echo "Running the following command: ${prefix}agbenchmark start --mock"
+            ${prefix}agbenchmark start --mock
+
+            echo "Running the following command: ${prefix}agbenchmark start --mock --category=retrieval"
+            ${prefix}agbenchmark start --mock --category=retrieval
+
+            echo "Running the following command: ${prefix}agbenchmark start --mock --category=interface"
+            ${prefix}agbenchmark start --mock --category=interface
+
+            echo "Running the following command: ${prefix}agbenchmark start --mock --category=code"
+            ${prefix}agbenchmark start --mock --category=code
+
+            echo "Running the following command: ${prefix}agbenchmark start --mock --category=memory"
+            ${prefix}agbenchmark start --mock --category=memory
+
+            echo "Running the following command: ${prefix}agbenchmark start --mock --suite TestRevenueRetrieval"
+            ${prefix}agbenchmark start --mock --suite TestRevenueRetrieval
+
+            echo "Running the following command: ${prefix}agbenchmark start --test=TestWriteFile"
+            ${prefix}agbenchmark start --test=TestWriteFile  # the only call in this branch without --mock
+
+            poetry install
+            poetry run uvicorn server:app --reload &
+            sleep 5
+            export AGENT_NAME=mini-agi
+          else
+            echo "${prefix}agbenchmark start"
+            ${prefix}agbenchmark start || echo "This command will always return a non zero exit code unless all the challenges are solved."
+          fi
+
+          cd ../..
+
+        env:
+          GITHUB_EVENT_NAME: ${{ github.event_name }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          AGENT_NAME: ${{ matrix.agent-name }}
+          PROMPT_USER: false # For mini-agi. TODO: Remove this and put it in benchmarks.py
+          HELICONE_API_KEY: ${{ secrets.HELICONE_API_KEY }}
+          BASERUN_API_KEY: ${{ secrets.BASERUN_API_KEY }}
+          REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
+          HELICONE_CACHE_ENABLED: false
+          HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}
+          REPORT_LOCATION: ${{ format('../../reports/{0}', matrix.agent-name) }}
+          WOLFRAM_ALPHA_APPID: ${{ secrets.WOLFRAM_ALPHA_APPID }}
+          SERPER_API_KEY: ${{ secrets.SERPER_API_KEY }}
+          BING_SUBSCRIPTION_KEY: ${{ secrets.BING_SUBSCRIPTION_KEY }}
+
+      - name: Upload reports  # Publishes this agent's report directory as a build artifact named after the agent
+        if: always()  # upload even when the benchmark step failed
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ matrix.agent-name }}
+          path: reports/${{ matrix.agent-name }}  # NOTE(review): no working-directory here, while the push step adds reports/ from ./benchmark/ — confirm this path
+
+      - name: Authenticate and Push to Branch  # Commits generated reports and pushes them, retrying the push up to three times
+        working-directory: ./benchmark/
+        if: (success() || failure()) && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
+        run: |
+          git config --global user.email "github-bot@agpt.co"
+          git config --global user.name "Auto-GPT-Bot"
+
+          git add reports/* || echo "nothing to commit"
+          commit_message="${{ matrix.agent-name }}-$(date +'%Y%m%d%H%M%S')"
+          git commit -m "${commit_message}"
+          git stash
+          current_branch="$GITHUB_REF_NAME"  # read from the environment instead of pasting the ref into the script
+          attempts=0
+          max_attempts=3
+
+          while [ $attempts -lt $max_attempts ]; do
+              git fetch origin "$current_branch"
+              git rebase "origin/$current_branch"  # replay the report commit on top of whatever other matrix jobs pushed
+              if git push origin HEAD; then
+                  echo "Success!"
+                  poetry run python reports/send_to_googledrive.py || echo "Failed to upload to Google Drive"
+                  exit 0
+              else
+                  echo "Attempt $(($attempts + 1)) failed. Retrying..."
+                  attempts=$(($attempts + 1))
+              fi
+          done
+
+          echo "Failed after $max_attempts attempts."; exit 1  # surface the failed push instead of ending the step green
+        env:
+          GDRIVE_BASE64: ${{ secrets.GDRIVE_BASE64 }}
+          GITHUB_REF_NAME: ${{ github.ref_name }}
diff --git a/benchmark/.gitmodules b/benchmark/.gitmodules
deleted file mode 100644
index a8a544a54..000000000
--- a/benchmark/.gitmodules
+++ /dev/null
@@ -1,39 +0,0 @@
-[submodule "agent/Auto-GPT"]
-	path = agent/Auto-GPT
-	url = https://github.com/Significant-Gravitas/Auto-GPT
-	branch = master
-[submodule "agent/gpt-engineer"]
-	path = agent/gpt-engineer
-	url = https://github.com/merwanehamadi/gpt-engineer.git
-	branch = benchmark-integration
-[submodule "agent/mini-agi"]
-	path = agent/mini-agi
-	url = https://github.com/SilenNaihin/mini-agi.git
-	branch = benchmark-integration
-[submodule "agent/smol-developer"]
-	path = agent/smol-developer
-	url = https://github.com/e2b-dev/smol-developer.git
-	branch = benchmarks
-[submodule "agent/SuperAGI"]
-	path = agent/SuperAGI
-	url = https://github.com/SilenNaihin/SuperAGI.git
-	branch = benchmark-integration
-[submodule "agent/BabyAGI"]
-	path = agent/BabyAGI
-	url = https://github.com/SilenNaihin/babyagi.git
-	branch = benchmark-integration
-[submodule "agent/beebot"]
-	path = agent/beebot
-	url = https://github.com/AutoPackAI/beebot.git
-	branch = main
-[submodule "agent/PolyGPT"]
-	path = agent/PolyGPT
-	url = https://github.com/polywrap/PolyGPT.git
-	branch = nerfzael-use-local-wrap-library
-[submodule "frontend"]
-	path = frontend
-	url = https://github.com/agbenchmark/agbenchmark-frontend.git
-[submodule "agent/Turbo"]
-	path = agent/Turbo
-	url = https://github.com/lc0rp/Auto-GPT-Turbo.git
-	branch = main
diff --git a/benchmark/agbenchmark/__init__.py b/benchmark/agbenchmark/__init__.py
index 2fc9970ce..e69de29bb 100644
--- a/benchmark/agbenchmark/__init__.py
+++ b/benchmark/agbenchmark/__init__.py
@@ -1,5 +0,0 @@
-import pydevd_pycharm
-
-pydevd_pycharm.settrace(
-    "localhost", port=9739, stdoutToServer=True, stderrToServer=True
-)
diff --git a/benchmark/agbenchmark/challenges/library/ethereum/check_price/data.json b/benchmark/agbenchmark/challenges/library/ethereum/check_price/data_draft.json
similarity index 100%
rename from benchmark/agbenchmark/challenges/library/ethereum/check_price/data.json
rename to benchmark/agbenchmark/challenges/library/ethereum/check_price/data_draft.json
diff --git a/benchmark/paper/agent_action_regex.py b/benchmark/paper/agent_action_regex.py
index 6bd55f9d1..abe4a8fdd 100644
--- a/benchmark/paper/agent_action_regex.py
+++ b/benchmark/paper/agent_action_regex.py
@@ -1,5 +1,5 @@
-import re
 import json
+import re
 
 
 def is_action_auto_gpt(log):