2023-06-18 15:14:54 +00:00
|
|
|
[tool.poetry]
|
|
|
|
name = "agbenchmark"
|
2023-09-16 22:07:53 +00:00
|
|
|
version = "0.0.10"
|
2023-06-18 15:14:54 +00:00
|
|
|
description = "Benchmarking the performance of agents far and wide, regardless of how they are set up and how they work"
|
2024-05-20 13:01:36 +00:00
|
|
|
authors = ["AutoGPT Team"]
|
2023-06-18 15:14:54 +00:00
|
|
|
license = "MIT"
|
|
|
|
readme = "README.md"
|
2023-08-11 19:52:06 +00:00
|
|
|
packages = [{ include = "agbenchmark" }]
|
2023-06-18 15:14:54 +00:00
|
|
|
|
|
|
|
[tool.poetry.dependencies]
|
2023-07-02 20:14:49 +00:00
|
|
|
python = "^3.10"
|
2024-07-02 18:45:32 +00:00
|
|
|
agent-protocol-client = {git = "https://github.com/Significant-Gravitas/agent-protocol.git", subdirectory = "packages/client/python"}
|
2023-07-02 20:14:49 +00:00
|
|
|
click = "^8.1.3"
|
2024-06-28 00:21:36 +00:00
|
|
|
click-default-group = "^1.2.4"
|
|
|
|
colorama = "^0.4.6"
|
|
|
|
fastapi = "^0.109.1"
|
2023-08-02 00:54:23 +00:00
|
|
|
gitpython = "^3.1.32"
|
2024-06-28 00:21:36 +00:00
|
|
|
httpx = "^0.24.0"
|
|
|
|
matplotlib = "^3.7.2"
|
2024-07-02 18:45:32 +00:00
|
|
|
# Multidict 6.0.4 fails to install and is a dependency of aiohttp which is a depenedency of agent-protocol-client
|
|
|
|
multidict = "^6.0.5"
|
2023-08-06 21:35:22 +00:00
|
|
|
networkx = "^3.1"
|
2024-06-28 00:21:36 +00:00
|
|
|
openai = "^1.7.2"
|
|
|
|
pandas = "^2.0.3"
|
|
|
|
pexpect = "^4.8.0"
|
|
|
|
psutil = "^5.9.5"
|
2024-07-02 18:45:32 +00:00
|
|
|
pydantic = "^2.7.2"
|
|
|
|
pydantic-settings = "^2.3.4"
|
2024-06-28 00:21:36 +00:00
|
|
|
pytest = "^7.3.2"
|
2024-06-27 20:09:00 +00:00
|
|
|
pytest-asyncio = "^0.23.3"
|
2024-06-28 00:21:36 +00:00
|
|
|
python-dotenv = "^1.0.0"
|
2024-02-13 11:36:00 +00:00
|
|
|
python-multipart = "^0.0.7"
|
2024-06-28 00:21:36 +00:00
|
|
|
pyvis = "^0.3.2"
|
|
|
|
requests = "^2.31.0"
|
|
|
|
selenium = "^4.11.2"
|
2024-02-16 14:17:11 +00:00
|
|
|
tabulate = "^0.9.0"
|
2024-06-28 00:21:36 +00:00
|
|
|
toml = "^0.10.2"
|
|
|
|
uvicorn = ">=0.23.2,<1"
|
2023-09-06 00:05:45 +00:00
|
|
|
|
2023-07-02 20:14:49 +00:00
|
|
|
[tool.poetry.group.dev.dependencies]
|
Set up unified pre-commit + CI w/ linting + type checking & FIX EVERYTHING (#7171)
- **FIX ALL LINT/TYPE ERRORS IN AUTOGPT, FORGE, AND BENCHMARK**
### Linting
- Clean up linter configs for `autogpt`, `forge`, and `benchmark`
- Add type checking with Pyright
- Create unified pre-commit config
- Create unified linting and type checking CI workflow
### Testing
- Synchronize CI test setups for `autogpt`, `forge`, and `benchmark`
- Add missing pytest-cov to benchmark dependencies
- Mark GCS tests as slow to speed up pre-commit test runs
- Repair `forge` test suite
- Add `AgentDB.close()` method for test DB teardown in db_test.py
- Use actual temporary dir instead of forge/test_workspace/
- Move left-behind dependencies for moved `forge`-code to from autogpt to forge
### Notable type changes
- Replace uses of `ChatModelProvider` by `MultiProvider`
- Removed unnecessary exports from various __init__.py
- Simplify `FileStorage.open_file` signature by removing `IOBase` from return type union
- Implement `S3BinaryIOWrapper(BinaryIO)` type interposer for `S3FileStorage`
- Expand overloads of `GCSFileStorage.open_file` for improved typing of read and write modes
Had to silence type checking for the extra overloads, because (I think) Pyright is reporting a false-positive:
https://github.com/microsoft/pyright/issues/8007
- Change `count_tokens`, `get_tokenizer`, `count_message_tokens` methods on `ModelProvider`s from class methods to instance methods
- Move `CompletionModelFunction.schema` method -> helper function `format_function_def_for_openai` in `forge.llm.providers.openai`
- Rename `ModelProvider` -> `BaseModelProvider`
- Rename `ChatModelProvider` -> `BaseChatModelProvider`
- Add type `ChatModelProvider` which is a union of all subclasses of `BaseChatModelProvider`
### Removed rather than fixed
- Remove deprecated and broken autogpt/agbenchmark_config/benchmarks.py
- Various base classes and properties on base classes in `forge.llm.providers.schema` and `forge.models.providers`
### Fixes for other issues that came to light
- Clean up `forge.agent_protocol.api_router`, `forge.agent_protocol.database`, and `forge.agent.agent`
- Add fallback behavior to `ImageGeneratorComponent`
- Remove test for deprecated failure behavior
- Fix `agbenchmark.challenges.builtin` challenge exclusion mechanism on Windows
- Fix `_tool_calls_compat_extract_calls` in `forge.llm.providers.openai`
- Add support for `any` (= no type specified) in `JSONSchema.typescript_type`
2024-05-28 03:04:21 +00:00
|
|
|
black = "^23.12.1"
|
|
|
|
flake8 = "^7.0.0"
|
|
|
|
isort = "^5.13.1"
|
|
|
|
pyright = "^1.1.364"
|
2023-08-07 11:29:00 +00:00
|
|
|
pre-commit = "^3.3.3"
|
2024-06-28 00:21:36 +00:00
|
|
|
|
|
|
|
# Testing
|
Set up unified pre-commit + CI w/ linting + type checking & FIX EVERYTHING (#7171)
- **FIX ALL LINT/TYPE ERRORS IN AUTOGPT, FORGE, AND BENCHMARK**
### Linting
- Clean up linter configs for `autogpt`, `forge`, and `benchmark`
- Add type checking with Pyright
- Create unified pre-commit config
- Create unified linting and type checking CI workflow
### Testing
- Synchronize CI test setups for `autogpt`, `forge`, and `benchmark`
- Add missing pytest-cov to benchmark dependencies
- Mark GCS tests as slow to speed up pre-commit test runs
- Repair `forge` test suite
- Add `AgentDB.close()` method for test DB teardown in db_test.py
- Use actual temporary dir instead of forge/test_workspace/
- Move left-behind dependencies for moved `forge`-code to from autogpt to forge
### Notable type changes
- Replace uses of `ChatModelProvider` by `MultiProvider`
- Removed unnecessary exports from various __init__.py
- Simplify `FileStorage.open_file` signature by removing `IOBase` from return type union
- Implement `S3BinaryIOWrapper(BinaryIO)` type interposer for `S3FileStorage`
- Expand overloads of `GCSFileStorage.open_file` for improved typing of read and write modes
Had to silence type checking for the extra overloads, because (I think) Pyright is reporting a false-positive:
https://github.com/microsoft/pyright/issues/8007
- Change `count_tokens`, `get_tokenizer`, `count_message_tokens` methods on `ModelProvider`s from class methods to instance methods
- Move `CompletionModelFunction.schema` method -> helper function `format_function_def_for_openai` in `forge.llm.providers.openai`
- Rename `ModelProvider` -> `BaseModelProvider`
- Rename `ChatModelProvider` -> `BaseChatModelProvider`
- Add type `ChatModelProvider` which is a union of all subclasses of `BaseChatModelProvider`
### Removed rather than fixed
- Remove deprecated and broken autogpt/agbenchmark_config/benchmarks.py
- Various base classes and properties on base classes in `forge.llm.providers.schema` and `forge.models.providers`
### Fixes for other issues that came to light
- Clean up `forge.agent_protocol.api_router`, `forge.agent_protocol.database`, and `forge.agent.agent`
- Add fallback behavior to `ImageGeneratorComponent`
- Remove test for deprecated failure behavior
- Fix `agbenchmark.challenges.builtin` challenge exclusion mechanism on Windows
- Fix `_tool_calls_compat_extract_calls` in `forge.llm.providers.openai`
- Add support for `any` (= no type specified) in `JSONSchema.typescript_type`
2024-05-28 03:04:21 +00:00
|
|
|
pytest-cov = "^5.0.0"
|
|
|
|
|
2024-06-28 00:21:36 +00:00
|
|
|
# Dependencies for stuff in reports/
|
|
|
|
gspread = "^5.10.0"
|
|
|
|
oauth2client = "^4.1.3"
|
|
|
|
|
Set up unified pre-commit + CI w/ linting + type checking & FIX EVERYTHING (#7171)
- **FIX ALL LINT/TYPE ERRORS IN AUTOGPT, FORGE, AND BENCHMARK**
### Linting
- Clean up linter configs for `autogpt`, `forge`, and `benchmark`
- Add type checking with Pyright
- Create unified pre-commit config
- Create unified linting and type checking CI workflow
### Testing
- Synchronize CI test setups for `autogpt`, `forge`, and `benchmark`
- Add missing pytest-cov to benchmark dependencies
- Mark GCS tests as slow to speed up pre-commit test runs
- Repair `forge` test suite
- Add `AgentDB.close()` method for test DB teardown in db_test.py
- Use actual temporary dir instead of forge/test_workspace/
- Move left-behind dependencies for moved `forge`-code to from autogpt to forge
### Notable type changes
- Replace uses of `ChatModelProvider` by `MultiProvider`
- Removed unnecessary exports from various __init__.py
- Simplify `FileStorage.open_file` signature by removing `IOBase` from return type union
- Implement `S3BinaryIOWrapper(BinaryIO)` type interposer for `S3FileStorage`
- Expand overloads of `GCSFileStorage.open_file` for improved typing of read and write modes
Had to silence type checking for the extra overloads, because (I think) Pyright is reporting a false-positive:
https://github.com/microsoft/pyright/issues/8007
- Change `count_tokens`, `get_tokenizer`, `count_message_tokens` methods on `ModelProvider`s from class methods to instance methods
- Move `CompletionModelFunction.schema` method -> helper function `format_function_def_for_openai` in `forge.llm.providers.openai`
- Rename `ModelProvider` -> `BaseModelProvider`
- Rename `ChatModelProvider` -> `BaseChatModelProvider`
- Add type `ChatModelProvider` which is a union of all subclasses of `BaseChatModelProvider`
### Removed rather than fixed
- Remove deprecated and broken autogpt/agbenchmark_config/benchmarks.py
- Various base classes and properties on base classes in `forge.llm.providers.schema` and `forge.models.providers`
### Fixes for other issues that came to light
- Clean up `forge.agent_protocol.api_router`, `forge.agent_protocol.database`, and `forge.agent.agent`
- Add fallback behavior to `ImageGeneratorComponent`
- Remove test for deprecated failure behavior
- Fix `agbenchmark.challenges.builtin` challenge exclusion mechanism on Windows
- Fix `_tool_calls_compat_extract_calls` in `forge.llm.providers.openai`
- Add support for `any` (= no type specified) in `JSONSchema.typescript_type`
2024-05-28 03:04:21 +00:00
|
|
|
[tool.poetry.scripts]
|
|
|
|
agbenchmark = "agbenchmark.__main__:cli"
|
|
|
|
|
2023-06-18 15:14:54 +00:00
|
|
|
|
|
|
|
[build-system]
|
|
|
|
requires = ["poetry-core"]
|
|
|
|
build-backend = "poetry.core.masonry.api"
|
|
|
|
|
2023-07-02 20:14:49 +00:00
|
|
|
|
|
|
|
[tool.black]
|
|
|
|
line-length = 88
|
|
|
|
target-version = ['py310']
|
|
|
|
include = '\.pyi?$'
|
Set up unified pre-commit + CI w/ linting + type checking & FIX EVERYTHING (#7171)
- **FIX ALL LINT/TYPE ERRORS IN AUTOGPT, FORGE, AND BENCHMARK**
### Linting
- Clean up linter configs for `autogpt`, `forge`, and `benchmark`
- Add type checking with Pyright
- Create unified pre-commit config
- Create unified linting and type checking CI workflow
### Testing
- Synchronize CI test setups for `autogpt`, `forge`, and `benchmark`
- Add missing pytest-cov to benchmark dependencies
- Mark GCS tests as slow to speed up pre-commit test runs
- Repair `forge` test suite
- Add `AgentDB.close()` method for test DB teardown in db_test.py
- Use actual temporary dir instead of forge/test_workspace/
- Move left-behind dependencies for moved `forge`-code to from autogpt to forge
### Notable type changes
- Replace uses of `ChatModelProvider` by `MultiProvider`
- Removed unnecessary exports from various __init__.py
- Simplify `FileStorage.open_file` signature by removing `IOBase` from return type union
- Implement `S3BinaryIOWrapper(BinaryIO)` type interposer for `S3FileStorage`
- Expand overloads of `GCSFileStorage.open_file` for improved typing of read and write modes
Had to silence type checking for the extra overloads, because (I think) Pyright is reporting a false-positive:
https://github.com/microsoft/pyright/issues/8007
- Change `count_tokens`, `get_tokenizer`, `count_message_tokens` methods on `ModelProvider`s from class methods to instance methods
- Move `CompletionModelFunction.schema` method -> helper function `format_function_def_for_openai` in `forge.llm.providers.openai`
- Rename `ModelProvider` -> `BaseModelProvider`
- Rename `ChatModelProvider` -> `BaseChatModelProvider`
- Add type `ChatModelProvider` which is a union of all subclasses of `BaseChatModelProvider`
### Removed rather than fixed
- Remove deprecated and broken autogpt/agbenchmark_config/benchmarks.py
- Various base classes and properties on base classes in `forge.llm.providers.schema` and `forge.models.providers`
### Fixes for other issues that came to light
- Clean up `forge.agent_protocol.api_router`, `forge.agent_protocol.database`, and `forge.agent.agent`
- Add fallback behavior to `ImageGeneratorComponent`
- Remove test for deprecated failure behavior
- Fix `agbenchmark.challenges.builtin` challenge exclusion mechanism on Windows
- Fix `_tool_calls_compat_extract_calls` in `forge.llm.providers.openai`
- Add support for `any` (= no type specified) in `JSONSchema.typescript_type`
2024-05-28 03:04:21 +00:00
|
|
|
|
2023-07-02 20:14:49 +00:00
|
|
|
|
|
|
|
[tool.isort]
|
|
|
|
profile = "black"
|
Set up unified pre-commit + CI w/ linting + type checking & FIX EVERYTHING (#7171)
- **FIX ALL LINT/TYPE ERRORS IN AUTOGPT, FORGE, AND BENCHMARK**
### Linting
- Clean up linter configs for `autogpt`, `forge`, and `benchmark`
- Add type checking with Pyright
- Create unified pre-commit config
- Create unified linting and type checking CI workflow
### Testing
- Synchronize CI test setups for `autogpt`, `forge`, and `benchmark`
- Add missing pytest-cov to benchmark dependencies
- Mark GCS tests as slow to speed up pre-commit test runs
- Repair `forge` test suite
- Add `AgentDB.close()` method for test DB teardown in db_test.py
- Use actual temporary dir instead of forge/test_workspace/
- Move left-behind dependencies for moved `forge`-code to from autogpt to forge
### Notable type changes
- Replace uses of `ChatModelProvider` by `MultiProvider`
- Removed unnecessary exports from various __init__.py
- Simplify `FileStorage.open_file` signature by removing `IOBase` from return type union
- Implement `S3BinaryIOWrapper(BinaryIO)` type interposer for `S3FileStorage`
- Expand overloads of `GCSFileStorage.open_file` for improved typing of read and write modes
Had to silence type checking for the extra overloads, because (I think) Pyright is reporting a false-positive:
https://github.com/microsoft/pyright/issues/8007
- Change `count_tokens`, `get_tokenizer`, `count_message_tokens` methods on `ModelProvider`s from class methods to instance methods
- Move `CompletionModelFunction.schema` method -> helper function `format_function_def_for_openai` in `forge.llm.providers.openai`
- Rename `ModelProvider` -> `BaseModelProvider`
- Rename `ChatModelProvider` -> `BaseChatModelProvider`
- Add type `ChatModelProvider` which is a union of all subclasses of `BaseChatModelProvider`
### Removed rather than fixed
- Remove deprecated and broken autogpt/agbenchmark_config/benchmarks.py
- Various base classes and properties on base classes in `forge.llm.providers.schema` and `forge.models.providers`
### Fixes for other issues that came to light
- Clean up `forge.agent_protocol.api_router`, `forge.agent_protocol.database`, and `forge.agent.agent`
- Add fallback behavior to `ImageGeneratorComponent`
- Remove test for deprecated failure behavior
- Fix `agbenchmark.challenges.builtin` challenge exclusion mechanism on Windows
- Fix `_tool_calls_compat_extract_calls` in `forge.llm.providers.openai`
- Add support for `any` (= no type specified) in `JSONSchema.typescript_type`
2024-05-28 03:04:21 +00:00
|
|
|
skip_glob = ["reports"]
|
2023-09-12 01:22:50 +00:00
|
|
|
|
Set up unified pre-commit + CI w/ linting + type checking & FIX EVERYTHING (#7171)
- **FIX ALL LINT/TYPE ERRORS IN AUTOGPT, FORGE, AND BENCHMARK**
### Linting
- Clean up linter configs for `autogpt`, `forge`, and `benchmark`
- Add type checking with Pyright
- Create unified pre-commit config
- Create unified linting and type checking CI workflow
### Testing
- Synchronize CI test setups for `autogpt`, `forge`, and `benchmark`
- Add missing pytest-cov to benchmark dependencies
- Mark GCS tests as slow to speed up pre-commit test runs
- Repair `forge` test suite
- Add `AgentDB.close()` method for test DB teardown in db_test.py
- Use actual temporary dir instead of forge/test_workspace/
- Move left-behind dependencies for moved `forge`-code to from autogpt to forge
### Notable type changes
- Replace uses of `ChatModelProvider` by `MultiProvider`
- Removed unnecessary exports from various __init__.py
- Simplify `FileStorage.open_file` signature by removing `IOBase` from return type union
- Implement `S3BinaryIOWrapper(BinaryIO)` type interposer for `S3FileStorage`
- Expand overloads of `GCSFileStorage.open_file` for improved typing of read and write modes
Had to silence type checking for the extra overloads, because (I think) Pyright is reporting a false-positive:
https://github.com/microsoft/pyright/issues/8007
- Change `count_tokens`, `get_tokenizer`, `count_message_tokens` methods on `ModelProvider`s from class methods to instance methods
- Move `CompletionModelFunction.schema` method -> helper function `format_function_def_for_openai` in `forge.llm.providers.openai`
- Rename `ModelProvider` -> `BaseModelProvider`
- Rename `ChatModelProvider` -> `BaseChatModelProvider`
- Add type `ChatModelProvider` which is a union of all subclasses of `BaseChatModelProvider`
### Removed rather than fixed
- Remove deprecated and broken autogpt/agbenchmark_config/benchmarks.py
- Various base classes and properties on base classes in `forge.llm.providers.schema` and `forge.models.providers`
### Fixes for other issues that came to light
- Clean up `forge.agent_protocol.api_router`, `forge.agent_protocol.database`, and `forge.agent.agent`
- Add fallback behavior to `ImageGeneratorComponent`
- Remove test for deprecated failure behavior
- Fix `agbenchmark.challenges.builtin` challenge exclusion mechanism on Windows
- Fix `_tool_calls_compat_extract_calls` in `forge.llm.providers.openai`
- Add support for `any` (= no type specified) in `JSONSchema.typescript_type`
2024-05-28 03:04:21 +00:00
|
|
|
|
|
|
|
[tool.pyright]
|
|
|
|
pythonVersion = "3.10"
|
|
|
|
exclude = [
|
|
|
|
"notebooks/**",
|
|
|
|
"reports/**",
|
|
|
|
"**/node_modules",
|
|
|
|
"**/__pycache__",
|
|
|
|
"**/.*",
|
|
|
|
]
|
|
|
|
ignore = [
|
2024-09-20 14:50:43 +00:00
|
|
|
"../classic/forge/**"
|
Set up unified pre-commit + CI w/ linting + type checking & FIX EVERYTHING (#7171)
- **FIX ALL LINT/TYPE ERRORS IN AUTOGPT, FORGE, AND BENCHMARK**
### Linting
- Clean up linter configs for `autogpt`, `forge`, and `benchmark`
- Add type checking with Pyright
- Create unified pre-commit config
- Create unified linting and type checking CI workflow
### Testing
- Synchronize CI test setups for `autogpt`, `forge`, and `benchmark`
- Add missing pytest-cov to benchmark dependencies
- Mark GCS tests as slow to speed up pre-commit test runs
- Repair `forge` test suite
- Add `AgentDB.close()` method for test DB teardown in db_test.py
- Use actual temporary dir instead of forge/test_workspace/
- Move left-behind dependencies for moved `forge`-code to from autogpt to forge
### Notable type changes
- Replace uses of `ChatModelProvider` by `MultiProvider`
- Removed unnecessary exports from various __init__.py
- Simplify `FileStorage.open_file` signature by removing `IOBase` from return type union
- Implement `S3BinaryIOWrapper(BinaryIO)` type interposer for `S3FileStorage`
- Expand overloads of `GCSFileStorage.open_file` for improved typing of read and write modes
Had to silence type checking for the extra overloads, because (I think) Pyright is reporting a false-positive:
https://github.com/microsoft/pyright/issues/8007
- Change `count_tokens`, `get_tokenizer`, `count_message_tokens` methods on `ModelProvider`s from class methods to instance methods
- Move `CompletionModelFunction.schema` method -> helper function `format_function_def_for_openai` in `forge.llm.providers.openai`
- Rename `ModelProvider` -> `BaseModelProvider`
- Rename `ChatModelProvider` -> `BaseChatModelProvider`
- Add type `ChatModelProvider` which is a union of all subclasses of `BaseChatModelProvider`
### Removed rather than fixed
- Remove deprecated and broken autogpt/agbenchmark_config/benchmarks.py
- Various base classes and properties on base classes in `forge.llm.providers.schema` and `forge.models.providers`
### Fixes for other issues that came to light
- Clean up `forge.agent_protocol.api_router`, `forge.agent_protocol.database`, and `forge.agent.agent`
- Add fallback behavior to `ImageGeneratorComponent`
- Remove test for deprecated failure behavior
- Fix `agbenchmark.challenges.builtin` challenge exclusion mechanism on Windows
- Fix `_tool_calls_compat_extract_calls` in `forge.llm.providers.openai`
- Add support for `any` (= no type specified) in `JSONSchema.typescript_type`
2024-05-28 03:04:21 +00:00
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
[tool.pytest.ini_options]
|
|
|
|
testpaths = ["tests"]
|