"Beat Challenges" Mode (#4447)

Co-authored-by: Richard Beales <rich@richbeales.net>
pull/4460/head
merwanehamadi 2023-05-29 00:47:06 -07:00 committed by GitHub
parent daafda320b
commit 31cd836530
17 changed files with 400 additions and 82 deletions

.gitmodules (vendored): 2 changed lines
View File

@@ -1,4 +1,4 @@
[submodule "tests/Auto-GPT-test-cassettes"]
path = tests/Auto-GPT-test-cassettes
url = https://github.com/Significant-Gravitas/Auto-GPT-test-cassettes
branch = master
branch = master

View File

@@ -1,6 +1,9 @@
import pytest
from autogpt.agent import Agent
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key
@@ -9,11 +12,12 @@ CYCLE_COUNT = 2
@requires_api_key("OPENAI_API_KEY")
@pytest.mark.vcr
@challenge
def test_browse_website(
browser_agent: Agent,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
# config: Config,
level_to_run: int,
) -> None:
file_path = browser_agent.workspace.get_path("browse_website.txt")
run_interaction_loop(monkeypatch, browser_agent, CYCLE_COUNT)

View File

@@ -3,6 +3,9 @@ import pytest
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file
from autogpt.config import Config
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key
@@ -11,11 +14,13 @@ CYCLE_COUNT = 3
@requires_api_key("OPENAI_API_KEY")
@pytest.mark.vcr
@challenge
def test_write_file(
writer_agent: Agent,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
config: Config,
level_to_run: int,
) -> None:
file_path = str(writer_agent.workspace.get_path("hello_world.txt"))
run_interaction_loop(monkeypatch, writer_agent, CYCLE_COUNT)

View File

@@ -0,0 +1,21 @@
from typing import Optional


class Challenge:
    BEAT_CHALLENGES = False

    def __init__(
        self,
        name: str,
        category: str,
        max_level: int,
        max_level_beaten: Optional[int],
        level_to_run: Optional[int] = None,
    ) -> None:
        self.name = name
        self.category = category
        self.max_level_beaten = max_level_beaten
        self.max_level = max_level
        self.succeeded = False
        self.skipped = False
        self.level_to_run = level_to_run

View File

@@ -0,0 +1,68 @@
import contextlib
import os
from functools import wraps
from typing import Any, Callable, Optional

import pytest

from tests.integration.challenges.challenge_decorator.challenge import Challenge
from tests.integration.challenges.challenge_decorator.challenge_utils import (
    create_challenge,
)
from tests.integration.challenges.challenge_decorator.score_utils import (
    get_scores,
    update_new_score,
)

MAX_LEVEL_TO_IMPROVE_ON = (
    1  # we will attempt to beat 1 level above the current level for now.
)


def challenge(func: Callable[..., Any]) -> Callable[..., None]:
    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> None:
        run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1
        while run_remaining > 0:
            current_score, new_score, new_score_location = get_scores()
            level_to_run = kwargs["level_to_run"] if "level_to_run" in kwargs else None
            challenge = create_challenge(
                func, current_score, Challenge.BEAT_CHALLENGES, level_to_run
            )
            if challenge.level_to_run is not None:
                kwargs["level_to_run"] = challenge.level_to_run
                with contextlib.suppress(AssertionError):
                    func(*args, **kwargs)
                    challenge.succeeded = True
            else:
                challenge.skipped = True
            if os.environ.get("CI") == "true":
                new_max_level_beaten = get_new_max_level_beaten(
                    challenge, Challenge.BEAT_CHALLENGES
                )
                update_new_score(
                    new_score_location, new_score, challenge, new_max_level_beaten
                )
            if challenge.level_to_run is None:
                pytest.skip("This test has not been unlocked yet.")

            if not challenge.succeeded:
                if Challenge.BEAT_CHALLENGES:
                    # xfail
                    pytest.xfail("Challenge failed")
                raise AssertionError("Challenge failed")
            run_remaining -= 1

    return wrapper


def get_new_max_level_beaten(
    challenge: Challenge, beat_challenges: bool
) -> Optional[int]:
    if challenge.succeeded:
        return challenge.level_to_run
    if challenge.skipped:
        return challenge.max_level_beaten
    # Challenge failed
    return challenge.max_level_beaten if beat_challenges else None
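For reference, a minimal sketch of how a new challenge test would plug into this decorator (the test name and body are illustrative, not part of this PR; level_to_run is the fixture added in conftest.py below and is overridden by the decorator):

import pytest

from tests.integration.challenges.challenge_decorator.challenge_decorator import (
    challenge,
)


@pytest.mark.vcr
@challenge
def test_example_challenge(
    monkeypatch: pytest.MonkeyPatch,
    level_to_run: int,  # provided by conftest; the decorator swaps in the level it picked
) -> None:
    # The decorator reads current_score.json, picks the level to attempt
    # (or skips the test when nothing is unlocked), treats an AssertionError
    # as a failed attempt, and records the outcome to new_score_<pid>.json
    # when the CI environment variable is "true".
    assert level_to_run >= 1  # the real challenge body would exercise the agent here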

View File

@@ -0,0 +1,81 @@
import os
from typing import Any, Callable, Dict, Optional, Tuple

from tests.integration.challenges.challenge_decorator.challenge import Challenge

CHALLENGE_PREFIX = "test_"


def create_challenge(
    func: Callable[..., Any],
    current_score: Dict[str, Any],
    is_beat_challenges: bool,
    level_to_run: Optional[int] = None,
) -> Challenge:
    challenge_category, challenge_name = get_challenge_identifiers(func)
    max_level = get_max_level(current_score, challenge_category, challenge_name)
    max_level_beaten = get_max_level_beaten(
        current_score, challenge_category, challenge_name
    )
    level_to_run = get_level_to_run(
        is_beat_challenges, level_to_run, max_level, max_level_beaten
    )

    return Challenge(
        name=challenge_name,
        category=challenge_category,
        max_level=max_level,
        max_level_beaten=max_level_beaten,
        level_to_run=level_to_run,
    )


def get_level_to_run(
    is_beat_challenges: bool,
    level_to_run: Optional[int],
    max_level: int,
    max_level_beaten: Optional[int],
) -> Optional[int]:
    if level_to_run is not None:
        if level_to_run > max_level:
            raise ValueError(
                f"Level to run ({level_to_run}) is greater than max level ({max_level})"
            )
        return level_to_run
    if is_beat_challenges:
        if max_level_beaten == max_level:
            return None
        return 1 if max_level_beaten is None else max_level_beaten + 1
    return max_level_beaten


def get_challenge_identifiers(func: Callable[..., Any]) -> Tuple[str, str]:
    full_path = os.path.dirname(os.path.abspath(func.__code__.co_filename))
    challenge_category = os.path.basename(full_path)
    challenge_name = func.__name__.replace(CHALLENGE_PREFIX, "")
    return challenge_category, challenge_name


def get_max_level(
    current_score: Dict[str, Any],
    challenge_category: str,
    challenge_name: str,
) -> int:
    return (
        current_score.get(challenge_category, {})
        .get(challenge_name, {})
        .get("max_level", 1)
    )


def get_max_level_beaten(
    current_score: Dict[str, Any],
    challenge_category: str,
    challenge_name: str,
) -> Optional[int]:
    return (
        current_score.get(challenge_category, {})
        .get(challenge_name, {})
        .get("max_level_beaten", None)
    )
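To make the selection rules in get_level_to_run concrete, a few representative calls and their results (values are illustrative; the import path follows the new module layout in this PR):

from tests.integration.challenges.challenge_decorator.challenge_utils import (
    get_level_to_run,
)

# An explicit --level always wins, as long as it does not exceed max_level.
assert get_level_to_run(False, 2, max_level=3, max_level_beaten=1) == 2

# Normal runs replay the highest level already beaten.
assert get_level_to_run(False, None, max_level=3, max_level_beaten=1) == 1

# --beat-challenges attempts one level above the recorded best ...
assert get_level_to_run(True, None, max_level=3, max_level_beaten=1) == 2

# ... starts at level 1 when nothing has been beaten yet ...
assert get_level_to_run(True, None, max_level=3, max_level_beaten=None) == 1

# ... and returns None (the test is skipped) once the challenge is maxed out.
assert get_level_to_run(True, None, max_level=3, max_level_beaten=3) is None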

View File

@@ -0,0 +1,59 @@
import json
import os
from typing import Any, Dict, Optional, Tuple

from tests.integration.challenges.challenge_decorator.challenge import Challenge

CURRENT_SCORE_LOCATION = "../current_score"
NEW_SCORE_LOCATION = "../new_score"


def update_new_score(
    filename_new_score: str,
    new_score: Dict[str, Any],
    challenge: Challenge,
    new_max_level_beaten: Optional[int],
) -> None:
    write_new_score(new_score, challenge, new_max_level_beaten)
    write_new_score_to_file(new_score, filename_new_score)


def write_new_score(
    new_score: Dict[str, Any], challenge: Challenge, new_max_level_beaten: Optional[int]
) -> Dict[str, Any]:
    new_score.setdefault(challenge.category, {})
    new_score[challenge.category][challenge.name] = {
        "max_level_beaten": new_max_level_beaten,
        "max_level": challenge.max_level,
    }
    return new_score


def write_new_score_to_file(new_score: Dict[str, Any], filename: str) -> None:
    with open(filename, "w") as file:
        json.dump(new_score, file, indent=4)


def get_scores() -> Tuple[Dict[str, Any], Dict[str, Any], str]:
    filename_current_score, filename_new_score = get_score_locations()
    current_score = load_json(filename_current_score)
    new_score = load_json(filename_new_score)
    return current_score, new_score, filename_new_score


def load_json(filename: str) -> Dict[str, Any]:
    if os.path.isfile(filename):
        with open(filename, "r") as file:
            return json.load(file)
    else:
        return {}


def get_score_locations() -> Tuple[str, str]:
    pid = os.getpid()
    project_root = os.path.dirname(os.path.abspath(__file__))
    filename_current_score = os.path.join(
        project_root, f"{CURRENT_SCORE_LOCATION}.json"
    )
    filename_new_score = os.path.join(project_root, f"{NEW_SCORE_LOCATION}_{pid}.json")
    return filename_current_score, filename_new_score
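As an illustration of the recording step, write_new_score updates the in-memory score dict that update_new_score then dumps to the per-process new_score_<pid>.json file (the challenge and its values below are made up):

from tests.integration.challenges.challenge_decorator.challenge import Challenge
from tests.integration.challenges.challenge_decorator.score_utils import (
    write_new_score,
)

# A hypothetical "write_file" challenge whose level 2 was just beaten.
beaten = Challenge(
    name="write_file",
    category="basic_abilities",
    max_level=3,
    max_level_beaten=1,
    level_to_run=2,
)
result = write_new_score({}, beaten, new_max_level_beaten=2)
assert result == {
    "basic_abilities": {"write_file": {"max_level_beaten": 2, "max_level": 3}}
}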

View File

@@ -3,18 +3,33 @@ from _pytest.config import Config
from _pytest.config.argparsing import Parser
from _pytest.fixtures import FixtureRequest
from tests.integration.challenges.challenge_decorator.challenge import Challenge
def pytest_addoption(parser: Parser) -> None:
parser.addoption(
"--level", action="store", default=None, type=int, help="Specify test level"
)
parser.addoption(
"--beat-challenges",
action="store_true",
help="Spepcifies whether the test suite should attempt to beat challenges",
)
def pytest_configure(config: Config) -> None:
config.option.level = config.getoption("--level")
level = config.getoption("--level", default=None)
config.option.level = level
beat_challenges = config.getoption("--beat-challenges", default=False)
config.option.beat_challenges = beat_challenges
@pytest.fixture
def user_selected_level(request: FixtureRequest) -> int:
def level_to_run(request: FixtureRequest) -> int:
# Used for challenges in the goal-oriented tests
return request.config.option.level
@pytest.fixture(autouse=True)
def check_beat_challenges(request: FixtureRequest) -> None:
Challenge.BEAT_CHALLENGES = request.config.getoption("--beat-challenges")
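With these hooks in place, the challenge suite can be driven either at an explicit level or in beat mode, e.g. pytest tests/integration/challenges --level 2 or pytest tests/integration/challenges --beat-challenges. A programmatic equivalent, shown as a sketch (the target path assumes the layout used in this PR):

import pytest

# Replay challenges at an explicitly chosen level.
pytest.main(["tests/integration/challenges", "--level", "2"])

# Let the @challenge decorator attempt one level above the recorded best and,
# when CI=true, write the results to new_score_<pid>.json.
pytest.main(["tests/integration/challenges", "--beat-challenges"])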

View File

@@ -0,0 +1,38 @@
{
    "basic_abilities": {
        "browse_website": {
            "max_level": 1,
            "max_level_beaten": 1
        },
        "write_file": {
            "max_level": 1,
            "max_level_beaten": 1
        }
    },
    "information_retrieval": {
        "information_retrieval_challenge_a": {
            "max_level": 1,
            "max_level_beaten": 1
        }
    },
    "kubernetes": {
        "kubernetes_template_challenge_a": {
            "max_level": 1,
            "max_level_beaten": null
        }
    },
    "memory": {
        "memory_challenge_a": {
            "max_level": 3,
            "max_level_beaten": 3
        },
        "memory_challenge_b": {
            "max_level": 5,
            "max_level_beaten": 1
        },
        "memory_challenge_c": {
            "max_level": 5,
            "max_level_beaten": 1
        }
    }
}

View File

@@ -2,6 +2,9 @@ import pytest
from autogpt.commands.file_operations import read_file
from autogpt.config import Config
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key
@@ -11,11 +14,13 @@ from autogpt.agent import Agent
@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
def test_information_retrieval_challenge_a(
get_company_revenue_agent: Agent,
monkeypatch: pytest.MonkeyPatch,
patched_api_requestor: None,
config: Config,
level_to_run: int,
) -> None:
"""
Test the challenge_a function in a given agent by mocking user inputs and checking the output file content.

View File

@@ -4,24 +4,33 @@ import yaml
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file
from autogpt.config import Config
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key
CYCLE_COUNT = 6
CYCLE_COUNT = 3
@pytest.mark.skip("This challenge hasn't been beaten yet.")
@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
def test_kubernetes_template_challenge_a(
kubernetes_agent: Agent, monkeypatch: pytest.MonkeyPatch, config: Config
kubernetes_agent: Agent,
monkeypatch: pytest.MonkeyPatch,
config: Config,
level_to_run: int,
) -> None:
"""
Test the challenge_a function in a given agent by mocking user inputs
and checking the output file content.
:param get_company_revenue_agent: The agent to test.
:param monkeypatch: pytest's monkeypatch utility for modifying builtins.
Args:
kubernetes_agent (Agent)
monkeypatch (pytest.MonkeyPatch)
config (Config)
level_to_run (int)
"""
run_interaction_loop(monkeypatch, kubernetes_agent, CYCLE_COUNT)

View File

@@ -3,37 +3,38 @@ import pytest
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from autogpt.config import Config
from tests.integration.challenges.utils import get_level_to_run, run_interaction_loop
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key
LEVEL_CURRENTLY_BEATEN = 3 # real level beaten 30 and maybe more, but we can't record it, the cassette is too big
MAX_LEVEL = 3
@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
def test_memory_challenge_a(
memory_management_agent: Agent,
user_selected_level: int,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
config: Config,
level_to_run: int,
) -> None:
"""
The agent reads a file containing a task_id. Then, it reads a series of other files.
After reading 'n' files, the agent must write the task_id into a new file.
Args:
memory_management_agent (Agent)
user_selected_level (int)
patched_api_requestor (MockerFixture)
monkeypatch (pytest.MonkeyPatch)
config (Config)
level_to_run (int)
"""
num_files = get_level_to_run(user_selected_level, LEVEL_CURRENTLY_BEATEN, MAX_LEVEL)
task_id = "2314"
create_instructions_files(memory_management_agent, num_files, task_id, config)
create_instructions_files(memory_management_agent, level_to_run, task_id, config)
run_interaction_loop(monkeypatch, memory_management_agent, num_files + 2)
run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2)
file_path = str(memory_management_agent.workspace.get_path("output.txt"))
content = read_file(file_path, config)

View File

@@ -3,26 +3,24 @@ import pytest
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from autogpt.config import Config
from tests.integration.challenges.utils import (
generate_noise,
get_level_to_run,
run_interaction_loop,
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import generate_noise, run_interaction_loop
from tests.utils import requires_api_key
LEVEL_CURRENTLY_BEATEN = -1
MAX_LEVEL = 5
NOISE = 1000
@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
def test_memory_challenge_b(
memory_management_agent: Agent,
user_selected_level: int,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
config: Config,
level_to_run: int,
) -> None:
"""
The agent reads a series of files, each containing a task_id and noise. After reading 'n' files,
@@ -30,15 +28,14 @@ def test_memory_challenge_b(
Args:
memory_management_agent (Agent)
user_selected_level (int)
patched_api_requestor (MockerFixture)
monkeypatch (pytest.MonkeyPatch)
level_to_run (int)
"""
current_level = get_level_to_run(
user_selected_level, LEVEL_CURRENTLY_BEATEN, MAX_LEVEL
)
task_ids = [str(i * 1111) for i in range(1, current_level + 1)]
create_instructions_files(memory_management_agent, current_level, task_ids, config)
task_ids = [str(i * 1111) for i in range(1, level_to_run + 1)]
create_instructions_files(memory_management_agent, level_to_run, task_ids, config)
run_interaction_loop(monkeypatch, memory_management_agent, current_level + 2)
run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2)
file_path = str(memory_management_agent.workspace.get_path("output.txt"))
content = read_file(file_path, config)

View File

@@ -3,26 +3,25 @@ import pytest
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from autogpt.config import Config
from tests.integration.challenges.utils import (
generate_noise,
get_level_to_run,
run_interaction_loop,
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import generate_noise, run_interaction_loop
from tests.utils import requires_api_key
LEVEL_CURRENTLY_BEATEN = -1
MAX_LEVEL = 5
NOISE = 1000
# @pytest.mark.vcr
@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
def test_memory_challenge_c(
memory_management_agent: Agent,
user_selected_level: int,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
config: Config,
level_to_run: int,
) -> None:
"""
Instead of reading task Ids from files as with the previous challenges, the agent now must remember
@@ -31,11 +30,11 @@ def test_memory_challenge_c(
Args:
memory_management_agent (Agent)
user_selected_level (int)
patched_api_requestor (MockerFixture)
monkeypatch (pytest.MonkeyPatch)
config (Config)
level_to_run (int)
"""
current_level = get_level_to_run(
user_selected_level, LEVEL_CURRENTLY_BEATEN, MAX_LEVEL
)
silly_phrases = [
"The purple elephant danced on a rainbow while eating a taco.",
"The sneaky toaster stole my socks and ran away to Hawaii.",
@@ -49,15 +48,15 @@ def test_memory_challenge_c(
"The ninja unicorn disguised itself as a potted plant and infiltrated the office.",
]
level_silly_phrases = silly_phrases[:current_level]
level_silly_phrases = silly_phrases[:level_to_run]
create_instructions_files(
memory_management_agent, current_level, level_silly_phrases, config=config
memory_management_agent, level_to_run, level_silly_phrases, config=config
)
run_interaction_loop(monkeypatch, memory_management_agent, current_level + 2)
run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2)
file_path = str(memory_management_agent.workspace.get_path("output.txt"))
content = read_file(file_path)
content = read_file(file_path, config)
for phrase in level_silly_phrases:
assert phrase in content, f"Expected the file to contain {phrase}"

View File

@@ -1,45 +1,14 @@
import contextlib
import random
from functools import wraps
from typing import Any, Callable, Dict, Generator, Tuple
from typing import Generator
import pytest
from autogpt.agent import Agent
def get_level_to_run(
user_selected_level: int,
level_currently_beaten: int,
max_level: int,
) -> int:
"""
Determines the appropriate level to run for a challenge, based on user-selected level, level currently beaten, and maximum level.
Args:
user_selected_level (int | None): The level selected by the user. If not provided, the level currently beaten is used.
level_currently_beaten (int | None): The highest level beaten so far. If not provided, the test will be skipped.
max_level (int): The maximum level allowed for the challenge.
Returns:
int: The level to run for the challenge.
Raises:
ValueError: If the user-selected level is greater than the maximum level allowed.
"""
if user_selected_level is None:
if level_currently_beaten == -1:
pytest.skip(
"No one has beaten any levels so we cannot run the test in our pipeline"
)
# by default we run the level currently beaten.
return level_currently_beaten
if user_selected_level > max_level:
raise ValueError(f"This challenge was not designed to go beyond {max_level}")
return user_selected_level
def generate_noise(noise_size: int) -> str:
random.seed(42)
return "".join(
random.choices(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",

View File

@@ -0,0 +1,47 @@
import collections
import glob
import json
import os
from typing import Any, Dict


def deep_merge(source: Dict[Any, Any], dest: Dict[Any, Any]) -> Dict[Any, Any]:
    for key, value in source.items():
        if isinstance(value, Dict):
            dest[key] = deep_merge(value, dest.get(key, {}))
        else:
            dest[key] = value
    return dest


def recursive_sort_dict(data: dict) -> dict:
    for key, value in data.items():
        if isinstance(value, dict):
            data[key] = recursive_sort_dict(value)
    return collections.OrderedDict(sorted(data.items()))


# setup
cwd = os.getcwd()  # get current working directory

new_score_filename_pattern = os.path.join(
    cwd, "tests/integration/challenges/new_score_*.json"
)
current_score_filename = os.path.join(
    cwd, "tests/integration/challenges/current_score.json"
)

merged_data: Dict[str, Any] = {}
for filename in glob.glob(new_score_filename_pattern):
    with open(filename, "r") as f_new:
        data = json.load(f_new)
    merged_data = deep_merge(
        data, merged_data
    )  # deep merge the new data with the merged data
    os.remove(filename)  # remove the individual file

sorted_data = recursive_sort_dict(merged_data)
with open(current_score_filename, "w") as f_current:
    json.dump(sorted_data, f_current, indent=4)
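To illustrate the merge step, deep_merge folds one process's results into the accumulated scores without dropping entries from other categories (illustrative values, assuming deep_merge from the script above is in scope):

partial = {"memory": {"memory_challenge_b": {"max_level_beaten": 2, "max_level": 5}}}
accumulated = {
    "basic_abilities": {"write_file": {"max_level_beaten": 1, "max_level": 1}}
}

merged = deep_merge(partial, accumulated)
# merged now contains both categories:
# {
#     "basic_abilities": {"write_file": {"max_level_beaten": 1, "max_level": 1}},
#     "memory": {"memory_challenge_b": {"max_level_beaten": 2, "max_level": 5}},
# }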