From 6fb9b6d03b268e799672f9b889d5c61efefcb29e Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Sun, 11 Jun 2023 07:21:26 -0700 Subject: [PATCH] Retry regression tests (#4648) --- requirements.txt | 1 + .../basic_abilities/test_browse_website.py | 5 +- .../basic_abilities/test_write_file.py | 5 +- .../challenge_decorator.py | 95 +++++++++++-------- .../debug_code/test_debug_code_challenge_a.py | 5 +- .../test_information_retrieval_challenge_a.py | 5 +- .../test_information_retrieval_challenge_b.py | 5 +- .../test_kubernetes_template_challenge_a.py | 5 +- .../memory/test_memory_challenge_a.py | 6 +- .../memory/test_memory_challenge_b.py | 5 +- .../memory/test_memory_challenge_c.py | 6 +- .../memory/test_memory_challenge_d.py | 5 +- 12 files changed, 64 insertions(+), 84 deletions(-) diff --git a/requirements.txt b/requirements.txt index 31f7706a3..16ed85589 100644 --- a/requirements.txt +++ b/requirements.txt @@ -61,3 +61,4 @@ pytest-mock vcrpy @ git+https://github.com/Significant-Gravitas/vcrpy.git@master pytest-recording pytest-xdist +flaky diff --git a/tests/challenges/basic_abilities/test_browse_website.py b/tests/challenges/basic_abilities/test_browse_website.py index b918434c0..6ee80d33a 100644 --- a/tests/challenges/basic_abilities/test_browse_website.py +++ b/tests/challenges/basic_abilities/test_browse_website.py @@ -3,14 +3,11 @@ import pytest from autogpt.agent import Agent from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import run_interaction_loop -from tests.utils import requires_api_key CYCLE_COUNT = 2 -@requires_api_key("OPENAI_API_KEY") -@pytest.mark.vcr -@challenge +@challenge() def test_browse_website( browser_agent: Agent, patched_api_requestor: None, diff --git a/tests/challenges/basic_abilities/test_write_file.py b/tests/challenges/basic_abilities/test_write_file.py index e84529bc0..9aa837e9a 100644 --- a/tests/challenges/basic_abilities/test_write_file.py +++ b/tests/challenges/basic_abilities/test_write_file.py @@ -6,7 +6,6 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop -from tests.utils import requires_api_key CYCLE_COUNT_PER_LEVEL = [1, 1] EXPECTED_OUTPUTS_PER_LEVEL = [ @@ -15,9 +14,7 @@ EXPECTED_OUTPUTS_PER_LEVEL = [ ] -@requires_api_key("OPENAI_API_KEY") -@pytest.mark.vcr -@challenge +@challenge() def test_write_file( file_system_agents: List[Agent], patched_api_requestor: None, diff --git a/tests/challenges/challenge_decorator/challenge_decorator.py b/tests/challenges/challenge_decorator/challenge_decorator.py index d887e21dd..7f721de1f 100644 --- a/tests/challenges/challenge_decorator/challenge_decorator.py +++ b/tests/challenges/challenge_decorator/challenge_decorator.py @@ -3,6 +3,7 @@ from functools import wraps from typing import Any, Callable, Optional import pytest +from flaky import flaky # type: ignore from tests.challenges.challenge_decorator.challenge import Challenge from tests.challenges.challenge_decorator.challenge_utils import create_challenge @@ -10,6 +11,7 @@ from tests.challenges.challenge_decorator.score_utils import ( get_scores, update_new_score, ) +from tests.utils import requires_api_key MAX_LEVEL_TO_IMPROVE_ON = ( 1 # we will attempt to beat 1 level above the current level for now. @@ -18,52 +20,61 @@ MAX_LEVEL_TO_IMPROVE_ON = ( CHALLENGE_FAILED_MESSAGE = "Challenges can sometimes fail randomly, please run this test again and if it fails reach out to us on https://discord.gg/autogpt and reach out to us on the 'challenges' channel to let us know the challenge you're struggling with." -def challenge(func: Callable[..., Any]) -> Callable[..., None]: - @wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> None: - run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1 - original_error: Optional[Exception] = None +def challenge( + max_runs: int = 2, min_passes: int = 1, api_key: str = "OPENAI_API_KEY" +) -> Callable[[Callable[..., Any]], Callable[..., None]]: + def decorator(func: Callable[..., Any]) -> Callable[..., None]: + @requires_api_key(api_key) + @pytest.mark.vcr + @flaky(max_runs=max_runs, min_passes=min_passes) + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> None: + run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1 + original_error: Optional[Exception] = None - while run_remaining > 0: - current_score, new_score, new_score_location = get_scores() - level_to_run = kwargs["level_to_run"] if "level_to_run" in kwargs else None - challenge = create_challenge( - func, current_score, Challenge.BEAT_CHALLENGES, level_to_run - ) - if challenge.level_to_run is not None: - kwargs["level_to_run"] = challenge.level_to_run - try: - func(*args, **kwargs) - challenge.succeeded = True - except AssertionError as err: - original_error = AssertionError( - f"{CHALLENGE_FAILED_MESSAGE}\n{err}" + while run_remaining > 0: + current_score, new_score, new_score_location = get_scores() + level_to_run = ( + kwargs["level_to_run"] if "level_to_run" in kwargs else None + ) + challenge = create_challenge( + func, current_score, Challenge.BEAT_CHALLENGES, level_to_run + ) + if challenge.level_to_run is not None: + kwargs["level_to_run"] = challenge.level_to_run + try: + func(*args, **kwargs) + challenge.succeeded = True + except AssertionError as err: + original_error = AssertionError( + f"{CHALLENGE_FAILED_MESSAGE}\n{err}" + ) + challenge.succeeded = False + except Exception as err: + original_error = err + challenge.succeeded = False + else: + challenge.skipped = True + if os.environ.get("CI") == "true": + new_max_level_beaten = get_new_max_level_beaten( + challenge, Challenge.BEAT_CHALLENGES ) - challenge.succeeded = False - except Exception as err: - original_error = err - challenge.succeeded = False - else: - challenge.skipped = True - if os.environ.get("CI") == "true": - new_max_level_beaten = get_new_max_level_beaten( - challenge, Challenge.BEAT_CHALLENGES - ) - update_new_score( - new_score_location, new_score, challenge, new_max_level_beaten - ) - if challenge.level_to_run is None: - pytest.skip("This test has not been unlocked yet.") + update_new_score( + new_score_location, new_score, challenge, new_max_level_beaten + ) + if challenge.level_to_run is None: + pytest.skip("This test has not been unlocked yet.") - if not challenge.succeeded: - if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge: - # xfail - pytest.xfail(str(original_error)) - if original_error: - raise original_error - run_remaining -= 1 + if not challenge.succeeded: + if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge: + pytest.xfail(str(original_error)) + if original_error: + raise original_error + run_remaining -= 1 - return wrapper + return wrapper + + return decorator def get_new_max_level_beaten( diff --git a/tests/challenges/debug_code/test_debug_code_challenge_a.py b/tests/challenges/debug_code/test_debug_code_challenge_a.py index ca85675cd..43f524c71 100644 --- a/tests/challenges/debug_code/test_debug_code_challenge_a.py +++ b/tests/challenges/debug_code/test_debug_code_challenge_a.py @@ -11,7 +11,6 @@ from tests.challenges.utils import ( get_workspace_path, run_interaction_loop, ) -from tests.utils import requires_api_key CYCLE_COUNT = 5 EXPECTED_VALUES = ["[0, 1]", "[2, 5]", "[0, 3]"] @@ -20,9 +19,7 @@ CODE_FILE_PATH = "code.py" TEST_FILE_PATH = "test.py" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_debug_code_challenge_a( debug_code_agents: Agent, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py index ee2c17c97..8e0fdb63c 100644 --- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py @@ -4,7 +4,6 @@ from pytest_mock import MockerFixture from autogpt.commands.file_operations import read_file from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop -from tests.utils import requires_api_key CYCLE_COUNT = 3 EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]] @@ -13,9 +12,7 @@ from autogpt.agent import Agent OUTPUT_LOCATION = "output.txt" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_information_retrieval_challenge_a( information_retrieval_agents: Agent, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py index 94ca4e44a..0672b879c 100644 --- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py @@ -7,15 +7,12 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop -from tests.utils import requires_api_key CYCLE_COUNT = 3 OUTPUT_LOCATION = "2010_nobel_prize_winners.txt" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_information_retrieval_challenge_b( get_nobel_prize_agent: Agent, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py index 93a2695f0..c2eaab753 100644 --- a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py +++ b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py @@ -6,15 +6,12 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop -from tests.utils import requires_api_key CYCLE_COUNT = 3 OUTPUT_LOCATION = "kube.yaml" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_kubernetes_template_challenge_a( kubernetes_agent: Agent, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/challenges/memory/test_memory_challenge_a.py b/tests/challenges/memory/test_memory_challenge_a.py index b67af6f7b..7050696aa 100644 --- a/tests/challenges/memory/test_memory_challenge_a.py +++ b/tests/challenges/memory/test_memory_challenge_a.py @@ -5,14 +5,11 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop -from tests.utils import requires_api_key OUTPUT_LOCATION = "output.txt" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_memory_challenge_a( memory_management_agent: Agent, patched_api_requestor: MockerFixture, @@ -28,7 +25,6 @@ def test_memory_challenge_a( monkeypatch (pytest.MonkeyPatch) level_to_run (int) """ - task_id = "2314" create_instructions_files(memory_management_agent, level_to_run, task_id) diff --git a/tests/challenges/memory/test_memory_challenge_b.py b/tests/challenges/memory/test_memory_challenge_b.py index 4a4d30e0c..4abc41888 100644 --- a/tests/challenges/memory/test_memory_challenge_b.py +++ b/tests/challenges/memory/test_memory_challenge_b.py @@ -9,15 +9,12 @@ from tests.challenges.utils import ( get_workspace_path, run_interaction_loop, ) -from tests.utils import requires_api_key NOISE = 1000 OUTPUT_LOCATION = "output.txt" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_memory_challenge_b( memory_management_agent: Agent, patched_api_requestor: MockerFixture, diff --git a/tests/challenges/memory/test_memory_challenge_c.py b/tests/challenges/memory/test_memory_challenge_c.py index 2479da25f..f8f4ad988 100644 --- a/tests/challenges/memory/test_memory_challenge_c.py +++ b/tests/challenges/memory/test_memory_challenge_c.py @@ -9,16 +9,12 @@ from tests.challenges.utils import ( get_workspace_path, run_interaction_loop, ) -from tests.utils import requires_api_key NOISE = 1000 OUTPUT_LOCATION = "output.txt" -# @pytest.mark.vcr -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_memory_challenge_c( memory_management_agent: Agent, patched_api_requestor: MockerFixture, diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index fc7b5a330..ff35e1837 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -8,16 +8,13 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop -from tests.utils import requires_api_key LEVEL_CURRENTLY_BEATEN = 1 MAX_LEVEL = 5 OUTPUT_LOCATION = "output.txt" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_memory_challenge_d( memory_management_agent: Agent, patched_api_requestor: MockerFixture,