"Beat Challenges" Mode (#4447)

Co-authored-by: Richard Beales <rich@richbeales.net>
pull/4460/head
merwanehamadi 2023-05-29 00:47:06 -07:00 committed by GitHub
parent daafda320b
commit 31cd836530
17 changed files with 400 additions and 82 deletions

.gitmodules (vendored): 2 changed lines
View File

@@ -1,4 +1,4 @@
[submodule "tests/Auto-GPT-test-cassettes"]
path = tests/Auto-GPT-test-cassettes
url = https://github.com/Significant-Gravitas/Auto-GPT-test-cassettes
branch = master
branch = master

View File

@@ -1,6 +1,9 @@
import pytest
from autogpt.agent import Agent
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key
@@ -9,11 +12,12 @@ CYCLE_COUNT = 2
@requires_api_key("OPENAI_API_KEY")
@pytest.mark.vcr
@challenge
def test_browse_website(
browser_agent: Agent,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
# config: Config,
level_to_run: int,
) -> None:
file_path = browser_agent.workspace.get_path("browse_website.txt")
run_interaction_loop(monkeypatch, browser_agent, CYCLE_COUNT)

View File

@@ -3,6 +3,9 @@ import pytest
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file
from autogpt.config import Config
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key
@@ -11,11 +14,13 @@ CYCLE_COUNT = 3
@requires_api_key("OPENAI_API_KEY")
@pytest.mark.vcr
@challenge
def test_write_file(
writer_agent: Agent,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
config: Config,
level_to_run: int,
) -> None:
file_path = str(writer_agent.workspace.get_path("hello_world.txt"))
run_interaction_loop(monkeypatch, writer_agent, CYCLE_COUNT)

View File

@@ -0,0 +1,21 @@
from typing import Optional


class Challenge:
    BEAT_CHALLENGES = False

    def __init__(
        self,
        name: str,
        category: str,
        max_level: int,
        max_level_beaten: Optional[int],
        level_to_run: Optional[int] = None,
    ) -> None:
        self.name = name
        self.category = category
        self.max_level_beaten = max_level_beaten
        self.max_level = max_level
        self.succeeded = False
        self.skipped = False
        self.level_to_run = level_to_run

View File

@@ -0,0 +1,68 @@
import contextlib
import os
from functools import wraps
from typing import Any, Callable, Optional

import pytest

from tests.integration.challenges.challenge_decorator.challenge import Challenge
from tests.integration.challenges.challenge_decorator.challenge_utils import (
    create_challenge,
)
from tests.integration.challenges.challenge_decorator.score_utils import (
    get_scores,
    update_new_score,
)

MAX_LEVEL_TO_IMPROVE_ON = (
    1  # we will attempt to beat 1 level above the current level for now.
)


def challenge(func: Callable[..., Any]) -> Callable[..., None]:
    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> None:
        run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1
        while run_remaining > 0:
            current_score, new_score, new_score_location = get_scores()
            level_to_run = kwargs["level_to_run"] if "level_to_run" in kwargs else None
            challenge = create_challenge(
                func, current_score, Challenge.BEAT_CHALLENGES, level_to_run
            )
            if challenge.level_to_run is not None:
                kwargs["level_to_run"] = challenge.level_to_run
                with contextlib.suppress(AssertionError):
                    func(*args, **kwargs)
                    challenge.succeeded = True
            else:
                challenge.skipped = True
            if os.environ.get("CI") == "true":
                new_max_level_beaten = get_new_max_level_beaten(
                    challenge, Challenge.BEAT_CHALLENGES
                )
                update_new_score(
                    new_score_location, new_score, challenge, new_max_level_beaten
                )
            if challenge.level_to_run is None:
                pytest.skip("This test has not been unlocked yet.")

            if not challenge.succeeded:
                if Challenge.BEAT_CHALLENGES:
                    # xfail
                    pytest.xfail("Challenge failed")
                raise AssertionError("Challenge failed")
            run_remaining -= 1

    return wrapper


def get_new_max_level_beaten(
    challenge: Challenge, beat_challenges: bool
) -> Optional[int]:
    if challenge.succeeded:
        return challenge.level_to_run
    if challenge.skipped:
        return challenge.max_level_beaten
    # Challenge failed
    return challenge.max_level_beaten if beat_challenges else None
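For reference, a minimal sketch of how a new challenge test would plug into this decorator (the test name and body are illustrative, not part of this PR; level_to_run is the fixture added in conftest.py below and is overridden by the decorator):

import pytest

from tests.integration.challenges.challenge_decorator.challenge_decorator import (
    challenge,
)


@pytest.mark.vcr
@challenge
def test_example_challenge(
    monkeypatch: pytest.MonkeyPatch,
    level_to_run: int,  # provided by conftest; the decorator swaps in the level it picked
) -> None:
    # The decorator reads current_score.json, picks the level to attempt
    # (or skips the test when nothing is unlocked), treats an AssertionError
    # as a failed attempt, and records the outcome to new_score_<pid>.json
    # when the CI environment variable is "true".
    assert level_to_run >= 1  # the real challenge body would exercise the agent here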

View File

@@ -0,0 +1,81 @@
import os
from typing import Any, Callable, Dict, Optional, Tuple

from tests.integration.challenges.challenge_decorator.challenge import Challenge

CHALLENGE_PREFIX = "test_"


def create_challenge(
    func: Callable[..., Any],
    current_score: Dict[str, Any],
    is_beat_challenges: bool,
    level_to_run: Optional[int] = None,
) -> Challenge:
    challenge_category, challenge_name = get_challenge_identifiers(func)
    max_level = get_max_level(current_score, challenge_category, challenge_name)
    max_level_beaten = get_max_level_beaten(
        current_score, challenge_category, challenge_name
    )
    level_to_run = get_level_to_run(
        is_beat_challenges, level_to_run, max_level, max_level_beaten
    )

    return Challenge(
        name=challenge_name,
        category=challenge_category,
        max_level=max_level,
        max_level_beaten=max_level_beaten,
        level_to_run=level_to_run,
    )


def get_level_to_run(
    is_beat_challenges: bool,
    level_to_run: Optional[int],
    max_level: int,
    max_level_beaten: Optional[int],
) -> Optional[int]:
    if level_to_run is not None:
        if level_to_run > max_level:
            raise ValueError(
                f"Level to run ({level_to_run}) is greater than max level ({max_level})"
            )
        return level_to_run
    if is_beat_challenges:
        if max_level_beaten == max_level:
            return None
        return 1 if max_level_beaten is None else max_level_beaten + 1
    return max_level_beaten


def get_challenge_identifiers(func: Callable[..., Any]) -> Tuple[str, str]:
    full_path = os.path.dirname(os.path.abspath(func.__code__.co_filename))
    challenge_category = os.path.basename(full_path)
    challenge_name = func.__name__.replace(CHALLENGE_PREFIX, "")
    return challenge_category, challenge_name


def get_max_level(
    current_score: Dict[str, Any],
    challenge_category: str,
    challenge_name: str,
) -> int:
    return (
        current_score.get(challenge_category, {})
        .get(challenge_name, {})
        .get("max_level", 1)
    )


def get_max_level_beaten(
    current_score: Dict[str, Any],
    challenge_category: str,
    challenge_name: str,
) -> Optional[int]:
    return (
        current_score.get(challenge_category, {})
        .get(challenge_name, {})
        .get("max_level_beaten", None)
    )
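To make the selection rules in get_level_to_run concrete, a few representative calls and their results (values are illustrative; the import path follows the new module layout in this PR):

from tests.integration.challenges.challenge_decorator.challenge_utils import (
    get_level_to_run,
)

# An explicit --level always wins, as long as it does not exceed max_level.
assert get_level_to_run(False, 2, max_level=3, max_level_beaten=1) == 2

# Normal runs replay the highest level already beaten.
assert get_level_to_run(False, None, max_level=3, max_level_beaten=1) == 1

# --beat-challenges attempts one level above the recorded best ...
assert get_level_to_run(True, None, max_level=3, max_level_beaten=1) == 2

# ... starts at level 1 when nothing has been beaten yet ...
assert get_level_to_run(True, None, max_level=3, max_level_beaten=None) == 1

# ... and returns None (the test is skipped) once the challenge is maxed out.
assert get_level_to_run(True, None, max_level=3, max_level_beaten=3) is None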

View File

@@ -0,0 +1,59 @@
import json
import os
from typing import Any, Dict, Optional, Tuple

from tests.integration.challenges.challenge_decorator.challenge import Challenge

CURRENT_SCORE_LOCATION = "../current_score"
NEW_SCORE_LOCATION = "../new_score"


def update_new_score(
    filename_new_score: str,
    new_score: Dict[str, Any],
    challenge: Challenge,
    new_max_level_beaten: Optional[int],
) -> None:
    write_new_score(new_score, challenge, new_max_level_beaten)
    write_new_score_to_file(new_score, filename_new_score)


def write_new_score(
    new_score: Dict[str, Any], challenge: Challenge, new_max_level_beaten: Optional[int]
) -> Dict[str, Any]:
    new_score.setdefault(challenge.category, {})
    new_score[challenge.category][challenge.name] = {
        "max_level_beaten": new_max_level_beaten,
        "max_level": challenge.max_level,
    }
    return new_score


def write_new_score_to_file(new_score: Dict[str, Any], filename: str) -> None:
    with open(filename, "w") as file:
        json.dump(new_score, file, indent=4)


def get_scores() -> Tuple[Dict[str, Any], Dict[str, Any], str]:
    filename_current_score, filename_new_score = get_score_locations()
    current_score = load_json(filename_current_score)
    new_score = load_json(filename_new_score)
    return current_score, new_score, filename_new_score


def load_json(filename: str) -> Dict[str, Any]:
    if os.path.isfile(filename):
        with open(filename, "r") as file:
            return json.load(file)
    else:
        return {}


def get_score_locations() -> Tuple[str, str]:
    pid = os.getpid()
    project_root = os.path.dirname(os.path.abspath(__file__))
    filename_current_score = os.path.join(
        project_root, f"{CURRENT_SCORE_LOCATION}.json"
    )
    filename_new_score = os.path.join(project_root, f"{NEW_SCORE_LOCATION}_{pid}.json")
    return filename_current_score, filename_new_score
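As an illustration of the recording step, write_new_score updates the in-memory score dict that update_new_score then dumps to the per-process new_score_<pid>.json file (the challenge and its values below are made up):

from tests.integration.challenges.challenge_decorator.challenge import Challenge
from tests.integration.challenges.challenge_decorator.score_utils import (
    write_new_score,
)

# A hypothetical "write_file" challenge whose level 2 was just beaten.
beaten = Challenge(
    name="write_file",
    category="basic_abilities",
    max_level=3,
    max_level_beaten=1,
    level_to_run=2,
)
result = write_new_score({}, beaten, new_max_level_beaten=2)
assert result == {
    "basic_abilities": {"write_file": {"max_level_beaten": 2, "max_level": 3}}
}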

View File

@@ -3,18 +3,33 @@ from _pytest.config import Config
from _pytest.config.argparsing import Parser
from _pytest.fixtures import FixtureRequest
from tests.integration.challenges.challenge_decorator.challenge import Challenge
def pytest_addoption(parser: Parser) -> None:
parser.addoption(
"--level", action="store", default=None, type=int, help="Specify test level"
)
parser.addoption(
"--beat-challenges",
action="store_true",
help="Spepcifies whether the test suite should attempt to beat challenges",
)
def pytest_configure(config: Config) -> None:
config.option.level = config.getoption("--level")
level = config.getoption("--level", default=None)
config.option.level = level
beat_challenges = config.getoption("--beat-challenges", default=False)
config.option.beat_challenges = beat_challenges
@pytest.fixture
def user_selected_level(request: FixtureRequest) -> int:
def level_to_run(request: FixtureRequest) -> int:
# Used for challenges in the goal-oriented tests
return request.config.option.level
@pytest.fixture(autouse=True)
def check_beat_challenges(request: FixtureRequest) -> None:
Challenge.BEAT_CHALLENGES = request.config.getoption("--beat-challenges")
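With these hooks in place, the challenge suite can be driven either at an explicit level or in beat mode, e.g. pytest tests/integration/challenges --level 2 or pytest tests/integration/challenges --beat-challenges. A programmatic equivalent, shown as a sketch (the target path assumes the layout used in this PR):

import pytest

# Replay challenges at an explicitly chosen level.
pytest.main(["tests/integration/challenges", "--level", "2"])

# Let the @challenge decorator attempt one level above the recorded best and,
# when CI=true, write the results to new_score_<pid>.json.
pytest.main(["tests/integration/challenges", "--beat-challenges"])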

View File

@@ -0,0 +1,38 @@
{
    "basic_abilities": {
        "browse_website": {
            "max_level": 1,
            "max_level_beaten": 1
        },
        "write_file": {
            "max_level": 1,
            "max_level_beaten": 1
        }
    },
    "information_retrieval": {
        "information_retrieval_challenge_a": {
            "max_level": 1,
            "max_level_beaten": 1
        }
    },
    "kubernetes": {
        "kubernetes_template_challenge_a": {
            "max_level": 1,
            "max_level_beaten": null
        }
    },
    "memory": {
        "memory_challenge_a": {
            "max_level": 3,
            "max_level_beaten": 3
        },
        "memory_challenge_b": {
            "max_level": 5,
            "max_level_beaten": 1
        },
        "memory_challenge_c": {
            "max_level": 5,
            "max_level_beaten": 1
        }
    }
}

View File

@@ -2,6 +2,9 @@ import pytest
from autogpt.commands.file_operations import read_file
from autogpt.config import Config
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key
@@ -11,11 +14,13 @@ from autogpt.agent import Agent
@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
def test_information_retrieval_challenge_a(
get_company_revenue_agent: Agent,
monkeypatch: pytest.MonkeyPatch,
patched_api_requestor: None,
config: Config,
level_to_run: int,
) -> None:
"""
Test the challenge_a function in a given agent by mocking user inputs and checking the output file content.

View File

@@ -4,24 +4,33 @@ import yaml
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file
from autogpt.config import Config
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key
CYCLE_COUNT = 6
CYCLE_COUNT = 3
@pytest.mark.skip("This challenge hasn't been beaten yet.")
@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
def test_kubernetes_template_challenge_a(
kubernetes_agent: Agent, monkeypatch: pytest.MonkeyPatch, config: Config
kubernetes_agent: Agent,
monkeypatch: pytest.MonkeyPatch,
config: Config,
level_to_run: int,
) -> None:
"""
Test the challenge_a function in a given agent by mocking user inputs
and checking the output file content.
:param get_company_revenue_agent: The agent to test.
:param monkeypatch: pytest's monkeypatch utility for modifying builtins.
Args:
kubernetes_agent (Agent)
monkeypatch (pytest.MonkeyPatch)
config (Config)
level_to_run (int)
"""
run_interaction_loop(monkeypatch, kubernetes_agent, CYCLE_COUNT)

View File

@@ -3,37 +3,38 @@ import pytest
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from autogpt.config import Config
from tests.integration.challenges.utils import get_level_to_run, run_interaction_loop
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key
LEVEL_CURRENTLY_BEATEN = 3 # real level beaten 30 and maybe more, but we can't record it, the cassette is too big
MAX_LEVEL = 3
@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
def test_memory_challenge_a(
memory_management_agent: Agent,
user_selected_level: int,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
config: Config,
level_to_run: int,
) -> None:
"""
The agent reads a file containing a task_id. Then, it reads a series of other files.
After reading 'n' files, the agent must write the task_id into a new file.
Args:
memory_management_agent (Agent)
user_selected_level (int)
patched_api_requestor (MockerFixture)
monkeypatch (pytest.MonkeyPatch)
config (Config)
level_to_run (int)
"""
num_files = get_level_to_run(user_selected_level, LEVEL_CURRENTLY_BEATEN, MAX_LEVEL)
task_id = "2314"
create_instructions_files(memory_management_agent, num_files, task_id, config)
create_instructions_files(memory_management_agent, level_to_run, task_id, config)
run_interaction_loop(monkeypatch, memory_management_agent, num_files + 2)
run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2)
file_path = str(memory_management_agent.workspace.get_path("output.txt"))
content = read_file(file_path, config)

View File

@@ -3,26 +3,24 @@ import pytest
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from autogpt.config import Config
from tests.integration.challenges.utils import (
generate_noise,
get_level_to_run,
run_interaction_loop,
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import generate_noise, run_interaction_loop
from tests.utils import requires_api_key
LEVEL_CURRENTLY_BEATEN = -1
MAX_LEVEL = 5
NOISE = 1000
@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
def test_memory_challenge_b(
memory_management_agent: Agent,
user_selected_level: int,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
config: Config,
level_to_run: int,
) -> None:
"""
The agent reads a series of files, each containing a task_id and noise. After reading 'n' files,
@@ -30,15 +28,14 @@ def test_memory_challenge_b(
Args:
memory_management_agent (Agent)
user_selected_level (int)
patched_api_requestor (MockerFixture)
monkeypatch (pytest.MonkeyPatch)
level_to_run (int)
"""
current_level = get_level_to_run(
user_selected_level, LEVEL_CURRENTLY_BEATEN, MAX_LEVEL
)
task_ids = [str(i * 1111) for i in range(1, current_level + 1)]
create_instructions_files(memory_management_agent, current_level, task_ids, config)
task_ids = [str(i * 1111) for i in range(1, level_to_run + 1)]
create_instructions_files(memory_management_agent, level_to_run, task_ids, config)
run_interaction_loop(monkeypatch, memory_management_agent, current_level + 2)
run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2)
file_path = str(memory_management_agent.workspace.get_path("output.txt"))
content = read_file(file_path, config)

View File

@@ -3,26 +3,25 @@ import pytest
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from autogpt.config import Config
from tests.integration.challenges.utils import (
generate_noise,
get_level_to_run,
run_interaction_loop,
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import generate_noise, run_interaction_loop
from tests.utils import requires_api_key
LEVEL_CURRENTLY_BEATEN = -1
MAX_LEVEL = 5
NOISE = 1000
# @pytest.mark.vcr
@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
def test_memory_challenge_c(
memory_management_agent: Agent,
user_selected_level: int,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
config: Config,
level_to_run: int,
) -> None:
"""
Instead of reading task Ids from files as with the previous challenges, the agent now must remember
@@ -31,11 +30,11 @@ def test_memory_challenge_c(
Args:
memory_management_agent (Agent)
user_selected_level (int)
patched_api_requestor (MockerFixture)
monkeypatch (pytest.MonkeyPatch)
config (Config)
level_to_run (int)
"""
current_level = get_level_to_run(
user_selected_level, LEVEL_CURRENTLY_BEATEN, MAX_LEVEL
)
silly_phrases = [
"The purple elephant danced on a rainbow while eating a taco.",
"The sneaky toaster stole my socks and ran away to Hawaii.",
@@ -49,15 +48,15 @@ def test_memory_challenge_c(
"The ninja unicorn disguised itself as a potted plant and infiltrated the office.",
]
level_silly_phrases = silly_phrases[:current_level]
level_silly_phrases = silly_phrases[:level_to_run]
create_instructions_files(
memory_management_agent, current_level, level_silly_phrases, config=config
memory_management_agent, level_to_run, level_silly_phrases, config=config
)
run_interaction_loop(monkeypatch, memory_management_agent, current_level + 2)
run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2)
file_path = str(memory_management_agent.workspace.get_path("output.txt"))
content = read_file(file_path)
content = read_file(file_path, config)
for phrase in level_silly_phrases:
assert phrase in content, f"Expected the file to contain {phrase}"

View File

@@ -1,45 +1,14 @@
import contextlib
import random
from functools import wraps
from typing import Any, Callable, Dict, Generator, Tuple
from typing import Generator
import pytest
from autogpt.agent import Agent
def get_level_to_run(
user_selected_level: int,
level_currently_beaten: int,
max_level: int,
) -> int:
"""
Determines the appropriate level to run for a challenge, based on user-selected level, level currently beaten, and maximum level.
Args:
user_selected_level (int | None): The level selected by the user. If not provided, the level currently beaten is used.
level_currently_beaten (int | None): The highest level beaten so far. If not provided, the test will be skipped.
max_level (int): The maximum level allowed for the challenge.
Returns:
int: The level to run for the challenge.
Raises:
ValueError: If the user-selected level is greater than the maximum level allowed.
"""
if user_selected_level is None:
if level_currently_beaten == -1:
pytest.skip(
"No one has beaten any levels so we cannot run the test in our pipeline"
)
# by default we run the level currently beaten.
return level_currently_beaten
if user_selected_level > max_level:
raise ValueError(f"This challenge was not designed to go beyond {max_level}")
return user_selected_level
def generate_noise(noise_size: int) -> str:
random.seed(42)
return "".join(
random.choices(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",

View File

@@ -0,0 +1,47 @@
import collections
import glob
import json
import os
from typing import Any, Dict


def deep_merge(source: Dict[Any, Any], dest: Dict[Any, Any]) -> Dict[Any, Any]:
    for key, value in source.items():
        if isinstance(value, Dict):
            dest[key] = deep_merge(value, dest.get(key, {}))
        else:
            dest[key] = value
    return dest


def recursive_sort_dict(data: dict) -> dict:
    for key, value in data.items():
        if isinstance(value, dict):
            data[key] = recursive_sort_dict(value)
    return collections.OrderedDict(sorted(data.items()))


# setup
cwd = os.getcwd()  # get current working directory

new_score_filename_pattern = os.path.join(
    cwd, "tests/integration/challenges/new_score_*.json"
)
current_score_filename = os.path.join(
    cwd, "tests/integration/challenges/current_score.json"
)

merged_data: Dict[str, Any] = {}
for filename in glob.glob(new_score_filename_pattern):
    with open(filename, "r") as f_new:
        data = json.load(f_new)
    merged_data = deep_merge(
        data, merged_data
    )  # deep merge the new data with the merged data
    os.remove(filename)  # remove the individual file

sorted_data = recursive_sort_dict(merged_data)
with open(current_score_filename, "w") as f_current:
    json.dump(sorted_data, f_current, indent=4)
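To illustrate the merge step, deep_merge folds one process's results into the accumulated scores without dropping entries from other categories (illustrative values, assuming deep_merge from the script above is in scope):

partial = {"memory": {"memory_challenge_b": {"max_level_beaten": 2, "max_level": 5}}}
accumulated = {
    "basic_abilities": {"write_file": {"max_level_beaten": 1, "max_level": 1}}
}

merged = deep_merge(partial, accumulated)
# merged now contains both categories:
# {
#     "basic_abilities": {"write_file": {"max_level_beaten": 1, "max_level": 1}},
#     "memory": {"memory_challenge_b": {"max_level_beaten": 2, "max_level": 5}},
# }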