Merge pull request #3066 from collijk/bugfix/make-local-memory-json-when-it-doesnt-exist

Bugfix/make local memory json when it doesn't exist
pull/3087/head
James Collins 2023-04-23 17:49:36 -07:00 committed by GitHub
commit e8473d4920
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 134 additions and 98 deletions

View File

@ -9,11 +9,9 @@ from requests import Response
from requests.compat import urljoin
from autogpt.config import Config
from autogpt.memory import get_memory
from autogpt.processing.html import extract_hyperlinks, format_hyperlinks
CFG = Config()
memory = get_memory(CFG)
session = requests.Session()
session.headers.update({"User-Agent": CFG.user_agent})

View File

@ -1,7 +1,7 @@
from __future__ import annotations
import dataclasses
import os
from pathlib import Path
from typing import Any, List
import numpy as np
@ -38,26 +38,16 @@ class LocalCache(MemoryProviderSingleton):
Returns:
None
"""
self.filename = f"{cfg.memory_index}.json"
if os.path.exists(self.filename):
try:
with open(self.filename, "w+b") as f:
file_content = f.read()
if not file_content.strip():
file_content = b"{}"
f.write(file_content)
workspace_path = Path(cfg.workspace_path)
self.filename = workspace_path / f"{cfg.memory_index}.json"
loaded = orjson.loads(file_content)
self.data = CacheContent(**loaded)
except orjson.JSONDecodeError:
print(f"Error: The file '{self.filename}' is not in JSON format.")
self.data = CacheContent()
else:
print(
f"Warning: The file '{self.filename}' does not exist. "
"Local memory would not be saved to a file."
)
self.data = CacheContent()
self.filename.touch(exist_ok=True)
file_content = b"{}"
with self.filename.open("w+b") as f:
f.write(file_content)
self.data = CacheContent()
def add(self, text: str):
"""

View File

@ -10,7 +10,6 @@ from autogpt.llm_utils import create_chat_completion
from autogpt.memory import get_memory
CFG = Config()
MEMORY = get_memory(CFG)
def split_text(
@ -109,7 +108,8 @@ def summarize_text(
memory_to_add = f"Source: {url}\n" f"Raw content part#{i + 1}: {chunk}"
MEMORY.add(memory_to_add)
memory = get_memory(CFG)
memory.add(memory_to_add)
messages = [create_message(chunk, question)]
tokens_for_chunk = token_counter.count_message_tokens(messages, model)
@ -128,7 +128,7 @@ def summarize_text(
memory_to_add = f"Source: {url}\n" f"Content summary part#{i + 1}: {summary}"
MEMORY.add(memory_to_add)
memory.add(memory_to_add)
print(f"Summarized {len(chunks)} chunks.")

View File

@ -3,6 +3,7 @@ from pathlib import Path
import pytest
from dotenv import load_dotenv
from autogpt.config import Config
from autogpt.workspace import Workspace
load_dotenv()
@ -17,3 +18,14 @@ def workspace_root(tmp_path) -> Path:
def workspace(workspace_root: Path) -> Workspace:
    """Provide a Workspace fixture rooted at a freshly created workspace dir."""
    root = Workspace.make_workspace(workspace_root)
    return Workspace(root, restrict_to_workspace=True)
@pytest.fixture()
def config(workspace: Workspace) -> Config:
    """Yield the Config singleton pointed at the test workspace.

    Config is a singleton, so the workspace path is stashed before the test
    and restored afterwards to avoid leaking state into other tests.
    """
    cfg = Config()
    previous_path = cfg.workspace_path
    cfg.workspace_path = workspace.root
    yield cfg
    cfg.workspace_path = previous_path

View File

@ -1,73 +0,0 @@
# sourcery skip: snake-case-functions
"""Tests for LocalCache class"""
import os
import sys
import unittest
import pytest
from autogpt.memory.local import LocalCache
from tests.utils import requires_api_key
def mock_config() -> type:
    """Build a lightweight stand-in for the Config class.

    Returns:
        A dynamically created class exposing the attributes the LocalCache
        tests read (``debug_mode``, ``continuous_mode``, ``speak_mode``,
        ``memory_index``).

    Note:
        The previous ``-> dict`` annotation was wrong: ``type(name, bases,
        namespace)`` returns a class object, not a dict.
    """
    return type(
        "MockConfig",
        (object,),
        {
            "debug_mode": False,
            "continuous_mode": False,
            "speak_mode": False,
            "memory_index": "auto-gpt",
        },
    )
@pytest.mark.integration_test
class TestLocalCache(unittest.TestCase):
    """Tests for LocalCache class"""

    def setUp(self) -> None:
        """Set up the test environment"""
        # mock_config() builds a minimal class standing in for Config.
        self.cfg = mock_config()
        # NOTE(review): LocalCache appears to be a singleton elsewhere in the
        # project, so this may hand back one shared instance across tests —
        # verify before assuming per-test isolation.
        self.cache = LocalCache(self.cfg)

    @requires_api_key("OPENAI_API_KEY")
    def test_add(self) -> None:
        """Test adding a text to the cache"""
        text = "Sample text"
        self.cache.add(text)
        self.assertIn(text, self.cache.data.texts)

    @requires_api_key("OPENAI_API_KEY")
    def test_clear(self) -> None:
        """Test clearing the cache"""
        self.cache.clear()
        self.assertEqual(self.cache.data.texts, [])

    @requires_api_key("OPENAI_API_KEY")
    def test_get(self) -> None:
        """Test getting a text from the cache"""
        text = "Sample text"
        self.cache.add(text)
        result = self.cache.get(text)
        self.assertEqual(result, [text])

    @requires_api_key("OPENAI_API_KEY")
    def test_get_relevant(self) -> None:
        """Test getting relevant texts from the cache"""
        text1 = "Sample text 1"
        text2 = "Sample text 2"
        self.cache.add(text1)
        self.cache.add(text2)
        result = self.cache.get_relevant(text1, 1)
        self.assertEqual(result, [text1])

    @requires_api_key("OPENAI_API_KEY")
    def test_get_stats(self) -> None:
        """Test getting the cache stats"""
        # NOTE(review): expects 4 texts after a single add here — presumably
        # this relies on cache state accumulated by the other tests (shared
        # singleton + alphabetical run order); confirm before relying on it.
        text = "Sample text"
        self.cache.add(text)
        stats = self.cache.get_stats()
        self.assertEqual(stats, (4, self.cache.data.embeddings.shape))

109
tests/test_local_cache.py Normal file
View File

@ -0,0 +1,109 @@
# sourcery skip: snake-case-functions
"""Tests for LocalCache class"""
import unittest
import orjson
import pytest
from autogpt.memory.local import EMBED_DIM, SAVE_OPTIONS
from autogpt.memory.local import LocalCache as LocalCache_
from tests.utils import requires_api_key
@pytest.fixture
def LocalCache():
    """Provide the LocalCache class with any cached singleton instance evicted.

    LocalCache is a singleton; drop any instance left over from a prior test
    so each test constructs a fresh cache.
    """
    LocalCache_._instances.pop(LocalCache_, None)
    return LocalCache_
@pytest.fixture
def mock_embed_with_ada(mocker):
    """Replace the ada embedding call with a constant EMBED_DIM-length vector."""
    fake_embedding = [0.1] * EMBED_DIM
    mocker.patch(
        "autogpt.memory.local.create_embedding_with_ada",
        return_value=fake_embedding,
    )
def test_init_without_backing_file(LocalCache, config, workspace):
    """Constructing the cache must create an empty JSON backing file."""
    backing_file = workspace.root / f"{config.memory_index}.json"
    assert not backing_file.exists()

    LocalCache(config)

    assert backing_file.exists()
    assert backing_file.read_text() == "{}"
def test_init_with_backing_empty_file(LocalCache, config, workspace):
    """An existing zero-byte backing file is normalized to an empty JSON object."""
    backing_file = workspace.root / f"{config.memory_index}.json"
    backing_file.touch()
    assert backing_file.exists()

    LocalCache(config)

    assert backing_file.exists()
    assert backing_file.read_text() == "{}"
def test_init_with_backing_file(LocalCache, config, workspace):
    """Initializing over a backing file that already holds data."""
    cache_file = workspace.root / f"{config.memory_index}.json"
    cache_file.touch()

    raw_data = {"texts": ["test"]}
    data = orjson.dumps(raw_data, option=SAVE_OPTIONS)
    with cache_file.open("wb") as f:
        f.write(data)

    assert cache_file.exists()
    LocalCache(config)
    assert cache_file.exists()
    # NOTE(review): this asserts the pre-existing contents are discarded and
    # replaced with "{}", which matches the shown LocalCache.__init__ that
    # unconditionally rewrites the file. That throws away saved memory on
    # every start-up — confirm this data loss is intended, not a regression.
    assert cache_file.read_text() == "{}"
def test_add(LocalCache, config, mock_embed_with_ada):
    """Adding one text stores it and appends a single embedding row."""
    memory = LocalCache(config)
    memory.add("test")
    assert memory.data.texts == ["test"]
    assert memory.data.embeddings.shape == (1, EMBED_DIM)
def test_clear(LocalCache, config, mock_embed_with_ada):
    """clear() returns the cache to its pristine, empty state."""
    memory = LocalCache(config)

    def assert_empty():
        # Empty cache: no texts, zero embedding rows.
        assert memory.data.texts == []
        assert memory.data.embeddings.shape == (0, EMBED_DIM)

    assert_empty()
    memory.add("test")
    assert memory.data.texts == ["test"]
    assert memory.data.embeddings.shape == (1, EMBED_DIM)
    memory.clear()
    assert_empty()
def test_get(LocalCache, config, mock_embed_with_ada):
    """get() is empty before an add and returns the stored text afterwards."""
    memory = LocalCache(config)
    assert memory.get("test") == []

    memory.add("test")
    assert memory.get("test") == ["test"]
@requires_api_key("OPENAI_API_KEY")
def test_get_relevant(LocalCache, config) -> None:
    """The most relevant entry for a stored text is that text itself."""
    memory = LocalCache(config)
    first, second = "Sample text 1", "Sample text 2"
    memory.add(first)
    memory.add(second)

    assert memory.get_relevant(first, 1) == [first]
def test_get_stats(LocalCache, config, mock_embed_with_ada) -> None:
    """Stats report one stored text plus the embedding matrix shape."""
    memory = LocalCache(config)
    memory.add("Sample text")
    assert memory.get_stats() == (1, memory.data.embeddings.shape)