fix(agent/text_processing): Fix `extract_information` LLM response parsing

OpenAI's newest models wrap the JSON in their responses in markdown code fences, which breaks parsing with `json.loads`.

This commit adds an `extract_list_from_response` function to `json_utils/utilities.py` and uses it in place of `json.loads` in `_process_text`.
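For illustration, here is a minimal sketch of the failure mode and of the new helper's behaviour. It assumes the `autogpt` package from this branch is importable; the sample response strings are invented for the example and are not real model output.

```python
import json

from autogpt.json_utils.utilities import extract_list_from_response

# Invented sample of a fenced response, as newer models tend to produce.
response_content = '```json\n["first finding", "second finding"]\n```'

# Plain json.loads fails because of the surrounding markdown fence.
try:
    json.loads(response_content)
except json.JSONDecodeError:
    print("json.loads could not parse the fenced response")

# The helper strips the fence before parsing and returns the list.
print(extract_list_from_response(response_content))
# -> ['first finding', 'second finding']

# Fallback path: no fence, but a JSON array embedded in prose.
prose_response = 'Here are the results: ["alpha", "beta"] as requested.'
print(extract_list_from_response(prose_response))
# -> ['alpha', 'beta']
```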
Reinier van der Leer 2024-02-13 18:28:17 +01:00
parent 393d6b97e6
commit bb7f5abc6c
2 changed files with 28 additions and 2 deletions

autogpt/json_utils/utilities.py

@@ -29,3 +29,27 @@ def extract_dict_from_response(response_content: str) -> dict[str, Any]:
             f"non-dict value {repr(result)}"
         )
     return result
+
+
+def extract_list_from_response(response_content: str) -> list[Any]:
+    # Sometimes the response includes the JSON in a code block with ```
+    pattern = r"```(?:json|JSON)*([\s\S]*?)```"
+    match = re.search(pattern, response_content)
+
+    if match:
+        response_content = match.group(1).strip()
+    else:
+        # The string may contain JSON.
+        json_pattern = r"\[[\s\S]*\]"
+        match = re.search(json_pattern, response_content)
+
+        if match:
+            response_content = match.group()
+
+    result = json.loads(response_content)
+    if not isinstance(result, list):
+        raise ValueError(
+            f"Response '''{response_content}''' evaluated to "
+            f"non-list value {repr(result)}"
+        )
+    return result

agent text processing module (defines `_process_text`)

@@ -1,5 +1,4 @@
 """Text processing functions"""
-import json
 import logging
 import math
 from typing import Iterator, Optional, TypeVar
@@ -13,6 +12,7 @@ from autogpt.core.resource.model_providers import (
     ChatModelProvider,
     ModelTokenizer,
 )
+from autogpt.json_utils.utilities import extract_list_from_response

 logger = logging.getLogger(__name__)
@@ -161,7 +161,9 @@ async def _process_text(
         temperature=0.5,
         max_tokens=max_result_tokens,
         completion_parser=lambda s: (
-            json.loads(s.content) if output_type is not str else None
+            extract_list_from_response(s.content)
+            if output_type is not str
+            else None
         ),
     )