fix(agent/text_processing): Fix `extract_information` LLM response parsing

OpenAI's newest models wrap the JSON in their responses in markdown code fences, which breaks parsing with `json.loads`.

This commit adds an `extract_list_from_response` function to `json_utils/utilities.py` and uses it in place of `json.loads` in `_process_text`.
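For illustration, here is a minimal sketch of the failure mode and of the new helper's behaviour. It assumes the `autogpt` package from this branch is importable; the sample response strings are invented for the example and are not real model output.

```python
import json

from autogpt.json_utils.utilities import extract_list_from_response

# Invented sample of a fenced response, as newer models tend to produce.
response_content = '```json\n["first finding", "second finding"]\n```'

# Plain json.loads fails because of the surrounding markdown fence.
try:
    json.loads(response_content)
except json.JSONDecodeError:
    print("json.loads could not parse the fenced response")

# The helper strips the fence before parsing and returns the list.
print(extract_list_from_response(response_content))
# -> ['first finding', 'second finding']

# Fallback path: no fence, but a JSON array embedded in prose.
prose_response = 'Here are the results: ["alpha", "beta"] as requested.'
print(extract_list_from_response(prose_response))
# -> ['alpha', 'beta']
```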
Reinier van der Leer 2024-02-13 18:28:17 +01:00
parent 393d6b97e6
commit bb7f5abc6c
2 changed files with 28 additions and 2 deletions

autogpt/json_utils/utilities.py

@@ -29,3 +29,27 @@ def extract_dict_from_response(response_content: str) -> dict[str, Any]:
             f"non-dict value {repr(result)}"
         )
     return result
+
+
+def extract_list_from_response(response_content: str) -> list[Any]:
+    # Sometimes the response includes the JSON in a code block with ```
+    pattern = r"```(?:json|JSON)*([\s\S]*?)```"
+    match = re.search(pattern, response_content)
+
+    if match:
+        response_content = match.group(1).strip()
+    else:
+        # The string may contain JSON.
+        json_pattern = r"\[[\s\S]*\]"
+        match = re.search(json_pattern, response_content)
+
+        if match:
+            response_content = match.group()
+
+    result = json.loads(response_content)
+    if not isinstance(result, list):
+        raise ValueError(
+            f"Response '''{response_content}''' evaluated to "
+            f"non-list value {repr(result)}"
+        )
+    return result

agent text processing module (defines `_process_text`)

@@ -1,5 +1,4 @@
 """Text processing functions"""
-import json
 import logging
 import math
 from typing import Iterator, Optional, TypeVar
@@ -13,6 +12,7 @@ from autogpt.core.resource.model_providers import (
     ChatModelProvider,
     ModelTokenizer,
 )
+from autogpt.json_utils.utilities import extract_list_from_response

 logger = logging.getLogger(__name__)
@@ -161,7 +161,9 @@ async def _process_text(
         temperature=0.5,
         max_tokens=max_result_tokens,
         completion_parser=lambda s: (
-            json.loads(s.content) if output_type is not str else None
+            extract_list_from_response(s.content)
+            if output_type is not str
+            else None
         ),
     )