Support /v1/responses for OpenAI models. #9795

* Support /v1/responses for OpenAI models. #9795

* Address CodeRabbit review feedback on OpenAI provider.

- Preserve exception chains with 'raise ... from e' in all
  exception handlers for better debugging tracebacks.
- Use f-string !s conversion instead of str() calls.
- Extract duplicated max_tokens error handling into a shared
  _raise_max_tokens_error() helper method.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Validate api_url and use incomplete_details from Responses API.

- Strip known endpoint suffixes (/chat/completions, /responses) from
  api_url in __init__ to prevent doubled paths if a user provides a
  full endpoint URL instead of a base URL.
- Use incomplete_details.reason from the Responses API to properly
  distinguish between max_output_tokens and content_filter when the
  response status is 'incomplete', in both the non-streaming and
  streaming parsers.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
pull/9767/head^2
Dave Page 2026-03-30 09:46:22 +01:00 committed by GitHub
parent 2228575564
commit 9bb96360dd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 435 additions and 58 deletions

View File

@ -47,7 +47,9 @@ class OpenAIClient(LLMClient):
OpenAI GPT API client.
Implements the LLMClient interface for OpenAI's GPT models
and any OpenAI-compatible API endpoint.
and any OpenAI-compatible API endpoint. Supports both the
Chat Completions API (/v1/chat/completions) and the Responses
API (/v1/responses) for newer models that require it.
"""
def __init__(self, api_key: Optional[str] = None,
@ -67,7 +69,20 @@ class OpenAIClient(LLMClient):
self._api_key = api_key or ''
self._model = model or DEFAULT_MODEL
base_url = (api_url or DEFAULT_API_BASE_URL).rstrip('/')
self._api_url = f'{base_url}/chat/completions'
# Strip known endpoint suffixes in case the user provided a full URL
for suffix in ('/chat/completions', '/responses'):
if base_url.endswith(suffix):
base_url = base_url[:-len(suffix)].rstrip('/')
break
self._base_url = base_url
self._use_responses_api = False
@property
def _api_url(self) -> str:
"""Return the appropriate API endpoint URL."""
if self._use_responses_api:
return f'{self._base_url}/responses'
return f'{self._base_url}/chat/completions'
@property
def provider_name(self) -> str:
@ -81,7 +96,9 @@ class OpenAIClient(LLMClient):
"""Check if the client is properly configured."""
# API key is required for the default OpenAI endpoint, but optional
# for custom endpoints (e.g., local LLM servers).
if self._api_url.startswith(DEFAULT_API_BASE_URL):
if self._base_url.rstrip('/').startswith(
DEFAULT_API_BASE_URL.rstrip('/')
):
return bool(self._api_key)
return True
@ -109,10 +126,72 @@ class OpenAIClient(LLMClient):
Raises:
LLMClientError: If the request fails.
"""
# Build the request payload
if self._use_responses_api:
return self._chat_responses(
messages, tools, system_prompt, max_tokens
)
# Try Chat Completions API first
payload = self._build_chat_payload(
messages, tools, system_prompt, max_tokens
)
try:
response_data = self._make_request(payload)
return self._parse_response(response_data)
except LLMClientError as e:
if self._should_use_responses_api(e):
self._use_responses_api = True
return self._chat_responses(
messages, tools, system_prompt, max_tokens
)
raise
except Exception as e:
raise LLMClientError(LLMError(
message=f"Request failed: {e!s}",
provider=self.provider_name
)) from e
def _chat_responses(
    self,
    messages: list[Message],
    tools: Optional[list[Tool]] = None,
    system_prompt: Optional[str] = None,
    max_tokens: int = 4096,
) -> LLMResponse:
    """Send a chat request via the OpenAI Responses API endpoint.

    Builds a Responses API payload, issues the HTTP request and parses
    the result. LLMClientError instances propagate unchanged; any other
    failure is wrapped in an LLMClientError with the original exception
    chained for debugging.
    """
    request_body = self._build_responses_payload(
        messages, tools, system_prompt, max_tokens
    )
    try:
        raw = self._make_request(request_body)
        return self._parse_responses_response(raw)
    except LLMClientError:
        # Already a well-formed client error -- re-raise as-is.
        raise
    except Exception as e:
        raise LLMClientError(LLMError(
            message=f"Request failed: {e!s}",
            provider=self.provider_name
        )) from e
def _should_use_responses_api(self, error: LLMClientError) -> bool:
    """Check if the error indicates we should use the Responses API.

    Matches (case-insensitively) the error phrasings OpenAI returns
    when a model only works with /v1/responses.
    """
    text = str(error).lower()
    markers = (
        'v1/responses',
        'not supported in the v1/chat/completions',
        'not a chat model',
    )
    return any(marker in text for marker in markers)
def _build_chat_payload(
self,
messages: list[Message],
tools: Optional[list[Tool]],
system_prompt: Optional[str],
max_tokens: int
) -> dict:
"""Build payload for the Chat Completions API."""
converted_messages = self._convert_messages(messages)
# Add system prompt at the beginning if provided
if system_prompt:
converted_messages.insert(0, {
'role': 'system',
@ -129,20 +208,35 @@ class OpenAIClient(LLMClient):
payload['tools'] = self._convert_tools(tools)
payload['tool_choice'] = 'auto'
# Make the API request
try:
response_data = self._make_request(payload)
return self._parse_response(response_data)
except LLMClientError:
raise
except Exception as e:
raise LLMClientError(LLMError(
message=f"Request failed: {str(e)}",
provider=self.provider_name
))
return payload
def _build_responses_payload(
    self,
    messages: list[Message],
    tools: Optional[list[Tool]],
    system_prompt: Optional[str],
    max_tokens: int
) -> dict:
    """Build payload for the Responses API.

    The system prompt maps to 'instructions' and the token cap to
    'max_output_tokens', per the Responses API schema.
    """
    body = {
        'model': self._model,
        'input': self._convert_messages_responses(messages),
        'max_output_tokens': max_tokens,
    }
    if system_prompt:
        body['instructions'] = system_prompt
    if tools:
        body['tools'] = self._convert_tools_responses(tools)
        body['tool_choice'] = 'auto'
    return body
def _convert_messages(self, messages: list[Message]) -> list[dict]:
"""Convert Message objects to OpenAI API format."""
"""Convert Message objects to OpenAI Chat Completions API format."""
result = []
for msg in messages:
@ -191,8 +285,53 @@ class OpenAIClient(LLMClient):
return result
def _convert_messages_responses(
    self, messages: list[Message]
) -> list[dict]:
    """Convert Message objects to OpenAI Responses API input items.

    System messages map to the 'developer' role, assistant tool calls
    become standalone 'function_call' items, and tool results become
    'function_call_output' items, as the Responses API expects.
    """
    items: list[dict] = []
    for msg in messages:
        if msg.role == Role.SYSTEM:
            items.append({'role': 'developer', 'content': msg.content})
        elif msg.role == Role.USER:
            items.append({'role': 'user', 'content': msg.content})
        elif msg.role == Role.ASSISTANT:
            if msg.content:
                items.append(
                    {'role': 'assistant', 'content': msg.content}
                )
            # Each tool call is its own top-level item in the
            # Responses API, separate from the assistant text.
            for tc in (msg.tool_calls or []):
                items.append({
                    'type': 'function_call',
                    'call_id': tc.id,
                    'name': tc.name,
                    'arguments': json.dumps(tc.arguments)
                })
        elif msg.role == Role.TOOL:
            items.extend(
                {
                    'type': 'function_call_output',
                    'call_id': tr.tool_call_id,
                    'output': tr.content
                }
                for tr in msg.tool_results
            )
    return items
def _convert_tools(self, tools: list[Tool]) -> list[dict]:
"""Convert Tool objects to OpenAI API format."""
"""Convert Tool objects to Chat Completions API format."""
return [
{
'type': 'function',
@ -205,6 +344,18 @@ class OpenAIClient(LLMClient):
for tool in tools
]
def _convert_tools_responses(self, tools: list[Tool]) -> list[dict]:
    """Convert Tool objects to Responses API format.

    Unlike the Chat Completions format, the Responses API expects the
    function fields ('name', 'description', 'parameters') at the top
    level of each tool entry rather than nested under 'function'.
    """
    converted = []
    for tool in tools:
        converted.append({
            'type': 'function',
            'name': tool.name,
            'description': tool.description,
            'parameters': tool.parameters
        })
    return converted
def _make_request(self, payload: dict) -> dict:
"""Make an HTTP request to the OpenAI API."""
headers = {
@ -255,8 +406,24 @@ class OpenAIClient(LLMClient):
retryable=True
))
def _raise_max_tokens_error(self, input_tokens: int):
    """Raise an LLMClientError for a token-limit truncated response.

    Args:
        input_tokens: Input token count reported by the API, included
            in the message so the user can gauge the request size.

    Raises:
        LLMClientError: Always; code='max_tokens', not retryable.
    """
    detail = (
        f'Response truncated due to token limit '
        f'(input: {input_tokens} tokens). '
        f'The request is too large for model '
        f'{self._model}. '
        f'Try using a model with a larger context '
        f'window, or analyze a smaller scope (e.g., a '
        f'specific schema instead of the entire '
        f'database).'
    )
    raise LLMClientError(LLMError(
        message=detail,
        code='max_tokens',
        provider=self.provider_name,
        retryable=False
    ))
def _parse_response(self, data: dict) -> LLMResponse:
"""Parse the OpenAI API response into an LLMResponse."""
"""Parse the Chat Completions API response into an LLMResponse."""
# Check for API-level errors in the response
if 'error' in data:
error_info = data['error']
@ -325,20 +492,7 @@ class OpenAIClient(LLMClient):
# Check for problematic responses
if not content and not tool_calls:
if stop_reason == StopReason.MAX_TOKENS:
input_tokens = usage.input_tokens
raise LLMClientError(LLMError(
message=f'Response truncated due to token limit '
f'(input: {input_tokens} tokens). '
f'The request is too large for model '
f'{self._model}. '
f'Try using a model with a larger context '
f'window, or analyze a smaller scope (e.g., a '
f'specific schema instead of the entire '
f'database).',
code='max_tokens',
provider=self.provider_name,
retryable=False
))
self._raise_max_tokens_error(usage.input_tokens)
elif finish_reason and finish_reason not in ('stop', 'tool_calls'):
raise LLMClientError(LLMError(
message=(f'Empty response with finish reason: '
@ -357,6 +511,92 @@ class OpenAIClient(LLMClient):
raw_response=data
)
def _parse_responses_response(self, data: dict) -> LLMResponse:
    """Parse a Responses API payload into an LLMResponse.

    Raises:
        LLMClientError: If the payload carries an API-level error, or
            if the response is empty due to a token-limit truncation
            or a content filter.
    """
    # Surface API-level errors before attempting to parse output.
    if 'error' in data:
        info = data['error']
        raise LLMClientError(LLMError(
            message=info.get('message', 'Unknown API error'),
            code=info.get('code', 'unknown'),
            provider=self.provider_name,
            retryable=False
        ))

    text_parts: list[str] = []
    tool_calls: list[ToolCall] = []
    for item in data.get('output', []):
        kind = item.get('type', '')
        if kind == 'message':
            # Collect only output_text parts of the message content.
            text_parts.extend(
                part.get('text', '')
                for part in item.get('content', [])
                if part.get('type') == 'output_text'
            )
        elif kind == 'function_call':
            try:
                parsed_args = json.loads(item.get('arguments', '{}'))
            except json.JSONDecodeError:
                parsed_args = {}
            tool_calls.append(ToolCall(
                id=item.get('call_id', str(uuid.uuid4())),
                name=item.get('name', ''),
                arguments=parsed_args
            ))
    content = ''.join(text_parts)

    # Map status (plus incomplete_details.reason) onto a StopReason.
    status = data.get('status', '')
    if tool_calls:
        stop_reason = StopReason.TOOL_USE
    elif status == 'completed':
        stop_reason = StopReason.END_TURN
    elif status == 'incomplete':
        reason = data.get('incomplete_details', {}).get('reason', '')
        # Anything other than an explicit content filter is treated
        # as a token-limit truncation.
        if reason == 'content_filter':
            stop_reason = StopReason.STOP_SEQUENCE
        else:
            stop_reason = StopReason.MAX_TOKENS
    else:
        stop_reason = StopReason.UNKNOWN

    usage_data = data.get('usage', {})
    usage = Usage(
        input_tokens=usage_data.get('input_tokens', 0),
        output_tokens=usage_data.get('output_tokens', 0),
        total_tokens=usage_data.get('total_tokens', 0)
    )

    # A response with neither text nor tool calls is an error state.
    if not content and not tool_calls:
        if stop_reason == StopReason.MAX_TOKENS:
            self._raise_max_tokens_error(usage.input_tokens)
        elif stop_reason == StopReason.STOP_SEQUENCE:
            raise LLMClientError(LLMError(
                message='Response blocked by content filter.',
                code='content_filter',
                provider=self.provider_name,
                retryable=False
            ))

    return LLMResponse(
        content=content,
        tool_calls=tool_calls,
        stop_reason=stop_reason,
        model=data.get('model', self._model),
        usage=usage,
        raw_response=data
    )
def chat_stream(
self,
messages: list[Message],
@ -367,35 +607,46 @@ class OpenAIClient(LLMClient):
**kwargs
) -> Generator[Union[str, LLMResponse], None, None]:
"""Stream a chat response from OpenAI."""
converted_messages = self._convert_messages(messages)
if self._use_responses_api:
payload = self._build_responses_payload(
messages, tools, system_prompt, max_tokens
)
payload['stream'] = True
try:
yield from self._process_stream(payload)
except LLMClientError:
raise
except Exception as e:
raise LLMClientError(LLMError(
message=f"Streaming request failed: {e!s}",
provider=self.provider_name
)) from e
return
if system_prompt:
converted_messages.insert(0, {
'role': 'system',
'content': system_prompt
})
payload = {
'model': self._model,
'messages': converted_messages,
'max_completion_tokens': max_tokens,
'stream': True,
'stream_options': {'include_usage': True}
}
if tools:
payload['tools'] = self._convert_tools(tools)
payload['tool_choice'] = 'auto'
# Try Chat Completions API first
payload = self._build_chat_payload(
messages, tools, system_prompt, max_tokens
)
payload['stream'] = True
payload['stream_options'] = {'include_usage': True}
try:
yield from self._process_stream(payload)
except LLMClientError:
raise
except LLMClientError as e:
if self._should_use_responses_api(e):
self._use_responses_api = True
payload = self._build_responses_payload(
messages, tools, system_prompt, max_tokens
)
payload['stream'] = True
yield from self._process_stream(payload)
else:
raise
except Exception as e:
raise LLMClientError(LLMError(
message=f"Streaming request failed: {str(e)}",
message=f"Streaming request failed: {e!s}",
provider=self.provider_name
))
)) from e
def _process_stream(
self, payload: dict
@ -449,16 +700,19 @@ class OpenAIClient(LLMClient):
))
try:
yield from self._read_openai_stream(response)
if self._use_responses_api:
yield from self._read_responses_stream(response)
else:
yield from self._read_openai_stream(response)
finally:
response.close()
def _read_openai_stream(
self, response
) -> Generator[Union[str, LLMResponse], None, None]:
"""Read and parse an OpenAI-format SSE stream.
"""Read and parse an OpenAI Chat Completions SSE stream.
Uses readline() for incremental reading it returns as soon
Uses readline() for incremental reading -- it returns as soon
as a complete line arrives from the server, unlike read()
which blocks until a buffer fills up.
"""
@ -574,3 +828,126 @@ class OpenAIClient(LLMClient):
model=model_name,
usage=usage
)
def _read_responses_stream(
    self, response
) -> Generator[Union[str, LLMResponse], None, None]:
    """Read and parse an OpenAI Responses API SSE stream.

    The Responses API uses named events with types like
    response.output_text.delta for text streaming and
    response.completed for the final response.

    Yields each text delta as a str as it arrives, then a final
    LLMResponse assembled from the accumulated stream state.

    Raises:
        LLMClientError: If the stream ends with no text content and
            no tool calls.
    """
    content_parts = []
    # tool_calls_data: {call_id: {name, arguments}}
    tool_calls_data = {}
    model_name = self._model
    usage = Usage()
    resp_status = ''
    resp_incomplete = {}
    while True:
        # readline() returns as soon as a full line arrives; an
        # empty read signals the server closed the stream.
        line_bytes = response.readline()
        if not line_bytes:
            break
        line = line_bytes.decode('utf-8', errors='replace').strip()
        if not line or line.startswith(':'):
            continue
        # Skip event type lines - we identify events by data type field
        if line.startswith('event: '):
            continue
        if not line.startswith('data: '):
            continue
        try:
            data = json.loads(line[6:])
        except json.JSONDecodeError:
            # Ignore malformed SSE payloads rather than aborting.
            continue
        event_type = data.get('type', '')
        if event_type == 'response.output_text.delta':
            delta = data.get('delta', '')
            if delta:
                content_parts.append(delta)
                yield delta
        elif event_type == 'response.output_item.added':
            # A new output item; only function_call items need
            # tracking so later argument deltas can be appended.
            item = data.get('item', {})
            if item.get('type') == 'function_call':
                call_id = item.get('call_id', '')
                tool_calls_data[call_id] = {
                    'name': item.get('name', ''),
                    'arguments': ''
                }
        elif event_type == 'response.function_call_arguments.delta':
            call_id = data.get('call_id', '')
            # Defensive: create an entry if the 'added' event for
            # this call_id was never seen.
            if call_id not in tool_calls_data:
                tool_calls_data[call_id] = {
                    'name': '', 'arguments': ''
                }
            tool_calls_data[call_id]['arguments'] += data.get(
                'delta', ''
            )
        elif event_type == 'response.completed':
            # Terminal event: carries usage, model name and the
            # completion status used for the stop reason below.
            resp = data.get('response', {})
            u = resp.get('usage', {})
            usage = Usage(
                input_tokens=u.get('input_tokens', 0),
                output_tokens=u.get('output_tokens', 0),
                total_tokens=u.get('total_tokens', 0)
            )
            model_name = resp.get('model', model_name)
            resp_status = resp.get('status', '')
            resp_incomplete = resp.get('incomplete_details', {})
    # Build final response
    content = ''.join(content_parts)
    tool_calls = []
    for call_id, tc in tool_calls_data.items():
        try:
            arguments = json.loads(tc['arguments']) \
                if tc['arguments'] else {}
        except json.JSONDecodeError:
            arguments = {}
        tool_calls.append(ToolCall(
            id=call_id or str(uuid.uuid4()),
            name=tc['name'],
            arguments=arguments
        ))
    # Determine stop reason from final response status
    if tool_calls:
        stop_reason = StopReason.TOOL_USE
    elif resp_status == 'incomplete':
        # incomplete_details.reason distinguishes a content filter
        # from a max_output_tokens truncation.
        reason = resp_incomplete.get('reason', '') \
            if resp_incomplete else ''
        if reason == 'content_filter':
            stop_reason = StopReason.STOP_SEQUENCE
        else:
            stop_reason = StopReason.MAX_TOKENS
    elif content:
        stop_reason = StopReason.END_TURN
    else:
        stop_reason = StopReason.UNKNOWN
    if not content and not tool_calls:
        raise LLMClientError(LLMError(
            message='No response content returned from API',
            provider=self.provider_name,
            retryable=False
        ))
    yield LLMResponse(
        content=content,
        tool_calls=tool_calls,
        stop_reason=stop_reason,
        model=model_name,
        usage=usage
    )