pgadmin4/web/pgadmin/llm/providers/docker.py

360 lines
12 KiB
Python

##########################################################################
#
# pgAdmin 4 - PostgreSQL Tools
#
# Copyright (C) 2013 - 2026, The pgAdmin Development Team
# This software is released under the PostgreSQL Licence
#
##########################################################################
"""Docker Model Runner LLM client implementation.
Docker Desktop 4.40+ includes a built-in model runner that provides an
OpenAI-compatible API at http://localhost:12434. No API key is required.
"""
import json
import socket
import ssl
import urllib.request
import urllib.error
from typing import Optional
import uuid
# Build the module-wide SSL context used for every outgoing request.
# Prefer certifi's CA bundle when the package is installed; otherwise fall
# back to the platform's default trust store.
try:
    import certifi
except ImportError:
    SSL_CONTEXT = ssl.create_default_context()
else:
    # Only the optional import is guarded by the try block, so an error
    # raised while building the context is never mistaken for a missing
    # certifi package.
    SSL_CONTEXT = ssl.create_default_context(cafile=certifi.where())
# Enforce minimum TLS 1.2 to satisfy security requirements
SSL_CONTEXT.minimum_version = ssl.TLSVersion.TLSv1_2
from pgadmin.llm.client import LLMClient, LLMClientError
from pgadmin.llm.models import (
Message, Tool, ToolCall, LLMResponse, LLMError,
Role, StopReason, Usage
)
# Default configuration
# Base URL used when no api_url is passed to DockerClient. The client strips
# any trailing slash and appends /engines/v1/chat/completions to it.
DEFAULT_API_URL = 'http://localhost:12434'
# Model identifier used when no model is passed to DockerClient.
DEFAULT_MODEL = 'ai/qwen3-coder'
class DockerClient(LLMClient):
    """
    Docker Model Runner API client.

    Implements the LLMClient interface for Docker's built-in model runner,
    which provides an OpenAI-compatible API.
    """

    def __init__(
        self, api_url: Optional[str] = None, model: Optional[str] = None
    ):
        """
        Initialize the Docker Model Runner client.

        Args:
            api_url: The Docker Model Runner API URL
                (default: http://localhost:12434).
            model: Optional model name. Defaults to ai/qwen3-coder.
        """
        # Strip trailing slashes so the endpoint path can be appended
        # without producing a double slash.
        self._api_url = (api_url or DEFAULT_API_URL).rstrip('/')
        self._model = model or DEFAULT_MODEL

    @property
    def provider_name(self) -> str:
        """Return the provider identifier reported in errors."""
        return 'docker'

    @property
    def model_name(self) -> str:
        """Return the name of the model this client sends requests to."""
        return self._model

    def is_available(self) -> bool:
        """Check if the client is properly configured.

        Only verifies that an API URL is set; no network call is made.
        """
        return bool(self._api_url)

    def chat(
        self,
        messages: list[Message],
        tools: Optional[list[Tool]] = None,
        system_prompt: Optional[str] = None,
        max_tokens: int = 4096,
        temperature: float = 0.0,
        **kwargs
    ) -> LLMResponse:
        """
        Send a chat request to Docker Model Runner.

        Args:
            messages: List of conversation messages.
            tools: Optional list of tools the model can use.
            system_prompt: Optional system prompt.
            max_tokens: Maximum tokens in response.
            temperature: Sampling temperature.
            **kwargs: Additional parameters (accepted but not used by
                this implementation).

        Returns:
            LLMResponse containing the model's response.

        Raises:
            LLMClientError: If the request fails.
        """
        # Build the request payload
        converted_messages = self._convert_messages(messages)

        # Add system prompt at the beginning if provided
        if system_prompt:
            converted_messages.insert(0, {
                'role': 'system',
                'content': system_prompt
            })

        payload = {
            'model': self._model,
            'messages': converted_messages,
            'max_completion_tokens': max_tokens,
            'temperature': temperature
        }

        if tools:
            payload['tools'] = self._convert_tools(tools)
            payload['tool_choice'] = 'auto'

        # Make the API request
        try:
            response_data = self._make_request(payload)
            return self._parse_response(response_data)
        except LLMClientError:
            # Already in the form callers expect; pass through untouched.
            raise
        except Exception as e:
            # Boundary handler: wrap anything unexpected so callers only
            # ever see LLMClientError, keeping the cause for debugging.
            raise LLMClientError(LLMError(
                message=f"Request failed: {str(e)}",
                provider=self.provider_name
            )) from e

    def _convert_messages(self, messages: list[Message]) -> list[dict]:
        """Convert Message objects to OpenAI API format.

        Messages whose role is not SYSTEM, USER, ASSISTANT or TOOL are
        skipped. A TOOL message expands to one output entry per tool
        result.
        """
        result = []
        for msg in messages:
            if msg.role == Role.SYSTEM:
                result.append({
                    'role': 'system',
                    'content': msg.content
                })
            elif msg.role == Role.USER:
                result.append({
                    'role': 'user',
                    'content': msg.content
                })
            elif msg.role == Role.ASSISTANT:
                message = {
                    'role': 'assistant',
                    # Empty content is sent as null rather than ''.
                    'content': msg.content or None
                }
                # Add tool calls if present
                if msg.tool_calls:
                    message['tool_calls'] = [
                        {
                            'id': tc.id,
                            'type': 'function',
                            'function': {
                                'name': tc.name,
                                # The API expects arguments as a JSON
                                # string, not a nested object.
                                'arguments': json.dumps(tc.arguments)
                            }
                        }
                        for tc in msg.tool_calls
                    ]
                result.append(message)
            elif msg.role == Role.TOOL:
                # Each tool result is a separate message in OpenAI format
                for tr in msg.tool_results:
                    result.append({
                        'role': 'tool',
                        'tool_call_id': tr.tool_call_id,
                        'content': tr.content
                    })
        return result

    def _convert_tools(self, tools: list[Tool]) -> list[dict]:
        """Convert Tool objects to OpenAI API format."""
        return [
            {
                'type': 'function',
                'function': {
                    'name': tool.name,
                    'description': tool.description,
                    'parameters': tool.parameters
                }
            }
            for tool in tools
        ]

    @staticmethod
    def _extract_error_message(error_body: str, fallback: str) -> str:
        """Pull a human-readable message out of an HTTP error body.

        Handles ``{"error": {"message": ...}}``, ``{"error": "..."}`` and
        non-JSON bodies. Returns ``fallback`` when nothing usable is found.
        """
        try:
            error_data = json.loads(error_body)
        except json.JSONDecodeError:
            return error_body or fallback

        error_info = (
            error_data.get('error') if isinstance(error_data, dict) else None
        )
        if isinstance(error_info, dict):
            message = error_info.get('message')
            return str(message) if message else fallback
        if isinstance(error_info, str) and error_info:
            return error_info
        return fallback

    @staticmethod
    def _parse_tool_arguments(raw) -> dict:
        """Decode a tool call's ``arguments`` field into a dict.

        Accepts a JSON string or an already-decoded object. Anything that
        is missing, malformed, or not a JSON object yields ``{}``.
        """
        if isinstance(raw, dict):
            return raw
        if not raw:
            return {}
        try:
            parsed = json.loads(raw)
        except (TypeError, ValueError):
            # TypeError: not str/bytes; ValueError covers JSONDecodeError.
            return {}
        return parsed if isinstance(parsed, dict) else {}

    def _make_request(self, payload: dict) -> dict:
        """Make an HTTP request to the Docker Model Runner API.

        Args:
            payload: JSON-serializable request body.

        Returns:
            The decoded JSON response body.

        Raises:
            LLMClientError: On HTTP error status, connection failure or
                timeout.
        """
        headers = {
            'Content-Type': 'application/json'
        }

        # Docker Model Runner uses /engines/v1 path for OpenAI-compatible API
        url = f'{self._api_url}/engines/v1/chat/completions'

        request = urllib.request.Request(
            url,
            data=json.dumps(payload).encode('utf-8'),
            headers=headers,
            method='POST'
        )

        try:
            # Use longer timeout for local models which can be slower
            with urllib.request.urlopen(
                request, timeout=300, context=SSL_CONTEXT
            ) as response:
                return json.loads(response.read().decode('utf-8'))
        except urllib.error.HTTPError as e:
            # errors='replace' keeps a non-UTF-8 body from masking the
            # real HTTP failure with a UnicodeDecodeError.
            error_body = e.read().decode('utf-8', errors='replace')
            raise LLMClientError(LLMError(
                message=self._extract_error_message(error_body, str(e)),
                code=str(e.code),
                provider=self.provider_name,
                retryable=e.code in (429, 500, 502, 503, 504)
            )) from e
        except urllib.error.URLError as e:
            raise LLMClientError(LLMError(
                message=f"Connection error: {e.reason}. "
                        f"Is Docker Model Runner running at {self._api_url}?",
                provider=self.provider_name,
                retryable=True
            )) from e
        except socket.timeout as e:
            raise LLMClientError(LLMError(
                message="Request timed out. Local models can be slow - "
                        "try a smaller model or wait for the response.",
                code='timeout',
                provider=self.provider_name,
                retryable=True
            )) from e

    def _parse_response(self, data: dict) -> LLMResponse:
        """Parse the API response into an LLMResponse.

        Args:
            data: Decoded JSON body of a chat completion response.

        Returns:
            LLMResponse built from the first choice in the response.

        Raises:
            LLMClientError: If the body is not a JSON object, reports an
                API-level error, has no choices, is a refusal, or is empty
                with a non-normal finish reason.
        """
        if not isinstance(data, dict):
            raise LLMClientError(LLMError(
                message='Unexpected response format from API',
                provider=self.provider_name,
                retryable=False
            ))

        # Check for API-level errors in the response. A null "error" field
        # is treated as "no error".
        error_info = data.get('error')
        if error_info is not None:
            if isinstance(error_info, dict):
                error_message = error_info.get('message', 'Unknown API error')
                error_code = error_info.get('code', 'unknown')
            else:
                # Some servers return the error as a bare string.
                error_message = str(error_info) or 'Unknown API error'
                error_code = 'unknown'
            raise LLMClientError(LLMError(
                message=error_message,
                code=error_code,
                provider=self.provider_name,
                retryable=False
            ))

        choices = data.get('choices') or []
        if not choices:
            raise LLMClientError(LLMError(
                message='No response choices returned from API',
                provider=self.provider_name,
                retryable=False
            ))

        choice = choices[0]
        # "or {}" also covers an explicit null message.
        message = choice.get('message') or {}

        # Check for refusal (content moderation)
        if message.get('refusal'):
            raise LLMClientError(LLMError(
                message=f"Request refused: {message.get('refusal')}",
                provider=self.provider_name,
                retryable=False
            ))

        content = message.get('content', '') or ''
        tool_calls = []

        # Parse tool calls if present ("tool_calls" may be absent or null)
        for tc in message.get('tool_calls') or []:
            if tc.get('type') == 'function':
                func = tc.get('function') or {}
                tool_calls.append(ToolCall(
                    # Generate an id when the server omits it or sends an
                    # empty/null one, so results can still be correlated.
                    id=tc.get('id') or str(uuid.uuid4()),
                    name=func.get('name', ''),
                    arguments=self._parse_tool_arguments(
                        func.get('arguments')
                    )
                ))

        # Map finish reasons to our enum
        finish_reason = choice.get('finish_reason', '')
        # NOTE(review): 'content_filter' is mapped to STOP_SEQUENCE here;
        # confirm that this is the intended StopReason.
        stop_reason_map = {
            'stop': StopReason.END_TURN,
            'tool_calls': StopReason.TOOL_USE,
            'length': StopReason.MAX_TOKENS,
            'content_filter': StopReason.STOP_SEQUENCE
        }
        stop_reason = stop_reason_map.get(finish_reason, StopReason.UNKNOWN)

        # Parse usage information; missing or null counters become 0.
        usage_data = data.get('usage') or {}
        usage = Usage(
            input_tokens=usage_data.get('prompt_tokens') or 0,
            output_tokens=usage_data.get('completion_tokens') or 0,
            total_tokens=usage_data.get('total_tokens') or 0
        )

        # Check for problematic responses
        if not content and not tool_calls:
            if stop_reason == StopReason.MAX_TOKENS:
                input_tokens = usage.input_tokens
                raise LLMClientError(LLMError(
                    message=(
                        f'Response truncated due to token limit '
                        f'(input: {input_tokens} tokens). '
                        f'The request is too large for model '
                        f'{self._model}. '
                        f'Try using a model with a larger context '
                        f'window, or analyze a smaller scope.'
                    ),
                    code='max_tokens',
                    provider=self.provider_name,
                    retryable=False
                ))
            elif finish_reason and finish_reason not in (
                'stop', 'tool_calls'
            ):
                raise LLMClientError(LLMError(
                    message=(
                        f'Empty response with finish reason: '
                        f'{finish_reason}'
                    ),
                    code=finish_reason,
                    provider=self.provider_name,
                    retryable=False
                ))

        return LLMResponse(
            content=content,
            tool_calls=tool_calls,
            stop_reason=stop_reason,
            model=data.get('model', self._model),
            usage=usage,
            raw_response=data
        )