# pgadmin4/web/pgadmin/llm/reports/generator.py
##########################################################################
#
# pgAdmin 4 - PostgreSQL Tools
#
# Copyright (C) 2013 - 2026, The pgAdmin Development Team
# This software is released under the PostgreSQL Licence
#
##########################################################################
"""High-level report generation functions using the pipeline."""
import json
from typing import Generator, Optional, Any
from flask import Response, stream_with_context
from flask_babel import gettext
from pgadmin.llm.client import get_llm_client, LLMClient
from pgadmin.llm.reports.pipeline import ReportPipeline
from pgadmin.llm.reports.sections import get_sections_for_scope
from pgadmin.llm.reports.queries import QUERIES
def create_query_executor(conn) -> callable:
    """Create a query executor function for the pipeline.

    Args:
        conn: Database connection object.

    Returns:
        A callable that executes queries by ID.
    """
    def _has_extension(extname):
        """Return True if the named extension is installed on *conn*."""
        probe_sql = """
            SELECT EXISTS (
                SELECT 1 FROM pg_extension WHERE extname = %s
            ) as available
        """
        ok, res = conn.execute_dict(probe_sql, [extname])
        if not (ok and res):
            return False
        return bool(res.get('rows', [{}])[0].get('available', False))

    def executor(query_id: str, context: dict) -> dict[str, Any]:
        """Execute a query by ID.

        Args:
            query_id: The query identifier from QUERIES registry.
            context: Execution context (may contain schema_id for filtering).

        Returns:
            Dictionary with query results.
        """
        query_def = QUERIES.get(query_id)
        if not query_def:
            return {'error': f'Unknown query: {query_id}', 'rows': []}

        # Some queries are only meaningful when a given extension exists;
        # report them as skipped rather than failing.
        required_ext = query_def.get('requires_extension')
        if required_ext and not _has_extension(required_ext):
            return {
                'note': f"Extension '{required_ext}' not installed",
                'rows': []
            }

        sql = query_def['sql']
        # Schema-scoped queries carry a %s placeholder for the schema OID.
        schema_id = context.get('schema_id')
        if schema_id and '%s' in sql:
            ok, res = conn.execute_dict(sql, [schema_id])
        else:
            ok, res = conn.execute_dict(sql)

        if not (ok and res):
            return {'error': 'Query failed', 'rows': []}
        return {'rows': res.get('rows', [])}

    return executor
def generate_report_streaming(
    report_type: str,
    scope: str,
    conn,
    manager,
    context: dict,
    client: Optional[LLMClient] = None
) -> Generator[bytes, None, None]:
    """Generate a report with streaming progress updates.

    Yields Server-Sent Events (SSE) formatted byte strings — each event is
    encoded to UTF-8 by ``_sse_event`` (previously annotated as ``str``,
    which did not match the actual yielded type).

    Args:
        report_type: One of 'security', 'performance', 'design'.
        scope: One of 'server', 'database', 'schema'.
        conn: Database connection.
        manager: Connection manager.
        context: Report context dict with keys like:
            - server_version
            - database_name
            - schema_name
            - schema_id (for schema-scoped reports)
        client: Optional LLM client (will create one if not provided).

    Yields:
        SSE-formatted event byte strings.
    """
    # Get or create LLM client
    if client is None:
        client = get_llm_client()
    if not client:
        yield _sse_event({
            'type': 'error',
            'message': gettext('Failed to initialize LLM client.')
        })
        return

    # Get sections for this report type and scope
    sections = get_sections_for_scope(report_type, scope)
    if not sections:
        yield _sse_event({
            'type': 'error',
            'message': gettext('No sections available for this report type.')
        })
        return

    # The pipeline prompts need the server version.
    context['server_version'] = manager.ver

    # Create the pipeline
    query_executor = create_query_executor(conn)
    pipeline = ReportPipeline(
        report_type=report_type,
        sections=sections,
        client=client,
        query_executor=query_executor
    )

    # Execute pipeline and stream events
    try:
        for event in pipeline.execute_with_progress(context):
            if event.get('type') == 'complete':
                # Prepend an AI disclaimer to the final report text.
                report = event.get('report', '')
                disclaimer = gettext(
                    '> **Note:** This report was generated by '
                    '%(provider)s / %(model)s. '
                    'AI systems can make mistakes. Please verify all findings '
                    'and recommendations before taking action.\n\n'
                ) % {
                    'provider': client.provider_name,
                    'model': client.model_name
                }
                event['report'] = disclaimer + report
            yield _sse_event(event)
    except Exception as e:
        # Surface pipeline failures to the client as an SSE error event
        # instead of tearing down the stream with a traceback.
        yield _sse_event({
            'type': 'error',
            'message': gettext('Failed to generate report: ') + str(e)
        })
def generate_report_sync(
    report_type: str,
    scope: str,
    conn,
    manager,
    context: dict,
    client: Optional[LLMClient] = None
) -> tuple[bool, str]:
    """Generate a report synchronously (non-streaming).

    Args:
        report_type: One of 'security', 'performance', 'design'.
        scope: One of 'server', 'database', 'schema'.
        conn: Database connection.
        manager: Connection manager.
        context: Report context dict.
        client: Optional LLM client.

    Returns:
        Tuple of (success, report_or_error_message).
    """
    # Use the supplied client, or create one on demand.
    if client is None:
        client = get_llm_client()
    if not client:
        return False, gettext('Failed to initialize LLM client.')

    section_list = get_sections_for_scope(report_type, scope)
    if not section_list:
        return False, gettext('No sections available for this report type.')

    # The pipeline prompts need the server version.
    context['server_version'] = manager.ver

    pipeline = ReportPipeline(
        report_type=report_type,
        sections=section_list,
        client=client,
        query_executor=create_query_executor(conn)
    )

    try:
        body = pipeline.execute(context)
        # Prepend an AI disclaimer so readers verify the findings.
        disclaimer = gettext(
            '> **Note:** This report was generated by '
            '%(provider)s / %(model)s. '
            'AI systems can make mistakes. Please verify all findings '
            'and recommendations before taking action.\n\n'
        ) % {
            'provider': client.provider_name,
            'model': client.model_name
        }
        return True, disclaimer + body
    except Exception as e:
        return False, gettext('Failed to generate report: ') + str(e)
def _sse_event(data: dict) -> bytes:
"""Format data as an SSE event.
Args:
data: Event data dictionary.
Returns:
SSE-formatted bytes with padding to help flush buffers.
"""
# Add padding comment to help flush buffers in some WSGI servers
# Some servers buffer until a certain amount of data is received
json_data = json.dumps(data)
# Minimum 2KB total to help flush various buffer sizes
padding_needed = max(0, 2048 - len(json_data) - 20)
padding = f": {'.' * padding_needed}\n" if padding_needed > 0 else ""
return f"{padding}data: {json_data}\n\n".encode('utf-8')
def _wrap_generator_with_keepalive(generator: Generator) -> Generator:
"""Wrap a generator to add SSE keepalive and initial flush.
Args:
generator: Original event generator.
Yields:
SSE events (as bytes) with initial connection event.
"""
# Send initial comment to establish connection and flush headers
# The retry directive tells browser to reconnect after 3s if disconnected
yield b": SSE stream connected\nretry: 3000\n\n"
# Yield all events from the original generator
for event in generator:
yield event
def create_sse_response(generator: Generator) -> Response:
    """Create a Flask Response for SSE streaming.

    Args:
        generator: Generator that yields SSE event strings.

    Returns:
        Flask Response configured for SSE.
    """
    sse_headers = {
        'Cache-Control': 'no-cache, no-store, must-revalidate',
        'Pragma': 'no-cache',
        'Expires': '0',
        'Connection': 'keep-alive',
        'X-Accel-Buffering': 'no',  # Disable nginx buffering
    }
    # Prepend the connection/keepalive event, and keep Flask's request
    # context alive for the generator's whole lifecycle — required for
    # streaming responses.
    body = stream_with_context(_wrap_generator_with_keepalive(generator))
    response = Response(body, mimetype='text/event-stream',
                        headers=sse_headers)
    # Werkzeug must not buffer the response — critical for SSE to work.
    response.direct_passthrough = True
    return response