Allow arbitrary Gemini attachments (#138751)

* Gemini: Allow arbitrary attachments

This lets me use Gemini to extract information from PDFs, HTML, or other files.

* Gemini: Only add deprecation warning when deprecated parameter has a value

* Gemini: Use Files.upload() for both images and other files

This simplifies the code.

Within the Google client, this takes a different code path (it uploads images as files instead of re-saving them as inline bytes). I think that's a feature (it's probably more efficient?).

* Gemini: Deduplicate filenames
pull/139157/head
SLaks 2025-02-23 19:11:38 -05:00 committed by GitHub
parent d62c18c225
commit 580c6f2684
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 59 additions and 50 deletions

View File

@ -2,12 +2,10 @@
from __future__ import annotations
import mimetypes
from pathlib import Path
from google import genai # type: ignore[attr-defined]
from google.genai.errors import APIError, ClientError
from PIL import Image
from requests.exceptions import Timeout
import voluptuous as vol
@ -26,6 +24,7 @@ from homeassistant.exceptions import (
HomeAssistantError,
)
from homeassistant.helpers import config_validation as cv
from homeassistant.helpers.issue_registry import IssueSeverity, async_create_issue
from homeassistant.helpers.typing import ConfigType
from .const import (
@ -38,6 +37,7 @@ from .const import (
SERVICE_GENERATE_CONTENT = "generate_content"
CONF_IMAGE_FILENAME = "image_filename"
CONF_FILENAMES = "filenames"
CONFIG_SCHEMA = cv.config_entry_only_config_schema(DOMAIN)
PLATFORMS = (Platform.CONVERSATION,)
@ -50,31 +50,43 @@ async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:
async def generate_content(call: ServiceCall) -> ServiceResponse:
"""Generate content from text and optionally images."""
prompt_parts = [call.data[CONF_PROMPT]]
def append_images_to_prompt():
image_filenames = call.data[CONF_IMAGE_FILENAME]
for image_filename in image_filenames:
if not hass.config.is_allowed_path(image_filename):
raise HomeAssistantError(
f"Cannot read `{image_filename}`, no access to path; "
"`allowlist_external_dirs` may need to be adjusted in "
"`configuration.yaml`"
if call.data[CONF_IMAGE_FILENAME]:
# Deprecated in 2025.3, to remove in 2025.9
async_create_issue(
hass,
DOMAIN,
"deprecated_image_filename_parameter",
breaks_in_ha_version="2025.9.0",
is_fixable=False,
severity=IssueSeverity.WARNING,
translation_key="deprecated_image_filename_parameter",
)
if not Path(image_filename).exists():
raise HomeAssistantError(f"`{image_filename}` does not exist")
mime_type, _ = mimetypes.guess_type(image_filename)
if mime_type is None or not mime_type.startswith("image"):
raise HomeAssistantError(f"`{image_filename}` is not an image")
prompt_parts.append(Image.open(image_filename))
await hass.async_add_executor_job(append_images_to_prompt)
prompt_parts = [call.data[CONF_PROMPT]]
config_entry: GoogleGenerativeAIConfigEntry = hass.config_entries.async_entries(
DOMAIN
)[0]
client = config_entry.runtime_data
def append_files_to_prompt():
    """Validate each requested attachment, upload it, and add it to the prompt.

    Reads both the deprecated image-filename list and the newer filenames
    list from the service call, drops duplicate paths, and appends the result
    of ``client.files.upload`` for each remaining path to ``prompt_parts``.

    This helper does blocking work (filesystem checks and uploads), which is
    why the caller hands it to ``hass.async_add_executor_job``.

    Raises:
        HomeAssistantError: If a path is not allowed by
            ``hass.config.is_allowed_path`` or does not exist.
    """
    image_filenames = call.data[CONF_IMAGE_FILENAME]
    filenames = call.data[CONF_FILENAMES]
    # dict.fromkeys() removes duplicates just like set() did, but keeps the
    # first-seen order so attachments reach the model in the order the caller
    # listed them instead of an arbitrary, run-dependent order.
    for filename in dict.fromkeys(image_filenames + filenames):
        if not hass.config.is_allowed_path(filename):
            raise HomeAssistantError(
                f"Cannot read `{filename}`, no access to path; "
                "`allowlist_external_dirs` may need to be adjusted in "
                "`configuration.yaml`"
            )
        if not Path(filename).exists():
            raise HomeAssistantError(f"`{filename}` does not exist")
        prompt_parts.append(client.files.upload(file=filename))
await hass.async_add_executor_job(append_files_to_prompt)
try:
response = await client.aio.models.generate_content(
model=RECOMMENDED_CHAT_MODEL, contents=prompt_parts
@ -105,6 +117,9 @@ async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:
vol.Optional(CONF_IMAGE_FILENAME, default=[]): vol.All(
cv.ensure_list, [cv.string]
),
vol.Optional(CONF_FILENAMES, default=[]): vol.All(
cv.ensure_list, [cv.string]
),
}
),
supports_response=SupportsResponse.ONLY,

View File

@ -9,3 +9,8 @@ generate_content:
required: false
selector:
object:
filenames:
required: false
selector:
text:
multiple: true

View File

@ -56,10 +56,21 @@
},
"image_filename": {
"name": "Image filename",
"description": "Images",
"description": "Deprecated. Use filenames instead.",
"example": "/config/www/image.jpg"
},
"filenames": {
"name": "Attachment filenames",
"description": "Attachments to add to the prompt (images, PDFs, etc)",
"example": "/config/www/image.jpg"
}
}
}
},
"issues": {
"deprecated_image_filename_parameter": {
"title": "Deprecated 'image_filename' parameter",
"description": "The 'image_filename' parameter in Google Generative AI actions is deprecated. Please edit scripts and automations to use 'filenames' instead."
}
}
}

View File

@ -8,7 +8,8 @@
dict({
'contents': list([
'Describe this image from my doorbell camera',
b'image bytes',
b'some file',
b'some file',
]),
'model': 'models/gemini-2.0-flash',
}),

View File

@ -66,8 +66,8 @@ async def test_generate_content_service_with_image(
),
) as mock_generate,
patch(
"homeassistant.components.google_generative_ai_conversation.Image.open",
return_value=b"image bytes",
"google.genai.files.Files.upload",
return_value=b"some file",
),
patch("pathlib.Path.exists", return_value=True),
patch.object(hass.config, "is_allowed_path", return_value=True),
@ -77,7 +77,7 @@ async def test_generate_content_service_with_image(
"generate_content",
{
"prompt": "Describe this image from my doorbell camera",
"image_filename": "doorbell_snapshot.jpg",
"filenames": ["doorbell_snapshot.jpg", "context.txt", "context.txt"],
},
blocking=True,
return_response=True,
@ -161,7 +161,7 @@ async def test_generate_content_service_with_image_not_allowed_path(
"generate_content",
{
"prompt": "Describe this image from my doorbell camera",
"image_filename": "doorbell_snapshot.jpg",
"filenames": "doorbell_snapshot.jpg",
},
blocking=True,
return_response=True,
@ -186,30 +186,7 @@ async def test_generate_content_service_with_image_not_exists(
"generate_content",
{
"prompt": "Describe this image from my doorbell camera",
"image_filename": "doorbell_snapshot.jpg",
},
blocking=True,
return_response=True,
)
@pytest.mark.usefixtures("mock_init_component")
async def test_generate_content_service_with_non_image(hass: HomeAssistant) -> None:
"""Test generate content service with a non image."""
with (
patch("pathlib.Path.exists", return_value=True),
patch.object(hass.config, "is_allowed_path", return_value=True),
patch("pathlib.Path.exists", return_value=True),
pytest.raises(
HomeAssistantError, match="`doorbell_snapshot.mp4` is not an image"
),
):
await hass.services.async_call(
"google_generative_ai_conversation",
"generate_content",
{
"prompt": "Describe this image from my doorbell camera",
"image_filename": "doorbell_snapshot.mp4",
"filenames": "doorbell_snapshot.jpg",
},
blocking=True,
return_response=True,