fix(searchtheweb): Fix the Jina Search Block (#8583)

* update jina search web block

* update to false
contributor/master
Aarushi 2024-11-07 17:26:17 -06:00 committed by GitHub
parent 91edf08540
commit c25d03e945
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 90 additions and 47 deletions

View File

@ -0,0 +1,15 @@
from typing import Any, Optional
import requests
class GetRequest:
    """Mixin that gives a block a small HTTP GET helper."""

    @classmethod
    def get_request(
        cls, url: str, headers: Optional[dict] = None, json: bool = False
    ) -> Any:
        """Fetch ``url`` with a GET request and return the response body.

        Args:
            url: Address to fetch.
            headers: Optional HTTP headers; an empty dict is sent when omitted.
            json: When true, return the decoded JSON body instead of the text.

        Returns:
            ``response.json()`` if ``json`` is true, otherwise ``response.text``.

        Raises:
            requests.HTTPError: Propagated from ``raise_for_status()`` when the
                server answers with an error status.
        """
        reply = requests.get(url, headers={} if headers is None else headers)
        # Surface HTTP error statuses as exceptions instead of returning them.
        reply.raise_for_status()
        if json:
            return reply.json()
        return reply.text

View File

@ -11,6 +11,20 @@ JinaCredentialsInput = CredentialsMetaInput[
Literal["api_key"],
]
# Mock Jina API-key credentials used as a fixture by block tests.
TEST_CREDENTIALS = APIKeyCredentials(
    id="01234567-89ab-cdef-0123-456789abcdef",
    provider="jina",
    api_key=SecretStr("mock-jina-api-key"),
    title="Mock Jina API key",
    expires_at=None,
)

# Credentials metadata derived from TEST_CREDENTIALS; it identifies the
# credentials (provider/id/type/title) without carrying the secret itself.
TEST_CREDENTIALS_INPUT = {
    "provider": TEST_CREDENTIALS.provider,
    "id": TEST_CREDENTIALS.id,
    "type": TEST_CREDENTIALS.type,
    # The title must come from the credentials' title, not their type.
    "title": TEST_CREDENTIALS.title,
}
def JinaCredentialsField() -> JinaCredentialsInput:
"""

View File

@ -0,0 +1,57 @@
from groq._utils._utils import quote
from backend.blocks.jina._auth import (
TEST_CREDENTIALS,
TEST_CREDENTIALS_INPUT,
JinaCredentials,
JinaCredentialsField,
JinaCredentialsInput,
)
from backend.blocks.search import GetRequest
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import SchemaField
class SearchTheWebBlock(Block, GetRequest):
    """Block that searches the web for a query through the Jina Search URL."""

    class Input(BlockSchema):
        # Jina API-key credentials used to authorize the search request.
        credentials: JinaCredentialsInput = JinaCredentialsField()
        query: str = SchemaField(description="The search query to search the web for")

    class Output(BlockSchema):
        results: str = SchemaField(
            description="The search results including content from top 5 URLs"
        )
        error: str = SchemaField(description="Error message if the search fails")

    def __init__(self):
        super().__init__(
            id="87840993-2053-44b7-8da4-187ad4ee518c",
            description="This block searches the internet for the given search query.",
            categories={BlockCategory.SEARCH},
            input_schema=SearchTheWebBlock.Input,
            output_schema=SearchTheWebBlock.Output,
            test_input={
                "credentials": TEST_CREDENTIALS_INPUT,
                "query": "Artificial Intelligence",
            },
            test_credentials=TEST_CREDENTIALS,
            test_output=("results", "search content"),
            # Replace the HTTP helper during tests so no network call is made.
            test_mock={"get_request": lambda *args, **kwargs: "search content"},
        )

    def run(
        self, input_data: Input, *, credentials: JinaCredentials, **kwargs
    ) -> BlockOutput:
        """Run the search and yield the raw response text.

        Args:
            input_data: Block input carrying the search ``query``.
            credentials: Jina API-key credentials sent as a Bearer token.
            **kwargs: Extra execution arguments; unused here.

        Yields:
            A ``("results", text)`` pair with the response body returned by
            ``get_request``.
        """
        # The module-level `quote` is imported from a private groq utility
        # module; use the standard library's URL quoting so the query is
        # percent-encoded before it is placed in the URL path.
        from urllib.parse import quote as url_quote

        encoded_query = url_quote(input_data.query)

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
        }

        # Prepend the Jina Search URL to the encoded query
        jina_search_url = f"https://s.jina.ai/{encoded_query}"
        results = self.get_request(jina_search_url, headers=headers, json=False)

        # Output the search results
        yield "results", results

View File

@ -1,22 +1,14 @@
from typing import Any, Literal
from typing import Literal
from urllib.parse import quote
import requests
from autogpt_libs.supabase_integration_credentials_store.types import APIKeyCredentials
from pydantic import SecretStr
from backend.blocks.helpers.http import GetRequest
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import CredentialsField, CredentialsMetaInput, SchemaField
class GetRequest:
    """Mixin that gives a block a small HTTP GET helper."""

    @classmethod
    def get_request(cls, url: str, json=False) -> Any:
        """Fetch ``url`` with a GET request and return the response body.

        Returns ``response.json()`` when ``json`` is true, otherwise
        ``response.text``. Error statuses raise via ``raise_for_status()``.
        """
        reply = requests.get(url)
        reply.raise_for_status()
        if json:
            return reply.json()
        return reply.text
class GetWikipediaSummaryBlock(Block, GetRequest):
class Input(BlockSchema):
topic: str = SchemaField(description="The topic to fetch the summary for")
@ -48,42 +40,6 @@ class GetWikipediaSummaryBlock(Block, GetRequest):
yield "summary", response["extract"]
class SearchTheWebBlock(Block, GetRequest):
    """Block that searches the web for a query through the Jina Search URL."""

    class Input(BlockSchema):
        query: str = SchemaField(description="The search query to search the web for")

    class Output(BlockSchema):
        results: str = SchemaField(
            description="The search results including content from top 5 URLs"
        )
        error: str = SchemaField(description="Error message if the search fails")

    def __init__(self):
        super().__init__(
            id="87840993-2053-44b7-8da4-187ad4ee518c",
            description="This block searches the internet for the given search query.",
            categories={BlockCategory.SEARCH},
            input_schema=SearchTheWebBlock.Input,
            output_schema=SearchTheWebBlock.Output,
            test_input={"query": "Artificial Intelligence"},
            test_output=("results", "search content"),
            # Replace the HTTP helper during tests so no network call is made.
            test_mock={"get_request": lambda url, json: "search content"},
        )

    def run(self, input_data: Input, **kwargs) -> BlockOutput:
        """Yield ``("results", text)`` for the search query in ``input_data``."""
        # URL-encode the query and append it to the Jina Search base URL.
        search_url = "https://s.jina.ai/" + quote(input_data.query)
        # Fetch the response as plain text and emit it as the block's result.
        yield "results", self.get_request(search_url, json=False)
class ExtractWebsiteContentBlock(Block, GetRequest):
class Input(BlockSchema):
url: str = SchemaField(description="The URL to scrape the content from")

View File

@ -20,6 +20,7 @@ from pydantic import BaseModel
from backend.blocks.ai_shortform_video_block import AIShortformVideoCreatorBlock
from backend.blocks.ideogram import IdeogramModelBlock
from backend.blocks.jina.search import SearchTheWebBlock
from backend.blocks.llm import (
MODEL_METADATA,
AIConversationBlock,
@ -29,7 +30,7 @@ from backend.blocks.llm import (
LlmModel,
)
from backend.blocks.replicate_flux_advanced import ReplicateFluxAdvancedModelBlock
from backend.blocks.search import ExtractWebsiteContentBlock, SearchTheWebBlock
from backend.blocks.search import ExtractWebsiteContentBlock
from backend.blocks.talking_head import CreateTalkingAvatarVideoBlock
from backend.data.block import Block, BlockInput, get_block
from backend.util.settings import Config