AutoGPT/autogpt/url_utils/validators.py

108 lines
2.8 KiB
Python

import functools
import re
from typing import Any, Callable
from urllib.parse import urljoin, urlparse
from requests.compat import urljoin
def validate_url(func: Callable[..., Any]) -> Any:
"""The method decorator validate_url is used to validate urls for any command that requires
a url as an argument"""
@functools.wraps(func)
def wrapper(url: str, *args, **kwargs) -> Any:
"""Check if the URL is valid using a basic check, urllib check, and local file check
Args:
url (str): The URL to check
Returns:
the result of the wrapped function
Raises:
ValueError if the url fails any of the validation tests
"""
# Most basic check if the URL is valid:
if not re.match(r"^https?://", url):
raise ValueError("Invalid URL format")
if not is_valid_url(url):
raise ValueError("Missing Scheme or Network location")
# Restrict access to local files
if check_local_file_access(url):
raise ValueError("Access to local files is restricted")
# Check URL length
if len(url) > 2000:
raise ValueError("URL is too long")
return func(sanitize_url(url), *args, **kwargs)
return wrapper
def is_valid_url(url: str) -> bool:
"""Check if the URL is valid
Args:
url (str): The URL to check
Returns:
bool: True if the URL is valid, False otherwise
"""
try:
result = urlparse(url)
return all([result.scheme, result.netloc])
except ValueError:
return False
def sanitize_url(url: str) -> str:
"""Sanitize the URL
Args:
url (str): The URL to sanitize
Returns:
str: The sanitized URL
"""
parsed_url = urlparse(url)
reconstructed_url = f"{parsed_url.path}{parsed_url.params}?{parsed_url.query}"
return urljoin(url, reconstructed_url)
def check_local_file_access(url: str) -> bool:
"""Check if the URL is a local file
Args:
url (str): The URL to check
Returns:
bool: True if the URL is a local file, False otherwise
"""
local_prefixes = [
"file:///",
"file://localhost/",
"file://localhost",
"http://localhost",
"http://localhost/",
"https://localhost",
"https://localhost/",
"http://2130706433",
"http://2130706433/",
"https://2130706433",
"https://2130706433/",
"http://127.0.0.1/",
"http://127.0.0.1",
"https://127.0.0.1/",
"https://127.0.0.1",
"https://0.0.0.0/",
"https://0.0.0.0",
"http://0.0.0.0/",
"http://0.0.0.0",
"http://0000",
"http://0000/",
"https://0000",
"https://0000/",
]
return any(url.startswith(prefix) for prefix in local_prefixes)