108 lines
2.8 KiB
Python
108 lines
2.8 KiB
Python
import functools
|
|
import re
|
|
from typing import Any, Callable
|
|
from urllib.parse import urljoin, urlparse
|
|
|
|
from requests.compat import urljoin
|
|
|
|
|
|
def validate_url(func: Callable[..., Any]) -> Any:
    """Decorate a command so its leading ``url`` argument is vetted first.

    The returned wrapper runs a series of checks on the URL and only then
    forwards a sanitized copy of it, plus any remaining arguments, to ``func``.
    """

    @functools.wraps(func)
    def wrapper(url: str, *args, **kwargs) -> Any:
        """Validate ``url`` and call the wrapped function with a sanitized copy.

        Args:
            url (str): The URL to check
            *args: Extra positional arguments passed through to ``func``
            **kwargs: Extra keyword arguments passed through to ``func``

        Returns:
            the result of the wrapped function

        Raises:
            ValueError if the url fails any of the validation tests
        """
        # The checks run in a fixed order and stop at the first failure, so
        # the reported message always names the earliest problem found.
        if re.match(r"^https?://", url) is None:
            problem = "Invalid URL format"
        elif not is_valid_url(url):
            problem = "Missing Scheme or Network location"
        elif check_local_file_access(url):
            problem = "Access to local files is restricted"
        elif len(url) > 2000:
            problem = "URL is too long"
        else:
            problem = None

        if problem is not None:
            raise ValueError(problem)

        cleaned_url = sanitize_url(url)
        return func(cleaned_url, *args, **kwargs)

    return wrapper
|
|
|
|
|
|
def is_valid_url(url: str) -> bool:
    """Report whether a URL has both a scheme and a network location.

    Args:
        url (str): The URL to inspect

    Returns:
        bool: True when parsing yields a non-empty scheme and a non-empty
            netloc, False otherwise (including when parsing raises ValueError)
    """
    try:
        parts = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed inputs outright (e.g. an unbalanced
        # "[" in the host); treat those as simply not valid.
        return False
    return bool(parts.scheme) and bool(parts.netloc)
|
|
|
|
|
|
def sanitize_url(url: str) -> str:
    """Sanitize the URL by re-serialising it without its fragment.

    The scheme, network location, path, path parameters and query are kept
    exactly as parsed; only the ``#fragment`` part is removed. Dot segments
    in the path are left untouched.

    Args:
        url (str): The URL to sanitize

    Returns:
        str: The sanitized URL
    """
    parsed_url = urlparse(url)
    # Serialise the parsed components directly instead of gluing
    # path/params/query into a relative reference and resolving it with
    # urljoin. That approach lost the ";" between path and params, and a path
    # beginning with "//" was interpreted as a network-path reference, which
    # silently swapped the host *after* the URL had been validated.
    return parsed_url._replace(fragment="").geturl()
|
|
|
|
|
|
def check_local_file_access(url: str) -> bool:
    """Check if the URL is a local file or targets the local machine.

    A URL is flagged when it starts with one of the known local prefixes, or
    when the host extracted from it is ``localhost`` or an IP address that is
    loopback or unspecified. Looking at the parsed host, rather than only at
    string prefixes, also catches upper-case hosts, embedded credentials
    (``http://user@localhost/``), IPv6 loopback (``http://[::1]/``) and the
    rest of ``127.0.0.0/8``.

    Args:
        url (str): The URL to check

    Returns:
        bool: True if the URL is a local file or points at the local host,
            False otherwise
    """
    # Imported here so the block does not depend on a module-level import.
    import ipaddress

    # Each prefix without a trailing slash already covers the variant with
    # one, so only the shorter form is listed.
    local_prefixes = (
        "file:///",
        "file://localhost",
        "http://localhost",
        "https://localhost",
        "http://2130706433",
        "https://2130706433",
        "http://127.0.0.1",
        "https://127.0.0.1",
        "http://0.0.0.0",
        "https://0.0.0.0",
        "http://0000",
        "https://0000",
    )
    if url.startswith(local_prefixes):
        return True

    try:
        # .hostname is lower-cased and excludes credentials, port and the
        # square brackets around IPv6 literals.
        host = urlparse(url).hostname
    except ValueError:
        return False
    if not host:
        return False

    # A trailing dot is the fully-qualified spelling of the same name.
    host = host.rstrip(".")
    if host == "localhost":
        return True

    try:
        # An all-digit host is the integer spelling of an IP address
        # (2130706433 == 127.0.0.1), which ip_address accepts as an int.
        address = ipaddress.ip_address(int(host) if host.isdigit() else host)
    except ValueError:
        # Not an IP literal: an ordinary host name, or a spelling such as
        # hex/octal octets that ipaddress does not understand.
        return False

    # Judge IPv4-mapped IPv6 addresses (::ffff:127.0.0.1) by the IPv4 address
    # they embed.
    mapped = getattr(address, "ipv4_mapped", None)
    if mapped is not None:
        address = mapped

    return address.is_loopback or address.is_unspecified
|