fix(backend): Fix validation of hostname-less URLs (#9171)
Previously, `http://` would be converted to `http://http` and pass the no-hostname check that way. It eventually fails validation, but only at hostname lookup, which times out and therefore takes very long.

### Changes 🏗️
- Fix URL canonicalization logic
- Merge `_canonicalize_url` into `validate_url`

### Checklist 📋
#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan: <!-- Put your test plan here: -->
  - [x] CI

pull/9180/head
parent
d7d69f397f
commit
fa98827fd1
|
@ -33,20 +33,6 @@ ALLOWED_SCHEMES = ["http", "https"]
|
|||
HOSTNAME_REGEX = re.compile(r"^[A-Za-z0-9.-]+$") # Basic DNS-safe hostname pattern
|
||||
|
||||
|
||||
def _canonicalize_url(url: str) -> str:
|
||||
"""
|
||||
Normalizes the URL by:
|
||||
1. Stripping whitespace and trailing slashes.
|
||||
2. Ensuring the scheme is http:// or https:// if missing.
|
||||
3. Replacing backslashes with forward slashes.
|
||||
"""
|
||||
url = url.strip().strip("/")
|
||||
if not url.startswith(("http://", "https://")):
|
||||
url = "http://" + url
|
||||
url = url.replace("\\", "/")
|
||||
return url
|
||||
|
||||
|
||||
def _is_ip_blocked(ip: str) -> bool:
|
||||
"""
|
||||
Checks if the IP address is in a blocked network.
|
||||
|
@ -61,8 +47,11 @@ def validate_url(url: str, trusted_origins: list[str]) -> str:
|
|||
to a private, link-local, or otherwise blocked IP address — unless
|
||||
the hostname is explicitly trusted.
|
||||
"""
|
||||
# Normalize/canonicalize input
|
||||
url = _canonicalize_url(url)
|
||||
# Canonicalize URL
|
||||
url = url.strip("/ ").replace("\\", "/")
|
||||
parsed = urlparse(url)
|
||||
if not parsed.scheme:
|
||||
url = f"http://{url}"
|
||||
parsed = urlparse(url)
|
||||
|
||||
# Check scheme
|
||||
|
|
Loading…
Reference in New Issue