fix(backend): Fix validation of hostname-less URLs (#9171)

Previously, `http://` would be canonicalized to `http://http:` (hostname `http`)
and so pass the no-hostname check. Validation still failed eventually, but only
at the hostname lookup, which had to time out first and therefore took very long.

### Changes 🏗️

- Fix URL canonicalization logic
- Merge `_canonicalize_url` into `validate_url`

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  <!-- Put your test plan here: -->
  - [x] CI
pull/9180/head
Reinier van der Leer 2025-01-03 10:48:30 +01:00 committed by GitHub
parent d7d69f397f
commit fa98827fd1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 5 additions and 16 deletions

View File

@ -33,20 +33,6 @@ ALLOWED_SCHEMES = ["http", "https"]
HOSTNAME_REGEX = re.compile(r"^[A-Za-z0-9.-]+$") # Basic DNS-safe hostname pattern
# Matches a leading "scheme://" prefix (RFC 3986 scheme syntax), e.g. "http://".
_SCHEME_PREFIX_REGEX = re.compile(r"[A-Za-z][A-Za-z0-9+.-]*://")


def _canonicalize_url(url: str) -> str:
    """
    Normalize a user-supplied URL.

    Steps, in order:
    1. Strip surrounding whitespace.
    2. Replace backslashes with forward slashes.
    3. Drop leading slashes.
    4. Prepend ``http://`` if the URL does not start with a ``scheme://``
       prefix. An explicit scheme (of any kind, in any letter case) is kept
       as-is rather than being wrapped in a second ``http://``.
    5. Strip trailing slashes from the part *after* the scheme prefix.

    The scheme prefix itself is never trimmed, so a hostname-less input such
    as ``"http://"`` comes back as ``"http://"`` instead of being mangled
    into ``"http://http:"`` (which would carry the bogus hostname ``http``).

    Args:
        url: Raw URL string, with or without a scheme.

    Returns:
        The normalized URL, always starting with a ``scheme://`` prefix.
    """
    # Backslashes are normalized *before* scheme detection so that inputs
    # like "http:\\\\host" are recognized as already having a scheme.
    url = url.strip().replace("\\", "/").lstrip("/")

    match = _SCHEME_PREFIX_REGEX.match(url)
    if match:
        prefix, rest = url[: match.end()], url[match.end() :]
    else:
        # No "scheme://" present (this includes bare "host:port" inputs).
        prefix, rest = "http://", url

    # Only the remainder is right-stripped; stripping the whole string would
    # eat the slashes of a bare "http://" and break scheme detection.
    return prefix + rest.rstrip("/")
def _is_ip_blocked(ip: str) -> bool:
"""
Checks if the IP address is in a blocked network.
@ -61,8 +47,11 @@ def validate_url(url: str, trusted_origins: list[str]) -> str:
to a private, link-local, or otherwise blocked IP address unless
the hostname is explicitly trusted.
"""
# Normalize/canonicalize input
url = _canonicalize_url(url)
# Canonicalize URL
url = url.strip("/ ").replace("\\", "/")
parsed = urlparse(url)
if not parsed.scheme:
url = f"http://{url}"
parsed = urlparse(url)
# Check scheme