fix(backend): Make URL pinning work with `extra_url_validator` (#9940)

GitHub blocks use a URL transformer passed to `Requests` to convert web
URLs to API URLs. This doesn't always work with the anti-SSRF URL
pinning mechanism that was implemented in #8531.

### Changes 🏗️
In `Requests.request(..)`:
- Apply `validate_url` *after* `extra_url_validator`, to prevent a
mismatch between `pinned_url` and `original_hostname`
- Simplify logic & add clarifying comments

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [x] Tested the github blocks that had the issue

---------

Co-authored-by: Reinier van der Leer <pwuts@agpt.co>
pull/9938/head
Nicholas Tindle 2025-05-15 08:12:39 -05:00 committed by GitHub
parent 2dc038b6c0
commit 9bef383df2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 18 additions and 20 deletions

6
.vscode/launch.json vendored
View File

@ -32,9 +32,9 @@
"type": "debugpy",
"request": "launch",
"module": "backend.app",
// "env": {
// "ENV": "dev"
// },
"env": {
"OBJC_DISABLE_INITIALIZE_FORK_SAFETY": "YES"
},
"envFile": "${workspaceFolder}/backend/.env",
"justMyCode": false,
"cwd": "${workspaceFolder}/autogpt_platform/backend"

View File

@ -224,33 +224,31 @@ class Requests:
*args,
**kwargs,
) -> req.Response:
# Validate URL and get pinned URL + original hostname
pinned_url, original_hostname = validate_url(url, self.trusted_origins)
# Apply any extra user-defined validation/transformation
if self.extra_url_validator is not None:
pinned_url = self.extra_url_validator(pinned_url)
url = self.extra_url_validator(url)
# Validate URL and get pinned URL + hostname
pinned_url, hostname = validate_url(url, self.trusted_origins)
# Merge any extra headers
headers = dict(headers) if headers else {}
if self.extra_headers is not None:
headers.update(self.extra_headers)
# Force the Host header to the original hostname
headers["Host"] = original_hostname
# Create a fresh session & mount our HostSSLAdapter if pinned to IP
session = req.Session()
pinned_parsed = urlparse(pinned_url)
# If pinned_url netloc is an IP (not in trusted_origins),
# then we attach the custom SNI adapter:
if pinned_parsed.hostname and pinned_parsed.hostname != original_hostname:
# That means we definitely pinned to an IP
mount_prefix = f"{pinned_parsed.scheme}://{pinned_parsed.hostname}"
if pinned_parsed.port:
mount_prefix += f":{pinned_parsed.port}"
adapter = HostSSLAdapter(ssl_hostname=original_hostname)
# If untrusted, the hostname in the URL is replaced with the corresponding
# IP address, and we need to override the Host header with the actual hostname.
if (pinned := urlparse(pinned_url)).hostname != hostname:
headers["Host"] = hostname
# If hostname was untrusted and we replaced it by (pinned it to) its IP,
# we also need to attach a custom SNI adapter to make SSL work:
mount_prefix = f"{pinned.scheme}://{pinned.hostname}"
if pinned.port:
mount_prefix += f":{pinned.port}"
adapter = HostSSLAdapter(ssl_hostname=hostname)
session.mount("https://", adapter)
# Perform the request with redirects disabled for manual handling