Issue #88183 by Wim Leers, ahoeben, brianV, alexpott, kylebrowning, marvil07, chx, twistor, dawehner: Relative URLs in feeds should be converted to absolute ones
parent
22a241ca04
commit
417d900623
|
@ -1635,3 +1635,7 @@ services:
|
|||
arguments: ['@current_user', '@path.current', '@path.matcher', '@language_manager']
|
||||
tags:
|
||||
- { name: event_subscriber }
|
||||
response_filter.rss.relative_url:
|
||||
class: Drupal\Core\EventSubscriber\RssResponseRelativeUrlFilter
|
||||
tags:
|
||||
- { name: event_subscriber }
|
||||
|
|
|
@ -36,6 +36,25 @@ class Html {
|
|||
*/
|
||||
protected static $isAjax = FALSE;
|
||||
|
||||
/**
|
||||
* All attributes that may contain URIs.
|
||||
*
|
||||
* - The attributes 'code' and 'codebase' are omitted, because they only exist
|
||||
* for the <applet> tag. The time of Java applets has passed.
|
||||
* - The attribute 'icon' is omitted, because no browser implements the
|
||||
* <command> tag anymore.
|
||||
* See https://developer.mozilla.org/en-US/docs/Web/HTML/Element/command.
|
||||
* - The 'manifest' attribute is omitted because it only exists for the <html>
|
||||
* tag. That tag only makes sense in an HTML-served-as-HTML context, in which
|
||||
* case relative URLs are guaranteed to work.
|
||||
*
|
||||
* @see https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes
|
||||
* @see https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value
|
||||
*
|
||||
* @var string[]
|
||||
*/
|
||||
protected static $uriAttributes = ['href', 'poster', 'src', 'cite', 'data', 'action', 'formaction', 'srcset', 'about'];
|
||||
|
||||
/**
|
||||
* Prepares a string for use as a valid class name.
|
||||
*
|
||||
|
@ -402,4 +421,61 @@ EOD;
|
|||
return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
|
||||
}
|
||||
|
||||
/**
 * Converts all root-relative URLs to absolute URLs.
 *
 * Does not change any existing protocol-relative or absolute URLs. Does not
 * change other relative URLs because they would result in different absolute
 * URLs depending on the current path. For example: when the same content
 * containing such a relative URL (for example 'image.png'), is served from
 * its canonical URL (for example 'http://example.com/some-article') or from
 * a listing or feed (for example 'http://example.com/all-articles') their
 * "current path" differs, resulting in different absolute URLs:
 * 'http://example.com/some-article/image.png' versus
 * 'http://example.com/all-articles/image.png'. Only one can be correct.
 * Therefore relative URLs that are not root-relative cannot be safely
 * transformed and should generally be avoided.
 *
 * Necessary for HTML that is served outside of a website, for example, RSS
 * and e-mail.
 *
 * Attributes holding a single URI are prefixed when their value starts with
 * exactly one slash. The 'srcset' attribute holds a comma-separated list of
 * image candidate strings, so each candidate is inspected individually.
 *
 * @param string $html
 *   The partial (X)HTML snippet to load. Invalid markup will be corrected on
 *   import.
 * @param string $scheme_and_host
 *   The root URL, which has a URI scheme, host and optional port.
 *
 * @return string
 *   The updated (X)HTML snippet.
 */
public static function transformRootRelativeUrlsToAbsolute($html, $scheme_and_host) {
  // Expression assertions are used instead of string assertions: a string
  // passed to assert() is not evaluated as code on PHP 8, which would turn
  // these checks into no-ops.
  assert(empty(array_diff(array_keys(parse_url($scheme_and_host)), ["scheme", "host", "port"])), '$scheme_and_host contains scheme, host and port at most.');
  assert(isset(parse_url($scheme_and_host)["scheme"]), '$scheme_and_host is absolute and hence has a scheme.');
  assert(isset(parse_url($scheme_and_host)["host"]), '$scheme_and_host is absolute and hence has a host.');

  $html_dom = Html::load($html);
  $xpath = new \DOMXpath($html_dom);

  // Update all root-relative URLs to absolute URLs in the given HTML.
  // First pass: attributes that contain a single URI.
  foreach (static::$uriAttributes as $attr) {
    // 'srcset' contains a list of candidates rather than one URI; it is
    // handled candidate by candidate in the second pass below.
    if ($attr === 'srcset') {
      continue;
    }
    foreach ($xpath->query("//*[starts-with(@$attr, '/') and not(starts-with(@$attr, '//'))]") as $node) {
      $node->setAttribute($attr, $scheme_and_host . $node->getAttribute($attr));
    }
  }

  // Second pass: every image candidate string within 'srcset' attributes.
  // This runs once, not once per URI attribute.
  // @see https://html.spec.whatwg.org/multipage/embedded-content.html#attr-img-srcset
  // @see https://html.spec.whatwg.org/multipage/embedded-content.html#image-candidate-string
  foreach ($xpath->query("//*[@srcset]") as $node) {
    // NOTE(review): splitting on ',' assumes the URLs themselves contain no
    // commas; such URLs would be split apart here.
    $image_candidate_strings = array_map('trim', explode(',', $node->getAttribute('srcset')));
    // An empty attribute or a stray/trailing comma yields empty candidates;
    // drop them so they are neither inspected nor re-emitted.
    $image_candidate_strings = array_filter($image_candidate_strings, 'strlen');
    foreach ($image_candidate_strings as $key => $image_candidate_string) {
      // strpos() is safe for candidates of any length, including a lone '/',
      // whereas reading offsets 0 and 1 directly is not.
      if (strpos($image_candidate_string, '/') === 0 && strpos($image_candidate_string, '//') !== 0) {
        $image_candidate_strings[$key] = $scheme_and_host . $image_candidate_string;
      }
    }
    $node->setAttribute('srcset', implode(', ', $image_candidate_strings));
  }

  return Html::serialize($html_dom);
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
<?php
|
||||
|
||||
namespace Drupal\Core\EventSubscriber;
|
||||
|
||||
use Drupal\Component\Utility\Html;
|
||||
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
|
||||
use Symfony\Component\HttpFoundation\Request;
|
||||
use Symfony\Component\HttpKernel\Event\FilterResponseEvent;
|
||||
use Symfony\Component\HttpKernel\KernelEvents;
|
||||
|
||||
/**
 * Subscribes to filter RSS responses, to make relative URIs absolute.
 */
class RssResponseRelativeUrlFilter implements EventSubscriberInterface {

  /**
   * Converts relative URLs to absolute URLs.
   *
   * Responses whose Content-Type does not mention 'application/rss+xml', and
   * responses without a non-empty string body, are left untouched.
   *
   * @param \Symfony\Component\HttpKernel\Event\FilterResponseEvent $event
   *   The response event.
   */
  public function onResponse(FilterResponseEvent $event) {
    $response = $event->getResponse();

    // Only care about RSS responses. The header may be missing altogether, so
    // cast to string before searching it.
    if (stripos((string) $response->headers->get('Content-Type'), 'application/rss+xml') === FALSE) {
      return;
    }

    // Without a string body there is nothing to parse or rewrite.
    $content = $response->getContent();
    if (!is_string($content) || $content === '') {
      return;
    }

    $response->setContent($this->transformRootRelativeUrlsToAbsolute($content, $event->getRequest()));
  }

  /**
   * Converts all root-relative URLs to absolute URLs in RSS markup.
   *
   * Does not change any existing protocol-relative or absolute URLs.
   *
   * @param string $rss_markup
   *   The RSS markup to update.
   * @param \Symfony\Component\HttpFoundation\Request $request
   *   The current request.
   *
   * @return string
   *   The updated RSS markup, or the markup exactly as given when it is empty
   *   or cannot be parsed as XML.
   */
  protected function transformRootRelativeUrlsToAbsolute($rss_markup, Request $request) {
    if (!is_string($rss_markup) || $rss_markup === '') {
      return $rss_markup;
    }

    $rss_dom = new \DOMDocument();

    // Collect libxml errors internally instead of emitting PHP warnings, and
    // restore the previous setting afterwards.
    $previous_setting = libxml_use_internal_errors(TRUE);
    $is_loaded = $rss_dom->loadXML($rss_markup);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_setting);

    // Serializing a document that failed to load would replace the feed with
    // an empty XML document, so hand back the original markup instead.
    if (!$is_loaded) {
      return $rss_markup;
    }

    $scheme_and_host = $request->getSchemeAndHttpHost();

    // Invoke Html::transformRootRelativeUrlsToAbsolute() on all HTML content
    // embedded in this RSS feed.
    foreach ($rss_dom->getElementsByTagName('description') as $node) {
      $html_markup = $node->nodeValue;
      if (!empty($html_markup)) {
        $absolute_markup = Html::transformRootRelativeUrlsToAbsolute($html_markup, $scheme_and_host);
        // Replace the content with a text node rather than assigning
        // nodeValue: the latter interprets entity references in the string,
        // which would unescape the embedded HTML one level too far.
        while ($node->firstChild) {
          $node->removeChild($node->firstChild);
        }
        $node->appendChild($rss_dom->createTextNode($absolute_markup));
      }
    }

    return $rss_dom->saveXML();
  }

  /**
   * {@inheritdoc}
   */
  public static function getSubscribedEvents() {
    $events = [];

    // Should run after any other response subscriber that modifies the markup.
    // @see \Drupal\Core\EventSubscriber\ActiveLinkResponseFilter
    $events[KernelEvents::RESPONSE][] = ['onResponse', -512];

    return $events;
  }

}
|
|
@ -56,13 +56,13 @@ class FileFieldRSSContentTest extends FileFieldTestBase {
|
|||
// Check that the RSS enclosure appears in the RSS feed.
|
||||
$this->drupalGet('rss.xml');
|
||||
$uploaded_filename = str_replace('public://', '', $node_file->getFileUri());
|
||||
$test_element = sprintf(
|
||||
'<enclosure url="%s" length="%s" type="%s" />',
|
||||
$selector = sprintf(
|
||||
'enclosure[url="%s"][length="%s"][type="%s"]',
|
||||
file_create_url("public://$uploaded_filename", array('absolute' => TRUE)),
|
||||
$node_file->getSize(),
|
||||
$node_file->getMimeType()
|
||||
);
|
||||
$this->assertRaw($test_element, 'File field RSS enclosure is displayed when viewing the RSS feed.');
|
||||
$this->assertTrue(!empty($this->cssSelect($selector)), 'File field RSS enclosure is displayed when viewing the RSS feed.');
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
namespace Drupal\node\Tests;
|
||||
|
||||
use Drupal\filter\Entity\FilterFormat;
|
||||
|
||||
/**
|
||||
* Ensures that data added to nodes by other modules appears in RSS feeds.
|
||||
*
|
||||
|
@ -60,4 +62,47 @@ class NodeRSSContentTest extends NodeTestBase {
|
|||
$this->assertNoText($rss_only_content, 'Node content designed for RSS does not appear when viewing node.');
|
||||
}
|
||||
|
||||
/**
 * Tests relative, root-relative, protocol-relative and absolute URLs.
 */
public function testUrlHandling() {
  // By default only the plain_text text format is available, and it escapes
  // all HTML. Create a format without any filters for the link markup.
  $full_html = FilterFormat::create([
    'format' => 'full_html',
    'name' => 'Full HTML',
    'filters' => [],
  ]);
  $full_html->save();

  // The three flavours of URL under test.
  $root_relative_url = file_url_transform_relative(file_create_url('public://root-relative'));
  $scheme_prefix = \Drupal::request()->getScheme() . ':';
  $protocol_relative_url = substr(file_create_url('public://protocol-relative'), strlen($scheme_prefix));
  $absolute_url = file_create_url('public://absolute');

  // One promoted article per URL flavour, each linking to its URL.
  $links = [
    'Root-relative URL' => $root_relative_url,
    'Protocol-relative URL' => $protocol_relative_url,
    'Absolute URL' => $absolute_url,
  ];
  foreach ($links as $link_text => $link_url) {
    $this->drupalCreateNode([
      'type' => 'article',
      'promote' => 1,
      'body' => [
        'value' => '<p><a href="' . $link_url . '">' . $link_text . '</a></p>',
        'format' => 'full_html',
      ],
    ]);
  }

  $this->drupalGet('rss.xml');
  $this->assertRaw(file_create_url('public://root-relative'), 'Root-relative URL is transformed to absolute.');
  $this->assertRaw($protocol_relative_url, 'Protocol-relative URL is left untouched.');
  $this->assertRaw($absolute_url, 'Absolute URL is left untouched.');
}
|
||||
|
||||
}
|
||||
|
|
|
@ -105,7 +105,7 @@ class RssTest extends TaxonomyTestBase {
|
|||
|
||||
// Test that the feed page exists for the term.
|
||||
$this->drupalGet("taxonomy/term/{$term1->id()}/feed");
|
||||
$this->assertRaw('<rss version="2.0"', "Feed page is RSS.");
|
||||
$this->assertTrue(!empty($this->cssSelect('rss[version="2.0"]')), "Feed page is RSS.");
|
||||
|
||||
// Check that the "Exception value" is disabled by default.
|
||||
$this->drupalGet('taxonomy/term/all/feed');
|
||||
|
|
|
@ -80,7 +80,7 @@ class BasicTest extends WizardTestBase {
|
|||
$elements = $this->cssSelect('link[href="' . Url::fromRoute('view.' . $view2['id'] . '.feed_1', [], ['absolute' => TRUE])->toString() . '"]');
|
||||
$this->assertEqual(count($elements), 1, 'Feed found.');
|
||||
$this->drupalGet($view2['page[feed_properties][path]']);
|
||||
$this->assertRaw('<rss version="2.0"');
|
||||
$this->assertTrue(!empty($this->cssSelect('rss[version="2.0"]')));
|
||||
// The feed should have the same title and nodes as the page.
|
||||
$this->assertText($view2['page[title]']);
|
||||
$this->assertRaw($node1->url('canonical', ['absolute' => TRUE]));
|
||||
|
|
|
@ -321,4 +321,72 @@ class HtmlTest extends UnitTestCase {
|
|||
$this->assertSame('', $result);
|
||||
}
|
||||
|
||||
/**
 * @covers ::transformRootRelativeUrlsToAbsolute
 * @dataProvider providerTestTransformRootRelativeUrlsToAbsolute
 */
public function testTransformRootRelativeUrlsToAbsolute($html, $scheme_and_host, $expected_html) {
  // A falsy expectation means the markup must come back unchanged.
  if (!$expected_html) {
    $expected_html = $html;
  }
  $actual_html = Html::transformRootRelativeUrlsToAbsolute($html, $scheme_and_host);
  $this->assertSame($expected_html, $actual_html);
}
|
||||
|
||||
/**
 * @covers ::transformRootRelativeUrlsToAbsolute
 * @dataProvider providerTestTransformRootRelativeUrlsToAbsoluteAssertion
 * @expectedException \AssertionError
 */
public function testTransformRootRelativeUrlsToAbsoluteAssertion($scheme_and_host) {
  // The markup is irrelevant here; only $scheme_and_host is under test.
  $markup = '';
  Html::transformRootRelativeUrlsToAbsolute($markup, $scheme_and_host);
}
|
||||
|
||||
/**
 * Provides test data for testTransformRootRelativeUrlsToAbsolute().
 *
 * Each case is [input HTML, scheme and host, expected HTML], where an
 * expected value of FALSE means the input is expected to stay unchanged.
 *
 * @return array
 *   Test data.
 */
public function providerTestTransformRootRelativeUrlsToAbsolute() {
  // One random tag name, shared by every generated case.
  $tag = strtolower($this->randomMachineName());

  // A site installed either in the root of a domain or a subdirectory.
  $prefixes = ['/', '/subdir/' . $this->randomMachineName() . '/'];

  $host = 'http://example.com';
  $single_uri_attributes = ['href', 'poster', 'src', 'cite', 'data', 'action', 'formaction', 'about'];

  // Renders an element carrying one attribute and a "<label> test" body.
  $element = function ($attribute, $value, $label) use ($tag) {
    return "<$tag $attribute=\"$value\">$label test</$tag>";
  };

  $cases = [];
  foreach ($prefixes as $prefix) {
    // 'srcset' is the only attribute whose value is more than just a URL, so
    // it gets its own cases: an absolute candidate followed by the candidate
    // under test.
    $first_candidate = "http://example.com{$prefix}already-absolute 200w";
    $cases += [
      "$tag, srcset, $prefix: root-relative" => [
        $element('srcset', "$first_candidate, {$prefix}root-relative 300w", 'root-relative'),
        $host,
        $element('srcset', "$first_candidate, http://example.com{$prefix}root-relative 300w", 'root-relative'),
      ],
      "$tag, srcset, $prefix: protocol-relative" => [
        $element('srcset', "$first_candidate, //example.com{$prefix}protocol-relative 300w", 'protocol-relative'),
        $host,
        FALSE,
      ],
      "$tag, srcset, $prefix: absolute" => [
        $element('srcset', "$first_candidate, http://example.com{$prefix}absolute 300w", 'absolute'),
        $host,
        FALSE,
      ],
    ];

    foreach ($single_uri_attributes as $attribute) {
      $cases += [
        "$tag, $attribute, $prefix: root-relative" => [
          $element($attribute, "{$prefix}root-relative", 'root-relative'),
          $host,
          $element($attribute, "http://example.com{$prefix}root-relative", 'root-relative'),
        ],
        "$tag, $attribute, $prefix: protocol-relative" => [
          $element($attribute, "//example.com{$prefix}protocol-relative", 'protocol-relative'),
          $host,
          FALSE,
        ],
        "$tag, $attribute, $prefix: absolute" => [
          $element($attribute, "http://example.com{$prefix}absolute", 'absolute'),
          $host,
          FALSE,
        ],
      ];
    }
  }

  return $cases;
}
|
||||
|
||||
/**
 * Provides test data for testTransformRootRelativeUrlsToAbsoluteAssertion().
 *
 * Every value is something other than a bare scheme-and-host root URL.
 *
 * @return array
 *   Test data.
 */
public function providerTestTransformRootRelativeUrlsToAbsoluteAssertion() {
  $cases = [];
  $cases['only relative path'] = ['llama'];
  $cases['only root-relative path'] = ['/llama'];
  $cases['host and path'] = ['example.com/llama'];
  $cases['scheme, host and path'] = ['http://example.com/llama'];
  return $cases;
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue