Issue #88183 by Wim Leers, ahoeben, brianV, alexpott, kylebrowning, marvil07, chx, twistor, dawehner: Relative URLs in feeds should be converted to absolute ones

8.2.x
Alex Pott 2016-07-30 16:07:44 +01:00
parent 22a241ca04
commit 417d900623
8 changed files with 270 additions and 5 deletions

View File

@ -1635,3 +1635,7 @@ services:
arguments: ['@current_user', '@path.current', '@path.matcher', '@language_manager']
tags:
- { name: event_subscriber }
response_filter.rss.relative_url:
class: Drupal\Core\EventSubscriber\RssResponseRelativeUrlFilter
tags:
- { name: event_subscriber }

View File

@ -36,6 +36,25 @@ class Html {
*/
protected static $isAjax = FALSE;
/**
* All attributes that may contain URIs.
*
* - The attributes 'code' and 'codebase' are omitted, because they only exist
* for the <applet> tag. The time of Java applets has passed.
* - The attribute 'icon' is omitted, because no browser implements the
* <command> tag anymore.
* See https://developer.mozilla.org/en-US/docs/Web/HTML/Element/command.
* - The 'manifest' attribute is omitted because it only exists for the <html>
* tag. That tag only makes sense in a HTML-served-as-HTML context, in which
* case relative URLs are guaranteed to work.
*
* @see https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes
* @see https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value
*
* @var string[]
*/
protected static $uriAttributes = ['href', 'poster', 'src', 'cite', 'data', 'action', 'formaction', 'srcset', 'about'];
/**
* Prepares a string for use as a valid class name.
*
@ -402,4 +421,61 @@ EOD;
return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}
/**
* Converts all root-relative URLs to absolute URLs.
*
* Does not change any existing protocol-relative or absolute URLs. Does not
* change other relative URLs because they would result in different absolute
* URLs depending on the current path. For example: when the same content
* containing such a relative URL (for example 'image.png'), is served from
* its canonical URL (for example 'http://example.com/some-article') or from
* a listing or feed (for example 'http://example.com/all-articles') their
* "current path" differs, resulting in different absolute URLs:
* 'http://example.com/some-article/image.png' versus
* 'http://example.com/all-articles/image.png'. Only one can be correct.
* Therefore relative URLs that are not root-relative cannot be safely
* transformed and should generally be avoided.
*
* Necessary for HTML that is served outside of a website, for example, RSS
* and e-mail.
*
* @param string $html
* The partial (X)HTML snippet to load. Invalid markup will be corrected on
* import.
* @param string $scheme_and_host
* The root URL, which has a URI scheme, host and optional port.
*
* @return string
* The updated (X)HTML snippet.
*/
public static function transformRootRelativeUrlsToAbsolute($html, $scheme_and_host) {
assert('empty(array_diff(array_keys(parse_url($scheme_and_host)), ["scheme", "host", "port"]))', '$scheme_and_host contains scheme, host and port at most.');
assert('isset(parse_url($scheme_and_host)["scheme"])', '$scheme_and_host is absolute and hence has a scheme.');
assert('isset(parse_url($scheme_and_host)["host"])', '$base_url is absolute and hence has a host.');
$html_dom = Html::load($html);
$xpath = new \DOMXpath($html_dom);
// Update all root-relative URLs to absolute URLs in the given HTML.
foreach (static::$uriAttributes as $attr) {
foreach ($xpath->query("//*[starts-with(@$attr, '/') and not(starts-with(@$attr, '//'))]") as $node) {
$node->setAttribute($attr, $scheme_and_host . $node->getAttribute($attr));
}
foreach ($xpath->query("//*[@srcset]") as $node) {
// @see https://html.spec.whatwg.org/multipage/embedded-content.html#attr-img-srcset
// @see https://html.spec.whatwg.org/multipage/embedded-content.html#image-candidate-string
$image_candidate_strings = explode(',', $node->getAttribute('srcset'));
$image_candidate_strings = array_map('trim', $image_candidate_strings);
for ($i = 0; $i < count($image_candidate_strings); $i++) {
$image_candidate_string = $image_candidate_strings[$i];
if ($image_candidate_string[0] === '/' && $image_candidate_string[1] !== '/') {
$image_candidate_strings[$i] = $scheme_and_host . $image_candidate_string;
}
}
$node->setAttribute('srcset', implode(', ', $image_candidate_strings));
}
}
return Html::serialize($html_dom);
}
}

View File

@ -0,0 +1,72 @@
<?php
namespace Drupal\Core\EventSubscriber;
use Drupal\Component\Utility\Html;
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpKernel\Event\FilterResponseEvent;
use Symfony\Component\HttpKernel\KernelEvents;
/**
* Subscribes to filter RSS responses, to make relative URIs absolute.
*/
class RssResponseRelativeUrlFilter implements EventSubscriberInterface {
/**
* Converts relative URLs to absolute URLs.
*
* @param \Symfony\Component\HttpKernel\Event\FilterResponseEvent $event
* The response event.
*/
public function onResponse(FilterResponseEvent $event) {
// Only care about RSS responses.
if (stripos($event->getResponse()->headers->get('Content-Type'), 'application/rss+xml') === FALSE) {
return;
}
$response = $event->getResponse();
$response->setContent($this->transformRootRelativeUrlsToAbsolute($response->getContent(), $event->getRequest()));
}
/**
* Converts all root-relative URLs to absolute URLs in RSS markup.
*
* Does not change any existing protocol-relative or absolute URLs.
*
* @param string $rss_markup
* The RSS markup to update.
* @param \Symfony\Component\HttpFoundation\Request $request
* The current request.
*
* @return string
* The updated RSS markup.
*/
protected function transformRootRelativeUrlsToAbsolute($rss_markup, Request $request) {
$rss_dom = new \DOMDocument();
$rss_dom->loadXML($rss_markup);
// Invoke Html::transformRootRelativeUrlsToAbsolute() on all HTML content
// embedded in this RSS feed.
foreach ($rss_dom->getElementsByTagName('description') as $node) {
$html_markup = $node->nodeValue;
if (!empty($html_markup)) {
$node->nodeValue = Html::transformRootRelativeUrlsToAbsolute($html_markup, $request->getSchemeAndHttpHost());
}
}
return $rss_dom->saveXML();
}
/**
* {@inheritdoc}
*/
public static function getSubscribedEvents() {
// Should run after any other response subscriber that modifies the markup.
// @see \Drupal\Core\EventSubscriber\ActiveLinkResponseFilter
$events[KernelEvents::RESPONSE][] = ['onResponse', -512];
return $events;
}
}

View File

@ -56,13 +56,13 @@ class FileFieldRSSContentTest extends FileFieldTestBase {
// Check that the RSS enclosure appears in the RSS feed.
$this->drupalGet('rss.xml');
$uploaded_filename = str_replace('public://', '', $node_file->getFileUri());
$test_element = sprintf(
'<enclosure url="%s" length="%s" type="%s" />',
$selector = sprintf(
'enclosure[url="%s"][length="%s"][type="%s"]',
file_create_url("public://$uploaded_filename", array('absolute' => TRUE)),
$node_file->getSize(),
$node_file->getMimeType()
);
$this->assertRaw($test_element, 'File field RSS enclosure is displayed when viewing the RSS feed.');
$this->assertTrue(!empty($this->cssSelect($selector)), 'File field RSS enclosure is displayed when viewing the RSS feed.');
}
}

View File

@ -2,6 +2,8 @@
namespace Drupal\node\Tests;
use Drupal\filter\Entity\FilterFormat;
/**
* Ensures that data added to nodes by other modules appears in RSS feeds.
*
@ -60,4 +62,47 @@ class NodeRSSContentTest extends NodeTestBase {
$this->assertNoText($rss_only_content, 'Node content designed for RSS does not appear when viewing node.');
}
/**
* Tests relative, root-relative, protocol-relative and absolute URLs.
*/
public function testUrlHandling() {
// Only the plain_text text format is available by default, which escapes
// all HTML.
FilterFormat::create([
'format' => 'full_html',
'name' => 'Full HTML',
'filters' => [],
])->save();
$defaults = [
'type' => 'article',
'promote' => 1,
];
$this->drupalCreateNode($defaults + [
'body' => [
'value' => '<p><a href="' . file_url_transform_relative(file_create_url('public://root-relative')) . '">Root-relative URL</a></p>',
'format' => 'full_html',
],
]);
$protocol_relative_url = substr(file_create_url('public://protocol-relative'), strlen(\Drupal::request()->getScheme() . ':'));
$this->drupalCreateNode($defaults + [
'body' => [
'value' => '<p><a href="' . $protocol_relative_url . '">Protocol-relative URL</a></p>',
'format' => 'full_html',
],
]);
$absolute_url = file_create_url('public://absolute');
$this->drupalCreateNode($defaults + [
'body' => [
'value' => '<p><a href="' . $absolute_url . '">Absolute URL</a></p>',
'format' => 'full_html',
],
]);
$this->drupalGet('rss.xml');
$this->assertRaw(file_create_url('public://root-relative'), 'Root-relative URL is transformed to absolute.');
$this->assertRaw($protocol_relative_url, 'Protocol-relative URL is left untouched.');
$this->assertRaw($absolute_url, 'Absolute URL is left untouched.');
}
}

View File

@ -105,7 +105,7 @@ class RssTest extends TaxonomyTestBase {
// Test that the feed page exists for the term.
$this->drupalGet("taxonomy/term/{$term1->id()}/feed");
$this->assertRaw('<rss version="2.0"', "Feed page is RSS.");
$this->assertTrue(!empty($this->cssSelect('rss[version="2.0"]')), "Feed page is RSS.");
// Check that the "Exception value" is disabled by default.
$this->drupalGet('taxonomy/term/all/feed');

View File

@ -80,7 +80,7 @@ class BasicTest extends WizardTestBase {
$elements = $this->cssSelect('link[href="' . Url::fromRoute('view.' . $view2['id'] . '.feed_1', [], ['absolute' => TRUE])->toString() . '"]');
$this->assertEqual(count($elements), 1, 'Feed found.');
$this->drupalGet($view2['page[feed_properties][path]']);
$this->assertRaw('<rss version="2.0"');
$this->assertTrue(!empty($this->cssSelect('rss[version="2.0"]')));
// The feed should have the same title and nodes as the page.
$this->assertText($view2['page[title]']);
$this->assertRaw($node1->url('canonical', ['absolute' => TRUE]));

View File

@ -321,4 +321,72 @@ class HtmlTest extends UnitTestCase {
$this->assertSame('', $result);
}
/**
* @covers ::transformRootRelativeUrlsToAbsolute
* @dataProvider providerTestTransformRootRelativeUrlsToAbsolute
*/
public function testTransformRootRelativeUrlsToAbsolute($html, $scheme_and_host, $expected_html) {
$this->assertSame($expected_html ?: $html, Html::transformRootRelativeUrlsToAbsolute($html, $scheme_and_host));
}
/**
* @covers ::transformRootRelativeUrlsToAbsolute
* @dataProvider providerTestTransformRootRelativeUrlsToAbsoluteAssertion
* @expectedException \AssertionError
*/
public function testTransformRootRelativeUrlsToAbsoluteAssertion($scheme_and_host) {
Html::transformRootRelativeUrlsToAbsolute('', $scheme_and_host);
}
/**
* Provides test data for testTransformRootRelativeUrlsToAbsolute().
*
* @return array
* Test data.
*/
public function providerTestTransformRootRelativeUrlsToAbsolute() {
$data = [];
// One random tag name.
$tag_name = strtolower($this->randomMachineName());
// A site installed either in the root of a domain or a subdirectory.
$base_paths = ['/', '/subdir/' . $this->randomMachineName() . '/'];
foreach ($base_paths as $base_path) {
// The only attribute that has more than just a URL as its value, is
// 'srcset', so special-case it.
$data += [
"$tag_name, srcset, $base_path: root-relative" => ["<$tag_name srcset=\"http://example.com{$base_path}already-absolute 200w, {$base_path}root-relative 300w\">root-relative test</$tag_name>", 'http://example.com', "<$tag_name srcset=\"http://example.com{$base_path}already-absolute 200w, http://example.com{$base_path}root-relative 300w\">root-relative test</$tag_name>"],
"$tag_name, srcset, $base_path: protocol-relative" => ["<$tag_name srcset=\"http://example.com{$base_path}already-absolute 200w, //example.com{$base_path}protocol-relative 300w\">protocol-relative test</$tag_name>", 'http://example.com', FALSE],
"$tag_name, srcset, $base_path: absolute" => ["<$tag_name srcset=\"http://example.com{$base_path}already-absolute 200w, http://example.com{$base_path}absolute 300w\">absolute test</$tag_name>", 'http://example.com', FALSE],
];
foreach (['href', 'poster', 'src', 'cite', 'data', 'action', 'formaction', 'about'] as $attribute) {
$data += [
"$tag_name, $attribute, $base_path: root-relative" => ["<$tag_name $attribute=\"{$base_path}root-relative\">root-relative test</$tag_name>", 'http://example.com', "<$tag_name $attribute=\"http://example.com{$base_path}root-relative\">root-relative test</$tag_name>"],
"$tag_name, $attribute, $base_path: protocol-relative" => ["<$tag_name $attribute=\"//example.com{$base_path}protocol-relative\">protocol-relative test</$tag_name>", 'http://example.com', FALSE],
"$tag_name, $attribute, $base_path: absolute" => ["<$tag_name $attribute=\"http://example.com{$base_path}absolute\">absolute test</$tag_name>", 'http://example.com', FALSE],
];
}
}
return $data;
}
/**
* Provides test data for testTransformRootRelativeUrlsToAbsoluteAssertion().
*
* @return array
* Test data.
*/
public function providerTestTransformRootRelativeUrlsToAbsoluteAssertion() {
return [
'only relative path' => ['llama'],
'only root-relative path' => ['/llama'],
'host and path' => ['example.com/llama'],
'scheme, host and path' => ['http://example.com/llama'],
];
}
}