Issue #1998466 by dawehner, ParisLiakos: Convert filter_xss_admin() and similar function to an Xss component.
parent
5a95824b28
commit
23b59123d1
|
@ -6,6 +6,7 @@ use Drupal\Component\Utility\Settings;
|
||||||
use Drupal\Component\Utility\String;
|
use Drupal\Component\Utility\String;
|
||||||
use Drupal\Component\Utility\Timer;
|
use Drupal\Component\Utility\Timer;
|
||||||
use Drupal\Component\Utility\Unicode;
|
use Drupal\Component\Utility\Unicode;
|
||||||
|
use Drupal\Component\Utility\UrlValidator;
|
||||||
use Drupal\Core\DrupalKernel;
|
use Drupal\Core\DrupalKernel;
|
||||||
use Drupal\Core\Database\Database;
|
use Drupal\Core\Database\Database;
|
||||||
use Drupal\Core\DependencyInjection\ContainerBuilder;
|
use Drupal\Core\DependencyInjection\ContainerBuilder;
|
||||||
|
@ -1455,15 +1456,11 @@ function check_plain($text) {
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
* TRUE if the text is valid UTF-8, FALSE if not.
|
* TRUE if the text is valid UTF-8, FALSE if not.
|
||||||
|
*
|
||||||
|
* @see \Drupal\Component\Utility\Unicode::validateUtf8()
|
||||||
*/
|
*/
|
||||||
function drupal_validate_utf8($text) {
|
function drupal_validate_utf8($text) {
|
||||||
if (strlen($text) == 0) {
|
return Unicode::validateUtf8($text);
|
||||||
return TRUE;
|
|
||||||
}
|
|
||||||
// With the PCRE_UTF8 modifier 'u', preg_match() fails silently on strings
|
|
||||||
// containing invalid UTF-8 byte sequences. It does not reject character
|
|
||||||
// codes above U+10FFFF (represented by 4 or more octets), though.
|
|
||||||
return (preg_match('/^./us', $text) == 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
use Drupal\Component\Utility\Crypt;
|
use Drupal\Component\Utility\Crypt;
|
||||||
|
use Drupal\Component\Utility\String;
|
||||||
|
use Drupal\Component\Utility\UrlValidator;
|
||||||
|
use Drupal\Component\Utility\Xss;
|
||||||
use Drupal\Core\Cache\Cache;
|
use Drupal\Core\Cache\Cache;
|
||||||
use Drupal\Core\Language\Language;
|
use Drupal\Core\Language\Language;
|
||||||
use Symfony\Component\DependencyInjection\Container;
|
use Symfony\Component\DependencyInjection\Container;
|
||||||
|
@ -806,29 +809,11 @@ function valid_email_address($mail) {
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
* TRUE if the URL is in a valid format.
|
* TRUE if the URL is in a valid format.
|
||||||
|
*
|
||||||
|
* @see \Drupal\Component\Utility\UrlValidator::isValid()
|
||||||
*/
|
*/
|
||||||
function valid_url($url, $absolute = FALSE) {
|
function valid_url($url, $absolute = FALSE) {
|
||||||
if ($absolute) {
|
return UrlValidator::isValid($url, $absolute);
|
||||||
return (bool)preg_match("
|
|
||||||
/^ # Start at the beginning of the text
|
|
||||||
(?:ftp|https?|feed):\/\/ # Look for ftp, http, https or feed schemes
|
|
||||||
(?: # Userinfo (optional) which is typically
|
|
||||||
(?:(?:[\w\.\-\+!$&'\(\)*\+,;=]|%[0-9a-f]{2})+:)* # a username or a username and password
|
|
||||||
(?:[\w\.\-\+%!$&'\(\)*\+,;=]|%[0-9a-f]{2})+@ # combination
|
|
||||||
)?
|
|
||||||
(?:
|
|
||||||
(?:[a-z0-9\-\.]|%[0-9a-f]{2})+ # A domain name or a IPv4 address
|
|
||||||
|(?:\[(?:[0-9a-f]{0,4}:)*(?:[0-9a-f]{0,4})\]) # or a well formed IPv6 address
|
|
||||||
)
|
|
||||||
(?::[0-9]+)? # Server port number (optional)
|
|
||||||
(?:[\/|\?]
|
|
||||||
(?:[\w#!:\.\?\+=&@$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2}) # The path and query (optional)
|
|
||||||
*)?
|
|
||||||
$/xi", $url);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return (bool)preg_match("/^(?:[\w#!:\.\?\+=&@$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2})+$/i", $url);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -910,40 +895,10 @@ function valid_number_step($value, $step, $offset = 0.0) {
|
||||||
* check_plain() being called on it. However, it can be passed to functions
|
* check_plain() being called on it. However, it can be passed to functions
|
||||||
* expecting plain-text strings.
|
* expecting plain-text strings.
|
||||||
*
|
*
|
||||||
* @see check_url()
|
* @see \Drupal\Component\Utility\UrlValidator::stripDangerousProtocols()
|
||||||
*/
|
*/
|
||||||
function drupal_strip_dangerous_protocols($uri) {
|
function drupal_strip_dangerous_protocols($uri) {
|
||||||
static $allowed_protocols;
|
return UrlValidator::stripDangerousProtocols($uri);
|
||||||
|
|
||||||
if (!isset($allowed_protocols)) {
|
|
||||||
// filter_xss_admin() is called by the installer and update.php, in which
|
|
||||||
// case the configuration may not exist (yet). Provide a minimal default set
|
|
||||||
// of allowed protocols for these cases.
|
|
||||||
$allowed_protocols = array_flip(config('system.filter')->get('protocols') ?: array('http', 'https'));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Iteratively remove any invalid protocol found.
|
|
||||||
do {
|
|
||||||
$before = $uri;
|
|
||||||
$colonpos = strpos($uri, ':');
|
|
||||||
if ($colonpos > 0) {
|
|
||||||
// We found a colon, possibly a protocol. Verify.
|
|
||||||
$protocol = substr($uri, 0, $colonpos);
|
|
||||||
// If a colon is preceded by a slash, question mark or hash, it cannot
|
|
||||||
// possibly be part of the URL scheme. This must be a relative URL, which
|
|
||||||
// inherits the (safe) protocol of the base document.
|
|
||||||
if (preg_match('![/?#]!', $protocol)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Check if this is a disallowed protocol. Per RFC2616, section 3.2.3
|
|
||||||
// (URI Comparison) scheme comparison must be case-insensitive.
|
|
||||||
if (!isset($allowed_protocols[strtolower($protocol)])) {
|
|
||||||
$uri = substr($uri, $colonpos + 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} while ($before != $uri);
|
|
||||||
|
|
||||||
return $uri;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -961,10 +916,11 @@ function drupal_strip_dangerous_protocols($uri) {
|
||||||
* Drupal\Core\Template\Attribute, call drupal_strip_dangerous_protocols()
|
* Drupal\Core\Template\Attribute, call drupal_strip_dangerous_protocols()
|
||||||
* instead.
|
* instead.
|
||||||
*
|
*
|
||||||
* @see drupal_strip_dangerous_protocols()
|
* @see \Drupal\Component\Utility\UrlValidator::stripDangerousProtocols()
|
||||||
|
* @see \Drupal\Component\Utility\String::checkPlain()
|
||||||
*/
|
*/
|
||||||
function check_url($uri) {
|
function check_url($uri) {
|
||||||
return check_plain(drupal_strip_dangerous_protocols($uri));
|
return String::checkPlain(UrlValidator::stripDangerousProtocols($uri));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -976,9 +932,17 @@ function check_url($uri) {
|
||||||
*
|
*
|
||||||
* Allows all tags that can be used inside an HTML body, save
|
* Allows all tags that can be used inside an HTML body, save
|
||||||
* for scripts and styles.
|
* for scripts and styles.
|
||||||
|
*
|
||||||
|
* @param string $string
|
||||||
|
* The string to apply the filter to.
|
||||||
|
*
|
||||||
|
* @return string
|
||||||
|
* The filtered string.
|
||||||
|
*
|
||||||
|
* @see \Drupal\Component\Utility\Xss::filterAdmin()
|
||||||
*/
|
*/
|
||||||
function filter_xss_admin($string) {
|
function filter_xss_admin($string) {
|
||||||
return filter_xss($string, array('a', 'abbr', 'acronym', 'address', 'article', 'aside', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'cite', 'code', 'col', 'colgroup', 'command', 'dd', 'del', 'details', 'dfn', 'div', 'dl', 'dt', 'em', 'figcaption', 'figure', 'footer', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'i', 'img', 'ins', 'kbd', 'li', 'mark', 'menu', 'meter', 'nav', 'ol', 'output', 'p', 'pre', 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'section', 'small', 'span', 'strong', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr'));
|
return Xss::filterAdmin($string);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1004,246 +968,27 @@ function filter_xss_admin($string) {
|
||||||
* An XSS safe version of $string, or an empty string if $string is not
|
* An XSS safe version of $string, or an empty string if $string is not
|
||||||
* valid UTF-8.
|
* valid UTF-8.
|
||||||
*
|
*
|
||||||
* @see drupal_validate_utf8()
|
* @see \Drupal\Component\Utility\Xss::filter()
|
||||||
|
*
|
||||||
* @ingroup sanitization
|
* @ingroup sanitization
|
||||||
*/
|
*/
|
||||||
function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
|
function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
|
||||||
// Only operate on valid UTF-8 strings. This is necessary to prevent cross
|
return Xss::filter($string, $allowed_tags);
|
||||||
// site scripting issues on Internet Explorer 6.
|
|
||||||
if (!drupal_validate_utf8($string)) {
|
|
||||||
return '';
|
|
||||||
}
|
|
||||||
// Store the text format.
|
|
||||||
_filter_xss_split($allowed_tags, TRUE);
|
|
||||||
// Remove NULL characters (ignored by some browsers).
|
|
||||||
$string = str_replace(chr(0), '', $string);
|
|
||||||
// Remove Netscape 4 JS entities.
|
|
||||||
$string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
|
|
||||||
|
|
||||||
// Defuse all HTML entities.
|
|
||||||
$string = str_replace('&', '&', $string);
|
|
||||||
// Change back only well-formed entities in our whitelist:
|
|
||||||
// Decimal numeric entities.
|
|
||||||
$string = preg_replace('/&#([0-9]+;)/', '&#\1', $string);
|
|
||||||
// Hexadecimal numeric entities.
|
|
||||||
$string = preg_replace('/&#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
|
|
||||||
// Named entities.
|
|
||||||
$string = preg_replace('/&([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
|
|
||||||
|
|
||||||
return preg_replace_callback('%
|
|
||||||
(
|
|
||||||
<(?=[^a-zA-Z!/]) # a lone <
|
|
||||||
| # or
|
|
||||||
<!--.*?--> # a comment
|
|
||||||
| # or
|
|
||||||
<[^>]*(>|$) # a string that starts with a <, up until the > or the end of the string
|
|
||||||
| # or
|
|
||||||
> # just a >
|
|
||||||
)%x', '_filter_xss_split', $string);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Processes an HTML tag.
|
|
||||||
*
|
|
||||||
* @param $m
|
|
||||||
* An array with various meaning depending on the value of $store.
|
|
||||||
* If $store is TRUE then the array contains the allowed tags.
|
|
||||||
* If $store is FALSE then the array has one element, the HTML tag to process.
|
|
||||||
* @param $store
|
|
||||||
* Whether to store $m.
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
* If the element isn't allowed, an empty string. Otherwise, the cleaned up
|
|
||||||
* version of the HTML element.
|
|
||||||
*/
|
|
||||||
function _filter_xss_split($m, $store = FALSE) {
|
|
||||||
static $allowed_html;
|
|
||||||
|
|
||||||
if ($store) {
|
|
||||||
$allowed_html = array_flip($m);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
$string = $m[1];
|
|
||||||
|
|
||||||
if (substr($string, 0, 1) != '<') {
|
|
||||||
// We matched a lone ">" character.
|
|
||||||
return '>';
|
|
||||||
}
|
|
||||||
elseif (strlen($string) == 1) {
|
|
||||||
// We matched a lone "<" character.
|
|
||||||
return '<';
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?|(<!--.*?-->)$%', $string, $matches)) {
|
|
||||||
// Seriously malformed.
|
|
||||||
return '';
|
|
||||||
}
|
|
||||||
|
|
||||||
$slash = trim($matches[1]);
|
|
||||||
$elem = &$matches[2];
|
|
||||||
$attrlist = &$matches[3];
|
|
||||||
$comment = &$matches[4];
|
|
||||||
|
|
||||||
if ($comment) {
|
|
||||||
$elem = '!--';
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!isset($allowed_html[strtolower($elem)])) {
|
|
||||||
// Disallowed HTML element.
|
|
||||||
return '';
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($comment) {
|
|
||||||
return $comment;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($slash != '') {
|
|
||||||
return "</$elem>";
|
|
||||||
}
|
|
||||||
|
|
||||||
// Is there a closing XHTML slash at the end of the attributes?
|
|
||||||
$attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $count);
|
|
||||||
$xhtml_slash = $count ? ' /' : '';
|
|
||||||
|
|
||||||
// Clean up attributes.
|
|
||||||
$attr2 = implode(' ', _filter_xss_attributes($attrlist));
|
|
||||||
$attr2 = preg_replace('/[<>]/', '', $attr2);
|
|
||||||
$attr2 = strlen($attr2) ? ' ' . $attr2 : '';
|
|
||||||
|
|
||||||
return "<$elem$attr2$xhtml_slash>";
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Processes a string of HTML attributes.
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
* Cleaned up version of the HTML attributes.
|
|
||||||
*/
|
|
||||||
function _filter_xss_attributes($attr) {
|
|
||||||
$attrarr = array();
|
|
||||||
$mode = 0;
|
|
||||||
$attrname = '';
|
|
||||||
|
|
||||||
while (strlen($attr) != 0) {
|
|
||||||
// Was the last operation successful?
|
|
||||||
$working = 0;
|
|
||||||
|
|
||||||
switch ($mode) {
|
|
||||||
case 0:
|
|
||||||
// Attribute name, href for instance.
|
|
||||||
if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
|
|
||||||
$attrname = strtolower($match[1]);
|
|
||||||
$skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
|
|
||||||
$working = $mode = 1;
|
|
||||||
$attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 1:
|
|
||||||
// Equals sign or valueless ("selected").
|
|
||||||
if (preg_match('/^\s*=\s*/', $attr)) {
|
|
||||||
$working = 1; $mode = 2;
|
|
||||||
$attr = preg_replace('/^\s*=\s*/', '', $attr);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (preg_match('/^\s+/', $attr)) {
|
|
||||||
$working = 1; $mode = 0;
|
|
||||||
if (!$skip) {
|
|
||||||
$attrarr[] = $attrname;
|
|
||||||
}
|
|
||||||
$attr = preg_replace('/^\s+/', '', $attr);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 2:
|
|
||||||
// Attribute value, a URL after href= for instance.
|
|
||||||
if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
|
|
||||||
$thisval = filter_xss_bad_protocol($match[1]);
|
|
||||||
|
|
||||||
if (!$skip) {
|
|
||||||
$attrarr[] = "$attrname=\"$thisval\"";
|
|
||||||
}
|
|
||||||
$working = 1;
|
|
||||||
$mode = 0;
|
|
||||||
$attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
|
|
||||||
$thisval = filter_xss_bad_protocol($match[1]);
|
|
||||||
|
|
||||||
if (!$skip) {
|
|
||||||
$attrarr[] = "$attrname='$thisval'";
|
|
||||||
}
|
|
||||||
$working = 1; $mode = 0;
|
|
||||||
$attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
|
|
||||||
$thisval = filter_xss_bad_protocol($match[1]);
|
|
||||||
|
|
||||||
if (!$skip) {
|
|
||||||
$attrarr[] = "$attrname=\"$thisval\"";
|
|
||||||
}
|
|
||||||
$working = 1; $mode = 0;
|
|
||||||
$attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($working == 0) {
|
|
||||||
// Not well formed; remove and try again.
|
|
||||||
$attr = preg_replace('/
|
|
||||||
^
|
|
||||||
(
|
|
||||||
"[^"]*("|$) # - a string that starts with a double quote, up until the next double quote or the end of the string
|
|
||||||
| # or
|
|
||||||
\'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
|
|
||||||
| # or
|
|
||||||
\S # - a non-whitespace character
|
|
||||||
)* # any number of the above three
|
|
||||||
\s* # any number of whitespaces
|
|
||||||
/x', '', $attr);
|
|
||||||
$mode = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// The attribute list ends with a valueless attribute like "selected".
|
|
||||||
if ($mode == 1 && !$skip) {
|
|
||||||
$attrarr[] = $attrname;
|
|
||||||
}
|
|
||||||
return $attrarr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Processes an HTML attribute value and strips dangerous protocols from URLs.
|
* Processes an HTML attribute value and strips dangerous protocols from URLs.
|
||||||
*
|
*
|
||||||
* @param $string
|
* @param string $string
|
||||||
* The string with the attribute value.
|
* The string with the attribute value.
|
||||||
* @param $decode
|
|
||||||
* (deprecated) Whether to decode entities in the $string. Set to FALSE if the
|
|
||||||
* $string is in plain text, TRUE otherwise. Defaults to TRUE. This parameter
|
|
||||||
* is deprecated and will be removed in Drupal 8. To process a plain-text URI,
|
|
||||||
* call drupal_strip_dangerous_protocols() or check_url() instead.
|
|
||||||
*
|
*
|
||||||
* @return
|
* @return string
|
||||||
* Cleaned up and HTML-escaped version of $string.
|
* Cleaned up and HTML-escaped version of $string.
|
||||||
|
*
|
||||||
|
* @see \Drupal\Component\Utility\UrlValidator::filterBadProtocol()
|
||||||
*/
|
*/
|
||||||
function filter_xss_bad_protocol($string, $decode = TRUE) {
|
function filter_xss_bad_protocol($string) {
|
||||||
// Get the plain text representation of the attribute value (i.e. its meaning).
|
return UrlValidator::filterBadProtocol($string);
|
||||||
// @todo Remove the $decode parameter in Drupal 8, and always assume an HTML
|
|
||||||
// string that needs decoding.
|
|
||||||
if ($decode) {
|
|
||||||
if (!function_exists('decode_entities')) {
|
|
||||||
require_once __DIR__ . '/unicode.inc';
|
|
||||||
}
|
|
||||||
|
|
||||||
$string = decode_entities($string);
|
|
||||||
}
|
|
||||||
return check_plain(drupal_strip_dangerous_protocols($string));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -4594,6 +4339,16 @@ function _drupal_bootstrap_code() {
|
||||||
ini_set('log_errors', 1);
|
ini_set('log_errors', 1);
|
||||||
ini_set('error_log', 'public://error.log');
|
ini_set('error_log', 'public://error.log');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set the allowed protocols once we have the config available.
|
||||||
|
$allowed_protocols = \Drupal::config('system.filter')->get('protocols');
|
||||||
|
if (!isset($allowed_protocols)) {
|
||||||
|
// filter_xss_admin() is called by the installer and update.php, in which
|
||||||
|
// case the configuration may not exist (yet). Provide a minimal default set
|
||||||
|
// of allowed protocols for these cases.
|
||||||
|
$allowed_protocols = array('http', 'https');
|
||||||
|
}
|
||||||
|
UrlValidator::setAllowedProtocols($allowed_protocols);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -25,6 +25,7 @@ class String {
|
||||||
* valid UTF-8.
|
* valid UTF-8.
|
||||||
*
|
*
|
||||||
* @see drupal_validate_utf8()
|
* @see drupal_validate_utf8()
|
||||||
|
*
|
||||||
* @ingroup sanitization
|
* @ingroup sanitization
|
||||||
*/
|
*/
|
||||||
public static function checkPlain($text) {
|
public static function checkPlain($text) {
|
||||||
|
|
|
@ -575,4 +575,38 @@ EOD;
|
||||||
return $matches[0][0] . chr(ord($matches[0][1]) ^ 32);
|
return $matches[0][0] . chr(ord($matches[0][1]) ^ 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Determines whether a string is well-formed UTF-8.
 *
 * Every function that filters input should run this check first, so that it
 * only ever operates on valid UTF-8 and cannot be bypassed with a malformed
 * byte sequence.
 *
 * Background: when text containing an invalid UTF-8 lead byte (0xC0 - 0xFF)
 * is served as UTF-8 to Internet Explorer 6, the browser may misinterpret the
 * bytes that follow. If those bytes are HTML control characters such as
 * quotes or angle brackets, text that a filter considered harmless can end up
 * in an unsafe position; for example, an onerror attribute located outside of
 * a tag (and therefore accepted by the filter) may be interpreted by the
 * browser as if it were inside the tag.
 *
 * Strings containing character codes above U+10FFFF are not reported as
 * invalid, even though RFC 3629 prohibits them.
 *
 * @param string $text
 *   The text to check.
 *
 * @return bool
 *   TRUE if the text is valid UTF-8, FALSE if not.
 */
public static function validateUtf8($text) {
  // The empty string is accepted up front because the pattern below requires
  // at least one character. For everything else, rely on the PCRE_UTF8
  // modifier 'u': preg_match() fails silently on strings containing invalid
  // UTF-8 byte sequences, so anything other than exactly one match means the
  // text is not valid. Character codes above U+10FFFF (4 or more octets) are
  // not rejected by this check.
  return strlen($text) == 0 || preg_match('/^./us', $text) == 1;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,136 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file
|
||||||
|
* Contains \Drupal\Component\Utility\UrlValidator.
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace Drupal\Component\Utility;
|
||||||
|
|
||||||
|
/**
 * Helper class to validate URLs and to filter bad protocols from a URL.
 */
class UrlValidator {

  /**
   * The list of allowed protocols.
   *
   * Defaults to a minimal safe set; replace it via setAllowedProtocols().
   *
   * @var array
   */
  protected static $allowedProtocols = array('http', 'https');

  /**
   * Processes an HTML attribute value and strips dangerous protocols from URLs.
   *
   * The value is entity-decoded first, so that the protocol check runs on the
   * plain-text meaning of the attribute, and is HTML-escaped again afterwards.
   *
   * @param string $string
   *   The string with the attribute value.
   *
   * @return string
   *   Cleaned up and HTML-escaped version of $string.
   */
  public static function filterBadProtocol($string) {
    // Get the plain text representation of the attribute value (i.e. its
    // meaning).
    $string = String::decodeEntities($string);

    return String::checkPlain(static::stripDangerousProtocols($string));
  }

  /**
   * Sets the allowed protocols.
   *
   * @param array $protocols
   *   An array of protocols, for example http, https and irc.
   */
  public static function setAllowedProtocols(array $protocols = array()) {
    static::$allowedProtocols = $protocols;
  }

  /**
   * Strips dangerous protocols (e.g. 'javascript:') from a URI.
   *
   * This function must be called for all URIs within user-entered input prior
   * to being output to an HTML attribute value. It is often called as part of
   * check_url() or filter_xss(), but those functions return an HTML-encoded
   * string, so this function can be called independently when the output needs
   * to be a plain-text string for passing to t(), l(),
   * Drupal\Core\Template\Attribute, or another function that will call
   * check_plain() separately.
   *
   * @param string $uri
   *   A plain-text URI that might contain dangerous protocols.
   *
   * @return string
   *   A plain-text URI stripped of dangerous protocols. As with all plain-text
   *   strings, this return value must not be output to an HTML page without
   *   check_plain() being called on it. However, it can be passed to functions
   *   expecting plain-text strings.
   *
   * @see check_url()
   */
  public static function stripDangerousProtocols($uri) {
    // Build a lookup table keyed by protocol name. The configured names are
    // lower-cased because the candidate scheme is lower-cased below; without
    // this an allowed protocol configured as e.g. 'HTTP' could never match.
    $allowed_protocols = array_flip(array_map('strtolower', static::$allowedProtocols));

    // Iteratively remove any invalid protocol found.
    do {
      $before = $uri;
      $colonpos = strpos($uri, ':');
      // A colon at position 0 (or no colon at all) cannot delimit a scheme.
      if ($colonpos > 0) {
        // We found a colon, possibly a protocol. Verify.
        $protocol = substr($uri, 0, $colonpos);
        // If a colon is preceded by a slash, question mark or hash, it cannot
        // possibly be part of the URL scheme. This must be a relative URL,
        // which inherits the (safe) protocol of the base document.
        if (preg_match('![/?#]!', $protocol)) {
          break;
        }
        // Check if this is a disallowed protocol. Per RFC2616, section 3.2.3
        // (URI Comparison) scheme comparison must be case-insensitive.
        if (!isset($allowed_protocols[strtolower($protocol)])) {
          $uri = substr($uri, $colonpos + 1);
        }
      }
      // Repeat until a pass leaves the URI untouched, so that stacked
      // protocols such as 'javascript:javascript:...' are fully removed.
    } while ($before != $uri);

    return $uri;
  }

  /**
   * Verifies the syntax of the given URL.
   *
   * This function should only be used on actual URLs. It should not be used
   * for Drupal menu paths, which can contain arbitrary characters.
   * Valid values per RFC 3986.
   *
   * @param string $url
   *   The URL to verify.
   * @param bool $absolute
   *   Whether the URL is absolute (beginning with a scheme such as "http:").
   *
   * @return bool
   *   TRUE if the URL is in a valid format.
   */
  public static function isValid($url, $absolute = FALSE) {
    if ($absolute) {
      // The path/query part must start with "/" or "?". The character class
      // deliberately contains only those two characters; a "|" inside a
      // character class is a literal pipe, not an alternation.
      return (bool) preg_match("
        /^                                                      # Start at the beginning of the text
        (?:ftp|https?|feed):\/\/                                # Look for ftp, http, https or feed schemes
        (?:                                                     # Userinfo (optional) which is typically
          (?:(?:[\w\.\-\+!$&'\(\)*\+,;=]|%[0-9a-f]{2})+:)*      # a username or a username and password
          (?:[\w\.\-\+%!$&'\(\)*\+,;=]|%[0-9a-f]{2})+@          # combination
        )?
        (?:
          (?:[a-z0-9\-\.]|%[0-9a-f]{2})+                        # A domain name or a IPv4 address
          |(?:\[(?:[0-9a-f]{0,4}:)*(?:[0-9a-f]{0,4})\])         # or a well formed IPv6 address
        )
        (?::[0-9]+)?                                            # Server port number (optional)
        (?:[\/\?]
          (?:[\w#!:\.\?\+=&@$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2})   # The path and query (optional)
        *)?
      $/xi", $url);
    }
    else {
      return (bool) preg_match("/^(?:[\w#!:\.\?\+=&@$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2})+$/i", $url);
    }
  }

}
|
|
@ -0,0 +1,285 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file
|
||||||
|
* Contains \Drupal\Component\Utility\Xss.
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace Drupal\Component\Utility;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides helper to filter for cross-site scripting.
|
||||||
|
*/
|
||||||
|
class Xss {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The list of HTML tags allowed by filterAdmin().
|
||||||
|
*
|
||||||
|
* @var array
|
||||||
|
*
|
||||||
|
* @see \Drupal\Component\Utility\Xss::filterAdmin()
|
||||||
|
*/
|
||||||
|
protected static $adminTags = array('a', 'abbr', 'acronym', 'address', 'article', 'aside', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'cite', 'code', 'col', 'colgroup', 'command', 'dd', 'del', 'details', 'dfn', 'div', 'dl', 'dt', 'em', 'figcaption', 'figure', 'footer', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'i', 'img', 'ins', 'kbd', 'li', 'mark', 'menu', 'meter', 'nav', 'ol', 'output', 'p', 'pre', 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'section', 'small', 'span', 'strong', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr');
|
||||||
|
|
||||||
|
/**
 * Filters HTML to prevent cross-site-scripting (XSS) vulnerabilities.
 *
 * Based on kses by Ulf Harnhammar, see http://sourceforge.net/projects/kses.
 * For examples of various XSS attacks, see: http://ha.ckers.org/xss.html.
 *
 * This code does four things:
 * - Removes characters and constructs that can trick browsers.
 * - Makes sure all HTML entities are well-formed.
 * - Makes sure all HTML tags and attributes are well-formed.
 * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g.
 *   javascript:).
 *
 * @param string $string
 *   The string with raw HTML in it. It will be stripped of everything that can
 *   cause an XSS attack.
 * @param array $allowed_tags
 *   An array of allowed tags.
 *
 * @return string
 *   An XSS safe version of $string, or an empty string if $string is not
 *   valid UTF-8.
 *
 * @see \Drupal\Component\Utility\Unicode::validateUtf8()
 *
 * @ingroup sanitization
 */
public static function filter($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
  // Only operate on valid UTF-8 strings. This is necessary to prevent cross
  // site scripting issues on Internet Explorer 6.
  if (!Unicode::validateUtf8($string)) {
    return '';
  }
  // Hand the allowed tags to split() in "store" mode, so that the callback
  // invocations below can look them up.
  static::split($allowed_tags, TRUE);
  // Remove NULL characters (ignored by some browsers).
  $string = str_replace(chr(0), '', $string);
  // Remove Netscape 4 JS entities.
  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);

  // Defuse all HTML entities by escaping every ampersand.
  $string = str_replace('&', '&amp;', $string);
  // Change back only well-formed entities in our whitelist:
  // Decimal numeric entities.
  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
  // Hexadecimal numeric entities.
  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
  // Named entities.
  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);

  // Pass every tag-like fragment (and every stray angle bracket) to split().
  return preg_replace_callback('%
    (
    <(?=[^a-zA-Z!/])  # a lone <
    |                 # or
    <!--.*?-->        # a comment
    |                 # or
    <[^>]*(>|$)       # a string that starts with a <, up until the > or the end of the string
    |                 # or
    >                 # just a >
    )%x', '\Drupal\Component\Utility\Xss::split', $string);
}
|
||||||
|
|
||||||
|
/**
 * Runs the XSS filter with the permissive, admin-only tag whitelist.
 *
 * Intended for fields where using the whole filter system is impractical but
 * some (mainly inline) mark-up is still wanted, so that check_plain() is not
 * acceptable.
 *
 * All tags that can be used inside an HTML body are allowed, except for
 * scripts and styles.
 *
 * @param string $string
 *   The string to apply the filter to.
 *
 * @return string
 *   The filtered string.
 */
public static function filterAdmin($string) {
  // Delegate to the generic filter, using the class-level admin tag list.
  $allowed_tags = static::$adminTags;
  return static::filter($string, $allowed_tags);
}
|
||||||
|
|
||||||
|
/**
 * Processes an HTML tag.
 *
 * This method has two modes, selected by $store. In "store" mode it only
 * remembers the allowed tags for later calls; otherwise it is used as the
 * preg_replace_callback() callback of filter() and cleans up one matched
 * fragment.
 *
 * @param array $matches
 *   An array with various meaning depending on the value of $store.
 *   If $store is TRUE then the array contains the allowed tags.
 *   If $store is FALSE then the array holds the regular expression match;
 *   element 1 is the HTML tag to process.
 * @param bool $store
 *   Whether to store $matches.
 *
 * @return string
 *   If the element isn't allowed, an empty string. Otherwise, the cleaned up
 *   version of the HTML element. Nothing is returned in "store" mode.
 */
protected static function split($matches, $store = FALSE) {
  static $allowed_html;

  if ($store) {
    // Flip the list so that tag names become keys for isset() lookups.
    $allowed_html = array_flip($matches);
    return;
  }

  $string = $matches[1];

  if (substr($string, 0, 1) != '<') {
    // We matched a lone ">" character; emit it as an entity.
    return '&gt;';
  }
  elseif (strlen($string) == 1) {
    // We matched a lone "<" character; emit it as an entity.
    return '&lt;';
  }

  // From here on $matches holds the parts of the tag: optional closing slash,
  // element name, attribute list and (alternatively) a whole comment.
  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?|(<!--.*?-->)$%', $string, $matches)) {
    // Seriously malformed.
    return '';
  }

  $slash = trim($matches[1]);
  $elem = &$matches[2];
  $attrlist = &$matches[3];
  $comment = &$matches[4];

  if ($comment) {
    // Comments are looked up in the allowed list under the name "!--".
    $elem = '!--';
  }

  if (!isset($allowed_html[strtolower($elem)])) {
    // Disallowed HTML element.
    return '';
  }

  if ($comment) {
    return $comment;
  }

  if ($slash != '') {
    // Closing tags never carry attributes.
    return "</$elem>";
  }

  // Is there a closing XHTML slash at the end of the attributes?
  $attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $count);
  $xhtml_slash = $count ? ' /' : '';

  // Clean up attributes.
  $attr2 = implode(' ', static::attributes($attrlist));
  $attr2 = preg_replace('/[<>]/', '', $attr2);
  $attr2 = strlen($attr2) ? ' ' . $attr2 : '';

  return "<$elem$attr2$xhtml_slash>";
}
|
||||||
|
|
||||||
|
/**
 * Processes a string of HTML attributes.
 *
 * Parses the attribute list from left to right with a small state machine:
 * mode 0 expects an attribute name, mode 1 an equals sign or the end of a
 * valueless attribute, mode 2 an attribute value. "style" attributes and
 * attributes whose name starts with "on" (such as onclick) are parsed but
 * never added to the result. Attribute values are passed through
 * UrlValidator::filterBadProtocol() before being stored. Input that cannot
 * be parsed in the current mode is stripped and parsing restarts in mode 0.
 *
 * @param string $attributes
 *   The HTML attribute string to process.
 *
 * @return array
 *   The cleaned up attributes, one array entry per attribute: either a bare
 *   lower-cased name (valueless attribute) or the name followed by a quoted
 *   value.
 */
protected static function attributes($attributes) {
  $attributes_array = array();
  $mode = 0;
  $attribute_name = '';
  // Defined up front so it exists on every path; it is recomputed whenever
  // an attribute name is read in mode 0.
  $skip = FALSE;

  // Accepted value notations, tried in this order, each paired with the
  // quote character used when the attribute is written back out. Unquoted
  // values are re-emitted inside double quotes.
  $value_patterns = array(
    array('/^"([^"]*)"(\s+|$)/', '"'),
    array("/^'([^']*)'(\s+|$)/", "'"),
    array("%^([^\s\"']+)(\s+|$)%", '"'),
  );

  while (strlen($attributes) != 0) {
    // Was the last operation successful?
    $working = 0;

    switch ($mode) {
      case 0:
        // Attribute name, href for instance.
        if (preg_match('/^([-a-zA-Z]+)/', $attributes, $match)) {
          $attribute_name = strtolower($match[1]);
          $skip = ($attribute_name == 'style' || substr($attribute_name, 0, 2) == 'on');
          $working = $mode = 1;
          // The pattern is anchored, so dropping the matched prefix is all
          // that is needed. The cast covers PHP versions where substr()
          // returns FALSE once the whole string has been consumed.
          $attributes = (string) substr($attributes, strlen($match[0]));
        }
        break;

      case 1:
        // Equals sign or valueless ("selected").
        if (preg_match('/^\s*=\s*/', $attributes, $match)) {
          $working = 1;
          $mode = 2;
          $attributes = (string) substr($attributes, strlen($match[0]));
          break;
        }

        if (preg_match('/^\s+/', $attributes, $match)) {
          // Whitespace right after the name: the attribute has no value.
          $working = 1;
          $mode = 0;
          if (!$skip) {
            $attributes_array[] = $attribute_name;
          }
          $attributes = (string) substr($attributes, strlen($match[0]));
        }
        break;

      case 2:
        // Attribute value, a URL after href= for instance.
        foreach ($value_patterns as $value_pattern) {
          list($pattern, $quote) = $value_pattern;
          if (preg_match($pattern, $attributes, $match)) {
            $thisval = UrlValidator::filterBadProtocol($match[1]);

            if (!$skip) {
              $attributes_array[] = $attribute_name . '=' . $quote . $thisval . $quote;
            }
            $working = 1;
            $mode = 0;
            $attributes = (string) substr($attributes, strlen($match[0]));
            // Leaves the foreach only; the first matching notation wins.
            break;
          }
        }
        break;
    }

    if ($working == 0) {
      // Not well formed; remove and try again. This pattern and the
      // preg_replace() call are deliberately kept exactly as they are: the
      // filter's output for malformed markup depends on their precise
      // matching behaviour.
      $attributes = preg_replace('/
        ^
        (
        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
        |               # or
        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
        |               # or
        \S              # - a non-whitespace character
        )*              # any number of the above three
        \s*             # any number of whitespaces
        /x', '', $attributes);
      $mode = 0;
    }
  }

  // The attribute list ends with a valueless attribute like "selected".
  if ($mode == 1 && !$skip) {
    $attributes_array[] = $attribute_name;
  }
  return $attributes_array;
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue