Issue #1998466 by dawehner, ParisLiakos: Convert filter_xss_admin() and similar function to an Xss component.

2013-05-28 22:48:53 +01:00 · 2013-05-28 22:48:53 +01:00 · 23b59123d1
parent 5a95824b28
commit 23b59123d1
6 changed files with 499 additions and 291 deletions
--- a/core/includes/bootstrap.inc
+++ b/core/includes/bootstrap.inc
@ -6,6 +6,7 @@ use Drupal\Component\Utility\Settings;
 use Drupal\Component\Utility\String;
 use Drupal\Component\Utility\Timer;
 use Drupal\Component\Utility\Unicode;
+use Drupal\Component\Utility\UrlValidator;
 use Drupal\Core\DrupalKernel;
 use Drupal\Core\Database\Database;
 use Drupal\Core\DependencyInjection\ContainerBuilder;
@ -1455,15 +1456,11 @@ function check_plain($text) {
 *
 * @return
 *   TRUE if the text is valid UTF-8, FALSE if not.
+ *
+ * @see \Drupal\Component\Utility\Unicode::validateUtf8()
 */
 function drupal_validate_utf8($text) {
-  if (strlen($text) == 0) {
-    return TRUE;
-  }
-  // With the PCRE_UTF8 modifier 'u', preg_match() fails silently on strings
-  // containing invalid UTF-8 byte sequences. It does not reject character
-  // codes above U+10FFFF (represented by 4 or more octets), though.
-  return (preg_match('/^./us', $text) == 1);
+  return Unicode::validateUtf8($text);
 }

 /**
--- a/core/includes/common.inc
+++ b/core/includes/common.inc
@ -1,6 +1,9 @@
 <?php

 use Drupal\Component\Utility\Crypt;
+use Drupal\Component\Utility\String;
+use Drupal\Component\Utility\UrlValidator;
+use Drupal\Component\Utility\Xss;
 use Drupal\Core\Cache\Cache;
 use Drupal\Core\Language\Language;
 use Symfony\Component\DependencyInjection\Container;
@ -806,29 +809,11 @@ function valid_email_address($mail) {
 *
 * @return
 *   TRUE if the URL is in a valid format.
+ *
+ * @see \Drupal\Component\Utility\UrlValidator::isValid()
 */
 function valid_url($url, $absolute = FALSE) {
-  if ($absolute) {
-    return (bool)preg_match("
-      /^                                                      # Start at the beginning of the text
-      (?:ftp|https?|feed):\/\/                                # Look for ftp, http, https or feed schemes
-      (?:                                                     # Userinfo (optional) which is typically
-        (?:(?:[\w\.\-\+!$&'\(\)*\+,;=]|%[0-9a-f]{2})+:)*      # a username or a username and password
-        (?:[\w\.\-\+%!$&'\(\)*\+,;=]|%[0-9a-f]{2})+@          # combination
-      )?
-      (?:
-        (?:[a-z0-9\-\.]|%[0-9a-f]{2})+                        # A domain name or a IPv4 address
-        |(?:\[(?:[0-9a-f]{0,4}:)*(?:[0-9a-f]{0,4})\])         # or a well formed IPv6 address
-      )
-      (?::[0-9]+)?                                            # Server port number (optional)
-      (?:[\/|\?]
-        (?:[\w#!:\.\?\+=&@$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2})   # The path and query (optional)
-      *)?
-    $/xi", $url);
-  }
-  else {
-    return (bool)preg_match("/^(?:[\w#!:\.\?\+=&@$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2})+$/i", $url);
-  }
+  return UrlValidator::isValid($url, $absolute);
 }

 /**
@ -910,40 +895,10 @@ function valid_number_step($value, $step, $offset = 0.0) {
 *   check_plain() being called on it. However, it can be passed to functions
 *   expecting plain-text strings.
 *
- * @see check_url()
+ * @see \Drupal\Component\Utility\UrlValidator::stripDangerousProtocols()
 */
 function drupal_strip_dangerous_protocols($uri) {
-  static $allowed_protocols;
-
-  if (!isset($allowed_protocols)) {
-    // filter_xss_admin() is called by the installer and update.php, in which
-    // case the configuration may not exist (yet). Provide a minimal default set
-    // of allowed protocols for these cases.
-    $allowed_protocols = array_flip(config('system.filter')->get('protocols') ?: array('http', 'https'));
-  }
-
-  // Iteratively remove any invalid protocol found.
-  do {
-    $before = $uri;
-    $colonpos = strpos($uri, ':');
-    if ($colonpos > 0) {
-      // We found a colon, possibly a protocol. Verify.
-      $protocol = substr($uri, 0, $colonpos);
-      // If a colon is preceded by a slash, question mark or hash, it cannot
-      // possibly be part of the URL scheme. This must be a relative URL, which
-      // inherits the (safe) protocol of the base document.
-      if (preg_match('![/?#]!', $protocol)) {
-        break;
-      }
-      // Check if this is a disallowed protocol. Per RFC2616, section 3.2.3
-      // (URI Comparison) scheme comparison must be case-insensitive.
-      if (!isset($allowed_protocols[strtolower($protocol)])) {
-        $uri = substr($uri, $colonpos + 1);
-      }
-    }
-  } while ($before != $uri);
-
-  return $uri;
+  return UrlValidator::stripDangerousProtocols($uri);
 }

 /**
@ -961,10 +916,11 @@ function drupal_strip_dangerous_protocols($uri) {
 *   Drupal\Core\Template\Attribute, call drupal_strip_dangerous_protocols()
 *   instead.
 *
- * @see drupal_strip_dangerous_protocols()
+ * @see \Drupal\Component\Utility\UrlValidator::stripDangerousProtocols()
+ * @see \Drupal\Component\Utility\String::checkPlain()
 */
 function check_url($uri) {
-  return check_plain(drupal_strip_dangerous_protocols($uri));
+  return String::checkPlain(UrlValidator::stripDangerousProtocols($uri));
 }

 /**
@ -976,9 +932,17 @@ function check_url($uri) {
 *
 * Allows all tags that can be used inside an HTML body, save
 * for scripts and styles.
+ *
+ * @param string $string
+ *   The string to apply the filter to.
+ *
+ * @return string
+ *   The filtered string.
+ *
+ * @see \Drupal\Component\Utility\Xss::filterAdmin()
 */
 function filter_xss_admin($string) {
-  return filter_xss($string, array('a', 'abbr', 'acronym', 'address', 'article', 'aside', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'cite', 'code', 'col', 'colgroup', 'command', 'dd', 'del', 'details', 'dfn', 'div', 'dl', 'dt', 'em', 'figcaption', 'figure', 'footer', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'i', 'img', 'ins', 'kbd', 'li', 'mark', 'menu', 'meter', 'nav', 'ol', 'output', 'p', 'pre', 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'section', 'small', 'span', 'strong', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr'));
+  return Xss::filterAdmin($string);
 }

 /**
@ -1004,246 +968,27 @@ function filter_xss_admin($string) {
 *   An XSS safe version of $string, or an empty string if $string is not
 *   valid UTF-8.
 *
- * @see drupal_validate_utf8()
+ * @see \Drupal\Component\Utility\Xss::filter()
+ *
 * @ingroup sanitization
 */
 function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
-  // Only operate on valid UTF-8 strings. This is necessary to prevent cross
-  // site scripting issues on Internet Explorer 6.
-  if (!drupal_validate_utf8($string)) {
-    return '';
-  }
-  // Store the text format.
-  _filter_xss_split($allowed_tags, TRUE);
-  // Remove NULL characters (ignored by some browsers).
-  $string = str_replace(chr(0), '', $string);
-  // Remove Netscape 4 JS entities.
-  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
-
-  // Defuse all HTML entities.
-  $string = str_replace('&', '&amp;', $string);
-  // Change back only well-formed entities in our whitelist:
-  // Decimal numeric entities.
-  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
-  // Hexadecimal numeric entities.
-  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
-  // Named entities.
-  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
-
-  return preg_replace_callback('%
-    (
-    <(?=[^a-zA-Z!/])  # a lone <
-    |                 # or
-    <!--.*?-->        # a comment
-    |                 # or
-    <[^>]*(>|$)       # a string that starts with a <, up until the > or the end of the string
-    |                 # or
-    >                 # just a >
-    )%x', '_filter_xss_split', $string);
-}
-
-/**
- * Processes an HTML tag.
- *
- * @param $m
- *   An array with various meaning depending on the value of $store.
- *   If $store is TRUE then the array contains the allowed tags.
- *   If $store is FALSE then the array has one element, the HTML tag to process.
- * @param $store
- *   Whether to store $m.
- *
- * @return
- *   If the element isn't allowed, an empty string. Otherwise, the cleaned up
- *   version of the HTML element.
- */
-function _filter_xss_split($m, $store = FALSE) {
-  static $allowed_html;
-
-  if ($store) {
-    $allowed_html = array_flip($m);
-    return;
-  }
-
-  $string = $m[1];
-
-  if (substr($string, 0, 1) != '<') {
-    // We matched a lone ">" character.
-    return '&gt;';
-  }
-  elseif (strlen($string) == 1) {
-    // We matched a lone "<" character.
-    return '&lt;';
-  }
-
-  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?|(<!--.*?-->)$%', $string, $matches)) {
-    // Seriously malformed.
-    return '';
-  }
-
-  $slash = trim($matches[1]);
-  $elem = &$matches[2];
-  $attrlist = &$matches[3];
-  $comment = &$matches[4];
-
-  if ($comment) {
-    $elem = '!--';
-  }
-
-  if (!isset($allowed_html[strtolower($elem)])) {
-    // Disallowed HTML element.
-    return '';
-  }
-
-  if ($comment) {
-    return $comment;
-  }
-
-  if ($slash != '') {
-    return "</$elem>";
-  }
-
-  // Is there a closing XHTML slash at the end of the attributes?
-  $attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $count);
-  $xhtml_slash = $count ? ' /' : '';
-
-  // Clean up attributes.
-  $attr2 = implode(' ', _filter_xss_attributes($attrlist));
-  $attr2 = preg_replace('/[<>]/', '', $attr2);
-  $attr2 = strlen($attr2) ? ' ' . $attr2 : '';
-
-  return "<$elem$attr2$xhtml_slash>";
-}
-
-/**
- * Processes a string of HTML attributes.
- *
- * @return
- *   Cleaned up version of the HTML attributes.
- */
-function _filter_xss_attributes($attr) {
-  $attrarr = array();
-  $mode = 0;
-  $attrname = '';
-
-  while (strlen($attr) != 0) {
-    // Was the last operation successful?
-    $working = 0;
-
-    switch ($mode) {
-      case 0:
-        // Attribute name, href for instance.
-        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
-          $attrname = strtolower($match[1]);
-          $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
-          $working = $mode = 1;
-          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
-        }
-        break;
-
-      case 1:
-        // Equals sign or valueless ("selected").
-        if (preg_match('/^\s*=\s*/', $attr)) {
-          $working = 1; $mode = 2;
-          $attr = preg_replace('/^\s*=\s*/', '', $attr);
-          break;
-        }
-
-        if (preg_match('/^\s+/', $attr)) {
-          $working = 1; $mode = 0;
-          if (!$skip) {
-            $attrarr[] = $attrname;
-          }
-          $attr = preg_replace('/^\s+/', '', $attr);
-        }
-        break;
-
-      case 2:
-        // Attribute value, a URL after href= for instance.
-        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
-          $thisval = filter_xss_bad_protocol($match[1]);
-
-          if (!$skip) {
-            $attrarr[] = "$attrname=\"$thisval\"";
-          }
-          $working = 1;
-          $mode = 0;
-          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
-          break;
-        }
-
-        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
-          $thisval = filter_xss_bad_protocol($match[1]);
-
-          if (!$skip) {
-            $attrarr[] = "$attrname='$thisval'";
-          }
-          $working = 1; $mode = 0;
-          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
-          break;
-        }
-
-        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
-          $thisval = filter_xss_bad_protocol($match[1]);
-
-          if (!$skip) {
-            $attrarr[] = "$attrname=\"$thisval\"";
-          }
-          $working = 1; $mode = 0;
-          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
-        }
-        break;
-    }
-
-    if ($working == 0) {
-      // Not well formed; remove and try again.
-      $attr = preg_replace('/
-        ^
-        (
-        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
-        |               # or
-        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
-        |               # or
-        \S              # - a non-whitespace character
-        )*              # any number of the above three
-        \s*             # any number of whitespaces
-        /x', '', $attr);
-      $mode = 0;
-    }
-  }
-
-  // The attribute list ends with a valueless attribute like "selected".
-  if ($mode == 1 && !$skip) {
-    $attrarr[] = $attrname;
-  }
-  return $attrarr;
+  return Xss::filter($string, $allowed_tags);
 }

 /**
 * Processes an HTML attribute value and strips dangerous protocols from URLs.
 *
- * @param $string
+ * @param string $string
 *   The string with the attribute value.
- * @param $decode
- *   (deprecated) Whether to decode entities in the $string. Set to FALSE if the
- *   $string is in plain text, TRUE otherwise. Defaults to TRUE. This parameter
- *   is deprecated and will be removed in Drupal 8. To process a plain-text URI,
- *   call drupal_strip_dangerous_protocols() or check_url() instead.
 *
- * @return
+ * @return string
 *   Cleaned up and HTML-escaped version of $string.
+ *
+ * @see \Drupal\Component\Utility\UrlValidator::filterBadProtocol()
 */
-function filter_xss_bad_protocol($string, $decode = TRUE) {
-  // Get the plain text representation of the attribute value (i.e. its meaning).
-  // @todo Remove the $decode parameter in Drupal 8, and always assume an HTML
-  //   string that needs decoding.
-  if ($decode) {
-    if (!function_exists('decode_entities')) {
-      require_once __DIR__ . '/unicode.inc';
-    }
-
-    $string = decode_entities($string);
-  }
-  return check_plain(drupal_strip_dangerous_protocols($string));
+function filter_xss_bad_protocol($string) {
+  return UrlValidator::filterBadProtocol($string);
 }

 /**
@ -4594,6 +4339,16 @@ function _drupal_bootstrap_code() {
    ini_set('log_errors', 1);
    ini_set('error_log', 'public://error.log');
  }
+
+  // Set the allowed protocols once we have the config available.
+  $allowed_protocols = \Drupal::config('system.filter')->get('protocols');
+  if (!isset($allowed_protocols)) {
+    // filter_xss_admin() is called by the installer and update.php, in which
+    // case the configuration may not exist (yet). Provide a minimal default set
+    // of allowed protocols for these cases.
+    $allowed_protocols = array('http', 'https');
+  }
+  UrlValidator::setAllowedProtocols($allowed_protocols);
 }

 /**
--- a/core/lib/Drupal/Component/Utility/String.php
+++ b/core/lib/Drupal/Component/Utility/String.php
@ -25,6 +25,7 @@ class String {
   *   valid UTF-8.
   *
   * @see drupal_validate_utf8()
+   *
   * @ingroup sanitization
   */
  public static function checkPlain($text) {
--- a/core/lib/Drupal/Component/Utility/Unicode.php
+++ b/core/lib/Drupal/Component/Utility/Unicode.php
@ -575,4 +575,38 @@ EOD;
    return $matches[0][0] . chr(ord($matches[0][1]) ^ 32);
  }

+  /**
+   * Checks whether a string is valid UTF-8.
+   *
+   * All functions designed to filter input should use drupal_validate_utf8
+   * to ensure they operate on valid UTF-8 strings to prevent bypass of the
+   * filter.
+   *
+   * When text containing an invalid UTF-8 lead byte (0xC0 - 0xFF) is presented
+   * as UTF-8 to Internet Explorer 6, the program may misinterpret subsequent
+   * bytes. When these subsequent bytes are HTML control characters such as
+   * quotes or angle brackets, parts of the text that were deemed safe by filters
+   * end up in locations that are potentially unsafe; An onerror attribute that
+   * is outside of a tag, and thus deemed safe by a filter, can be interpreted
+   * by the browser as if it were inside the tag.
+   *
+   * The function does not return FALSE for strings containing character codes
+   * above U+10FFFF, even though these are prohibited by RFC 3629.
+   *
+   * @param string $text
+   *   The text to check.
+   *
+   * @return bool
+   *   TRUE if the text is valid UTF-8, FALSE if not.
+   */
+  public static function validateUtf8($text) {
+    if (strlen($text) == 0) {
+      return TRUE;
+    }
+    // With the PCRE_UTF8 modifier 'u', preg_match() fails silently on strings
+    // containing invalid UTF-8 byte sequences. It does not reject character
+    // codes above U+10FFFF (represented by 4 or more octets), though.
+    return (preg_match('/^./us', $text) == 1);
+  }
+
 }
--- a/core/lib/Drupal/Component/Utility/UrlValidator.php
+++ b/core/lib/Drupal/Component/Utility/UrlValidator.php
@ -0,0 +1,136 @@
+<?php
+
+/**
+ * @file
+ * Contains \Drupal\Component\Utility\UrlValidator.
+ */
+
+namespace Drupal\Component\Utility;
+
+/**
+ * Helper class to support filtering bad protocols from a URL.
+ */
+class UrlValidator {
+
+  /**
+   * The list of allowed protocols.
+   *
+   * @var array
+   */
+  protected static $allowedProtocols = array('http', 'https');
+
+  /**
+   * Processes an HTML attribute value and strips dangerous protocols from URLs.
+   *
+   * @param string $string
+   *   The string with the attribute value.
+   *
+   * @return string
+   *   Cleaned up and HTML-escaped version of $string.
+   */
+  public static function filterBadProtocol($string) {
+    // Get the plain text representation of the attribute value (i.e. its meaning).
+    $string = String::decodeEntities($string);
+
+    return String::checkPlain(static::stripDangerousProtocols($string));
+  }
+
+  /**
+   * Sets the allowed protocols.
+   *
+   * @param array $protocols
+   *   An array of protocols, for example http, https and irc.
+   */
+  public static function setAllowedProtocols(array $protocols = array()) {
+    static::$allowedProtocols = $protocols;
+  }
+
+  /**
+   * Strips dangerous protocols (e.g. 'javascript:') from a URI.
+   *
+   * This function must be called for all URIs within user-entered input prior
+   * to being output to an HTML attribute value. It is often called as part of
+   * check_url() or filter_xss(), but those functions return an HTML-encoded
+   * string, so this function can be called independently when the output needs to
+   * be a plain-text string for passing to t(), l(),
+   * Drupal\Core\Template\Attribute, or another function that will call
+   * check_plain() separately.
+   *
+   * @param string $uri
+   *   A plain-text URI that might contain dangerous protocols.
+   *
+   * @return string
+   *   A plain-text URI stripped of dangerous protocols. As with all plain-text
+   *   strings, this return value must not be output to an HTML page without
+   *   check_plain() being called on it. However, it can be passed to functions
+   *   expecting plain-text strings.
+   *
+   * @see check_url()
+   */
+  public static function stripDangerousProtocols($uri) {
+    $allowed_protocols = array_flip(static::$allowedProtocols);
+
+    // Iteratively remove any invalid protocol found.
+    do {
+      $before = $uri;
+      $colonpos = strpos($uri, ':');
+      if ($colonpos > 0) {
+        // We found a colon, possibly a protocol. Verify.
+        $protocol = substr($uri, 0, $colonpos);
+        // If a colon is preceded by a slash, question mark or hash, it cannot
+        // possibly be part of the URL scheme. This must be a relative URL, which
+        // inherits the (safe) protocol of the base document.
+        if (preg_match('![/?#]!', $protocol)) {
+          break;
+        }
+        // Check if this is a disallowed protocol. Per RFC2616, section 3.2.3
+        // (URI Comparison) scheme comparison must be case-insensitive.
+        if (!isset($allowed_protocols[strtolower($protocol)])) {
+          $uri = substr($uri, $colonpos + 1);
+        }
+      }
+    } while ($before != $uri);
+
+    return $uri;
+  }
+
+  /**
+   * Verifies the syntax of the given URL.
+   *
+   * This function should only be used on actual URLs. It should not be used for
+   * Drupal menu paths, which can contain arbitrary characters.
+   * Valid values per RFC 3986.
+   *
+   * @param string $url
+   *   The URL to verify.
+   * @param bool $absolute
+   *   Whether the URL is absolute (beginning with a scheme such as "http:").
+   *
+   * @return bool
+   *   TRUE if the URL is in a valid format.
+   */
+  public static function isValid($url, $absolute = FALSE) {
+    if ($absolute) {
+      return (bool) preg_match("
+        /^                                                      # Start at the beginning of the text
+        (?:ftp|https?|feed):\/\/                                # Look for ftp, http, https or feed schemes
+        (?:                                                     # Userinfo (optional) which is typically
+          (?:(?:[\w\.\-\+!$&'\(\)*\+,;=]|%[0-9a-f]{2})+:)*      # a username or a username and password
+          (?:[\w\.\-\+%!$&'\(\)*\+,;=]|%[0-9a-f]{2})+@          # combination
+        )?
+        (?:
+          (?:[a-z0-9\-\.]|%[0-9a-f]{2})+                        # A domain name or a IPv4 address
+          |(?:\[(?:[0-9a-f]{0,4}:)*(?:[0-9a-f]{0,4})\])         # or a well formed IPv6 address
+        )
+        (?::[0-9]+)?                                            # Server port number (optional)
+        (?:[\/|\?]
+          (?:[\w#!:\.\?\+=&@$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2})   # The path and query (optional)
+        *)?
+      $/xi", $url);
+    }
+    else {
+      return (bool) preg_match("/^(?:[\w#!:\.\?\+=&@$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2})+$/i", $url);
+    }
+  }
+
+}
--- a/core/lib/Drupal/Component/Utility/Xss.php
+++ b/core/lib/Drupal/Component/Utility/Xss.php
@ -0,0 +1,285 @@
+<?php
+
+/**
+ * @file
+ * Contains \Drupal\Component\Utility\Xss.
+ */
+
+namespace Drupal\Component\Utility;
+
+/**
+ * Provides helper to filter for cross-site scripting.
+ */
+class Xss {
+
+  /**
+   * The list of html tags allowed by filterAdmin().
+   *
+   * @var array
+   *
+   * @see \Drupal\Component\Utility\Xss::filterAdmin()
+   */
+  protected static $adminTags = array('a', 'abbr', 'acronym', 'address', 'article', 'aside', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'cite', 'code', 'col', 'colgroup', 'command', 'dd', 'del', 'details', 'dfn', 'div', 'dl', 'dt', 'em', 'figcaption', 'figure', 'footer', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'i', 'img', 'ins', 'kbd', 'li', 'mark', 'menu', 'meter', 'nav', 'ol', 'output', 'p', 'pre', 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'section', 'small', 'span', 'strong', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr');
+
+  /**
+   * Filters HTML to prevent cross-site-scripting (XSS) vulnerabilities.
+   *
+   * Based on kses by Ulf Harnhammar, see http://sourceforge.net/projects/kses.
+   * For examples of various XSS attacks, see: http://ha.ckers.org/xss.html.
+   *
+   * This code does four things:
+   * - Removes characters and constructs that can trick browsers.
+   * - Makes sure all HTML entities are well-formed.
+   * - Makes sure all HTML tags and attributes are well-formed.
+   * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g.
+   *   javascript:).
+   *
+   * @param string $string
+   *   The string with raw HTML in it. It will be stripped of everything that can
+   *   cause an XSS attack.
+   * @param array $allowed_tags
+   *   An array of allowed tags.
+   *
+   * @return string
+   *   An XSS safe version of $string, or an empty string if $string is not
+   *   valid UTF-8.
+   *
+   * @see \Drupal\Component\Utility\Unicode::validateUtf8()
+   *
+   * @ingroup sanitization
+   */
+  public static function filter($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
+    // Only operate on valid UTF-8 strings. This is necessary to prevent cross
+    // site scripting issues on Internet Explorer 6.
+    if (!Unicode::validateUtf8($string)) {
+      return '';
+    }
+    // Store the text format.
+    static::split($allowed_tags, TRUE);
+    // Remove NULL characters (ignored by some browsers).
+    $string = str_replace(chr(0), '', $string);
+    // Remove Netscape 4 JS entities.
+    $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
+
+    // Defuse all HTML entities.
+    $string = str_replace('&', '&amp;', $string);
+    // Change back only well-formed entities in our whitelist:
+    // Decimal numeric entities.
+    $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
+    // Hexadecimal numeric entities.
+    $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
+    // Named entities.
+    $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
+
+    return preg_replace_callback('%
+      (
+      <(?=[^a-zA-Z!/])  # a lone <
+      |                 # or
+      <!--.*?-->        # a comment
+      |                 # or
+      <[^>]*(>|$)       # a string that starts with a <, up until the > or the end of the string
+      |                 # or
+      >                 # just a >
+      )%x', '\Drupal\Component\Utility\Xss::split', $string);
+  }
+
+  /**
+   * Applies a very permissive XSS/HTML filter for admin-only use.
+   *
+   * Use only for fields where it is impractical to use the
+   * whole filter system, but where some (mainly inline) mark-up
+   * is desired (so check_plain() is not acceptable).
+   *
+   * Allows all tags that can be used inside an HTML body, save
+   * for scripts and styles.
+   *
+   * @param string $string
+   *   The string to apply the filter to.
+   *
+   * @return string
+   *   The filtered string.
+   */
+  public static function filterAdmin($string) {
+    return static::filter($string, static::$adminTags);
+  }
+
+  /**
+   * Processes an HTML tag.
+   *
+   * @param array $matches
+   *   An array with various meaning depending on the value of $store.
+   *   If $store is TRUE then the array contains the allowed tags.
+   *   If $store is FALSE then the array has one element, the HTML tag to process.
+   * @param bool $store
+   *   Whether to store $matches.
+   *
+   * @return string
+   *   If the element isn't allowed, an empty string. Otherwise, the cleaned up
+   *   version of the HTML element.
+   */
+  protected static function split($matches, $store = FALSE) {
+    static $allowed_html;
+
+    if ($store) {
+      $allowed_html = array_flip($matches);
+      return;
+    }
+
+    $string = $matches[1];
+
+    if (substr($string, 0, 1) != '<') {
+      // We matched a lone ">" character.
+      return '&gt;';
+    }
+    elseif (strlen($string) == 1) {
+      // We matched a lone "<" character.
+      return '&lt;';
+    }
+
+    if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?|(<!--.*?-->)$%', $string, $matches)) {
+      // Seriously malformed.
+      return '';
+    }
+
+    $slash = trim($matches[1]);
+    $elem = &$matches[2];
+    $attrlist = &$matches[3];
+    $comment = &$matches[4];
+
+    if ($comment) {
+      $elem = '!--';
+    }
+
+    if (!isset($allowed_html[strtolower($elem)])) {
+      // Disallowed HTML element.
+      return '';
+    }
+
+    if ($comment) {
+      return $comment;
+    }
+
+    if ($slash != '') {
+      return "</$elem>";
+    }
+
+    // Is there a closing XHTML slash at the end of the attributes?
+    $attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $count);
+    $xhtml_slash = $count ? ' /' : '';
+
+    // Clean up attributes.
+    $attr2 = implode(' ', static::attributes($attrlist));
+    $attr2 = preg_replace('/[<>]/', '', $attr2);
+    $attr2 = strlen($attr2) ? ' ' . $attr2 : '';
+
+    return "<$elem$attr2$xhtml_slash>";
+  }
+
+  /**
+   * Processes a string of HTML attributes.
+   *
+   * @param string $attributes
+   *   The string of HTML attributes to process.
+   *
+   * @return array
+   *   Cleaned up version of the HTML attributes.
+   */
+  protected static function attributes($attributes) {
+    $attributes_array = array();
+    $mode = 0;
+    $attribute_name = '';
+
+    while (strlen($attributes) != 0) {
+      // Was the last operation successful?
+      $working = 0;
+
+      switch ($mode) {
+        case 0:
+          // Attribute name, href for instance.
+          if (preg_match('/^([-a-zA-Z]+)/', $attributes, $match)) {
+            $attribute_name = strtolower($match[1]);
+            $skip = ($attribute_name == 'style' || substr($attribute_name, 0, 2) == 'on');
+            $working = $mode = 1;
+            $attributes = preg_replace('/^[-a-zA-Z]+/', '', $attributes);
+          }
+          break;
+
+        case 1:
+          // Equals sign or valueless ("selected").
+          if (preg_match('/^\s*=\s*/', $attributes)) {
+            $working = 1; $mode = 2;
+            $attributes = preg_replace('/^\s*=\s*/', '', $attributes);
+            break;
+          }
+
+          if (preg_match('/^\s+/', $attributes)) {
+            $working = 1; $mode = 0;
+            if (!$skip) {
+              $attributes_array[] = $attribute_name;
+            }
+            $attributes = preg_replace('/^\s+/', '', $attributes);
+          }
+          break;
+
+        case 2:
+          // Attribute value, a URL after href= for instance.
+          if (preg_match('/^"([^"]*)"(\s+|$)/', $attributes, $match)) {
+            $thisval = UrlValidator::filterBadProtocol($match[1]);
+
+            if (!$skip) {
+              $attributes_array[] = "$attribute_name=\"$thisval\"";
+            }
+            $working = 1;
+            $mode = 0;
+            $attributes = preg_replace('/^"[^"]*"(\s+|$)/', '', $attributes);
+            break;
+          }
+
+          if (preg_match("/^'([^']*)'(\s+|$)/", $attributes, $match)) {
+            $thisval = UrlValidator::filterBadProtocol($match[1]);
+
+            if (!$skip) {
+              $attributes_array[] = "$attribute_name='$thisval'";
+            }
+            $working = 1; $mode = 0;
+            $attributes = preg_replace("/^'[^']*'(\s+|$)/", '', $attributes);
+            break;
+          }
+
+          if (preg_match("%^([^\s\"']+)(\s+|$)%", $attributes, $match)) {
+            $thisval = UrlValidator::filterBadProtocol($match[1]);
+
+            if (!$skip) {
+              $attributes_array[] = "$attribute_name=\"$thisval\"";
+            }
+            $working = 1; $mode = 0;
+            $attributes = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attributes);
+          }
+          break;
+      }
+
+      if ($working == 0) {
+        // Not well formed; remove and try again.
+        $attributes = preg_replace('/
+          ^
+          (
+          "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
+          |               # or
+          \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
+          |               # or
+          \S              # - a non-whitespace character
+          )*              # any number of the above three
+          \s*             # any number of whitespaces
+          /x', '', $attributes);
+        $mode = 0;
+      }
+    }
+
+    // The attribute list ends with a valueless attribute like "selected".
+    if ($mode == 1 && !$skip) {
+      $attributes_array[] = $attribute_name;
+    }
+    return $attributes_array;
+  }
+
+}