- Patch #470632 by sun: move filter_xss*() into common.inc.
parent
adf7113451
commit
a5f42fd007
|
@ -1256,6 +1256,12 @@ function check_file($filename) {
|
||||||
return is_uploaded_file($filename);
|
return is_uploaded_file($filename);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @defgroup sanitization Sanitization functions
|
||||||
|
* @{
|
||||||
|
* Functions to sanitize values.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prepare a URL for use in an HTML attribute. Strips harmful protocols.
|
* Prepare a URL for use in an HTML attribute. Strips harmful protocols.
|
||||||
*/
|
*/
|
||||||
|
@ -1263,6 +1269,291 @@ function check_url($uri) {
|
||||||
return filter_xss_bad_protocol($uri, FALSE);
|
return filter_xss_bad_protocol($uri, FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Very permissive XSS/HTML filter for admin-only use.
 *
 * Intended for fields where running the whole filter system is impractical
 * but some (mainly inline) mark-up is desired, so check_plain() would be too
 * strict. Every tag that can be used inside an HTML body is allowed, except
 * for scripts and styles.
 *
 * @param $string
 *   The string with raw HTML in it.
 * @return
 *   The result of filter_xss() called with $string and the admin tag list.
 */
function filter_xss_admin($string) {
  // Tags permitted for administrators; handed to filter_xss() unchanged.
  $admin_tags = array(
    'a', 'abbr', 'acronym', 'address', 'b', 'bdo', 'big', 'blockquote', 'br',
    'caption', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'div',
    'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img',
    'ins', 'kbd', 'li', 'ol', 'p', 'pre', 'q', 'samp', 'small', 'span',
    'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead',
    'tr', 'tt', 'ul', 'var',
  );

  return filter_xss($string, $admin_tags);
}
|
||||||
|
|
||||||
|
/**
 * Filter XSS.
 *
 * Based on kses by Ulf Harnhammar, see
 * http://sourceforge.net/projects/kses
 *
 * For examples of various XSS attacks, see:
 * http://ha.ckers.org/xss.html
 *
 * This code does four things:
 * - Removes characters and constructs that can trick browsers
 * - Makes sure all HTML entities are well-formed
 * - Makes sure all HTML tags and attributes are well-formed
 * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g. javascript:)
 *
 * @param $string
 *   The string with raw HTML in it. It will be stripped of everything that can cause
 *   an XSS attack.
 * @param $allowed_tags
 *   An array of allowed tags.
 * @return
 *   The filtered string, or an empty string if $string fails
 *   drupal_validate_utf8().
 */
function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
  // Only operate on valid UTF-8 strings. This is necessary to prevent cross
  // site scripting issues on Internet Explorer 6.
  if (!drupal_validate_utf8($string)) {
    return '';
  }
  // Hand the allowed tag list to the callback, which keeps it in a static
  // variable for the preg_replace_callback() run below.
  _filter_xss_split($allowed_tags, TRUE);
  // Remove NULL characters (ignored by some browsers)
  $string = str_replace(chr(0), '', $string);
  // Remove Netscape 4 JS entities
  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);

  // Defuse all HTML entities: every ampersand becomes &amp; so that nothing
  // is interpreted as an entity unless it is explicitly changed back below.
  $string = str_replace('&', '&amp;', $string);
  // Change back only well-formed entities in our whitelist
  // Named entities
  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
  // Decimal numeric entities
  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
  // Hexadecimal numeric entities
  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);

  // Pass every tag-like chunk and every stray angle bracket through
  // _filter_xss_split(), which decides what replaces it.
  return preg_replace_callback('%
    (
    <(?=[^a-zA-Z!/])  # a lone <
    |                 # or
    <[^>]*(>|$)       # a string that starts with a <, up until the > or the end of the string
    |                 # or
    >                 # just a >
    )%x', '_filter_xss_split', $string);
}
|
||||||
|
|
||||||
|
/**
 * Processes an HTML tag.
 *
 * Serves two purposes depending on $store: it either remembers the list of
 * allowed tags in a static variable, or it cleans up one matched chunk of
 * HTML on behalf of preg_replace_callback().
 *
 * @param $m
 *   An array with various meaning depending on the value of $store.
 *   If $store is TRUE then the array contains the allowed tags.
 *   If $store is FALSE then the array is a regular expression match whose
 *   element 1 is the HTML tag (or lone angle bracket) to process.
 * @param $store
 *   Whether to store $m.
 * @return
 *   Nothing when $store is TRUE. Otherwise: an escaped entity for a lone
 *   angle bracket, an empty string if the element is malformed or not
 *   allowed, or the cleaned up version of the HTML element.
 */
function _filter_xss_split($m, $store = FALSE) {
  static $allowed_html;

  if ($store) {
    // Flip so that tag names become keys and can be tested with isset().
    $allowed_html = array_flip($m);
    return;
  }

  $string = $m[1];

  if (substr($string, 0, 1) != '<') {
    // We matched a lone ">" character; emit it as an entity.
    return '&gt;';
  }
  elseif (strlen($string) == 1) {
    // We matched a lone "<" character; emit it as an entity.
    return '&lt;';
  }

  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
    // Seriously malformed
    return '';
  }

  $slash = trim($matches[1]);
  $elem = $matches[2];
  $attrlist = $matches[3];

  if (!isset($allowed_html[strtolower($elem)])) {
    // Disallowed HTML element
    return '';
  }

  if ($slash != '') {
    // Closing tag: attributes, if any, are discarded.
    return "</$elem>";
  }

  // Is there a closing XHTML slash at the end of the attributes?
  $attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $count);
  $xhtml_slash = $count ? ' /' : '';

  // Clean up attributes
  $attr2 = implode(' ', _filter_xss_attributes($attrlist));
  $attr2 = preg_replace('/[<>]/', '', $attr2);
  $attr2 = strlen($attr2) ? ' ' . $attr2 : '';

  return "<$elem$attr2$xhtml_slash>";
}
|
||||||
|
|
||||||
|
/**
 * Processes a string of HTML attributes.
 *
 * Walks the attribute string with a small state machine: mode 0 expects an
 * attribute name, mode 1 an equals sign or the end of a valueless attribute,
 * and mode 2 an attribute value. "style" and "on*" attributes are parsed but
 * never emitted.
 *
 * @param $attr
 *   The raw attribute portion of an HTML tag.
 * @return
 *   An array holding the cleaned up version of each kept HTML attribute.
 */
function _filter_xss_attributes($attr) {
  $attrarr = array();
  $mode = 0;
  $attrname = '';
  // Initialized so the final valueless-attribute check below never reads an
  // undefined variable.
  $skip = FALSE;

  while (strlen($attr) != 0) {
    // Was the last operation successful?
    $working = 0;

    switch ($mode) {
      case 0:
        // Attribute name, href for instance
        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
          $attrname = strtolower($match[1]);
          // Inline styles and event handlers are always dropped.
          $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
          $working = $mode = 1;
          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
        }
        break;

      case 1:
        // Equals sign or valueless ("selected")
        if (preg_match('/^\s*=\s*/', $attr)) {
          $working = 1; $mode = 2;
          $attr = preg_replace('/^\s*=\s*/', '', $attr);
          break;
        }

        if (preg_match('/^\s+/', $attr)) {
          $working = 1; $mode = 0;
          if (!$skip) {
            $attrarr[] = $attrname;
          }
          $attr = preg_replace('/^\s+/', '', $attr);
        }
        break;

      case 2:
        // Attribute value, a URL after href= for instance
        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname=\"$thisval\"";
          }
          $working = 1;
          $mode = 0;
          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
          break;
        }

        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname='$thisval'";
          }
          $working = 1; $mode = 0;
          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
          break;
        }

        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            // Unquoted values are re-emitted with double quotes.
            $attrarr[] = "$attrname=\"$thisval\"";
          }
          $working = 1; $mode = 0;
          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
        }
        break;
    }

    if ($working == 0) {
      // not well formed, remove and try again
      // NOTE(review): the single-quote branch below ends with "|" and is
      // followed by another "|", which leaves an empty alternative in the
      // group. Left untouched to preserve matching behavior; confirm whether
      // it is intended.
      $attr = preg_replace('/
        ^
        (
        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
        |               # or
        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
        |               # or
        \S              # - a non-whitespace character
        )*              # any number of the above three
        \s*             # any number of whitespaces
        /x', '', $attr);
      $mode = 0;
    }
  }

  // The attribute list ends with a valueless attribute like "selected".
  // Honor $skip here as well, so a trailing "style" or "on*" attribute is
  // dropped just as it would be in the middle of the list.
  if ($mode == 1 && !$skip) {
    $attrarr[] = $attrname;
  }
  return $attrarr;
}
|
||||||
|
|
||||||
|
/**
 * Processes an HTML attribute value and ensures it does not contain an URL
 * with a disallowed protocol (e.g. javascript:).
 *
 * @param $string
 *   The string with the attribute value.
 * @param $decode
 *   Whether to decode entities in the $string. Set to FALSE if the $string
 *   is in plain text, TRUE otherwise. Defaults to TRUE.
 * @return
 *   Cleaned up and HTML-escaped version of $string.
 */
function filter_xss_bad_protocol($string, $decode = TRUE) {
  static $allowed_protocols;

  // Build the lookup table once per request; protocol names become keys.
  if (!isset($allowed_protocols)) {
    $allowed_protocols = array_flip(variable_get('filter_allowed_protocols', array('ftp', 'http', 'https', 'irc', 'mailto', 'news', 'nntp', 'rtsp', 'sftp', 'ssh', 'telnet', 'webcal')));
  }

  // Get the plain text representation of the attribute value (i.e. its meaning).
  if ($decode) {
    $string = decode_entities($string);
  }

  // Keep stripping leading "scheme:" prefixes until the value starts with an
  // allowed protocol or no longer looks like it carries one.
  while (TRUE) {
    $colon = strpos($string, ':');
    if ($colon === FALSE || $colon === 0) {
      // No colon, or nothing in front of it: there is no protocol to check.
      break;
    }

    $scheme = substr($string, 0, $colon);

    // If a colon is preceded by a slash, question mark or hash, it cannot
    // possibly be part of the URL scheme. This must be a relative URL,
    // which inherits the (safe) protocol of the base document.
    if (preg_match('![/?#]!', $scheme)) {
      break;
    }

    // Per RFC2616, section 3.2.3 (URI Comparison) scheme comparison must be
    // case-insensitive.
    if (isset($allowed_protocols[strtolower($scheme)])) {
      break;
    }

    // Disallowed protocol: drop it together with its colon and look again.
    $string = substr($string, $colon + 1);
  }

  return check_plain($string);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @} End of "defgroup sanitization".
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @defgroup format Formatting
|
* @defgroup format Formatting
|
||||||
* @{
|
* @{
|
||||||
|
|
|
@ -911,288 +911,6 @@ function _filter_autop($text) {
|
||||||
return $output;
|
return $output;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Very permissive XSS/HTML filter for admin-only use.
 *
 * Intended for fields where running the whole filter system is impractical
 * but some (mainly inline) mark-up is desired, so check_plain() would be too
 * strict. Every tag that can be used inside an HTML body is allowed, except
 * for scripts and styles.
 *
 * @param $string
 *   The string with raw HTML in it.
 * @return
 *   The result of filter_xss() called with $string and the admin tag list.
 */
function filter_xss_admin($string) {
  // Tags permitted for administrators; handed to filter_xss() unchanged.
  $admin_tags = array(
    'a', 'abbr', 'acronym', 'address', 'b', 'bdo', 'big', 'blockquote', 'br',
    'caption', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'div',
    'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img',
    'ins', 'kbd', 'li', 'ol', 'p', 'pre', 'q', 'samp', 'small', 'span',
    'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead',
    'tr', 'tt', 'ul', 'var',
  );

  return filter_xss($string, $admin_tags);
}
|
|
||||||
|
|
||||||
/**
 * Filters XSS. Based on kses by Ulf Harnhammar, see
 * http://sourceforge.net/projects/kses
 *
 * For examples of various XSS attacks, see:
 * http://ha.ckers.org/xss.html
 *
 * This code does four things:
 * - Removes characters and constructs that can trick browsers
 * - Makes sure all HTML entities are well-formed
 * - Makes sure all HTML tags and attributes are well-formed
 * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g. javascript:)
 *
 * @param $string
 *   The string with raw HTML in it. It will be stripped of everything that can cause
 *   an XSS attack.
 * @param $allowed_tags
 *   An array of allowed tags.
 * @return
 *   The filtered string, or an empty string if $string fails
 *   drupal_validate_utf8().
 */
function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
  // Only operate on valid UTF-8 strings. This is necessary to prevent cross
  // site scripting issues on Internet Explorer 6.
  if (!drupal_validate_utf8($string)) {
    return '';
  }
  // Hand the allowed tag list to the callback, which keeps it in a static
  // variable for the preg_replace_callback() run below.
  _filter_xss_split($allowed_tags, TRUE);
  // Remove NULL characters (ignored by some browsers)
  $string = str_replace(chr(0), '', $string);
  // Remove Netscape 4 JS entities
  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);

  // Defuse all HTML entities: every ampersand becomes &amp; so that nothing
  // is interpreted as an entity unless it is explicitly changed back below.
  $string = str_replace('&', '&amp;', $string);
  // Change back only well-formed entities in our whitelist
  // Named entities
  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
  // Decimal numeric entities
  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
  // Hexadecimal numeric entities
  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);

  // Pass every tag-like chunk and every stray angle bracket through
  // _filter_xss_split(), which decides what replaces it.
  return preg_replace_callback('%
    (
    <(?=[^a-zA-Z!/])  # a lone <
    |                 # or
    <[^>]*(>|$)       # a string that starts with a <, up until the > or the end of the string
    |                 # or
    >                 # just a >
    )%x', '_filter_xss_split', $string);
}
|
|
||||||
|
|
||||||
/**
 * Processes an HTML tag.
 *
 * Serves two purposes depending on $store: it either remembers the list of
 * allowed tags in a static variable, or it cleans up one matched chunk of
 * HTML on behalf of preg_replace_callback().
 *
 * @param $m
 *   An array with various meaning depending on the value of $store.
 *   If $store is TRUE then the array contains the allowed tags.
 *   If $store is FALSE then the array is a regular expression match whose
 *   element 1 is the HTML tag (or lone angle bracket) to process.
 * @param $store
 *   Whether to store $m.
 * @return
 *   Nothing when $store is TRUE. Otherwise: an escaped entity for a lone
 *   angle bracket, an empty string if the element is malformed or not
 *   allowed, or the cleaned up version of the HTML element.
 */
function _filter_xss_split($m, $store = FALSE) {
  static $allowed_html;

  if ($store) {
    // Flip so that tag names become keys and can be tested with isset().
    $allowed_html = array_flip($m);
    return;
  }

  $string = $m[1];

  if (substr($string, 0, 1) != '<') {
    // We matched a lone ">" character; emit it as an entity.
    return '&gt;';
  }
  elseif (strlen($string) == 1) {
    // We matched a lone "<" character; emit it as an entity.
    return '&lt;';
  }

  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
    // Seriously malformed
    return '';
  }

  $slash = trim($matches[1]);
  $elem = $matches[2];
  $attrlist = $matches[3];

  if (!isset($allowed_html[strtolower($elem)])) {
    // Disallowed HTML element
    return '';
  }

  if ($slash != '') {
    // Closing tag: attributes, if any, are discarded.
    return "</$elem>";
  }

  // Is there a closing XHTML slash at the end of the attributes?
  $attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $count);
  $xhtml_slash = $count ? ' /' : '';

  // Clean up attributes
  $attr2 = implode(' ', _filter_xss_attributes($attrlist));
  $attr2 = preg_replace('/[<>]/', '', $attr2);
  $attr2 = strlen($attr2) ? ' ' . $attr2 : '';

  return "<$elem$attr2$xhtml_slash>";
}
|
|
||||||
|
|
||||||
/**
 * Processes a string of HTML attributes.
 *
 * Walks the attribute string with a small state machine: mode 0 expects an
 * attribute name, mode 1 an equals sign or the end of a valueless attribute,
 * and mode 2 an attribute value. "style" and "on*" attributes are parsed but
 * never emitted.
 *
 * @param $attr
 *   The raw attribute portion of an HTML tag.
 * @return
 *   An array holding the cleaned up version of each kept HTML attribute.
 */
function _filter_xss_attributes($attr) {
  $attrarr = array();
  $mode = 0;
  $attrname = '';
  // Initialized so the final valueless-attribute check below never reads an
  // undefined variable.
  $skip = FALSE;

  while (strlen($attr) != 0) {
    // Was the last operation successful?
    $working = 0;

    switch ($mode) {
      case 0:
        // Attribute name, href for instance
        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
          $attrname = strtolower($match[1]);
          // Inline styles and event handlers are always dropped.
          $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
          $working = $mode = 1;
          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
        }
        break;

      case 1:
        // Equals sign or valueless ("selected")
        if (preg_match('/^\s*=\s*/', $attr)) {
          $working = 1; $mode = 2;
          $attr = preg_replace('/^\s*=\s*/', '', $attr);
          break;
        }

        if (preg_match('/^\s+/', $attr)) {
          $working = 1; $mode = 0;
          if (!$skip) {
            $attrarr[] = $attrname;
          }
          $attr = preg_replace('/^\s+/', '', $attr);
        }
        break;

      case 2:
        // Attribute value, a URL after href= for instance
        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname=\"$thisval\"";
          }
          $working = 1;
          $mode = 0;
          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
          break;
        }

        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname='$thisval'";
          }
          $working = 1; $mode = 0;
          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
          break;
        }

        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            // Unquoted values are re-emitted with double quotes.
            $attrarr[] = "$attrname=\"$thisval\"";
          }
          $working = 1; $mode = 0;
          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
        }
        break;
    }

    if ($working == 0) {
      // not well formed, remove and try again
      // NOTE(review): the single-quote branch below ends with "|" and is
      // followed by another "|", which leaves an empty alternative in the
      // group. Left untouched to preserve matching behavior; confirm whether
      // it is intended.
      $attr = preg_replace('/
        ^
        (
        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
        |               # or
        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
        |               # or
        \S              # - a non-whitespace character
        )*              # any number of the above three
        \s*             # any number of whitespaces
        /x', '', $attr);
      $mode = 0;
    }
  }

  // The attribute list ends with a valueless attribute like "selected".
  // Honor $skip here as well, so a trailing "style" or "on*" attribute is
  // dropped just as it would be in the middle of the list.
  if ($mode == 1 && !$skip) {
    $attrarr[] = $attrname;
  }
  return $attrarr;
}
|
|
||||||
|
|
||||||
/**
 * Processes an HTML attribute value and ensures it does not contain an URL
 * with a disallowed protocol (e.g. javascript:)
 *
 * @param $string
 *   The string with the attribute value.
 * @param $decode
 *   Whether to decode entities in the $string. Set to FALSE if the $string
 *   is in plain text, TRUE otherwise. Defaults to TRUE.
 * @return
 *   Cleaned up and HTML-escaped version of $string.
 */
function filter_xss_bad_protocol($string, $decode = TRUE) {
  static $allowed_protocols;

  // Build the lookup table once per request; protocol names become keys.
  if (!isset($allowed_protocols)) {
    $allowed_protocols = array_flip(variable_get('filter_allowed_protocols', array('ftp', 'http', 'https', 'irc', 'mailto', 'news', 'nntp', 'rtsp', 'sftp', 'ssh', 'telnet', 'webcal')));
  }

  // Get the plain text representation of the attribute value (i.e. its meaning).
  if ($decode) {
    $string = decode_entities($string);
  }

  // Keep stripping leading "scheme:" prefixes until the value starts with an
  // allowed protocol or no longer looks like it carries one.
  while (TRUE) {
    $colon = strpos($string, ':');
    if ($colon === FALSE || $colon === 0) {
      // No colon, or nothing in front of it: there is no protocol to check.
      break;
    }

    $scheme = substr($string, 0, $colon);

    // If a colon is preceded by a slash, question mark or hash, it cannot
    // possibly be part of the URL scheme. This must be a relative URL,
    // which inherits the (safe) protocol of the base document.
    if (preg_match('![/?#]!', $scheme)) {
      break;
    }

    // Per RFC2616, section 3.2.3 (URI Comparison) scheme comparison must be
    // case-insensitive.
    if (isset($allowed_protocols[strtolower($scheme)])) {
      break;
    }

    // Disallowed protocol: drop it together with its colon and look again.
    $string = substr($string, $colon + 1);
  }

  return check_plain($string);
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @} End of "Standard filters".
|
* @} End of "Standard filters".
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Reference in New Issue