- Patch #374441 by tic200, Damien Tournoud, scor: refactor Drupal HTML corrector using PHP 5's XML/DOM parser.
parent
5468b47b91
commit
93728e5d32
|
@ -73,6 +73,8 @@ Drupal 7.0, xxxx-xx-xx (development version)
|
|||
* If your site is being upgraded from Drupal 6 and you do not have the
|
||||
contributed date or event modules installed, user time zone settings will
|
||||
fall back to the system time zone and will have to be reconfigured by each user.
|
||||
- Filter system:
|
||||
* Refactored the HTML corrector to take advantage of PHP 5 features.
|
||||
- Removed ping module:
|
||||
* Contributed modules with similar functionality are available.
|
||||
- Refactored the "access rules" component of user module:
|
||||
|
|
|
@ -286,12 +286,12 @@ class TextSummaryTestCase extends DrupalWebTestCase {
|
|||
// And using a text format WITH the line-break and htmlcorrector filters.
|
||||
$expected_lb = array(
|
||||
"<p>\nHi\n</p>\n<p>\nfolks\n<br />\n!\n</p>",
|
||||
"<",
|
||||
"<p",
|
||||
"<p></p>",
|
||||
"<p></p>",
|
||||
"<p></p>",
|
||||
"<p></p>",
|
||||
"",
|
||||
"<p />",
|
||||
"<p />",
|
||||
"<p />",
|
||||
"<p />",
|
||||
"<p />",
|
||||
"<p>\nHi</p>",
|
||||
"<p>\nHi</p>",
|
||||
"<p>\nHi</p>",
|
||||
|
|
|
@ -757,74 +757,21 @@ function _filter_url($text, $format) {
|
|||
* Scan input and make sure that all HTML tags are properly closed and nested.
|
||||
*/
|
||||
function _filter_htmlcorrector($text) {
|
||||
// Prepare tag lists.
|
||||
static $no_nesting, $single_use;
|
||||
if (!isset($no_nesting)) {
|
||||
// Tags which cannot be nested but are typically left unclosed.
|
||||
$no_nesting = drupal_map_assoc(array('li', 'p'));
|
||||
// Ignore warnings during HTML soup loading.
|
||||
$htmlDom = @DOMDocument::loadHTML('<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>' . $text . '</body></html>');
|
||||
|
||||
// Single use tags in HTML4
|
||||
$single_use = drupal_map_assoc(array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame'));
|
||||
// The result of DOMDocument->saveXML($bodyNode) is a partial (X)HTML document.
|
||||
// We only need what is inside the body tag.
|
||||
$bodyNode = $htmlDom->getElementsByTagName('body')->item(0);
|
||||
if (preg_match("|^<body[^>]*>(.*)</body>$|s", $htmlDom->saveXML($bodyNode), $matches)) {
|
||||
$body_content = $matches[1];
|
||||
// The XHTML guidelines recommend including a space before the trailing /
|
||||
// and > of empty elements for better rendering on HTML user agents.
|
||||
return preg_replace('|<([^>]*)/>|i', '<$1 />', $body_content);
|
||||
}
|
||||
|
||||
// Properly entify angles.
|
||||
$text = preg_replace('!<([^a-zA-Z/])!', '<\1', $text);
|
||||
|
||||
// Split tags from text.
|
||||
$split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||
// Note: PHP ensures the array consists of alternating delimiters and literals
|
||||
// and begins and ends with a literal (inserting $null as required).
|
||||
|
||||
$tag = FALSE; // Odd/even counter. Tag or no tag.
|
||||
$stack = array();
|
||||
$output = '';
|
||||
foreach ($split as $value) {
|
||||
// Process HTML tags.
|
||||
if ($tag) {
|
||||
list($tagname) = explode(' ', strtolower($value), 2);
|
||||
// Closing tag
|
||||
if ($tagname{0} == '/') {
|
||||
$tagname = substr($tagname, 1);
|
||||
// Discard XHTML closing tags for single use tags.
|
||||
if (!isset($single_use[$tagname])) {
|
||||
// See if we possibly have a matching opening tag on the stack.
|
||||
if (in_array($tagname, $stack)) {
|
||||
// Close other tags lingering first.
|
||||
do {
|
||||
$output .= '</' . $stack[0] . '>';
|
||||
} while (array_shift($stack) != $tagname);
|
||||
}
|
||||
// Otherwise, discard it.
|
||||
}
|
||||
}
|
||||
// Opening tag
|
||||
else {
|
||||
// See if we have an identical 'no nesting' tag already open and close it if found.
|
||||
if (count($stack) && ($stack[0] == $tagname) && isset($no_nesting[$stack[0]])) {
|
||||
$output .= '</' . array_shift($stack) . '>';
|
||||
}
|
||||
// Push non-single-use tags onto the stack
|
||||
if (!isset($single_use[$tagname])) {
|
||||
array_unshift($stack, $tagname);
|
||||
}
|
||||
// Add trailing slash to single-use tags as per X(HT)ML.
|
||||
else {
|
||||
$value = rtrim($value, ' /') . ' /';
|
||||
}
|
||||
$output .= '<' . $value . '>';
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Passthrough all text.
|
||||
$output .= $value;
|
||||
}
|
||||
$tag = !$tag;
|
||||
else {
|
||||
return '';
|
||||
}
|
||||
// Close remaining tags.
|
||||
while (count($stack) > 0) {
|
||||
$output .= '</' . array_shift($stack) . '>';
|
||||
}
|
||||
return $output;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -182,24 +182,20 @@ class FilterAdminTestCase extends DrupalWebTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
class FilterTestCase extends DrupalWebTestCase {
|
||||
/**
|
||||
* Unit tests for core filters.
|
||||
*/
|
||||
class FilterUnitTest extends DrupalWebTestCase {
|
||||
protected $format;
|
||||
|
||||
public static function getInfo() {
|
||||
return array(
|
||||
'name' => 'Core filters',
|
||||
'description' => 'Filter each filter individually: Convert URLs into links, Convert line breaks, Correct broken HTML, Escape all HTML, Limit allowed HTML tags.',
|
||||
'description' => 'Filter each filter individually: convert line breaks, correct broken HTML.',
|
||||
'group' => 'Filter',
|
||||
);
|
||||
}
|
||||
|
||||
function setUp() {
|
||||
parent::setUp();
|
||||
|
||||
$admin_user = $this->drupalCreateUser(array('administer filters', 'create page content'));
|
||||
$this->drupalLogin($admin_user);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the line break filter.
|
||||
*/
|
||||
|
@ -594,17 +590,17 @@ class FilterTestCase extends DrupalWebTestCase {
|
|||
}
|
||||
|
||||
/**
|
||||
* Test the HTML corrector.
|
||||
* Test the HTML corrector filter.
|
||||
*
|
||||
* @todo This test could really use some validity checking function.
|
||||
*/
|
||||
function testHtmlCorrector() {
|
||||
function testHtmlCorrectorFilter() {
|
||||
// Tag closing.
|
||||
$f = _filter_htmlcorrector('<p>text');
|
||||
$this->assertEqual($f, '<p>text</p>', t('HTML corrector -- tag closing at the end of input.'));
|
||||
|
||||
$f = _filter_htmlcorrector('<p>text<p><p>text');
|
||||
$this->assertEqual($f, '<p>text</p><p></p><p>text</p>', t('HTML corrector -- tag closing.'));
|
||||
$this->assertEqual($f, '<p>text</p><p /><p>text</p>', t('HTML corrector -- tag closing.'));
|
||||
|
||||
$f = _filter_htmlcorrector("<ul><li>e1<li>e2");
|
||||
$this->assertEqual($f, "<ul><li>e1</li><li>e2</li></ul>", t('HTML corrector -- unclosed list tags.'));
|
||||
|
@ -615,6 +611,70 @@ class FilterTestCase extends DrupalWebTestCase {
|
|||
// XHTML slash for empty elements.
|
||||
$f = _filter_htmlcorrector('<hr><br>');
|
||||
$this->assertEqual($f, '<hr /><br />', t('HTML corrector -- XHTML closing slash.'));
|
||||
|
||||
$f = _filter_htmlcorrector('<P>test</P>');
|
||||
$this->assertEqual($f, '<p>test</p>', t('HTML corrector -- Convert uppercased tags to proper lowercased ones.'));
|
||||
|
||||
$f = _filter_htmlcorrector('<P>test</p>');
|
||||
$this->assertEqual($f, '<p>test</p>', t('HTML corrector -- Convert uppercased tags to proper lowercased ones.'));
|
||||
|
||||
$f = _filter_htmlcorrector('test<hr/>');
|
||||
$this->assertEqual($f, 'test<hr />', t('HTML corrector -- Let proper XHTML pass thru.'));
|
||||
|
||||
$f = _filter_htmlcorrector('test<hr />');
|
||||
$this->assertEqual($f, 'test<hr />', t('HTML corrector -- Let proper XHTML pass thru.'));
|
||||
|
||||
$f = _filter_htmlcorrector('<span class="test" />');
|
||||
$this->assertEqual($f, '<span class="test" />', t('HTML corrector -- Let proper XHTML pass thru.'));
|
||||
|
||||
$f = _filter_htmlcorrector('test1<br class="test">test2');
|
||||
$this->assertEqual($f, 'test1<br class="test" />test2', t('HTML corrector -- Automatically close single tags.'));
|
||||
|
||||
$f = _filter_htmlcorrector('line1<hr>line2');
|
||||
$this->assertEqual($f, 'line1<hr />line2', t('HTML corrector -- Automatically close single tags.'));
|
||||
|
||||
$f = _filter_htmlcorrector('line1<HR>line2');
|
||||
$this->assertEqual($f, 'line1<hr />line2', t('HTML corrector -- Automatically close single tags.'));
|
||||
|
||||
$f = _filter_htmlcorrector('<img src="http://example.com/test.jpg">test</img>');
|
||||
$this->assertEqual($f, '<img src="http://example.com/test.jpg" />test', t('HTML corrector -- Automatically close single tags.'));
|
||||
|
||||
$f = _filter_htmlcorrector('<p>line1<br/><hr/>line2</p>');
|
||||
$this->assertEqual($f, '<p>line1<br /></p><hr />line2', t('HTML corrector -- Move non-inline elements outside of inline containers.'));
|
||||
|
||||
$f = _filter_htmlcorrector('<p>line1<div>line2</div></p>');
|
||||
$this->assertEqual($f, '<p>line1</p><div>line2</div>', t('HTML corrector -- Move non-inline elements outside of inline containers.'));
|
||||
|
||||
$f = _filter_htmlcorrector('<p>test<p>test</p>\n');
|
||||
$this->assertEqual($f, '<p>test</p><p>test</p>\n', t('HTML corrector -- Auto-close improperly nested tags.'));
|
||||
|
||||
$f = _filter_htmlcorrector('<p>Line1<br><STRONG>bold stuff</b>');
|
||||
$this->assertEqual($f, '<p>Line1<br /><strong>bold stuff</strong></p>', t('HTML corrector -- Properly close unclosed tags, and remove useless closing tags.'));
|
||||
|
||||
$f = _filter_htmlcorrector('test <!-- this is a comment -->');
|
||||
$this->assertEqual($f, 'test <!-- this is a comment -->', t('HTML corrector -- Do not touch HTML comments.'));
|
||||
|
||||
$f = _filter_htmlcorrector('test <!--this is a comment-->');
|
||||
$this->assertEqual($f, 'test <!--this is a comment-->', t('HTML corrector -- Do not touch HTML comments.'));
|
||||
|
||||
$f = _filter_htmlcorrector('test <!-- comment <p>another
|
||||
<strong>multiple</strong> line
|
||||
comment</p> -->');
|
||||
$this->assertEqual($f, 'test <!-- comment <p>another
|
||||
<strong>multiple</strong> line
|
||||
comment</p> -->', t('HTML corrector -- Do not touch HTML comments.'));
|
||||
|
||||
$f = _filter_htmlcorrector('test <!-- comment <p>another comment</p> -->');
|
||||
$this->assertEqual($f, 'test <!-- comment <p>another comment</p> -->', t('HTML corrector -- Do not touch HTML comments.'));
|
||||
|
||||
$f = _filter_htmlcorrector('test <!--break-->');
|
||||
$this->assertEqual($f, 'test <!--break-->', t('HTML corrector -- Do not touch HTML comments.'));
|
||||
|
||||
$f = _filter_htmlcorrector('<p>test\n</p>\n');
|
||||
$this->assertEqual($f, '<p>test\n</p>\n', t('HTML corrector -- New-lines are accepted and kept as-is.'));
|
||||
|
||||
$f = _filter_htmlcorrector('<p>دروبال');
|
||||
$this->assertEqual($f, '<p>دروبال</p>', t('HTML corrector -- Encoding is correctly kept.'));
|
||||
}
|
||||
|
||||
function createFormat($filter) {
|
||||
|
|
|
@ -266,6 +266,17 @@ function system_requirements($phase) {
|
|||
include_once DRUPAL_ROOT . '/includes/unicode.inc';
|
||||
$requirements = array_merge($requirements, unicode_requirements());
|
||||
|
||||
// Verify if the DOM PHP 5 extension is available.
|
||||
$has_dom = class_exists('DOMDocument');
|
||||
if (!$has_dom) {
|
||||
$requirements['php_dom'] = array(
|
||||
'title' => $t('PHP DOM Extension'),
|
||||
'value' => $t('Not found'),
|
||||
'severity' => REQUIREMENT_ERROR,
|
||||
'description' => $t("The DOM extension is part of PHP 5 core, but doesn't seem to be enabled on your system. You need to enable the DOM extension on your PHP installation."),
|
||||
);
|
||||
}
|
||||
|
||||
if ($phase == 'runtime') {
|
||||
// Check for update status module.
|
||||
if (!module_exists('update')) {
|
||||
|
|
Loading…
Reference in New Issue