- Patch by jhodgdon, janusman, codycraven: truncate_utf8() only works for latin languages and drupal_substr() had a bug.

merge-requests/26/head
Dries Buytaert 2010-06-10 15:20:48 +00:00
parent e960a8e233
commit e4dd88023d
3 changed files with 244 additions and 86 deletions
includes
modules
simpletest/tests

View File

@ -17,6 +17,67 @@ define('UNICODE_SINGLEBYTE', 0);
*/
define('UNICODE_MULTIBYTE', 1);
/**
* Matches Unicode characters that are word boundaries.
*
* The value is a list of code points and code point ranges written with the
* \x{...} escape. It is not a complete pattern: callers in this code base
* concatenate it between "[" and "]" to form a character class, and apply the
* "u" (UTF-8) modifier to the resulting regular expression.
*
* @see http://unicode.org/glossary
*
* Characters with the following General_category (gc) property values are used
* as word boundaries. While this does not fully conform to the Word Boundaries
* algorithm described in http://unicode.org/reports/tr29, as PCRE does not
* contain the Word_Break property table, this simpler algorithm has to do.
* - Cc, Cf, Cn, Co, Cs: Other.
* - Pc, Pd, Pe, Pf, Pi, Po, Ps: Punctuation.
* - Sc, Sk, Sm, So: Symbols.
* - Zl, Zp, Zs: Separators.
*
* Non-boundary characters include the following General_category (gc) property
* values:
* - Ll, Lm, Lo, Lt, Lu: Letters.
* - Mc, Me, Mn: Combining Marks.
* - Nd, Nl, No: Numbers.
*
* Note that the PCRE property matcher is not used because we wanted to be
* compatible with Unicode 5.2.0 regardless of the PCRE version used (and any
* bugs in PCRE property tables).
*/
define('PREG_CLASS_UNICODE_WORD_BOUNDARY',
'\x{0}-\x{2F}\x{3A}-\x{40}\x{5B}-\x{60}\x{7B}-\x{A9}\x{AB}-\x{B1}\x{B4}' .
'\x{B6}-\x{B8}\x{BB}\x{BF}\x{D7}\x{F7}\x{2C2}-\x{2C5}\x{2D2}-\x{2DF}' .
'\x{2E5}-\x{2EB}\x{2ED}\x{2EF}-\x{2FF}\x{375}\x{37E}-\x{385}\x{387}\x{3F6}' .
'\x{482}\x{55A}-\x{55F}\x{589}-\x{58A}\x{5BE}\x{5C0}\x{5C3}\x{5C6}' .
'\x{5F3}-\x{60F}\x{61B}-\x{61F}\x{66A}-\x{66D}\x{6D4}\x{6DD}\x{6E9}' .
'\x{6FD}-\x{6FE}\x{700}-\x{70F}\x{7F6}-\x{7F9}\x{830}-\x{83E}' .
'\x{964}-\x{965}\x{970}\x{9F2}-\x{9F3}\x{9FA}-\x{9FB}\x{AF1}\x{B70}' .
'\x{BF3}-\x{BFA}\x{C7F}\x{CF1}-\x{CF2}\x{D79}\x{DF4}\x{E3F}\x{E4F}' .
'\x{E5A}-\x{E5B}\x{F01}-\x{F17}\x{F1A}-\x{F1F}\x{F34}\x{F36}\x{F38}' .
'\x{F3A}-\x{F3D}\x{F85}\x{FBE}-\x{FC5}\x{FC7}-\x{FD8}\x{104A}-\x{104F}' .
'\x{109E}-\x{109F}\x{10FB}\x{1360}-\x{1368}\x{1390}-\x{1399}\x{1400}' .
'\x{166D}-\x{166E}\x{1680}\x{169B}-\x{169C}\x{16EB}-\x{16ED}' .
'\x{1735}-\x{1736}\x{17B4}-\x{17B5}\x{17D4}-\x{17D6}\x{17D8}-\x{17DB}' .
'\x{1800}-\x{180A}\x{180E}\x{1940}-\x{1945}\x{19DE}-\x{19FF}' .
'\x{1A1E}-\x{1A1F}\x{1AA0}-\x{1AA6}\x{1AA8}-\x{1AAD}\x{1B5A}-\x{1B6A}' .
'\x{1B74}-\x{1B7C}\x{1C3B}-\x{1C3F}\x{1C7E}-\x{1C7F}\x{1CD3}\x{1FBD}' .
'\x{1FBF}-\x{1FC1}\x{1FCD}-\x{1FCF}\x{1FDD}-\x{1FDF}\x{1FED}-\x{1FEF}' .
'\x{1FFD}-\x{206F}\x{207A}-\x{207E}\x{208A}-\x{208E}\x{20A0}-\x{20B8}' .
'\x{2100}-\x{2101}\x{2103}-\x{2106}\x{2108}-\x{2109}\x{2114}' .
'\x{2116}-\x{2118}\x{211E}-\x{2123}\x{2125}\x{2127}\x{2129}\x{212E}' .
'\x{213A}-\x{213B}\x{2140}-\x{2144}\x{214A}-\x{214D}\x{214F}' .
'\x{2190}-\x{244A}\x{249C}-\x{24E9}\x{2500}-\x{2775}\x{2794}-\x{2B59}' .
'\x{2CE5}-\x{2CEA}\x{2CF9}-\x{2CFC}\x{2CFE}-\x{2CFF}\x{2E00}-\x{2E2E}' .
'\x{2E30}-\x{3004}\x{3008}-\x{3020}\x{3030}\x{3036}-\x{3037}' .
'\x{303D}-\x{303F}\x{309B}-\x{309C}\x{30A0}\x{30FB}\x{3190}-\x{3191}' .
'\x{3196}-\x{319F}\x{31C0}-\x{31E3}\x{3200}-\x{321E}\x{322A}-\x{3250}' .
'\x{3260}-\x{327F}\x{328A}-\x{32B0}\x{32C0}-\x{33FF}\x{4DC0}-\x{4DFF}' .
'\x{A490}-\x{A4C6}\x{A4FE}-\x{A4FF}\x{A60D}-\x{A60F}\x{A673}\x{A67E}' .
'\x{A6F2}-\x{A716}\x{A720}-\x{A721}\x{A789}-\x{A78A}\x{A828}-\x{A82B}' .
'\x{A836}-\x{A839}\x{A874}-\x{A877}\x{A8CE}-\x{A8CF}\x{A8F8}-\x{A8FA}' .
'\x{A92E}-\x{A92F}\x{A95F}\x{A9C1}-\x{A9CD}\x{A9DE}-\x{A9DF}' .
'\x{AA5C}-\x{AA5F}\x{AA77}-\x{AA79}\x{AADE}-\x{AADF}\x{ABEB}' .
'\x{D800}-\x{F8FF}\x{FB29}\x{FD3E}-\x{FD3F}\x{FDFC}-\x{FDFD}' .
'\x{FE10}-\x{FE19}\x{FE30}-\x{FE6B}\x{FEFF}-\x{FF0F}\x{FF1A}-\x{FF20}' .
'\x{FF3B}-\x{FF40}\x{FF5B}-\x{FF65}\x{FFE0}-\x{FFFD}');
/**
* Wrapper around _unicode_check().
*/
@ -213,44 +274,80 @@ function drupal_truncate_bytes($string, $len) {
}
/**
* Truncate a UTF-8-encoded string safely to a number of characters.
* Truncates a UTF-8-encoded string safely to a number of characters.
*
* @param $string
* The string to truncate.
* @param $len
* An upper limit on the returned string length.
* @param $max_length
* An upper limit on the returned string length, including trailing ellipsis
* if $add_ellipsis is TRUE.
* @param $wordsafe
* Flag to truncate at last space within the upper limit. Defaults to FALSE.
* @param $dots
* Flag to add trailing dots. Defaults to FALSE.
* If TRUE, attempt to truncate on a word boundary. Word boundaries are
* spaces, punctuation, and Unicode characters used as word boundaries in
* non-Latin languages; see PREG_CLASS_UNICODE_WORD_BOUNDARY for more
* information. If a word boundary cannot be found that would make the length
* of the returned string fall within length guidelines (see parameters
* $max_length and $min_wordsafe_length), word boundaries are ignored.
* @param $add_ellipsis
* If TRUE, add t('...') to the end of the truncated string (defaults to
* FALSE). The string length will still fall within $max_length.
* @param $min_wordsafe_length
* If $wordsafe is TRUE, the minimum acceptable length for truncation (before
* adding an ellipsis, if $add_ellipsis is TRUE). Has no effect if $wordsafe
* is FALSE. This can be used to prevent having a very short resulting string
* that will not be understandable. For instance, if you are truncating the
* string "See myverylongurlexample.com for more information" to a word-safe
* return length of 20, the only available word boundary within 20 characters
* is after the word "See", which wouldn't leave a very informative string. If
* you had set $min_wordsafe_length to 10, though, the function would realise
* that "See" alone is too short, and would then just truncate ignoring word
* boundaries, giving you "See myverylongurl..." (assuming you had set
* $add_ellipsis to TRUE).
*
* @return
* The truncated string.
*/
function truncate_utf8($string, $len, $wordsafe = FALSE, $dots = FALSE) {
function truncate_utf8($string, $max_length, $wordsafe = FALSE, $add_ellipsis = FALSE, $min_wordsafe_length = 1) {
$ellipsis = '';
$max_length = max($max_length, 0);
$min_wordsafe_length = max($min_wordsafe_length, 0);
if (drupal_strlen($string) <= $len) {
if (drupal_strlen($string) <= $max_length) {
// No truncation needed, so don't add ellipsis, just return.
return $string;
}
if ($dots) {
$len -= 4;
if ($add_ellipsis) {
// Truncate ellipsis in case $max_length is small.
$ellipsis = drupal_substr(t('...'), 0, $max_length);
$max_length -= drupal_strlen($ellipsis);
$max_length = max($max_length, 0);
}
if ($max_length <= $min_wordsafe_length) {
// Do not attempt word-safe if lengths are bad.
$wordsafe = FALSE;
}
if ($wordsafe) {
$string = drupal_substr($string, 0, $len + 1); // leave one more character
if ($last_space = strrpos($string, ' ')) { // space exists AND is not on position 0
$string = substr($string, 0, $last_space);
$matches = array();
// Find the last word boundary, if there is one within $min_wordsafe_length
// to $max_length characters. preg_match() is always greedy, so it will
// find the longest string possible.
$found = preg_match('/^(.{' . $min_wordsafe_length . ',' . $max_length . '})[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . ']/u', $string, $matches);
if ($found) {
$string = $matches[1];
}
else {
$string = drupal_substr($string, 0, $len);
$string = drupal_substr($string, 0, $max_length);
}
}
else {
$string = drupal_substr($string, 0, $len);
$string = drupal_substr($string, 0, $max_length);
}
if ($dots) {
$string .= ' ...';
if ($add_ellipsis) {
$string .= $ellipsis;
}
return $string;
@ -516,16 +613,21 @@ function drupal_substr($text, $start, $length = NULL) {
// Count all the continuation bytes from the starting index until we have
// found $length characters or reached the end of the string, then
// backtrace one byte.
$iend = $istart - 1; $chars = -1;
$iend = $istart - 1;
$chars = -1;
$last_real = FALSE;
while ($iend < $strlen - 1 && $chars < $length) {
$iend++;
$c = ord($text[$iend]);
$last_real = FALSE;
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
$last_real = TRUE;
}
}
// Backtrace one byte if the end of the string was not reached.
if ($iend < $strlen - 1) {
// Backtrace one byte if the last character we found was a real character
// and we don't need it.
if ($last_real && $chars >= $length) {
$iend--;
}
}
@ -548,7 +650,7 @@ function drupal_substr($text, $start, $length = NULL) {
}
else {
// $length == 0, return an empty string.
$iend = $istart - 1;
return '';
}
return substr($text, $istart, max(0, $iend - $istart + 1));

View File

@ -6,68 +6,6 @@
* Enables site-wide keyword searching.
*/
/**
* Matches Unicode character classes to exclude from the search index.
*
* @see http://unicode.org/glossary
*
* Characters with the following General_category (gc) property values are
* excluded from the search index. Also, they are used as word boundaries.
* While this does not fully conform to the Word Boundaries algorithm
* described in http://unicode.org/reports/tr29, as PCRE does not contain the
* Word_Break property table, this simpler algorithm has to do.
* - Cc, Cf, Cn, Co, Cs: Other.
* - Pc, Pd, Pe, Pf, Pi, Po, Ps: Punctuation.
* - Sc, Sk, Sm, So: Symbols.
* - Zl, Zp, Zs: Separators.
*
* Consequently, the index only contains characters with the following
* General_category (gc) property values:
* - Ll, Lm, Lo, Lt, Lu: Letters.
* - Mc, Me, Mn: Combining Marks.
* - Nd, Nl, No: Numbers.
*
* Note that the PCRE property matcher is not used because we wanted to be
* compatible with Unicode 5.2.0 regardless of the PCRE version used (and any
* bugs in PCRE property tables).
*/
define('PREG_CLASS_SEARCH_EXCLUDE',
'\x{0}-\x{2F}\x{3A}-\x{40}\x{5B}-\x{60}\x{7B}-\x{A9}\x{AB}-\x{B1}\x{B4}' .
'\x{B6}-\x{B8}\x{BB}\x{BF}\x{D7}\x{F7}\x{2C2}-\x{2C5}\x{2D2}-\x{2DF}' .
'\x{2E5}-\x{2EB}\x{2ED}\x{2EF}-\x{2FF}\x{375}\x{37E}-\x{385}\x{387}\x{3F6}' .
'\x{482}\x{55A}-\x{55F}\x{589}-\x{58A}\x{5BE}\x{5C0}\x{5C3}\x{5C6}' .
'\x{5F3}-\x{60F}\x{61B}-\x{61F}\x{66A}-\x{66D}\x{6D4}\x{6DD}\x{6E9}' .
'\x{6FD}-\x{6FE}\x{700}-\x{70F}\x{7F6}-\x{7F9}\x{830}-\x{83E}' .
'\x{964}-\x{965}\x{970}\x{9F2}-\x{9F3}\x{9FA}-\x{9FB}\x{AF1}\x{B70}' .
'\x{BF3}-\x{BFA}\x{C7F}\x{CF1}-\x{CF2}\x{D79}\x{DF4}\x{E3F}\x{E4F}' .
'\x{E5A}-\x{E5B}\x{F01}-\x{F17}\x{F1A}-\x{F1F}\x{F34}\x{F36}\x{F38}' .
'\x{F3A}-\x{F3D}\x{F85}\x{FBE}-\x{FC5}\x{FC7}-\x{FD8}\x{104A}-\x{104F}' .
'\x{109E}-\x{109F}\x{10FB}\x{1360}-\x{1368}\x{1390}-\x{1399}\x{1400}' .
'\x{166D}-\x{166E}\x{1680}\x{169B}-\x{169C}\x{16EB}-\x{16ED}' .
'\x{1735}-\x{1736}\x{17B4}-\x{17B5}\x{17D4}-\x{17D6}\x{17D8}-\x{17DB}' .
'\x{1800}-\x{180A}\x{180E}\x{1940}-\x{1945}\x{19DE}-\x{19FF}' .
'\x{1A1E}-\x{1A1F}\x{1AA0}-\x{1AA6}\x{1AA8}-\x{1AAD}\x{1B5A}-\x{1B6A}' .
'\x{1B74}-\x{1B7C}\x{1C3B}-\x{1C3F}\x{1C7E}-\x{1C7F}\x{1CD3}\x{1FBD}' .
'\x{1FBF}-\x{1FC1}\x{1FCD}-\x{1FCF}\x{1FDD}-\x{1FDF}\x{1FED}-\x{1FEF}' .
'\x{1FFD}-\x{206F}\x{207A}-\x{207E}\x{208A}-\x{208E}\x{20A0}-\x{20B8}' .
'\x{2100}-\x{2101}\x{2103}-\x{2106}\x{2108}-\x{2109}\x{2114}' .
'\x{2116}-\x{2118}\x{211E}-\x{2123}\x{2125}\x{2127}\x{2129}\x{212E}' .
'\x{213A}-\x{213B}\x{2140}-\x{2144}\x{214A}-\x{214D}\x{214F}' .
'\x{2190}-\x{244A}\x{249C}-\x{24E9}\x{2500}-\x{2775}\x{2794}-\x{2B59}' .
'\x{2CE5}-\x{2CEA}\x{2CF9}-\x{2CFC}\x{2CFE}-\x{2CFF}\x{2E00}-\x{2E2E}' .
'\x{2E30}-\x{3004}\x{3008}-\x{3020}\x{3030}\x{3036}-\x{3037}' .
'\x{303D}-\x{303F}\x{309B}-\x{309C}\x{30A0}\x{30FB}\x{3190}-\x{3191}' .
'\x{3196}-\x{319F}\x{31C0}-\x{31E3}\x{3200}-\x{321E}\x{322A}-\x{3250}' .
'\x{3260}-\x{327F}\x{328A}-\x{32B0}\x{32C0}-\x{33FF}\x{4DC0}-\x{4DFF}' .
'\x{A490}-\x{A4C6}\x{A4FE}-\x{A4FF}\x{A60D}-\x{A60F}\x{A673}\x{A67E}' .
'\x{A6F2}-\x{A716}\x{A720}-\x{A721}\x{A789}-\x{A78A}\x{A828}-\x{A82B}' .
'\x{A836}-\x{A839}\x{A874}-\x{A877}\x{A8CE}-\x{A8CF}\x{A8F8}-\x{A8FA}' .
'\x{A92E}-\x{A92F}\x{A95F}\x{A9C1}-\x{A9CD}\x{A9DE}-\x{A9DF}' .
'\x{AA5C}-\x{AA5F}\x{AA77}-\x{AA79}\x{AADE}-\x{AADF}\x{ABEB}' .
'\x{D800}-\x{F8FF}\x{FB29}\x{FD3E}-\x{FD3F}\x{FDFC}-\x{FDFD}' .
'\x{FE10}-\x{FE19}\x{FE30}-\x{FE6B}\x{FEFF}-\x{FF0F}\x{FF1A}-\x{FF20}' .
'\x{FF3B}-\x{FF40}\x{FF5B}-\x{FF65}\x{FFE0}-\x{FFFD}');
/**
* Matches all 'N' Unicode character classes (numbers)
*/
@ -454,7 +392,7 @@ function search_simplify($text) {
// With the exception of the rules above, we consider all punctuation,
// marks, spacers, etc, to be a word boundary.
$text = preg_replace('/[' . PREG_CLASS_SEARCH_EXCLUDE . ']+/u', ' ', $text);
$text = preg_replace('/[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . ']+/u', ' ', $text);
return $text;
}
@ -1075,7 +1013,7 @@ function search_data($keys = NULL, $type = 'node') {
*/
function search_excerpt($keys, $text) {
// We highlight around non-indexable or CJK characters.
$boundary = '(?:(?<=[' . PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK . '])|(?=[' . PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK . ']))';
$boundary = '(?:(?<=[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . PREG_CLASS_CJK . '])|(?=[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . PREG_CLASS_CJK . ']))';
// Extract positive keywords and phrases
preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' ' . $keys, $matches);

View File

@ -48,6 +48,7 @@ class UnicodeUnitTest extends DrupalWebTestCase {
$this->helperTestUcFirst();
$this->helperTestStrLen();
$this->helperTestSubStr();
$this->helperTestTruncate();
}
/**
@ -67,6 +68,7 @@ class UnicodeUnitTest extends DrupalWebTestCase {
$this->helperTestUcFirst();
$this->helperTestStrLen();
$this->helperTestSubStr();
$this->helperTestTruncate();
}
function helperTestStrToLower() {
@ -127,10 +129,18 @@ class UnicodeUnitTest extends DrupalWebTestCase {
function helperTestSubStr() {
$testcase = array(
// 012345678901234567890123
array('frànçAIS is über-åwesome', 0, 0,
''),
array('frànçAIS is über-åwesome', 0, 1,
'f'),
array('frànçAIS is über-åwesome', 0, 8,
'frànçAIS'),
array('frànçAIS is über-åwesome', 0, 23,
'frànçAIS is über-åwesom'),
array('frànçAIS is über-åwesome', 0, 24,
'frànçAIS is über-åwesome'),
array('frànçAIS is über-åwesome', 0, 25,
'frànçAIS is über-åwesome'),
array('frànçAIS is über-åwesome', 0, 100,
'frànçAIS is über-åwesome'),
array('frànçAIS is über-åwesome', 4, 4,
@ -141,16 +151,38 @@ class UnicodeUnitTest extends DrupalWebTestCase {
''),
array('frànçAIS is über-åwesome', -4, 2,
'so'),
array('frànçAIS is über-åwesome', -4, 3,
'som'),
array('frànçAIS is über-åwesome', -4, 4,
'some'),
array('frànçAIS is über-åwesome', -4, 5,
'some'),
array('frànçAIS is über-åwesome', -7, 10,
'åwesome'),
array('frànçAIS is über-åwesome', 5, -10,
'AIS is üb'),
array('frànçAIS is über-åwesome', 0, -10,
'frànçAIS is üb'),
array('frànçAIS is über-åwesome', 0, -1,
'frànçAIS is über-åwesom'),
array('frànçAIS is über-åwesome', -7, -2,
'åweso'),
array('frànçAIS is über-åwesome', -7, -6,
'å'),
array('frànçAIS is über-åwesome', -7, -7,
''),
array('frànçAIS is über-åwesome', -7, -8,
''),
array('...', 0, 2, '..'),
array('以呂波耳・ほへとち。リヌルヲ。', 1, 3,
'呂波耳'),
);
foreach ($testcase as $test) {
list($input, $start, $length, $output) = $test;
$this->assertEqual(drupal_substr($input, $start, $length), $output, t('%input substring-ed at offset %offset for %length characters is %output', array('%input' => $input, '%offset' => $start, '%length' => $length, '%output' => $output)));
$result = drupal_substr($input, $start, $length);
$this->assertEqual($result, $output, t('%input substring at offset %offset for %length characters is %output (got %result)', array('%input' => $input, '%offset' => $start, '%length' => $length, '%output' => $output, '%result' => $result)));
}
}
@ -215,4 +247,90 @@ class UnicodeUnitTest extends DrupalWebTestCase {
$this->assertIdentical(decode_entities($input, $exclude), $output, t('Make sure the decoded entity of %input, excluding %excludes, is %output', array('%input' => $input, '%excludes' => implode(',', $exclude), '%output' => $output)));
}
}
/**
 * Checks truncate_utf8() against tables of known inputs and outputs.
 *
 * Three scenarios are covered, in this order: plain truncation, truncation
 * with an ellipsis, and word-safe truncation with an ellipsis. Every case is
 * an array holding the input string, the maximum length, and the string that
 * truncate_utf8() is expected to return.
 */
function helperTestTruncate() {
  // Inputs that are reused by several cases.
  $french = 'frànçAIS is über-åwesome';
  $spanish = '¿Dónde está el niño?';
  $help = 'Help! Help! Help!';

  // Each scenario is: word-safe flag, ellipsis flag, list of cases.
  $scenarios = array(
    // Neither word-safe nor ellipsis.
    array(FALSE, FALSE, array(
      array($french, 24, 'frànçAIS is über-åwesome'),
      array($french, 23, 'frànçAIS is über-åwesom'),
      array($french, 17, 'frànçAIS is über-'),
      array('以呂波耳・ほへとち。リヌルヲ。', 6, '以呂波耳・ほ'),
    )),
    // Ellipsis only.
    array(FALSE, TRUE, array(
      array($french, 24, 'frànçAIS is über-åwesome'),
      array($french, 23, 'frànçAIS is über-åwe...'),
      array($french, 17, 'frànçAIS is üb...'),
    )),
    // Word-safe with ellipsis.
    array(TRUE, TRUE, array(
      array('123', 1, '.'),
      array('123', 2, '..'),
      array('123', 3, '123'),
      array('1234', 3, '...'),
      array('1234567890', 10, '1234567890'),
      array('12345678901', 10, '1234567...'),
      array('12345678901', 11, '12345678901'),
      array('123456789012', 11, '12345678...'),
      array('12345 7890', 10, '12345 7890'),
      array('12345 7890', 9, '12345...'),
      array('123 567 90', 10, '123 567 90'),
      array('123 567 901', 10, '123 567...'),
      array('Stop. Hammertime.', 17, 'Stop. Hammertime.'),
      array('Stop. Hammertime.', 16, 'Stop....'),
      array($french, 24, 'frànçAIS is über-åwesome'),
      array($french, 23, 'frànçAIS is über...'),
      array($french, 17, 'frànçAIS is...'),
      array($spanish, 20, '¿Dónde está el niño?'),
      array($spanish, 19, '¿Dónde está el...'),
      array($spanish, 15, '¿Dónde está...'),
      array($spanish, 10, '¿Dónde...'),
      array($help, 17, 'Help! Help! Help!'),
      array($help, 16, 'Help! Help!...'),
      array($help, 15, 'Help! Help!...'),
      array($help, 14, 'Help! Help!...'),
      array($help, 13, 'Help! Help...'),
      array($help, 12, 'Help!...'),
      array($help, 11, 'Help!...'),
      array($help, 10, 'Help!...'),
      array($help, 9, 'Help!...'),
      array($help, 8, 'Help!...'),
      array($help, 7, 'Help...'),
      array($help, 6, 'Hel...'),
      array($help, 5, 'He...'),
    )),
  );

  foreach ($scenarios as $scenario) {
    list($use_wordsafe, $use_ellipsis, $scenario_cases) = $scenario;
    $this->runTruncateTests($scenario_cases, $use_wordsafe, $use_ellipsis);
  }
}
/**
 * Pushes a list of cases through truncate_utf8() and asserts each result.
 *
 * Helper for helperTestTruncate(): every case is truncated with the given
 * flags and the returned string is compared with the expected one.
 *
 * @param $cases
 *   List of cases. Each case is an array holding, in order, the input string,
 *   the length to truncate to, and the expected output.
 * @param $wordsafe
 *   TRUE to request word-safe truncation, FALSE otherwise.
 * @param $ellipsis
 *   TRUE to append ... when the input gets truncated, FALSE otherwise.
 */
function runTruncateTests($cases, $wordsafe, $ellipsis) {
  // The flag descriptions are the same for every case in this batch.
  $wordsafe_label = $wordsafe ? 'word-safe' : 'not word-safe';
  $ellipsis_label = $ellipsis ? 'ellipsis' : 'not ellipsis';

  foreach ($cases as $case) {
    $input = $case[0];
    $max_length = $case[1];
    $expected = $case[2];

    $output = truncate_utf8($input, $max_length, $wordsafe, $ellipsis);

    $placeholders = array(
      '%input' => $input,
      '%length' => $max_length,
      '%output' => $output,
      '%expected' => $expected,
      '%wordsafe' => $wordsafe_label,
      '%ellipsis' => $ellipsis_label,
    );
    $message = t('%input truncate to %length characters with %wordsafe, %ellipsis is %expected (got %output)', $placeholders);
    $this->assertEqual($output, $expected, $message);
  }
}
}