324 lines
10 KiB
PHP
324 lines
10 KiB
PHP
<?php
|
|
|
|
/**
 * @file
 * Provides Unicode-related conversions and operations.
 */
|
|
|
|
use Drupal\Component\Utility\Unicode;
|
|
use Drupal\Component\Utility\String;
|
|
|
|
/**
 * Reports the Unicode library in use and any related configuration problem.
 *
 * @return array
 *   A requirements array with a single 'unicode' entry containing 'title',
 *   'value' and 'severity', plus a 'description' when Unicode::check()
 *   reports one of the known failed checks.
 */
function unicode_requirements() {
  // Human-readable library name for each Unicode status.
  $labels = array(
    Unicode::STATUS_SINGLEBYTE => t('Standard PHP'),
    Unicode::STATUS_MULTIBYTE => t('PHP Mbstring Extension'),
    Unicode::STATUS_ERROR => t('Error'),
  );
  // Requirement severity for each Unicode status.
  $levels = array(
    Unicode::STATUS_SINGLEBYTE => REQUIREMENT_WARNING,
    Unicode::STATUS_MULTIBYTE => NULL,
    Unicode::STATUS_ERROR => REQUIREMENT_ERROR,
  );

  // Keep this order: the check is executed before the status is read.
  $failure = Unicode::check();
  $status = Unicode::getStatus();

  $requirements = array();
  $requirements['unicode'] = array(
    'title' => t('Unicode library'),
    'value' => $labels[$status],
    'severity' => $levels[$status],
  );

  // Every description links to the PHP mbstring documentation.
  $link = array('@url' => 'http://www.php.net/mbstring');

  if ($failure == 'mb_strlen') {
    $requirements['unicode']['description'] = t('Operations on Unicode strings are emulated on a best-effort basis. Install the <a href="@url">PHP mbstring extension</a> for improved Unicode support.', $link);
  }
  elseif ($failure == 'mbstring.func_overload') {
    $requirements['unicode']['description'] = t('Multibyte string function overloading in PHP is active and must be disabled. Check the php.ini <em>mbstring.func_overload</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $link);
  }
  elseif ($failure == 'mbstring.encoding_translation') {
    $requirements['unicode']['description'] = t('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.encoding_translation</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $link);
  }
  elseif ($failure == 'mbstring.http_input') {
    $requirements['unicode']['description'] = t('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.http_input</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $link);
  }
  elseif ($failure == 'mbstring.http_output') {
    $requirements['unicode']['description'] = t('Multibyte string output conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.http_output</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $link);
  }

  return $requirements;
}
|
|
|
|
/**
 * Prepares a new XML parser.
 *
 * This is a wrapper around xml_parser_create() which strips a leading UTF-8
 * byte order mark, reads the encoding declared in the XML prolog and sets the
 * output encoding to UTF-8.
 *
 * This is also where unsupported encodings will be converted. Callers should
 * take this into account: $data might have been changed after the call.
 *
 * @param string $data
 *   The XML data which will be parsed later. Passed by reference: a leading
 *   UTF-8 byte order mark is removed, and data in an encoding other than
 *   UTF-8, ISO-8859-1 or US-ASCII is converted to UTF-8 with its encoding
 *   declaration rewritten accordingly.
 *
 * @return mixed
 *   The XML parser returned by xml_parser_create(), or FALSE if the data
 *   could not be converted to UTF-8.
 *
 * @ingroup php_wrappers
 */
function drupal_xml_parser_create(&$data) {
  // Default XML encoding is UTF-8.
  $encoding = 'utf-8';
  $bom = FALSE;

  // Check for UTF-8 byte order mark (PHP5's XML parser doesn't handle it).
  if (!strncmp($data, "\xEF\xBB\xBF", 3)) {
    $bom = TRUE;
    $data = substr($data, 3);
  }

  // Matches the encoding declaration of the XML prolog. The XML specification
  // allows the value to be wrapped in either double or single quotes and
  // permits whitespace around the equals sign, so both forms are accepted.
  // Group 1 is everything up to and including the word "encoding", group 2
  // the quote character and group 3 the encoding name.
  $declaration = '/^(<\?xml[^>]+encoding)\s*=\s*(["\'])(.+?)\2/';

  // Check for an encoding declaration in the XML prolog if no BOM was found.
  if (!$bom && preg_match($declaration, $data, $match)) {
    $encoding = $match[3];
  }

  // Unsupported encodings are converted here into UTF-8.
  $php_supported = array('utf-8', 'iso-8859-1', 'us-ascii');
  if (!in_array(strtolower($encoding), $php_supported, TRUE)) {
    $out = drupal_convert_to_utf8($data, $encoding);
    if ($out !== FALSE) {
      $encoding = 'utf-8';
      // Keep the prolog in sync with the converted data.
      $data = preg_replace($declaration, '\\1="utf-8"', $out);
    }
    else {
      \Drupal::logger('php')->warning('Could not convert XML encoding %s to UTF-8.', array('%s' => $encoding));
      return FALSE;
    }
  }

  $xml_parser = xml_parser_create($encoding);
  xml_parser_set_option($xml_parser, XML_OPTION_TARGET_ENCODING, 'utf-8');
  return $xml_parser;
}
|
|
|
|
/**
 * Converts data to UTF-8, logging an error when conversion fails.
 *
 * @param string $data
 *   The data to be converted.
 * @param string $encoding
 *   The encoding that the data is in.
 *
 * @return string|bool
 *   Converted data or FALSE.
 *
 * @see \Drupal\Component\Utility\Unicode::convertToUtf8()
 */
function drupal_convert_to_utf8($data, $encoding) {
  $converted = Unicode::convertToUtf8($data, $encoding);
  if ($converted !== FALSE) {
    return $converted;
  }

  // Conversion failed: record it in the 'php' log channel before returning.
  \Drupal::logger('php')->error('Unsupported encoding %s. Please install iconv, GNU recode or mbstring for PHP.', array('%s' => $encoding));
  return $converted;
}
|
|
|
|
/**
 * Safely truncates a UTF-8-encoded string to a maximum number of bytes.
 *
 * Thin wrapper that delegates to Unicode::truncateBytes().
 *
 * @param string $string
 *   The string to truncate.
 * @param int $len
 *   An upper limit on the returned string length.
 *
 * @return string
 *   The truncated string.
 *
 * @see \Drupal\Component\Utility\Unicode::truncateBytes()
 */
function drupal_truncate_bytes($string, $len) {
  $truncated = Unicode::truncateBytes($string, $len);
  return $truncated;
}
|
|
|
|
/**
 * Safely truncates a UTF-8-encoded string to a maximum number of characters.
 *
 * Thin wrapper that delegates to Unicode::truncate().
 *
 * @param string $string
 *   The string to truncate.
 * @param int $max_length
 *   An upper limit on the returned string length, including trailing ellipsis
 *   if $add_ellipsis is TRUE.
 * @param bool $wordsafe
 *   If TRUE, attempt to truncate on a word boundary. Word boundaries are
 *   spaces, punctuation, and Unicode characters used as word boundaries in
 *   non-Latin languages; see Unicode::PREG_CLASS_WORD_BOUNDARY for more
 *   information. If no word boundary can be found that keeps the returned
 *   string within the length guidelines (see $max_length and
 *   $min_wordsafe_length), word boundaries are ignored.
 * @param bool $add_ellipsis
 *   If TRUE, add t('...') to the end of the truncated string (defaults to
 *   FALSE). The string length will still fall within $max_length.
 * @param int $min_wordsafe_length
 *   If $wordsafe is TRUE, the minimum acceptable length for truncation (before
 *   adding an ellipsis, if $add_ellipsis is TRUE). Has no effect if $wordsafe
 *   is FALSE. This prevents a resulting string that is too short to be
 *   understandable. For instance, when truncating the string
 *   "See myverylongurlexample.com for more information" to a word-safe return
 *   length of 20, the only available word boundary within 20 characters is
 *   after the word "See", which would not leave a very informative string.
 *   With $min_wordsafe_length set to 10, "See" alone is considered too short,
 *   and the string is truncated ignoring word boundaries instead, giving
 *   "See myverylongurl..." (assuming $add_ellipsis is TRUE).
 *
 * @return string
 *   The truncated string.
 *
 * @see \Drupal\Component\Utility\Unicode::truncate()
 */
function truncate_utf8($string, $max_length, $wordsafe = FALSE, $add_ellipsis = FALSE, $min_wordsafe_length = 1) {
  $truncated = Unicode::truncate(
    $string,
    $max_length,
    $wordsafe,
    $add_ellipsis,
    $min_wordsafe_length
  );
  return $truncated;
}
|
|
|
|
/**
 * Encodes MIME/HTTP header values that contain incorrectly encoded characters.
 *
 * Thin wrapper that delegates to Unicode::mimeHeaderEncode().
 *
 * @param string $string
 *   The header to encode.
 *
 * @return string
 *   The mime-encoded header.
 *
 * @see mime_header_decode()
 * @see \Drupal\Component\Utility\Unicode::mimeHeaderEncode()
 */
function mime_header_encode($string) {
  $encoded = Unicode::mimeHeaderEncode($string);
  return $encoded;
}
|
|
|
|
/**
 * Decodes MIME/HTTP encoded header values.
 *
 * Thin wrapper that delegates to Unicode::mimeHeaderDecode().
 *
 * @param string $header
 *   The header to decode.
 *
 * @return string
 *   The mime-decoded header.
 *
 * @see mime_header_encode()
 * @see \Drupal\Component\Utility\Unicode::mimeHeaderDecode()
 */
function mime_header_decode($header) {
  $decoded = Unicode::mimeHeaderDecode($header);
  return $decoded;
}
|
|
|
|
/**
 * Decodes all HTML entities (including numerical ones) to regular UTF-8 bytes.
 *
 * Thin wrapper that delegates to String::decodeEntities().
 *
 * @param string $text
 *   The text to decode entities in.
 *
 * @return string
 *   The input $text, with all HTML entities decoded once.
 *
 * @see \Drupal\Component\Utility\String::decodeEntities()
 */
function decode_entities($text) {
  // NOTE(review): "String" is a reserved class name from PHP 7.0 onward;
  // confirm which PHP versions this file has to run on.
  $decoded = String::decodeEntities($text);
  return $decoded;
}
|
|
|
|
/**
 * Counts the number of characters in a UTF-8 string.
 *
 * Thin wrapper that delegates to Unicode::strlen().
 *
 * @param string $text
 *   The string to run the operation on.
 *
 * @return int
 *   The length of the string.
 *
 * @see \Drupal\Component\Utility\Unicode::strlen()
 * @ingroup php_wrappers
 */
function drupal_strlen($text) {
  $length = Unicode::strlen($text);
  return $length;
}
|
|
|
|
/**
 * Converts a UTF-8 string to uppercase.
 *
 * Thin wrapper that delegates to Unicode::strtoupper().
 *
 * @param string $text
 *   The string to run the operation on.
 *
 * @return string
 *   The string in uppercase.
 *
 * @see \Drupal\Component\Utility\Unicode::strtoupper()
 * @ingroup php_wrappers
 */
function drupal_strtoupper($text) {
  $uppercased = Unicode::strtoupper($text);
  return $uppercased;
}
|
|
|
|
/**
 * Converts a UTF-8 string to lowercase.
 *
 * Thin wrapper that delegates to Unicode::strtolower().
 *
 * @param string $text
 *   The string to run the operation on.
 *
 * @return string
 *   The string in lowercase.
 *
 * @see \Drupal\Component\Utility\Unicode::strtolower()
 * @ingroup php_wrappers
 */
function drupal_strtolower($text) {
  $lowercased = Unicode::strtolower($text);
  return $lowercased;
}
|
|
|
|
/**
 * Capitalizes the first letter of a UTF-8 string.
 *
 * Thin wrapper that delegates to Unicode::ucfirst().
 *
 * @param string $text
 *   The string to convert.
 *
 * @return string
 *   The string with the first letter as uppercase.
 *
 * @see \Drupal\Component\Utility\Unicode::ucfirst()
 * @ingroup php_wrappers
 */
function drupal_ucfirst($text) {
  $capitalized = Unicode::ucfirst($text);
  return $capitalized;
}
|
|
|
|
/**
 * Cuts off a piece of a string based on character indices and counts.
 *
 * Thin wrapper that delegates to Unicode::substr().
 *
 * @param string $text
 *   The input string.
 * @param int $start
 *   The position at which to start reading.
 * @param int|null $length
 *   The number of characters to read. Defaults to NULL, which is passed
 *   through unchanged to Unicode::substr().
 *
 * @return string
 *   The shortened string.
 *
 * @see \Drupal\Component\Utility\Unicode::substr()
 * @ingroup php_wrappers
 */
function drupal_substr($text, $start, $length = NULL) {
  $piece = Unicode::substr($text, $start, $length);
  return $piece;
}
|