2005-07-25 20:40:35 +00:00
< ? php
2013-01-10 23:50:55 +00:00
/**
 * @file
 * Provides Unicode-related conversions and operations.
 */
2013-05-07 23:29:47 +00:00
use Drupal\Component\Utility\Unicode ;
use Drupal\Component\Utility\String ;
2010-06-10 15:20:48 +00:00
2005-07-25 20:40:35 +00:00
/**
 * Reports the status of the Unicode library and any configuration errors.
 *
 * The human-readable value and the severity are both looked up from the
 * status reported by Unicode::getStatus(). When Unicode::check() names a
 * failed check, a matching description pointing at the PHP mbstring
 * documentation is attached.
 *
 * @return array
 *   An array with a single 'unicode' key holding the 'title', 'value' and
 *   'severity' of the requirement, plus a 'description' when a known check
 *   failed.
 */
function unicode_requirements() {
  // Human-readable label for each possible library status.
  $labels = array(
    Unicode::STATUS_SINGLEBYTE => t('Standard PHP'),
    Unicode::STATUS_MULTIBYTE => t('PHP Mbstring Extension'),
    Unicode::STATUS_ERROR => t('Error'),
  );
  // Requirement severity for each possible library status.
  $levels = array(
    Unicode::STATUS_SINGLEBYTE => REQUIREMENT_WARNING,
    Unicode::STATUS_MULTIBYTE => NULL,
    Unicode::STATUS_ERROR => REQUIREMENT_ERROR,
  );

  $failed = Unicode::check();
  $status = Unicode::getStatus();

  $report = array(
    'title' => t('Unicode library'),
    'value' => $labels[$status],
    'severity' => $levels[$status],
  );

  // Every description links to the same documentation page.
  $link = array('@url' => 'http://www.php.net/mbstring');
  switch ($failed) {
    case 'mb_strlen':
      $report['description'] = t('Operations on Unicode strings are emulated on a best-effort basis. Install the <a href="@url">PHP mbstring extension</a> for improved Unicode support.', $link);
      break;

    case 'mbstring.func_overload':
      $report['description'] = t('Multibyte string function overloading in PHP is active and must be disabled. Check the php.ini <em>mbstring.func_overload</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $link);
      break;

    case 'mbstring.encoding_translation':
      $report['description'] = t('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.encoding_translation</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $link);
      break;

    case 'mbstring.http_input':
      $report['description'] = t('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.http_input</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $link);
      break;

    case 'mbstring.http_output':
      $report['description'] = t('Multibyte string output conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.http_output</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $link);
      break;
  }

  return array('unicode' => $report);
}
2007-10-21 18:59:02 +00:00
2005-07-25 20:40:35 +00:00
/**
 * Prepares a new XML parser.
 *
 * This is a wrapper around xml_parser_create() which extracts the encoding
 * from the XML data first and sets the output encoding to UTF-8. This function
 * should be used instead of xml_parser_create(), because PHP 4's XML parser
 * doesn't check the input encoding itself. "Starting from PHP 5, the input
 * encoding is automatically detected, so that the encoding parameter specifies
 * only the output encoding."
 *
 * This is also where unsupported encodings will be converted. Callers should
 * take this into account: $data might have been changed after the call.
 *
 * @param $data
 *   The XML data which will be parsed later. Passed by reference: a leading
 *   UTF-8 byte order mark is removed, and data in an encoding other than
 *   UTF-8, ISO-8859-1 or US-ASCII is replaced by its UTF-8 conversion with
 *   the encoding declaration in the prolog rewritten to "utf-8".
 *
 * @return
 *   An XML parser object or FALSE on error.
 *
 * @ingroup php_wrappers
 */
function drupal_xml_parser_create(&$data) {
  // Default XML encoding is UTF-8.
  $encoding = 'utf-8';
  $bom = FALSE;

  // Check for UTF-8 byte order mark (PHP5's XML parser doesn't handle it).
  // The literal must be exactly the three BOM bytes, with nothing in between.
  if (strncmp($data, "\xEF\xBB\xBF", 3) === 0) {
    $bom = TRUE;
    $data = substr($data, 3);
  }

  // Check for an encoding declaration in the XML prolog if no BOM was found.
  // XML allows the value to be wrapped in either double or single quotes and
  // permits whitespace around the equals sign.
  if (!$bom && preg_match('/^<\?xml[^>]+encoding\s*=\s*(["\'])(.+?)\1/', $data, $match)) {
    $encoding = $match[2];
  }

  // Unsupported encodings are converted here into UTF-8.
  $php_supported = array('utf-8', 'iso-8859-1', 'us-ascii');
  if (!in_array(strtolower($encoding), $php_supported, TRUE)) {
    $out = drupal_convert_to_utf8($data, $encoding);
    if ($out === FALSE) {
      watchdog('php', 'Could not convert XML encoding %s to UTF-8.', array('%s' => $encoding), WATCHDOG_WARNING);
      return FALSE;
    }
    $encoding = 'utf-8';
    // Keep the prolog in sync with the converted data.
    $data = preg_replace('/^(<\?xml[^>]+encoding)\s*=\s*(["\'])(.+?)\2/', '\\1="utf-8"', $out);
  }

  $xml_parser = xml_parser_create($encoding);
  xml_parser_set_option($xml_parser, XML_OPTION_TARGET_ENCODING, 'utf-8');
  return $xml_parser;
}
/**
 * Converts data to UTF-8.
 *
 * Delegates to Unicode::convertToUtf8() and logs an error through watchdog()
 * when that call returns FALSE.
 *
 * @param string $data
 *   The data to be converted.
 * @param string $encoding
 *   The encoding that the data is in.
 *
 * @return string|bool
 *   Converted data or FALSE.
 *
 * @see \Drupal\Component\Utility\Unicode::convertToUtf8()
 */
function drupal_convert_to_utf8($data, $encoding) {
  $converted = Unicode::convertToUtf8($data, $encoding);
  if ($converted !== FALSE) {
    return $converted;
  }

  // Record the failure before handing FALSE back to the caller.
  watchdog('php', 'Unsupported encoding %s. Please install iconv, GNU recode or mbstring for PHP.', array('%s' => $encoding), WATCHDOG_ERROR);
  return $converted;
}
/**
 * Truncates a UTF-8-encoded string safely to a number of bytes.
 *
 * Thin wrapper that forwards both arguments to Unicode::truncateBytes().
 *
 * @param string $string
 *   The string to truncate.
 * @param int $len
 *   An upper limit on the returned string length.
 *
 * @return string
 *   The truncated string.
 *
 * @see \Drupal\Component\Utility\Unicode::truncateBytes()
 */
function drupal_truncate_bytes($string, $len) {
  $truncated = Unicode::truncateBytes($string, $len);
  return $truncated;
}
/**
 * Truncates a UTF-8-encoded string safely to a number of characters.
 *
 * Thin wrapper that forwards all arguments, in order, to Unicode::truncate().
 *
 * @param $string
 *   The string to truncate.
 * @param $max_length
 *   An upper limit on the returned string length, including trailing ellipsis
 *   if $add_ellipsis is TRUE.
 * @param $wordsafe
 *   If TRUE, attempt to truncate on a word boundary. Word boundaries are
 *   spaces, punctuation, and Unicode characters used as word boundaries in
 *   non-Latin languages; see Unicode::PREG_CLASS_WORD_BOUNDARY for more
 *   information. If a word boundary cannot be found that would make the length
 *   of the returned string fall within length guidelines (see parameters
 *   $max_length and $min_wordsafe_length), word boundaries are ignored.
 * @param $add_ellipsis
 *   If TRUE, add t('...') to the end of the truncated string (defaults to
 *   FALSE). The string length will still fall within $max_length.
 * @param $min_wordsafe_length
 *   If $wordsafe is TRUE, the minimum acceptable length for truncation (before
 *   adding an ellipsis, if $add_ellipsis is TRUE). Has no effect if $wordsafe
 *   is FALSE. This can be used to prevent having a very short resulting string
 *   that will not be understandable. For instance, if you are truncating the
 *   string "See myverylongurlexample.com for more information" to a word-safe
 *   return length of 20, the only available word boundary within 20 characters
 *   is after the word "See", which wouldn't leave a very informative string. If
 *   you had set $min_wordsafe_length to 10, though, the function would realise
 *   that "See" alone is too short, and would then just truncate ignoring word
 *   boundaries, giving you "See myverylongurl..." (assuming you had set
 *   $add_ellipsis to TRUE).
 *
 * @return string
 *   The truncated string.
 *
 * @see \Drupal\Component\Utility\Unicode::truncate()
 */
function truncate_utf8($string, $max_length, $wordsafe = FALSE, $add_ellipsis = FALSE, $min_wordsafe_length = 1) {
  $shortened = Unicode::truncate(
    $string,
    $max_length,
    $wordsafe,
    $add_ellipsis,
    $min_wordsafe_length
  );
  return $shortened;
}
/**
 * Encodes MIME/HTTP header values that contain incorrectly encoded characters.
 *
 * Thin wrapper around Unicode::mimeHeaderEncode().
 *
 * @param $string
 *   The header to encode.
 *
 * @return string
 *   The mime-encoded header.
 *
 * @see mime_header_decode()
 * @see \Drupal\Component\Utility\Unicode::mimeHeaderEncode()
 */
function mime_header_encode($string) {
  $encoded = Unicode::mimeHeaderEncode($string);
  return $encoded;
}
2005-09-29 12:37:58 +00:00
/**
 * Decodes MIME/HTTP encoded header values.
 *
 * Thin wrapper around Unicode::mimeHeaderDecode().
 *
 * @param $header
 *   The header to decode.
 *
 * @return string
 *   The mime-decoded header.
 *
 * @see mime_header_encode()
 * @see \Drupal\Component\Utility\Unicode::mimeHeaderDecode()
 */
function mime_header_decode($header) {
  $decoded = Unicode::mimeHeaderDecode($header);
  return $decoded;
}
2005-07-25 20:40:35 +00:00
/**
 * Decodes all HTML entities (including numerical ones) to regular UTF-8 bytes.
 *
 * Thin wrapper around String::decodeEntities().
 *
 * @param $text
 *   The text to decode entities in.
 *
 * @return
 *   The input $text, with all HTML entities decoded once.
 *
 * @see \Drupal\Component\Utility\String::decodeEntities()
 */
function decode_entities($text) {
  $decoded = String::decodeEntities($text);
  return $decoded;
}
/**
 * Counts the number of characters in a UTF-8 string.
 *
 * Thin wrapper around Unicode::strlen().
 *
 * @param $text
 *   The string to run the operation on.
 *
 * @return integer
 *   The length of the string.
 *
 * @see \Drupal\Component\Utility\Unicode::strlen()
 * @ingroup php_wrappers
 */
function drupal_strlen($text) {
  $length = Unicode::strlen($text);
  return $length;
}
/**
 * Converts a UTF-8 string to uppercase.
 *
 * Thin wrapper around Unicode::strtoupper().
 *
 * @param $text
 *   The string to run the operation on.
 *
 * @return string
 *   The string in uppercase.
 *
 * @see \Drupal\Component\Utility\Unicode::strtoupper()
 * @ingroup php_wrappers
 */
function drupal_strtoupper($text) {
  $uppercased = Unicode::strtoupper($text);
  return $uppercased;
}
/**
 * Converts a UTF-8 string to lowercase.
 *
 * Thin wrapper around Unicode::strtolower().
 *
 * @param $text
 *   The string to run the operation on.
 *
 * @return string
 *   The string in lowercase.
 *
 * @see \Drupal\Component\Utility\Unicode::strtolower()
 * @ingroup php_wrappers
 */
function drupal_strtolower($text) {
  $lowercased = Unicode::strtolower($text);
  return $lowercased;
}
/**
 * Capitalizes the first letter of a UTF-8 string.
 *
 * Thin wrapper around Unicode::ucfirst().
 *
 * @param $text
 *   The string to convert.
 *
 * @return
 *   The string with the first letter as uppercase.
 *
 * @see \Drupal\Component\Utility\Unicode::ucfirst()
 * @ingroup php_wrappers
 */
function drupal_ucfirst($text) {
  $capitalized = Unicode::ucfirst($text);
  return $capitalized;
}
/**
 * Cuts off a piece of a string based on character indices and counts.
 *
 * Thin wrapper that forwards all arguments, in order, to Unicode::substr().
 *
 * @param $text
 *   The input string.
 * @param $start
 *   The position at which to start reading.
 * @param $length
 *   The number of characters to read. Defaults to NULL, which is passed
 *   through unchanged.
 *
 * @return
 *   The shortened string.
 *
 * @see \Drupal\Component\Utility\Unicode::substr()
 * @ingroup php_wrappers
 */
function drupal_substr($text, $start, $length = NULL) {
  $piece = Unicode::substr($text, $start, $length);
  return $piece;
}