2005-07-25 20:40:35 +00:00
< ? php
// $Id$
2006-12-06 16:15:52 +00:00
/**
 * Status code: the Unicode check failed.
 *
 * _unicode_check() returns this value when the mbstring extension is loaded
 * but one of its php.ini settings (function overloading, encoding translation,
 * HTTP input/output conversion) is not in the state this file requires.
 */
2005-07-25 20:40:35 +00:00
define ( 'UNICODE_ERROR' , - 1 );
2006-12-06 16:15:52 +00:00
/**
 * Status code: mbstring is not available.
 *
 * _unicode_check() returns this value when mb_strlen() does not exist. The
 * drupal_*() string helpers then fall back to their byte-level emulation.
 */
2005-07-25 20:40:35 +00:00
define ( 'UNICODE_SINGLEBYTE' , 0 );
2006-12-06 16:15:52 +00:00
/**
 * Status code: the PHP mbstring extension is available and correctly
 * configured.
 *
 * When the global $multibyte equals this value, the drupal_*() string helpers
 * delegate to the corresponding mb_*() functions.
 */
2005-07-25 20:40:35 +00:00
define ( 'UNICODE_MULTIBYTE' , 1 );
2005-07-27 01:58:43 +00:00
/**
 * Runs the Unicode capability check and records the outcome globally.
 *
 * The status code returned by _unicode_check() (one of the UNICODE_*
 * constants) is stored in $GLOBALS['multibyte']; the accompanying message is
 * discarded.
 */
function unicode_check() {
  $result = _unicode_check();
  $GLOBALS['multibyte'] = $result[0];
}
2005-07-25 20:40:35 +00:00
/**
 * Checks PHP's Unicode support and applies the required mbstring settings.
 *
 * Because Drupal needs to be able to handle text in various encodings, mbstring
 * function overloading is not supported. HTTP input/output conversion must be
 * disabled for similar reasons.
 *
 * As a side effect the LC_CTYPE locale is always switched to 'C', and when
 * mbstring is usable its internal encoding is set to UTF-8.
 *
 * @return
 *   An array with two elements: one of the UNICODE_* status constants, and a
 *   translated message describing the problem (an empty string when mbstring
 *   is fully usable).
 */
function _unicode_check() {
  // get_t() keeps translation working while the installer is running.
  $t = get_t();
  $doc_link = array('@url' => 'http://www.php.net/mbstring');

  // The standard C locale guarantees consistent, ASCII-only string handling.
  setlocale(LC_CTYPE, 'C');

  // Without mbstring, all string helpers run in emulation mode.
  if (!function_exists('mb_strlen')) {
    $message = $t('Operations on Unicode strings are emulated on a best-effort basis. Install the <a href="@url">PHP mbstring extension</a> for improved Unicode support.', $doc_link);
    return array(UNICODE_SINGLEBYTE, $message);
  }

  // mbstring is present: reject every configuration that silently rewrites
  // strings behind our back.
  if (ini_get('mbstring.func_overload') != 0) {
    $message = $t('Multibyte string function overloading in PHP is active and must be disabled. Check the php.ini <em>mbstring.func_overload</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $doc_link);
    return array(UNICODE_ERROR, $message);
  }
  if (ini_get('mbstring.encoding_translation') != 0) {
    $message = $t('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.encoding_translation</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $doc_link);
    return array(UNICODE_ERROR, $message);
  }
  if (ini_get('mbstring.http_input') != 'pass') {
    $message = $t('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.http_input</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $doc_link);
    return array(UNICODE_ERROR, $message);
  }
  if (ini_get('mbstring.http_output') != 'pass') {
    $message = $t('Multibyte string output conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.http_output</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $doc_link);
    return array(UNICODE_ERROR, $message);
  }

  // Everything checks out: make mbstring operate on UTF-8.
  mb_internal_encoding('utf-8');
  mb_language('uni');

  return array(UNICODE_MULTIBYTE, '');
}
/**
 * Reports the Unicode library status as a requirements entry.
 *
 * @return
 *   An array with a single 'unicode' key whose value holds 'title', 'value',
 *   optionally 'description' (only when _unicode_check() supplied a message),
 *   and 'severity'.
 */
function unicode_requirements() {
  // get_t() keeps translation working while the installer is running.
  $t = get_t();

  // Human-readable label for each possible status code.
  $libraries = array(
    UNICODE_SINGLEBYTE => $t('Standard PHP'),
    UNICODE_MULTIBYTE => $t('PHP Mbstring Extension'),
    UNICODE_ERROR => $t('Error'),
  );
  // Requirement severity for each possible status code.
  $severities = array(
    UNICODE_SINGLEBYTE => REQUIREMENT_WARNING,
    UNICODE_MULTIBYTE => REQUIREMENT_OK,
    UNICODE_ERROR => REQUIREMENT_ERROR,
  );

  $status = _unicode_check();
  $library = $status[0];
  $description = $status[1];

  $entry = array(
    'title' => $t('Unicode library'),
    'value' => $libraries[$library],
  );
  if ($description) {
    $entry['description'] = $description;
  }
  $entry['severity'] = $severities[$library];

  return array('unicode' => $entry);
}
2007-10-21 18:59:02 +00:00
2005-07-25 20:40:35 +00:00
/**
 * Prepare a new XML parser.
 *
 * This is a wrapper around xml_parser_create() which extracts the encoding from
 * the XML data first and sets the output encoding to UTF-8. This function should
 * be used instead of xml_parser_create(), because PHP 4's XML parser doesn't
 * check the input encoding itself. "Starting from PHP 5, the input encoding is
 * automatically detected, so that the encoding parameter specifies only the
 * output encoding."
 *
 * This is also where unsupported encodings will be converted. Callers should
 * take this into account: $data might have been changed after the call (a
 * leading UTF-8 byte order mark is removed, and data in an encoding the parser
 * does not support is converted to UTF-8 with its declaration rewritten).
 *
 * @param &$data
 *   The XML data which will be parsed later.
 * @return
 *   An XML parser object or FALSE on error.
 */
function drupal_xml_parser_create(&$data) {
  // Default XML encoding is UTF-8.
  $encoding = 'utf-8';
  $bom = FALSE;

  // Check for UTF-8 byte order mark (PHP5's XML parser doesn't handle it).
  // The mark is exactly the three bytes EF BB BF, with nothing in between.
  if (!strncmp($data, "\xEF\xBB\xBF", 3)) {
    $bom = TRUE;
    $data = substr($data, 3);
  }

  // Check for an encoding declaration in the XML prolog if no BOM was found.
  if (!$bom && preg_match('/^<\?xml[^>]+encoding="(.+?)"/', $data, $match)) {
    $encoding = $match[1];
  }

  // Unsupported encodings are converted here into UTF-8.
  $php_supported = array('utf-8', 'iso-8859-1', 'us-ascii');
  if (!in_array(strtolower($encoding), $php_supported)) {
    $out = drupal_convert_to_utf8($data, $encoding);
    if ($out !== FALSE) {
      $encoding = 'utf-8';
      // Keep the prolog truthful about the converted payload.
      $data = preg_replace('/^(<\?xml[^>]+encoding)="(.+?)"/', '\\1="utf-8"', $out);
    }
    else {
      watchdog('php', 'Could not convert XML encoding %s to UTF-8.', array('%s' => $encoding), WATCHDOG_WARNING);
      return FALSE;
    }
  }

  $xml_parser = xml_parser_create($encoding);
  xml_parser_set_option($xml_parser, XML_OPTION_TARGET_ENCODING, 'utf-8');
  return $xml_parser;
}
/**
 * Converts data from a given encoding to UTF-8.
 *
 * The first available backend is used, in this order: iconv, mbstring, GNU
 * recode. Warnings raised by the backend are suppressed; its return value is
 * passed through unchanged. When no backend exists, an error is logged.
 *
 * @param $data
 *   The data to be converted.
 * @param $encoding
 *   The encoding that the data is in.
 * @return
 *   Converted data or FALSE.
 */
function drupal_convert_to_utf8($data, $encoding) {
  if (function_exists('iconv')) {
    return @iconv($encoding, 'utf-8', $data);
  }
  if (function_exists('mb_convert_encoding')) {
    return @mb_convert_encoding($data, 'utf-8', $encoding);
  }
  if (function_exists('recode_string')) {
    return @recode_string($encoding . '..utf-8', $data);
  }

  // No conversion library is installed.
  watchdog('php', 'Unsupported encoding %s. Please install iconv, GNU recode or mbstring for PHP.', array('%s' => $encoding), WATCHDOG_ERROR);
  return FALSE;
}
/**
 * Truncate a UTF-8-encoded string safely to a number of bytes.
 *
 * If the end position is in the middle of a UTF-8 sequence, it scans backwards
 * until the beginning of the byte sequence.
 *
 * Use this function whenever you want to chop off a string at an unsure
 * location. On the other hand, if you're sure that you're splitting on a
 * character boundary (e.g. after using strpos() or similar), you can safely use
 * substr() instead.
 *
 * The result never exceeds $len bytes. If no character boundary exists at or
 * before the limit (the limit is not positive, or everything up to it belongs
 * to one unfinished or malformed sequence), an empty string is returned.
 *
 * @param $string
 *   The string to truncate.
 * @param $len
 *   An upper limit on the returned string length, in bytes.
 * @return
 *   The truncated string.
 */
function drupal_truncate_bytes($string, $len) {
  if (strlen($string) <= $len) {
    return $string;
  }
  if ($len <= 0) {
    return '';
  }
  // $string[$len] is the first byte that would be dropped. While it is a
  // continuation byte (10xxxxxx) the cut would split a character, so move the
  // cut back. Stopping at 0 keeps the offset from going negative, which
  // substr() would interpret as "all but the last byte".
  while ($len > 0 && (ord($string[$len]) & 0xC0) == 0x80) {
    $len--;
  }
  return substr($string, 0, $len);
}
/**
 * Truncate a UTF-8-encoded string safely to a number of characters.
 *
 * @param $string
 *   The string to truncate.
 * @param $len
 *   An upper limit on the returned string length, in characters.
 * @param $wordsafe
 *   Flag to truncate at last space within the upper limit. Defaults to FALSE.
 * @param $dots
 *   Flag to add trailing dots. Defaults to FALSE. The four characters of
 *   ' ...' are taken out of $len; when $len is smaller than that, no text is
 *   kept in front of the dots.
 * @return
 *   The truncated string.
 */
function truncate_utf8($string, $len, $wordsafe = FALSE, $dots = FALSE) {
  if (drupal_strlen($string) <= $len) {
    return $string;
  }

  if ($dots) {
    // Reserve room for ' ...'. Clamp at zero: a negative length would make
    // drupal_substr() cut from the end of the string instead, producing a
    // result longer than the input.
    $len = max($len - 4, 0);
  }

  if ($wordsafe) {
    // Leave one more character so a space right at the limit is found.
    $string = drupal_substr($string, 0, $len + 1);
    // A space exists AND is not on position 0.
    if ($last_space = strrpos($string, ' ')) {
      $string = substr($string, 0, $last_space);
    }
    else {
      $string = drupal_substr($string, 0, $len);
    }
  }
  else {
    $string = drupal_substr($string, 0, $len);
  }

  if ($dots) {
    $string .= ' ...';
  }

  return $string;
}
/**
 * Encodes MIME/HTTP header values that contain non-ASCII, UTF-8 encoded
 * characters.
 *
 * For example, mime_header_encode('tést.txt') returns "=?UTF-8?B?dMOpc3QudHh0?=".
 *
 * See http://www.rfc-editor.org/rfc/rfc2047.txt for more information.
 *
 * Notes:
 * - Only encode strings that contain non-ASCII characters.
 * - We progressively cut-off a chunk with drupal_truncate_bytes(). This is to
 *   ensure each chunk starts and ends on a character boundary.
 * - Using \n as the chunk separator may cause problems on some systems and may
 *   have to be changed to \r\n or \r.
 *
 * @param $string
 *   The header value to encode.
 * @return
 *   The value unchanged if it only contains printable ASCII, otherwise one
 *   base64 encoded-word per chunk, separated by a newline and a space.
 */
function mime_header_encode($string) {
  if (preg_match('/[^\x20-\x7E]/', $string)) {
    // floor((75 - strlen("=?UTF-8?B??=")) * 0.75);
    $chunk_size = 47;
    $len = strlen($string);
    $output = '';
    while ($len > 0) {
      $chunk = drupal_truncate_bytes($string, $chunk_size);
      if ($chunk === '' || $chunk === FALSE) {
        // No character boundary within the chunk size (malformed UTF-8).
        // Cut on a plain byte boundary so the loop always makes progress.
        $chunk = substr($string, 0, $chunk_size);
      }
      // The leading space folds continuation lines; the encoded-word itself
      // must not contain any whitespace.
      $output .= ' =?UTF-8?B?' . base64_encode($chunk) . "?=\n";
      $c = strlen($chunk);
      $string = substr($string, $c);
      $len -= $c;
    }
    return trim($output);
  }
  return $string;
}
2005-09-29 12:37:58 +00:00
/**
 * Decodes MIME encoded-words in a header value.
 *
 * Complement to mime_header_encode(). Every encoded-word is handed to
 * _mime_header_decode() in two passes.
 *
 * @param $header
 *   The header value to decode.
 * @return
 *   The header with all matched encoded-words replaced by their decoded text.
 */
function mime_header_decode($header) {
  // Pass 1: an encoded chunk that is followed by another encoded chunk. The
  // whitespace between the two is part of the match and therefore removed.
  $followed_by_chunk = '/=\?([^?]+)\?(Q|B)\?([^?]+|\?(?!=))\?=\s+(?==\?)/';
  // Pass 2: whatever chunks remain; surrounding whitespace is left alone.
  $any_chunk = '/=\?([^?]+)\?(Q|B)\?([^?]+|\?(?!=))\?=/';

  $collapsed = preg_replace_callback($followed_by_chunk, '_mime_header_decode', $header);
  return preg_replace_callback($any_chunk, '_mime_header_decode', $collapsed);
}
/**
 * Helper function to mime_header_decode().
 *
 * Decodes a single encoded-word and converts it to UTF-8 if it uses another
 * character set.
 *
 * @param $matches
 *   Regexp groups as produced by mime_header_decode():
 *   - 1: Character set name.
 *   - 2: Escaping method (Q or B).
 *   - 3: Encoded data.
 * @return
 *   The decoded data, or FALSE if the character set conversion failed.
 */
function _mime_header_decode($matches) {
  if ($matches[2] == 'B') {
    $data = base64_decode($matches[3]);
  }
  else {
    // In Q-encoding a raw "_" stands for a space. It has to be translated
    // before the =XX escapes are decoded, otherwise a literal underscore that
    // was transmitted as "=5F" would be turned into a space as well.
    $data = quoted_printable_decode(str_replace('_', ' ', $matches[3]));
  }
  if (strtolower($matches[1]) != 'utf-8') {
    $data = drupal_convert_to_utf8($data, $matches[1]);
  }
  return $data;
}
2005-07-25 20:40:35 +00:00
/**
 * Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
 * Double-escaped entities will only be decoded once ("&amp;lt;" becomes "&lt;",
 * not "<").
 *
 * Entities that are unknown or malformed are left in the text unchanged.
 *
 * Note: this implementation uses an anonymous function and the encoding
 * argument of get_html_translation_table(), so it needs PHP 5.3.4 or later.
 *
 * @param $text
 *   The text to decode entities in.
 * @param $exclude
 *   An array of characters which should not be decoded. For example,
 *   array('<', '&', '"'). This affects both named and numerical entities.
 * @return
 *   The text with entities replaced by their UTF-8 characters.
 */
function decode_entities($text, $exclude = array()) {
  static $table;
  // We store named entities in a table for quick processing.
  if (!isset($table)) {
    // Get all named HTML entities, keyed by entity. Asking for UTF-8
    // explicitly makes the result independent of PHP's default charset, so no
    // ISO-8859-1 conversion step is needed.
    $table = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8'));
    // Add apostrophe (XML).
    $table['&apos;'] = "'";
  }
  $newtable = array_diff($table, $exclude);

  // Use a regexp to select all entities in one pass, to avoid decoding
  // double-escaped entities twice. A callback is used instead of the 'e'
  // (PREG_REPLACE_EVAL) modifier, which no longer exists in PHP 7.
  $callback = function ($matches) use ($newtable, $exclude) {
    return _decode_entities($matches[1], $matches[2], $matches[0], $newtable, $exclude);
  };
  return preg_replace_callback('/&(#x?)?([A-Za-z0-9]+);/', $callback, $text);
}
/**
 * Helper function for decode_entities().
 *
 * @param $prefix
 *   '' for a named entity, '#' for a decimal and '#x' for a hexadecimal one.
 * @param $codepoint
 *   The text between the prefix and the semicolon.
 * @param $original
 *   The complete entity as it appeared in the text.
 * @param &$table
 *   Map of named entities to UTF-8 characters.
 * @param &$exclude
 *   Characters that must stay encoded.
 * @return
 *   The decoded UTF-8 character, or $original if the entity is unknown,
 *   malformed, outside the Unicode range, or excluded.
 */
function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) {
  // Named entity.
  if (!$prefix) {
    return isset($table[$original]) ? $table[$original] : $original;
  }

  if ($prefix == '#x') {
    // Hexadecimal numerical entity.
    if (!preg_match('/^[0-9A-Fa-f]+$/D', $codepoint)) {
      return $original;
    }
    $codepoint = hexdec($codepoint);
  }
  else {
    // Decimal numerical entity. The integer cast ignores leading zeros.
    if (!preg_match('/^[0-9]+$/D', $codepoint)) {
      return $original;
    }
    $codepoint = (int) $codepoint;
  }

  // Reject NUL, values beyond the Unicode range and UTF-16 surrogates: none
  // of them can be represented as a valid UTF-8 character.
  if ($codepoint < 1 || $codepoint > 0x10FFFF || ($codepoint >= 0xD800 && $codepoint <= 0xDFFF)) {
    return $original;
  }
  // hexdec() may have produced a float; the bit operations need an integer.
  $codepoint = (int) $codepoint;

  // Encode codepoint as UTF-8 bytes.
  if ($codepoint < 0x80) {
    $str = chr($codepoint);
  }
  elseif ($codepoint < 0x800) {
    $str = chr(0xC0 | ($codepoint >> 6))
         . chr(0x80 | ($codepoint & 0x3F));
  }
  elseif ($codepoint < 0x10000) {
    $str = chr(0xE0 | ($codepoint >> 12))
         . chr(0x80 | (($codepoint >> 6) & 0x3F))
         . chr(0x80 | ($codepoint & 0x3F));
  }
  else {
    $str = chr(0xF0 | ($codepoint >> 18))
         . chr(0x80 | (($codepoint >> 12) & 0x3F))
         . chr(0x80 | (($codepoint >> 6) & 0x3F))
         . chr(0x80 | ($codepoint & 0x3F));
  }

  // Check for excluded characters.
  return in_array($str, $exclude, TRUE) ? $original : $str;
}
/**
 * Count the amount of characters in a UTF-8 string. This is less than or
 * equal to the byte count.
 *
 * @param $text
 *   The UTF-8 string to measure.
 * @return
 *   The number of characters in $text.
 */
function drupal_strlen($text) {
  global $multibyte;
  if ($multibyte == UNICODE_MULTIBYTE) {
    return mb_strlen($text);
  }
  else {
    // Do not count UTF-8 continuation bytes (10xxxxxx, i.e. 0x80-0xBF). Every
    // character has exactly one byte outside that range, so the remaining
    // byte count equals the character count.
    return strlen(preg_replace('/[\x80-\xBF]/', '', $text));
  }
}
/**
 * Converts a UTF-8 string to upper case.
 *
 * With mbstring the work is delegated to mb_strtoupper(). Otherwise only ASCII
 * letters and the two-byte sequences matched below (lead byte 0xC3) are
 * converted.
 *
 * @param $text
 *   The UTF-8 string to convert.
 * @return
 *   The upper-cased string.
 */
function drupal_strtoupper($text) {
  global $multibyte;

  if ($multibyte == UNICODE_MULTIBYTE) {
    return mb_strtoupper($text);
  }

  // Emulation: ASCII first, then flip the case of the matched accented
  // lower-case letters.
  $ascii_upper = strtoupper($text);
  return preg_replace_callback('/\xC3[\xA0-\xB6\xB8-\xBE]/', '_unicode_caseflip', $ascii_upper);
}
/**
 * Converts a UTF-8 string to lower case.
 *
 * With mbstring the work is delegated to mb_strtolower(). Otherwise only ASCII
 * letters and the two-byte sequences matched below (lead byte 0xC3) are
 * converted.
 *
 * @param $text
 *   The UTF-8 string to convert.
 * @return
 *   The lower-cased string.
 */
function drupal_strtolower($text) {
  global $multibyte;

  if ($multibyte == UNICODE_MULTIBYTE) {
    return mb_strtolower($text);
  }

  // Emulation: ASCII first, then flip the case of the matched accented
  // upper-case letters.
  $ascii_lower = strtolower($text);
  return preg_replace_callback('/\xC3[\x80-\x96\x98-\x9E]/', '_unicode_caseflip', $ascii_lower);
}
/**
 * Helper function for case conversion of Latin-1 letters encoded in UTF-8.
 *
 * Used as a preg_replace_callback() callback by drupal_strtoupper() and
 * drupal_strtolower().
 *
 * @param $matches
 *   Regexp match array; element 0 is a two-byte sequence.
 * @return
 *   The same sequence with bit 0x20 of its second byte toggled, which switches
 *   between the upper- and lower-case form of the letter.
 */
function _unicode_caseflip($matches) {
  $sequence = $matches[0];
  $lead_byte = $sequence[0];
  $flipped_byte = chr(ord($sequence[1]) ^ 32);
  return $lead_byte . $flipped_byte;
}
/**
 * Capitalizes the first character of a UTF-8 string.
 *
 * There is no mbstring equivalent, so the string is split with drupal_substr()
 * and the first character is run through drupal_strtoupper().
 *
 * @param $text
 *   The UTF-8 string to convert.
 * @return
 *   The string with its first character upper-cased.
 */
function drupal_ucfirst($text) {
  $first = drupal_substr($text, 0, 1);
  $capital = drupal_strtoupper($first);
  return $capital . drupal_substr($text, 1);
}
/**
 * Cut off a piece of a string based on character indices and counts. Follows
 * the same behavior as PHP's own substr() function.
 *
 * Note that for cutting off a string at a known character/substring
 * location, the usage of PHP's normal strpos/substr is safe and
 * much faster.
 *
 * @param $text
 *   The UTF-8 string to cut.
 * @param $start
 *   Character offset of the first character to return. A negative value
 *   counts from the end of the string.
 * @param $length
 *   Number of characters to return. A negative value leaves that many
 *   characters off the end; NULL (the default) returns everything up to the
 *   end of the string.
 * @return
 *   The requested part of $text. In emulation mode an empty string is
 *   returned when the requested range is empty.
 */
function drupal_substr($text, $start, $length = NULL) {
  global $multibyte;
  if ($multibyte == UNICODE_MULTIBYTE) {
    return $length === NULL ? mb_substr($text, $start) : mb_substr($text, $start, $length);
  }

  // Emulation: translate the character positions into byte offsets. $istart
  // is the offset of the first byte to return, $iend the offset just past the
  // last byte to return.
  $strlen = strlen($text);

  if ($start >= 0) {
    $istart = _drupal_substr_forward($text, 0, $start);
  }
  else {
    $istart = _drupal_substr_backward($text, $strlen, -$start);
  }

  if ($length === NULL) {
    $iend = $strlen;
  }
  elseif ($length >= 0) {
    $iend = _drupal_substr_forward($text, $istart, $length);
  }
  else {
    $iend = _drupal_substr_backward($text, $strlen, -$length);
  }

  return $iend > $istart ? substr($text, $istart, $iend - $istart) : '';
}

/**
 * Helper for drupal_substr(): moves a byte offset forward by whole characters.
 *
 * @param $text
 *   The UTF-8 string.
 * @param $offset
 *   Byte offset to start from; expected to be on a character boundary.
 * @param $count
 *   Number of characters to skip.
 * @return
 *   The byte offset after skipping $count characters, or strlen($text) if the
 *   string ends first.
 */
function _drupal_substr_forward($text, $offset, $count) {
  $strlen = strlen($text);
  while ($count > 0 && $offset < $strlen) {
    // Step over the first byte of the character, then over every continuation
    // byte (10xxxxxx) that belongs to it.
    $offset++;
    while ($offset < $strlen && (ord($text[$offset]) & 0xC0) == 0x80) {
      $offset++;
    }
    $count--;
  }
  return $offset;
}

/**
 * Helper for drupal_substr(): moves a byte offset backward by whole characters.
 *
 * @param $text
 *   The UTF-8 string.
 * @param $offset
 *   Byte offset to start from; expected to be on a character boundary.
 * @param $count
 *   Number of characters to step back.
 * @return
 *   The byte offset $count characters before $offset, or 0 if the start of
 *   the string is reached first.
 */
function _drupal_substr_backward($text, $offset, $count) {
  while ($count > 0 && $offset > 0) {
    // Step back one byte, then keep going while we are inside a character,
    // i.e. on a continuation byte (10xxxxxx).
    $offset--;
    while ($offset > 0 && (ord($text[$offset]) & 0xC0) == 0x80) {
      $offset--;
    }
    $count--;
  }
  return $offset;
}
2005-10-07 06:11:12 +00:00