Revert "Issue #3000630 by scott_euser, Krzysztof Domański, APolitsin, vijaycs85, longwave: Transliteration causes 2 capital letters at the beginning of a word"
This reverts commit bb7fb6a3dd.
8.7.x
parent
c503bb6a93
commit
ce031c3f87
|
@@ -105,55 +105,31 @@ class PhpTransliteration implements TransliterationInterface {
|
|||
* {@inheritdoc}
|
||||
*/
|
||||
public function transliterate($string, $langcode = 'en', $unknown_character = '?', $max_length = NULL) {
|
||||
$results = [];
|
||||
$result = '';
|
||||
$length = 0;
|
||||
|
||||
// Split on words to handle mixed case per word.
|
||||
$words = explode(' ', $string);
|
||||
foreach ($words as $key => $word) {
|
||||
$results[$key] = '';
|
||||
|
||||
// String is mixed case if it consists of both uppercase and lowercase
|
||||
// letters. To accurately check this, remove any numbers and check that
|
||||
// remaining characters are not all uppercase and not all lowercase.
|
||||
$alpha_string = preg_replace('/\\d/', '', $word);
|
||||
$mixed_case = (strlen($alpha_string) > 1 && mb_strtolower($alpha_string) !== $alpha_string && mb_strtoupper($alpha_string) !== $alpha_string);
|
||||
|
||||
// Split into Unicode characters and transliterate each one.
|
||||
foreach (preg_split('//u', $word, 0, PREG_SPLIT_NO_EMPTY) as $character) {
|
||||
$code = self::ordUTF8($character);
|
||||
if ($code == -1) {
|
||||
$to_add = $unknown_character;
|
||||
}
|
||||
else {
|
||||
$to_add = $this->replace($code, $langcode, $unknown_character);
|
||||
}
|
||||
|
||||
// Check if this exceeds the maximum allowed length.
|
||||
if (isset($max_length)) {
|
||||
$length += strlen($to_add);
|
||||
if ($length > $max_length) {
|
||||
// There is no more space.
|
||||
$results = array_filter($results);
|
||||
return implode(' ', $results);
|
||||
}
|
||||
}
|
||||
|
||||
// If this is a capitalised letter of a mixed case word, only capitalise
|
||||
// the first letter and lowercase any subsequent letters.
|
||||
if ($mixed_case && strlen($to_add) > 1 && mb_strtoupper($to_add) === $to_add) {
|
||||
$to_add = ucfirst(strtolower($to_add));
|
||||
}
|
||||
|
||||
$results[$key] .= $to_add;
|
||||
// Split into Unicode characters and transliterate each one.
|
||||
foreach (preg_split('//u', $string, 0, PREG_SPLIT_NO_EMPTY) as $character) {
|
||||
$code = self::ordUTF8($character);
|
||||
if ($code == -1) {
|
||||
$to_add = $unknown_character;
|
||||
}
|
||||
else {
|
||||
$to_add = $this->replace($code, $langcode, $unknown_character);
|
||||
}
|
||||
|
||||
// Add space to count for max length.
|
||||
$length++;
|
||||
// Check if this exceeds the maximum allowed length.
|
||||
if (isset($max_length)) {
|
||||
$length += strlen($to_add);
|
||||
if ($length > $max_length) {
|
||||
// There is no more space.
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
$result .= $to_add;
|
||||
}
|
||||
|
||||
$results = array_filter($results);
|
||||
return implode(' ', $results);
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@@ -146,19 +146,6 @@ class PhpTransliterationTest extends TestCase {
|
|||
['en', chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), '?'],
|
||||
// Max length.
|
||||
['de', $two_byte, 'Ae Oe', '?', 5],
|
||||
// Test strings with mixed case words where a single capital character
|
||||
// results in multiple characters. The first character should remain
|
||||
// capitalised but subsequent resulting characters should be lowercase.
|
||||
// For example a result of the transliteration should be 'Shtrikhkod'
|
||||
// not 'SHtrikhkod'. Numbers should not be used in determining whether a
|
||||
// string is mixed case.
|
||||
['ru', 'Штрихкод', 'Shtrikhkod'],
|
||||
['bg', 'Щастие', 'Schastie'],
|
||||
['bg', 'Щ1', 'SCH1'],
|
||||
['bg', 'Щ1Щ', 'SCH1SCH'],
|
||||
['bg', 'Щ1щ', 'Sch1sch'],
|
||||
['bg', 'Щастие ЩЩЩ', 'Schastie SCHSCHSCH'],
|
||||
['bg', 'Щастие ЩЩЩ. Щастие! Щастие', 'Schastie SCHSCHSCH. Schastie! Schastie'],
|
||||
];
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue