From ce031c3f87a79545122d0e4468e45b4489021e50 Mon Sep 17 00:00:00 2001 From: Alex Pott Date: Sat, 24 Nov 2018 00:24:09 +0000 Subject: [PATCH] =?UTF-8?q?Revert=20"Issue=20#3000630=20by=20scott=5Feuser?= =?UTF-8?q?,=20Krzysztof=20Doma=C5=84ski,=20APolitsin,=20vijaycs85,=20long?= =?UTF-8?q?wave:=20Transliteration=20causes=202=20capital=20letters=20at?= =?UTF-8?q?=20the=20beginning=20of=20a=20word"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit bb7fb6a3dd840f95ed76c17f94dcd2dc049e0470. --- .../Transliteration/PhpTransliteration.php | 64 ++++++------------- .../PhpTransliterationTest.php | 13 ---- 2 files changed, 20 insertions(+), 57 deletions(-) diff --git a/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php b/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php index 5eee57aa200..3cd8d685650 100644 --- a/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php +++ b/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php @@ -105,55 +105,31 @@ class PhpTransliteration implements TransliterationInterface { * {@inheritdoc} */ public function transliterate($string, $langcode = 'en', $unknown_character = '?', $max_length = NULL) { - $results = []; + $result = ''; $length = 0; - - // Split on words to handle mixed case per word. - $words = explode(' ', $string); - foreach ($words as $key => $word) { - $results[$key] = ''; - - // String is mixed case if it consists of both uppercase and lowercase - // letters. To accurately check this, remove any numbers and check that - // remaining characters are not all uppercase and not all lowercase. - $alpha_string = preg_replace('/\\d/', '', $word); - $mixed_case = (strlen($alpha_string) > 1 && mb_strtolower($alpha_string) !== $alpha_string && mb_strtoupper($alpha_string) !== $alpha_string); - - // Split into Unicode characters and transliterate each one. - foreach (preg_split('//u', $word, 0, PREG_SPLIT_NO_EMPTY) as $character) { - $code = self::ordUTF8($character); - if ($code == -1) { - $to_add = $unknown_character; - } - else { - $to_add = $this->replace($code, $langcode, $unknown_character); - } - - // Check if this exceeds the maximum allowed length. - if (isset($max_length)) { - $length += strlen($to_add); - if ($length > $max_length) { - // There is no more space. - $results = array_filter($results); - return implode(' ', $results); - } - } - - // If this is a capitalised letter of a mixed case word, only capitalise - // the first letter and lowercase any subsequent letters. - if ($mixed_case && strlen($to_add) > 1 && mb_strtoupper($to_add) === $to_add) { - $to_add = ucfirst(strtolower($to_add)); - } - - $results[$key] .= $to_add; + // Split into Unicode characters and transliterate each one. + foreach (preg_split('//u', $string, 0, PREG_SPLIT_NO_EMPTY) as $character) { + $code = self::ordUTF8($character); + if ($code == -1) { + $to_add = $unknown_character; + } + else { + $to_add = $this->replace($code, $langcode, $unknown_character); } - // Add space to count for max length. - $length++; + // Check if this exceeds the maximum allowed length. + if (isset($max_length)) { + $length += strlen($to_add); + if ($length > $max_length) { + // There is no more space. + return $result; + } + } + + $result .= $to_add; } - $results = array_filter($results); - return implode(' ', $results); + return $result; } /** diff --git a/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php b/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php index fd6cfa9eb21..49adc4c550c 100644 --- a/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php +++ b/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php @@ -146,19 +146,6 @@ class PhpTransliterationTest extends TestCase { ['en', chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), '?'], // Max length. ['de', $two_byte, 'Ae Oe', '?', 5], - // Test strings with mixed case words where a single capital character - // results in multiple characters. The first character should remain - // capitalised but subsequent resulting characters should be lowercase. - // For example a result of the transliteration should be 'Shtrikhkod' - // not 'SHtrikhkod'. Numbers should not be used in determining whether a - // string is mixed case. - ['ru', 'Штрихкод', 'Shtrikhkod'], - ['bg', 'Щастие', 'Schastie'], - ['bg', 'Щ1', 'SCH1'], - ['bg', 'Щ1Щ', 'SCH1SCH'], - ['bg', 'Щ1щ', 'Sch1sch'], - ['bg', 'Щастие ЩЩЩ', 'Schastie SCHSCHSCH'], - ['bg', 'Щастие ЩЩЩ. Щастие! Щастие', 'Schastie SCHSCHSCH. Schastie! Schastie'], ]; }