Issue #3151364 by Charlie ChX Negyesi, amateescu, alexpott, jhodgdon: diacritics are not removed from ǢǣǼǽǮǯ

(cherry picked from commit 920d1708c9)
merge-requests/64/head
catch 2020-07-14 13:16:26 +01:00
parent 884d2a50fe
commit 33b050cafd
3 changed files with 31 additions and 2 deletions

View File

@ -58,6 +58,21 @@ class PhpTransliteration implements TransliterationInterface {
*/
protected $genericMap = [];
/**
* Special characters for ::removeDiacritics().
*
* Characters which have accented variants but whose base character
* transliterates to more than one ASCII character require special
* treatment: we want to remove their accent and use the untransliterated
* base character.
*
* Keys are the multi-character ASCII transliterations; values are the
* unaccented base characters that are used as the replacement instead.
*
* @var string[]
*/
protected $fixTransliterateForRemoveDiacritics = [
// Accented AE ligatures (e.g. Ǣ, Ǽ / ǣ, ǽ) keep the plain ligature.
'AE' => 'Æ',
'ae' => 'æ',
// Accented ezh (Ǯ / ǯ) keeps the plain ezh.
'ZH' => 'Ʒ',
'zh' => 'ʒ',
];
/**
* Constructs a transliteration object.
*
@ -93,6 +108,9 @@ class PhpTransliteration implements TransliterationInterface {
if (strlen($to_add) === 1) {
$replacement = $to_add;
}
elseif (isset($this->fixTransliterateForRemoveDiacritics[$to_add])) {
$replacement = $this->fixTransliterateForRemoveDiacritics[$to_add];
}
}
$result .= $replacement;

View File

@ -22,3 +22,14 @@ function search_post_update_block_page(&$sandbox = NULL) {
return $block->getPluginId() === 'search_form_block';
});
}
/**
 * Mark everything for reindexing after diacritics removal rule change.
 */
function search_post_update_reindex_after_diacritics_rule_change() {
  // Ask the plugin behind every indexable search page to mark its content
  // for reindexing, so the index is rebuilt using the new diacritics rules.
  $indexable_pages = \Drupal::service('search.search_page_repository')->getIndexableSearchPages();
  foreach ($indexable_pages as $search_page) {
    $plugin = $search_page->getPlugin();
    $plugin->markForReindex();
  }

  // The returned text is the status message reported for this update.
  $message = t("Content has been marked for re-indexing for all active search pages. Searching will continue to work, but new content won't be indexed until all existing content has been re-indexed.");
  return $message;
}

View File

@ -58,8 +58,8 @@ class PhpTransliterationTest extends TestCase {
// Test all characters in the Unicode range 0x01CD to 0x024F.
['ǍǎǏ', 'AaI'],
['ǐǑǒǓǔǕǖǗǘǙǚǛǜǝǞǟ', 'iOoUuUuUuUuUuǝAa'],
['ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯ', 'AaǢǣGgGgKkOoOoǮǯ'],
['ǰDZDzdzǴǵǶǷǸǹǺǻǼǽǾǿ', 'jDZDzdzGgǶǷNnAaǼǽOo'],
['ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯ', 'AaÆæGgGgKkOoOoƷʒ'],
['ǰDZDzdzǴǵǶǷǸǹǺǻǼǽǾǿ', 'jDZDzdzGgǶǷNnAaÆæOo'],
['ȀȁȂȃȄȅȆȇȈȉȊȋȌȍȎȏ', 'AaAaEeEeIiIiOoOo'],
['ȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟ', 'RrRrUuUuSsTtȜȝHh'],
['ȠȡȢȣȤȥȦȧȨȩȪȫȬȭȮȯ', 'ȠȡȢȣZzAaEeOoOoOo'],