Allow "no" to match "nb" in language util (#92862)

* Allow "no" to match "nb"

* Adjust comparison for speed
pull/92879/head
Michael Hansen 2023-05-09 13:46:57 -05:00 committed by GitHub
parent 7d29d584fd
commit 5d6ccd6a32
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 69 additions and 10 deletions

View File

@ -54,6 +54,20 @@ def is_region(language: str, region: str | None) -> bool:
return True
def is_language_match(lang_1: str, lang_2: str) -> bool:
    """Report whether two language codes are treated as the same language.

    Identical codes always match. In addition, "no" and "nb" match each
    other, in either order.
    """
    # no = spoken Norwegian, nb = written Norwegian (Bokmål); the pair is
    # considered interchangeable. The exact-match test runs first so the
    # set comparison is only evaluated when the codes differ.
    return lang_1 == lang_2 or {lang_1, lang_2} == {"no", "nb"}
@dataclass
class Dialect:
"""Language with optional region and script/code."""
@ -71,26 +85,35 @@ class Dialect:
# Regions are upper-cased
self.region = self.region.upper()
def score(self, dialect: Dialect, country: str | None = None) -> float:
def score(
self, dialect: Dialect, country: str | None = None
) -> tuple[float, float]:
"""Return score for match with another dialect where higher is better.
Score < 0 indicates a failure to match.
"""
if self.language != dialect.language:
if not is_language_match(self.language, dialect.language):
# Not a match
return -1
return (-1, 0)
is_exact_language = self.language == dialect.language
if (self.region is None) and (dialect.region is None):
# Weak match with no region constraint
return 1
# Prefer exact language match
return (2 if is_exact_language else 1, 0)
if (self.region is not None) and (dialect.region is not None):
if self.region == dialect.region:
# Exact language + region match
return math.inf
# Same language + region match
# Prefer exact language match
return (
math.inf,
1 if is_exact_language else 0,
)
# Regions are both set, but don't match
return 0
return (0, 0)
# Generate ordered list of preferred regions
pref_regions = list(
@ -113,13 +136,13 @@ class Dialect:
# More preferred regions are at the front.
# Add 1 to boost above a weak match where no regions are set.
return 1 + (len(pref_regions) - region_idx)
return (1 + (len(pref_regions) - region_idx), 0)
except ValueError:
# Region was not in preferred list
pass
# Not a preferred region
return 0
return (0, 0)
@staticmethod
def parse(tag: str) -> Dialect:
@ -169,4 +192,4 @@ def matches(
)
# Score < 0 is not a match
return [tag for _dialect, score, tag in scored if score >= 0]
return [tag for _dialect, score, tag in scored if score[0] >= 0]

View File

@ -190,3 +190,39 @@ def test_sr_latn() -> None:
"sr-CS",
"sr-RS",
]
def test_no_nb_same() -> None:
    """Check that "no" and "nb" are matched to each other."""
    # Requesting "no" picks up "nb" when it is the only Norwegian option.
    only_nb = ["en-US", "en-GB", "nb"]
    assert language.matches("no", only_nb) == ["nb"]

    # And the reverse: requesting "nb" picks up "no".
    only_no = ["en-US", "en-GB", "no"]
    assert language.matches("nb", only_no) == ["no"]
def test_no_nb_prefer_exact() -> None:
    """Check that an exact language match is ordered before an interchangeable one."""
    expected = ["no", "nb"]

    # The result order must not depend on the order of the supported list.
    nb_listed_first = ["en-US", "en-GB", "nb", "no"]
    assert language.matches("no", nb_listed_first) == expected

    no_listed_first = ["en-US", "en-GB", "no", "nb"]
    assert language.matches("no", no_listed_first) == expected
def test_no_nb_prefer_exact_regions() -> None:
    """Check that the exact language/region tag is ordered before the interchangeable one."""
    expected = ["no-AA", "nb-AA"]

    # Both candidates share the requested region; "no-AA" must come first
    # regardless of where it appears in the supported list.
    nb_listed_first = ["en-US", "en-GB", "nb-AA", "no-AA"]
    assert language.matches("no-AA", nb_listed_first) == expected

    no_listed_first = ["en-US", "en-GB", "no-AA", "nb-AA"]
    assert language.matches("no-AA", no_listed_first) == expected