Allow "no" to match "nb" in language util (#92862)
* Allow "no" to match "nb"
* Adjust comparison for speed
pull/92879/head
parent
7d29d584fd
commit
5d6ccd6a32
|
@ -54,6 +54,20 @@ def is_region(language: str, region: str | None) -> bool:
|
|||
return True
|
||||
|
||||
|
||||
def is_language_match(lang_1: str, lang_2: str) -> bool:
    """Return true if two languages are considered the same.

    Two language codes match when they are equal, or when they are the
    pair "no"/"nb" in either order. The comparison is exact: no case
    folding or whitespace stripping is applied to either argument.
    """
    if lang_1 == lang_2:
        # Exact match
        return True

    # no = spoken Norwegian
    # nb = written Norwegian (Bokmål)
    # Set equality covers both argument orders with a single comparison.
    return {lang_1, lang_2} == {"no", "nb"}
|
||||
|
||||
|
||||
@dataclass
|
||||
class Dialect:
|
||||
"""Language with optional region and script/code."""
|
||||
|
@ -71,26 +85,35 @@ class Dialect:
|
|||
# Regions are upper-cased
|
||||
self.region = self.region.upper()
|
||||
|
||||
def score(self, dialect: Dialect, country: str | None = None) -> float:
|
||||
def score(
|
||||
self, dialect: Dialect, country: str | None = None
|
||||
) -> tuple[float, float]:
|
||||
"""Return score for match with another dialect where higher is better.
|
||||
|
||||
Score < 0 indicates a failure to match.
|
||||
"""
|
||||
if self.language != dialect.language:
|
||||
if not is_language_match(self.language, dialect.language):
|
||||
# Not a match
|
||||
return -1
|
||||
return (-1, 0)
|
||||
|
||||
is_exact_language = self.language == dialect.language
|
||||
|
||||
if (self.region is None) and (dialect.region is None):
|
||||
# Weak match with no region constraint
|
||||
return 1
|
||||
# Prefer exact language match
|
||||
return (2 if is_exact_language else 1, 0)
|
||||
|
||||
if (self.region is not None) and (dialect.region is not None):
|
||||
if self.region == dialect.region:
|
||||
# Exact language + region match
|
||||
return math.inf
|
||||
# Same language + region match
|
||||
# Prefer exact language match
|
||||
return (
|
||||
math.inf,
|
||||
1 if is_exact_language else 0,
|
||||
)
|
||||
|
||||
# Regions are both set, but don't match
|
||||
return 0
|
||||
return (0, 0)
|
||||
|
||||
# Generate ordered list of preferred regions
|
||||
pref_regions = list(
|
||||
|
@ -113,13 +136,13 @@ class Dialect:
|
|||
|
||||
# More preferred regions are at the front.
|
||||
# Add 1 to boost above a weak match where no regions are set.
|
||||
return 1 + (len(pref_regions) - region_idx)
|
||||
return (1 + (len(pref_regions) - region_idx), 0)
|
||||
except ValueError:
|
||||
# Region was not in preferred list
|
||||
pass
|
||||
|
||||
# Not a preferred region
|
||||
return 0
|
||||
return (0, 0)
|
||||
|
||||
@staticmethod
|
||||
def parse(tag: str) -> Dialect:
|
||||
|
@ -169,4 +192,4 @@ def matches(
|
|||
)
|
||||
|
||||
# Score < 0 is not a match
|
||||
return [tag for _dialect, score, tag in scored if score >= 0]
|
||||
return [tag for _dialect, score, tag in scored if score[0] >= 0]
|
||||
|
|
|
@ -190,3 +190,39 @@ def test_sr_latn() -> None:
|
|||
"sr-CS",
|
||||
"sr-RS",
|
||||
]
|
||||
|
||||
|
||||
def test_no_nb_same() -> None:
    """Test that the no/nb are interchangeable."""
    # Target "no" should pick up a supported "nb" ...
    assert language.matches(
        "no",
        ["en-US", "en-GB", "nb"],
    ) == ["nb"]
    # ... and target "nb" should pick up a supported "no".
    assert language.matches(
        "nb",
        ["en-US", "en-GB", "no"],
    ) == ["no"]
|
||||
|
||||
|
||||
def test_no_nb_prefer_exact() -> None:
    """Test that the exact language is preferred.

    The exact language must come first even if an interchangeable
    language is also available.
    """
    # The result order must not depend on the order of the supported list.
    assert language.matches(
        "no",
        ["en-US", "en-GB", "nb", "no"],
    ) == ["no", "nb"]
    assert language.matches(
        "no",
        ["en-US", "en-GB", "no", "nb"],
    ) == ["no", "nb"]
|
||||
|
||||
|
||||
def test_no_nb_prefer_exact_regions() -> None:
    """Test that the exact language/region is preferred."""
    # Both candidates share the region; the exact language must still be
    # ranked first regardless of the order of the supported list.
    assert language.matches(
        "no-AA",
        ["en-US", "en-GB", "nb-AA", "no-AA"],
    ) == ["no-AA", "nb-AA"]
    assert language.matches(
        "no-AA",
        ["en-US", "en-GB", "no-AA", "nb-AA"],
    ) == ["no-AA", "nb-AA"]
|
||||
|
|
Loading…
Reference in New Issue