2023-04-13 01:27:09 +00:00
|
|
|
"""Helper methods for language selection in Home Assistant."""
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
from collections.abc import Iterable
|
|
|
|
from dataclasses import dataclass
|
|
|
|
import operator
|
|
|
|
import re
|
|
|
|
|
2023-04-20 17:55:26 +00:00
|
|
|
from homeassistant.const import MATCH_ALL
|
|
|
|
|
2023-04-13 01:27:09 +00:00
|
|
|
# Matches a single "-" or "_"; Dialect.parse splits a language tag on the
# first such separator (e.g. "en-US" and "en_US" are split the same way).
SEPARATOR_RE = re.compile(r"[-_]")
|
|
|
|
|
|
|
|
|
|
|
|
def preferred_regions(
    language: str,
    country: str | None = None,
    code: str | None = None,
) -> Iterable[str | None]:
    """Generate candidate regions for a language, most preferred first.

    The country hint (upper-cased) always comes first when given. After that
    come language-specific defaults, and finally the upper-cased language
    itself (fr -> FR). The same region may be produced more than once, e.g.
    when the country hint equals the upper-cased language.

    Callers with no better information should try the regions in the order
    they are produced.
    """
    if country is not None:
        # An explicit country hint wins over every built-in default
        yield country.upper()

    if language == "zh":
        if code == "Hant":
            # Traditional script: Hong Kong first, then Taiwan
            yield from ("HK", "TW")
        else:
            yield "CN"
    elif language == "en" and country is None:
        # Without a country hint, default to U.S. English
        yield "US"

    # Generic fallback: region named after the language (fr -> fr-FR)
    yield language.upper()
|
|
|
|
|
|
|
|
|
|
|
|
def is_region(language: str, region: str | None) -> bool:
    """Tell whether the second part of a tag is a region, not a script/code.

    Only a few language-specific values are known to be scripts/codes;
    anything else (including None) is treated as a region. The comparison is
    case-sensitive for both arguments.
    """
    # Per-language values that look like regions but are really scripts/codes
    scripts_or_codes: dict[str, tuple[str, ...]] = {
        "es": ("419",),
        "sr": ("Latn",),
        "zh": ("Hans", "Hant"),
    }
    return region not in scripts_or_codes.get(language, ())
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class Dialect:
    """Language with optional region and script/code.

    Casing is normalized on construction: the language is case-folded and
    the region (when present) is upper-cased. The code is stored as given.
    """

    # Language part of the tag; case-folded in __post_init__ (e.g. "en")
    language: str
    # Region part of the tag; upper-cased in __post_init__ (e.g. "US")
    region: str | None
    # Script or other non-region part of the tag (e.g. "Hant", "419")
    code: str | None = None

    def __post_init__(self) -> None:
        """Fix casing of language/region."""
        # Languages are lower-cased
        self.language = self.language.casefold()

        if self.region is not None:
            # Regions are upper-cased
            self.region = self.region.upper()

    def score(self, dialect: Dialect, country: str | None = None) -> int:
        """Return score for match with another dialect where higher is better.

        Score < 0 indicates a failure to match.

        Scores, from best to worst:
          3: same language and same explicit region
          2: exactly one side has no region, and the other side's region is
             one of the preferred regions for this language/country/code
          1: same language, neither side has a region
          0: same language, but the regions do not agree
         -1: different language
        """
        if self.language != dialect.language:
            # Not a match
            return -1

        if self.region == dialect.region:
            if self.region is not None:
                # Exact language + region match. This must outrank a
                # preferred-region match, otherwise a region-less tag
                # ("en") would be ranked above the exact tag ("en-US").
                return 3

            # Language match with no region on either side
            return 1

        if (self.region is not None) and (dialect.region is not None):
            # Both regions are set and they differ
            return 0

        # Exactly one side is missing a region: treat the missing side as
        # "any preferred region" and check the known region against that set.
        pref_regions = set(
            preferred_regions(
                self.language,
                country=country,
                code=self.code,
            )
        )
        known_region = self.region if self.region is not None else dialect.region

        # Better match if the known region is a preferred one
        return 2 if known_region in pref_regions else 0

    @staticmethod
    def parse(tag: str) -> Dialect:
        """Parse language tag into language/region/code.

        The tag is split on the first "-" or "_" only. The part after the
        separator becomes the region unless is_region reports it as a known
        script/code for the language, in which case it becomes the code.
        """
        parts = SEPARATOR_RE.split(tag, maxsplit=1)

        # Normalize the language before consulting is_region: its special
        # cases compare against lower-case language codes, so "ZH-Hant" must
        # be classified the same way as "zh-Hant".
        language = parts[0].casefold()
        region: str | None = None
        code: str | None = None

        if len(parts) > 1:
            region_or_code = parts[1]
            if is_region(language, region_or_code):
                # US, GB, etc.
                region = region_or_code
            else:
                # Hant, 419, etc.
                code = region_or_code

        return Dialect(
            language=language,
            region=region,
            code=code,
        )
|
|
|
|
|
|
|
|
|
|
|
|
def matches(
    target: str, supported: Iterable[str], country: str | None = None
) -> list[str]:
    """Rank the supported language tags against a target tag.

    When the target is MATCH_ALL, every supported tag is returned in its
    original order. Otherwise each supported tag is scored against the
    target (using the optional country hint), tags with a negative score
    are dropped, and the rest are returned best score first. Tags with equal
    scores keep their relative order from ``supported``.
    """
    if target == MATCH_ALL:
        return list(supported)

    wanted = Dialect.parse(target)

    # Pair every candidate with its score; a score < 0 is not a match
    ranked = [
        (rank, tag)
        for rank, tag in (
            (wanted.score(Dialect.parse(tag), country=country), tag)
            for tag in supported
        )
        if rank >= 0
    ]

    # Higher score is better; the sort is stable, so ties keep input order
    ranked.sort(key=operator.itemgetter(0), reverse=True)

    return [tag for _rank, tag in ranked]
|