Significantly improve yaml load times when the C loader is available (#73337)

pull/73424/head
J. Nick Koston 2022-06-13 08:44:46 -10:00 committed by GitHub
parent b84e844c76
commit dca4d3cd61
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 190 additions and 78 deletions

View File

@ -28,6 +28,7 @@ env:
PIP_CACHE: /tmp/pip-cache
SQLALCHEMY_WARN_20: 1
PYTHONASYNCIODEBUG: 1
HASS_CI: 1
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

View File

@ -158,7 +158,7 @@ jobs:
wheels-key: ${{ secrets.WHEELS_KEY }}
wheels-user: wheels
env-file: true
apk: "build-base;cmake;git;linux-headers;libexecinfo-dev;bluez-dev;libffi-dev;openssl-dev;glib-dev;eudev-dev;libxml2-dev;libxslt-dev;libpng-dev;libjpeg-turbo-dev;tiff-dev;autoconf;automake;cups-dev;gmp-dev;mpfr-dev;mpc1-dev;ffmpeg-dev;gammu-dev;cargo"
apk: "build-base;cmake;git;linux-headers;libexecinfo-dev;bluez-dev;libffi-dev;openssl-dev;glib-dev;eudev-dev;libxml2-dev;libxslt-dev;libpng-dev;libjpeg-turbo-dev;tiff-dev;autoconf;automake;cups-dev;gmp-dev;mpfr-dev;mpc1-dev;ffmpeg-dev;gammu-dev;yaml-dev;cargo"
pip: "Cython;numpy;scikit-build"
skip-binary: aiohttp,grpcio
constraints: "homeassistant/package_constraints.txt"

View File

@ -18,6 +18,7 @@ RUN \
libavfilter-dev \
libpcap-dev \
libturbojpeg0 \
libyaml-dev \
libxml2 \
git \
cmake \

View File

@ -191,7 +191,7 @@ def check(config_dir, secrets=False):
if secrets:
# Ensure !secrets point to the patched function
yaml_loader.SafeLineLoader.add_constructor("!secret", yaml_loader.secret_yaml)
yaml_loader.add_constructor("!secret", yaml_loader.secret_yaml)
def secrets_proxy(*args):
secrets = Secrets(*args)
@ -219,9 +219,7 @@ def check(config_dir, secrets=False):
pat.stop()
if secrets:
# Ensure !secrets point to the original function
yaml_loader.SafeLineLoader.add_constructor(
"!secret", yaml_loader.secret_yaml
)
yaml_loader.add_constructor("!secret", yaml_loader.secret_yaml)
return res

View File

@ -4,6 +4,7 @@ from __future__ import annotations
from collections import OrderedDict
from collections.abc import Iterator
import fnmatch
from io import StringIO
import logging
import os
from pathlib import Path
@ -11,6 +12,14 @@ from typing import Any, TextIO, TypeVar, Union, overload
import yaml
try:
from yaml import CSafeLoader as FastestAvailableSafeLoader
HAS_C_LOADER = True
except ImportError:
HAS_C_LOADER = False
from yaml import SafeLoader as FastestAvailableSafeLoader # type: ignore[misc]
from homeassistant.exceptions import HomeAssistantError
from .const import SECRET_YAML
@ -88,6 +97,30 @@ class Secrets:
return secrets
class SafeLoader(FastestAvailableSafeLoader):
    """The fastest available safe loader.

    Besides parsing, the loader keeps the input stream, a display name for it
    and the optional secrets store, which the custom tag constructors read
    through ``get_name()``, ``get_stream_name()`` and ``secrets``.
    """

    def __init__(self, stream: Any, secrets: Secrets | None = None) -> None:
        """Initialize the loader.

        stream: the YAML input; a ``str``, ``bytes`` or a file-like object.
        secrets: optional secrets store made available to tag constructors.
        """
        self.stream = stream
        # Pick a display name: in-memory input gets a fixed placeholder,
        # anything else uses its own ``name`` attribute when it has one.
        if isinstance(stream, str):
            self.name = "<unicode string>"
        elif isinstance(stream, bytes):
            self.name = "<byte string>"
        else:
            self.name = getattr(stream, "name", "<file>")
        super().__init__(stream)
        self.secrets = secrets

    def get_name(self) -> str:
        """Get the name of the loader."""
        return self.name

    def get_stream_name(self) -> str:
        """Get the name of the stream, or an empty string if it has none."""
        # The stream may be a plain str/bytes (see __init__), which has no
        # ``name`` attribute; fall back to "" instead of raising.
        return getattr(self.stream, "name", "") or ""
class SafeLineLoader(yaml.SafeLoader):
"""Loader class that keeps track of line numbers."""
@ -103,6 +136,17 @@ class SafeLineLoader(yaml.SafeLoader):
node.__line__ = last_line + 1 # type: ignore[attr-defined]
return node
def get_name(self) -> str:
"""Get the name of the loader."""
return self.name
def get_stream_name(self) -> str:
    """Get the name of the stream, or an empty string if it has none."""
    # Not every stream exposes ``name`` (in-memory input, for example), so
    # look it up defensively rather than raising AttributeError.
    return getattr(self.stream, "name", "") or ""
LoaderType = Union[SafeLineLoader, SafeLoader]
def load_yaml(fname: str, secrets: Secrets | None = None) -> JSON_TYPE:
"""Load a YAML file."""
@ -114,60 +158,90 @@ def load_yaml(fname: str, secrets: Secrets | None = None) -> JSON_TYPE:
raise HomeAssistantError(exc) from exc
def parse_yaml(
    content: str | TextIO | StringIO, secrets: Secrets | None = None
) -> JSON_TYPE:
    """Parse YAML with the fastest available loader.

    When the C loader is available it is tried first. If it rejects the
    document, the content is parsed a second time with the pure Python line
    loader so that the resulting error carries line numbers.

    content: the YAML text, or a stream positioned at the start of it.
    secrets: optional secrets store handed to the loader.

    Raises HomeAssistantError (via the pure Python parse) on invalid YAML.
    """
    if not HAS_C_LOADER:
        return _parse_yaml_pure_python(content, secrets)

    try:
        return _parse_yaml(SafeLoader, content, secrets)
    except yaml.YAMLError:
        # Loading failed, so we now load with the slow line loader
        # since the C one will not give us line numbers
        if not isinstance(content, str):
            # Rewind the stream so the retry sees the whole document.
            # Anything that is not a str is treated as a stream: testing
            # against typing.TextIO does not work because real file objects
            # are not instances of that annotation-only class.
            content.seek(0, 0)
        return _parse_yaml_pure_python(content, secrets)
def _parse_yaml_pure_python(
    content: str | TextIO | StringIO, secrets: Secrets | None = None
) -> JSON_TYPE:
    """Parse YAML with the line-tracking pure Python loader (very slow).

    Any YAML error is logged and re-raised as HomeAssistantError.
    """
    try:
        parsed = _parse_yaml(SafeLineLoader, content, secrets)
    except yaml.YAMLError as exc:
        _LOGGER.error(str(exc))
        raise HomeAssistantError(exc) from exc
    return parsed
def _parse_yaml(
    loader: type[SafeLoader] | type[SafeLineLoader],
    content: str | TextIO,
    secrets: Secrets | None = None,
) -> JSON_TYPE:
    """Load YAML content using the given loader class.

    The loader is instantiated with both the stream and the secrets store.
    """

    def _make_loader(stream):  # type: ignore[no-untyped-def]
        return loader(stream, secrets)

    parsed = yaml.load(content, Loader=_make_loader)
    # An empty configuration file makes YAML return None (or another falsy
    # value); hand back an empty ordered dict in that case.
    if not parsed:
        return OrderedDict()
    return parsed
@overload
def _add_reference(
obj: list | NodeListClass, loader: SafeLineLoader, node: yaml.nodes.Node
obj: list | NodeListClass,
loader: LoaderType,
node: yaml.nodes.Node,
) -> NodeListClass:
...
@overload
def _add_reference(
obj: str | NodeStrClass, loader: SafeLineLoader, node: yaml.nodes.Node
obj: str | NodeStrClass,
loader: LoaderType,
node: yaml.nodes.Node,
) -> NodeStrClass:
...
@overload
def _add_reference(
obj: _DictT, loader: SafeLineLoader, node: yaml.nodes.Node
) -> _DictT:
def _add_reference(obj: _DictT, loader: LoaderType, node: yaml.nodes.Node) -> _DictT:
...
def _add_reference(obj, loader: SafeLineLoader, node: yaml.nodes.Node): # type: ignore[no-untyped-def]
def _add_reference(obj, loader: LoaderType, node: yaml.nodes.Node): # type: ignore[no-untyped-def]
"""Add file reference information to an object."""
if isinstance(obj, list):
obj = NodeListClass(obj)
if isinstance(obj, str):
obj = NodeStrClass(obj)
setattr(obj, "__config_file__", loader.name)
setattr(obj, "__config_file__", loader.get_name())
setattr(obj, "__line__", node.start_mark.line)
return obj
def _include_yaml(loader: SafeLineLoader, node: yaml.nodes.Node) -> JSON_TYPE:
def _include_yaml(loader: LoaderType, node: yaml.nodes.Node) -> JSON_TYPE:
"""Load another YAML file and embeds it using the !include tag.
Example:
device_tracker: !include device_tracker.yaml
"""
fname = os.path.join(os.path.dirname(loader.name), node.value)
fname = os.path.join(os.path.dirname(loader.get_name()), node.value)
try:
return _add_reference(load_yaml(fname, loader.secrets), loader, node)
except FileNotFoundError as exc:
@ -191,12 +265,10 @@ def _find_files(directory: str, pattern: str) -> Iterator[str]:
yield filename
def _include_dir_named_yaml(
loader: SafeLineLoader, node: yaml.nodes.Node
) -> OrderedDict:
def _include_dir_named_yaml(loader: LoaderType, node: yaml.nodes.Node) -> OrderedDict:
"""Load multiple files from directory as a dictionary."""
mapping: OrderedDict = OrderedDict()
loc = os.path.join(os.path.dirname(loader.name), node.value)
loc = os.path.join(os.path.dirname(loader.get_name()), node.value)
for fname in _find_files(loc, "*.yaml"):
filename = os.path.splitext(os.path.basename(fname))[0]
if os.path.basename(fname) == SECRET_YAML:
@ -206,11 +278,11 @@ def _include_dir_named_yaml(
def _include_dir_merge_named_yaml(
loader: SafeLineLoader, node: yaml.nodes.Node
loader: LoaderType, node: yaml.nodes.Node
) -> OrderedDict:
"""Load multiple files from directory as a merged dictionary."""
mapping: OrderedDict = OrderedDict()
loc = os.path.join(os.path.dirname(loader.name), node.value)
loc = os.path.join(os.path.dirname(loader.get_name()), node.value)
for fname in _find_files(loc, "*.yaml"):
if os.path.basename(fname) == SECRET_YAML:
continue
@ -221,10 +293,10 @@ def _include_dir_merge_named_yaml(
def _include_dir_list_yaml(
loader: SafeLineLoader, node: yaml.nodes.Node
loader: LoaderType, node: yaml.nodes.Node
) -> list[JSON_TYPE]:
"""Load multiple files from directory as a list."""
loc = os.path.join(os.path.dirname(loader.name), node.value)
loc = os.path.join(os.path.dirname(loader.get_name()), node.value)
return [
load_yaml(f, loader.secrets)
for f in _find_files(loc, "*.yaml")
@ -233,10 +305,10 @@ def _include_dir_list_yaml(
def _include_dir_merge_list_yaml(
loader: SafeLineLoader, node: yaml.nodes.Node
loader: LoaderType, node: yaml.nodes.Node
) -> JSON_TYPE:
"""Load multiple files from directory as a merged list."""
loc: str = os.path.join(os.path.dirname(loader.name), node.value)
loc: str = os.path.join(os.path.dirname(loader.get_name()), node.value)
merged_list: list[JSON_TYPE] = []
for fname in _find_files(loc, "*.yaml"):
if os.path.basename(fname) == SECRET_YAML:
@ -247,7 +319,7 @@ def _include_dir_merge_list_yaml(
return _add_reference(merged_list, loader, node)
def _ordered_dict(loader: SafeLineLoader, node: yaml.nodes.MappingNode) -> OrderedDict:
def _ordered_dict(loader: LoaderType, node: yaml.nodes.MappingNode) -> OrderedDict:
"""Load YAML mappings into an ordered dictionary to preserve key order."""
loader.flatten_mapping(node)
nodes = loader.construct_pairs(node)
@ -259,14 +331,14 @@ def _ordered_dict(loader: SafeLineLoader, node: yaml.nodes.MappingNode) -> Order
try:
hash(key)
except TypeError as exc:
fname = getattr(loader.stream, "name", "")
fname = loader.get_stream_name()
raise yaml.MarkedYAMLError(
context=f'invalid key: "{key}"',
context_mark=yaml.Mark(fname, 0, line, -1, None, None), # type: ignore[arg-type]
) from exc
if key in seen:
fname = getattr(loader.stream, "name", "")
fname = loader.get_stream_name()
_LOGGER.warning(
'YAML file %s contains duplicate key "%s". Check lines %d and %d',
fname,
@ -279,13 +351,13 @@ def _ordered_dict(loader: SafeLineLoader, node: yaml.nodes.MappingNode) -> Order
return _add_reference(OrderedDict(nodes), loader, node)
def _construct_seq(loader: SafeLineLoader, node: yaml.nodes.Node) -> JSON_TYPE:
def _construct_seq(loader: LoaderType, node: yaml.nodes.Node) -> JSON_TYPE:
"""Add line number and file name to Load YAML sequence."""
(obj,) = loader.construct_yaml_seq(node)
return _add_reference(obj, loader, node)
def _env_var_yaml(loader: SafeLineLoader, node: yaml.nodes.Node) -> str:
def _env_var_yaml(loader: LoaderType, node: yaml.nodes.Node) -> str:
"""Load environment variables and embed it into the configuration YAML."""
args = node.value.split()
@ -298,27 +370,27 @@ def _env_var_yaml(loader: SafeLineLoader, node: yaml.nodes.Node) -> str:
raise HomeAssistantError(node.value)
def secret_yaml(loader: SafeLineLoader, node: yaml.nodes.Node) -> JSON_TYPE:
def secret_yaml(loader: LoaderType, node: yaml.nodes.Node) -> JSON_TYPE:
"""Load secrets and embed it into the configuration YAML."""
if loader.secrets is None:
raise HomeAssistantError("Secrets not supported in this YAML file")
return loader.secrets.get(loader.name, node.value)
return loader.secrets.get(loader.get_name(), node.value)
SafeLineLoader.add_constructor("!include", _include_yaml)
SafeLineLoader.add_constructor(
yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, _ordered_dict
)
SafeLineLoader.add_constructor(
yaml.resolver.BaseResolver.DEFAULT_SEQUENCE_TAG, _construct_seq
)
SafeLineLoader.add_constructor("!env_var", _env_var_yaml)
SafeLineLoader.add_constructor("!secret", secret_yaml)
SafeLineLoader.add_constructor("!include_dir_list", _include_dir_list_yaml)
SafeLineLoader.add_constructor("!include_dir_merge_list", _include_dir_merge_list_yaml)
SafeLineLoader.add_constructor("!include_dir_named", _include_dir_named_yaml)
SafeLineLoader.add_constructor(
"!include_dir_merge_named", _include_dir_merge_named_yaml
)
SafeLineLoader.add_constructor("!input", Input.from_node)
def add_constructor(tag: Any, constructor: Any) -> None:
    """Register a constructor for the tag on every loader class."""
    SafeLoader.add_constructor(tag, constructor)
    SafeLineLoader.add_constructor(tag, constructor)
add_constructor("!include", _include_yaml)
add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, _ordered_dict)
add_constructor(yaml.resolver.BaseResolver.DEFAULT_SEQUENCE_TAG, _construct_seq)
add_constructor("!env_var", _env_var_yaml)
add_constructor("!secret", secret_yaml)
add_constructor("!include_dir_list", _include_dir_list_yaml)
add_constructor("!include_dir_merge_list", _include_dir_merge_list_yaml)
add_constructor("!include_dir_named", _include_dir_named_yaml)
add_constructor("!include_dir_merge_named", _include_dir_merge_named_yaml)
add_constructor("!input", Input.from_node)

View File

@ -1,2 +1,2 @@
*!* NOT YAML
-*!*- NOT YAML

View File

@ -1,6 +1,7 @@
"""Test config utils."""
# pylint: disable=protected-access
from collections import OrderedDict
import contextlib
import copy
import os
from unittest import mock
@ -147,7 +148,7 @@ def test_load_yaml_config_raises_error_if_not_dict():
def test_load_yaml_config_raises_error_if_malformed_yaml():
"""Test error raised if invalid YAML."""
with open(YAML_PATH, "w") as fp:
fp.write(":")
fp.write(":-")
with pytest.raises(HomeAssistantError):
config_util.load_yaml_config_file(YAML_PATH)
@ -156,11 +157,22 @@ def test_load_yaml_config_raises_error_if_malformed_yaml():
def test_load_yaml_config_raises_error_if_unsafe_yaml():
"""Test error raised if unsafe YAML."""
with open(YAML_PATH, "w") as fp:
fp.write("hello: !!python/object/apply:os.system")
fp.write("- !!python/object/apply:os.system []")
with pytest.raises(HomeAssistantError):
with patch.object(os, "system") as system_mock, contextlib.suppress(
HomeAssistantError
):
config_util.load_yaml_config_file(YAML_PATH)
assert len(system_mock.mock_calls) == 0
# Here we validate that the test above is a good test
# since previously the syntax was not valid
with open(YAML_PATH) as fp, patch.object(os, "system") as system_mock:
list(yaml.unsafe_load_all(fp))
assert len(system_mock.mock_calls) == 1
def test_load_yaml_config_preserves_key_order():
"""Test removal of library."""

View File

@ -1,10 +1,12 @@
"""Test Home Assistant yaml loader."""
import importlib
import io
import os
import unittest
from unittest.mock import patch
import pytest
import yaml as pyyaml
from homeassistant.config import YAML_CONFIG_FILE, load_yaml_config_file
from homeassistant.exceptions import HomeAssistantError
@ -14,7 +16,24 @@ from homeassistant.util.yaml import loader as yaml_loader
from tests.common import get_test_config_dir, patch_yaml_files
def test_simple_list():
@pytest.fixture(params=["enable_c_loader", "disable_c_loader"])
def try_both_loaders(request):
    """Run the requesting test once per loader configuration.

    For the "disable_c_loader" parameter the C loader is removed from the
    yaml package and the loader module is reloaded so it falls back to the
    pure Python loader; both are restored afterwards.
    """
    # A missing C loader shows up as a missing attribute on the yaml module
    # (AttributeError, not ImportError), so look it up with a default.
    cloader = getattr(pyyaml, "CSafeLoader", None)
    if request.param != "disable_c_loader" or cloader is None:
        # Nothing to disable. A generator fixture must still yield exactly
        # once, otherwise pytest reports an error instead of running the test.
        yield
        return

    del pyyaml.CSafeLoader
    try:
        importlib.reload(yaml_loader)
        yield
    finally:
        # Restore the C loader even if reloading the module failed.
        pyyaml.CSafeLoader = cloader
        importlib.reload(yaml_loader)
def test_simple_list(try_both_loaders):
"""Test simple list."""
conf = "config:\n - simple\n - list"
with io.StringIO(conf) as file:
@ -22,7 +41,7 @@ def test_simple_list():
assert doc["config"] == ["simple", "list"]
def test_simple_dict():
def test_simple_dict(try_both_loaders):
"""Test simple dict."""
conf = "key: value"
with io.StringIO(conf) as file:
@ -37,14 +56,14 @@ def test_unhashable_key():
load_yaml_config_file(YAML_CONFIG_FILE)
def test_no_key():
def test_no_key(try_both_loaders):
"""Test item without a key."""
files = {YAML_CONFIG_FILE: "a: a\nnokeyhere"}
with pytest.raises(HomeAssistantError), patch_yaml_files(files):
yaml.load_yaml(YAML_CONFIG_FILE)
def test_environment_variable():
def test_environment_variable(try_both_loaders):
"""Test config file with environment variable."""
os.environ["PASSWORD"] = "secret_password"
conf = "password: !env_var PASSWORD"
@ -54,7 +73,7 @@ def test_environment_variable():
del os.environ["PASSWORD"]
def test_environment_variable_default():
def test_environment_variable_default(try_both_loaders):
"""Test config file with default value for environment variable."""
conf = "password: !env_var PASSWORD secret_password"
with io.StringIO(conf) as file:
@ -62,14 +81,14 @@ def test_environment_variable_default():
assert doc["password"] == "secret_password"
def test_invalid_environment_variable():
def test_invalid_environment_variable(try_both_loaders):
"""Test config file with no environment variable sat."""
conf = "password: !env_var PASSWORD"
with pytest.raises(HomeAssistantError), io.StringIO(conf) as file:
yaml_loader.yaml.load(file, Loader=yaml_loader.SafeLineLoader)
def test_include_yaml():
def test_include_yaml(try_both_loaders):
"""Test include yaml."""
with patch_yaml_files({"test.yaml": "value"}):
conf = "key: !include test.yaml"
@ -85,7 +104,7 @@ def test_include_yaml():
@patch("homeassistant.util.yaml.loader.os.walk")
def test_include_dir_list(mock_walk):
def test_include_dir_list(mock_walk, try_both_loaders):
"""Test include dir list yaml."""
mock_walk.return_value = [["/test", [], ["two.yaml", "one.yaml"]]]
@ -97,7 +116,7 @@ def test_include_dir_list(mock_walk):
@patch("homeassistant.util.yaml.loader.os.walk")
def test_include_dir_list_recursive(mock_walk):
def test_include_dir_list_recursive(mock_walk, try_both_loaders):
"""Test include dir recursive list yaml."""
mock_walk.return_value = [
["/test", ["tmp2", ".ignore", "ignore"], ["zero.yaml"]],
@ -124,7 +143,7 @@ def test_include_dir_list_recursive(mock_walk):
@patch("homeassistant.util.yaml.loader.os.walk")
def test_include_dir_named(mock_walk):
def test_include_dir_named(mock_walk, try_both_loaders):
"""Test include dir named yaml."""
mock_walk.return_value = [
["/test", [], ["first.yaml", "second.yaml", "secrets.yaml"]]
@ -139,7 +158,7 @@ def test_include_dir_named(mock_walk):
@patch("homeassistant.util.yaml.loader.os.walk")
def test_include_dir_named_recursive(mock_walk):
def test_include_dir_named_recursive(mock_walk, try_both_loaders):
"""Test include dir named yaml."""
mock_walk.return_value = [
["/test", ["tmp2", ".ignore", "ignore"], ["first.yaml"]],
@ -167,7 +186,7 @@ def test_include_dir_named_recursive(mock_walk):
@patch("homeassistant.util.yaml.loader.os.walk")
def test_include_dir_merge_list(mock_walk):
def test_include_dir_merge_list(mock_walk, try_both_loaders):
"""Test include dir merge list yaml."""
mock_walk.return_value = [["/test", [], ["first.yaml", "second.yaml"]]]
@ -181,7 +200,7 @@ def test_include_dir_merge_list(mock_walk):
@patch("homeassistant.util.yaml.loader.os.walk")
def test_include_dir_merge_list_recursive(mock_walk):
def test_include_dir_merge_list_recursive(mock_walk, try_both_loaders):
"""Test include dir merge list yaml."""
mock_walk.return_value = [
["/test", ["tmp2", ".ignore", "ignore"], ["first.yaml"]],
@ -208,7 +227,7 @@ def test_include_dir_merge_list_recursive(mock_walk):
@patch("homeassistant.util.yaml.loader.os.walk")
def test_include_dir_merge_named(mock_walk):
def test_include_dir_merge_named(mock_walk, try_both_loaders):
"""Test include dir merge named yaml."""
mock_walk.return_value = [["/test", [], ["first.yaml", "second.yaml"]]]
@ -225,7 +244,7 @@ def test_include_dir_merge_named(mock_walk):
@patch("homeassistant.util.yaml.loader.os.walk")
def test_include_dir_merge_named_recursive(mock_walk):
def test_include_dir_merge_named_recursive(mock_walk, try_both_loaders):
"""Test include dir merge named yaml."""
mock_walk.return_value = [
["/test", ["tmp2", ".ignore", "ignore"], ["first.yaml"]],
@ -257,7 +276,7 @@ def test_include_dir_merge_named_recursive(mock_walk):
@patch("homeassistant.util.yaml.loader.open", create=True)
def test_load_yaml_encoding_error(mock_open):
def test_load_yaml_encoding_error(mock_open, try_both_loaders):
"""Test raising a UnicodeDecodeError."""
mock_open.side_effect = UnicodeDecodeError("", b"", 1, 0, "")
with pytest.raises(HomeAssistantError):
@ -413,7 +432,7 @@ def test_representing_yaml_loaded_data():
assert yaml.dump(data) == "key:\n- 1\n- '2'\n- 3\n"
def test_duplicate_key(caplog):
def test_duplicate_key(caplog, try_both_loaders):
"""Test duplicate dict keys."""
files = {YAML_CONFIG_FILE: "key: thing1\nkey: thing2"}
with patch_yaml_files(files):
@ -421,7 +440,7 @@ def test_duplicate_key(caplog):
assert "contains duplicate key" in caplog.text
def test_no_recursive_secrets(caplog):
def test_no_recursive_secrets(caplog, try_both_loaders):
"""Test that loading of secrets from the secrets file fails correctly."""
files = {YAML_CONFIG_FILE: "key: !secret a", yaml.SECRET_YAML: "a: 1\nb: !secret a"}
with patch_yaml_files(files), pytest.raises(HomeAssistantError) as e:
@ -441,7 +460,16 @@ def test_input_class():
assert len({input, input2}) == 1
def test_input():
def test_input(try_both_loaders):
"""Test loading inputs."""
data = {"hello": yaml.Input("test_name")}
assert yaml.parse_yaml(yaml.dump(data)) == data
@pytest.mark.skipif(
not os.environ.get("HASS_CI"),
reason="This test validates that the CI has the C loader available",
)
def test_c_loader_is_available_in_ci():
"""Verify we are testing the C loader in the CI."""
assert yaml.loader.HAS_C_LOADER is True