core/tests/components/assist_pipeline/test_vad.py

"""Tests for voice command segmenter."""
import itertools as it
from unittest.mock import patch

from homeassistant.components.assist_pipeline.vad import (
    AudioBuffer,
    VoiceActivityDetector,
    VoiceCommandSegmenter,
    chunk_samples,
)

# Duration of one second, expressed in seconds. The tests below pass multiples
# of this value as the first argument to VoiceCommandSegmenter.process().
_ONE_SECOND = 1.0


def test_silence() -> None:
    """Check that three seconds of silence leaves the command unfinished."""
    three_seconds = 3 * _ONE_SECOND
    segmenter = VoiceCommandSegmenter()

    # A truthy result from process() means the voice command has not finished.
    still_listening = segmenter.process(three_seconds, False)
    assert still_listening


def test_speech() -> None:
    """Test that silence + speech + silence triggers a voice command.

    Each step feeds `_ONE_SECOND` to a default-configured segmenter together
    with an explicit speech / no-speech flag, so no VAD is involved here.
    """
    segmenter = VoiceCommandSegmenter()

    # Leading silence: True return value, the voice command is not finished
    assert segmenter.process(_ONE_SECOND, False)

    # "speech": still not finished
    assert segmenter.process(_ONE_SECOND, True)

    # Trailing silence:
    # False return value indicates voice command is finished
    assert not segmenter.process(_ONE_SECOND, False)


def test_audio_buffer() -> None:
    """Exercise AudioBuffer filling, wrapping and clearing via process_with_vad."""

    class DisabledVad(VoiceActivityDetector):
        """VAD stub that never reports speech and uses 160-sample chunks."""

        def is_speech(self, chunk):
            return False

        @property
        def samples_per_chunk(self):
            return 160  # 10 ms

    def patterned_bytes(length):
        """Return `length` bytes that cycle through the values 0..255."""
        return bytes(it.islice(it.cycle(range(256)), length))

    vad = DisabledVad()
    chunk_size = vad.samples_per_chunk * 2
    quarter_size = chunk_size // 4
    half_size = chunk_size // 2
    vad_buffer = AudioBuffer(chunk_size)
    segmenter = VoiceCommandSegmenter()

    with patch.object(vad, "is_speech", return_value=False) as is_speech_mock:
        # Step 1: half a chunk is not enough to call the VAD; it is only buffered
        first_part = patterned_bytes(half_size)
        segmenter.process_with_vad(first_part, vad, vad_buffer)

        is_speech_mock.assert_not_called()
        assert vad_buffer is not None
        assert vad_buffer.bytes() == first_part

        # Step 2: another 3/4 chunk completes one chunk and leaves 1/4 behind
        second_part = patterned_bytes(int(0.75 * chunk_size))
        segmenter.process_with_vad(second_part, vad, vad_buffer)

        assert is_speech_mock.call_count == 1
        expected_leftover = second_part[-quarter_size:]
        assert vad_buffer.bytes() == expected_leftover
        # The VAD saw the buffered half followed by the head of the new data
        expected_chunk = first_part + second_part[:half_size]
        assert is_speech_mock.call_args.args[0] == expected_chunk

        # Step 3: start over and push exactly two chunks through in one call
        segmenter.reset()
        vad_buffer.clear()
        assert len(vad_buffer) == 0

        is_speech_mock.reset_mock()
        double_chunk = patterned_bytes(2 * chunk_size)
        segmenter.process_with_vad(double_chunk, vad, vad_buffer)

        assert is_speech_mock.call_count == 2
        assert len(vad_buffer) == 0
        first_call, second_call = is_speech_mock.call_args_list
        assert first_call.args[0] == double_chunk[:chunk_size]
        assert second_call.args[0] == double_chunk[chunk_size:]


def test_partial_chunk() -> None:
    """Check that a shorter-than-chunk input yields nothing and is buffered."""
    chunk_size = 5
    partial = bytes((1, 2, 3))
    leftover = AudioBuffer(chunk_size)

    produced = [*chunk_samples(partial, chunk_size, leftover)]

    # No complete chunk could be formed; all input bytes sit in the buffer
    assert produced == []
    assert leftover.bytes() == partial


def test_chunk_samples_leftover() -> None:
    """Check that chunk_samples carries leftover bytes over between calls."""
    chunk_size = 5
    samples = bytes(range(1, 7))
    leftover = AudioBuffer(chunk_size)

    # Six bytes in: one chunk comes out and the final byte stays buffered
    first_pass = [*chunk_samples(samples, chunk_size, leftover)]
    assert len(first_pass) == 1
    assert leftover.bytes() == b"\x06"

    # The same six bytes again, on top of the buffered byte: one more chunk
    # comes out and two bytes remain
    second_pass = [*chunk_samples(samples, chunk_size, leftover)]
    assert len(second_pass) == 1
    assert leftover.bytes() == b"\x05\x06"


def test_vad_no_chunking() -> None:
    """Run a voice command through a VAD whose samples_per_chunk is None."""

    class VadNoChunk(VoiceActivityDetector):
        """VAD stub: any non-zero byte is speech; no chunk size is reported."""

        def is_speech(self, chunk: bytes) -> bool:
            return any(chunk)

        @property
        def samples_per_chunk(self) -> int | None:
            return None

    silence = bytes(16000)
    speech = b"\xff" * (16000 // 2)
    vad = VadNoChunk()
    segmenter = VoiceCommandSegmenter(
        speech_seconds=1.0, silence_seconds=1.0, reset_seconds=0.5
    )

    # The stub classifies both buffers even though their sizes differ
    assert vad.is_speech(speech)
    assert not vad.is_speech(silence)

    # Simulated voice command: (audio, whether the segmenter should continue).
    # No AudioBuffer is supplied (None) for any step.
    sequence = [
        (silence, True),  # leading silence
        *[(speech, True)] * 3,  # begin
        (silence, True),  # reset with silence
        *[(speech, True)] * 4,  # resume
        (silence, True),  # end: first trailing silence
        (silence, False),  # second trailing silence finishes the command
    ]
    for audio, expect_more in sequence:
        outcome = segmenter.process_with_vad(audio, vad, None)
        assert bool(outcome) is expect_more