167 lines
5.2 KiB
Python
167 lines
5.2 KiB
Python
"""Tests for voice command segmenter."""
|
|
import itertools as it
|
|
from unittest.mock import patch
|
|
|
|
from homeassistant.components.assist_pipeline.vad import (
|
|
AudioBuffer,
|
|
VoiceActivityDetector,
|
|
VoiceCommandSegmenter,
|
|
chunk_samples,
|
|
)
|
|
|
|
# One second, as a float; the tests below pass it (or a multiple of it)
# as the first argument of VoiceCommandSegmenter.process().
_ONE_SECOND = 1.0
|
|
|
|
|
|
def test_silence() -> None:
    """Verify that three seconds of pure silence never completes a command."""
    segmenter = VoiceCommandSegmenter()
    silence_seconds = _ONE_SECOND * 3

    # process() returns True while the voice command has not finished.
    still_listening = segmenter.process(silence_seconds, False)
    assert still_listening
|
|
|
|
|
|
def test_speech() -> None:
    """Test that silence + speech + silence triggers a voice command.

    The segmenter is driven directly through ``process`` with an explicit
    speech flag for each one-second step, so no voice activity detector
    (and no helper that classifies chunks) is needed here.
    """
    segmenter = VoiceCommandSegmenter()

    # silence
    # True return value indicates voice command has not finished
    assert segmenter.process(_ONE_SECOND, False)

    # "speech"
    assert segmenter.process(_ONE_SECOND, True)

    # silence
    # False return value indicates voice command is finished
    assert not segmenter.process(_ONE_SECOND, False)
|
|
|
|
|
|
def test_audio_buffer() -> None:
    """Exercise AudioBuffer filling and wrapping via process_with_vad."""

    class DisabledVad(VoiceActivityDetector):
        """Detector stub that never reports speech."""

        def is_speech(self, chunk):
            return False

        @property
        def samples_per_chunk(self):
            return 160  # 10 ms

    def ramp(length: int) -> bytes:
        """Return ``length`` bytes counting 0..255 over and over."""
        return bytes(it.islice(it.cycle(range(256)), length))

    vad = DisabledVad()
    # Two bytes per sample (presumably 16-bit audio; confirm in vad module).
    bytes_per_chunk = vad.samples_per_chunk * 2
    vad_buffer = AudioBuffer(bytes_per_chunk)
    segmenter = VoiceCommandSegmenter()

    with patch.object(vad, "is_speech", return_value=False) as is_speech_mock:
        # Step 1: half a chunk only partially fills the buffer, so the
        # detector must not have been consulted yet.
        half = ramp(bytes_per_chunk // 2)
        segmenter.process_with_vad(half, vad, vad_buffer)

        assert not is_speech_mock.called
        assert vad_buffer is not None
        assert vad_buffer.bytes() == half

        # Step 2: another 3/4 chunk completes one chunk and wraps, leaving
        # the final 1/4 chunk behind in the buffer.
        three_quarters = ramp(bytes_per_chunk * 3 // 4)
        segmenter.process_with_vad(three_quarters, vad, vad_buffer)

        quarter_len = bytes_per_chunk // 4
        assert is_speech_mock.call_count == 1
        assert vad_buffer.bytes() == three_quarters[-quarter_len:]

        # The chunk handed to the detector is the buffered half followed by
        # the first half-chunk of the new data.
        expected_chunk = half + three_quarters[: bytes_per_chunk // 2]
        assert is_speech_mock.call_args.args[0] == expected_chunk

        # Step 3: start over and push exactly two chunks through.
        segmenter.reset()
        vad_buffer.clear()
        assert len(vad_buffer) == 0

        is_speech_mock.reset_mock()
        double = ramp(bytes_per_chunk * 2)
        segmenter.process_with_vad(double, vad, vad_buffer)

        assert is_speech_mock.call_count == 2
        assert len(vad_buffer) == 0

        first_call, second_call = is_speech_mock.call_args_list
        assert first_call.args[0] == double[:bytes_per_chunk]
        assert second_call.args[0] == double[bytes_per_chunk:]
|
|
|
|
|
|
def test_partial_chunk() -> None:
    """Check that chunk_samples yields nothing for less than one chunk."""
    chunk_size = 5
    partial = b"\x01\x02\x03"
    leftover = AudioBuffer(chunk_size)

    produced = [*chunk_samples(partial, chunk_size, leftover)]

    # Three bytes cannot fill a five-byte chunk: nothing is yielded and
    # the input is held in the leftover buffer instead.
    assert len(produced) == 0
    assert leftover.bytes() == partial
|
|
|
|
|
|
def test_chunk_samples_leftover() -> None:
    """Check that chunk_samples carries left over bytes across calls."""
    chunk_size = 5
    six_bytes = b"\x01\x02\x03\x04\x05\x06"
    leftover = AudioBuffer(chunk_size)

    # First call: six bytes give one five-byte chunk with one byte to spare.
    first_pass = [*chunk_samples(six_bytes, chunk_size, leftover)]

    assert len(first_pass) == 1
    assert leftover.bytes() == b"\x06"

    # Second call with the same input: the carried-over byte plus six new
    # bytes make seven, i.e. one more chunk and two bytes left over.
    second_pass = [*chunk_samples(six_bytes, chunk_size, leftover)]

    assert len(second_pass) == 1
    assert leftover.bytes() == b"\x05\x06"
|
|
|
|
|
|
def test_vad_no_chunking() -> None:
    """Run a full voice command through a VAD that needs no chunking."""

    class VadNoChunk(VoiceActivityDetector):
        """Detector stub: any non-zero byte counts as speech."""

        def is_speech(self, chunk: bytes) -> bool:
            return sum(chunk) > 0

        @property
        def samples_per_chunk(self) -> int | None:
            # None: this detector does not ask for fixed-size chunks.
            return None

    vad = VadNoChunk()
    segmenter = VoiceCommandSegmenter(
        speech_seconds=1.0,
        silence_seconds=1.0,
        reset_seconds=0.5,
    )
    # NOTE(review): presumably 16 kHz 16-bit audio, making these buffers
    # about 0.5 s and 0.25 s long; confirm against the vad module.
    silence = bytes(16000)
    speech = b"\xff" * (16000 // 2)

    # Sanity-check the stub on the two differently-sized buffers.
    assert vad.is_speech(speech)
    assert not vad.is_speech(silence)

    def feed(audio: bytes) -> bool:
        """Push one buffer through the segmenter without an AudioBuffer."""
        return segmenter.process_with_vad(audio, vad, None)

    # Simulate voice command
    assert feed(silence)

    # begin
    for _ in range(3):
        assert feed(speech)

    # reset with silence
    assert feed(silence)

    # resume
    for _ in range(4):
        assert feed(speech)

    # end: the first silence keeps the command open, the second finishes it
    assert feed(silence)
    assert not feed(silence)
|