Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions homeassistant/components/assist_pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,7 @@ async def speech_to_text(
{
"engine": engine,
"metadata": asdict(metadata),
"audio_processing": asdict(self.stt_provider.audio_processing),
},
)
)
Expand Down
36 changes: 34 additions & 2 deletions homeassistant/components/esphome/assist_satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@
)

self._active_pipeline_index = 0
self._active_audio_channel = 0
self._has_multi_channel_audio = False

def _get_entity_id(self, suffix: str) -> str | None:
"""Return the entity id for pipeline select, etc."""
Expand Down Expand Up @@ -291,6 +293,9 @@
assist_satellite.AssistSatelliteEntityFeature.START_CONVERSATION
)

if feature_flags & VoiceAssistantFeature.MULTI_CHANNEL_AUDIO:

Check failure on line 296 in homeassistant/components/esphome/assist_satellite.py

View workflow job for this annotation

GitHub Actions / Check mypy

"type[VoiceAssistantFeature]" has no attribute "MULTI_CHANNEL_AUDIO" [attr-defined]
self._has_multi_channel_audio = True

# Update wake word select when config is updated
self.async_on_remove(
self._entry_data.async_register_assist_satellite_set_wake_words_callback(
Expand All @@ -315,6 +320,18 @@

data_to_send: dict[str, Any] = {}
if event_type == VoiceAssistantEventType.VOICE_ASSISTANT_STT_START:
if (
self._has_multi_channel_audio
and event.data
and (audio_processing := event.data.get("audio_processing"))
):
# Settings come from stt SpeechAudioProcessing
if (audio_processing.get("prefers_auto_gain_enabled") is False) and (
audio_processing.get("prefers_noise_reduction_enabled") is False
):
# Use non-enhanced audio
self._active_audio_channel = 1

self._entry_data.async_set_assist_pipeline_state(True)
elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_STT_END:
assert event.data is not None
Expand Down Expand Up @@ -533,6 +550,10 @@
# Try next wake word select
maybe_pipeline_index += 1

# Default to audio channel 0 (enhanced)
# May be changed when STT_START event arrives.
self._active_audio_channel = 0

_LOGGER.debug(
"Running pipeline %s from %s to %s",
self._active_pipeline_index + 1,
Expand All @@ -555,9 +576,20 @@

return port

async def handle_audio(self, data: bytes) -> None:
async def handle_audio(self, data: bytes, data2: bytes | None = None) -> None:
"""Handle incoming audio chunk from API."""
self._audio_queue.put_nowait(data)
# Default to enhanced audio (channel 0)
active_data = data

if (
self._has_multi_channel_audio
and (data2 is not None)
and (self._active_audio_channel == 1)
):
# Non-enhanced audio (channel 1)
active_data = data2

self._audio_queue.put_nowait(active_data)

Comment on lines +581 to 593
async def handle_pipeline_stop(self, abort: bool) -> None:
"""Handle request for pipeline to stop."""
Expand Down
25 changes: 25 additions & 0 deletions tests/components/assist_pipeline/snapshots/test_init.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
}),
dict({
'data': dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': <AudioBitRates.BITRATE_16: 16>,
Expand Down Expand Up @@ -119,6 +124,11 @@
}),
dict({
'data': dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': <AudioBitRates.BITRATE_16: 16>,
Expand Down Expand Up @@ -221,6 +231,11 @@
}),
dict({
'data': dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'test',
'metadata': dict({
'bit_rate': <AudioBitRates.BITRATE_16: 16>,
Expand Down Expand Up @@ -347,6 +362,11 @@
}),
dict({
'data': dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': <AudioBitRates.BITRATE_16: 16>,
Expand Down Expand Up @@ -449,6 +469,11 @@
}),
dict({
'data': dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': <AudioBitRates.BITRATE_16: 16>,
Expand Down
40 changes: 40 additions & 0 deletions tests/components/assist_pipeline/snapshots/test_websocket.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
# ---
# name: test_audio_pipeline.1
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down Expand Up @@ -112,6 +117,11 @@
# ---
# name: test_audio_pipeline_debug.1
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down Expand Up @@ -218,6 +228,11 @@
# ---
# name: test_audio_pipeline_with_enhancements.1
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down Expand Up @@ -334,6 +349,11 @@
# ---
# name: test_audio_pipeline_with_wake_word_no_timeout.3
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down Expand Up @@ -461,6 +481,11 @@
# ---
# name: test_device_capture.1
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down Expand Up @@ -488,6 +513,11 @@
# ---
# name: test_device_capture_override.1
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down Expand Up @@ -537,6 +567,11 @@
# ---
# name: test_device_capture_queue_full.1
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down Expand Up @@ -761,6 +796,11 @@
# ---
# name: test_stt_stream_failed.1
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down
135 changes: 135 additions & 0 deletions tests/components/esphome/test_assist_satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -2290,3 +2290,138 @@ async def test_custom_wake_words(
# Check non-existent wake word
req = await http_client.get("/api/esphome/wake_words/wrong_wake_word.json")
assert req.status == HTTPStatus.NOT_FOUND


async def test_multichannel_audio(
    hass: HomeAssistant,
    mock_client: APIClient,
    mock_esphome_device: MockESPHomeDeviceType,
) -> None:
    """Test that stt-start event can switch audio channels."""
    mock_device = await mock_esphome_device(
        mock_client=mock_client,
        device_info={
            "voice_assistant_feature_flags": VoiceAssistantFeature.VOICE_ASSISTANT
            | VoiceAssistantFeature.SPEAKER
            | VoiceAssistantFeature.API_AUDIO
            | VoiceAssistantFeature.MULTI_CHANNEL_AUDIO
        },
    )
    await hass.async_block_till_done()

    satellite = get_satellite_entity(hass, mock_device.device_info.mac_address)
    assert satellite is not None

    pipeline_done = asyncio.Event()

    async def fake_pipeline(*args, **kwargs):
        # Fire an STT_START event whose audio_processing settings ask for
        # the non-enhanced stream (channel 1).
        kwargs["event_callback"](
            PipelineEvent(
                type=PipelineEventType.STT_START,
                data={
                    "engine": "test-stt-engine",
                    "metadata": {},
                    "audio_processing": {
                        # Request non-enhanced audio (channel 1)
                        "prefers_auto_gain_enabled": False,
                        "prefers_noise_reduction_enabled": False,
                    },
                },
            )
        )

        # Drain the STT stream and confirm the satellite forwarded channel 1.
        received = [chunk async for chunk in kwargs["stt_stream"]]
        assert received == [b"channel 1"]

        pipeline_done.set()

    with patch(
        "homeassistant.components.assist_satellite.entity.async_pipeline_from_audio_stream",
        new=fake_pipeline,
    ):
        async with asyncio.timeout(1):
            await satellite.handle_pipeline_start(
                conversation_id="",
                flags=VoiceAssistantCommandFlag(0),  # stt
                audio_settings=VoiceAssistantAudioSettings(),
                wake_word_phrase=None,
            )
            await satellite.handle_audio(b"channel 0", b"channel 1")
            await satellite.handle_pipeline_stop(abort=False)
            await pipeline_done.wait()


async def test_multichannel_audio_fallback_channel_0(
    hass: HomeAssistant,
    mock_client: APIClient,
    mock_esphome_device: MockESPHomeDeviceType,
) -> None:
    """Test that channel 0 is used if multi-channel audio isn't supported."""
    # Note: MULTI_CHANNEL_AUDIO is intentionally absent from the flags.
    mock_device = await mock_esphome_device(
        mock_client=mock_client,
        device_info={
            "voice_assistant_feature_flags": VoiceAssistantFeature.VOICE_ASSISTANT
            | VoiceAssistantFeature.SPEAKER
            | VoiceAssistantFeature.API_AUDIO
        },
    )
    await hass.async_block_till_done()

    satellite = get_satellite_entity(hass, mock_device.device_info.mac_address)
    assert satellite is not None

    pipeline_done = asyncio.Event()

    async def fake_pipeline(*args, **kwargs):
        # Ask for the non-enhanced stream (channel 1) via STT_START, even
        # though the device doesn't advertise multi-channel support.
        kwargs["event_callback"](
            PipelineEvent(
                type=PipelineEventType.STT_START,
                data={
                    "engine": "test-stt-engine",
                    "metadata": {},
                    "audio_processing": {
                        # Request non-enhanced audio (channel 1)
                        "prefers_auto_gain_enabled": False,
                        "prefers_noise_reduction_enabled": False,
                    },
                },
            )
        )

        # Channel 1 was requested but isn't supported, so channel 0 is used.
        received = [chunk async for chunk in kwargs["stt_stream"]]
        assert received == [b"channel 0"]

        pipeline_done.set()

    with patch(
        "homeassistant.components.assist_satellite.entity.async_pipeline_from_audio_stream",
        new=fake_pipeline,
    ):
        async with asyncio.timeout(1):
            await satellite.handle_pipeline_start(
                conversation_id="",
                flags=VoiceAssistantCommandFlag(0),  # stt
                audio_settings=VoiceAssistantAudioSettings(),
                wake_word_phrase=None,
            )
            await satellite.handle_audio(b"channel 0", b"channel 1")
            await satellite.handle_pipeline_stop(abort=False)
            await pipeline_done.wait()
Loading