Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions homeassistant/components/assist_pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,7 @@ async def speech_to_text(
{
"engine": engine,
"metadata": asdict(metadata),
"audio_processing": asdict(self.stt_provider.audio_processing),
},
)
)
Expand Down
36 changes: 34 additions & 2 deletions homeassistant/components/esphome/assist_satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@
)

self._active_pipeline_index = 0
self._active_audio_channel = 0
self._has_multi_channel_audio = False

def _get_entity_id(self, suffix: str) -> str | None:
"""Return the entity id for pipeline select, etc."""
Expand Down Expand Up @@ -291,6 +293,9 @@
assist_satellite.AssistSatelliteEntityFeature.START_CONVERSATION
)

if feature_flags & VoiceAssistantFeature.MULTI_CHANNEL_AUDIO:

Check failure on line 296 in homeassistant/components/esphome/assist_satellite.py

View workflow job for this annotation

GitHub Actions / Check mypy

"type[VoiceAssistantFeature]" has no attribute "MULTI_CHANNEL_AUDIO" [attr-defined]
self._has_multi_channel_audio = True

# Update wake word select when config is updated
self.async_on_remove(
self._entry_data.async_register_assist_satellite_set_wake_words_callback(
Expand All @@ -315,6 +320,18 @@

data_to_send: dict[str, Any] = {}
if event_type == VoiceAssistantEventType.VOICE_ASSISTANT_STT_START:
if (
self._has_multi_channel_audio
and event.data
and (audio_processing := event.data.get("audio_processing"))
):
# Settings come from stt SpeechAudioProcessing
if (audio_processing.get("prefers_auto_gain_enabled") is False) and (
audio_processing.get("prefers_noise_reduction_enabled") is False
):
# Use non-enhanced audio
self._active_audio_channel = 1

self._entry_data.async_set_assist_pipeline_state(True)
elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_STT_END:
assert event.data is not None
Expand Down Expand Up @@ -533,6 +550,10 @@
# Try next wake word select
maybe_pipeline_index += 1

# Default to audio channel 0 (enhanced)
# May be changed when STT_START event arrives.
self._active_audio_channel = 0

_LOGGER.debug(
"Running pipeline %s from %s to %s",
self._active_pipeline_index + 1,
Expand All @@ -555,9 +576,20 @@

return port

async def handle_audio(self, data: bytes) -> None:
async def handle_audio(self, data: bytes, data2: bytes | None = None) -> None:
"""Handle incoming audio chunk from API."""
self._audio_queue.put_nowait(data)
# Default to enhanced audio (channel 0)
active_data = data

if (
self._has_multi_channel_audio
and (data2 is not None)
and (self._active_audio_channel == 1)
):
# Non-enhanced audio (channel 1)
active_data = data2

self._audio_queue.put_nowait(active_data)

Comment on lines +581 to 593
async def handle_pipeline_stop(self, abort: bool) -> None:
"""Handle request for pipeline to stop."""
Expand Down
25 changes: 25 additions & 0 deletions tests/components/assist_pipeline/snapshots/test_init.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
}),
dict({
'data': dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': <AudioBitRates.BITRATE_16: 16>,
Expand Down Expand Up @@ -119,6 +124,11 @@
}),
dict({
'data': dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': <AudioBitRates.BITRATE_16: 16>,
Expand Down Expand Up @@ -221,6 +231,11 @@
}),
dict({
'data': dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'test',
'metadata': dict({
'bit_rate': <AudioBitRates.BITRATE_16: 16>,
Expand Down Expand Up @@ -347,6 +362,11 @@
}),
dict({
'data': dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': <AudioBitRates.BITRATE_16: 16>,
Expand Down Expand Up @@ -449,6 +469,11 @@
}),
dict({
'data': dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': <AudioBitRates.BITRATE_16: 16>,
Expand Down
40 changes: 40 additions & 0 deletions tests/components/assist_pipeline/snapshots/test_websocket.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
# ---
# name: test_audio_pipeline.1
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down Expand Up @@ -112,6 +117,11 @@
# ---
# name: test_audio_pipeline_debug.1
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down Expand Up @@ -218,6 +228,11 @@
# ---
# name: test_audio_pipeline_with_enhancements.1
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down Expand Up @@ -334,6 +349,11 @@
# ---
# name: test_audio_pipeline_with_wake_word_no_timeout.3
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down Expand Up @@ -461,6 +481,11 @@
# ---
# name: test_device_capture.1
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down Expand Up @@ -488,6 +513,11 @@
# ---
# name: test_device_capture_override.1
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down Expand Up @@ -537,6 +567,11 @@
# ---
# name: test_device_capture_queue_full.1
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down Expand Up @@ -761,6 +796,11 @@
# ---
# name: test_stt_stream_failed.1
dict({
'audio_processing': dict({
'prefers_auto_gain_enabled': True,
'prefers_noise_reduction_enabled': True,
'requires_external_vad': True,
}),
'engine': 'stt.mock_stt',
'metadata': dict({
'bit_rate': 16,
Expand Down
135 changes: 135 additions & 0 deletions tests/components/esphome/test_assist_satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -2290,3 +2290,138 @@ async def test_custom_wake_words(
# Check non-existent wake word
req = await http_client.get("/api/esphome/wake_words/wrong_wake_word.json")
assert req.status == HTTPStatus.NOT_FOUND


async def test_multichannel_audio(
    hass: HomeAssistant,
    mock_client: APIClient,
    mock_esphome_device: MockESPHomeDeviceType,
) -> None:
    """Test that stt-start event can switch audio channels."""
    mock_device = await mock_esphome_device(
        mock_client=mock_client,
        device_info={
            "voice_assistant_feature_flags": VoiceAssistantFeature.VOICE_ASSISTANT
            | VoiceAssistantFeature.SPEAKER
            | VoiceAssistantFeature.API_AUDIO
            | VoiceAssistantFeature.MULTI_CHANNEL_AUDIO
        },
    )
    await hass.async_block_till_done()

    satellite = get_satellite_entity(hass, mock_device.device_info.mac_address)
    assert satellite is not None

    pipeline_done = asyncio.Event()

    async def fake_pipeline(*args, **kwargs):
        # Fire an STT_START event whose audio_processing settings ask for
        # the non-enhanced stream (channel 1).
        kwargs["event_callback"](
            PipelineEvent(
                type=PipelineEventType.STT_START,
                data={
                    "engine": "test-stt-engine",
                    "metadata": {},
                    "audio_processing": {
                        # Request non-enhanced audio (channel 1)
                        "prefers_auto_gain_enabled": False,
                        "prefers_noise_reduction_enabled": False,
                    },
                },
            )
        )

        # Drain the STT stream and confirm the satellite forwarded channel 1.
        received = [chunk async for chunk in kwargs["stt_stream"]]
        assert received == [b"channel 1"]

        pipeline_done.set()

    with patch(
        "homeassistant.components.assist_satellite.entity.async_pipeline_from_audio_stream",
        new=fake_pipeline,
    ):
        async with asyncio.timeout(1):
            await satellite.handle_pipeline_start(
                conversation_id="",
                flags=VoiceAssistantCommandFlag(0),  # stt
                audio_settings=VoiceAssistantAudioSettings(),
                wake_word_phrase=None,
            )
            await satellite.handle_audio(b"channel 0", b"channel 1")
            await satellite.handle_pipeline_stop(abort=False)
            await pipeline_done.wait()


async def test_multichannel_audio_fallback_channel_0(
    hass: HomeAssistant,
    mock_client: APIClient,
    mock_esphome_device: MockESPHomeDeviceType,
) -> None:
    """Test that channel 0 is used if multi-channel audio isn't supported."""
    # Note: MULTI_CHANNEL_AUDIO is intentionally absent from the flags.
    mock_device = await mock_esphome_device(
        mock_client=mock_client,
        device_info={
            "voice_assistant_feature_flags": VoiceAssistantFeature.VOICE_ASSISTANT
            | VoiceAssistantFeature.SPEAKER
            | VoiceAssistantFeature.API_AUDIO
        },
    )
    await hass.async_block_till_done()

    satellite = get_satellite_entity(hass, mock_device.device_info.mac_address)
    assert satellite is not None

    pipeline_done = asyncio.Event()

    async def fake_pipeline(*args, **kwargs):
        # Ask for the non-enhanced stream (channel 1) via STT_START, even
        # though the device doesn't advertise multi-channel support.
        kwargs["event_callback"](
            PipelineEvent(
                type=PipelineEventType.STT_START,
                data={
                    "engine": "test-stt-engine",
                    "metadata": {},
                    "audio_processing": {
                        # Request non-enhanced audio (channel 1)
                        "prefers_auto_gain_enabled": False,
                        "prefers_noise_reduction_enabled": False,
                    },
                },
            )
        )

        # Channel 1 was requested but isn't supported, so channel 0 is used.
        received = [chunk async for chunk in kwargs["stt_stream"]]
        assert received == [b"channel 0"]

        pipeline_done.set()

    with patch(
        "homeassistant.components.assist_satellite.entity.async_pipeline_from_audio_stream",
        new=fake_pipeline,
    ):
        async with asyncio.timeout(1):
            await satellite.handle_pipeline_start(
                conversation_id="",
                flags=VoiceAssistantCommandFlag(0),  # stt
                audio_settings=VoiceAssistantAudioSettings(),
                wake_word_phrase=None,
            )
            await satellite.handle_audio(b"channel 0", b"channel 1")
            await satellite.handle_pipeline_stop(abort=False)
            await pipeline_done.wait()
Loading