diff --git a/docs/dev/WebRTCAEC.md b/docs/dev/WebRTCAEC.md new file mode 100644 index 00000000000..eafcd2a3315 --- /dev/null +++ b/docs/dev/WebRTCAEC.md @@ -0,0 +1,68 @@ +# WebRTC AEC3 Echo Cancellation + +Mumble's built-in echo cancellation uses SpeexDSP — a linear MDF filter from ~2007. It works adequately with headphones but struggles with real-world speaker setups: nonlinear distortion, room reverb, and rapidly changing acoustic conditions produce audible echo artifacts. + +This optional mode adds **WebRTC AEC3**, the same algorithm used in Chrome, Teams, and Discord. It is enabled at build time with `-Dwebrtc-apm=ON` and appears in Audio Input settings as **"Echo cancellation (WebRTC AEC3)"**. + +## How it works + +WebRTC APM separates render (speaker) and capture (microphone) processing, which lets it bypass Mumble's existing Resynchronizer queue: + +- **`addEcho()`** — speaker samples feed directly into `ProcessReverseStream()`. The `short[]` allocation and `resync.addSpeaker()` are skipped. +- **`encodeAudioFrame()`** — mic samples feed directly into `ProcessStream()` instead of the Speex path. The output latency published in `Global::iOutputLatencyMs` is passed as the stream delay so AEC3 can align the two streams; the WASAPI backend measures it (hardware path from `GetStreamLatency` + software buffer occupancy), and a backend that does not publish a measurement leaves it at the 50 ms default. + +The existing SpeexDSP preprocessor (VAD, AGC, denoising) still runs on the cleaned signal afterward. The Resynchronizer and Speex AEC paths are untouched — `SPEEX_MIXED` and `SPEEX_MULTICHANNEL` behave exactly as before. + +## Platform support + +| Backend | Supported | +|---|---| +| Windows (WASAPI) | Yes | +| Linux (PulseAudio) | Yes | +| macOS | No — use `APPLE_AEC` instead | +| ALSA | No — no loopback capture available | + +## Getting the dependency + +`webrtc-audio-processing` (≥ 2.0, freedesktop fork) must be installed separately — it is not bundled with Mumble. 
+ +**Linux:** +```sh +# Ubuntu/Debian +sudo apt install libwebrtc-audio-processing-dev +# Fedora +sudo dnf install webrtc-audio-processing-devel +``` + +**Windows (vcpkg):** +```sh +vcpkg install webrtc-audio-processing:x64-windows-static-md +``` + +> **Note:** Mumble's vcpkg fork (`mumble-voip/vcpkg`) does not yet include this port. For now, install from upstream vcpkg and point `CMAKE_PREFIX_PATH` at your vcpkg installed tree, or build from source. + +**From source:** +```sh +git clone https://gitlab.freedesktop.org/pipewire/webrtc-audio-processing.git +cd webrtc-audio-processing +meson setup build --prefix=/your/install/prefix +ninja -C build install +``` + +## Building Mumble with WebRTC AEC3 + +```sh +cmake -Dwebrtc-apm=ON -DCMAKE_PREFIX_PATH=/your/install/prefix .. +ninja mumble +``` + +On Windows, run this from an MSVC x64 developer environment. The DLL (`webrtc-audio-processing-2-1.dll`) is automatically copied to the build output directory by a `POST_BUILD` step. + +Builds without the flag (`-Dwebrtc-apm=OFF`, the default) are unaffected — no behavior change, no new dependency. + +## Related + +- `src/mumble/AudioInput.cpp` — `resetAudioProcessor()`, `addMic()`, `addEcho()`, `encodeAudioFrame()` +- `src/mumble/EchoCancelOption.h` / `.cpp` — enum and option list +- `docs/dev/build-instructions/cmake_options.md` — `webrtc-apm` option reference +- `docs/dev/AudioInputDebug.md` — how to tap the DSP chain for debugging diff --git a/src/mumble/AudioInput.cpp b/src/mumble/AudioInput.cpp index a2265cbc574..063412f4555 100644 --- a/src/mumble/AudioInput.cpp +++ b/src/mumble/AudioInput.cpp @@ -310,6 +310,10 @@ AudioInput::~AudioInput() { if (sesEcho) speex_echo_state_destroy(sesEcho); +#ifdef USE_WEBRTC_APM + m_apm = nullptr; +#endif + if (srsMic) speex_resampler_destroy(srsMic); if (srsEcho) @@ -598,7 +602,19 @@ void AudioInput::addMic(const void *data, unsigned int nsamp) { // If we have echo cancellation enabled... 
if (iEchoChannels > 0) { - resync.addMic(psMic); +#ifdef USE_WEBRTC_APM + // WebRTC path: render stream was already fed in addEcho(), process capture directly. + if (m_apm) { + encodeAudioFrame(AudioChunk(psMic)); + // psMic is heap-allocated because iEchoChannels > 0 (see above). + // The Speex path hands it to the Resynchronizer which owns and frees it; + // the WebRTC path bypasses the Resynchronizer, so we free it here. + delete[] psMic; + } else +#endif + { + resync.addMic(psMic); + } } else { encodeAudioFrame(AudioChunk(psMic)); } @@ -654,6 +670,21 @@ void AudioInput::addEcho(const void *data, unsigned int nsamp) { speex_resampler_process_interleaved_float(srsEcho, pfEchoInput, &inlen, pfOutput, &outlen); } +#ifdef USE_WEBRTC_APM + // WebRTC path: feed the render (speaker) stream directly to the Audio Processing Module (APM). + // The capture (mic) side calls encodeAudioFrame() from addMic() to complete the AEC loop. + // Hold qmSpeex briefly to guard against m_apm being reset concurrently in resetAudioProcessor(). + { + QMutexLocker l(&qmSpeex); + if (m_apm) { + float *renderPtr = ptr; + webrtc::StreamConfig cfg(iSampleRate, 1); + m_apm->ProcessReverseStream(&renderPtr, cfg, cfg, &renderPtr); + continue; + } + } +#endif + short *outbuff = new short[iEchoFrameSize]; // float -> 16bit PCM @@ -749,6 +780,11 @@ void AudioInput::resetAudioProcessor() { if (sesEcho) speex_echo_state_destroy(sesEcho); + sesEcho = nullptr; // Null immediately after destroy to prevent a dangling pointer on re-entry. + +#ifdef USE_WEBRTC_APM + m_apm = nullptr; +#endif m_preprocessor.init(iSampleRate, iFrameSize); resync.reset(); @@ -769,16 +805,29 @@ void AudioInput::resetAudioProcessor() { } if (iEchoChannels > 0) { - int filterSize = iFrameSize * (10 + resync.getNominalLag()); - sesEcho = - speex_echo_state_init_mc(iFrameSize, filterSize, 1, bEchoMulti ? 
static_cast< int >(iEchoChannels) : 1); - int iArg = iSampleRate; - speex_echo_ctl(sesEcho, SPEEX_ECHO_SET_SAMPLING_RATE, &iArg); - m_preprocessor.setEchoState(sesEcho); - - qWarning("AudioInput: ECHO CANCELLER ACTIVE"); - } else { - sesEcho = nullptr; +#ifdef USE_WEBRTC_APM + if (Global::get().s.echoOption == EchoCancelOptionID::WEBRTC_AEC) { + webrtc::AudioProcessing::Config cfg; + cfg.echo_canceller.enabled = true; + cfg.echo_canceller.mobile_mode = false; + m_apm = webrtc::AudioProcessingBuilder().SetConfig(cfg).Create(); + if (m_apm) { + qWarning("AudioInput: WebRTC AEC3 ACTIVE"); + } else { + qWarning("AudioInput: Failed to create WebRTC APM, echo cancellation disabled"); + } + } else +#endif + { + int filterSize = iFrameSize * (10 + resync.getNominalLag()); + sesEcho = speex_echo_state_init_mc(iFrameSize, filterSize, 1, + bEchoMulti ? static_cast< int >(iEchoChannels) : 1); + int iArg = iSampleRate; + speex_echo_ctl(sesEcho, SPEEX_ECHO_SET_SAMPLING_RATE, &iArg); + m_preprocessor.setEchoState(sesEcho); + + qWarning("AudioInput: ECHO CANCELLER ACTIVE"); + } } bResetEncoder = true; @@ -900,6 +949,22 @@ void AudioInput::encodeAudioFrame(AudioChunk chunk) { } short psClean[iFrameSize]; +#ifdef USE_WEBRTC_APM + if (m_apm) { + // WebRTC APM works in float [-1.0, 1.0]; convert to/from int16 PCM [-32768, 32767]. 
+ static constexpr float kInt16Scale = 32768.f; + float floatBuf[iFrameSize]; + for (int i = 0; i < iFrameSize; ++i) + floatBuf[i] = chunk.mic[i] / kInt16Scale; + float *floatPtr = floatBuf; + webrtc::StreamConfig cfg(iSampleRate, 1); + m_apm->set_stream_delay_ms(Global::get().iOutputLatencyMs.load()); + m_apm->ProcessStream(&floatPtr, cfg, cfg, &floatPtr); + for (int i = 0; i < iFrameSize; ++i) + psClean[i] = static_cast< short >(qBound(-kInt16Scale, floatBuf[i] * kInt16Scale, kInt16Scale - 1.f)); + psSource = psClean; + } else +#endif if (sesEcho && chunk.speaker) { speex_echo_cancellation(sesEcho, chunk.mic, chunk.speaker, psClean); psSource = psClean; diff --git a/src/mumble/AudioInput.h b/src/mumble/AudioInput.h index 5a87257090e..eb8153d2c8d 100644 --- a/src/mumble/AudioInput.h +++ b/src/mumble/AudioInput.h @@ -21,6 +21,17 @@ #include #include +#ifdef USE_WEBRTC_APM +# ifdef _MSC_VER + // webrtc-audio-processing headers emit warnings we can't fix (third-party code). +# pragma warning(push, 0) +# endif +# include +# ifdef _MSC_VER +# pragma warning(pop) +# endif +#endif + #include "Audio.h" #include "AudioOutputToken.h" #include "AudioPreprocessor.h" @@ -225,6 +236,9 @@ class AudioInput : public QThread { QMutex qmSpeex; AudioPreprocessor m_preprocessor; SpeexEchoState *sesEcho; +#ifdef USE_WEBRTC_APM + rtc::scoped_refptr< webrtc::AudioProcessing > m_apm; +#endif /// bResetEncoder is a flag that notifies /// our encoder functions that the encoder diff --git a/src/mumble/CMakeLists.txt b/src/mumble/CMakeLists.txt index 1f3daa534c2..f131ebe2ce5 100644 --- a/src/mumble/CMakeLists.txt +++ b/src/mumble/CMakeLists.txt @@ -24,6 +24,8 @@ option(bundled-speex "Build the included version of Speex instead of looking for option(rnnoise "Use RNNoise for machine learning noise reduction." ON) option(bundled-rnnoise "Build the included version of RNNoise instead of looking for one on the system." 
${rnnoise}) +option(webrtc-apm "Use WebRTC AEC3 for echo cancellation via webrtc-audio-processing." OFF) + option(manual-plugin "Include the built-in \"manual\" positional audio plugin." ON) option(qtspeech "Use Qt's text-to-speech system (part of the Qt Speech module) instead of Mumble's own OS-specific text-to-speech implementations." OFF) @@ -787,6 +789,67 @@ if(rnnoise) endif() endif() +if(webrtc-apm) + target_compile_definitions(mumble_client_object_lib PRIVATE "USE_WEBRTC_APM") + + # Try pkg-config first (Linux), then fall back to find_path/find_library (Windows/macOS). + # The installed include dir is PREFIX/include/webrtc-audio-processing-{1,2}/ + # Headers are then included relative to it, e.g. modules/audio_processing/include/audio_processing.h. + find_pkg("webrtc-audio-processing-2;webrtc-audio-processing-1;webrtc-audio-processing") + + set(WEBRTC_APM_FOUND FALSE) + foreach(_wap_name webrtc-audio-processing-2 webrtc-audio-processing-1 webrtc-audio-processing) + if(${_wap_name}_FOUND) + target_include_directories(mumble_client_object_lib PRIVATE ${${_wap_name}_INCLUDE_DIRS}) + target_link_libraries(mumble_client_object_lib PRIVATE ${${_wap_name}_LIBRARIES}) + set(WEBRTC_APM_FOUND TRUE) + break() + endif() + endforeach() + + if(NOT WEBRTC_APM_FOUND) + # pkg-config not available (e.g. Windows). Search manually via CMAKE_PREFIX_PATH. + # Supports both version 1.x and 2.x install layouts. + find_path(WEBRTC_APM_INCLUDE_DIR + NAMES modules/audio_processing/include/audio_processing.h + PATH_SUFFIXES webrtc-audio-processing-2 webrtc-audio-processing-1 webrtc-audio-processing + ) + find_library(WEBRTC_APM_LIBRARY + NAMES webrtc-audio-processing-2 webrtc-audio-processing-1 webrtc-audio-processing + ) + + if(NOT WEBRTC_APM_INCLUDE_DIR OR NOT WEBRTC_APM_LIBRARY) + message(FATAL_ERROR "webrtc-audio-processing not found. " + "Set CMAKE_PREFIX_PATH to the install prefix (e.g. 
/your/install/prefix).") + endif() + + target_include_directories(mumble_client_object_lib PRIVATE "${WEBRTC_APM_INCLUDE_DIR}") + target_link_libraries(mumble_client_object_lib PRIVATE "${WEBRTC_APM_LIBRARY}") + endif() + + if(WIN32) + # The library's headers need these on Windows regardless of how the package was located. + # _WIN32 is predefined by Windows toolchains and must never be forced on other platforms. + target_compile_definitions(mumble_client_object_lib PRIVATE WEBRTC_WIN NOMINMAX _USE_MATH_DEFINES) + find_file(WEBRTC_APM_DLL + NAMES webrtc-audio-processing-2-1.dll webrtc-audio-processing-1.dll webrtc-audio-processing.dll + PATH_SUFFIXES bin + ) + if(WEBRTC_APM_DLL) + add_custom_command(TARGET mumble POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${WEBRTC_APM_DLL}" + "$<TARGET_FILE_DIR:mumble>" + COMMENT "Copying WebRTC APM DLL" + ) + else() + message(WARNING "webrtc-audio-processing DLL not found — copy it manually to the build directory.") + endif() + endif() + + message(STATUS "WebRTC APM (AEC3) support enabled") +endif() + if(qtspeech) find_pkg(Qt6 COMPONENTS TextToSpeech REQUIRED) target_sources(mumble_client_object_lib PRIVATE "TextToSpeech.cpp") diff --git a/src/mumble/EchoCancelOption.cpp b/src/mumble/EchoCancelOption.cpp index 8cd44550b9d..04dce8c7c9f 100644 --- a/src/mumble/EchoCancelOption.cpp +++ b/src/mumble/EchoCancelOption.cpp @@ -25,9 +25,15 @@ const std::vector< EchoCancelOption > &EchoCancelOption::getOptions() { "Multichannel echo cancellation requires more CPU, so " "you should try mixed first.") }, // Available only on Apple devices - { EchoCancelOptionID::APPLE_AEC, QObject::tr("EXPERIMENTAL: Acoustic echo cancellation (Apple)."), - QObject::tr("The support for this option is experimental only! This option works best when using built-in " - "microphone and speaker.") } + { EchoCancelOptionID::APPLE_AEC, QObject::tr("Acoustic echo cancellation (Apple)"), + QObject::tr("Uses Apple's built-in voice processing for echo cancellation. 
Works best with built-in " + "microphone and speaker.") }, +#ifdef USE_WEBRTC_APM + // Available when built with webrtc-audio-processing (-Dwebrtc-apm=ON) + { EchoCancelOptionID::WEBRTC_AEC, QObject::tr("Echo cancellation (WebRTC AEC3)"), + QObject::tr("Uses the WebRTC AEC3 algorithm for high-quality echo cancellation. " + "Recommended for use with speakers instead of headphones.") }, +#endif }; return echoCancelOptions; diff --git a/src/mumble/EchoCancelOption.h b/src/mumble/EchoCancelOption.h index ac775d6bc72..6e259e33450 100644 --- a/src/mumble/EchoCancelOption.h +++ b/src/mumble/EchoCancelOption.h @@ -17,7 +17,10 @@ enum class EchoCancelOptionID { DISABLED = 0, SPEEX_MIXED = 1, SPEEX_MULTICHANNEL = 2, - APPLE_AEC = 3 // Apple's Acoustic Echo Cancellation support for macOS and iOS. + APPLE_AEC = 3, // Apple's Acoustic Echo Cancellation support for macOS and iOS. + // Always defined (even without USE_WEBRTC_APM) so saved config values round-trip cleanly. + // The corresponding UI entry in getOptions() is compiled out when USE_WEBRTC_APM is not set. + WEBRTC_AEC = 4 // WebRTC AEC3 via the webrtc-audio-processing library. 
}; struct EchoCancelOption { diff --git a/src/mumble/EnumStringConversions.cpp b/src/mumble/EnumStringConversions.cpp index c4be0959b29..2496f8ed283 100644 --- a/src/mumble/EnumStringConversions.cpp +++ b/src/mumble/EnumStringConversions.cpp @@ -56,7 +56,8 @@ PROCESS(EchoCancelOptionID, DISABLED, "Disabled") \ PROCESS(EchoCancelOptionID, SPEEX_MIXED, "Speex_MixedChannel") \ PROCESS(EchoCancelOptionID, SPEEX_MULTICHANNEL, "Speex_Multichannel") \ - PROCESS(EchoCancelOptionID, APPLE_AEC, "Apple_AEC") + PROCESS(EchoCancelOptionID, APPLE_AEC, "Apple_AEC") \ + PROCESS(EchoCancelOptionID, WEBRTC_AEC, "WebRTC_AEC3") #define PROXY_TYPE_VALUES \ PROCESS(Settings::ProxyType, NoProxy, "None") \ diff --git a/src/mumble/Global.h b/src/mumble/Global.h index de1cff25d7c..2b6a0537410 100644 --- a/src/mumble/Global.h +++ b/src/mumble/Global.h @@ -14,6 +14,7 @@ #include "Timer.h" #include "Version.h" +#include #include // Global helper class to spread variables around across threads. @@ -94,6 +95,12 @@ struct Global Q_DECL_FINAL { ChanACL::Permissions pPermissions; int iMaxBandwidth; int iAudioBandwidth; + /// End-to-end output latency in ms (hardware path + software buffer occupancy), used by WebRTC + /// AEC3 to align the render and capture streams for echo cancellation. + /// AudioInput and AudioOutput are decoupled — they cannot call methods on each other — so + /// Global is the appropriate channel for this runtime-measured value (same pattern as + /// iAudioPathTime). Set by the audio output backend after stream init; 50ms is a safe default. 
+ std::atomic< int > iOutputLatencyMs{ 50 }; QDir qdBasePath; bool bAttenuateOthers; /// If set the AudioOutput::mix will forcefully adjust the volume of all diff --git a/src/mumble/PulseAudio.cpp b/src/mumble/PulseAudio.cpp index 5bc03817947..a03f70fba01 100644 --- a/src/mumble/PulseAudio.cpp +++ b/src/mumble/PulseAudio.cpp @@ -911,6 +911,9 @@ void PulseAudioSystem::contextCallback(pa_context *c) { PulseAudioInputRegistrar::PulseAudioInputRegistrar() : AudioInputRegistrar(QLatin1String("PulseAudio"), 10) { echoOptions.push_back(EchoCancelOptionID::SPEEX_MIXED); echoOptions.push_back(EchoCancelOptionID::SPEEX_MULTICHANNEL); +#ifdef USE_WEBRTC_APM + echoOptions.push_back(EchoCancelOptionID::WEBRTC_AEC); +#endif } AudioInput *PulseAudioInputRegistrar::create() { @@ -939,8 +942,15 @@ void PulseAudioInputRegistrar::setDeviceChoice(const QVariant &choice, Settings } bool PulseAudioInputRegistrar::canEcho(EchoCancelOptionID echoOption, const QString &osys) const { - return (echoOption == EchoCancelOptionID::SPEEX_MIXED || echoOption == EchoCancelOptionID::SPEEX_MULTICHANNEL) - && (osys == name); + if (osys != name) + return false; + if (echoOption == EchoCancelOptionID::SPEEX_MIXED || echoOption == EchoCancelOptionID::SPEEX_MULTICHANNEL) + return true; +#ifdef USE_WEBRTC_APM + if (echoOption == EchoCancelOptionID::WEBRTC_AEC) + return true; +#endif + return false; } PulseAudioOutputRegistrar::PulseAudioOutputRegistrar() : AudioOutputRegistrar(QLatin1String("PulseAudio"), 10) { diff --git a/src/mumble/WASAPI.cpp b/src/mumble/WASAPI.cpp index 4059b4c645f..13c2f0f772b 100644 --- a/src/mumble/WASAPI.cpp +++ b/src/mumble/WASAPI.cpp @@ -121,6 +121,9 @@ void WASAPIInit::destroy() { WASAPIInputRegistrar::WASAPIInputRegistrar() : AudioInputRegistrar(QLatin1String("WASAPI"), 10) { echoOptions.push_back(EchoCancelOptionID::SPEEX_MIXED); echoOptions.push_back(EchoCancelOptionID::SPEEX_MULTICHANNEL); +#ifdef USE_WEBRTC_APM + echoOptions.push_back(EchoCancelOptionID::WEBRTC_AEC); 
+#endif } bool WASAPIInputRegistrar::isMicrophoneAccessDeniedByOS() { @@ -225,8 +228,15 @@ void WASAPIInputRegistrar::setDeviceChoice(const QVariant &choice, Settings &s) } bool WASAPIInputRegistrar::canEcho(EchoCancelOptionID echoOptionIDs, const QString &outputSystem) const { - return (echoOptionIDs == EchoCancelOptionID::SPEEX_MIXED || echoOptionIDs == EchoCancelOptionID::SPEEX_MULTICHANNEL) - && (outputSystem == name); + if (outputSystem != name) + return false; + if (echoOptionIDs == EchoCancelOptionID::SPEEX_MIXED || echoOptionIDs == EchoCancelOptionID::SPEEX_MULTICHANNEL) + return true; +#ifdef USE_WEBRTC_APM + if (echoOptionIDs == EchoCancelOptionID::WEBRTC_AEC) + return true; +#endif + return false; } bool WASAPIInputRegistrar::canExclusive() const { @@ -1029,9 +1039,13 @@ void WASAPIOutput::run() { pAudioClient->GetStreamLatency(&latency); pAudioClient->GetBufferSize(&bufferFrameCount); qWarning("WASAPIOutput: Stream Latency %lld (%d)", latency, bufferFrameCount); - iMixerFreq = pwfx->nSamplesPerSec; + // Store total output latency for WebRTC AEC delay estimation: + // hardware/driver path (latency is in 100ns units) + software buffer occupancy + Global::get().iOutputLatencyMs.store(static_cast< int >(latency / 10000) + + static_cast< int >(bufferFrameCount) * 1000 / iMixerFreq); + qWarning("WASAPIOutput: Periods %lldus %lldus (latency %lldus)", def / 10LL, min / 10LL, latency / 10LL); qWarning("WASAPIOutput: Buffer is %dus (%d)", (bufferFrameCount * 1000000) / iMixerFreq, Global::get().s.iOutputDelay);