diff --git a/.gitmodules b/.gitmodules index b92bdb23fed..e721d98a04d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -37,3 +37,6 @@ [submodule "3rdparty/CLI11"] path = 3rdparty/CLI11 url = https://github.com/CLIUtils/CLI11.git +[submodule "3rdparty/fftconvolver"] + path = 3rdparty/fftconvolver + url = https://github.com/HiFi-LoFi/FFTConvolver.git diff --git a/3rdparty/fftconvolver b/3rdparty/fftconvolver new file mode 160000 index 00000000000..f2cdeb04c42 --- /dev/null +++ b/3rdparty/fftconvolver @@ -0,0 +1 @@ +Subproject commit f2cdeb04c42141d2caec19ca4f137398b2a76b85 diff --git a/data/hrtf/README.md b/data/hrtf/README.md new file mode 100644 index 00000000000..6348392d662 --- /dev/null +++ b/data/hrtf/README.md @@ -0,0 +1,58 @@ +# data/hrtf/default.sofa — Provenance + +## Source + +`default.sofa` is the CIPIC subject 124 HRTF, copied verbatim from the +**Valve Steam Audio** open-source SDK repository: + +- Repository: https://github.com/ValveSoftware/steam-audio +- File path: `core/data/hrtf/cipic_124.sofa` +- Commit: `f88bd4e443ffdec9ec14ec52d2702de9702411a2` + ("Merge from Perforce 2024-02-19 14:28:05.862022") + +No processing was applied by Steam Audio beyond what is in the original CIPIC +distribution — Steam Audio loads the file as-is using `mysofa_open_no_norm()`. + +## Original dataset + +CIPIC subject 124 is from the CIPIC Interface Laboratory HRTF Database +(UC Davis, Center for Image Processing and Integrated Computing). + +The original CIPIC website (https://www.ece.ucdavis.edu/cipic/spatial-sound/hrtf-data/) +is no longer reachable. 
Mirrors and references: + +- GitHub mirror of the dataset: https://github.com/amini-allight/cipic-hrtf-database +- Web Archive snapshot of the original page: + https://web.archive.org/web/20170916053150/http://interface.cipic.ucdavis.edu/sound/hrtf.html + +## Why file choice matters + +Raw HRTF measurements typically have strong spectral coloration — the frequency +response at front incidence may vary by 20 dB or more across the audible range. +When used directly for binaural rendering this colours speech noticeably (a +"telephone / band-pass" quality). A well-behaved HRTF for voice communication +should have a relatively flat front-incidence response (within ±10 dB from 100 Hz +to 8 kHz), with spectral shaping appearing primarily as pinna notches above 8 kHz +where it contributes to the spatial illusion rather than colouring speech. + +The cipic_124.sofa file meets this criterion: its front-incidence response is flat +within ±5 dB from 100 Hz to 8 kHz. Whether this flatness is inherent to subject 124's +measurements or was applied as diffuse-field equalization upstream is not documented +by Steam Audio. + +## License + +Copyright (c) 2001 The Regents of the University of California. All Rights Reserved. + +The CIPIC database is made available for educational, research, and commercial use +with an acknowledgment request (see Steam Audio's `core/THIRDPARTY.md` for the full +license text). Steam Audio redistributes it under their Apache 2.0 SDK licence. + +## Updating + +To replace this file with a different HRTF: + +1. Obtain a SOFA file in the `SimpleFreeFieldHRIR` convention (44100 or 48000 Hz) +2. Prefer a file with a flat front-incidence response (diffuse-field equalized) +3. 
Verify with `ffmpeg -i test_impulse.wav -af "sofalizer=sofa=path/to/candidate.sofa:type=freq:rotation=0:elevation=0" ir_0deg.wav` + and inspect the spectrum — it should be within ±10 dB from 100 Hz to 8 kHz at 0° diff --git a/data/hrtf/default.sofa b/data/hrtf/default.sofa new file mode 100644 index 00000000000..947a5f9cdc9 Binary files /dev/null and b/data/hrtf/default.sofa differ diff --git a/src/mumble/AudioConfigDialog.cpp b/src/mumble/AudioConfigDialog.cpp index f9d377ada1f..cafc5be55c0 100644 --- a/src/mumble/AudioConfigDialog.cpp +++ b/src/mumble/AudioConfigDialog.cpp @@ -14,6 +14,7 @@ #include "Utils.h" #include "Global.h" +#include #include #include @@ -682,6 +683,13 @@ AudioOutputDialog::AudioOutputDialog(Settings &st) : ConfigWidget(st) { qlBloom->setToolTip(bloomTooltip); qsBloom->setToolTip(bloomTooltip); qsbBloom->setToolTip(bloomTooltip); + +#ifndef USE_HRTF + qcbHrtf->setVisible(false); + qlHrtfFile->setVisible(false); + qleHrtfFile->setVisible(false); + qpbHrtfBrowse->setVisible(false); +#endif } QString AudioOutputDialog::title() const { @@ -749,6 +757,8 @@ void AudioOutputDialog::load(const Settings &r) { qsbBloom->setValue(static_cast< int >(r.fAudioBloom * 100)); loadCheckBox(qcbHeadphones, r.bPositionalHeadphone); loadCheckBox(qcbPositional, r.bPositionalAudio); + loadCheckBox(qcbHrtf, r.bHrtf); + qleHrtfFile->setText(r.qsHrtfFile); qsOtherVolume->setEnabled(r.bAttenuateOthersOnTalk || r.bAttenuateOthers); qlOtherVolume->setEnabled(r.bAttenuateOthersOnTalk || r.bAttenuateOthers); @@ -778,6 +788,8 @@ void AudioOutputDialog::save() const { s.bPositionalAudio = qcbPositional->isChecked(); s.bPositionalHeadphone = qcbHeadphones->isChecked(); s.bExclusiveOutput = qcbExclusive->isChecked(); + s.bHrtf = qcbHrtf->isChecked(); + s.qsHrtfFile = qleHrtfFile->text(); if (AudioOutputRegistrar::qmNew) { @@ -959,3 +971,12 @@ void AudioOutputDialog::on_qcbAttenuateOthers_clicked(bool checked) { void AudioOutputDialog::on_qcbOnlyAttenuateSameOutput_clicked(bool checked) {
qcbAttenuateLoopbacks->setEnabled(checked);
 }
+
+void AudioOutputDialog::on_qpbHrtfBrowse_clicked() {
+	const QString path = QFileDialog::getOpenFileName(
+		this, tr("Select HRTF SOFA file"), qleHrtfFile->text(),
+		tr("SOFA files (*.sofa);;All files (*)"));
+	if (!path.isEmpty()) {
+		qleHrtfFile->setText(path);
+	}
+}
diff --git a/src/mumble/AudioConfigDialog.h b/src/mumble/AudioConfigDialog.h
index 37a0a19f899..fa2c1912b0a 100644
--- a/src/mumble/AudioConfigDialog.h
+++ b/src/mumble/AudioConfigDialog.h
@@ -108,6 +108,7 @@ public slots:
 	void on_qcbAttenuateOthersOnTalk_clicked(bool checked);
 	void on_qcbAttenuateOthers_clicked(bool checked);
 	void on_qcbOnlyAttenuateSameOutput_clicked(bool checked);
+	void on_qpbHrtfBrowse_clicked();
 };
 #endif
diff --git a/src/mumble/AudioOutput.cpp b/src/mumble/AudioOutput.cpp
index 15926425346..5b4727587e8 100644
--- a/src/mumble/AudioOutput.cpp
+++ b/src/mumble/AudioOutput.cpp
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include
 // Remember that we cannot use static member classes that are not pointers, as the constructor
 // for AudioOutputRegistrar() might be called before they are initialized, as the constructor
@@ -227,9 +228,21 @@ void AudioOutput::removeBuffer(const void *buffer, bool acquireWriteLock) {
 	for (auto iter = qmOutputs.begin(); iter != qmOutputs.end(); ++iter) {
 		if (iter.value() == buffer) {
+#ifdef USE_HRTF
+			// Capture the pointer value as source ID before deletion.
+			// NOTE(review): the cast keeps only the low 32 bits of the pointer on 64-bit
+			// targets, so two live buffers could in principle map to the same source ID.
+			const auto hrtfSourceId =
+				static_cast< unsigned int >(reinterpret_cast< uintptr_t >(iter.value()));
+#endif
 			delete iter.value();
 			qmOutputs.erase(iter);
+#ifdef USE_HRTF
+			if (m_hrtfSpatializer) {
+				m_hrtfSpatializer->removeSource(hrtfSourceId);
+			}
+#endif
 			break;
 		}
 	}
@@ -434,6 +447,15 @@ void AudioOutput::initializeMixer(const unsigned int *chanmasks, bool forceheadp
 		static_cast< unsigned int >(iChannels * ((eSampleFormat == SampleFloat) ? sizeof(float) : sizeof(short)));
 	qWarning("AudioOutput: Initialized %d channel %d hz mixer", iChannels, iMixerFreq);
+#ifdef USE_HRTF
+	// (Re)create the spatializer for the current mixer frequency and frame size and load
+	// the configured SOFA file. This happens even when s.bHrtf is off; mix() consults the
+	// setting for every source at mix time.
+	m_hrtfSpatializer = std::make_unique< HrtfSpatializer >(static_cast< int >(iMixerFreq),
+															static_cast< int >(iFrameSize));
+	m_hrtfSpatializer->loadHRTF(Global::get().s.qsHrtfFile);
+#endif
+
 	if (Global::get().s.bPositionalAudio && iChannels == 1) {
 		Log::logOrDefer(Log::Warning, tr("Positional audio cannot work with mono output devices!"));
 	}
@@ -517,6 +539,14 @@ bool AudioOutput::mix(void *outbuff, unsigned int frameCount) {
 	bool validListener = false;
+#ifdef USE_HRTF
+	// Listener orientation vectors for HRTF direction computation.
+	// Set when positional audio is active (validListener == true).
+	Vector3D hrtfCameraDir = { 0.0f, 0.0f, 1.0f };
+	Vector3D hrtfCameraAxis = { 0.0f, 1.0f, 0.0f };
+	Vector3D hrtfRight = { 1.0f, 0.0f, 0.0f };
+#endif
+
 	// Initialize recorder if recording is enabled
 	std::shared_ptr< float[] > recbuff;
 	if (recorder) {
@@ -580,6 +610,12 @@ bool AudioOutput::mix(void *outbuff, unsigned int frameCount) {
 		// Calculate right vector as front X top
 		Vector3D right = cameraAxis.crossProduct(cameraDir);
+#ifdef USE_HRTF
+		hrtfCameraDir = cameraDir;
+		hrtfCameraAxis = cameraAxis;
+		hrtfRight = right;
+#endif
+
 		/*
 		qWarning("Front: %f %f %f", front[0], front[1], front[2]);
 		qWarning("Top: %f %f %f", top[0], top[1], top[2]);
@@ -736,6 +772,71 @@ bool AudioOutput::mix(void *outbuff, unsigned int frameCount) {
 				qWarning("Voice pos: %f %f %f", aop->fPos[0], aop->fPos[1], aop->fPos[2]);
 				qWarning("Voice dir: %f %f %f", connectionVec.x, connectionVec.y, connectionVec.z);
 				*/
+#ifdef USE_HRTF
+				if (nchan == 2 && m_hrtfSpatializer && m_hrtfSpatializer->isLoaded()
+					&& Global::get().s.bHrtf) {
+					// HRTF binaural path: replaces the per-channel gain + ITD loop below.
+
+					// Compute source direction in listener-local frame (+X=right, +Y=up, +Z=forward).
+					const float localX = connectionVec.x * hrtfRight.x + connectionVec.y * hrtfRight.y
+										 + connectionVec.z * hrtfRight.z;
+					const float localY = connectionVec.x * hrtfCameraAxis.x
+										 + connectionVec.y * hrtfCameraAxis.y
+										 + connectionVec.z * hrtfCameraAxis.z;
+					const float localZ = connectionVec.x * hrtfCameraDir.x
+										 + connectionVec.y * hrtfCameraDir.y
+										 + connectionVec.z * hrtfCameraDir.z;
+
+					// Buffer pointer as source ID: stable lifetime, unique across speech + samples.
+					const auto sourceId =
+						static_cast< unsigned int >(reinterpret_cast< uintptr_t >(buffer));
+
+					// Downmix stereo to mono before spatialisation.
+					static thread_local std::vector< float > monoMix;
+					monoMix.resize(frameCount);
+					if (speech && speech->bStereo) {
+						for (unsigned int i = 0; i < frameCount; ++i)
+							monoMix[i] = (pfBuffer[2 * i] + pfBuffer[2 * i + 1]) * 0.5f;
+					} else {
+						for (unsigned int i = 0; i < frameCount; ++i)
+							monoMix[i] = pfBuffer[i];
+					}
+
+					// Spatialize: mono → interleaved binaural stereo (L,R,L,R,...).
+					static thread_local std::vector< float > hrtfOut;
+					hrtfOut.resize(frameCount * 2);
+					m_hrtfSpatializer->spatialize(sourceId, monoMix.data(), hrtfOut.data(),
+												  frameCount, localX, localY, localZ);
+
+					// Apply distance attenuation only (dot=1.0 → pure distance falloff;
+					// the HRTF IR encodes ILD/ITD directional cues, no per-channel weighting needed).
+					const bool isAudible = (Global::get().s.fAudioMaxDistVolume > 0)
+										   || (len < Global::get().s.fAudioMaxDistance);
+					const float gain = isAudible ? mul * calcGain(1.0f, len) * volumeAdjustment : 0.0f;
+					maxVolume = gain;
+
+					// Ramp gain linearly across the block to avoid clicks on distance changes
+					// (mirrors per-sample interpolation in the non-HRTF path below).
+					// pfVolume[0] caches the previous block's gain; -1.0 signals first call.
+					// Every slot is initialised: the non-HRTF path below only initialises the
+					// array when the pointer is null, so it must never inherit an unset slot.
+					if (!buffer->pfVolume) {
+						buffer->pfVolume = new float[nchan];
+						for (unsigned int s = 0; s < nchan; ++s)
+							buffer->pfVolume[s] = -1.0f;
+					}
+					const float oldGain = (buffer->pfVolume[0] >= 0.0f) ? buffer->pfVolume[0] : gain;
+					buffer->pfVolume[0] = gain;
+					const float gainInc = (gain - oldGain) / static_cast< float >(frameCount);
+
+					for (unsigned int i = 0; i < frameCount; ++i) {
+						const float g = oldGain + gainInc * static_cast< float >(i);
+						output[i * nchan + 0] += hrtfOut[2 * i] * g;
+						output[i * nchan + 1] += hrtfOut[2 * i + 1] * g;
+					}
+				} else {
+// Non-HRTF per-channel gain + ITD path (use `git diff -w` to review separately from indentation):
+#endif
 				if (!buffer->pfVolume) {
 					buffer->pfVolume = new float[nchan];
 					for (unsigned int s = 0; s < nchan; ++s)
@@ -812,6 +913,9 @@ bool AudioOutput::mix(void *outbuff, unsigned int frameCount) {
 						}
 					}
 				}
+#ifdef USE_HRTF
+				} // end else: non-HRTF per-channel gain + ITD path
+#endif
 			} else {
 				// Mix the current audio source into the output by adding it to the elements of the output buffer
 				// after having applied a volume adjustment
diff --git a/src/mumble/AudioOutput.h b/src/mumble/AudioOutput.h
index 4fd6c176d5e..d2b97fca2e6 100644
--- a/src/mumble/AudioOutput.h
+++ b/src/mumble/AudioOutput.h
@@ -15,6 +15,10 @@
 # include "ManualPlugin.h"
 #endif
+#ifdef USE_HRTF
+# include "HrtfSpatializer.h"
+#endif
+
 #include
 #ifndef SPEAKER_FRONT_LEFT
@@ -101,6 +105,10 @@ private slots:
 	QHash< unsigned int, Position2D > positions;
 #endif
+#ifdef USE_HRTF
+	std::unique_ptr< HrtfSpatializer > m_hrtfSpatializer;
+#endif
+
 	void initializeMixer(const unsigned int *chanmasks, bool forceheadphone = false);
 	bool mix(void *output, unsigned int frameCount);
diff --git a/src/mumble/AudioOutput.ui b/src/mumble/AudioOutput.ui
index e327cdde2cf..3d906f27dde 100644
--- a/src/mumble/AudioOutput.ui
+++ b/src/mumble/AudioOutput.ui
@@ -709,6 +709,46 @@
+
+
+
+ Enable Head-Related Transfer Function (HRTF) binaural audio processing for headphones
+
+
+ When enabled, HRTF processing uses a measured head-related transfer function to render spatial audio binaurally.
This provides improved elevation perception and front/back disambiguation compared to standard panning. Requires headphones for best effect. + + + Use HRTF binaural audio (headphones recommended) + + + + + + + HRTF File + + + qleHrtfFile + + + + + + + Path to a custom SOFA file. Leave empty to use the default HRTF. + + + Default HRTF + + + + + + + Browse... + + + diff --git a/src/mumble/CMakeLists.txt b/src/mumble/CMakeLists.txt index 1f3daa534c2..27258a54262 100644 --- a/src/mumble/CMakeLists.txt +++ b/src/mumble/CMakeLists.txt @@ -26,6 +26,8 @@ option(bundled-rnnoise "Build the included version of RNNoise instead of looking option(manual-plugin "Include the built-in \"manual\" positional audio plugin." ON) +option(hrtf "Build with HRTF binaural spatialization (requires libmysofa)." ON) + option(qtspeech "Use Qt's text-to-speech system (part of the Qt Speech module) instead of Mumble's own OS-specific text-to-speech implementations." OFF) option(jackaudio "Build support for JackAudio." ON) @@ -941,6 +943,55 @@ if(gkey) target_compile_definitions(mumble_client_object_lib PUBLIC "USE_GKEY") endif() +if(hrtf) + find_pkg("libmysofa" QUIET) + if(libmysofa_FOUND) + if(libmysofa_INCLUDE_DIRS) + target_include_directories(mumble_client_object_lib PRIVATE ${libmysofa_INCLUDE_DIRS}) + endif() + target_link_libraries(mumble_client_object_lib PRIVATE ${libmysofa_LIBRARIES}) + else() + # Direct search fallback + find_library(MYSOFA_LIB mysofa) + find_path(MYSOFA_INCLUDE_DIR mysofa.h) + if(MYSOFA_LIB AND MYSOFA_INCLUDE_DIR) + target_include_directories(mumble_client_object_lib PRIVATE ${MYSOFA_INCLUDE_DIR}) + target_link_libraries(mumble_client_object_lib PRIVATE ${MYSOFA_LIB}) + else() + message(WARNING "libmysofa not found - disabling HRTF support") + set(hrtf OFF CACHE BOOL "Build with HRTF binaural spatialization (requires libmysofa)." 
FORCE)
+		endif()
+	endif()
+endif()
+
+if(hrtf)
+	# Default SOFA file shipped with Mumble
+	set(MUMBLE_HRTF_DATA_DIR "${CMAKE_SOURCE_DIR}/data/hrtf")
+	set(MUMBLE_HRTF_DEFAULT_SOFA "${MUMBLE_HRTF_DATA_DIR}/default.sofa")
+
+	target_compile_definitions(mumble_client_object_lib PUBLIC
+		"USE_HRTF"
+		"MUMBLE_HRTF_DEFAULT_SOFA=\"${MUMBLE_HRTF_DEFAULT_SOFA}\""
+	)
+
+	target_sources(mumble_client_object_lib PRIVATE
+		"HrtfSpatializer.cpp"
+		"HrtfSpatializer.h"
+		"${3RDPARTY_DIR}/fftconvolver/FFTConvolver.cpp"
+		"${3RDPARTY_DIR}/fftconvolver/AudioFFT.cpp"
+		"${3RDPARTY_DIR}/fftconvolver/Utilities.cpp"
+	)
+
+	target_include_directories(mumble_client_object_lib PRIVATE
+		"${3RDPARTY_DIR}/fftconvolver"
+	)
+
+	install(FILES "${MUMBLE_HRTF_DEFAULT_SOFA}"
+		DESTINATION "${CMAKE_INSTALL_DATADIR}/mumble/hrtf"
+		COMPONENT mumble_client
+		OPTIONAL)
+endif()
+
 if(g15)
 	if(WIN32 OR APPLE)
 		target_sources(mumble_client_object_lib
diff --git a/src/mumble/HrtfSpatializer.cpp b/src/mumble/HrtfSpatializer.cpp
new file mode 100644
index 00000000000..cd09d8a0911
--- /dev/null
+++ b/src/mumble/HrtfSpatializer.cpp
@@ -0,0 +1,239 @@
+// Copyright The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at .
+
+#include "HrtfSpatializer.h"
+
+#include
+#include "FFTConvolver.h"
+
+#include
+#include
+
+#include
+#include
+#include
+
+// Dot product threshold for triggering a crossfade (~8 degrees of movement).
+static constexpr float kDirChangeDotThreshold = 0.99f;
+
+HrtfSpatializer::HrtfSpatializer(int mixerFreq, int blockSize)
+	: m_mixerFreq(mixerFreq), m_blockSize(blockSize) {
+}
+
+HrtfSpatializer::~HrtfSpatializer() {
+	if (m_sofa) {
+		mysofa_close(m_sofa);
+		m_sofa = nullptr;
+	}
+}
+
+// Tries, in order: the caller-supplied path, the installed default file(s) and the
+// source-tree default baked in at build time. The first file libmysofa can open wins.
+bool HrtfSpatializer::loadHRTF(const QString &sofaPath) {
+	if (m_sofa) {
+		mysofa_close(m_sofa);
+		m_sofa = nullptr;
+	}
+	m_filterLength = 0;
+	// Per-source convolvers were built from the previous HRIR set; drop them so every
+	// source is re-initialised against the newly loaded file on its next block.
+	m_sources.clear();
+
+	QStringList candidates;
+	if (!sofaPath.isEmpty()) {
+		candidates << sofaPath;
+	}
+
+	// Application data directory. AppDataLocation is derived from the organization and
+	// application name, so it is not necessarily <datadir>/mumble.
+	const QString installed = QStandardPaths::locate(QStandardPaths::AppDataLocation,
+													 QStringLiteral("hrtf/default.sofa"));
+	if (!installed.isEmpty()) {
+		candidates << installed;
+	}
+
+	// Generic data directory: matches the CMake install destination
+	// "<datadir>/mumble/hrtf" (e.g. /usr/share/mumble/hrtf/default.sofa).
+	const QString installedGeneric = QStandardPaths::locate(QStandardPaths::GenericDataLocation,
+															QStringLiteral("mumble/hrtf/default.sofa"));
+	if (!installedGeneric.isEmpty()) {
+		candidates << installedGeneric;
+	}
+
+#ifdef MUMBLE_HRTF_DEFAULT_SOFA
+	// Source-tree path baked in at build time (developer / uninstalled builds)
+	candidates << QString::fromUtf8(MUMBLE_HRTF_DEFAULT_SOFA);
+#endif
+
+	for (const QString &path : candidates) {
+		if (!QFileInfo::exists(path)) {
+			continue;
+		}
+
+		int err = MYSOFA_OK;
+		struct MYSOFA_EASY *sofa =
+			mysofa_open(path.toUtf8().constData(), static_cast< float >(m_mixerFreq), &m_filterLength, &err);
+		if (sofa && err == MYSOFA_OK) {
+			m_sofa = sofa;
+			m_irLeft.resize(static_cast< size_t >(m_filterLength));
+			m_irRight.resize(static_cast< size_t >(m_filterLength));
+			qDebug("HrtfSpatializer: Loaded SOFA file \"%s\" (filterLen=%d @ %d Hz)",
+				   qPrintable(path), m_filterLength, m_mixerFreq);
+			return true;
+		}
+		qWarning("HrtfSpatializer: Failed to load \"%s\" (err=%d)", qPrintable(path), err);
+	}
+
+	qWarning("HrtfSpatializer: No usable SOFA file found");
+	return false;
+}
+
+bool HrtfSpatializer::isLoaded() const {
+	return m_sofa != nullptr;
+}
+
+// Returns true when (dx, dy, dz) deviates from the direction loaded into the current
+// convolver set by more than kDirChangeDotThreshold allows, or when none is loaded yet.
+bool HrtfSpatializer::directionChanged(const SourceState &state, float dx, float dy, float dz) const {
+	// Sentinel: {2,0,0} is outside the unit sphere → treat as always-changed
+	if (state.curDirX > 1.5f) {
+		return true;
+	}
+	const float dot = state.curDirX * dx + state.curDirY * dy + state.curDirZ * dz;
+	return dot < kDirChangeDotThreshold;
+}
+
+// Fetches the HRIR pair for a listener-local direction from libmysofa and (re)initialises
+// both convolvers of `set` with it; the convolver objects are created on first use.
+void HrtfSpatializer::initConvolverSet(ConvolverSet &set, float dx, float dy, float dz) {
+	// Convert from listener-local to SOFA Cartesian:
+	// Listener-local: +X=right, +Y=up, +Z=forward
+	// SOFA Cartesian: +X=front, +Y=left, +Z=up
+	const float sofaX = dz; // forward → front
+	const float sofaY = -dx; // right → left (negated)
+	const float sofaZ = dy; // up → up
+
+	// The per-ear delays reported by libmysofa are received but not applied here.
+	// NOTE(review): if a SOFA file stores its ITD in these delays rather than in the IRs,
+	// that cue is lost — confirm for the shipped default.sofa.
+	float delayL = 0.0f, delayR = 0.0f;
+	mysofa_getfilter_float(m_sofa, sofaX, sofaY, sofaZ,
+						   m_irLeft.data(), m_irRight.data(), &delayL, &delayR);
+
+	if (!set.convL) {
+		set.convL = std::make_unique< fftconvolver::FFTConvolver >();
+		set.convR = std::make_unique< fftconvolver::FFTConvolver >();
+	}
+
+	set.convL->init(static_cast< size_t >(m_blockSize),
+					m_irLeft.data(), static_cast< size_t >(m_filterLength));
+	set.convR->init(static_cast< size_t >(m_blockSize),
+					m_irRight.data(), static_cast< size_t >(m_filterLength));
+	set.initialized = true;
+}
+
+void HrtfSpatializer::spatialize(unsigned int sourceId, const float *monoIn, float *outStereo,
+								 unsigned int frameCount, float dirX, float dirY, float dirZ) {
+	if (!m_sofa || frameCount == 0) {
+		memset(outStereo, 0, sizeof(float) * frameCount * 2);
+		return;
+	}
+
+	// Ensure direction is a unit vector. When the source is coincident with the listener
+	// the direction vector is zero, which would produce an undefined HRTF lookup and cause
+	// directionChanged() to fire every block (dot=0 < threshold) → click storm.
+	// Fall back to "forward" (+Z in listener-local) as a stable, perceptually neutral direction.
+	const float dirLen = std::sqrt(dirX * dirX + dirY * dirY + dirZ * dirZ);
+	if (dirLen < 1e-4f) {
+		dirX = 0.0f;
+		dirY = 0.0f;
+		dirZ = 1.0f;
+	} else if (dirLen < 0.99f || dirLen > 1.01f) {
+		dirX /= dirLen;
+		dirY /= dirLen;
+		dirZ /= dirLen;
+	}
+
+	// Get or create per-source state
+	auto &statePtr = m_sources[sourceId];
+	if (!statePtr) {
+		statePtr = std::make_unique< SourceState >();
+	}
+	SourceState *state = statePtr.get();
+
+	ConvolverSet &cur = state->sets[state->currentSet];
+	ConvolverSet &nxt = state->sets[1 - state->currentSet];
+
+	if (!cur.initialized) {
+		// First call for this source: initialise directly, no crossfade needed.
+		initConvolverSet(cur, dirX, dirY, dirZ);
+		state->curDirX = dirX;
+		state->curDirY = dirY;
+		state->curDirZ = dirZ;
+	} else if (!state->crossfading && directionChanged(*state, dirX, dirY, dirZ)) {
+		// Direction changed enough to warrant a new HRTF.
+		// Initialise the inactive set (resets only its own accumulator, not cur's)
+		// and start a linear crossfade from cur → nxt over one block.
+		initConvolverSet(nxt, dirX, dirY, dirZ);
+		state->nxtDirX = dirX;
+		state->nxtDirY = dirY;
+		state->nxtDirZ = dirZ;
+		state->crossfading = true;
+		state->crossfadeProgress = 0;
+		state->crossfadeTotalSamples = m_blockSize;
+	}
+	// If already crossfading, continue until it completes before triggering another.
+	// At most one-block (~10 ms) lag before tracking a new direction — imperceptible.
+
+	static thread_local std::vector< float > tmpL, tmpR, nxtL, nxtR;
+	tmpL.resize(frameCount);
+	tmpR.resize(frameCount);
+
+	if (!state->crossfading) {
+		// Fast path: single convolver set.
+		cur.convL->process(monoIn, tmpL.data(), static_cast< size_t >(frameCount));
+		cur.convR->process(monoIn, tmpR.data(), static_cast< size_t >(frameCount));
+
+		for (unsigned int i = 0; i < frameCount; ++i) {
+			outStereo[2 * i] = tmpL[i];
+			outStereo[2 * i + 1] = tmpR[i];
+		}
+	} else {
+		// Crossfade path: run both convolver sets and blend.
+		// cur keeps its accumulator intact (no re-init), so its tail decays naturally.
+		nxtL.resize(frameCount);
+		nxtR.resize(frameCount);
+
+		cur.convL->process(monoIn, tmpL.data(), static_cast< size_t >(frameCount));
+		cur.convR->process(monoIn, tmpR.data(), static_cast< size_t >(frameCount));
+		nxt.convL->process(monoIn, nxtL.data(), static_cast< size_t >(frameCount));
+		nxt.convR->process(monoIn, nxtR.data(), static_cast< size_t >(frameCount));
+
+		const float invTotal = 1.0f / static_cast< float >(state->crossfadeTotalSamples);
+		for (unsigned int i = 0; i < frameCount; ++i) {
+			const float alpha = std::min(
+				1.0f,
+				static_cast< float >(state->crossfadeProgress + i) * invTotal);
+			outStereo[2 * i] = (1.0f - alpha) * tmpL[i] + alpha * nxtL[i];
+			outStereo[2 * i + 1] = (1.0f - alpha) * tmpR[i] + alpha * nxtR[i];
+		}
+
+		state->crossfadeProgress += static_cast< int >(frameCount);
+		if (state->crossfadeProgress >= state->crossfadeTotalSamples) {
+			// Crossfade complete: nxt is now the active set.
+			// Use the direction nxt was actually initialised with, not the current
+			// caller direction (which may have drifted across variable-sized blocks).
+			state->currentSet = 1 - state->currentSet;
+			state->crossfading = false;
+			state->curDirX = state->nxtDirX;
+			state->curDirY = state->nxtDirY;
+			state->curDirZ = state->nxtDirZ;
+		}
+	}
+}
+
+void HrtfSpatializer::removeSource(unsigned int sourceId) {
+	m_sources.erase(sourceId);
+}
diff --git a/src/mumble/HrtfSpatializer.h b/src/mumble/HrtfSpatializer.h
new file mode 100644
index 00000000000..59d88fa28d6
--- /dev/null
+++ b/src/mumble/HrtfSpatializer.h
@@ -0,0 +1,107 @@
+// Copyright The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at .
+
+#ifndef MUMBLE_MUMBLE_HRTFSPATIALIZER_H_
+#define MUMBLE_MUMBLE_HRTFSPATIALIZER_H_
+
+#include
+
+#include
+#include
+#include
+
+struct MYSOFA_EASY;
+
+namespace fftconvolver {
+class FFTConvolver;
+}
+
+/// HRTF-based binaural spatializer for positional audio.
+///
+/// Uses libmysofa for SOFA file loading and HRIR interpolation, and
+/// FFTConvolver for real-time partitioned convolution.
+///
+/// Direction changes are handled by crossfading between two sets of convolvers
+/// (double-buffer ping-pong) over one block duration, so the active convolver's
+/// overlap-add accumulator is never reset mid-stream.
+///
+/// Not thread-safe: all methods must be called from the same thread
+/// (the audio mixing thread), except loadHRTF() which must be called
+/// before mixing begins.
+class HrtfSpatializer {
+public:
+	explicit HrtfSpatializer(int mixerFreq, int blockSize);
+	~HrtfSpatializer();
+
+	/// Load HRTF from a SOFA file.
+	/// @param sofaPath Path to .sofa file; the default file is also tried if this is empty or unusable.
+	/// @return true on success.
+	bool loadHRTF(const QString &sofaPath = {});
+
+	/// Spatialize mono input into interleaved stereo output.
+	/// @param sourceId Unique ID for this audio source; pass the same ID to removeSource() later.
+	/// @param monoIn Mono input samples.
+	/// @param outStereo Interleaved stereo output, must hold frameCount*2 floats.
+	/// @param frameCount Number of frames to process.
+	/// @param dirX X component of unit direction vector (listener-local, +X=right).
+	/// @param dirY Y component of unit direction vector (listener-local, +Y=up).
+	/// @param dirZ Z component of unit direction vector (listener-local, +Z=forward).
+	void spatialize(unsigned int sourceId, const float *monoIn, float *outStereo,
+					unsigned int frameCount, float dirX, float dirY, float dirZ);
+
+	/// Remove state for a source that is no longer active.
+	void removeSource(unsigned int sourceId);
+
+	bool isLoaded() const;
+
+private:
+	/// A single stereo convolver pair initialised for one HRTF direction.
+	struct ConvolverSet {
+		std::unique_ptr< fftconvolver::FFTConvolver > convL;
+		std::unique_ptr< fftconvolver::FFTConvolver > convR;
+		bool initialized = false;
+	};
+
+	/// Per-source state. Two ConvolverSets alternate as current/next (ping-pong).
+	struct SourceState {
+		ConvolverSet sets[2];
+		int currentSet = 0; ///< Index of the set currently faded-in (0 or 1)
+
+		/// Crossfade bookkeeping (in samples).
+		/// Only the inactive convolver set is ever re-inited; the active one
+		/// keeps running until the crossfade finishes, then the two swap roles.
+		bool crossfading = false;
+		int crossfadeProgress = 0; ///< Samples elapsed so far
+		int crossfadeTotalSamples = 0; ///< Crossfade length in samples
+
+		/// Direction of the HRTF loaded into sets[currentSet].
+		/// Sentinel: {2,0,0} means not yet initialised.
+		float curDirX = 2.0f;
+		float curDirY = 0.0f;
+		float curDirZ = 0.0f;
+
+		/// Direction loaded into the inactive (next) set when a crossfade was started.
+		/// At crossfade completion, curDir is set to this — not to the latest caller
+		/// direction, which may have drifted if mix() was called with variable frameCount.
+		float nxtDirX = 0.0f;
+		float nxtDirY = 0.0f;
+		float nxtDirZ = 0.0f;
+	};
+
+	struct MYSOFA_EASY *m_sofa = nullptr; ///< libmysofa handle; nullptr while no file is loaded
+	int m_filterLength = 0; ///< HRIR length in samples, as reported by mysofa_open()
+	int m_mixerFreq;
+	int m_blockSize;
+
+	std::vector< float > m_irLeft; ///< Scratch buffer receiving the left-ear HRIR
+	std::vector< float > m_irRight; ///< Scratch buffer receiving the right-ear HRIR
+
+	std::unordered_map< unsigned int, std::unique_ptr< SourceState > > m_sources;
+
+	bool directionChanged(const SourceState &state, float dx, float dy, float dz) const;
+	void initConvolverSet(ConvolverSet &set, float dx, float dy, float dz);
+};
+
+#endif // MUMBLE_MUMBLE_HRTFSPATIALIZER_H_
diff --git a/src/mumble/Settings.h b/src/mumble/Settings.h
index 232f987e03e..aa24f0c6fd0 100644
--- a/src/mumble/Settings.h
+++ b/src/mumble/Settings.h
@@ -350,6 +350,8 @@ struct Settings {
 	float fAudioMaxDistance = 15.0f;
 	float fAudioMaxDistVolume = 0.0f;
 	float fAudioBloom = 0.5f;
+	bool bHrtf = false;
+	QString qsHrtfFile;
 	/// Contains the settings for each individual plugin. The key in this map is the Hex-represented SHA-1
 	/// hash of the plugin's UTF-8 encoded absolute file-path on the hard-drive.
QHash< QString, PluginSetting > qhPluginSettings = {}; diff --git a/src/mumble/SettingsKeys.h b/src/mumble/SettingsKeys.h index 6ff85bec573..f5d1d3abdc3 100644 --- a/src/mumble/SettingsKeys.h +++ b/src/mumble/SettingsKeys.h @@ -91,6 +91,8 @@ const SettingsKey POSITIONAL_MAX_DISTANCE_KEY = { "maximum_distance" }; const SettingsKey POSITIONAL_MIN_VOLUME_KEY = { "minimum_volume" }; const SettingsKey POSITIONAL_BLOOM_KEY = { "bloom" }; const SettingsKey POSITIONAL_TRANSMIT_POSITION_KEY = { "transmit_position" }; +const SettingsKey HRTF_ENABLED_KEY = { "hrtf_enabled" }; +const SettingsKey HRTF_FILE_KEY = { "hrtf_file" }; // Network const SettingsKey JITTER_BUFFER_SIZE_KEY = { "jitter_buffer_size" }; diff --git a/src/mumble/SettingsMacros.h b/src/mumble/SettingsMacros.h index 293e206cdda..af14efa3f38 100644 --- a/src/mumble/SettingsMacros.h +++ b/src/mumble/SettingsMacros.h @@ -76,7 +76,9 @@ PROCESS(positional_audio, POSITIONAL_MIN_VOLUME_KEY, fAudioMaxDistVolume) \ PROCESS(positional_audio, POSITIONAL_BLOOM_KEY, fAudioBloom) \ PROCESS(positional_audio, POSITIONAL_HEADPHONE_MODE_KEY, bPositionalHeadphone) \ - PROCESS(positional_audio, POSITIONAL_TRANSMIT_POSITION_KEY, bTransmitPosition) + PROCESS(positional_audio, POSITIONAL_TRANSMIT_POSITION_KEY, bTransmitPosition) \ + PROCESS(positional_audio, HRTF_ENABLED_KEY, bHrtf) \ + PROCESS(positional_audio, HRTF_FILE_KEY, qsHrtfFile) #define NETWORK_SETTINGS \ diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index ea4a992a86a..4cf47e9509c 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -27,6 +27,10 @@ if (static) link_libraries(mumble_test_qt_plugins) endif() +if(client AND hrtf) + add_subdirectory("TestHrtfSpatializer") +endif() + if(client) add_subdirectory("TestXMLTools") if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD") diff --git a/src/tests/TestHrtfSpatializer/CMakeLists.txt b/src/tests/TestHrtfSpatializer/CMakeLists.txt new file mode 100644 index 00000000000..6d31f53e051 --- 
/dev/null +++ b/src/tests/TestHrtfSpatializer/CMakeLists.txt @@ -0,0 +1,49 @@ +# Copyright The Mumble Developers. All rights reserved. +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file at the root of the +# Mumble source tree or at . + +set(MUMBLE_SOURCE_DIR "${CMAKE_SOURCE_DIR}/src/mumble") +set(3RDPARTY_DIR "${CMAKE_SOURCE_DIR}/3rdparty") + +set(TESTHRTF_SOURCES + TestHrtfSpatializer.cpp + "${MUMBLE_SOURCE_DIR}/HrtfSpatializer.cpp" + "${MUMBLE_SOURCE_DIR}/HrtfSpatializer.h" + "${3RDPARTY_DIR}/fftconvolver/FFTConvolver.cpp" + "${3RDPARTY_DIR}/fftconvolver/AudioFFT.cpp" + "${3RDPARTY_DIR}/fftconvolver/Utilities.cpp" +) + +add_executable(TestHrtfSpatializer ${TESTHRTF_SOURCES}) + +set_target_properties(TestHrtfSpatializer PROPERTIES AUTOMOC ON) + +target_include_directories(TestHrtfSpatializer PRIVATE + ${MUMBLE_SOURCE_DIR} + "${3RDPARTY_DIR}/fftconvolver" +) + +# Find libmysofa — try pkg-config first, fall back to find_library (mirrors main build) +find_package(PkgConfig QUIET) +if(PkgConfig_FOUND) + pkg_check_modules(MYSOFA QUIET libmysofa) +endif() +if(MYSOFA_FOUND) + target_include_directories(TestHrtfSpatializer PRIVATE ${MYSOFA_INCLUDE_DIRS}) + target_link_libraries(TestHrtfSpatializer PRIVATE ${MYSOFA_LIBRARIES}) +else() + find_library(MYSOFA_LIB mysofa REQUIRED) + find_path(MYSOFA_INCLUDE_DIR mysofa.h REQUIRED) + target_include_directories(TestHrtfSpatializer PRIVATE ${MYSOFA_INCLUDE_DIR}) + target_link_libraries(TestHrtfSpatializer PRIVATE ${MYSOFA_LIB}) +endif() + +target_compile_definitions(TestHrtfSpatializer PRIVATE + "USE_HRTF" + "MUMBLE_HRTF_DEFAULT_SOFA=\"${CMAKE_SOURCE_DIR}/data/hrtf/default.sofa\"" +) + +target_link_libraries(TestHrtfSpatializer PRIVATE Qt6::Test) + +add_test(NAME TestHrtfSpatializer COMMAND $) diff --git a/src/tests/TestHrtfSpatializer/TestHrtfSpatializer.cpp b/src/tests/TestHrtfSpatializer/TestHrtfSpatializer.cpp new file mode 100644 index 00000000000..e040b6f9234 --- /dev/null 
+++ b/src/tests/TestHrtfSpatializer/TestHrtfSpatializer.cpp
@@ -0,0 +1,283 @@
+// Copyright The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#include <QtTest>
+
+#include "HrtfSpatializer.h"
+
+#include <algorithm>
+#include <cmath>
+#include <random>
+#include <vector>
+
+namespace {
+
+/// Returns the index of the first NaN/Inf entry in @p samples, or
+/// samples.size() when every entry is finite.
+size_t firstNonFinite(const std::vector< float > &samples) {
+	const auto bad = std::find_if(samples.begin(), samples.end(), [](float s) { return !std::isfinite(s); });
+	return static_cast< size_t >(bad - samples.begin());
+}
+
+} // namespace
+
+/// Qt Test suite for HrtfSpatializer; every case loads the default SOFA file.
+class TestHrtfSpatializer : public QObject {
+	Q_OBJECT
+
+private slots:
+	void initTestCase();
+	void testSofaLoading();
+	void testCardinalDirectionDistinctness();
+	void testImpulseConvolution();
+	void testNumericalStability();
+	void testLeftRightAsymmetry();
+	void testZeroDirectionVector();
+	void testVariableFrameCountCrossfade();
+
+private:
+	static constexpr int kSampleRate = 48000;
+	static constexpr int kBlockSize = 480; // 10 ms at 48kHz
+};
+
+void TestHrtfSpatializer::initTestCase() {
+	// Nothing to do - each test creates its own spatializer
+}
+
+/// isLoaded() must flip from false to true once loadHRTF() succeeds.
+void TestHrtfSpatializer::testSofaLoading() {
+	HrtfSpatializer spatializer(kSampleRate, kBlockSize);
+	QVERIFY(!spatializer.isLoaded());
+
+	const bool ok = spatializer.loadHRTF(); // use default SOFA
+	QVERIFY2(ok, "loadHRTF() should succeed with the default SOFA file");
+	QVERIFY(spatializer.isLoaded());
+}
+
+/// An impulse rendered from front/back/left/right/up must give distinct outputs.
+void TestHrtfSpatializer::testCardinalDirectionDistinctness() {
+	// Impulse input
+	std::vector< float > impulse(static_cast< size_t >(kBlockSize), 0.0f);
+	impulse[0] = 1.0f;
+
+	// Each call uses its own spatializer instance to get an independent impulse response.
+	// QVERIFY cannot be used in a value-returning lambda, so a failed load is signalled
+	// by an empty vector and asserted by the caller below.
+	auto getResponse = [&](float dx, float dy, float dz) -> std::vector< float > {
+		HrtfSpatializer sp(kSampleRate, kBlockSize);
+		if (!sp.loadHRTF())
+			return {};
+		std::vector< float > out(static_cast< size_t >(kBlockSize * 2), 0.0f);
+		sp.spatialize(1, impulse.data(), out.data(), static_cast< unsigned int >(kBlockSize), dx, dy, dz);
+		return out;
+	};
+
+	const auto front = getResponse(0.0f, 0.0f, 1.0f);  // forward
+	const auto back = getResponse(0.0f, 0.0f, -1.0f);  // backward
+	const auto left = getResponse(-1.0f, 0.0f, 0.0f);  // left
+	const auto right = getResponse(1.0f, 0.0f, 0.0f);  // right
+	const auto up = getResponse(0.0f, 1.0f, 0.0f);     // up
+
+	for (const auto *response : { &front, &back, &left, &right, &up })
+		QVERIFY2(!response->empty(), "loadHRTF() should succeed with the default SOFA file");
+
+	// Sum of squared per-sample differences; both responses have the same length.
+	auto energyDiff = [](const std::vector< float > &a, const std::vector< float > &b) {
+		float diff = 0.0f;
+		for (size_t i = 0; i < a.size(); ++i)
+			diff += (a[i] - b[i]) * (a[i] - b[i]);
+		return diff;
+	};
+
+	// All cardinal directions should produce distinct HRIRs
+	QVERIFY2(energyDiff(front, back) > 1e-6f, "front and back should produce different HRIRs");
+	QVERIFY2(energyDiff(front, left) > 1e-6f, "front and left should produce different HRIRs");
+	QVERIFY2(energyDiff(front, right) > 1e-6f, "front and right should produce different HRIRs");
+	QVERIFY2(energyDiff(front, up) > 1e-6f, "front and up should produce different HRIRs");
+	QVERIFY2(energyDiff(left, right) > 1e-6f, "left and right should produce different HRIRs");
+}
+
+/// A frontal unit impulse must give non-silent output with bounded energy.
+void TestHrtfSpatializer::testImpulseConvolution() {
+	// Convolving a unit impulse with a filter produces the filter itself.
+	// We can't easily access the raw HRIR, but we can verify that:
+	//   - The output is non-trivial (energy > 0)
+	//   - The output energy stays below a deliberately loose upper bound
+
+	HrtfSpatializer spatializer(kSampleRate, kBlockSize);
+	QVERIFY(spatializer.loadHRTF());
+
+	std::vector< float > impulse(static_cast< size_t >(kBlockSize), 0.0f);
+	impulse[0] = 1.0f;
+
+	std::vector< float > out(static_cast< size_t >(kBlockSize * 2), 0.0f);
+	spatializer.spatialize(1, impulse.data(), out.data(), static_cast< unsigned int >(kBlockSize),
+			0.0f, 0.0f, 1.0f); // front direction
+
+	float energy = 0.0f;
+	for (float s : out)
+		energy += s * s;
+
+	QVERIFY2(energy > 1e-6f, "Impulse response should have non-zero energy");
+
+	// The upper bound is generous on purpose: it only catches gross gain errors.
+	QVERIFY2(energy < 10.0f, "Impulse response energy should be bounded");
+}
+
+/// 1000 blocks of seeded random noise from random directions must stay finite.
+void TestHrtfSpatializer::testNumericalStability() {
+	HrtfSpatializer spatializer(kSampleRate, kBlockSize);
+	QVERIFY(spatializer.loadHRTF());
+
+	std::mt19937 rng(42);
+	std::uniform_real_distribution< float > signalDist(-1.0f, 1.0f);
+	std::uniform_real_distribution< float > angleDist(-1.0f, 1.0f);
+
+	std::vector< float > input(static_cast< size_t >(kBlockSize));
+	std::vector< float > output(static_cast< size_t >(kBlockSize * 2));
+
+	for (int iter = 0; iter < 1000; ++iter) {
+		// Random input
+		for (float &s : input)
+			s = signalDist(rng);
+
+		// Random direction (normalize)
+		float dx = angleDist(rng);
+		float dy = angleDist(rng);
+		float dz = angleDist(rng);
+		const float len = std::sqrt(dx * dx + dy * dy + dz * dz);
+		if (len < 1e-6f) {
+			dx = 0.0f;
+			dy = 0.0f;
+			dz = 1.0f;
+		} else {
+			dx /= len;
+			dy /= len;
+			dz /= len;
+		}
+
+		spatializer.spatialize(1, input.data(), output.data(),
+				static_cast< unsigned int >(kBlockSize), dx, dy, dz);
+
+		// Scan first, format once: avoids building a QString for every sample.
+		const size_t bad = firstNonFinite(output);
+		QVERIFY2(bad == output.size(),
+				 qPrintable(QString("NaN/Inf in output at iter %1 sample %2").arg(iter).arg(bad)));
+	}
+}
+
+/// A right-hand source must give differing L/R channels; a frontal one nearly equal channels.
+void TestHrtfSpatializer::testLeftRightAsymmetry() {
+	HrtfSpatializer spatializer(kSampleRate, kBlockSize);
+	QVERIFY(spatializer.loadHRTF());
+
+	std::vector< float > impulse(static_cast< size_t >(kBlockSize), 0.0f);
+	impulse[0] = 1.0f;
+
+	auto getResponse = [&](float dx, float dy, float dz, unsigned int id) {
+		std::vector< float > out(static_cast< size_t >(kBlockSize * 2), 0.0f);
+		spatializer.spatialize(id, impulse.data(), out.data(),
+				static_cast< unsigned int >(kBlockSize), dx, dy, dz);
+		return out;
+	};
+
+	// Source to the right: L and R channels should be different
+	const auto rightOut = getResponse(1.0f, 0.0f, 0.0f, 10);
+	float diffLR = 0.0f;
+	for (unsigned int i = 0; i < static_cast< unsigned int >(kBlockSize); ++i)
+		diffLR += std::abs(rightOut[2 * i] - rightOut[2 * i + 1]);
+	QVERIFY2(diffLR > 1e-4f, "Right-side source should produce different L/R outputs");
+
+	// Source in front: L and R channels should be approximately equal
+	spatializer.removeSource(10);
+	const auto frontOut = getResponse(0.0f, 0.0f, 1.0f, 11);
+	float maxLRDiff = 0.0f;
+	float maxMag = 0.0f;
+	for (unsigned int i = 0; i < static_cast< unsigned int >(kBlockSize); ++i) {
+		maxLRDiff = std::max(maxLRDiff, std::abs(frontOut[2 * i] - frontOut[2 * i + 1]));
+		maxMag = std::max(maxMag, std::abs(frontOut[2 * i]));
+	}
+
+	// A silent response would make the ratio below meaningless, so fail rather than skip.
+	QVERIFY2(maxMag > 1e-6f, "Front source should produce a non-silent left channel");
+
+	// default.sofa is a measured human HRTF (CIPIC subject 124, see data/hrtf/README.md),
+	// so L and R are only roughly equal: allow up to 20% relative difference.
+	QVERIFY2(maxLRDiff / maxMag < 0.2f,
+			 qPrintable(QString("Front source L/R difference (%1) exceeds 20% of magnitude (%2)")
+							.arg(maxLRDiff)
+							.arg(maxMag)));
+}
+
+/// A (0,0,0) direction vector must still produce finite output.
+void TestHrtfSpatializer::testZeroDirectionVector() {
+	// When source and listener are at the same position, the direction vector passed to
+	// spatialize() is (0,0,0). This must not produce NaN/Inf or trigger click-inducing
+	// crossfade thrashing (dot=0 < threshold → re-init every block).
+	// Only finiteness is asserted here; thrashing is not checked by this test.
+	HrtfSpatializer spatializer(kSampleRate, kBlockSize);
+	QVERIFY(spatializer.loadHRTF());
+
+	std::vector< float > input(static_cast< size_t >(kBlockSize), 0.1f);
+	std::vector< float > output(static_cast< size_t >(kBlockSize * 2), 0.0f);
+
+	// Run 10 consecutive blocks with zero direction — should be stable and finite.
+	for (int block = 0; block < 10; ++block) {
+		spatializer.spatialize(42, input.data(), output.data(),
+				static_cast< unsigned int >(kBlockSize), 0.0f, 0.0f, 0.0f);
+		const size_t bad = firstNonFinite(output);
+		QVERIFY2(bad == output.size(),
+				 qPrintable(QString("NaN/Inf at block %1 sample %2").arg(block).arg(bad)));
+	}
+}
+
+/// A direction change delivered in sub-block chunks must keep the output finite.
+void TestHrtfSpatializer::testVariableFrameCountCrossfade() {
+	// mix() backends call spatialize() with variable frameCount values (WASAPI uses
+	// packetLength, JACK can call with 1 frame). If the crossfade completes over
+	// multiple small calls, curDir must reflect the direction nxt was actually
+	// initialised with, not whatever direction happens to arrive in the final
+	// small call.
+
+	HrtfSpatializer spatializer(kSampleRate, kBlockSize);
+	QVERIFY(spatializer.loadHRTF());
+
+	std::vector< float > input(static_cast< size_t >(kBlockSize), 0.05f);
+	std::vector< float > output(static_cast< size_t >(kBlockSize * 2), 0.0f);
+
+	// Seed: source directly in front.
+	spatializer.spatialize(1, input.data(), output.data(),
+			static_cast< unsigned int >(kBlockSize), 0.0f, 0.0f, 1.0f);
+
+	// Trigger a crossfade by moving to the right, using half-block chunks.
+	// The crossfade spans kBlockSize samples total but arrives in pieces.
+	const unsigned int half = static_cast< unsigned int >(kBlockSize) / 2;
+	std::vector< float > halfInput(half, 0.05f);
+	std::vector< float > halfOutput(half * 2, 0.0f);
+
+	// First half-block with new direction (starts crossfade, nxtDir = right).
+	spatializer.spatialize(1, halfInput.data(), halfOutput.data(), half, 1.0f, 0.0f, 0.0f);
+	QVERIFY2(firstNonFinite(halfOutput) == halfOutput.size(), "NaN/Inf in first half-block");
+
+	// Second half-block with a *different* direction (should not affect nxtDir).
+	spatializer.spatialize(1, halfInput.data(), halfOutput.data(), half, 0.0f, 1.0f, 0.0f);
+	QVERIFY2(firstNonFinite(halfOutput) == halfOutput.size(), "NaN/Inf in second half-block");
+
+	// After crossfade completes, curDir must be the direction nxt was inited with
+	// (right = 1,0,0), not the last caller direction (up = 0,1,0). The next call
+	// should NOT immediately re-trigger a crossfade back to right.
+	// We verify this indirectly: running several more blocks with the same "up"
+	// direction should produce finite, stable output without continuous re-init.
+	for (int block = 0; block < 5; ++block) {
+		spatializer.spatialize(1, input.data(), output.data(),
+				static_cast< unsigned int >(kBlockSize), 0.0f, 1.0f, 0.0f);
+		const size_t bad = firstNonFinite(output);
+		QVERIFY2(bad == output.size(),
+				 qPrintable(QString("NaN/Inf at block %1 sample %2").arg(block).arg(bad)));
+	}
+}
+
+QTEST_APPLESS_MAIN(TestHrtfSpatializer)
+#include "TestHrtfSpatializer.moc"