NVIDIA-NeMo · artbataev · May 11, 2026 · May 11, 2026 · May 11, 2026 · May 11, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -21,9 +21,9 @@ Requires Python 3.10+, PyTorch 2.6+.
 - **Line length: 119** (not default 88) — consistent across black, isort, flake8
 - Black with `skip_string_normalization = true`
 - isort with `profile = black`
-- Check: `python setup.py style --scope <path>`
-- Fix: `python setup.py style --scope <path> --fix`
-- **Incremental reformatting**: most collections are excluded from black (see `extend-exclude` in pyproject.toml). The files are reformatted when somebody makes changes to avoid a single big reformatting PR. Do not reformat files outside your changes.
+- Check: `isort --check <path> && black --check <path>` or `isort --check . && black --check .`
+- Fix: `isort <path> && black <path>` or `isort . && black .`
+- Jupyter Notebooks are excluded from automatic black reformatting (see `extend-exclude` in pyproject.toml), but can still be reformatted when passed directly. Do not reformat notebooks outside your changes.
 
 ## Testing
 

diff --git a/examples/asr/asr_adapters/scoring_and_analysis.py b/examples/asr/asr_adapters/scoring_and_analysis.py
@@ -202,7 +202,12 @@ def display_results(df_all: pd.DataFrame, category: str, best_config: pd.Series,
 
 
 def get_best_config(
-    df_exp: pd.DataFrame, dataset_type_col: str, key_info: dict, topk: int, show_analysis: bool, exp_type: str,
+    df_exp: pd.DataFrame,
+    dataset_type_col: str,
+    key_info: dict,
+    topk: int,
+    show_analysis: bool,
+    exp_type: str,
 ):
     """Get the best hyperparameter configuration for a given subset of experiments.
 

diff --git a/examples/asr/export/transducer/infer_transducer_onnx.py b/examples/asr/export/transducer/infer_transducer_onnx.py
@@ -60,7 +60,11 @@
 def parse_arguments():
     parser = ArgumentParser()
     parser.add_argument(
-        "--nemo_model", type=str, default=None, required=False, help="Path to .nemo file",
+        "--nemo_model",
+        type=str,
+        default=None,
+        required=False,
+        help="Path to .nemo file",
     )
     parser.add_argument(
         '--pretrained_model', type=str, default=None, required=False, help='Name of a pretrained NeMo file'

diff --git a/examples/asr/export/transducer/infer_transducer_ts.py b/examples/asr/export/transducer/infer_transducer_ts.py
@@ -63,7 +63,11 @@
 def parse_arguments():
     parser = ArgumentParser()
     parser.add_argument(
-        "--nemo_model", type=str, default=None, required=False, help="Path to .nemo file",
+        "--nemo_model",
+        type=str,
+        default=None,
+        required=False,
+        help="Path to .nemo file",
     )
     parser.add_argument(
         '--pretrained_model', type=str, default=None, required=False, help='Name of a pretrained NeMo file'

diff --git a/examples/asr/speech_classification/vad_infer.py b/examples/asr/speech_classification/vad_infer.py
@@ -91,7 +91,9 @@ def main(cfg):
             'vad_stream': True,
             'sample_rate': 16000,
             'manifest_filepath': manifest_vad_input,
-            'labels': ['infer',],
+            'labels': [
+                'infer',
+            ],
             'num_workers': cfg.num_workers,
             'shuffle': False,
             'window_length_in_sec': cfg.vad.parameters.window_length_in_sec,

diff --git a/examples/tts/aligner_heteronym_disambiguation.py b/examples/tts/aligner_heteronym_disambiguation.py
@@ -44,8 +44,7 @@
 
 
 def get_args():
-    """Retrieve arguments for disambiguation.
-    """
+    """Retrieve arguments for disambiguation."""
     parser = argparse.ArgumentParser("G2P disambiguation using Aligner input embedding distances.")
     # TODO(jocelynh): Make this required=False with default download from NGC once ckpt uploaded
     parser.add_argument('--model', required=True, type=str, help="Path to Aligner model checkpoint (.nemo file).")
@@ -85,8 +84,7 @@ def get_args():
 
 
 def load_and_prepare_audio(aligner, audio_path, target_sr, device):
-    """Loads and resamples audio to target sample rate (if necessary), and preprocesses for Aligner input.
-    """
+    """Loads and resamples audio to target sample rate (if necessary), and preprocesses for Aligner input."""
     # Load audio and get length for preprocessing
     audio_data, orig_sr = sf.read(audio_path)
     if orig_sr != target_sr:
@@ -238,8 +236,7 @@ def disambiguate_candidates(aligner, text, spec, spec_len, confidence, device, h
 def disambiguate_dataset(
     aligner, manifest_path, out_path, sr, heteronyms, confidence, device, verbose, heteronyms_only=True
 ):
-    """Disambiguates the phonemes for all words with ambiguous pronunciations in the given manifest.
-    """
+    """Disambiguates the phonemes for all words with ambiguous pronunciations in the given manifest."""
     log_file = open('disambiguation_logs.txt', 'w') if verbose else None
 
     with open(out_path, 'w') as f_out:

diff --git a/external/get_collections.py b/external/get_collections.py
@@ -25,8 +25,8 @@
 
 
 def process_collection(id, col):
-    """ Helper function processing the collection.
-    
+    """Helper function processing the collection.
+
     Args:
         id: (short) name of the collection.
         col: a collection (python module).
@@ -41,7 +41,7 @@ def process_collection(id, col):
 
 
 def main():
-    """ Main function generating a JSON file with list of NeMo collections. """
+    """Main function generating a JSON file with list of NeMo collections."""
     # Parse filename.
     parser = argparse.ArgumentParser()
     parser.add_argument('--filename', help='Name of the output JSON file', type=str, default="collections.json")

diff --git a/external/get_modules.py b/external/get_modules.py
@@ -26,8 +26,8 @@
 
 
 def process_member(name, obj, module_list):
-    """ Helper function processing the passed object and, if ok, adding a record to the module list.
-    
+    """Helper function processing the passed object and, if ok, adding a record to the module list.
+
     Args:
         name: name of the member
         obj: member (class/function etc.)
@@ -74,7 +74,7 @@ def process_member(name, obj, module_list):
 
 
 def main():
-    """ Main function analysing the indicated NeMo collection and generating a JSON file with module descriptions. """
+    """Main function analysing the indicated NeMo collection and generating a JSON file with module descriptions."""
     # Parse filename.
     parser = argparse.ArgumentParser()
     parser.add_argument('--collection', help='ID of the collection', type=str)

diff --git a/nemo/collections/asr/data/audio_to_ctm_dataset.py b/nemo/collections/asr/data/audio_to_ctm_dataset.py
@@ -24,8 +24,7 @@
 
 @dataclass
 class FrameCtmUnit:
-    """A container class for one CTM unit with start and length countable in frames.
-    """
+    """A container class for one CTM unit with start and length countable in frames."""
 
     label: str
     start_frame: int

diff --git a/nemo/collections/asr/data/audio_to_label_dataset.py b/nemo/collections/asr/data/audio_to_label_dataset.py
@@ -131,7 +131,11 @@ def get_tarred_classification_label_dataset(
 
 
 def get_concat_tarred_speech_label_dataset(
-    featurizer, config: dict, shuffle_n: int, global_rank: int, world_size: int,
+    featurizer,
+    config: dict,
+    shuffle_n: int,
+    global_rank: int,
+    world_size: int,
 ):
     tarred_audio_filepaths = config['tarred_audio_filepaths']
     manifest_filepaths = config['manifest_filepath']
@@ -143,7 +147,11 @@ def get_concat_tarred_speech_label_dataset(
         conf['manifest_filepath'] = manifest_filepath
         conf['tarred_audio_filepaths'] = tarred_audio_filepath
         dataset = get_tarred_speech_label_dataset(
-            config=conf, featurizer=featurizer, shuffle_n=shuffle_n, global_rank=global_rank, world_size=world_size,
+            config=conf,
+            featurizer=featurizer,
+            shuffle_n=shuffle_n,
+            global_rank=global_rank,
+            world_size=world_size,
         )
         datasets.append(dataset)
 
@@ -160,7 +168,11 @@ def get_concat_tarred_speech_label_dataset(
 
 
 def get_tarred_speech_label_dataset(
-    featurizer, config: dict, shuffle_n: int, global_rank: int, world_size: int,
+    featurizer,
+    config: dict,
+    shuffle_n: int,
+    global_rank: int,
+    world_size: int,
 ) -> audio_to_label.TarredAudioToSpeechLabelDataset:
     """
     InInstantiates a Speech Label (e.g. VAD, speaker recognition) TarredAudioLabelDataset.

diff --git a/nemo/collections/asr/data/feature_to_label.py b/nemo/collections/asr/data/feature_to_label.py
@@ -26,7 +26,7 @@ def _feature_collate_fn(batch):
     """collate batch of feat sig, feat len, labels, labels len, assuming all features have the same shape.
     Args:
         batch (FloatTensor, LongTensor, LongTensor, LongTensor):  A tuple of tuples of feature, feature lengths,
-               encoded labels, and encoded labels length. 
+               encoded labels, and encoded labels length.
     """
     packed_batch = list(zip(*batch))
     if len(packed_batch) == 5:
@@ -61,7 +61,7 @@ def _audio_feature_collate_fn(batch, feat_pad_val, label_pad_id):
     Args:
         batch (Optional[FloatTensor], Optional[LongTensor], LongTensor,
                LongTensor):  A tuple of tuples of feature, feature lengths,
-               labels, and label lengths.  This collate func assumes the 
+               labels, and label lengths.  This collate func assumes the
                features are torch tensors of Log-Melspectrogram (i.e. [N_MEL, T]).
     """
     packed_batch = list(zip(*batch))
@@ -178,8 +178,7 @@ class _FeatureSeqSpeakerLabelDataset(Dataset):
 
     @property
     def output_types(self) -> Optional[Dict[str, NeuralType]]:
-        """Returns definitions of module output ports.
-        """
+        """Returns definitions of module output ports."""
         # TODO output type for external features
         output_types = {
             'external_feat': NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()),
@@ -197,16 +196,26 @@ def output_types(self) -> Optional[Dict[str, NeuralType]]:
             )
         else:
             output_types.update(
-                {'label': NeuralType(('B', 'T'), LabelsType()), 'label_length': NeuralType(tuple('B'), LengthsType()),}
+                {
+                    'label': NeuralType(('B', 'T'), LabelsType()),
+                    'label_length': NeuralType(tuple('B'), LengthsType()),
+                }
             )
 
         return output_types
 
     def __init__(
-        self, *, manifest_filepath: str, labels: List[str], feature_loader, is_speaker_emb: bool = False,
+        self,
+        *,
+        manifest_filepath: str,
+        labels: List[str],
+        feature_loader,
+        is_speaker_emb: bool = False,
     ):
         super().__init__()
-        self.collection = collections.ASRFeatureSequenceLabel(manifests_files=manifest_filepath.split(','),)
+        self.collection = collections.ASRFeatureSequenceLabel(
+            manifests_files=manifest_filepath.split(','),
+        )
 
         self.feature_loader = feature_loader
         self.labels = labels if labels else self.collection.uniq_labels
@@ -259,12 +268,12 @@ def _collate_fn(self, batch):
 
 class FeatureToLabelDataset(Dataset):
     """
-    Dataset that loads tensors via a json file containing paths to feature files and their labels. 
+    Dataset that loads tensors via a json file containing paths to feature files and their labels.
     Each new line is a different sample. Example below:
     and their target labels. JSON files should be of the following format:
         {"feature_filepath": "/path/to/audio_feature.pt", "label": "1"}
         ...
-        {"feature_filepath": "/path/to/audio_feature.pt", "label": "0"} 
+        {"feature_filepath": "/path/to/audio_feature.pt", "label": "0"}
     Args:
         manifest_filepath (str): Path to JSON containing data.
         labels (Optional[list]): List of unique labels collected from all samples.
@@ -283,8 +292,7 @@ class FeatureToLabelDataset(Dataset):
 
     @property
     def output_types(self) -> Optional[Dict[str, NeuralType]]:
-        """Returns definitions of module output ports.
-        """
+        """Returns definitions of module output ports."""
         output_types = {
             'audio_feat': NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()),
             'feat_length': NeuralType(tuple('B'), LengthsType()),
@@ -375,12 +383,12 @@ def _vad_segment_collate_fn(self, batch):
 
 class FeatureToMultiLabelDataset(Dataset):
     """
-    Dataset that loads tensors via a json file containing paths to feature files and their labels. 
+    Dataset that loads tensors via a json file containing paths to feature files and their labels.
     Each new line is a different sample. Example below:
     and their target labels. JSON files should be of the following format:
         {"feature_filepath": "/path/to/audio_feature.pt", "label": "1 1 0 0 1"}
         ...
-        {"feature_filepath": "/path/to/audio_feature.pt", "label": "0 1 0 0"} 
+        {"feature_filepath": "/path/to/audio_feature.pt", "label": "0 1 0 0"}
     Args:
         manifest_filepath (str): Path to JSON containing data.
         labels (Optional[list]): List of unique labels collected from all samples.
@@ -397,8 +405,7 @@ class FeatureToMultiLabelDataset(Dataset):
 
     @property
     def output_types(self) -> Optional[Dict[str, NeuralType]]:
-        """Returns definitions of module output ports.
-        """
+        """Returns definitions of module output ports."""
         output_types = {
             'audio_feat': NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()),
             'feat_length': NeuralType(tuple('B'), LengthsType()),

diff --git a/nemo/collections/asr/data/feature_to_label_dataset.py b/nemo/collections/asr/data/feature_to_label_dataset.py
@@ -28,7 +28,9 @@ def get_feature_seq_speakerlabel_dataset(
         An instance of FeatureToSeqSpeakerLabelDataset.
     """
     dataset = feature_to_label.FeatureToSeqSpeakerLabelDataset(
-        manifest_filepath=config['manifest_filepath'], labels=config['labels'], feature_loader=feature_loader,
+        manifest_filepath=config['manifest_filepath'],
+        labels=config['labels'],
+        feature_loader=feature_loader,
     )
     return dataset
 

diff --git a/nemo/collections/asr/data/huggingface/hf_audio_to_text_dataset.py b/nemo/collections/asr/data/huggingface/hf_audio_to_text_dataset.py
@@ -23,7 +23,11 @@
 
 
 def get_hf_audio_to_text_bpe_dataset(
-    config: DictConfig, global_rank: int, world_size: int, tokenizer, augmentor=None,
+    config: DictConfig,
+    global_rank: int,
+    world_size: int,
+    tokenizer,
+    augmentor=None,
 ):
     if "streaming" in config and config["streaming"]:
         dataset = HFIterableAudioToBPEDataset(
@@ -72,7 +76,10 @@ def get_hf_audio_to_text_bpe_dataset(
 
 
 def get_hf_audio_to_text_char_dataset(
-    config: DictConfig, global_rank: int, world_size: int, augmentor=None,
+    config: DictConfig,
+    global_rank: int,
+    world_size: int,
+    augmentor=None,
 ):
     if "streaming" in config and config["streaming"]:
         dataset = HFIterableAudioToCharDataset(

diff --git a/nemo/collections/asr/losses/angularloss.py b/nemo/collections/asr/losses/angularloss.py
@@ -27,13 +27,12 @@ class AngularSoftmaxLoss(Loss, Typing):
     reference: https://openaccess.thecvf.com/content_CVPR_2019/papers/Deng_ArcFace_Additive_Angular_Margin_Loss_for_Deep_Face_Recognition_CVPR_2019_paper.pdf
     args:
     scale: scale value for cosine angle
-    margin: margin value added to cosine angle 
+    margin: margin value added to cosine angle
     """
 
     @property
     def input_types(self):
-        """Input types definitions for AnguarLoss.
-        """
+        """Input types definitions for AngularSoftmaxLoss."""
         return {
             "logits": NeuralType(('B', 'D'), LogitsType()),
             "labels": NeuralType(('B',), LabelsType()),

diff --git a/nemo/collections/asr/losses/ctc.py b/nemo/collections/asr/losses/ctc.py
@@ -25,8 +25,7 @@
 class CTCLoss(nn.CTCLoss, Serialization, Typing):
     @property
     def input_types(self):
-        """Input types definitions for CTCLoss.
-        """
+        """Input types definitions for CTCLoss."""
         return {
             "log_probs": NeuralType(('B', 'T', 'D'), LogprobsType()),
             "targets": NeuralType(('B', 'T'), LabelsType()),

diff --git a/nemo/collections/asr/losses/rnnt_pytorch.py b/nemo/collections/asr/losses/rnnt_pytorch.py
@@ -24,8 +24,7 @@
 class RNNTLossPytorch(Loss):
     @property
     def input_types(self):
-        """Input types definitions for CTCLoss.
-        """
+        """Input types definitions for RNNTLossPytorch."""
         return {
             "acts": NeuralType(('B', 'T', 'T', 'D'), LogprobsType()),
             "labels": NeuralType(('B', 'T'), LabelsType()),
@@ -126,8 +125,7 @@ class TDTLossPytorch(Loss):
 
     @property
     def input_types(self):
-        """Input types definitions for CTCLoss.
-        """
+        """Input types definitions for TDTLossPytorch."""
         return {
             "acts": NeuralType(('B', 'T', 'T', 'D'), LogprobsType()),
             "labels": NeuralType(('B', 'T'), LabelsType()),
@@ -256,8 +254,7 @@ class MultiblankRNNTLossPytorch(Loss):
 
     @property
     def input_types(self):
-        """Input types definitions for CTCLoss.
-        """
+        """Input types definitions for MultiblankRNNTLossPytorch."""
         return {
             "acts": NeuralType(('B', 'T', 'T', 'D'), LogprobsType()),
             "labels": NeuralType(('B', 'T'), LabelsType()),

diff --git a/nemo/collections/asr/losses/ssl_losses/ctc.py b/nemo/collections/asr/losses/ssl_losses/ctc.py
@@ -22,8 +22,7 @@
 class CTCLossForSSL(Loss):
     @property
     def input_types(self):
-        """Input types definitions for Contrastive.
-        """
+        """Input types definitions for CTCLossForSSL."""
         return {
             "spec_masks": NeuralType(("B", "D", "T"), SpectrogramType()),
             "decoder_outputs": NeuralType(("B", "T", "D"), VoidType()),