RVC-Boss · JackyHe398 · Aug 8, 2025 · Aug 9, 2025
diff --git a/GPT_SoVITS/AR/data/dataset.py b/GPT_SoVITS/AR/data/dataset.py
@@ -61,11 +61,7 @@ def __init__(
         )
         # get dict
         self.path2 = phoneme_path  # "%s/2-name2text.txt"%exp_dir#phoneme_path
-        self.path3 = "%s/3-bert" % (
-            os.path.dirname(
-                phoneme_path,
-            )
-        )  # "%s/3-bert"%exp_dir#bert_dir
+        self.path3 = f"{os.path.dirname(phoneme_path)}/3-bert"  # "%s/3-bert"%exp_dir#bert_dir
         self.path6 = semantic_path  # "%s/6-name2semantic.tsv"%exp_dir#semantic_path
         assert os.path.exists(self.path2)
         assert os.path.exists(self.path6)
@@ -219,7 +215,7 @@ def __getitem__(self, idx: int) -> Dict:
         semantic_ids_len = len(semantic_ids)
 
         flag = 0
-        path_bert = "%s/%s.pt" % (self.path3, item_name)
+        path_bert = f"{self.path3}/{item_name}.pt"
         if os.path.exists(path_bert) == True:
             bert_feature = torch.load(path_bert, map_location="cpu")
         else:

diff --git a/GPT_SoVITS/AR/utils/io.py b/GPT_SoVITS/AR/utils/io.py
@@ -26,5 +26,5 @@ def write_args(args, path):
         args_file.write(str(sys.argv))
         args_file.write("\n==> args:\n")
         for k, v in sorted(args_dict.items()):
-            args_file.write("  %s: %s\n" % (str(k), str(v)))
+            args_file.write(f"  {str(k)}: {str(v)}\n")
         args_file.close()
diff --git a/GPT_SoVITS/TTS_infer_pack/TTS.py b/GPT_SoVITS/TTS_infer_pack/TTS.py
@@ -41,7 +41,7 @@
 
 def resample(audio_tensor, sr0, sr1, device):
     global resample_transform_dict
-    key = "%s-%s-%s" % (sr0, sr1, str(device))
+    key = f"{sr0}-{sr1}-{device!s}"  # !s conversion replaces the redundant str() call
     if key not in resample_transform_dict:
         resample_transform_dict[key] = torchaudio.transforms.Resample(sr0, sr1).to(device)
     return resample_transform_dict[key](audio_tensor)
@@ -489,7 +489,7 @@ def init_vits_weights(self, weights_path: str):
         path_sovits = self.configs.default_configs[model_version]["vits_weights_path"]
 
         if if_lora_v3 == True and os.path.exists(path_sovits) == False:
-            info = path_sovits + i18n("SoVITS %s 底模缺失，无法加载相应 LoRA 权重" % model_version)
+            info = path_sovits + i18n(f"SoVITS {model_version} 底模缺失，无法加载相应 LoRA 权重")
             raise FileExistsError(info)
 
         # dict_s2 = torch.load(weights_path, map_location=self.configs.device,weights_only=False)
@@ -608,7 +608,7 @@ def init_vocoder(self, version: str):
                 self.empty_cache()
 
             self.vocoder = BigVGAN.from_pretrained(
-                "%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,),
+                f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x",
                 use_cuda_kernel=False,
             )  # if True, RuntimeError: Ninja is required to load C++ extensions
             # remove weight norm in the model and set to eval mode
@@ -641,7 +641,7 @@ def init_vocoder(self, version: str):
             )
             self.vocoder.remove_weight_norm()
             state_dict_g = torch.load(
-                "%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,),
+                f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth",
                 map_location="cpu",
                 weights_only=False,
             )

diff --git a/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py b/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py
@@ -143,7 +143,7 @@ def cut2(inp):
 @register_method("cut3")
 def cut3(inp):
     inp = inp.strip("\n")
-    opts = ["%s" % item for item in inp.strip("。").split("。")]
+    opts = inp.strip("。").split("。")  # split() already returns a list of str; no re-formatting needed
     opts = [item for item in opts if not set(item).issubset(punctuation)]
     return "\n".join(opts)
 

diff --git a/GPT_SoVITS/eres2net/kaldi.py b/GPT_SoVITS/eres2net/kaldi.py
@@ -625,18 +625,7 @@ def fbank(
     # size (num_mel_bins, padded_window_size // 2)
     # print(num_mel_bins, padded_window_size, sample_frequency, low_freq, high_freq, vtln_low, vtln_high, vtln_warp)
 
-    cache_key = "%s-%s-%s-%s-%s-%s-%s-%s-%s-%s" % (
-        num_mel_bins,
-        padded_window_size,
-        sample_frequency,
-        low_freq,
-        high_freq,
-        vtln_low,
-        vtln_high,
-        vtln_warp,
-        device,
-        dtype,
-    )
+    cache_key = f"{num_mel_bins}-{padded_window_size}-{sample_frequency}-{low_freq}-{high_freq}-{vtln_low}-{vtln_high}-{vtln_warp}-{device}-{dtype}"
     if cache_key not in cache:
         mel_energies = get_mel_banks(
             num_mel_bins,

diff --git a/GPT_SoVITS/export_torch_script_v3v4.py b/GPT_SoVITS/export_torch_script_v3v4.py
@@ -505,7 +505,7 @@ def init_bigvgan():
     from BigVGAN import bigvgan
 
     bigvgan_model = bigvgan.BigVGAN.from_pretrained(
-        "%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,),
+        f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x",
         use_cuda_kernel=False,
     )  # if True, RuntimeError: Ninja is required to load C++ extensions
     # remove weight norm in the model and set to eval mode
@@ -533,7 +533,7 @@ def init_hifigan():
     hifigan_model.eval()
     hifigan_model.remove_weight_norm()
     state_dict_g = torch.load(
-        "%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,), map_location="cpu"
+        f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth", map_location="cpu"
     )
     print("loading vocoder", hifigan_model.load_state_dict(state_dict_g))
     if is_half == True:
@@ -1042,7 +1042,7 @@ def test_export(
     wav_gen = wav_gen[:, :, :wav_gen_length]
 
     audio = wav_gen[0][0].cpu().detach().numpy()
-    logger.info("end bigvgan %s", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+    logger.info(f"end bigvgan {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
     sr = 24000
     soundfile.write(output, (audio * 32768).astype(np.int16), sr)
 
@@ -1115,7 +1115,7 @@ def test_export(
     wav_gen = torch.cat([wav_gen, zero_wav_torch], 0)
 
     audio = wav_gen.cpu().detach().numpy()
-    logger.info("end bigvgan %s", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+    logger.info(f"end bigvgan {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
     soundfile.write(output, (audio * 32768).astype(np.int16), out_sr)
 
 

diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
@@ -180,6 +180,8 @@ def get_bert_feature(text, word2ph):
     for i in range(len(word2ph)):
         repeat_feature = res[i].repeat(word2ph[i], 1)
         phone_level_feature.append(repeat_feature)
+    if len(phone_level_feature) == 0:
+        return torch.empty((res.shape[1], 0), dtype=res.dtype, device=res.device)
     phone_level_feature = torch.cat(phone_level_feature, dim=0)
     return phone_level_feature.T
 
@@ -235,7 +237,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
     is_exist = is_exist_s2gv3 if model_version == "v3" else is_exist_s2gv4
     path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4
     if if_lora_v3 == True and is_exist == False:
-        info = path_sovits + "SoVITS %s" % model_version + i18n("底模缺失，无法加载相应 LoRA 权重")
+        info = path_sovits + f"SoVITS {model_version}" + i18n("底模缺失，无法加载相应 LoRA 权重")
         gr.Warning(info)
         raise FileExistsError(info)
     dict_language = dict_language_v1 if version == "v1" else dict_language_v2
@@ -320,7 +322,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
         vq_model = vq_model.to(device)
     vq_model.eval()
     if if_lora_v3 == False:
-        print("loading sovits_%s" % model_version, vq_model.load_state_dict(dict_s2["weight"], strict=False))
+        print(f"loading sovits_{model_version}", vq_model.load_state_dict(dict_s2["weight"], strict=False))
     else:
         path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4
         print(
@@ -335,7 +337,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
             init_lora_weights=True,
         )
         vq_model.cfm = get_peft_model(vq_model.cfm, lora_config)
-        print("loading sovits_%s_lora%s" % (model_version, lora_rank))
+        print(f"loading sovits_{model_version}_lora{lora_rank}")
         vq_model.load_state_dict(dict_s2["weight"], strict=False)
         vq_model.cfm = vq_model.cfm.merge_and_unload()
         # torch.save(vq_model.state_dict(),"merge_win.pth")
@@ -442,7 +444,7 @@ def init_bigvgan():
     from BigVGAN import bigvgan
 
     bigvgan_model = bigvgan.BigVGAN.from_pretrained(
-        "%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,),
+        f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x",
         use_cuda_kernel=False,
     )  # if True, RuntimeError: Ninja is required to load C++ extensions
     # remove weight norm in the model and set to eval mode
@@ -472,7 +474,7 @@ def init_hifigan():
     hifigan_model.eval()
     hifigan_model.remove_weight_norm()
     state_dict_g = torch.load(
-        "%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,),
+        f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth",
         map_location="cpu",
         weights_only=False,
     )
@@ -508,7 +510,7 @@ def init_sv_cn():
 
 def resample(audio_tensor, sr0, sr1, device):
     global resample_transform_dict
-    key = "%s-%s-%s" % (sr0, sr1, str(device))
+    key = f"{sr0}-{sr1}-{device!s}"  # !s conversion replaces the redundant str() call
     if key not in resample_transform_dict:
         resample_transform_dict[key] = torchaudio.transforms.Resample(sr0, sr1).to(device)
     return resample_transform_dict[key](audio_tensor)
@@ -1062,7 +1064,7 @@ def cut2(inp):
 
 def cut3(inp):
     inp = inp.strip("\n")
-    opts = ["%s" % item for item in inp.strip("。").split("。")]
+    opts = inp.strip("。").split("。")  # split() already returns a list of str; no re-formatting needed
     opts = [item for item in opts if not set(item).issubset(punctuation)]
     return "\n".join(opts)
 

diff --git a/GPT_SoVITS/inference_webui_fast.py b/GPT_SoVITS/inference_webui_fast.py
@@ -31,7 +31,7 @@ def set_high_priority():
 
 now_dir = os.getcwd()
 sys.path.append(now_dir)
-sys.path.append("%s/GPT_SoVITS" % (now_dir))
+sys.path.append(f"{now_dir}/GPT_SoVITS")
 
 logging.getLogger("markdown_it").setLevel(logging.ERROR)
 logging.getLogger("urllib3").setLevel(logging.ERROR)
@@ -239,7 +239,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
     is_exist = is_exist_s2gv3 if model_version == "v3" else is_exist_s2gv4
     path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4
     if if_lora_v3 == True and is_exist == False:
-        info = path_sovits + "SoVITS %s" % model_version + i18n("底模缺失，无法加载相应 LoRA 权重")
+        info = path_sovits + f"SoVITS {model_version}" + i18n("底模缺失，无法加载相应 LoRA 权重")
         gr.Warning(info)
         raise FileExistsError(info)
     dict_language = dict_language_v1 if version == "v1" else dict_language_v2

diff --git a/GPT_SoVITS/module/data_utils.py b/GPT_SoVITS/module/data_utils.py
@@ -23,15 +23,15 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
 
     def __init__(self, hparams, version=None, val=False):
         exp_dir = hparams.exp_dir
-        self.path2 = "%s/2-name2text.txt" % exp_dir
-        self.path4 = "%s/4-cnhubert" % exp_dir
-        self.path5 = "%s/5-wav32k" % exp_dir
+        self.path2 = f"{exp_dir}/2-name2text.txt"
+        self.path4 = f"{exp_dir}/4-cnhubert"
+        self.path5 = f"{exp_dir}/5-wav32k"
         assert os.path.exists(self.path2)
         assert os.path.exists(self.path4)
         assert os.path.exists(self.path5)
         self.is_v2Pro = version in {"v2Pro", "v2ProPlus"}
         if self.is_v2Pro:
-            self.path7 = "%s/7-sv_cn" % exp_dir
+            self.path7 = f"{exp_dir}/7-sv_cn"
             assert os.path.exists(self.path7)
         names4 = set([name[:-3] for name in list(os.listdir(self.path4))])  # 去除.pt后缀
         names5 = set(os.listdir(self.path5))
@@ -85,7 +85,7 @@ def __init__(self, hparams, version=None, val=False):
                 skipped_phone += 1
                 continue
 
-            size = os.path.getsize("%s/%s" % (self.path5, audiopath))
+            size = os.path.getsize(f"{self.path5}/{audiopath}")
             duration = size / self.sampling_rate / 2
 
             if duration == 0:
@@ -110,9 +110,9 @@ def get_audio_text_speaker_pair(self, audiopath_sid_text):
         audiopath, phoneme_ids = audiopath_sid_text
         text = torch.FloatTensor(phoneme_ids)
         try:
-            spec, wav = self.get_audio("%s/%s" % (self.path5, audiopath))
+            spec, wav = self.get_audio(f"{self.path5}/{audiopath}")
             with torch.no_grad():
-                ssl = torch.load("%s/%s.pt" % (self.path4, audiopath), map_location="cpu")
+                ssl = torch.load(f"{self.path4}/{audiopath}.pt", map_location="cpu")
                 if ssl.shape[-1] != spec.shape[-1]:
                     typee = ssl.dtype
                     ssl = F.pad(ssl.float(), (0, 1), mode="replicate").to(typee)

diff --git a/GPT_SoVITS/module/mel_processing.py b/GPT_SoVITS/module/mel_processing.py
@@ -46,7 +46,7 @@ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False)
     global hann_window
     dtype_device = str(y.dtype) + "_" + str(y.device)
     # wnsize_dtype_device = str(win_size) + '_' + dtype_device
-    key = "%s-%s-%s-%s-%s" % (dtype_device, n_fft, sampling_rate, hop_size, win_size)
+    key = f"{dtype_device}-{n_fft}-{sampling_rate}-{hop_size}-{win_size}"
     # if wnsize_dtype_device not in hann_window:
     if key not in hann_window:
         # hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)
@@ -78,7 +78,7 @@ def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax):
     global mel_basis
     dtype_device = str(spec.dtype) + "_" + str(spec.device)
     # fmax_dtype_device = str(fmax) + '_' + dtype_device
-    key = "%s-%s-%s-%s-%s-%s" % (dtype_device, n_fft, num_mels, sampling_rate, fmin, fmax)
+    key = f"{dtype_device}-{n_fft}-{num_mels}-{sampling_rate}-{fmin}-{fmax}"
     # if fmax_dtype_device not in mel_basis:
     if key not in mel_basis:
         mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
@@ -99,16 +99,7 @@ def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size,
     global mel_basis, hann_window
     dtype_device = str(y.dtype) + "_" + str(y.device)
     # fmax_dtype_device = str(fmax) + '_' + dtype_device
-    fmax_dtype_device = "%s-%s-%s-%s-%s-%s-%s-%s" % (
-        dtype_device,
-        n_fft,
-        num_mels,
-        sampling_rate,
-        hop_size,
-        win_size,
-        fmin,
-        fmax,
-    )
+    fmax_dtype_device = f"{dtype_device}-{n_fft}-{num_mels}-{sampling_rate}-{hop_size}-{win_size}-{fmin}-{fmax}"
     # wnsize_dtype_device = str(win_size) + '_' + dtype_device
     wnsize_dtype_device = fmax_dtype_device
     if fmax_dtype_device not in mel_basis:

diff --git a/GPT_SoVITS/process_ckpt.py b/GPT_SoVITS/process_ckpt.py
@@ -12,9 +12,9 @@
 def my_save(fea, path):  #####fix issue: torch.save doesn't support chinese path
     dir = os.path.dirname(path)
     name = os.path.basename(path)
-    tmp_path = "%s.pth" % (ttime())
+    tmp_path = f"{ttime()}.pth"
     torch.save(fea, tmp_path)
-    shutil.move(tmp_path, "%s/%s" % (dir, name))
+    shutil.move(tmp_path, os.path.join(dir, name))  # join() avoids a root-level "/name" target when path has no directory part
 
 
 from io import BytesIO
@@ -47,14 +47,14 @@ def savee(ckpt, name, epoch, steps, hps, model_version=None, lora_rank=None):
                 continue
             opt["weight"][key] = ckpt[key].half()
         opt["config"] = hps
-        opt["info"] = "%sepoch_%siteration" % (epoch, steps)
+        opt["info"] = f"{epoch}epoch_{steps}iteration"
         if lora_rank:
             opt["lora_rank"] = lora_rank
-            my_save2(opt, "%s/%s.pth" % (hps.save_weight_dir, name), model_version)
+            my_save2(opt, f"{hps.save_weight_dir}/{name}.pth", model_version)
         elif model_version != None and "Pro" in model_version:
-            my_save2(opt, "%s/%s.pth" % (hps.save_weight_dir, name), model_version)
+            my_save2(opt, f"{hps.save_weight_dir}/{name}.pth", model_version)
         else:
-            my_save(opt, "%s/%s.pth" % (hps.save_weight_dir, name))
+            my_save(opt, f"{hps.save_weight_dir}/{name}.pth")
         return "Success."
     except:
         return traceback.format_exc()

diff --git a/GPT_SoVITS/utils.py b/GPT_SoVITS/utils.py
@@ -44,7 +44,7 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, skip_optimizer=False
             )
         except:
             traceback.print_exc()
-            print("error, %s is not in the checkpoint" % k)  # shape不对也会，比如text_embedding当cleaner修改时
+            print(f"error, {k} is not in the checkpoint")  # shape不对也会，比如text_embedding当cleaner修改时
             new_state_dict[k] = v
     if hasattr(model, "module"):
         model.module.load_state_dict(new_state_dict)
@@ -67,9 +67,9 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, skip_optimizer=False
 def my_save(fea, path):  #####fix issue: torch.save doesn't support chinese path
     dir = os.path.dirname(path)
     name = os.path.basename(path)
-    tmp_path = "%s.pth" % (ttime())
+    tmp_path = f"{ttime()}.pth"
     torch.save(fea, tmp_path)
-    shutil.move(tmp_path, "%s/%s" % (dir, name))
+    shutil.move(tmp_path, os.path.join(dir, name))  # join() avoids a root-level "/name" target when path has no directory part
 
 
 def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path):

diff --git a/api.py b/api.py
@@ -147,7 +147,7 @@
 
 now_dir = os.getcwd()
 sys.path.append(now_dir)
-sys.path.append("%s/GPT_SoVITS" % (now_dir))
+sys.path.append(f"{now_dir}/GPT_SoVITS")
 
 import signal
 from text.LangSegmenter import LangSegmenter
@@ -239,7 +239,7 @@ def init_bigvgan():
     from BigVGAN import bigvgan
 
     bigvgan_model = bigvgan.BigVGAN.from_pretrained(
-        "%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,),
+        f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x",
         use_cuda_kernel=False,
     )  # if True, RuntimeError: Ninja is required to load C++ extensions
     # remove weight norm in the model and set to eval mode
@@ -268,7 +268,7 @@ def init_hifigan():
     hifigan_model.eval()
     hifigan_model.remove_weight_norm()
     state_dict_g = torch.load(
-        "%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,),
+        f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth",
         map_location="cpu",
         weights_only=False,
     )
@@ -292,7 +292,7 @@ def init_sv_cn():
 
 def resample(audio_tensor, sr0, sr1, device):
     global resample_transform_dict
-    key = "%s-%s-%s" % (sr0, sr1, str(device))
+    key = f"{sr0}-{sr1}-{device!s}"  # !s conversion replaces the redundant str() call
     if key not in resample_transform_dict:
         resample_transform_dict[key] = torchaudio.transforms.Resample(sr0, sr1).to(device)
     return resample_transform_dict[key](audio_tensor)
@@ -391,7 +391,7 @@ def get_sovits_weights(sovits_path):
     path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4
 
     if if_lora_v3 == True and is_exist == False:
-        logger.info("SoVITS %s 底模缺失，无法加载相应 LoRA 权重" % model_version)
+        logger.info(f"SoVITS {model_version} 底模缺失，无法加载相应 LoRA 权重")
 
     dict_s2 = load_sovits_new(sovits_path)
     hps = dict_s2["config"]