Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions GPT_SoVITS/AR/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,7 @@ def __init__(
)
# get dict
self.path2 = phoneme_path # "%s/2-name2text.txt"%exp_dir#phoneme_path
self.path3 = "%s/3-bert" % (
os.path.dirname(
phoneme_path,
)
) # "%s/3-bert"%exp_dir#bert_dir
self.path3 = f"{os.path.dirname(phoneme_path)}/3-bert" # "%s/3-bert"%exp_dir#bert_dir
self.path6 = semantic_path # "%s/6-name2semantic.tsv"%exp_dir#semantic_path
assert os.path.exists(self.path2)
assert os.path.exists(self.path6)
Expand Down Expand Up @@ -219,7 +215,7 @@ def __getitem__(self, idx: int) -> Dict:
semantic_ids_len = len(semantic_ids)

flag = 0
path_bert = "%s/%s.pt" % (self.path3, item_name)
path_bert = f"{self.path3}/{item_name}.pt"
if os.path.exists(path_bert) == True:
bert_feature = torch.load(path_bert, map_location="cpu")
else:
Expand Down
2 changes: 1 addition & 1 deletion GPT_SoVITS/AR/utils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@ def write_args(args, path):
args_file.write(str(sys.argv))
args_file.write("\n==> args:\n")
for k, v in sorted(args_dict.items()):
args_file.write(" %s: %s\n" % (str(k), str(v)))
args_file.write(f" {str(k)}: {str(v)}\n")
args_file.close()
8 changes: 4 additions & 4 deletions GPT_SoVITS/TTS_infer_pack/TTS.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@

def resample(audio_tensor, sr0, sr1, device):
    """Resample ``audio_tensor`` from rate ``sr0`` to rate ``sr1``.

    A ``torchaudio.transforms.Resample`` object is created once per
    ``(sr0, sr1, device)`` combination, moved to ``device`` and stored in the
    module-level ``resample_transform_dict``; later calls with the same
    combination reuse the stored object.

    Args:
        audio_tensor: Input passed straight to the resampling transform.
        sr0: Original sample rate.
        sr1: Target sample rate.
        device: Device the transform is moved to; its string form is part of
            the cache key.

    Returns:
        Whatever the cached transform returns for ``audio_tensor``.
    """
    # The cache dict is only mutated, never rebound, so no ``global``
    # declaration is required here.
    cache_key = f"{sr0}-{sr1}-{str(device)}"
    transform = resample_transform_dict.get(cache_key)
    if transform is None:
        transform = torchaudio.transforms.Resample(sr0, sr1).to(device)
        resample_transform_dict[cache_key] = transform
    return transform(audio_tensor)
Expand Down Expand Up @@ -489,7 +489,7 @@ def init_vits_weights(self, weights_path: str):
path_sovits = self.configs.default_configs[model_version]["vits_weights_path"]

if if_lora_v3 == True and os.path.exists(path_sovits) == False:
info = path_sovits + i18n("SoVITS %s 底模缺失,无法加载相应 LoRA 权重" % model_version)
info = path_sovits + i18n(f"SoVITS {model_version} 底模缺失,无法加载相应 LoRA 权重")
raise FileExistsError(info)

# dict_s2 = torch.load(weights_path, map_location=self.configs.device,weights_only=False)
Expand Down Expand Up @@ -608,7 +608,7 @@ def init_vocoder(self, version: str):
self.empty_cache()

self.vocoder = BigVGAN.from_pretrained(
"%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,),
f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x",
use_cuda_kernel=False,
) # if True, RuntimeError: Ninja is required to load C++ extensions
# remove weight norm in the model and set to eval mode
Expand Down Expand Up @@ -641,7 +641,7 @@ def init_vocoder(self, version: str):
)
self.vocoder.remove_weight_norm()
state_dict_g = torch.load(
"%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,),
f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth",
map_location="cpu",
weights_only=False,
)
Expand Down
2 changes: 1 addition & 1 deletion GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def cut2(inp):
@register_method("cut3")
def cut3(inp):
    """Split ``inp`` at every "。" and return the pieces one per line.

    Leading/trailing newlines and then leading/trailing "。" characters are
    stripped before splitting. Pieces whose characters all belong to
    ``punctuation`` (which includes empty pieces) are dropped.

    Args:
        inp: Text to segment.

    Returns:
        The kept pieces joined with ``"\n"``.
    """
    trimmed = inp.strip("\n").strip("。")
    kept = []
    for sentence in trimmed.split("。"):
        # An empty set is a subset of anything, so empty pieces are skipped too.
        if set(sentence).issubset(punctuation):
            continue
        kept.append(sentence)
    return "\n".join(kept)

Expand Down
13 changes: 1 addition & 12 deletions GPT_SoVITS/eres2net/kaldi.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,18 +625,7 @@ def fbank(
# size (num_mel_bins, padded_window_size // 2)
# print(num_mel_bins, padded_window_size, sample_frequency, low_freq, high_freq, vtln_low, vtln_high, vtln_warp)

cache_key = "%s-%s-%s-%s-%s-%s-%s-%s-%s-%s" % (
num_mel_bins,
padded_window_size,
sample_frequency,
low_freq,
high_freq,
vtln_low,
vtln_high,
vtln_warp,
device,
dtype,
)
cache_key = f"{num_mel_bins}-{padded_window_size}-{sample_frequency}-{low_freq}-{high_freq}-{vtln_low}-{vtln_high}-{vtln_warp}-{device}-{dtype}"
if cache_key not in cache:
mel_energies = get_mel_banks(
num_mel_bins,
Expand Down
8 changes: 4 additions & 4 deletions GPT_SoVITS/export_torch_script_v3v4.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ def init_bigvgan():
from BigVGAN import bigvgan

bigvgan_model = bigvgan.BigVGAN.from_pretrained(
"%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,),
f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x",
use_cuda_kernel=False,
) # if True, RuntimeError: Ninja is required to load C++ extensions
# remove weight norm in the model and set to eval mode
Expand Down Expand Up @@ -533,7 +533,7 @@ def init_hifigan():
hifigan_model.eval()
hifigan_model.remove_weight_norm()
state_dict_g = torch.load(
"%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,), map_location="cpu"
f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth", map_location="cpu"
)
print("loading vocoder", hifigan_model.load_state_dict(state_dict_g))
if is_half == True:
Expand Down Expand Up @@ -1042,7 +1042,7 @@ def test_export(
wav_gen = wav_gen[:, :, :wav_gen_length]

audio = wav_gen[0][0].cpu().detach().numpy()
logger.info("end bigvgan %s", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
logger.info(f"end bigvgan {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
sr = 24000
soundfile.write(output, (audio * 32768).astype(np.int16), sr)

Expand Down Expand Up @@ -1115,7 +1115,7 @@ def test_export(
wav_gen = torch.cat([wav_gen, zero_wav_torch], 0)

audio = wav_gen.cpu().detach().numpy()
logger.info("end bigvgan %s", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
logger.info(f"end bigvgan {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
soundfile.write(output, (audio * 32768).astype(np.int16), out_sr)


Expand Down
16 changes: 9 additions & 7 deletions GPT_SoVITS/inference_webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ def get_bert_feature(text, word2ph):
for i in range(len(word2ph)):
repeat_feature = res[i].repeat(word2ph[i], 1)
phone_level_feature.append(repeat_feature)
if len(phone_level_feature) == 0:
return torch.empty((res.shape[1], 0), dtype=res.dtype, device=res.device)
phone_level_feature = torch.cat(phone_level_feature, dim=0)
return phone_level_feature.T

Expand Down Expand Up @@ -235,7 +237,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
is_exist = is_exist_s2gv3 if model_version == "v3" else is_exist_s2gv4
path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4
if if_lora_v3 == True and is_exist == False:
info = path_sovits + "SoVITS %s" % model_version + i18n("底模缺失,无法加载相应 LoRA 权重")
info = path_sovits + f"SoVITS {model_version}" + i18n("底模缺失,无法加载相应 LoRA 权重")
gr.Warning(info)
raise FileExistsError(info)
dict_language = dict_language_v1 if version == "v1" else dict_language_v2
Expand Down Expand Up @@ -320,7 +322,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
vq_model = vq_model.to(device)
vq_model.eval()
if if_lora_v3 == False:
print("loading sovits_%s" % model_version, vq_model.load_state_dict(dict_s2["weight"], strict=False))
print(f"loading sovits_{model_version}", vq_model.load_state_dict(dict_s2["weight"], strict=False))
else:
path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4
print(
Expand All @@ -335,7 +337,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
init_lora_weights=True,
)
vq_model.cfm = get_peft_model(vq_model.cfm, lora_config)
print("loading sovits_%s_lora%s" % (model_version, lora_rank))
print(f"loading sovits_{model_version}_lora{lora_rank}")
vq_model.load_state_dict(dict_s2["weight"], strict=False)
vq_model.cfm = vq_model.cfm.merge_and_unload()
# torch.save(vq_model.state_dict(),"merge_win.pth")
Expand Down Expand Up @@ -442,7 +444,7 @@ def init_bigvgan():
from BigVGAN import bigvgan

bigvgan_model = bigvgan.BigVGAN.from_pretrained(
"%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,),
f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x",
use_cuda_kernel=False,
) # if True, RuntimeError: Ninja is required to load C++ extensions
# remove weight norm in the model and set to eval mode
Expand Down Expand Up @@ -472,7 +474,7 @@ def init_hifigan():
hifigan_model.eval()
hifigan_model.remove_weight_norm()
state_dict_g = torch.load(
"%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,),
f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth",
map_location="cpu",
weights_only=False,
)
Expand Down Expand Up @@ -508,7 +510,7 @@ def init_sv_cn():

def resample(audio_tensor, sr0, sr1, device):
    """Resample ``audio_tensor`` from rate ``sr0`` to rate ``sr1``.

    A ``torchaudio.transforms.Resample`` object is created once per
    ``(sr0, sr1, device)`` combination, moved to ``device`` and stored in the
    module-level ``resample_transform_dict``; later calls with the same
    combination reuse the stored object.

    Args:
        audio_tensor: Input passed straight to the resampling transform.
        sr0: Original sample rate.
        sr1: Target sample rate.
        device: Device the transform is moved to; its string form is part of
            the cache key.

    Returns:
        Whatever the cached transform returns for ``audio_tensor``.
    """
    # The cache dict is only mutated, never rebound, so no ``global``
    # declaration is required here.
    cache_key = f"{sr0}-{sr1}-{str(device)}"
    transform = resample_transform_dict.get(cache_key)
    if transform is None:
        transform = torchaudio.transforms.Resample(sr0, sr1).to(device)
        resample_transform_dict[cache_key] = transform
    return transform(audio_tensor)
Expand Down Expand Up @@ -1062,7 +1064,7 @@ def cut2(inp):

def cut3(inp):
    """Split ``inp`` at every "。" and return the pieces one per line.

    Leading/trailing newlines and then leading/trailing "。" characters are
    stripped before splitting. Pieces whose characters all belong to
    ``punctuation`` (which includes empty pieces) are dropped.

    Args:
        inp: Text to segment.

    Returns:
        The kept pieces joined with ``"\n"``.
    """
    trimmed = inp.strip("\n").strip("。")
    kept = []
    for sentence in trimmed.split("。"):
        # An empty set is a subset of anything, so empty pieces are skipped too.
        if set(sentence).issubset(punctuation):
            continue
        kept.append(sentence)
    return "\n".join(kept)

Expand Down
4 changes: 2 additions & 2 deletions GPT_SoVITS/inference_webui_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def set_high_priority():

now_dir = os.getcwd()
sys.path.append(now_dir)
sys.path.append("%s/GPT_SoVITS" % (now_dir))
sys.path.append(f"{now_dir}/GPT_SoVITS")

logging.getLogger("markdown_it").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
Expand Down Expand Up @@ -239,7 +239,7 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
is_exist = is_exist_s2gv3 if model_version == "v3" else is_exist_s2gv4
path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4
if if_lora_v3 == True and is_exist == False:
info = path_sovits + "SoVITS %s" % model_version + i18n("底模缺失,无法加载相应 LoRA 权重")
info = path_sovits + f"SoVITS {model_version}" + i18n("底模缺失,无法加载相应 LoRA 权重")
gr.Warning(info)
raise FileExistsError(info)
dict_language = dict_language_v1 if version == "v1" else dict_language_v2
Expand Down
14 changes: 7 additions & 7 deletions GPT_SoVITS/module/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):

def __init__(self, hparams, version=None, val=False):
exp_dir = hparams.exp_dir
self.path2 = "%s/2-name2text.txt" % exp_dir
self.path4 = "%s/4-cnhubert" % exp_dir
self.path5 = "%s/5-wav32k" % exp_dir
self.path2 = f"{exp_dir}/2-name2text.txt"
self.path4 = f"{exp_dir}/4-cnhubert"
self.path5 = f"{exp_dir}/5-wav32k"
assert os.path.exists(self.path2)
assert os.path.exists(self.path4)
assert os.path.exists(self.path5)
self.is_v2Pro = version in {"v2Pro", "v2ProPlus"}
if self.is_v2Pro:
self.path7 = "%s/7-sv_cn" % exp_dir
self.path7 = f"{exp_dir}/7-sv_cn"
assert os.path.exists(self.path7)
names4 = set([name[:-3] for name in list(os.listdir(self.path4))]) # 去除.pt后缀
names5 = set(os.listdir(self.path5))
Expand Down Expand Up @@ -85,7 +85,7 @@ def __init__(self, hparams, version=None, val=False):
skipped_phone += 1
continue

size = os.path.getsize("%s/%s" % (self.path5, audiopath))
size = os.path.getsize(f"{self.path5}/{audiopath}")
duration = size / self.sampling_rate / 2

if duration == 0:
Expand All @@ -110,9 +110,9 @@ def get_audio_text_speaker_pair(self, audiopath_sid_text):
audiopath, phoneme_ids = audiopath_sid_text
text = torch.FloatTensor(phoneme_ids)
try:
spec, wav = self.get_audio("%s/%s" % (self.path5, audiopath))
spec, wav = self.get_audio(f"{self.path5}/{audiopath}")
with torch.no_grad():
ssl = torch.load("%s/%s.pt" % (self.path4, audiopath), map_location="cpu")
ssl = torch.load(f"{self.path4}/{audiopath}.pt", map_location="cpu")
if ssl.shape[-1] != spec.shape[-1]:
typee = ssl.dtype
ssl = F.pad(ssl.float(), (0, 1), mode="replicate").to(typee)
Expand Down
15 changes: 3 additions & 12 deletions GPT_SoVITS/module/mel_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False)
global hann_window
dtype_device = str(y.dtype) + "_" + str(y.device)
# wnsize_dtype_device = str(win_size) + '_' + dtype_device
key = "%s-%s-%s-%s-%s" % (dtype_device, n_fft, sampling_rate, hop_size, win_size)
key = f"{dtype_device}-{n_fft}-{sampling_rate}-{hop_size}-{win_size}"
# if wnsize_dtype_device not in hann_window:
if key not in hann_window:
# hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)
Expand Down Expand Up @@ -78,7 +78,7 @@ def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax):
global mel_basis
dtype_device = str(spec.dtype) + "_" + str(spec.device)
# fmax_dtype_device = str(fmax) + '_' + dtype_device
key = "%s-%s-%s-%s-%s-%s" % (dtype_device, n_fft, num_mels, sampling_rate, fmin, fmax)
key = f"{dtype_device}-{n_fft}-{num_mels}-{sampling_rate}-{fmin}-{fmax}"
# if fmax_dtype_device not in mel_basis:
if key not in mel_basis:
mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
Expand All @@ -99,16 +99,7 @@ def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size,
global mel_basis, hann_window
dtype_device = str(y.dtype) + "_" + str(y.device)
# fmax_dtype_device = str(fmax) + '_' + dtype_device
fmax_dtype_device = "%s-%s-%s-%s-%s-%s-%s-%s" % (
dtype_device,
n_fft,
num_mels,
sampling_rate,
hop_size,
win_size,
fmin,
fmax,
)
fmax_dtype_device = f"{dtype_device}-{n_fft}-{num_mels}-{sampling_rate}-{hop_size}-{win_size}-{fmin}-{fmax}"
# wnsize_dtype_device = str(win_size) + '_' + dtype_device
wnsize_dtype_device = fmax_dtype_device
if fmax_dtype_device not in mel_basis:
Expand Down
12 changes: 6 additions & 6 deletions GPT_SoVITS/process_ckpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
def my_save(fea, path):  # fix issue: torch.save doesn't support chinese path
    """Save ``fea`` with ``torch.save`` and place the result at ``path``.

    The object is first written to a temporary file in the current working
    directory whose name is built from ``ttime()``, and that file is then
    moved to the requested location.

    Args:
        fea: Object handed to ``torch.save``.
        path: Destination file path. It may be a bare file name, in which
            case the file ends up in the current working directory.
    """
    # ``target_dir`` is "" for a bare file name; os.path.join then yields just
    # the file name, whereas formatting "<dir>/<name>" would yield "/<name>"
    # and move the file to the filesystem root.
    target_dir = os.path.dirname(path)
    name = os.path.basename(path)
    tmp_path = f"{ttime()}.pth"
    torch.save(fea, tmp_path)
    shutil.move(tmp_path, os.path.join(target_dir, name))


from io import BytesIO
Expand Down Expand Up @@ -47,14 +47,14 @@ def savee(ckpt, name, epoch, steps, hps, model_version=None, lora_rank=None):
continue
opt["weight"][key] = ckpt[key].half()
opt["config"] = hps
opt["info"] = "%sepoch_%siteration" % (epoch, steps)
opt["info"] = f"{epoch}epoch_{steps}iteration"
if lora_rank:
opt["lora_rank"] = lora_rank
my_save2(opt, "%s/%s.pth" % (hps.save_weight_dir, name), model_version)
my_save2(opt, f"{hps.save_weight_dir}/{name}.pth", model_version)
elif model_version != None and "Pro" in model_version:
my_save2(opt, "%s/%s.pth" % (hps.save_weight_dir, name), model_version)
my_save2(opt, f"{hps.save_weight_dir}/{name}.pth", model_version)
else:
my_save(opt, "%s/%s.pth" % (hps.save_weight_dir, name))
my_save(opt, f"{hps.save_weight_dir}/{name}.pth")
return "Success."
except:
return traceback.format_exc()
Expand Down
6 changes: 3 additions & 3 deletions GPT_SoVITS/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, skip_optimizer=False
)
except:
traceback.print_exc()
print("error, %s is not in the checkpoint" % k) # shape不对也会,比如text_embedding当cleaner修改时
print(f"error, {k} is not in the checkpoint") # shape不对也会,比如text_embedding当cleaner修改时
new_state_dict[k] = v
if hasattr(model, "module"):
model.module.load_state_dict(new_state_dict)
Expand All @@ -67,9 +67,9 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, skip_optimizer=False
def my_save(fea, path):  # fix issue: torch.save doesn't support chinese path
    """Save ``fea`` with ``torch.save`` and place the result at ``path``.

    The object is first written to a temporary file in the current working
    directory whose name is built from ``ttime()``, and that file is then
    moved to the requested location.

    Args:
        fea: Object handed to ``torch.save``.
        path: Destination file path. It may be a bare file name, in which
            case the file ends up in the current working directory.
    """
    # ``target_dir`` is "" for a bare file name; os.path.join then yields just
    # the file name, whereas formatting "<dir>/<name>" would yield "/<name>"
    # and move the file to the filesystem root.
    target_dir = os.path.dirname(path)
    name = os.path.basename(path)
    tmp_path = f"{ttime()}.pth"
    torch.save(fea, tmp_path)
    shutil.move(tmp_path, os.path.join(target_dir, name))


def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path):
Expand Down
10 changes: 5 additions & 5 deletions api.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@

now_dir = os.getcwd()
sys.path.append(now_dir)
sys.path.append("%s/GPT_SoVITS" % (now_dir))
sys.path.append(f"{now_dir}/GPT_SoVITS")

import signal
from text.LangSegmenter import LangSegmenter
Expand Down Expand Up @@ -239,7 +239,7 @@ def init_bigvgan():
from BigVGAN import bigvgan

bigvgan_model = bigvgan.BigVGAN.from_pretrained(
"%s/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x" % (now_dir,),
f"{now_dir}/GPT_SoVITS/pretrained_models/models--nvidia--bigvgan_v2_24khz_100band_256x",
use_cuda_kernel=False,
) # if True, RuntimeError: Ninja is required to load C++ extensions
# remove weight norm in the model and set to eval mode
Expand Down Expand Up @@ -268,7 +268,7 @@ def init_hifigan():
hifigan_model.eval()
hifigan_model.remove_weight_norm()
state_dict_g = torch.load(
"%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,),
f"{now_dir}/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth",
map_location="cpu",
weights_only=False,
)
Expand All @@ -292,7 +292,7 @@ def init_sv_cn():

def resample(audio_tensor, sr0, sr1, device):
    """Resample ``audio_tensor`` from rate ``sr0`` to rate ``sr1``.

    A ``torchaudio.transforms.Resample`` object is created once per
    ``(sr0, sr1, device)`` combination, moved to ``device`` and stored in the
    module-level ``resample_transform_dict``; later calls with the same
    combination reuse the stored object.

    Args:
        audio_tensor: Input passed straight to the resampling transform.
        sr0: Original sample rate.
        sr1: Target sample rate.
        device: Device the transform is moved to; its string form is part of
            the cache key.

    Returns:
        Whatever the cached transform returns for ``audio_tensor``.
    """
    # The cache dict is only mutated, never rebound, so no ``global``
    # declaration is required here.
    cache_key = f"{sr0}-{sr1}-{str(device)}"
    transform = resample_transform_dict.get(cache_key)
    if transform is None:
        transform = torchaudio.transforms.Resample(sr0, sr1).to(device)
        resample_transform_dict[cache_key] = transform
    return transform(audio_tensor)
Expand Down Expand Up @@ -391,7 +391,7 @@ def get_sovits_weights(sovits_path):
path_sovits = path_sovits_v3 if model_version == "v3" else path_sovits_v4

if if_lora_v3 == True and is_exist == False:
logger.info("SoVITS %s 底模缺失,无法加载相应 LoRA 权重" % model_version)
logger.info(f"SoVITS {model_version} 底模缺失,无法加载相应 LoRA 权重")

dict_s2 = load_sovits_new(sovits_path)
hps = dict_s2["config"]
Expand Down
Loading