Fix phonemes

pull/1032/head
WeberJulian 2021-11-06 00:27:58 +01:00 committed by Eren Gölge
parent 846bf16f02
commit 120332d53f
4 changed files with 8 additions and 5 deletions

View File

@@ -7,7 +7,7 @@ from tqdm.contrib.concurrent import process_map
from TTS.config import load_config from TTS.config import load_config
from TTS.tts.datasets import load_tts_samples from TTS.tts.datasets import load_tts_samples
from TTS.tts.utils.text import text2phone from TTS.tts.utils.text import text2phone, phoneme_to_sequence
def compute_phonemes(item): def compute_phonemes(item):

View File

@@ -273,7 +273,7 @@ class TTSDataset(Dataset):
item = args[0] item = args[0]
func_args = args[1] func_args = args[1]
text, wav_file, *_ = item text, wav_file, *_ = item
func_args[3] = item[4] func_args[3] = item[3]
phonemes = TTSDataset._load_or_generate_phoneme_sequence(wav_file, text, *func_args) phonemes = TTSDataset._load_or_generate_phoneme_sequence(wav_file, text, *func_args)
return phonemes return phonemes

View File

@@ -543,6 +543,7 @@ class Vits(BaseTTS):
"style_wav": style_wav, "style_wav": style_wav,
"d_vector": d_vector, "d_vector": d_vector,
"language_id": language_id, "language_id": language_id,
"language_name": language_name,
} }
def forward( def forward(
@@ -1061,6 +1062,7 @@ class Vits(BaseTTS):
d_vector=aux_inputs["d_vector"], d_vector=aux_inputs["d_vector"],
style_wav=aux_inputs["style_wav"], style_wav=aux_inputs["style_wav"],
language_id=aux_inputs["language_id"], language_id=aux_inputs["language_id"],
language_name=aux_inputs["language_name"],
enable_eos_bos_chars=self.config.enable_eos_bos_chars, enable_eos_bos_chars=self.config.enable_eos_bos_chars,
use_griffin_lim=True, use_griffin_lim=True,
do_trim_silence=False, do_trim_silence=False,

View File

@@ -15,7 +15,7 @@ if "tensorflow" in installed or "tensorflow-gpu" in installed:
import tensorflow as tf import tensorflow as tf
def text_to_seq(text, CONFIG, custom_symbols=None): def text_to_seq(text, CONFIG, custom_symbols=None, language=None):
text_cleaner = [CONFIG.text_cleaner] text_cleaner = [CONFIG.text_cleaner]
# text ot phonemes to sequence vector # text ot phonemes to sequence vector
if CONFIG.use_phonemes: if CONFIG.use_phonemes:
@@ -23,7 +23,7 @@ def text_to_seq(text, CONFIG, custom_symbols=None):
phoneme_to_sequence( phoneme_to_sequence(
text, text,
text_cleaner, text_cleaner,
CONFIG.phoneme_language, language if language else CONFIG.phoneme_language,
CONFIG.enable_eos_bos_chars, CONFIG.enable_eos_bos_chars,
tp=CONFIG.characters, tp=CONFIG.characters,
add_blank=CONFIG.add_blank, add_blank=CONFIG.add_blank,
@@ -212,6 +212,7 @@ def synthesis(
do_trim_silence=False, do_trim_silence=False,
d_vector=None, d_vector=None,
language_id=None, language_id=None,
language_name=None,
backend="torch", backend="torch",
): ):
"""Synthesize voice for the given text using Griffin-Lim vocoder or just compute output features to be passed to """Synthesize voice for the given text using Griffin-Lim vocoder or just compute output features to be passed to
@@ -262,7 +263,7 @@ def synthesis(
if hasattr(model, "make_symbols"): if hasattr(model, "make_symbols"):
custom_symbols = model.make_symbols(CONFIG) custom_symbols = model.make_symbols(CONFIG)
# preprocess the given text # preprocess the given text
text_inputs = text_to_seq(text, CONFIG, custom_symbols=custom_symbols) text_inputs = text_to_seq(text, CONFIG, custom_symbols=custom_symbols, language=language_name)
# pass tensors to backend # pass tensors to backend
if backend == "torch": if backend == "torch":
if speaker_id is not None: if speaker_id is not None: