mirror of https://github.com/coqui-ai/TTS.git
Fix phonemes
parent
846bf16f02
commit
120332d53f
|
@ -7,7 +7,7 @@ from tqdm.contrib.concurrent import process_map
|
||||||
|
|
||||||
from TTS.config import load_config
|
from TTS.config import load_config
|
||||||
from TTS.tts.datasets import load_tts_samples
|
from TTS.tts.datasets import load_tts_samples
|
||||||
from TTS.tts.utils.text import text2phone
|
from TTS.tts.utils.text import text2phone, phoneme_to_sequence
|
||||||
|
|
||||||
|
|
||||||
def compute_phonemes(item):
|
def compute_phonemes(item):
|
||||||
|
|
|
@ -273,7 +273,7 @@ class TTSDataset(Dataset):
|
||||||
item = args[0]
|
item = args[0]
|
||||||
func_args = args[1]
|
func_args = args[1]
|
||||||
text, wav_file, *_ = item
|
text, wav_file, *_ = item
|
||||||
func_args[3] = item[4]
|
func_args[3] = item[3]
|
||||||
phonemes = TTSDataset._load_or_generate_phoneme_sequence(wav_file, text, *func_args)
|
phonemes = TTSDataset._load_or_generate_phoneme_sequence(wav_file, text, *func_args)
|
||||||
return phonemes
|
return phonemes
|
||||||
|
|
||||||
|
|
|
@ -543,6 +543,7 @@ class Vits(BaseTTS):
|
||||||
"style_wav": style_wav,
|
"style_wav": style_wav,
|
||||||
"d_vector": d_vector,
|
"d_vector": d_vector,
|
||||||
"language_id": language_id,
|
"language_id": language_id,
|
||||||
|
"language_name": language_name,
|
||||||
}
|
}
|
||||||
|
|
||||||
def forward(
|
def forward(
|
||||||
|
@ -1061,6 +1062,7 @@ class Vits(BaseTTS):
|
||||||
d_vector=aux_inputs["d_vector"],
|
d_vector=aux_inputs["d_vector"],
|
||||||
style_wav=aux_inputs["style_wav"],
|
style_wav=aux_inputs["style_wav"],
|
||||||
language_id=aux_inputs["language_id"],
|
language_id=aux_inputs["language_id"],
|
||||||
|
language_name=aux_inputs["language_name"],
|
||||||
enable_eos_bos_chars=self.config.enable_eos_bos_chars,
|
enable_eos_bos_chars=self.config.enable_eos_bos_chars,
|
||||||
use_griffin_lim=True,
|
use_griffin_lim=True,
|
||||||
do_trim_silence=False,
|
do_trim_silence=False,
|
||||||
|
|
|
@ -15,7 +15,7 @@ if "tensorflow" in installed or "tensorflow-gpu" in installed:
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
|
|
||||||
def text_to_seq(text, CONFIG, custom_symbols=None):
|
def text_to_seq(text, CONFIG, custom_symbols=None, language=None):
|
||||||
text_cleaner = [CONFIG.text_cleaner]
|
text_cleaner = [CONFIG.text_cleaner]
|
||||||
# text or phonemes to sequence vector
|
# text or phonemes to sequence vector
|
||||||
if CONFIG.use_phonemes:
|
if CONFIG.use_phonemes:
|
||||||
|
@ -23,7 +23,7 @@ def text_to_seq(text, CONFIG, custom_symbols=None):
|
||||||
phoneme_to_sequence(
|
phoneme_to_sequence(
|
||||||
text,
|
text,
|
||||||
text_cleaner,
|
text_cleaner,
|
||||||
CONFIG.phoneme_language,
|
language if language else CONFIG.phoneme_language,
|
||||||
CONFIG.enable_eos_bos_chars,
|
CONFIG.enable_eos_bos_chars,
|
||||||
tp=CONFIG.characters,
|
tp=CONFIG.characters,
|
||||||
add_blank=CONFIG.add_blank,
|
add_blank=CONFIG.add_blank,
|
||||||
|
@ -212,6 +212,7 @@ def synthesis(
|
||||||
do_trim_silence=False,
|
do_trim_silence=False,
|
||||||
d_vector=None,
|
d_vector=None,
|
||||||
language_id=None,
|
language_id=None,
|
||||||
|
language_name=None,
|
||||||
backend="torch",
|
backend="torch",
|
||||||
):
|
):
|
||||||
"""Synthesize voice for the given text using Griffin-Lim vocoder or just compute output features to be passed to
|
"""Synthesize voice for the given text using Griffin-Lim vocoder or just compute output features to be passed to
|
||||||
|
@ -262,7 +263,7 @@ def synthesis(
|
||||||
if hasattr(model, "make_symbols"):
|
if hasattr(model, "make_symbols"):
|
||||||
custom_symbols = model.make_symbols(CONFIG)
|
custom_symbols = model.make_symbols(CONFIG)
|
||||||
# preprocess the given text
|
# preprocess the given text
|
||||||
text_inputs = text_to_seq(text, CONFIG, custom_symbols=custom_symbols)
|
text_inputs = text_to_seq(text, CONFIG, custom_symbols=custom_symbols, language=language_name)
|
||||||
# pass tensors to backend
|
# pass tensors to backend
|
||||||
if backend == "torch":
|
if backend == "torch":
|
||||||
if speaker_id is not None:
|
if speaker_id is not None:
|
||||||
|
|
Loading…
Reference in New Issue