mirror of https://github.com/coqui-ai/TTS.git
Merge branch 'dev' of github.com:mozilla/TTS into config_comments
commit
5c17a33789
|
@ -3,10 +3,6 @@ language: python
|
||||||
git:
|
git:
|
||||||
quiet: true
|
quiet: true
|
||||||
|
|
||||||
cache: pip
|
|
||||||
before_cache:
|
|
||||||
- rm ~/.cache/pip/log/debug.log
|
|
||||||
|
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- name: "Lint check"
|
- name: "Lint check"
|
||||||
|
|
|
@ -5,7 +5,7 @@ import torch
|
||||||
import random
|
import random
|
||||||
from torch.utils.data import Dataset
|
from torch.utils.data import Dataset
|
||||||
|
|
||||||
from utils.text import text_to_sequence, phoneme_to_sequence
|
from utils.text import text_to_sequence, phoneme_to_sequence, pad_with_eos_bos
|
||||||
from utils.data import prepare_data, prepare_tensor, prepare_stop_target
|
from utils.data import prepare_data, prepare_tensor, prepare_stop_target
|
||||||
|
|
||||||
|
|
||||||
|
@ -73,34 +73,44 @@ class MyDataset(Dataset):
|
||||||
data = np.load(filename).astype('float32')
|
data = np.load(filename).astype('float32')
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def load_phoneme_sequence(self, wav_file, text):
|
def _generate_and_cache_phoneme_sequence(self, text, cache_path):
    """Convert ``text`` to phoneme IDs and write them to ``cache_path``.

    The sequence is produced with ``enable_eos_bos=False`` on purpose: the
    cached file never contains BOS/EOS markers, so those can be added later
    at load time depending on the config option.

    Args:
        text: raw input text to phonemize.
        cache_path: destination passed to ``np.save`` for the ID array.

    Returns:
        The phoneme IDs as an ``np.int32`` array (the same array that was
        saved).
    """
    cleaner_names = [self.cleaners]
    id_array = np.asarray(
        phoneme_to_sequence(text,
                            cleaner_names,
                            language=self.phoneme_language,
                            enable_eos_bos=False),
        dtype=np.int32)
    # Persist the sequence so later lookups can load it instead of
    # phonemizing again.
    np.save(cache_path, id_array)
    return id_array
|
||||||
|
|
||||||
|
def _load_or_generate_phoneme_sequence(self, wav_file, text):
|
||||||
file_name = os.path.basename(wav_file).split('.')[0]
|
file_name = os.path.basename(wav_file).split('.')[0]
|
||||||
tmp_path = os.path.join(self.phoneme_cache_path,
|
cache_path = os.path.join(self.phoneme_cache_path,
|
||||||
file_name + '_phoneme.npy')
|
file_name + '_phoneme.npy')
|
||||||
if os.path.isfile(tmp_path):
|
try:
|
||||||
try:
|
phonemes = np.load(cache_path)
|
||||||
text = np.load(tmp_path)
|
except FileNotFoundError:
|
||||||
except (IOError, ValueError):
|
phonemes = self._generate_and_cache_phoneme_sequence(text,
|
||||||
print(" > ERROR: phoneme connot be loaded for {}. Recomputing.".format(wav_file))
|
cache_path)
|
||||||
text = np.asarray(
|
except (ValueError, IOError):
|
||||||
phoneme_to_sequence(
|
print(" > ERROR: failed loading phonemes for {}. "
|
||||||
text, [self.cleaners], language=self.phoneme_language, enable_eos_bos=self.enable_eos_bos),
|
"Recomputing.".format(wav_file))
|
||||||
dtype=np.int32)
|
phonemes = self._generate_and_cache_phoneme_sequence(text,
|
||||||
np.save(tmp_path, text)
|
cache_path)
|
||||||
else:
|
if self.enable_eos_bos:
|
||||||
text = np.asarray(
|
phonemes = pad_with_eos_bos(phonemes)
|
||||||
phoneme_to_sequence(
|
|
||||||
text, [self.cleaners], language=self.phoneme_language, enable_eos_bos=self.enable_eos_bos),
|
return phonemes
|
||||||
dtype=np.int32)
|
|
||||||
np.save(tmp_path, text)
|
|
||||||
return text
|
|
||||||
|
|
||||||
def load_data(self, idx):
|
def load_data(self, idx):
|
||||||
text, wav_file, speaker_name = self.items[idx]
|
text, wav_file, speaker_name = self.items[idx]
|
||||||
wav = np.asarray(self.load_wav(wav_file), dtype=np.float32)
|
wav = np.asarray(self.load_wav(wav_file), dtype=np.float32)
|
||||||
|
|
||||||
if self.use_phonemes:
|
if self.use_phonemes:
|
||||||
text = self.load_phoneme_sequence(wav_file, text)
|
text = self._load_or_generate_phoneme_sequence(wav_file, text)
|
||||||
else:
|
else:
|
||||||
text = np.asarray(
|
text = np.asarray(
|
||||||
text_to_sequence(text, [self.cleaners]), dtype=np.int32)
|
text_to_sequence(text, [self.cleaners]), dtype=np.int32)
|
||||||
|
|
|
@ -4,7 +4,8 @@ import re
|
||||||
import phonemizer
|
import phonemizer
|
||||||
from phonemizer.phonemize import phonemize
|
from phonemizer.phonemize import phonemize
|
||||||
from utils.text import cleaners
|
from utils.text import cleaners
|
||||||
from utils.text.symbols import symbols, phonemes, _phoneme_punctuations
|
from utils.text.symbols import symbols, phonemes, _phoneme_punctuations, _bos, \
|
||||||
|
_eos
|
||||||
|
|
||||||
# Mappings from symbol to numeric ID and vice versa:
|
# Mappings from symbol to numeric ID and vice versa:
|
||||||
_SYMBOL_TO_ID = {s: i for i, s in enumerate(symbols)}
|
_SYMBOL_TO_ID = {s: i for i, s in enumerate(symbols)}
|
||||||
|
@ -45,11 +46,12 @@ def text2phone(text, language):
|
||||||
return ph
|
return ph
|
||||||
|
|
||||||
|
|
||||||
|
def pad_with_eos_bos(phoneme_sequence):
    """Wrap a phoneme ID sequence with the BOS and EOS symbol IDs.

    Args:
        phoneme_sequence: iterable of integer phoneme IDs. May be a plain
            list or a 1-D numpy array (e.g. a sequence loaded from the
            phoneme cache).

    Returns:
        list: ``[bos_id] + ids + [eos_id]``. Callers that need an array
        should convert the result themselves.
    """
    # Materialize as a list first. With a numpy array operand,
    # ``list + ndarray`` is element-wise (broadcast) addition rather than
    # concatenation, which would shift every ID and add no boundary symbols.
    return ([_PHONEMES_TO_ID[_bos]] + list(phoneme_sequence) +
            [_PHONEMES_TO_ID[_eos]])
|
||||||
|
|
||||||
|
|
||||||
def phoneme_to_sequence(text, cleaner_names, language, enable_eos_bos=False):
|
def phoneme_to_sequence(text, cleaner_names, language, enable_eos_bos=False):
|
||||||
if enable_eos_bos:
|
sequence = []
|
||||||
sequence = [_PHONEMES_TO_ID['^']]
|
|
||||||
else:
|
|
||||||
sequence = []
|
|
||||||
text = text.replace(":", "")
|
text = text.replace(":", "")
|
||||||
clean_text = _clean_text(text, cleaner_names)
|
clean_text = _clean_text(text, cleaner_names)
|
||||||
to_phonemes = text2phone(clean_text, language)
|
to_phonemes = text2phone(clean_text, language)
|
||||||
|
@ -60,7 +62,7 @@ def phoneme_to_sequence(text, cleaner_names, language, enable_eos_bos=False):
|
||||||
sequence += _phoneme_to_sequence(phoneme)
|
sequence += _phoneme_to_sequence(phoneme)
|
||||||
# Append EOS char
|
# Append EOS char
|
||||||
if enable_eos_bos:
|
if enable_eos_bos:
|
||||||
sequence.append(_PHONEMES_TO_ID['~'])
|
sequence = pad_with_eos_bos(sequence)
|
||||||
return sequence
|
return sequence
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue