mirror of https://github.com/coqui-ai/TTS.git
add RUSLAN dataset preprocessor
parent
8993120634
commit
2ca74b8ab3
|
@@ -153,7 +153,8 @@ def mailabs(root_path, meta_files=None):
|
||||||
|
|
||||||
|
|
||||||
def ljspeech(root_path, meta_file):
|
def ljspeech(root_path, meta_file):
|
||||||
"""Normalizes the Nancy meta data file to TTS format"""
|
"""Normalizes the LJSpeech meta data file to TTS format
|
||||||
|
https://keithito.com/LJ-Speech-Dataset/"""
|
||||||
txt_file = os.path.join(root_path, meta_file)
|
txt_file = os.path.join(root_path, meta_file)
|
||||||
items = []
|
items = []
|
||||||
speaker_name = "ljspeech"
|
speaker_name = "ljspeech"
|
||||||
|
@@ -166,6 +167,21 @@ def ljspeech(root_path, meta_file):
|
||||||
return items
|
return items
|
||||||
|
|
||||||
|
|
||||||
|
def ruslan(root_path, meta_file):
    """Normalizes the RUSLAN meta data file to TTS format

    https://ruslan-corpus.github.io/

    Each non-blank line of the meta data file is split on ``|``; the first
    column is used as the wav file id and the second column as the
    transcript.

    Args:
        root_path: Dataset root directory. The meta data file is looked up
            relative to it and wav files are resolved under
            ``<root_path>/RUSLAN``.
        meta_file: Name of the meta data file, relative to ``root_path``.

    Returns:
        A list of ``[text, wav_file, speaker_name]`` items, one per
        non-blank line of the meta data file.

    Raises:
        IndexError: If a non-blank line contains no ``|`` separator.
    """
    txt_file = os.path.join(root_path, meta_file)
    items = []
    speaker_name = "ruslan"
    # The transcripts are Russian text: decode explicitly as UTF-8 instead of
    # relying on the platform's locale default encoding.
    with open(txt_file, 'r', encoding='utf-8') as ttf:
        for line in ttf:
            # Drop the line terminator so it does not leak into the text when
            # the transcript is the last column on the line.
            line = line.rstrip('\r\n')
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            cols = line.split('|')
            wav_file = os.path.join(root_path, 'RUSLAN', cols[0] + '.wav')
            text = cols[1]
            items.append([text, wav_file, speaker_name])
    return items
|
||||||
|
|
||||||
|
|
||||||
def css10(root_path, meta_file):
|
def css10(root_path, meta_file):
|
||||||
"""Normalizes the CSS10 dataset file to TTS format"""
|
"""Normalizes the CSS10 dataset file to TTS format"""
|
||||||
txt_file = os.path.join(root_path, meta_file)
|
txt_file = os.path.join(root_path, meta_file)
|
||||||
|
|
Loading…
Reference in New Issue