add RUSLAN dataset preprocessor

2021-02-17 13:35:23 +00:00 · 2021-02-17 13:35:23 +00:00 · 2ca74b8ab3
parent 8993120634
commit 2ca74b8ab3
1 changed files with 17 additions and 1 deletions
--- a/TTS/tts/datasets/preprocess.py
+++ b/TTS/tts/datasets/preprocess.py
@ -153,7 +153,8 @@ def mailabs(root_path, meta_files=None):
 def ljspeech(root_path, meta_file):
-    """Normalizes the Nancy meta data file to TTS format"""
+    """Normalizes the LJSpeech meta data file to TTS format
    https://keithito.com/LJ-Speech-Dataset/"""
    txt_file = os.path.join(root_path, meta_file)
    items = []
    speaker_name = "ljspeech"
@ -166,6 +167,21 @@ def ljspeech(root_path, meta_file):
    return items
 def ruslan(root_path, meta_file):
    """Normalizes the RUSLAN meta data file to TTS format
    https://ruslan-corpus.github.io/

    Each non-blank line of the metadata file is split on ``|``; the first
    column is the wav file stem and the second column is the transcript.

    Args:
        root_path: dataset root; wav files are looked up under
            ``<root_path>/RUSLAN/``.
        meta_file: metadata file name, relative to ``root_path``.

    Returns:
        A list of ``[text, wav_file, speaker_name]`` items, one per
        non-blank metadata line, in file order.

    Raises:
        IndexError: if a non-blank line does not contain a ``|`` separator.
    """
    txt_file = os.path.join(root_path, meta_file)
    items = []
    # Single-speaker corpus: label items with the corpus' own name rather
    # than the "ljspeech" label inherited from the function this was
    # copied from.
    speaker_name = "ruslan"
    # The transcripts are Cyrillic, so decode explicitly as UTF-8 instead of
    # relying on the platform's default encoding.
    with open(txt_file, 'r', encoding='utf-8') as ttf:
        for line in ttf:
            # Drop the line terminator so it does not leak into the text
            # column when a row has only two columns.
            line = line.rstrip('\n')
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            cols = line.split('|')
            wav_file = os.path.join(root_path, 'RUSLAN', cols[0] + '.wav')
            text = cols[1]
            items.append([text, wav_file, speaker_name])
    return items
 def css10(root_path, meta_file):
    """Normalizes the CSS10 dataset file to TTS format"""
    txt_file = os.path.join(root_path, meta_file)