def mozilla(root_path, meta_file):
    """Normalize a Mozilla metadata file to the TTS ``[text, wav_file]`` format.

    Each non-blank line of the metadata file is split on ``|``: the first
    column is the transcript and the second is the wav file name. The part of
    the wav file name before the first underscore is parsed as an integer
    batch number ``N``, and the wav path is built as
    ``<root_path>/batch<N>/wavs_no_processing/<wav file name>``.

    Args:
        root_path: Dataset root directory holding the ``batch<N>`` folders.
        meta_file: Metadata file path, joined onto ``root_path``.

    Returns:
        A list of ``[text, wav_file]`` pairs in file order. The existence of
        the wav files is not checked.

    Raises:
        IndexError: If a non-blank line has no ``|`` separator.
        ValueError: If the wav file name does not start with an integer
            batch number.
    """
    txt_file = os.path.join(root_path, meta_file)
    items = []
    # Explicit encoding so transcripts decode the same way on every platform
    # (assumes the metadata is UTF-8 rather than relying on the locale).
    with open(txt_file, 'r', encoding='utf-8') as ttf:
        for line in ttf:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no sample; skip them instead of failing on cols[1].
            if not line.strip():
                continue
            cols = line.split('|')
            wav_name = cols[1].strip()
            # int() normalizes zero-padded prefixes: "02_x.wav" -> "batch2".
            batch_no = int(wav_name.split("_")[0])
            wav_folder = "batch{}".format(batch_no)
            wav_file = os.path.join(root_path, wav_folder,
                                    "wavs_no_processing", wav_name)
            text = cols[0].strip()
            items.append([text, wav_file])
    return items