Add preprocessor for TWEB dataset

pull/10/head
Eren Golge 2018-12-17 16:33:29 +01:00
parent d38872a4d0
commit 4587c72a03
1 changed file with 14 additions and 4 deletions

View File

@ -13,10 +13,20 @@ def tts_cache(root_path, meta_file):
return items
# def tweb(root_path, meta_file):
# # TODO
# pass
# return
def tweb(root_path, meta_file):
    """Normalize TWEB dataset.
    https://www.kaggle.com/bryanpark/the-world-english-bible-speech-dataset

    Reads the tab-separated metadata file ``meta_file`` located under
    ``root_path``. The first column of each line is used as the wav file
    name (without extension) and the second column as the text.

    Args:
        root_path: directory that contains the metadata file; wav paths are
            built relative to it as well.
        meta_file: name of the metadata file, relative to ``root_path``.

    Returns:
        A list of ``[text, wav_file]`` pairs in random order.

    Raises:
        IndexError: if a non-blank line has fewer than two columns.
    """
    txt_file = os.path.join(root_path, meta_file)
    items = []
    # Explicit encoding so parsing does not depend on the platform locale.
    with open(txt_file, 'r', encoding='utf-8') as ttf:
        for line in ttf:
            # A blank line (e.g. a trailing one at the end of the file) has
            # no columns to read; skip it rather than fail on cols[1].
            if not line.strip():
                continue
            # Drop the line terminator so it does not end up in the text
            # when the text happens to be the last column.
            cols = line.rstrip('\n').split('\t')
            wav_file = os.path.join(root_path, cols[0] + '.wav')
            text = cols[1]
            items.append([text, wav_file])
    random.shuffle(items)
    return items
# def kusal(root_path, meta_file):